# Heart Rate Processing Script for Segmenting Participants
This notebook demonstrates how to load, clean, and extract HR data for each person from Firstbeat IBI files.


## 1. Import Required Libraries

In [1]:
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta

## 2. Function: Load Firstbeat IBI File
This function reads and processes the raw `.csv` exported from Firstbeat.

In [2]:
def load_firstbeat_file(path):
    """
    Load a Firstbeat IBI export and attach a timestamp to every beat.

    File layout, as parsed here:
    - Line 1: metadata (ignored).
    - Line 2: metadata containing ``Start time: DD.MM.YYYY HH:MM:SS``.
    - Line 3: column labels (ignored; fixed column names are applied).
    - Remaining lines: semicolon-separated values, of which only the first
      three fields are used.

    Blank rows are skipped, non-numeric fields become NaN, and rows without a
    numeric ArtifactCorrectedRR are dropped.

    Args:
        path: Path to the exported file.

    Returns:
        tuple: ``(df, raw_start_ts)`` where ``df`` has the columns RR,
        ArtifactCorrectedRR, RawArtifact and ts_raw, and ``raw_start_ts`` is
        the ``datetime`` parsed from the header. ``ts_raw`` is the start time
        plus the running sum of ArtifactCorrectedRR, interpreted as
        milliseconds. A file without usable rows yields an empty frame with
        the same columns.

    Raises:
        ValueError: If the second line has no ``Start time:`` entry or the
            timestamp does not match ``%d.%m.%Y %H:%M:%S``.
    """
    columns = ["RR", "ArtifactCorrectedRR", "RawArtifact"]

    # Read the file once; both the header and the data rows come from `lines`.
    with open(path, "r") as f:
        lines = f.readlines()

    if len(lines) < 2 or "Start time:" not in lines[1]:
        raise ValueError(
            f"{path}: second line must contain 'Start time: <dd.mm.yyyy HH:MM:SS>'"
        )

    raw_start_str = lines[1].split("Start time:")[1].strip()
    raw_start_ts = datetime.strptime(raw_start_str, "%d.%m.%Y %H:%M:%S")

    rows = []
    for line in lines[3:]:
        if not line.strip():
            continue
        fields = line.rstrip("\r\n").split(";")[:3]
        # Pad short rows so every record has exactly three fields.
        fields += [None] * (len(columns) - len(fields))
        rows.append(fields)

    # dtype=object keeps the raw strings so to_numeric decides int vs float.
    raw_split = pd.DataFrame(rows, columns=columns, dtype=object)
    for col in columns:
        raw_split[col] = pd.to_numeric(raw_split[col], errors="coerce")
    raw_split = raw_split.dropna(subset=["ArtifactCorrectedRR"]).reset_index(drop=True)

    # Accumulate in milliseconds (the unit of the RR columns) and convert once;
    # this is vectorised and gives a datetime column even when there are no rows.
    elapsed_ms = raw_split["ArtifactCorrectedRR"].astype("float64").cumsum()
    raw_split["ts_raw"] = raw_start_ts + pd.to_timedelta(elapsed_ms, unit="ms")

    return raw_split, raw_start_ts

## 3. Load Metadata
Metadata includes participant start/end timestamps for extraction.

In [3]:
# Which participants to cut out of which recording: IBI filename -> participant IDs.
EXTRACTIONS = {"test39_39_20251126_145748_IBI.csv": [23]}

# Input/output locations for this sensor.
metadata_path = "trials_sensor11.csv"
hr_data_folder = "sensor11/"
output_folder = "participants_split_sensor11/"

os.makedirs(output_folder, exist_ok=True)

# Parse the trial date, then combine it with the start/end times
# to get full timestamps for every participant row.
meta = (
    pd.read_csv(metadata_path)
    .assign(Date_dt=lambda frame: pd.to_datetime(frame["Date"], format="%d-%b-%y"))
    .assign(
        Start_ts=lambda frame: pd.to_datetime(
            frame["Date_dt"].astype(str) + " " + frame["Start time (firstbeat)"]
        ),
        End_ts=lambda frame: pd.to_datetime(
            frame["Date_dt"].astype(str) + " " + frame["End time (firstbeat)"]
        ),
    )
)

## 4. Extraction Loop
Iterates over selected files and extracts HR segments for chosen participants.

In [4]:
# For every configured recording, cut out each requested participant's time
# window and write it to its own CSV (three header lines, a blank line, then data).
for filename, participants in EXTRACTIONS.items():
    filepath = os.path.join(hr_data_folder, filename)
    if not os.path.exists(filepath):
        print(f"❌ File not found: {filepath}")
        continue

    print(f"\nProcessing file: {filename}")
    # The recording date is the third underscore-separated token of the filename (YYYYMMDD).
    file_date = pd.to_datetime(filename.split("_")[2], format="%Y%m%d")
    day_meta = meta[meta["Date_dt"] == file_date]

    df_raw, raw_start_ts = load_firstbeat_file(filepath)
    if df_raw.empty:
        # Without at least one beat there is no first timestamp to align on;
        # the .iloc[0] lookup below would raise IndexError.
        print(f"⚠ No usable IBI rows in {filename}")
        continue

    # Same for every participant in this file, so look it up once.
    first_beat_ts = df_raw["ts_raw"].iloc[0]

    for pid in participants:
        row = day_meta[day_meta["Participant ID"] == pid]
        if row.empty:
            print(f"⚠ Participant {pid} missing for this date.")
            continue

        # If several metadata rows match, the first one is used.
        row = row.iloc[0]
        start_ts, end_ts = row["Start_ts"], row["End_ts"]

        print(f"→ Extracting participant {pid} ({start_ts} → {end_ts})")

        # NOTE(review): this shifts the whole recording so that its first beat
        # lands exactly on start_ts, i.e. every participant's window begins at
        # the first beat in the file regardless of the file's own start time.
        # Confirm this is intended when one file holds several participants.
        time_offset = start_ts - first_beat_ts
        df_raw["ts"] = df_raw["ts_raw"] + time_offset

        p_df = df_raw[(df_raw["ts"] >= start_ts) & (df_raw["ts"] <= end_ts)]
        if p_df.empty:
            print(f"⚠ No HR data found for participant {pid}")
            continue

        clean_df = p_df[["RR", "ArtifactCorrectedRR", "RawArtifact", "ts"]]

        outpath = os.path.join(output_folder, f"participant{pid}.csv")

        # Write the small header first, then append the table to the same file.
        with open(outpath, "w") as f:
            f.write(f"Participant ID: {pid}\n")
            f.write(f"Start time: {start_ts}\n")
            f.write(f"End time: {end_ts}\n\n")

        clean_df.to_csv(outpath, mode="a", index=False)
        print(f"✓ Saved to {outpath}")


Processing file: test39_39_20251126_145748_IBI.csv
→ Extracting participant 23 (2025-11-26 14:57:00 → 2025-11-26 15:12:00)
✓ Saved to participants_split_sensor11/participant23.csv
