## Load heart-rate CSV for one participant

In [1]:
from datetime import datetime
import pandas as pd

def load_hr_file(path: str) -> pd.DataFrame:
    """
    Load one participant's heart-rate export into a typed DataFrame.

    The file is read with ';' as the separator, so comma-separated lines are
    not split and each one arrives as a single string in the first column.
    ``read_csv`` consumes the first line of the file as its own header; of the
    rows that remain, the one at position 2 holds the comma-separated column
    names and every row after it is treated as data.

    Parameters
    ----------
    path : str
        Location of the participant CSV file.

    Returns
    -------
    pd.DataFrame
        Columns named after the file's header line, with ``RR`` and
        ``ArtifactCorrectedRR`` cast to float, ``RawArtifact`` cast to int and
        ``ts`` parsed to datetime. Row labels are those of the raw read, so
        the index does not start at 0.
    """
    # Keep only the first column: one string per line of the file.
    single_col = pd.read_csv(path, sep=';', engine='python').iloc[:, 0]

    # Position 2 carries the column names; positions 3+ are the records.
    column_names = single_col.iloc[2].split(',')
    table = single_col.iloc[3:].str.split(',', expand=True)
    table.columns = column_names

    # Everything is still text at this point: cast the numeric fields one by
    # one, then parse the timestamps.
    for name, dtype in (("RR", float), ("ArtifactCorrectedRR", float), ("RawArtifact", int)):
        table[name] = table[name].astype(dtype)
    table["ts"] = pd.to_datetime(table["ts"])   # e.g. '2025-11-12 14:44:00.880'

    return table


In [2]:
# Smoke-test the loader on participant 1 and print the first five rows.
hr_df = load_hr_file(path="hr_data/participant1.csv")
print(hr_df.head(n=5))


FileNotFoundError: [Errno 2] No such file or directory: 'hr_data/participant1.csv'

## Parse video timestamps from the JSON

In [None]:
from datetime import datetime

def parse_exptimestamp(s: str) -> datetime:
    """
    Convert an experiment timestamp string into a timezone-aware datetime.

    Expected shape: '2025-11-12 14h50.45.373178 +0100', i.e. three fields
    separated by single spaces: date, 'HHhMM.SS.ffffff' clock, UTC offset.

    Raises ValueError when the string does not split into exactly three
    fields or the rewritten text does not match the strptime format.
    """
    day, clock, offset = s.split(" ")
    # 'HHhMM.SS.ffffff' -> 'HH:MM:SS.ffffff': every 'h' becomes ':', then only
    # the first '.' does, so the fractional-seconds dot is left in place.
    clock = clock.replace("h", ":").replace(".", ":", 1)
    normalised = " ".join((day, clock, offset))
    return datetime.strptime(normalised, "%Y-%m-%d %H:%M:%S.%f %z")


In [None]:
def json_time_to_naive(s: str):
    """
    Parse a timestamp taken from the JSON and strip its timezone info.

    None is passed through unchanged. Any other value is parsed with
    parse_exptimestamp and returned as a naive datetime: the wall-clock
    fields are kept as written and only the tzinfo is removed (no conversion
    to another timezone takes place).
    """
    return None if s is None else parse_exptimestamp(s).replace(tzinfo=None)


## Match each video to the heart-rate segment

In [None]:
import json

# Read the whole experiment description from disk and decode it as JSON.
with open("experiment_data.json", encoding="utf-8") as f:
    participants = json.loads(f.read())


In [None]:
# Pick participant 1 from the JSON. next() gets a default so that an absent
# participant produces a readable error instead of a bare StopIteration.
p1 = next((p for p in participants if p["participant_id"] == 1), None)
if p1 is None:
    raise LookupError("no entry with participant_id == 1 in the experiment JSON")

# Load the heart-rate CSV for the same participant.
hr_df = load_hr_file("hr_data/participant1.csv")

# load_hr_file already parses `ts`; repeating the conversion is harmless and
# keeps this cell working if hr_df was produced some other way.
hr_df["ts"] = pd.to_datetime(hr_df["ts"])

# video_id -> copy of the HR rows whose `ts` lies within [video_start, video_end]
video_segments = {}

for stim in p1["stimuli"]:
    # .get() so a stimulus that lacks the key is skipped exactly like one
    # whose value is null, rather than aborting the loop with a KeyError.
    v_start = json_time_to_naive(stim.get("video_start"))
    v_end = json_time_to_naive(stim.get("video_end"))

    if v_start is None or v_end is None:
        continue  # no complete start/end pair for this stimulus

    # between() includes both bounds by default, same as `>= start & <= end`.
    mask = hr_df["ts"].between(v_start, v_end)
    seg = hr_df.loc[mask].copy()  # copy so later edits do not touch hr_df

    video_id = stim["video_id"]
    video_segments[video_id] = seg

    print(video_id, len(seg), "HR rows")


4171487-uhd_3840_2160_30fps.mp4 139 HR rows
5768645-uhd_3840_2160_25fps.mp4 149 HR rows
18840567-hd_1920_1080_30fps.mp4 129 HR rows
11946387_3840_2160_30fps.mp4 150 HR rows
