## Parsing functions

In [26]:
from pathlib import Path
import math
from datetime import datetime, timezone
import pandas as pd


def _nan_to_none(value):
    """Return ``None`` for a missing cell (``None``/NaN), else the value unchanged.

    Python's ``json`` module writes NaN as the non-standard ``NaN`` token, so
    per-trial values are normalised to ``None`` -- the same marker that is
    used for missing SAM ratings.
    """
    return None if pd.isna(value) else value


def parse_participant_csv(path: str) -> dict:
    """Parse one PsychoPy CSV into the nested per-participant structure.

    Parameters
    ----------
    path : str
        Path of the CSV file. The file name is only used as a fallback source
        for the participant id (the text before the first ``_``).

    Returns
    -------
    dict
        ``participant_id`` (int), ``focus`` (``mean`` and ``all_ratings``),
        ``opinion`` (float or None), ``timestamp_start`` / ``timestamp_end``
        (strings or None) and ``stimuli``: one dict per row that has a
        ``video_index``, holding ``video_index``, ``video_id`` and a
        ``video_response`` with ``belief``, the three ``sam`` ratings, the SAM
        ``timestamps`` and the ``trusted`` flag.

    Raises
    ------
    ValueError
        If the participant id can be read neither from the ``participant``
        column nor from the file-name prefix.
    KeyError
        If trial rows exist but the ``video_id`` column is missing.

    Notes
    -----
    Prints the experiment duration in minutes when both a start and an end
    timestamp are available.
    """

    df = pd.read_csv(path)

    # --- participant-level fields ---
    # participant id: from column if present, otherwise from filename prefix.
    # Only the failures of that lookup/conversion trigger the fallback.
    try:
        participant_id = int(df["participant"].dropna().iloc[0])
    except (KeyError, IndexError, ValueError, TypeError, OverflowError):
        participant_id = int(Path(path).name.split("_", 1)[0])

    # focus: all focus_slider responses (per video) + mean
    focus_ratings = []
    if "stimuli.focus_slider.response" in df.columns:
        focus_ratings = df["stimuli.focus_slider.response"].dropna().tolist()

    focus_mean = float(sum(focus_ratings) / len(focus_ratings)) if focus_ratings else None

    # opinion: first non-missing value of the opinion slider column
    if "opinion_slider.response" in df.columns and df["opinion_slider.response"].notna().any():
        opinion = float(df["opinion_slider.response"].dropna().iloc[0])
    else:
        opinion = None

    # --- overall timestamps ---
    if "expStart" in df.columns and df["expStart"].notna().any():
        raw_start = df["expStart"].dropna().iloc[0]
        exp_start_dt = parse_expstart(raw_start)
        # keep the original string for the start timestamp
        exp_start = raw_start
    else:
        exp_start_dt = None
        exp_start = None

    # The end time is only reported when the start could be parsed, because
    # the start's timezone is needed to express the end in the same zone.
    if (
        "expEndTime" in df.columns
        and df["expEndTime"].notna().any()
        and exp_start_dt is not None
    ):
        end_epoch = float(df["expEndTime"].dropna().iloc[0])
        # convert from Unix seconds -> datetime in same timezone as start
        exp_end_dt = datetime.fromtimestamp(end_epoch, tz=timezone.utc).astimezone(
            exp_start_dt.tzinfo
        )
        # format to same style as expStart: 'YYYY-MM-DD HHhMM.SS.mmmmmm +ZZZZ'
        exp_end = exp_end_dt.strftime("%Y-%m-%d %Hh%M.%S.%f %z")

        # show the duration in minutes as a quick sanity check
        duration_minutes = (exp_end_dt - exp_start_dt).total_seconds() / 60
        print(f"Participant {participant_id} duration: {duration_minutes:.2f} minutes")
    else:
        exp_end = None

    # --- trial-level: one entry per video ---
    if "video_index" in df.columns:
        trial_rows = df[df["video_index"].notna()]
    else:
        trial_rows = pd.DataFrame()

    # SAM rating columns, in output order [SAM1, SAM2, SAM3]
    sam_cols = [
        "stimuli.rating_sam_1.keys",
        "stimuli.rating_sam_2.keys",
        "stimuli.rating_sam_3.keys",
    ]

    stimuli = []
    for _, row in trial_rows.iterrows():
        # SAM ratings for the video; None marks a missing column or empty cell
        sam_values = [
            float(row[col]) if col in df.columns and not pd.isna(row[col]) else None
            for col in sam_cols
        ]

        # trusted = all three SAM ratings present
        trusted = all(v is not None for v in sam_values)

        # start/stop time of each SAM rating; None when column or cell is missing
        timestamps = {}
        for n in (1, 2, 3):
            timestamps[f"sam{n}_start"] = _nan_to_none(row.get(f"r_SAM_{n}.started"))
            timestamps[f"sam{n}_end"] = _nan_to_none(row.get(f"r_SAM_{n}.stopped"))

        stim_struct = {
            # distinct keys: a repeated "video_id" key would drop the index
            "video_index": int(row["video_index"]),
            "video_id": _nan_to_none(row["video_id"]),
            "video_response": {
                "belief": _nan_to_none(row.get("belief")),
                "sam": sam_values,  # [SAM1, SAM2, SAM3]
                "timestamps": timestamps,
                "trusted": trusted,
            },
        }
        stimuli.append(stim_struct)

    # final participant object
    return {
        "participant_id": participant_id,
        "focus": {
            "mean": focus_mean,
            "all_ratings": focus_ratings,  # you can remove this if you only want mean
        },
        "opinion": opinion,
        "timestamp_start": exp_start,
        "timestamp_end": exp_end,
        "stimuli": stimuli,
    }


In [27]:
def parse_expstart(exp_start_str: str) -> datetime:
    """
    Turn a start stamp such as '2025-11-12 14h45.49.380213 +0100'
    into a timezone-aware datetime (here 2025-11-12 14:45:49.380213+01:00).

    The string must consist of exactly three space-separated fields
    (date, time, UTC offset); anything else raises ValueError.
    """
    day, clock, utc_offset = exp_start_str.split(" ")

    # 'HHhMM.SS.ffffff' -> 'HH:MM:SS.ffffff': the 'h' and only the first '.'
    # become ':'; the second '.' stays as the fractional-seconds separator.
    clock = clock.replace("h", ":").replace(".", ":", 1)

    return datetime.strptime(
        " ".join((day, clock, utc_offset)),
        "%Y-%m-%d %H:%M:%S.%f %z",
    )


## Looping over all CSV files

In [None]:
from pathlib import Path
from glob import glob

# The CSVs are looked up in a "trials" folder that sits next to the folder the
# notebook is run from (i.e. a sibling of the current working directory).
notebook_dir = Path.cwd()
root = notebook_dir.parent
data_folder = root / "trials"
pattern = str(data_folder / "*_manipulation-of-belief_*.csv")

# Scan the folder once so the printed list and the parsed list cannot differ;
# glob returns files in arbitrary order, so sort for a reproducible run.
csv_paths = sorted(glob(pattern))

print("Looking for:", pattern)
print("Found:", csv_paths)

all_participants = []

for csv_path in csv_paths:
    participant_struct = parse_participant_csv(csv_path)
    all_participants.append(participant_struct)

# sort by participant id just to be neat ..
all_participants.sort(key=lambda p: p["participant_id"])

Looking for: c:\Users\jasmi\OneDrive\Skrivebord\DTU\5-semester\Eksperimenter i Kognitionsvidenskab\experiment\02455-Experiment-In-Cognitive-Science\trials\*_manipulation-of-belief_*.csv
Found: ['c:\\Users\\jasmi\\OneDrive\\Skrivebord\\DTU\\5-semester\\Eksperimenter i Kognitionsvidenskab\\experiment\\02455-Experiment-In-Cognitive-Science\\trials\\11_manipulation-of-belief_2025-11-14_15h40.56.727.csv', 'c:\\Users\\jasmi\\OneDrive\\Skrivebord\\DTU\\5-semester\\Eksperimenter i Kognitionsvidenskab\\experiment\\02455-Experiment-In-Cognitive-Science\\trials\\12_manipulation-of-belief_2025-11-15_09h47.21.280.csv', 'c:\\Users\\jasmi\\OneDrive\\Skrivebord\\DTU\\5-semester\\Eksperimenter i Kognitionsvidenskab\\experiment\\02455-Experiment-In-Cognitive-Science\\trials\\13_manipulation-of-belief_2025-11-15_10h16.28.762.csv', 'c:\\Users\\jasmi\\OneDrive\\Skrivebord\\DTU\\5-semester\\Eksperimenter i Kognitionsvidenskab\\experiment\\02455-Experiment-In-Cognitive-Science\\trials\\15_manipulation-of-bel

In [29]:
import json

# Serialize first: if a value cannot be encoded, the error is raised before the
# output file is opened, so an existing experiment_data.json is not truncated.
# NOTE: with json's default settings a NaN value is written as the
# non-standard token `NaN`.
serialized = json.dumps(all_participants, indent=2)

with open("experiment_data.json", "w", encoding="utf-8") as f:
    f.write(serialized)
