## Function

In [1]:
from pathlib import Path
import math
import pandas as pd


# Columns holding the three SAM key responses, in output order [SAM1, SAM2, SAM3].
_SAM_COLUMNS = (
    "stimuli.rating_sam_1.keys",
    "stimuli.rating_sam_2.keys",
    "stimuli.rating_sam_3.keys",
)

# Output timestamp key -> CSV column it is read from.
_SAM_TIMESTAMP_COLUMNS = {
    "sam1_start": "r_SAM_1.started",
    "sam1_end": "r_SAM_1.stopped",
    "sam2_start": "r_SAM_2.started",
    "sam2_end": "r_SAM_2.stopped",
    "sam3_start": "r_SAM_3.started",
    "sam3_end": "r_SAM_3.stopped",
}


def _to_native(value):
    """Return *value* as a plain Python object, mapping missing values to None."""
    # Only scalars can be "missing"; pd.isna on a container returns an array.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    # NumPy scalars (e.g. int64) are not JSON serializable; .item() unwraps them.
    unwrap = getattr(value, "item", None)
    return unwrap() if callable(unwrap) else value


def _first_valid(df, column):
    """Return the first non-missing value of *column*, or None if absent or empty."""
    if column not in df.columns:
        return None
    present = df[column].dropna()
    return None if present.empty else _to_native(present.iloc[0])


def parse_participant_csv(path: str) -> dict:
    """Parse one PsychoPy CSV into the nested participant structure.

    Args:
        path: Path to the CSV file. If the ``participant`` column is missing,
            empty, or not numeric, the participant id is taken from the
            filename prefix before the first underscore.

    Returns:
        A dict with the keys ``id``, ``focus`` (``mean`` and ``all_ratings``),
        ``opinion``, ``timestamp_start``, ``timestamp_end`` and ``stimuli``.
        ``stimuli`` holds one entry per row with a non-missing
        ``video_index``. Missing cells are reported as ``None`` (never NaN),
        so the result can be serialized as strict JSON.

    Raises:
        ValueError: If the participant id cannot be read from the CSV and the
            filename prefix is not an integer.
        KeyError: If video rows exist but the ``video_id`` column does not.
    """
    df = pd.read_csv(path)

    # --- participant-level fields ---
    # Participant id: from the column if usable, otherwise from the filename.
    try:
        participant_id = int(df["participant"].dropna().iloc[0])
    except (KeyError, IndexError, ValueError, TypeError, OverflowError):
        participant_id = int(Path(path).name.split("_", 1)[0])

    # Focus: every focus_slider response (one per video) plus their mean.
    if "stimuli.focus_slider.response" in df.columns:
        focus_ratings = df["stimuli.focus_slider.response"].dropna().tolist()
    else:
        focus_ratings = []
    focus_mean = (
        float(sum(focus_ratings) / len(focus_ratings)) if focus_ratings else None
    )

    # Opinion: first recorded value of the final opinion slider.
    opinion_raw = _first_valid(df, "opinion_slider.response")
    opinion = None if opinion_raw is None else float(opinion_raw)

    # Overall timestamps; both tolerate a column that exists but is empty.
    exp_start = _first_valid(df, "expStart")
    exp_end = _first_valid(df, "expEndTime")

    # --- trial-level: one entry per video ---
    if "video_index" in df.columns:
        trial_rows = df[df["video_index"].notna()]
    else:
        trial_rows = pd.DataFrame()

    stimuli = []
    for _, row in trial_rows.iterrows():
        # row.get yields None for an absent column, so absent and empty cells
        # both end up as None.
        sam_values = []
        for column in _SAM_COLUMNS:
            raw = _to_native(row.get(column))
            sam_values.append(None if raw is None else float(raw))

        timestamps = {
            key: _to_native(row.get(column))
            for key, column in _SAM_TIMESTAMP_COLUMNS.items()
        }

        stimuli.append(
            {
                "id": int(row["video_index"]),
                "video_id": _to_native(row["video_id"]),
                "video_response": {
                    "belief": _to_native(row.get("belief")),
                    "sam": sam_values,  # [SAM1, SAM2, SAM3]
                    "timestamps": timestamps,
                    # trusted = all three SAM ratings present
                    "trusted": all(v is not None for v in sam_values),
                },
            }
        )

    # Final participant object.
    return {
        "id": participant_id,
        "focus": {
            "mean": focus_mean,
            "all_ratings": focus_ratings,  # drop this if only the mean is needed
        },
        "opinion": opinion,
        "timestamp_start": exp_start,
        "timestamp_end": exp_end,
        "stimuli": stimuli,
    }


## Looping over all CSV files

In [None]:
from glob import glob

# Location and naming scheme of the per-participant PsychoPy exports.
data_folder = "02455-EXPERIMENT-IN-COGNITIVE-SCIENCE/trials"
pattern = f"{data_folder}/*_manipulation-of-belief_*.csv"

# Parse every matching file and keep the results ordered by participant id.
all_participants = sorted(
    (parse_participant_csv(csv_path) for csv_path in glob(pattern)),
    key=lambda participant: participant["id"],
)

In [5]:
import json

# Persist the parsed participants as pretty-printed JSON in the working directory.
with open("experiment_data.json", mode="w", encoding="utf-8") as out_file:
    json.dump(obj=all_participants, fp=out_file, indent=2)
