In [9]:
import pandas as pd
from pathlib import Path


In [10]:
def extract_temporal_discounting(df):
    """
    Select the temporal discounting trials from a task DataFrame.

    A row is kept when its ``phase`` value equals ``'response'``. The result
    is restricted to the trial columns listed below (any that are absent
    from ``df`` are skipped) and is re-indexed from 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Task data containing a ``phase`` column.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows; ``df`` itself is not modified.
    """
    wanted_columns = (
        "participant_id",
        "trial_index",
        "rt",
        "choice_dir",
        "choseLL",
        "r_a", "l_a",
        "r_d", "l_d",
        "best_side",
        "chose_best",
        "trial_pointsTempDiscoun",
        "totalPointsTempDiscoun",
    )

    is_response = df["phase"] == "response"
    response_rows = df[is_response].copy()

    # Keep only the expected columns that this export actually contains,
    # preserving the order in which they are listed above.
    present_columns = [
        name for name in wanted_columns if name in response_rows.columns
    ]
    trials = response_rows[present_columns]
    return trials.reset_index(drop=True)


def extract_interval_production(df):
    """
    Select the interval production trials from a task DataFrame.

    A row is kept when its ``phase`` value equals ``'intervalProdTrial'``.
    The result is restricted to the trial columns listed below (any that are
    absent from ``df`` are skipped) and is re-indexed from 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Task data containing a ``phase`` column.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows; ``df`` itself is not modified.
    """
    wanted_columns = (
        "participant_id",
        "trial_index",
        "delayLenIP",
        "rt_production",
        "stimulus_start_time",
        "trial_num_intProd",
    )

    is_production = df["phase"] == "intervalProdTrial"
    production_rows = df[is_production].copy()

    # Keep only the expected columns that this export actually contains,
    # preserving the order in which they are listed above.
    present_columns = [
        name for name in wanted_columns if name in production_rows.columns
    ]
    trials = production_rows[present_columns]
    return trials.reset_index(drop=True)


In [11]:
def process_csv_to_h5(csv_path, h5_dir):
    """
    Convert one task CSV into a per-participant HDF5 file.

    The CSV is loaded, its ``phase`` values are normalised (cast to string,
    surrounding whitespace stripped), and the temporal discounting and
    interval production trials are extracted and written to
    ``<h5_dir>/sub-<participant_id>.h5`` under the keys
    ``temporal_discounting/trials`` and ``interval_production/trials``.

    The output file is opened with ``mode="w"``, so an existing file with the
    same name is replaced.

    Parameters
    ----------
    csv_path : str or pathlib.Path
        CSV file to convert. Must contain ``phase`` and ``participant_id``
        columns.
    h5_dir : str or pathlib.Path
        Output directory; created (with parents) if it does not exist.

    Returns
    -------
    tuple
        ``(td, ip, h5_path)``: the two extracted DataFrames and the path of
        the file that was written.

    Raises
    ------
    KeyError
        If the CSV lacks the ``phase`` or ``participant_id`` column.
    IndexError
        If ``participant_id`` contains no non-null value.
    RuntimeError
        If neither task contributed any trials; nothing is written.
    """
    csv_path = Path(csv_path)
    h5_dir = Path(h5_dir)
    h5_dir.mkdir(parents=True, exist_ok=True)

    # --- Load CSV ---
    df = pd.read_csv(csv_path)

    # Fail with a readable message instead of a bare KeyError: 'phase'.
    missing = [col for col in ("phase", "participant_id") if col not in df.columns]
    if missing:
        raise KeyError(
            f"{csv_path.name} is missing required column(s): {', '.join(missing)}"
        )

    df["phase"] = df["phase"].astype(str).str.strip()

    # Subject ID: first non-null participant_id in the file.
    # NOTE(review): assumes one participant per CSV — confirm.
    sub_id = df["participant_id"].dropna().iloc[0]
    h5_path = h5_dir / f"sub-{sub_id}.h5"

    # --- Extract ---
    td = extract_temporal_discounting(df)
    ip = extract_interval_production(df)

    print(f"[INFO] Extracted TD={len(td)} | IP={len(ip)}")

    if td.empty and ip.empty:
        raise RuntimeError("No task trials found — aborting write")

    tables = (
        ("temporal_discounting/trials", td),
        ("interval_production/trials", ip),
    )

    # Surface a half-empty file instead of letting it pass unnoticed.
    # NOTE(review): pandas appears to skip zero-length frames when
    # format="table", which would leave that key absent from the file —
    # confirm against the installed pandas version.
    for key, frame in tables:
        if frame.empty:
            print(f"[WARN] No rows for '{key}' in {csv_path.name}")

    # --- Write ---
    with pd.HDFStore(h5_path, mode="w") as store:
        for key, frame in tables:
            store.put(key, frame, format="table")

    print(f"[SUCCESS] Wrote {h5_path}")

    return td, ip, h5_path


In [12]:
# Single-file run: convert one participant's CSV and keep the extracted frames
# and the output path in module-level names so the cells below can compare
# them against what is read back from the HDF5 file.
# NOTE(review): both locations are absolute paths on one Windows machine;
# adjust them before running anywhere else.
csv_path = r"C:\Users\Aristotle\Documents\Elston Lab Tasks\Tasks\TD_Normal\Preprocessed_Data\combined-temp-discrimination-interval-production-temp-discounting_104.csv"

h5_dir = r"C:\Users\Aristotle\Documents\Elston Lab Tasks\Tasks\TD_Normal\h5_Data"

td_csv, ip_csv, h5_path = process_csv_to_h5(csv_path, h5_dir)


[INFO] Extracted TD=176 | IP=55
[SUCCESS] Wrote C:\Users\Aristotle\Documents\Elston Lab Tasks\Tasks\TD_Normal\h5_Data\sub-P_mkot3mpf_4uu6gy.h5


In [13]:
# Re-open the file that was just written (read-only), list its keys, and load
# both trial tables back into memory.
with pd.HDFStore(h5_path, mode="r") as store:
    print("H5 keys:", store.keys())

    td_h5 = store.get("temporal_discounting/trials")
    ip_h5 = store.get("interval_production/trials")


H5 keys: ['/temporal_discounting/trials', '/interval_production/trials']


In [14]:
# Round-trip check: the frames read back from the HDF5 file must match the
# frames extracted from the CSV.
td_equal = td_csv.equals(td_h5)
ip_equal = ip_csv.equals(ip_h5)

print("TD equal:", td_equal)
print("IP equal:", ip_equal)

print("TD rows CSV / H5:", len(td_csv), len(td_h5))
print("IP rows CSV / H5:", len(ip_csv), len(ip_h5))

# Stop the notebook on a mismatch instead of relying on someone reading the
# printed booleans above.
assert td_equal, "Temporal discounting trials changed in the CSV -> H5 round trip"
assert ip_equal, "Interval production trials changed in the CSV -> H5 round trip"


TD equal: True
IP equal: True
TD rows CSV / H5: 176 176
IP rows CSV / H5: 55 55


In [15]:
def batch_convert_csv_dir_to_h5(csv_dir, h5_dir):
    """
    Convert every ``*.csv`` file directly inside ``csv_dir`` to HDF5.

    Each file is passed to ``process_csv_to_h5``. Conversion is best-effort:
    a file that raises is reported with a ``[FAILED]`` line and the loop
    moves on to the next one.

    Parameters
    ----------
    csv_dir : str or pathlib.Path
        Directory searched (non-recursively) for ``*.csv`` files.
    h5_dir : str or pathlib.Path
        Output directory handed to ``process_csv_to_h5``.

    Returns
    -------
    None
        Progress, warnings and failures are reported via ``print``.
    """
    csv_dir = Path(csv_dir)
    h5_dir = Path(h5_dir)

    # glob() order depends on the filesystem; sort so runs are repeatable.
    csv_files = sorted(csv_dir.glob("*.csv"))
    if not csv_files:
        # Covers both an empty directory and a mistyped/nonexistent one.
        print(f"[WARN] No CSV files found in {csv_dir}")
        return

    # Output path -> name of the CSV that produced it, used to detect two
    # inputs resolving to the same output file (the later one replaces the
    # earlier one).
    written = {}

    for csv_file in csv_files:
        try:
            _, _, h5_path = process_csv_to_h5(csv_file, h5_dir)
        except Exception as e:
            print(f"[FAILED] {csv_file.name}: {e}")
            continue

        earlier = written.get(h5_path)
        if earlier is not None:
            print(
                f"[WARN] {csv_file.name} overwrote {h5_path.name} "
                f"(previously written from {earlier})"
            )
        written[h5_path] = csv_file.name


In [16]:
# Batch run: first argument is the directory of input CSVs, second is the
# directory that receives the HDF5 files.
# NOTE(review): both are absolute paths on one Windows machine; adjust them
# before running anywhere else.
batch_convert_csv_dir_to_h5(
    r"C:\Users\Aristotle\Documents\Elston Lab Tasks\Tasks\TD_Normal\Preprocessed_Data",
    r"C:\Users\Aristotle\Documents\Elston Lab Tasks\Tasks\TD_Normal\h5_Data"
)


[INFO] Extracted TD=176 | IP=55
[SUCCESS] Wrote C:\Users\Aristotle\Documents\Elston Lab Tasks\Tasks\TD_Normal\h5_Data\sub-P_mkot3mpf_4uu6gy.h5
