In [1]:
import numpy as np
import pandas as pd

In [10]:
# Input table of pupil traces (the file name suggests per-subject ensemble
# averages; "bc" is presumably "baseline-corrected" — TODO confirm).
bc_csv_path = 'ensemble_avg_per_subject_bc.csv'
df_bc = pd.read_csv(bc_csv_path)

# Accumulator that will hold one feature record (a dict) per subject.
features = list()

In [11]:
# Extract summary features from each subject's trace in `df_bc`.
# Reads the "subjectID", "time" and "pupil_bc" columns and fills `features`
# with one dict per subject.

# Start from an empty list every time this cell runs; otherwise re-running the
# cell would append a second copy of every subject's record.
features = []

# np.trapezoid only exists in NumPy >= 2.0; older releases provide np.trapz.
_integrate = np.trapezoid if hasattr(np, "trapezoid") else np.trapz

for sid, df_subj in df_bc.groupby("subjectID"):
    # Sort by time so integration and the post-peak search run chronologically.
    df_subj = df_subj.sort_values("time")
    t = df_subj["time"].values
    y = df_subj["pupil_bc"].values

    # --- Peak amplitude and latency (global maximum of the whole trace) ---
    peak_idx = np.argmax(y)
    peak_amp = y[peak_idx]
    latency_peak = t[peak_idx]

    # --- Area under curve (0–2s) ---
    # With fewer than two samples the trapezoid rule silently returns 0.0,
    # which is indistinguishable from a genuinely flat response; report NaN
    # instead, matching how the slope handles insufficient data.
    mask_0_2 = (t >= 0) & (t <= 2)
    if mask_0_2.sum() >= 2:
        auc_0_2 = _integrate(y[mask_0_2], t[mask_0_2])
    else:
        auc_0_2 = np.nan

    # --- Early slope (0–0.5s) ---
    # Linear least-squares fit; only the slope coefficient is kept.
    mask_0_05 = (t >= 0) & (t <= 0.5)
    if mask_0_05.sum() >= 2:
        slope = np.polyfit(t[mask_0_05], y[mask_0_05], 1)[0]
    else:
        slope = np.nan

    # --- Mean response in 1–2s window ---
    # Guard the empty window explicitly rather than relying on the
    # RuntimeWarning + NaN that .mean() produces for an empty array.
    mask_1_2 = (t >= 1.0) & (t <= 2.0)
    mean_1_2 = y[mask_1_2].mean() if mask_1_2.any() else np.nan

    # --- Recovery time (first time at/after peak when response <= 0) ---
    # Stays NaN when the trace never returns to zero or below after the peak.
    at_or_below_zero = np.flatnonzero(y[peak_idx:] <= 0)
    if at_or_below_zero.size:
        recovery_time = t[peak_idx + at_or_below_zero[0]]
    else:
        recovery_time = np.nan

    # Collect features
    features.append({
        "subjectID": sid,
        "peak_amp": peak_amp,
        "latency_peak": latency_peak,
        "auc_0_2": auc_0_2,
        "slope_0_0.5": slope,
        "mean_1_2": mean_1_2,
        "recovery_time": recovery_time
    })

In [12]:
# Turn the collected per-subject records into a table, write it to disk
# (without the index column), and print the first five rows as a preview.
feat_df = pd.DataFrame(data=features)
feat_df.to_csv(path_or_buf="subject_features_from_ensemble.csv", index=False)

print(feat_df.head(n=5))

   subjectID  peak_amp  latency_peak   auc_0_2  slope_0_0.5  mean_1_2  \
0          1  1.094160      1.312718  0.615308     0.271465  0.441669   
1          2  0.912698      1.411316  0.521367     0.223003  0.377486   
2          3  0.972606      1.288936  0.540129     0.268325  0.406488   
3          4  1.087494      1.388032  0.561525     0.258321  0.403172   
4          5  1.073620      1.292809  0.650294     0.290882  0.441840   

   recovery_time  
0       2.191484  
1       2.249656  
2       2.239756  
3       2.189638  
4       2.271893  


In [14]:
# Attach a class label to every subject's feature row and save the result.
feet=pd.read_csv('subject_features_from_ensemble.csv')
meta=pd.read_csv('pupil_raw_merged.csv')

# Reduce the metadata to distinct (subjectID, class) pairs.
subject_classes=meta[['subjectID','class']].drop_duplicates()

# how='left' keeps every feature row; subjects absent from the metadata get a
# missing class. validate='one_to_one' makes pandas raise MergeError if a
# subjectID appears more than once on either side (e.g. a subject carrying two
# different class labels), which would otherwise silently duplicate rows in
# final.csv.
df=feet.merge(subject_classes,on='subjectID',how='left',validate='one_to_one')

df.to_csv('final.csv',index=False)
