# Generate the mw_fixed EEG + eye-tracking (eeg-et) feature files (one per subject)

In [16]:
import os
import numpy as np
import pandas as pd
import mne

# ==========================
# EEG band definitions
# ==========================
# One row per EEG frequency band: (band name, lower edge in Hz, upper edge in Hz).
# NOTE(review): gamma1 ends at 40.0 Hz and gamma2 starts at 40.0 Hz, whereas
# every other pair of neighbouring bands is separated by 0.5 Hz — confirm the
# shared edge is intended.
_band_table = (
    ("theta1", 4.0, 6.0),
    ("theta2", 6.5, 8.0),
    ("alpha1", 8.5, 10.0),
    ("alpha2", 10.5, 13.0),
    ("beta1", 13.5, 18.0),
    ("beta2", 18.5, 30.0),
    ("gamma1", 30.5, 40.0),
    ("gamma2", 40.0, 49.5),
)

# Parallel views of the table: names and (low, high) edges share one ordering.
band_names = [name for name, _lo, _hi in _band_table]
band_defs = [(lo, hi) for _name, lo, hi in _band_table]

n_bands = len(band_defs)

# ==========================
# Paths
# ==========================
data_root = "/gpfs1/pi/djangraw/mindless_reading/data"

# Subject folders are the directories directly under data_root whose name
# starts with "s"; they are processed in sorted order.
all_subjects = []
for entry in os.listdir(data_root):
    if entry.startswith("s") and os.path.isdir(os.path.join(data_root, entry)):
        all_subjects.append(entry)
all_subjects.sort()

# Channel names and derived feature names are filled in lazily from the first
# run file that gets loaded.
col_name = eeg_feature_names = None

# ==========================
# Main loop
# ==========================
# Sampling rate (Hz) handed to the multitaper PSD estimator.
# NOTE(review): assumes every ml_data pickle is sampled at 256 Hz — confirm.
eeg_sfreq = 256

for subject_id in all_subjects:
    print(f"Processing {subject_id}")
    subject_dir = os.path.join(data_root, subject_id)

    # Window-level table that receives the EEG band-power columns.
    csv_path = os.path.join(subject_dir, f"{subject_id}_R_features_mw_fixed_sr.csv")
    df = pd.read_csv(csv_path)
    # Drop rows with duration less than 2 seconds. Resetting the index makes
    # the labels 0..n-1; they are used below as row keys when writing features.
    df = df[df["win_dur"] >= 2.0].reset_index(drop=True)

    ml_dir = os.path.join(subject_dir, "ml_data")
    pkl_files = sorted(f for f in os.listdir(ml_dir) if f.endswith(".pkl"))

    for pkl_file in pkl_files:
        # NOTE(review): unpickling can execute arbitrary code; only load
        # pickles that come from a trusted source.
        df_run = pd.read_pickle(os.path.join(ml_dir, pkl_file))

        # ==========================
        # Channel & feature names (once)
        # ==========================
        # The first 64 columns of the first run file are taken as the EEG
        # channels and reused for every later run and subject.
        if col_name is None:
            col_name = df_run.columns[:64].tolist()

            eeg_feature_names = [
                f"{ch}_{band}"
                for ch in col_name
                for band in band_names
            ]

        # Size the band-power matrix from the channel list actually in use
        # rather than repeating the literal 64.
        n_channels = len(col_name)

        # ==========================
        # Run consistency check
        # ==========================
        run_nums = df_run["run_num"].unique()
        if run_nums.size != 1:
            raise ValueError(f"Multiple run numbers in {pkl_file}")

        run_num = run_nums[0]

        # ==========================
        # Page start alignment
        # ==========================
        first_page_times = df_run.loc[df_run["page_num"] == 0, "time"]
        if first_page_times.empty:
            # Fail with context instead of a bare IndexError from .iloc[0].
            raise ValueError(f"No samples with page_num == 0 in {pkl_file}")
        page_start_time = first_page_times.iloc[0]

        df_page = df[df["run"] == run_num]

        # Re-base each window onto the run's time axis: offset from the row's
        # own page_start, added to the time of the first page_num == 0 sample.
        # NOTE(review): windows on every page of the run are anchored to the
        # page-0 time; confirm this is intended rather than anchoring each
        # window to the start of its own page.
        win_start = (
            df_page["win_start"] - df_page["page_start"]
        ) + page_start_time

        win_end = (
            df_page["win_end"] - df_page["page_start"]
        ) + page_start_time

        # ==========================
        # Window loop
        # ==========================
        for row_idx in df_page.index:
            tstart = win_start.loc[row_idx]
            tend = win_end.loc[row_idx]

            # Samples whose time falls inside the window (both ends inclusive).
            eeg = (
                df_run.loc[
                    (df_run["time"] >= tstart) &
                    (df_run["time"] <= tend),
                    col_name
                ]
                .to_numpy()
                .T
            )  # shape: (n_channels, n_times)

            # ==========================
            # PSD computation
            # ==========================
            psds, freqs = mne.time_frequency.psd_array_multitaper(
                eeg,
                sfreq=eeg_sfreq,
                fmin=4,
                fmax=50,
                output="power",
                verbose=False,
            )

            # ==========================
            # Band averaging
            # ==========================
            psds_band = np.zeros((n_channels, n_bands), dtype=np.float32)

            for band_i, (band_lo, band_hi) in enumerate(band_defs):
                # Band edges are inclusive on both sides.
                freq_mask = (freqs >= band_lo) & (freqs <= band_hi)
                psds_band[:, band_i] = psds[:, freq_mask].mean(axis=1)

            # ==========================
            # Flattening (channel-major, band-minor)
            # ==========================
            # Row-major flattening matches the ordering of eeg_feature_names
            # (outer loop over channels, inner loop over bands).
            df.loc[row_idx, eeg_feature_names] = psds_band.flatten()

    # ==========================
    # Save per subject
    # ==========================
    out_path = os.path.join(
        subject_dir, f"{subject_id}_R_eeget_features_mw_fixed.csv"
    )
    df.to_csv(out_path, index=False)
    print(f"Saved: {out_path}")


Processing s10014
Saved: /gpfs1/pi/djangraw/mindless_reading/data/s10014/s10014_R_eeget_features_mw_fixed.csv
Processing s10052
Saved: /gpfs1/pi/djangraw/mindless_reading/data/s10052/s10052_R_eeget_features_mw_fixed.csv
Processing s10059
Saved: /gpfs1/pi/djangraw/mindless_reading/data/s10059/s10059_R_eeget_features_mw_fixed.csv
Processing s10073
Saved: /gpfs1/pi/djangraw/mindless_reading/data/s10073/s10073_R_eeget_features_mw_fixed.csv
Processing s10081
Saved: /gpfs1/pi/djangraw/mindless_reading/data/s10081/s10081_R_eeget_features_mw_fixed.csv
Processing s10084
Saved: /gpfs1/pi/djangraw/mindless_reading/data/s10084/s10084_R_eeget_features_mw_fixed.csv
Processing s10085
Saved: /gpfs1/pi/djangraw/mindless_reading/data/s10085/s10085_R_eeget_features_mw_fixed.csv
Processing s10089
Saved: /gpfs1/pi/djangraw/mindless_reading/data/s10089/s10089_R_eeget_features_mw_fixed.csv
Processing s10094
Saved: /gpfs1/pi/djangraw/mindless_reading/data/s10094/s10094_R_eeget_features_mw_fixed.csv
Processing

## Combine individual files

In [20]:
import os
import pandas as pd

# ==========================
# Paths
# ==========================
data_root = "/gpfs1/pi/djangraw/mindless_reading/data"

# Subject folders: directories under data_root whose name starts with "s".
all_subjects = sorted(
    entry
    for entry in os.listdir(data_root)
    if entry.startswith("s") and os.path.isdir(os.path.join(data_root, entry))
)

# When True, each subject's majority class is subsampled down to the size of
# the minority class before the tables are stacked.
is_balance = True

df_list = []
# ==========================
# Main loop
# ==========================
for subject_id in all_subjects:
    print(f"Loading features for {subject_id}")
    subject_dir = os.path.join(data_root, subject_id)
    csv_path = os.path.join(subject_dir, f"{subject_id}_R_eeget_features_mw_fixed.csv")
    df = pd.read_csv(csv_path)
    # Integer class label derived from the is_MWreported column.
    df['label'] = df["is_MWreported"].astype(int)

    if is_balance:
        class_counts = df["label"].value_counts()
        if len(class_counts) == 2:
            # Draw the same number of rows from each class (fixed seed), then
            # shuffle the result so the classes are interleaved.
            n_per_class = class_counts.min()
            per_class = []
            for cls in class_counts.index:
                cls_rows = df[df["label"] == cls]
                per_class.append(cls_rows.sample(n_per_class, random_state=42))
            stacked = pd.concat(per_class)
            df = stacked.sample(frac=1, random_state=42).reset_index(drop=True)
        else:
            # Subjects with a single class (or none) are kept unbalanced.
            print(f"Warning: {subject_id} does not have exactly 2 classes. Skipping balancing.")

    df["subject_id"] = subject_id  # remember which subject each row came from
    df_list.append(df)

# Stack all subjects into one table and write it next to the subject folders.
df_all = pd.concat(df_list, ignore_index=True)
out_path = os.path.join(data_root, "all_subjects_R_eeget_features_mw_fixed.csv")
df_all.to_csv(out_path, index=False)
print(f"Saved: {out_path}")

Loading features for s10014
Loading features for s10052
Loading features for s10059
Loading features for s10073
Loading features for s10081
Loading features for s10084
Loading features for s10085
Loading features for s10089
Loading features for s10094
Loading features for s10100
Loading features for s10103
Loading features for s10110
Loading features for s10111
Loading features for s10115
Loading features for s10117
Loading features for s10121
Loading features for s10125
Loading features for s10138
Loading features for s10139
Loading features for s10141
Loading features for s10144
Loading features for s10145
Loading features for s10148
Loading features for s10153
Loading features for s10156
Loading features for s10158
Loading features for s10159
Loading features for s10160
Loading features for s10165
Loading features for s10173
Loading features for s10177
Loading features for s10178
Loading features for s10180
Loading features for s10181
Loading features for s10183
Loading features for

# MW classifier

In [1]:
import os
import numpy as np
import pandas as pd

data_root = "/gpfs1/pi/djangraw/mindless_reading/data"

# Read the combined all-subjects feature table written by the combine step.
combined_csv = os.path.join(data_root, "all_subjects_R_eeget_features_mw_fixed.csv")
df = pd.read_csv(combined_csv)

In [2]:
# Show every column name of the combined table as a plain Python list.
df.columns.to_list()

['eye',
 'fix_num',
 'fix_word_num',
 'norm_fix_word_num',
 'norm_in_word_reg',
 'norm_out_word_reg',
 'zipf_fixdur_corr',
 'word_length_fixdur_corr',
 'norm_total_viewing',
 'fix_dispersion',
 'weighted_vergence',
 'blink_num',
 'blink_dur',
 'blink_freq',
 'ibi',
 'ibi_baseline',
 'sacc_num',
 'norm_sacc_num',
 'sacc_length',
 'horizontal_sacc',
 'pupil_baseline',
 'pupil',
 'pupil_mean',
 'pupil_slope',
 'page_norm_pupil',
 'page_norm_pupil_mean',
 'reading',
 'run',
 'page',
 'page_start',
 'page_end',
 'win_start',
 'win_end',
 'win_dur',
 'task_start',
 'is_MWreported',
 'is_MWvalid',
 'mw_onset',
 'mw_offset',
 'Fp1_theta1',
 'Fp1_theta2',
 'Fp1_alpha1',
 'Fp1_alpha2',
 'Fp1_beta1',
 'Fp1_beta2',
 'Fp1_gamma1',
 'Fp1_gamma2',
 'AF7_theta1',
 'AF7_theta2',
 'AF7_alpha1',
 'AF7_alpha2',
 'AF7_beta1',
 'AF7_beta2',
 'AF7_gamma1',
 'AF7_gamma2',
 'AF3_theta1',
 'AF3_theta2',
 'AF3_alpha1',
 'AF3_alpha2',
 'AF3_beta1',
 'AF3_beta2',
 'AF3_gamma1',
 'AF3_gamma2',
 'F1_theta1',
 'F1_th

In [4]:
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import LeaveOneGroupOut
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    roc_auc_score,
    precision_score,
)
from sklearn.base import clone

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

# ==========================
# Config & data loading
# ==========================
data_root = "/gpfs1/pi/djangraw/mindless_reading/data"
features_csv = os.path.join(data_root, "all_subjects_R_eeget_features_mw_fixed.csv")
df = pd.read_csv(features_csv)

# Eye-tracking columns used as features.
eye_features = [
    'norm_fix_word_num',
    'norm_in_word_reg',
    'norm_out_word_reg',
    'zipf_fixdur_corr',
    'word_length_fixdur_corr',
    'norm_total_viewing',
    'fix_dispersion',
    'weighted_vergence',
    'norm_sacc_num',
    'sacc_length',
    'horizontal_sacc',
    'pupil_slope',
    'page_norm_pupil_mean',
]
eye_idx = [df.columns.get_loc(name) for name in eye_features]

# EEG feature columns are recognised by their "_<band>" suffix.
band_names = [
    "theta1", "theta2", "alpha1", "alpha2",
    "beta1", "beta2", "gamma1", "gamma2",
]
band_suffixes = tuple(f"_{band}" for band in band_names)
eeg_idx = [
    pos for pos, col in enumerate(df.columns)
    if col.endswith(band_suffixes)
]

# Feature matrices: eye only, EEG only, and both (EEG columns first).
X_eye = df.iloc[:, eye_idx].values
X_eeg = df.iloc[:, eeg_idx].values
X = np.concatenate((X_eeg, X_eye), axis=1)
y = df["label"].values
groups = df["subject_id"].values  # one group per subject, for LOSO

# ==========================
# Models
# ==========================
# PCA setting shared by every pipeline: a float keeps that fraction of the
# variance (0.95 = 95%); an int would instead fix the number of components.
pca_variance = 0.95

# Unfitted classifier templates keyed by a short name; each cross-validation
# fold works on a fresh clone. Every estimator that draws random numbers is
# seeded with 42 so repeated runs give the same results.
base_models = {
    "logreg": LogisticRegression(max_iter=1000, n_jobs=-1),
    # No predict_proba; decision_function is used for scoring instead.
    "linear_svc": LinearSVC(random_state=42),
    # probability=True fits an internal, randomised calibration step, so it
    # needs a seed as well.
    "rbf_svc": SVC(kernel="rbf", probability=True, random_state=42),
    "random_forest": RandomForestClassifier(
        n_estimators=200,
        max_depth=None,
        n_jobs=-1,
        random_state=42,
    ),
    "gradient_boosting": GradientBoostingClassifier(random_state=42),
    "knn": KNeighborsClassifier(n_neighbors=5),
    "mlp": MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42),
}

# ==========================
# LOSO + prediction logging
# ==========================
logo = LeaveOneGroupOut()

all_preds = []  # one dict per held-out sample, across all folds / models / feature sets

for feature_set, X_data in zip(
    ["EEG + Eye", "EEG", "Eye"],
    [X, X_eeg, X_eye],
):
    # Drop every feature column that contains at least one NaN.
    valid_cols = ~np.isnan(X_data).any(axis=0)
    X_data = X_data[:, valid_cols]

    print(f"\n========== Feature set: {feature_set} ==========")

    # Make the column filtering visible: a single NaN row removes the whole
    # feature for every subject.
    n_dropped = int((~valid_cols).sum())
    if n_dropped:
        print(
            f"Dropped {n_dropped} feature column(s) containing NaN; "
            f"{X_data.shape[1]} remain."
        )

    for model_name, base_clf in base_models.items():
        print(f"\n=== Model: {model_name} ===")

        acc_list = []
        f1_list = []
        prec_list = []
        auc_list = []

        for train_idx, test_idx in logo.split(X_data, y, groups=groups):
            held_out = np.unique(groups[test_idx])
            if len(held_out) != 1:
                # LOSO: exactly one subject must be held out per fold.
                raise RuntimeError(
                    f"Expected one held-out subject, got {len(held_out)}"
                )
            subj_test = held_out[0]

            X_train, X_test = X_data[train_idx], X_data[test_idx]
            y_train, y_test = y[train_idx], y[test_idx]

            # Scaling and PCA are fitted inside the pipeline, i.e. on the
            # training subjects only; the classifier is a fresh clone.
            pipe = Pipeline([
                ("scaler", StandardScaler()),
                ("pca", PCA(n_components=pca_variance)),
                ("clf", clone(base_clf)),
            ])

            pipe.fit(X_train, y_train)
            y_pred = pipe.predict(X_test)

            # Continuous scores for AUC: positive-class probability when the
            # model offers it, otherwise the decision function.
            y_scores = None
            try:
                if hasattr(pipe, "predict_proba"):
                    y_scores = pipe.predict_proba(X_test)[:, 1]
                elif hasattr(pipe, "decision_function"):
                    y_scores = pipe.decision_function(X_test)
            except Exception as err:
                # Scoring stays best-effort (AUC becomes NaN for this fold),
                # but the failure is reported instead of being hidden.
                print(
                    f"  Warning: no scores for subject {subj_test} "
                    f"({model_name}): {err}"
                )
                y_scores = None

            acc = accuracy_score(y_test, y_pred)
            # zero_division=0 matches precision_score below; the value is the
            # same as the default but without a warning per affected fold.
            f1 = f1_score(y_test, y_pred, zero_division=0)
            prec = precision_score(y_test, y_pred, zero_division=0)

            # AUC needs scores and both classes present in the held-out data.
            if (y_scores is not None) and (np.unique(y_test).size == 2):
                auc = roc_auc_score(y_test, y_scores)
            else:
                auc = np.nan

            acc_list.append(acc)
            f1_list.append(f1)
            prec_list.append(prec)
            auc_list.append(auc)

            # log per-subject fold summary (a NaN AUC prints as "nan")
            print(
                f"  Subject {subj_test}: "
                f"acc={acc:.3f}, prec={prec:.3f}, f1={f1:.3f}, "
                f"auc={auc:.3f}"
            )

            # store per-sample predictions for this fold
            if y_scores is not None:
                fold_scores = y_scores
            else:
                fold_scores = np.full(len(test_idx), np.nan)
            all_preds.extend(
                {
                    "feature_set": feature_set,
                    "model": model_name,
                    "test_subject": subj_test,
                    "sample_idx": int(idx),
                    "y_true": int(truth),
                    "y_pred": int(pred),
                    "y_score": float(score),
                }
                for idx, truth, pred, score in zip(
                    test_idx, y_test, y_pred, fold_scores
                )
            )

        # per-model, per-feature-set mean over subjects (NaN AUCs are ignored)
        print(
            f"Mean over subjects — acc={np.mean(acc_list):.3f}, "
            f"prec={np.mean(prec_list):.3f}, "
            f"f1={np.mean(f1_list):.3f}, "
            f"auc={np.nanmean(auc_list):.3f}"
        )

# ==========================
# Build DataFrame of predictions
# ==========================
def _summarize_predictions(g):
    """Return accuracy, precision, F1, AUC and sample count for one group.

    ``g`` is a slice of the per-sample prediction table with ``y_true``,
    ``y_pred`` and ``y_score`` columns. AUC is computed only on samples that
    have a score, and is NaN when none do or when those samples contain a
    single class.
    """
    y_true = g["y_true"].values
    y_pred = g["y_pred"].values
    y_score = g["y_score"].values

    # Folds that produced no scores store NaN; leave them out so that
    # roc_auc_score never receives NaN input.
    scored = ~np.isnan(y_score)
    if scored.any() and np.unique(y_true[scored]).size == 2:
        auc = roc_auc_score(y_true[scored], y_score[scored])
    else:
        auc = np.nan

    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, zero_division=0),
        "f1": f1_score(y_true, y_pred, zero_division=0),
        "auc": auc,
        "n_samples": len(g),
    }


# ==========================
# Build DataFrame of predictions
# ==========================
df_preds = pd.DataFrame(all_preds)

results_dir = os.path.join(data_root, "ml_results")
os.makedirs(results_dir, exist_ok=True)

pred_file = os.path.join(results_dir, "loso_predictions_mw_fixed.csv")
df_preds.to_csv(pred_file, index=False)
print(f"\nSaved per-sample LOSO predictions to {pred_file}")

# ==========================
# Final metrics from saved predictions (overall)
# ==========================
# Samples from all held-out subjects are pooled per (feature_set, model).
rows = [
    {
        "feature_set": feature_set,
        "model": model_name,
        **_summarize_predictions(g),
    }
    for (feature_set, model_name), g in df_preds.groupby(["feature_set", "model"])
]

df_metrics = pd.DataFrame(rows)

metrics_file = os.path.join(results_dir, "loso_metrics_mw_fixed.csv")
df_metrics.to_csv(metrics_file, index=False)

print(f"\nSaved aggregated metrics to {metrics_file}")
print("\n=== Overall summary ===")
print(df_metrics.sort_values(["feature_set", "accuracy"], ascending=[True, False]))

# ==========================
# Subject-level metrics
# ==========================
# Same metrics, computed separately for each held-out subject.
rows_subj = [
    {
        "feature_set": feature_set,
        "model": model_name,
        "subject_id": subj,
        **_summarize_predictions(g),
    }
    for (feature_set, model_name, subj), g in df_preds.groupby(
        ["feature_set", "model", "test_subject"]
    )
]

df_subject_metrics = pd.DataFrame(rows_subj)

subj_metrics_file = os.path.join(results_dir, "loso_subject_metrics_mw_fixed.csv")
df_subject_metrics.to_csv(subj_metrics_file, index=False)

print(f"\nSaved subject-level metrics to {subj_metrics_file}")





=== Model: logreg ===
  Subject s10014: acc=0.558, prec=0.565, f1=0.531, auc=0.537
  Subject s10052: acc=0.533, prec=1.000, f1=0.125, auc=0.453
  Subject s10059: acc=0.857, prec=0.857, f1=0.857, auc=0.918
  Subject s10073: acc=0.679, prec=0.619, f1=0.743, auc=0.668
  Subject s10081: acc=0.729, prec=0.789, f1=0.698, auc=0.760
  Subject s10084: acc=0.643, prec=0.875, f1=0.483, auc=0.728
  Subject s10085: acc=0.712, prec=0.824, f1=0.651, auc=0.717
  Subject s10089: acc=0.545, prec=0.600, f1=0.375, auc=0.661
  Subject s10094: acc=1.000, prec=1.000, f1=1.000, auc=1.000
  Subject s10100: acc=0.484, prec=0.467, f1=0.304, auc=0.454
  Subject s10103: acc=0.429, prec=0.455, f1=0.556, auc=0.551
  Subject s10110: acc=0.594, prec=0.636, f1=0.519, auc=0.613
  Subject s10111: acc=0.484, prec=0.492, f1=0.652, auc=0.446
  Subject s10115: acc=0.518, prec=0.520, f1=0.491, auc=0.545
  Subject s10117: acc=0.500, prec=0.500, f1=0.154, auc=0.529
  Subject s10121: acc=0.500, prec=0.500, f1=0.632, auc=0.520




  Subject s10110: acc=0.594, prec=1.000, f1=0.316, auc=0.645
  Subject s10111: acc=0.500, prec=0.500, f1=0.667, auc=0.405
  Subject s10115: acc=0.589, prec=0.586, f1=0.596, auc=0.506
  Subject s10117: acc=0.477, prec=0.476, f1=0.465, auc=0.461
  Subject s10121: acc=0.500, prec=0.500, f1=0.667, auc=0.423
  Subject s10125: acc=0.625, prec=0.714, f1=0.526, auc=0.707
  Subject s10138: acc=0.444, prec=0.444, f1=0.444, auc=0.414
  Subject s10139: acc=0.639, prec=0.667, f1=0.606, auc=0.741
  Subject s10141: acc=0.679, prec=0.632, f1=0.727, auc=0.663
  Subject s10144: acc=0.714, prec=0.667, f1=0.750, auc=0.673
  Subject s10145: acc=0.556, prec=0.579, f1=0.478, auc=0.580
  Subject s10148: acc=0.529, prec=0.520, f1=0.619, auc=0.606
  Subject s10153: acc=0.435, prec=0.455, f1=0.536, auc=0.522
  Subject s10156: acc=0.532, prec=0.524, f1=0.603, auc=0.569
  Subject s10158: acc=0.438, prec=0.463, f1=0.581, auc=0.525
  Subject s10159: acc=0.533, prec=0.556, f1=0.417, auc=0.601
  Subject s10160: acc=0.



  Subject s10014: acc=0.519, prec=0.533, f1=0.390, auc=0.429




  Subject s10052: acc=0.500, prec=0.500, f1=0.286, auc=0.609




  Subject s10059: acc=0.643, prec=0.750, f1=0.545, auc=0.776




  Subject s10073: acc=0.714, prec=0.750, f1=0.692, auc=0.796




  Subject s10081: acc=0.688, prec=0.846, f1=0.595, auc=0.776




  Subject s10084: acc=0.619, prec=0.667, f1=0.556, auc=0.692




  Subject s10085: acc=0.654, prec=0.750, f1=0.571, auc=0.759




  Subject s10089: acc=0.636, prec=0.667, f1=0.600, auc=0.537




  Subject s10094: acc=0.500, prec=0.500, f1=0.500, auc=0.750




  Subject s10100: acc=0.548, prec=0.600, f1=0.391, auc=0.477




  Subject s10103: acc=0.571, prec=0.545, f1=0.667, auc=0.571




  Subject s10110: acc=0.531, prec=0.533, f1=0.516, auc=0.496




  Subject s10111: acc=0.581, prec=0.561, f1=0.639, auc=0.692




  Subject s10115: acc=0.589, prec=0.576, f1=0.623, auc=0.587




  Subject s10117: acc=0.455, prec=0.444, f1=0.400, auc=0.436




  Subject s10121: acc=0.571, prec=0.562, f1=0.600, auc=0.612




  Subject s10125: acc=0.521, prec=0.511, f1=0.667, auc=0.597




  Subject s10138: acc=0.537, prec=0.562, f1=0.419, auc=0.664




  Subject s10139: acc=0.833, prec=0.800, f1=0.842, auc=0.929




  Subject s10141: acc=0.536, prec=0.571, f1=0.381, auc=0.526




  Subject s10144: acc=0.786, prec=0.750, f1=0.800, auc=0.816




  Subject s10145: acc=0.426, prec=0.400, f1=0.340, auc=0.412




  Subject s10148: acc=0.529, prec=0.529, f1=0.529, auc=0.578




  Subject s10153: acc=0.478, prec=0.467, f1=0.368, auc=0.514




  Subject s10156: acc=0.758, prec=0.808, f1=0.737, auc=0.862
  Subject s10158: acc=0.656, prec=0.614, f1=0.711, auc=0.718




  Subject s10159: acc=0.550, prec=0.636, f1=0.341, auc=0.637




  Subject s10160: acc=0.654, prec=0.643, f1=0.667, auc=0.621




  Subject s10165: acc=0.474, prec=0.000, f1=0.000, auc=0.499




  Subject s10173: acc=0.700, prec=1.000, f1=0.571, auc=0.680




  Subject s10177: acc=0.575, prec=0.714, f1=0.370, auc=0.532




  Subject s10178: acc=0.538, prec=0.556, f1=0.455, auc=0.698




  Subject s10180: acc=0.677, prec=0.634, f1=0.722, auc=0.786




  Subject s10181: acc=0.515, prec=0.511, f1=0.590, auc=0.514




  Subject s10183: acc=0.500, prec=0.500, f1=0.667, auc=0.651




  Subject s10185: acc=0.750, prec=0.667, f1=0.800, auc=0.500




  Subject s10186: acc=0.769, prec=0.706, f1=0.800, auc=0.876




  Subject s10188: acc=0.524, prec=0.516, f1=0.615, auc=0.615




  Subject s10192: acc=0.606, prec=0.564, f1=0.705, auc=0.662




  Subject s10195: acc=0.576, prec=0.568, f1=0.600, auc=0.594




  Subject s10196: acc=0.900, prec=0.833, f1=0.909, auc=1.000




  Subject s10197: acc=0.500, prec=0.500, f1=0.526, auc=0.667




  Subject s10200: acc=0.600, prec=0.632, f1=0.545, auc=0.595




  Subject s10202: acc=0.500, prec=0.500, f1=0.400, auc=0.611
Mean over subjects — acc=0.597, prec=0.602, f1=0.560, auc=0.644

Saved per-sample LOSO predictions to /gpfs1/pi/djangraw/mindless_reading/data/ml_results/loso_predictions_mw_fixed.csv

Saved aggregated metrics to /gpfs1/pi/djangraw/mindless_reading/data/ml_results/loso_metrics_mw_fixed.csv

=== Overall summary ===
   feature_set              model  accuracy  precision        f1       auc  \
2          EEG         linear_svc  0.507711   0.507898  0.501801  0.504276   
3          EEG             logreg  0.505338   0.505441  0.500599  0.503760   
6          EEG            rbf_svc  0.505338   0.506089  0.472819  0.492686   
5          EEG      random_forest  0.502966   0.502365  0.558947  0.513208   
4          EEG                mlp  0.498221   0.498343  0.516018  0.501843   
0          EEG  gradient_boosting  0.489917   0.490608  0.508009  0.486597   
1          EEG                knn  0.482206   0.483184  0.496830  0.473006   