## 1 - Evaluation

In [None]:
# load dataset
import sys
from pathlib import Path
# Absolute path of the working directory's parent; it is prepended to
# sys.path (once) ahead of the project-local `src` import that follows.
PROJECT_ROOT = Path("..").resolve()
if not any(entry == str(PROJECT_ROOT) for entry in sys.path):
    sys.path.insert(0, str(PROJECT_ROOT))
from src.dataset import build_dataset

# Data directory and subject-metadata spreadsheet, both expressed relative
# to the notebook's working directory.
DATA_DIR = Path("../data/MODMA")
META_PATH = DATA_DIR.joinpath(
    "subjects_information_EEG_128channels_resting_lanzhou_2015.xlsx"
)

# Table used by every later cell; it is expected to expose `subject_id`,
# `label` and the feature columns (see how X and y are derived below).
df = build_dataset(DATA_DIR, META_PATH)


In [None]:
import numpy as np
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Shared splitter: 5 stratified folds, shuffled with a fixed seed so the
# same folds are produced on every run.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Target is the integer-cast `label` column; features are every column
# except the subject identifier and the label itself.
y = df["label"].astype(int)
X = df.drop(columns=["subject_id", "label"])

# Baseline model: standardize features, then class-balanced L2 logistic
# regression. Scaling lives inside the pipeline so it is refit per fold.
pipe_lr = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "clf",
            LogisticRegression(
                penalty="l2",
                solver="liblinear",
                class_weight="balanced",
                max_iter=2000,
                random_state=42,
            ),
        ),
    ]
)

# Per-fold ROC AUC of the baseline on the full feature set.
scores = cross_val_score(estimator=pipe_lr, X=X, y=y, scoring="roc_auc", cv=cv)
print("LR ROC AUC per fold:", np.round(scores, 3))
print("LR Mean ROC AUC:", scores.mean(), "+/-", scores.std())


## 2 - Feature Ablations

In [None]:
def eval_features(X_sub, y, cv, estimator=None):
    """Cross-validate a classifier on a feature subset and summarize ROC AUC.

    Parameters
    ----------
    X_sub : DataFrame or array-like
        Feature matrix restricted to the columns under evaluation.
    y : array-like
        Target labels aligned row-for-row with ``X_sub``.
    cv : cross-validation splitter or int
        Passed straight through to ``cross_val_score``.
    estimator : estimator, optional
        Model to evaluate. When omitted, the notebook-level ``pipe_lr``
        pipeline is used, which matches the previous behaviour.

    Returns
    -------
    tuple of (float, float)
        Mean and standard deviation of the per-fold ROC AUC scores.
    """
    if estimator is None:
        # Looked up at call time so a redefined `pipe_lr` is picked up.
        estimator = pipe_lr
    scores = cross_val_score(estimator, X_sub, y, cv=cv, scoring="roc_auc")
    return scores.mean(), scores.std()


In [None]:
cols = X.columns

# Columns grouped per frequency band. Matching is by substring
# (`rbp_<band>_` anywhere in the name), not by prefix, so a column is
# included wherever that token appears in its name.
band_cols = {
    band: [name for name in cols if f"rbp_{band}_" in name]
    for band in ("delta", "theta", "alpha", "beta", "gamma")
}

# Any column mentioning "asym", and any column whose name starts with "rbp_".
asym_cols = [name for name in cols if "asym" in name]
all_rbp_cols = [name for name in cols if name.startswith("rbp_")]


In [None]:
# Ablation table: one (label, mean AUC, std AUC) row per feature subset.
results = []

m, s = eval_features(X, y, cv)
results.append(("All features", m, s))

# Every subset is checked for emptiness before evaluation: a column group
# that matched nothing would otherwise hand a zero-column frame to the
# cross-validation and fail (or yield NaN) instead of being skipped.
if len(all_rbp_cols) > 0:
    m, s = eval_features(X[all_rbp_cols], y, cv)
    results.append(("All bandpower (rbp_*)", m, s))
else:
    print("All bandpower (rbp_*): no matching columns, skipped")

if len(asym_cols) > 0:
    m, s = eval_features(X[asym_cols], y, cv)
    results.append(("Asymmetry only", m, s))
else:
    print("Asymmetry only: no matching columns, skipped")

for band, band_list in band_cols.items():
    if len(band_list) == 0:
        print(f"{band} only: no matching columns, skipped")
        continue
    m, s = eval_features(X[band_list], y, cv)
    results.append((f"{band} only", m, s))

# pretty print
for name, m, s in results:
    print(f"{name:25s}  AUC={m:.3f} +/- {s:.3f}")


In [None]:
# Columns matched for the alpha band (see `band_cols`).
alpha_cols = band_cols["alpha"]

# Alpha-only model: same architecture as the baseline pipeline
# (standardize -> class-balanced L2 logistic regression, liblinear solver).
# `random_state` is fixed, as in the baseline, so repeated fits are
# reproducible.
pipe_alpha = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(
        penalty="l2",
        solver="liblinear",
        class_weight="balanced",
        max_iter=1000,
        random_state=42,
    )),
])

# Fit on all subjects; the next cell inspects these coefficients.
pipe_alpha.fit(X[alpha_cols], y)

In [None]:
import pandas as pd
# Coefficient vector of the fitted alpha-only classifier (first row of
# `coef_`). The classifier sits behind a StandardScaler, so magnitudes are
# per standardized feature.
coefs = pipe_alpha.named_steps.clf.coef_[0]

# Label each coefficient with its column, then order by absolute size
# (largest first) while keeping the signed values.
alpha_importance = pd.Series(coefs, index=alpha_cols)
alpha_importance = alpha_importance.sort_values(key=np.abs, ascending=False)

alpha_importance.head(15)


In [None]:
# Split the alpha columns into asymmetry features and the remaining
# (power) features, based on whether the name contains "asym".
alpha_asym = [c for c in alpha_cols if "asym" in c]
alpha_power = [c for c in alpha_cols if "asym" not in c]

# Report mean +/- std ROC AUC for each non-empty group, in the same format
# as the ablation table. Empty groups are skipped so a zero-column frame
# never reaches the cross-validation.
for label, subset in (("Alpha power", alpha_power), ("Alpha asymmetry", alpha_asym)):
    if not subset:
        continue
    m, s = eval_features(X[subset], y, cv)
    print(f"{label:25s}  AUC={m:.3f} +/- {s:.3f}")


In [None]:
from sklearn.model_selection import StratifiedKFold
import numpy as np

from sklearn.base import clone

# Same splitter configuration as the evaluation cells (seeded, 5 folds).
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
coef_mat = []

for train_idx, test_idx in cv.split(X, y):
    # Fit an unfitted copy per fold. Refitting `pipe_alpha` itself would
    # overwrite the model trained on all subjects with the last fold's
    # model, changing what earlier cells show if they are re-run.
    fold_model = clone(pipe_alpha)
    fold_model.fit(X.iloc[train_idx][alpha_cols], y.iloc[train_idx])
    coef_mat.append(fold_model.named_steps["clf"].coef_[0])

# Rows = folds, columns = alpha features.
coef_mat = np.vstack(coef_mat)

coef_mean = coef_mat.mean(axis=0)
coef_std = coef_mat.std(axis=0)

# Per-feature mean and spread of the coefficient across folds, ordered by
# absolute mean so the consistently largest effects come first.
alpha_stability = (
    pd.DataFrame({
        "mean_coef": coef_mean,
        "std_coef": coef_std,
    }, index=alpha_cols)
    .assign(abs_mean=lambda frame: frame["mean_coef"].abs())
    .sort_values("abs_mean", ascending=False)
)

alpha_stability.head(10)


In [None]:
# Add the direction (-1, 0 or +1) of each feature's mean coefficient.
alpha_stability = alpha_stability.assign(
    sign=lambda frame: np.sign(frame["mean_coef"])
)

alpha_stability.head(10)

In [None]:
from sklearn.base import clone
from sklearn.feature_selection import SelectFromModel

# Ten alpha features with the largest |mean coefficient| across folds.
# Kept for reporting only: these were ranked using every subject's label,
# so scoring a model restricted to them with the same CV would leak
# test-fold information and overstate the AUC.
top10 = alpha_stability.head(10).index.tolist()

# Leakage-free estimate: the top-k selection is a pipeline step, so it is
# redone on each fold's training split only.
n_keep = min(10, len(alpha_cols))
base_clf = pipe_alpha.named_steps["clf"]
pipe_top10 = Pipeline([
    ("scaler", StandardScaler()),
    # threshold=-inf makes `max_features` the only criterion: keep the
    # n_keep features with the largest absolute coefficient.
    ("select", SelectFromModel(clone(base_clf), threshold=-np.inf, max_features=n_keep)),
    ("clf", clone(base_clf)),
])

scores_top10 = cross_val_score(
    pipe_top10,
    X[alpha_cols],
    y,
    cv=cv,
    scoring="roc_auc"
)

scores_top10.mean(), scores_top10.std()
