# MODMA Depression EEG — Modeling

This notebook produces the **portfolio-facing** results:
1. Feature-group ablation (ROC AUC)
2. Coefficient stability for the best group (typically alpha bandpower)
3. Compact model using top-k stable features

Figures are saved into `assets/figures/`.


In [None]:
from pathlib import Path
import pandas as pd

from modma_depression_eeg.dataset_builder import build_dataset
from modma_depression_eeg import modma_modeling
from modma_depression_eeg import modma_plotting

# Where the plotting cells save their figures (relative to the working directory).
FIG_DIR = Path("../assets/figures")
# Create the figure directory up front so saving does not fail on a fresh
# checkout; exist_ok makes re-running this cell harmless.
FIG_DIR.mkdir(parents=True, exist_ok=True)

# Dataset root and the Excel sheet handed to build_dataset alongside it.
DATA_DIR = Path("../data/MODMA")
META_PATH = DATA_DIR / "subjects_information_EEG_128channels_resting_lanzhou_2015.xlsx"

# Build the modeling table; the trailing expression shows its shape as cell output.
df = build_dataset(DATA_DIR, META_PATH)
df.shape


In [None]:
# 1) Feature ablation
# Run the ablation over the full table and show the result inline.
ablation = modma_modeling.run_feature_ablation(df)
display(ablation)

# Plot the same result and save the figure under FIG_DIR.
ablation_fig_path = FIG_DIR / "ablation_auc.png"
modma_plotting.plot_ablation_results(ablation, savepath=ablation_fig_path)


In [None]:
# Pick the best-performing key from ablation
# NOTE(review): this takes the first row, so it assumes run_feature_ablation
# returns rows already ordered best-first — confirm against its implementation.
top_row = ablation.iloc[0]
best_key = top_row["key"]
best_key


In [None]:
# 2) Coefficient stability on the best group
# Get the X / y pair, then look up the columns belonging to the selected group.
X, y = modma_modeling.split_xy(df)
groups = modma_modeling.get_feature_groups(X.columns)
best_cols = groups[best_key]

# One CV object and one pipeline; the compact-model cell reuses both.
cv = modma_modeling.make_cv(n_splits=5, random_state=42)
pipe = modma_modeling.make_logreg_pipeline()

stability = modma_modeling.coef_stability(
    X,
    y,
    best_cols,
    cv=cv,
    pipe=pipe,
)
# Show only the first 15 rows inline.
stability_preview = stability.head(15)
display(stability_preview)

# Plot the top 10 entries and save the figure under FIG_DIR.
coef_title = f"Top stable coefficients ({best_key})"
coef_fig_path = FIG_DIR / "top_coefficients.png"
modma_plotting.plot_top_coefficients(
    stability, k=10, title=coef_title, savepath=coef_fig_path
)


In [None]:
# 3) Compact model: evaluate top-k stable features
# Take the 10 leading features from the stability table and score that subset
# with the same CV object and pipeline used for the stability run.
top10 = modma_modeling.top_k_features(stability, k=10)
res_top10 = modma_modeling.evaluate_feature_set(
    X, y, top10, cv=cv, pipe=pipe
)

# NOTE(review): assumes res_top10 exposes `mean` and `std` as numeric
# attributes (not methods) — confirm against evaluate_feature_set.
auc_mean = res_top10.mean
auc_std = res_top10.std
print(f"Top-10 AUC: {auc_mean:.3f} ± {auc_std:.3f}")

print("Top-10 features:")
for feature_name in top10:
    print(" -", feature_name)
