In [1]:
import os, json, random
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import joblib
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

In [None]:
from pathlib import Path

# -------------------------------------------------
# Correct root (notebook is 2 levels deep)
# -------------------------------------------------
PROJECT_ROOT = Path.cwd().resolve().parents[2]

DATASETS_DIR = PROJECT_ROOT / "datasets"
RESULTS_DIR = PROJECT_ROOT / "results"

# -------------------------------------------------
# Tables location (NEW STRUCTURE)
# -------------------------------------------------
TABLES_DIR = RESULTS_DIR / "tables" / "backbone_benchmarking"
TABLES_DIR.mkdir(parents=True, exist_ok=True)

# -------------------------------------------------
# Models location (NEW STRUCTURE)
# -------------------------------------------------
MODELS_ROOT = PROJECT_ROOT / "models" / "backbone_benchmark_models"
EEGNET_DIR = MODELS_ROOT / "EEGNet"
EEGNET_DIR.mkdir(parents=True, exist_ok=True)

print("Project Root:", PROJECT_ROOT)
print("Tables ->", TABLES_DIR)
print("Models ->", MODELS_ROOT)

In [3]:
def seed_everything(seed=42):
    import os, random
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)
    os.environ['PYTHONHASHSEED']=str(seed)
seed_everything(42)

In [None]:
DATA_PATH = DATASETS_DIR / "physionet_dataset" / "processed" / "preprocessed.npz"

if not DATA_PATH.exists():
    raise FileNotFoundError(
        f"Preprocessed file not found at {DATA_PATH}"
    )

d = np.load(DATA_PATH, allow_pickle=True)

X = d["X"]
y = d["y"]

In [5]:
print("X.shape =", X.shape)
print("y.shape =", y.shape)

X.shape = (30, 64, 561)
y.shape = (30,)


In [6]:
USE_CSP = True
CSP_COMPONENTS = 8
if USE_CSP:
    from mne.decoding import CSP
    csp = CSP(n_components=CSP_COMPONENTS, log=True, norm_trace=False)
    X_csp = csp.fit_transform(X, y)  
    print("Computed CSP features:", X_csp.shape)
else:
    X_csp = None

Computing rank from data with rank=None
    Using tolerance 12 (2.2e-16 eps * 64 dim * 8.3e+14  max singular value)
    Estimated rank (data): 64
    data: rank 64 computed from 64 data channels with 0 projectors
Reducing data rank from 64 -> 64
Estimating class=0 covariance using EMPIRICAL
Done.
Estimating class=1 covariance using EMPIRICAL
Done.
Computed CSP features: (30, 8)


In [7]:
classical_models = {
    'LDA': Pipeline([('sc', StandardScaler()), ('clf', LinearDiscriminantAnalysis())]),
    'SVM-rbf': Pipeline([('sc', StandardScaler()), ('clf', SVC(kernel='rbf', C=1, probability=True))]),
    'RandomForest': RandomForestClassifier(n_estimators=200, random_state=42),
    'MLP': Pipeline([('sc', StandardScaler()), ('clf', MLPClassifier(hidden_layer_sizes=(100,), max_iter=400))])
}

CV_SPLITS = 5
results_classical = {}
if X_csp is not None:
    skf = StratifiedKFold(n_splits=CV_SPLITS, shuffle=True, random_state=42)
    for name, clf in classical_models.items():
        accs=[]; f1s=[]
        for tr, te in skf.split(X_csp, y):
            clf.fit(X_csp[tr], y[tr])
            p = clf.predict(X_csp[te])
            accs.append(accuracy_score(y[te], p))
            f1s.append(f1_score(y[te], p, average='weighted'))
        results_classical[name] = {'acc_mean': float(np.mean(accs)), 'acc_std': float(np.std(accs)),
                                   'f1_mean': float(np.mean(f1s)), 'f1_std': float(np.std(f1s))}
        print(f"[CSP] {name}: acc={results_classical[name]['acc_mean']:.3f} ± {results_classical[name]['acc_std']:.3f}, f1={results_classical[name]['f1_mean']:.3f}")



[CSP] LDA: acc=1.000 ± 0.000, f1=1.000
[CSP] SVM-rbf: acc=1.000 ± 0.000, f1=1.000
[CSP] RandomForest: acc=0.967 ± 0.067, f1=0.966
[CSP] MLP: acc=1.000 ± 0.000, f1=1.000


In [8]:
results_df = pd.DataFrame(results_classical).T
results_df.to_csv(TABLES_DIR / "classical_ml_kfold_results.csv")

print("Saved classical ML results.")

Saved classical ML results.


In [9]:
class EEGDataset(Dataset):
    def __init__(self,X,y): self.X=X.astype(np.float32); self.y=y.astype(np.int64)
    def __len__(self): return len(self.y)
    def __getitem__(self,idx): return self.X[idx], self.y[idx]

class EEGNet(nn.Module):
    def __init__(self, chans, samples, classes=2, kern_len=64, F1=8, D=2, F2=16, dropout=0.5):
        super().__init__()
        self.first = nn.Sequential(
            nn.Conv2d(1, F1, (1, kern_len), padding=(0, kern_len//2), bias=False),
            nn.BatchNorm2d(F1),
            nn.Conv2d(F1, F1*D, (chans, 1), bias=False),
            nn.BatchNorm2d(F1*D),
            nn.ELU(),
            nn.AvgPool2d((1,4)),
            nn.Dropout(dropout)
        )
        self.second = nn.Sequential(
            nn.Conv2d(F1*D, F2, (1, 16), bias=False),
            nn.BatchNorm2d(F2),
            nn.ELU(),
            nn.AvgPool2d((1,8)),
            nn.Flatten()
        )
        with torch.no_grad():
            dummy=torch.zeros(1,1,chans,samples)
            feat=self.first(dummy); feat=self.second(feat)
            hid_dim=feat.shape[1]
        self.classify = nn.Linear(hid_dim, classes)
    def forward(self,x):
        x = x.unsqueeze(1); x = self.first(x); x = self.second(x); return self.classify(x)

def train_epoch(model, loader, opt, loss_fn, device='cpu'):
    model.train(); losses=[]
    for xb,yb in loader:
        xb,yb=xb.to(device), yb.to(device)
        opt.zero_grad(); logits=model(xb); loss=loss_fn(logits,yb); loss.backward(); opt.step()
        losses.append(loss.item())
    return float(np.mean(losses))

def eval_model_preds(model, loader, device='cpu'):
    model.eval(); ys=[]; preds=[]
    with torch.no_grad():
        for xb,yb in loader:
            xb=xb.to(device); logits=model(xb)
            p=logits.argmax(dim=1).cpu().numpy()
            preds.extend(p.tolist()); ys.extend(yb.numpy().tolist())
    return np.array(ys), np.array(preds)

In [10]:
y = d['y'].astype(int)
print("unique labels (raw):", np.unique(y))
print("min, max:", y.min(), y.max())

unique labels (raw): [0 1]
min, max: 0 1


In [11]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
n_epochs, n_chans, n_times = X.shape

tr_idx, te_idx = train_test_split(
    np.arange(n_epochs),
    test_size=0.2,
    stratify=y,
    random_state=42
)

train_loader = DataLoader(
    EEGDataset(X[tr_idx], y[tr_idx]),
    batch_size=32,
    shuffle=True
)

test_loader = DataLoader(
    EEGDataset(X[te_idx], y[te_idx]),
    batch_size=64,
    shuffle=False
)

model = EEGNet(n_chans, n_times, classes=len(np.unique(y))).to(device)
opt = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
loss_fn = nn.CrossEntropyLoss()

MODEL_PATH = EEGNET_DIR / "eegnet_best.pth"

EPOCHS = 25
best_acc = 0
best_epoch = 0

for ep in range(1, EPOCHS + 1):
    tr_loss = train_epoch(model, train_loader, opt, loss_fn, device)
    ys, preds = eval_model_preds(model, test_loader, device)

    acc = accuracy_score(ys, preds)
    f1v = f1_score(ys, preds, average='weighted')

    if acc > best_acc:
        best_acc = acc
        best_epoch = ep
        torch.save(model.state_dict(), MODEL_PATH)

    if ep == 1 or ep % 5 == 0:
        print(f"[EEGNet] ep{ep}: tr_loss={tr_loss:.4f}, test_acc={acc:.4f}, f1={f1v:.4f}")

print("EEGNet best acc:", best_acc, "at epoch", best_epoch)
print(device)

[EEGNet] ep1: tr_loss=0.7282, test_acc=0.5000, f1=0.4857
[EEGNet] ep5: tr_loss=0.6414, test_acc=0.5000, f1=0.4857
[EEGNet] ep10: tr_loss=0.5989, test_acc=0.3333, f1=0.2500
[EEGNet] ep15: tr_loss=0.5415, test_acc=0.6667, f1=0.6667
[EEGNet] ep20: tr_loss=0.4926, test_acc=0.6667, f1=0.6667
[EEGNet] ep25: tr_loss=0.4355, test_acc=0.6667, f1=0.6667
EEGNet best acc: 0.6666666666666666 at epoch 14
cuda


In [12]:
eeg_summary = {
    "best_accuracy": float(best_acc),
    "best_epoch": int(best_epoch)
}

pd.DataFrame([eeg_summary]).to_csv(
    TABLES_DIR / "eegnet_holdout_results.csv",
    index=False
)

print("Saved EEGNet holdout results.")

Saved EEGNet holdout results.


In [13]:
y = d["y"]
unique, counts = np.unique(y, return_counts=True)
print("Labels:", dict(zip(unique, counts)))

Labels: {np.int64(0): np.int64(14), np.int64(1): np.int64(16)}
