In [None]:
# ===== BNCI-only benchmark (CSP + classical + EEGNet with k-fold) =====
# Requirements: numpy, torch, scikit-learn (sklearn), mne, pandas, joblib, tqdm
import os, math, random, numpy as np, pandas as pd
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import joblib, torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# reproducibility
def seed_everything(seed=42):
    """Seed the global RNGs of ``random``, NumPy and PyTorch with ``seed``.

    When CUDA is available, every CUDA generator is seeded as well.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)


seed_everything(42)

# ---------- load BNCI preprocessed ----------
BNCI_PREPRO = "preprocessed_BNCI.npz"
if not os.path.exists(BNCI_PREPRO):
    raise FileNotFoundError("preprocessed_BNCI.npz not found. Run BNCI extraction first.")
# NOTE(security): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code. Only load archives produced by a trusted extraction step.
d = np.load(BNCI_PREPRO, allow_pickle=True)
X = d['X'].astype(np.float32)   # (N, n_ch, n_times)
y = d['y'].astype(int)
# A dict stored in an .npz archive comes back as a 0-d object array; calling
# dict() on it directly raises "TypeError: iteration over a 0-d array", so the
# payload has to be unwrapped with .item() first.
meta = {}
if 'meta' in d.files:
    meta_raw = d['meta']
    if isinstance(meta_raw, np.ndarray) and meta_raw.ndim == 0:
        meta_raw = meta_raw.item()
    try:
        meta = dict(meta_raw)
    except (TypeError, ValueError):
        # Not a mapping / sequence of pairs: keep the raw payload instead of crashing.
        meta = {'meta': meta_raw}
print("Loaded BNCI:", X.shape, "labels:", dict(zip(*np.unique(y, return_counts=True))))

# ---------- small augmentation helper (on-the-fly) ----------
def random_augment_numpy(epoch):
    """Return a randomly augmented copy of one epoch (n_ch x n_times).

    Each of three independent augmentations is applied with its own
    probability, drawn from NumPy's global RNG:

    * additive Gaussian noise (sigma 0.01) with probability 0.5,
    * circular shift along the time axis by -10..10 samples with
      probability 0.4,
    * zeroing of whole channels (each channel with probability 0.05)
      with probability 0.3.

    The input array is never modified.
    """
    augmented = epoch.copy()

    # Additive noise.
    add_noise = np.random.rand() < 0.5
    if add_noise:
        augmented = augmented + np.random.normal(0, 0.01, augmented.shape)

    # Circular time shift (np.roll wraps samples around the edges).
    do_shift = np.random.rand() < 0.4
    if do_shift:
        offset = np.random.randint(-10, 11)
        augmented = np.roll(augmented, offset, axis=1)

    # Channel dropout.
    do_dropout = np.random.rand() < 0.3
    if do_dropout:
        n_channels = augmented.shape[0]
        dropped = np.random.rand(n_channels) < 0.05
        augmented[dropped, :] = 0

    return augmented

# ---------- CSP + classical baseline (k-fold) ----------
use_csp = True
if use_csp:
    from mne.decoding import CSP
    # choose small number of components (<= n_ch)
    n_components = min(8, X.shape[1])
    # This CSP is fitted on ALL trials and is kept only for the saved artifact
    # and the feature-shape report. It must not be used for the cross-validated
    # scores below, because it has seen the labels of every test fold.
    csp = CSP(n_components=n_components, log=True, norm_trace=False)
    X_csp = csp.fit_transform(X, y)   # shape (N, n_components)
    print("CSP features:", X_csp.shape)
else:
    X_csp = None

classifiers = {
    'LDA': Pipeline([('sc', StandardScaler()), ('clf', LinearDiscriminantAnalysis())]),
    'SVM-rbf': Pipeline([('sc', StandardScaler()), ('clf', SVC(kernel='rbf', C=1, probability=True))]),
    'RandomForest': RandomForestClassifier(n_estimators=200, random_state=42),
    'MLP': Pipeline([('sc', StandardScaler()), ('clf', MLPClassifier(hidden_layer_sizes=(100,), max_iter=400))])
}

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
results_cls = {}
if X_csp is not None:
    # CSP is a supervised transform, so fitting it once on the full dataset
    # leaks test-fold labels into the features. Refit it inside every fold
    # using the training trials only, then reuse those per-fold features for
    # all classifiers so each model is scored on identical splits.
    fold_features = []
    for tr, te in kf.split(X, y):
        fold_csp = CSP(n_components=n_components, log=True, norm_trace=False)
        feats_tr = fold_csp.fit_transform(X[tr], y[tr])
        feats_te = fold_csp.transform(X[te])
        fold_features.append((feats_tr, y[tr], feats_te, y[te]))

    for name, clf in classifiers.items():
        accs = []
        f1s = []
        for feats_tr, y_tr, feats_te, y_te in fold_features:
            clf.fit(feats_tr, y_tr)
            p = clf.predict(feats_te)
            accs.append(accuracy_score(y_te, p))
            f1s.append(f1_score(y_te, p, average='weighted'))
        results_cls[name] = {'acc_mean': np.mean(accs), 'acc_std': np.std(accs), 'f1_mean': np.mean(f1s)}
        print(f"[CSP] {name}: acc {results_cls[name]['acc_mean']:.3f} ± {results_cls[name]['acc_std']:.3f}, f1 {results_cls[name]['f1_mean']:.3f}")

    # save CSP artifact (full-data CSP + leakage-free CV scores)
    joblib.dump({'csp': csp, 'cls_results': results_cls}, 'bnci_classical_artifacts.pkl')

# ---------- EEGNet implementation (small, robust) ----------
class BNCI_Dataset(Dataset):
    """In-memory torch dataset of epochs and integer labels.

    Args:
        X: array of epochs; stored as float32.
        y: array of labels; stored as int.
        augment: when true, every fetched epoch is passed through
            ``random_augment_numpy`` before being converted to a tensor.
    """

    def __init__(self, X, y, augment=False):
        # Cast once up front so item access only has to index.
        self.X = X.astype(np.float32)
        self.y = y.astype(int)
        self.augment = augment

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``(epoch, label)`` as a float32 tensor and a long tensor."""
        sample = random_augment_numpy(self.X[idx]) if self.augment else self.X[idx]
        label = int(self.y[idx])
        return (
            torch.tensor(sample, dtype=torch.float32),
            torch.tensor(label, dtype=torch.long),
        )

class EEGNet(nn.Module):
    """Small EEGNet-style CNN for inputs of shape (batch, chans, samples).

    Layout:
        * ``first``: temporal conv (1 x kern_len) -> BatchNorm -> spatial conv
          (chans x 1) -> BatchNorm -> ELU -> AvgPool(1, 4) -> Dropout.
        * ``second``: conv (1 x 16) -> BatchNorm -> ELU -> AvgPool(1, 8) -> Flatten.
        * ``classify``: linear layer producing ``classes`` logits.

    Args:
        chans: number of input channels (height of the spatial kernel).
        samples: number of time samples per epoch.
        classes: number of output logits.
        kern_len: width of the first temporal kernel.
        F1: number of temporal filters.
        D: multiplier giving ``F1 * D`` spatial filters.
        F2: number of filters in the second block.
        dropout: dropout probability at the end of the first block.
    """

    def __init__(self, chans, samples, classes=2, kern_len=64, F1=8, D=2, F2=16, dropout=0.5):
        super().__init__()
        self.first = nn.Sequential(
            nn.Conv2d(1, F1, (1, kern_len), padding=(0, kern_len//2), bias=False),
            nn.BatchNorm2d(F1),
            nn.Conv2d(F1, F1*D, (chans, 1), bias=False),
            nn.BatchNorm2d(F1*D),
            nn.ELU(), nn.AvgPool2d((1,4)), nn.Dropout(dropout)
        )
        self.second = nn.Sequential(
            nn.Conv2d(F1*D, F2, (1, 16), bias=False),
            nn.BatchNorm2d(F2), nn.ELU(), nn.AvgPool2d((1,8)), nn.Flatten()
        )
        hid_dim = self._probe_feature_dim(chans, samples)
        self.classify = nn.Linear(hid_dim, classes)

    def _probe_feature_dim(self, chans, samples):
        """Return the flattened feature size for a (chans, samples) input.

        The probe runs in eval mode: in training mode the dummy pass would
        update the BatchNorm running statistics and draw dropout noise, so a
        freshly constructed model would already carry altered state.
        """
        self.eval()
        try:
            with torch.no_grad():
                dummy = torch.zeros(1, 1, chans, samples)
                feat = self.second(self.first(dummy))
        finally:
            # Modules start in training mode; restore that default.
            self.train()
        return feat.shape[1]

    def forward(self, x):
        """Map (batch, chans, samples) inputs to (batch, classes) logits."""
        x = x.unsqueeze(1)  # add the single image-channel axis Conv2d expects
        x = self.first(x)
        x = self.second(x)
        return self.classify(x)

# ---------- k-fold EEGNet training ----------
device = 'cuda' if torch.cuda.is_available() else 'cpu'
n_splits = 5
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
# Model dimensions do not change between folds, so compute them once.
chans, samples = X.shape[1], X.shape[2]
# NOTE(review): CrossEntropyLoss needs labels in 0..num_classes-1; this assumes
# y is already encoded that way — TODO confirm against the extraction step.
num_classes = int(len(np.unique(y)))
fold_results = []
for fold_idx, (tr_idx, te_idx) in enumerate(kf.split(X, y), start=1):
    print(f"\n=== Fold {fold_idx}/{n_splits} ===")
    Xtr, Xte = X[tr_idx], X[te_idx]
    ytr, yte = y[tr_idx], y[te_idx]
    ds_tr = BNCI_Dataset(Xtr, ytr, augment=True)
    ds_te = BNCI_Dataset(Xte, yte, augment=False)
    loader_tr = DataLoader(ds_tr, batch_size=32, shuffle=True)
    loader_te = DataLoader(ds_te, batch_size=64, shuffle=False)
    model = EEGNet(chans, samples, classes=num_classes).to(device)
    opt = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
    loss_fn = nn.CrossEntropyLoss()
    best_acc = 0
    best_f1 = None
    best_state = None
    for ep in range(1, 31):  # 30 epochs per fold
        model.train()
        losses = []
        for xb, yb in loader_tr:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            logits = model(xb)
            loss = loss_fn(logits, yb)
            loss.backward()
            opt.step()
            losses.append(loss.item())
        # eval
        model.eval()
        ys = []
        preds = []
        with torch.no_grad():
            for xb, yb in loader_te:
                xb = xb.to(device)
                logits = model(xb)
                preds.extend(logits.argmax(dim=1).cpu().numpy())
                ys.extend(yb.numpy())
        acc = accuracy_score(ys, preds)
        f1v = f1_score(ys, preds, average='weighted')
        # NOTE(review): the best epoch is chosen on the held-out fold itself,
        # so best_acc is an optimistic estimate; a separate validation split
        # would be needed for an unbiased number.
        if acc > best_acc:
            best_acc = acc
            best_f1 = f1v
            # state_dict() returns references to the live parameters, which
            # later optimizer steps keep overwriting. Snapshot cloned tensors
            # so the checkpoint really holds the best epoch's weights.
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
        if ep==1 or ep%5==0:
            print(f" ep{ep}: tr_loss={np.mean(losses):.4f}, val_acc={acc:.4f}, f1={f1v:.4f}")
    # save best for fold
    if best_state is not None:
        torch.save(best_state, f"eegnet_fold{fold_idx}_best.pth")
    print(f"Fold {fold_idx} best acc: {best_acc:.4f}")
    fold_results.append({'fold':fold_idx, 'best_acc':best_acc, 'best_f1':best_f1})
# summary
print("\n=== EEGNet k-fold results ===")
print(pd.DataFrame(fold_results).set_index('fold'))

# ---------- final summary table (combine CSP/classical + EEGNet mean) ----------
# One row per classical model (only when the CSP baseline was run).
rows = []
if X_csp is not None:
    rows = [
        {'model': k+' (CSP)', 'acc_mean': v['acc_mean'], 'acc_std': v['acc_std'], 'f1_mean': v['f1_mean']}
        for k, v in results_cls.items()
    ]
# EEGNet fold mean: gather the per-fold numbers once instead of rebuilding the list.
eeg_accs = [r['best_acc'] for r in fold_results]
# Use the per-fold F1 when the fold records carry a 'best_f1' entry;
# otherwise the column stays None.
eeg_f1s = [r['best_f1'] for r in fold_results if r.get('best_f1') is not None]
rows.append({
    'model': 'EEGNet (k-fold)',
    'acc_mean': float(np.mean(eeg_accs)),
    'acc_std': float(np.std(eeg_accs)),
    'f1_mean': float(np.mean(eeg_f1s)) if eeg_f1s else None,
})
df = pd.DataFrame(rows).sort_values('acc_mean', ascending=False).reset_index(drop=True)
print("\n=== Final comparison ===\n", df)
df.to_csv('bnci_benchmark_summary.csv', index=False)
print("Saved bnci_benchmark_summary.csv and fold models eegnet_fold*_best.pth")


TypeError: iteration over a 0-d array