In [None]:
"""
Left vs Right hand EEG decoder (single-sample RidgeClassifierCV)
================================================================
• Pre-processing: 0.1–20 Hz band-pass → 50 Hz resample → epochs (−0.2…+0.3 s)
  → baseline correction → RobustScaler (fit on train only).
• Model: RidgeClassifierCV (alpha grid 1e-2 … 1e8) with StratifiedKFold(20, shuffle).
• Latency search: 5-fold stratified CV over every sample (0.5 s @ 50 Hz, both end points included = 26 latencies).
• Train ≈ 85 % / test ≈ 15 % split.  Prints hold-out balanced accuracy.
"""
# ---------------------------------------------------------------------
# Imports
# ---------------------------------------------------------------------
import numpy as np
import mne
from pathlib import Path
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import RidgeClassifierCV
from sklearn.model_selection import (StratifiedKFold, train_test_split,
                                     cross_val_score)
from sklearn.metrics import balanced_accuracy_score

# ---------------------------------------------------------------------
# 0. Load raw EEG  (adapt to your format: EDF, BDF, FIF, ...)
# ---------------------------------------------------------------------
# Recording to decode, plus the trigger channel and the codes that mark
# each key press.
RAW_FILE = Path("subject01_raw.edf")
EVENT_CH = "STI 014"              # name of the digital trigger channel
LEFT_CODE, RIGHT_CODE = 1, 2      # trigger values: left-hand / right-hand key

# Read the EDF recording with its data loaded into memory (preload=True).
raw = mne.io.read_raw_edf(input_fname=RAW_FILE, preload=True)

# Attach the standard 10-20 template montage so channels carry positions.
raw.set_montage(montage="standard_1020")

# ---------------------------------------------------------------------
# 1. Light pre-processing
# ---------------------------------------------------------------------
# Mains-hum notch (optional).  A notch at or above the recording's Nyquist
# frequency cannot be designed, so only the harmonics that fit are removed
# (e.g. a 160 Hz or 200 Hz recording has no usable 100 Hz notch).
nyquist = raw.info["sfreq"] / 2.0
notch_freqs = [freq for freq in (50, 100) if freq < nyquist]
if notch_freqs:
    raw.notch_filter(notch_freqs)

# 0.1–20 Hz band-pass.
raw.filter(l_freq=0.1, h_freq=20., fir_design="firwin")

# Detect the key-press triggers at the native sampling rate, *before*
# down-sampling: resampling the stim channel itself can jitter or drop
# short trigger pulses.
events = mne.find_events(raw, stim_channel=EVENT_CH, shortest_event=1)

# Down-sample to 50 Hz (anti-alias filter auto-applied).  Passing the events
# makes MNE resample their onsets jointly with the data and return the
# converted event array alongside the raw object.
raw, events = raw.resample(50, events=events)

# ---------------------------------------------------------------------
# 2. Epochs:  −0.2 … +0.3 s around each press
# ---------------------------------------------------------------------

# Condition name -> trigger code; only these two event types are epoched.
event_id = {"left": LEFT_CODE, "right": RIGHT_CODE}

# Cut a −0.2 … +0.3 s window around every press and baseline-correct each
# epoch with the pre-press interval (−0.2 … 0 s).  No detrending.
epochs = mne.Epochs(
    raw,
    events,
    event_id=event_id,
    tmin=-0.2,
    tmax=0.3,
    baseline=(-0.2, 0.0),
    detrend=None,
    preload=True,
)

# Data cube laid out as (n_trials, n_channels, n_times).
X_cube = epochs.get_data()
n_trials, n_ch, n_t = X_cube.shape

# Class labels: -1 for a left-hand press, +1 for a right-hand press.
is_left = epochs.events[:, 2] == LEFT_CODE
y = np.where(is_left, -1, 1)

# One candidate latency per time sample: indices 0 … n_t - 1.
# NOTE(review): MNE keeps both tmin and tmax, so at 50 Hz this window should
# hold 26 samples (index 10 = press onset), not 25 — confirm against n_t.
latencies = np.arange(n_t)

# ---------------------------------------------------------------------
# 3. Train-test split  (85 % / 15 %)
# ---------------------------------------------------------------------
# Stratified hold-out: 15 % of the trials are set aside for the final test,
# keeping the left/right proportions of `y`; the seed fixes the partition.
split_parts = train_test_split(
    X_cube,
    y,
    test_size=0.15,
    stratify=y,
    random_state=42,
)
X_train_cube, X_test_cube, y_train, y_test = split_parts

# ---------------------------------------------------------------------
# 4. Build model pipeline
# ---------------------------------------------------------------------
# Regularisation grid: 11 log-spaced strengths from 1e-2 to 1e8.
alphas = np.logspace(start=-2, stop=8, num=11)

# Splitter used inside RidgeClassifierCV to choose alpha.
inner_cv = StratifiedKFold(n_splits=20, shuffle=True, random_state=0)

# Step 1: median-centre and IQR-scale every feature.
scaler = RobustScaler(
    with_centering=True,
    with_scaling=True,
    quantile_range=(25, 75),
)

# Step 2: ridge classifier whose alpha is picked by balanced accuracy.
classifier = RidgeClassifierCV(
    alphas=alphas,
    cv=inner_cv,
    scoring='balanced_accuracy',
    fit_intercept=True,
)

# The scaler is part of the pipeline, so it is re-fit on whatever data the
# pipeline is fit on (training folds only during cross-validation).
ridge_pipe = make_pipeline(scaler, classifier)

# ---------------------------------------------------------------------
# 5. Latency sweep with 5-fold stratified CV on the **training** set
# ---------------------------------------------------------------------
# Outer splitter used to score each candidate latency on the training set.
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

# Start below any attainable score so a latency is still selected when every
# mean score is 0.  NaN means (e.g. from failed fits) never compare greater
# and are therefore skipped.
best_lat, best_score = None, -np.inf
for t in latencies:
    X_t = X_train_cube[:, :, t]            # (n_trials, n_channels) at sample t
    cv_scores = cross_val_score(ridge_pipe, X_t, y_train,
                                cv=outer_cv,
                                scoring='balanced_accuracy',
                                n_jobs=-1)
    mean_bacc = cv_scores.mean()
    if mean_bacc > best_score:             # strict '>' keeps the earliest best
        best_score, best_lat = mean_bacc, t

if best_lat is None:
    raise RuntimeError("Latency sweep produced no valid CV score; "
                       "cannot select a best latency.")

# Convert the sample index to a time relative to the press using the epochs'
# own time axis (seconds -> ms).  The window starts at -0.2 s, so the press
# is NOT at the middle sample and a fixed n_t // 2 offset would be wrong.
best_lat_ms = epochs.times[best_lat] * 1e3

print(f"Best latency sample = {best_lat}  "
      f"({best_lat_ms:.0f} ms rel press)  "
      f"CV balanced-acc = {best_score:.3f}")

# ---------------------------------------------------------------------
# 6. Re-fit on full training set (85 %) with best latency, evaluate on test
# ---------------------------------------------------------------------
# Keep only the winning time sample: (n_trials, n_channels) feature matrices.
X_train_best = X_train_cube[..., best_lat]
X_test_best = X_test_cube[..., best_lat]

# Fit scaler + classifier on the full training portion, then predict the
# held-out trials.
y_pred = ridge_pipe.fit(X_train_best, y_train).predict(X_test_best)
test_bacc = balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"**Hold-out balanced accuracy**  = {test_bacc:.3f}")