In [None]:
%load_ext autoreload
%autoreload 2
import torch
import random
import numpy as np
import optuna
from optuna.pruners import MedianPruner
from scipy.signal import butter, sosfiltfilt
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.feature_selection import mutual_info_classif
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.pipeline import Pipeline
from pyriemann.estimation import Covariances
from pyriemann.tangentspace import TangentSpace
from modules.competition_dataset import EEGDataset
from Models import FilterBankRTSClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

In [None]:
data_path = './data/mtcaic3'
lda_model_path = './checkpoints/mi/models/lda_mi.pkl'

# Add this at the beginning of your notebook, after imports
def set_random_seeds(seed=42):
    """Set random seeds for reproducibility"""

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

set_random_seeds(42)

In [23]:
def load_eeg_data(data_path, window_length, stride, tmin, eeg_channels):
    ds = EEGDataset(
        data_path,
        window_length=window_length,
        stride=stride,
        task="mi",
        split="train",
        data_fraction=0.4,
        tmin=tmin,
        eeg_channels=eeg_channels,
    )
    X = np.stack([x.numpy() for x, _ in ds])
    y = np.array([label[0] for _, label in ds])
    return X, y


# Optuna optimization
data_path = "./data/mtcaic3"
cv_folds = 3

window_length = 1000
stride = 85
eeg_channels = ['FZ', 'CZ', 'PZ', 'C3', 'OZ']

def objective(trial):
    # Data parameters
    tmin = trial.suggest_int("tmin", 0, 250, step=10)

    # Filter bank parameters
    filter_order = trial.suggest_int("filter_order", 3, 6)
    fs = trial.suggest_int("fs", 70, 300, step=10)

    # Random Forest parameters
    n_estimators = trial.suggest_int("n_estimators", 50, 500, step=50)
    min_samples_split = trial.suggest_int("min_samples_split", 2, 10)
    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 5)

    try:
        X, y = load_eeg_data(data_path, window_length=window_length, stride=stride, tmin=tmin, eeg_channels=eeg_channels)
    except Exception as e:
        print("Data loading failed:", e)
        return 0.0

    clf = FilterBankRTSClassifier(fs=fs, order=filter_order, n_estimators=n_estimators, max_depth=max_depth, class_weight="balanced", n_jobs=-1)

    # Add RF-specific parameters
    clf.min_samples_split = min_samples_split
    clf.min_samples_leaf = min_samples_leaf
    clf.max_features = max_features

    # Override the classifier creation in fit method
    original_fit = clf.fit

    def custom_fit(X, y):
        # Store the classes - this is required by scikit-learn
        clf.classes_ = np.unique(y)

        fb_covs = clf.compute_fb_covs(X)
        n, B, C, _ = fb_covs.shape

        covs_flat = fb_covs.reshape(n * B, C, C)
        labels_rep = np.repeat(y, B)

        clf.ts = TangentSpace(metric="riemann").fit(covs_flat, labels_rep)
        Z = clf.ts.transform(covs_flat)
        Z = Z.reshape(n, B, -1)

        clf.w = mutual_info_classif(Z.reshape(n, -1), y, discrete_features=False).reshape(B, -1).mean(axis=1)
        clf.w = clf.w / clf.w.sum()

        Z_weighted = np.concatenate([np.sqrt(clf.w[i]) * Z[:, i, :] for i in range(B)], axis=1)

        clf.clf = make_pipeline(
            StandardScaler(),
            RandomForestClassifier(
                n_estimators=n_estimators,
                max_depth=None,
                min_samples_split=min_samples_split,
                min_samples_leaf=min_samples_leaf,
                max_features='sqrt',
                class_weight="balanced",
                n_jobs=-1,
                random_state=42,
            ),
        )
        clf.clf.fit(Z_weighted, y)
        return clf

    clf.fit = custom_fit

    cv = StratifiedKFold(cv_folds, shuffle=True, random_state=42)
    scores = cross_validate(clf, X, y, cv=cv, scoring="accuracy", return_train_score=True)

    train_acc = scores["train_score"].mean()
    val_acc = scores["test_score"].mean()

    print(f"   → Train acc: {train_acc:.3f} | Val acc: {val_acc:.3f}")
    trial.set_user_attr("train_acc", train_acc)

    return val_acc


study = optuna.create_study(direction="maximize", pruner=MedianPruner())
study.optimize(objective, n_trials=50 , timeout=7200)

print("\n=== Best trial ===")
best = study.best_trial
print("Val Acc:", best.value)
print("Train Acc:", best.user_attrs["train_acc"])
print("Params:")
for k, v in best.params.items():
    print(f"  {k}: {v}")

[I 2025-06-28 19:00:21,900] A new study created in memory with name: no-name-7f6843c7-6c41-4c15-ab15-995dc8764f03


task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 12/960


[I 2025-06-28 19:02:01,160] Trial 0 finished with value: 0.577676531840226 and parameters: {'tmin': 70, 'filter_order': 3, 'fs': 300, 'n_estimators': 350, 'min_samples_split': 10, 'min_samples_leaf': 5}. Best is trial 0 with value: 0.577676531840226.


   → Train acc: 0.992 | Val acc: 0.578
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 15/960


[I 2025-06-28 19:03:26,158] Trial 1 finished with value: 0.5750321573720879 and parameters: {'tmin': 230, 'filter_order': 4, 'fs': 210, 'n_estimators': 500, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.577676531840226.


   → Train acc: 0.999 | Val acc: 0.575
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 11/960


[I 2025-06-28 19:05:11,566] Trial 2 finished with value: 0.5678408521576039 and parameters: {'tmin': 10, 'filter_order': 5, 'fs': 190, 'n_estimators': 150, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.577676531840226.


   → Train acc: 1.000 | Val acc: 0.568
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 14/960


[I 2025-06-28 19:06:50,952] Trial 3 finished with value: 0.5682697622996131 and parameters: {'tmin': 150, 'filter_order': 4, 'fs': 280, 'n_estimators': 400, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.577676531840226.


   → Train acc: 0.998 | Val acc: 0.568
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 13/960


[I 2025-06-28 19:08:31,812] Trial 4 finished with value: 0.5510021608425416 and parameters: {'tmin': 90, 'filter_order': 4, 'fs': 240, 'n_estimators': 400, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.577676531840226.


   → Train acc: 0.999 | Val acc: 0.551
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 16/960


[I 2025-06-28 19:09:55,235] Trial 5 finished with value: 0.6214133728507264 and parameters: {'tmin': 240, 'filter_order': 3, 'fs': 150, 'n_estimators': 300, 'min_samples_split': 2, 'min_samples_leaf': 1}. Best is trial 5 with value: 0.6214133728507264.


   → Train acc: 1.000 | Val acc: 0.621
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 13/960


[I 2025-06-28 19:11:34,961] Trial 6 finished with value: 0.5694931541400491 and parameters: {'tmin': 110, 'filter_order': 4, 'fs': 210, 'n_estimators': 450, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 5 with value: 0.6214133728507264.


   → Train acc: 0.998 | Val acc: 0.569
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 13/960


[I 2025-06-28 19:13:14,873] Trial 7 finished with value: 0.5614235157271394 and parameters: {'tmin': 110, 'filter_order': 4, 'fs': 190, 'n_estimators': 450, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 5 with value: 0.6214133728507264.


   → Train acc: 1.000 | Val acc: 0.561
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 15/960


[I 2025-06-28 19:14:37,128] Trial 8 finished with value: 0.557707668929572 and parameters: {'tmin': 170, 'filter_order': 5, 'fs': 210, 'n_estimators': 50, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 5 with value: 0.6214133728507264.


   → Train acc: 0.995 | Val acc: 0.558
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 11/960


[I 2025-06-28 19:16:33,154] Trial 9 finished with value: 0.5728496191955338 and parameters: {'tmin': 40, 'filter_order': 4, 'fs': 300, 'n_estimators': 500, 'min_samples_split': 9, 'min_samples_leaf': 1}. Best is trial 5 with value: 0.6214133728507264.


   → Train acc: 0.999 | Val acc: 0.573
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 16/960


[I 2025-06-28 19:17:58,312] Trial 10 finished with value: 0.6247614335896314 and parameters: {'tmin': 240, 'filter_order': 6, 'fs': 110, 'n_estimators': 250, 'min_samples_split': 2, 'min_samples_leaf': 5}. Best is trial 10 with value: 0.6247614335896314.


   → Train acc: 0.990 | Val acc: 0.625
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 16/960


[I 2025-06-28 19:19:24,393] Trial 11 finished with value: 0.6287495905872867 and parameters: {'tmin': 240, 'filter_order': 6, 'fs': 110, 'n_estimators': 250, 'min_samples_split': 2, 'min_samples_leaf': 5}. Best is trial 11 with value: 0.6287495905872867.


   → Train acc: 0.988 | Val acc: 0.629
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 15/960


[I 2025-06-28 19:20:49,731] Trial 12 finished with value: 0.6235645744848198 and parameters: {'tmin': 200, 'filter_order': 6, 'fs': 70, 'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 5}. Best is trial 11 with value: 0.6287495905872867.


   → Train acc: 0.990 | Val acc: 0.624
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 16/960


[I 2025-06-28 19:22:05,021] Trial 13 finished with value: 0.6088229910305692 and parameters: {'tmin': 250, 'filter_order': 6, 'fs': 110, 'n_estimators': 200, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 11 with value: 0.6287495905872867.


   → Train acc: 0.994 | Val acc: 0.609
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 15/960


[I 2025-06-28 19:23:33,592] Trial 14 finished with value: 0.6203485633537448 and parameters: {'tmin': 190, 'filter_order': 6, 'fs': 140, 'n_estimators': 250, 'min_samples_split': 4, 'min_samples_leaf': 5}. Best is trial 11 with value: 0.6287495905872867.


   → Train acc: 0.986 | Val acc: 0.620
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 15/960


[I 2025-06-28 19:24:57,118] Trial 15 finished with value: 0.6401474897251754 and parameters: {'tmin': 210, 'filter_order': 5, 'fs': 80, 'n_estimators': 150, 'min_samples_split': 3, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.6401474897251754.


   → Train acc: 0.993 | Val acc: 0.640
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 15/960


[I 2025-06-28 19:26:19,684] Trial 16 finished with value: 0.6339468302658487 and parameters: {'tmin': 200, 'filter_order': 5, 'fs': 70, 'n_estimators': 100, 'min_samples_split': 3, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.6401474897251754.


   → Train acc: 0.993 | Val acc: 0.634
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 14/960


[I 2025-06-28 19:27:52,643] Trial 17 finished with value: 0.6258982863460475 and parameters: {'tmin': 150, 'filter_order': 5, 'fs': 70, 'n_estimators': 50, 'min_samples_split': 5, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.6401474897251754.


   → Train acc: 0.991 | Val acc: 0.626
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 15/960


[I 2025-06-28 19:29:15,436] Trial 18 finished with value: 0.6273399402233758 and parameters: {'tmin': 200, 'filter_order': 5, 'fs': 90, 'n_estimators': 100, 'min_samples_split': 3, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.6401474897251754.


   → Train acc: 0.994 | Val acc: 0.627
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 15/960


[I 2025-06-28 19:30:35,267] Trial 19 finished with value: 0.5987122766091515 and parameters: {'tmin': 210, 'filter_order': 5, 'fs': 150, 'n_estimators': 150, 'min_samples_split': 3, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.6401474897251754.


   → Train acc: 0.994 | Val acc: 0.599
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 15/960


[I 2025-06-28 19:31:57,118] Trial 20 finished with value: 0.6143656088747909 and parameters: {'tmin': 160, 'filter_order': 5, 'fs': 90, 'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.6401474897251754.


   → Train acc: 0.993 | Val acc: 0.614
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 15/960


[I 2025-06-28 19:33:19,704] Trial 21 finished with value: 0.623263888888889 and parameters: {'tmin': 220, 'filter_order': 6, 'fs': 110, 'n_estimators': 150, 'min_samples_split': 3, 'min_samples_leaf': 5}. Best is trial 15 with value: 0.6401474897251754.


   → Train acc: 0.987 | Val acc: 0.623
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 15/960


[I 2025-06-28 19:34:47,501] Trial 22 finished with value: 0.6268008447322945 and parameters: {'tmin': 180, 'filter_order': 6, 'fs': 130, 'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.6401474897251754.


   → Train acc: 0.995 | Val acc: 0.627
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 15/960


[I 2025-06-28 19:36:07,370] Trial 23 finished with value: 0.6199494949494949 and parameters: {'tmin': 220, 'filter_order': 5, 'fs': 90, 'n_estimators': 100, 'min_samples_split': 2, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.6401474897251754.


   → Train acc: 0.993 | Val acc: 0.620
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 16/960


[I 2025-06-28 19:37:23,527] Trial 24 finished with value: 0.6306058941973275 and parameters: {'tmin': 250, 'filter_order': 5, 'fs': 70, 'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 5}. Best is trial 15 with value: 0.6401474897251754.


   → Train acc: 0.991 | Val acc: 0.631
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 16/960


[I 2025-06-28 19:38:38,145] Trial 25 finished with value: 0.6522057477576423 and parameters: {'tmin': 250, 'filter_order': 5, 'fs': 80, 'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 3}. Best is trial 25 with value: 0.6522057477576423.


   → Train acc: 0.999 | Val acc: 0.652
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 13/960


[I 2025-06-28 19:40:17,552] Trial 26 finished with value: 0.6366762966020084 and parameters: {'tmin': 140, 'filter_order': 5, 'fs': 130, 'n_estimators': 350, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 25 with value: 0.6522057477576423.


   → Train acc: 1.000 | Val acc: 0.637
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 13/960


[I 2025-06-28 19:41:58,254] Trial 27 finished with value: 0.5725037218010649 and parameters: {'tmin': 120, 'filter_order': 5, 'fs': 160, 'n_estimators': 350, 'min_samples_split': 5, 'min_samples_leaf': 2}. Best is trial 25 with value: 0.6522057477576423.


   → Train acc: 1.000 | Val acc: 0.573
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 13/960


[I 2025-06-28 19:43:40,780] Trial 28 finished with value: 0.634200027514101 and parameters: {'tmin': 140, 'filter_order': 5, 'fs': 130, 'n_estimators': 350, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 25 with value: 0.6522057477576423.


   → Train acc: 1.000 | Val acc: 0.634
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 12/960


[I 2025-06-28 19:46:06,284] Trial 29 finished with value: 0.6585397435343119 and parameters: {'tmin': 70, 'filter_order': 3, 'fs': 90, 'n_estimators': 300, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 29 with value: 0.6585397435343119.


   → Train acc: 0.998 | Val acc: 0.659
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 11/960


[I 2025-06-28 19:47:51,512] Trial 30 finished with value: 0.6598790349742903 and parameters: {'tmin': 60, 'filter_order': 3, 'fs': 100, 'n_estimators': 300, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.997 | Val acc: 0.660
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 11/960


[I 2025-06-28 19:49:38,598] Trial 31 finished with value: 0.6503664826825545 and parameters: {'tmin': 60, 'filter_order': 3, 'fs': 90, 'n_estimators': 300, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.999 | Val acc: 0.650
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 11/960


[I 2025-06-28 19:52:30,619] Trial 32 finished with value: 0.6586590617640045 and parameters: {'tmin': 60, 'filter_order': 3, 'fs': 100, 'n_estimators': 300, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.997 | Val acc: 0.659
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 11/960


[I 2025-06-28 19:55:39,434] Trial 33 finished with value: 0.6492184660123591 and parameters: {'tmin': 30, 'filter_order': 3, 'fs': 100, 'n_estimators': 300, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.997 | Val acc: 0.649
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 11/960


[I 2025-06-28 19:57:27,529] Trial 34 finished with value: 0.6571958701406393 and parameters: {'tmin': 60, 'filter_order': 3, 'fs': 120, 'n_estimators': 350, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.992 | Val acc: 0.657
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 13/960


[I 2025-06-28 19:59:05,147] Trial 35 finished with value: 0.6424205898359339 and parameters: {'tmin': 80, 'filter_order': 3, 'fs': 120, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.993 | Val acc: 0.642
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 11/960


[I 2025-06-28 20:00:50,886] Trial 36 finished with value: 0.5824796305108507 and parameters: {'tmin': 0, 'filter_order': 3, 'fs': 170, 'n_estimators': 400, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.997 | Val acc: 0.582
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 11/960


[I 2025-06-28 20:02:40,004] Trial 37 finished with value: 0.5758540771605508 and parameters: {'tmin': 60, 'filter_order': 3, 'fs': 280, 'n_estimators': 350, 'min_samples_split': 9, 'min_samples_leaf': 3}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.997 | Val acc: 0.576
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 11/960


[I 2025-06-28 20:04:25,374] Trial 38 finished with value: 0.5713074033684721 and parameters: {'tmin': 30, 'filter_order': 3, 'fs': 170, 'n_estimators': 250, 'min_samples_split': 8, 'min_samples_leaf': 3}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.997 | Val acc: 0.571
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 13/960


[I 2025-06-28 20:06:07,729] Trial 39 finished with value: 0.5759239671893822 and parameters: {'tmin': 90, 'filter_order': 3, 'fs': 230, 'n_estimators': 450, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.996 | Val acc: 0.576
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 11/960


[I 2025-06-28 20:08:05,108] Trial 40 finished with value: 0.6451652089407192 and parameters: {'tmin': 50, 'filter_order': 4, 'fs': 120, 'n_estimators': 400, 'min_samples_split': 6, 'min_samples_leaf': 2}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.999 | Val acc: 0.645
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 13/960


[I 2025-06-28 20:09:48,090] Trial 41 finished with value: 0.6557433250158794 and parameters: {'tmin': 80, 'filter_order': 3, 'fs': 100, 'n_estimators': 300, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.996 | Val acc: 0.656
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 13/960


[I 2025-06-28 20:11:30,271] Trial 42 finished with value: 0.6497643092264723 and parameters: {'tmin': 80, 'filter_order': 3, 'fs': 100, 'n_estimators': 350, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 30 with value: 0.6598790349742903.


   → Train acc: 0.997 | Val acc: 0.650
task: mi, split: train, domain: time, data_fraction: 0.4
Using 40.0% of data: 960/960 samples
skipped: 13/960


[W 2025-06-28 20:13:04,212] Trial 43 failed with parameters: {'tmin': 100, 'filter_order': 3, 'fs': 100, 'n_estimators': 300, 'min_samples_split': 8, 'min_samples_leaf': 3} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/zeyadcode/.pyenv/versions/icmtc_venv/lib/python3.12/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_81244/3846833443.py", line 178, in objective
    scores = cross_validate(clf, X, y, cv=cv, scoring="accuracy", return_train_score=True)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/zeyadcode/.pyenv/versions/icmtc_venv/lib/python3.12/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/zeyadcode/.pyenv/versions/icmtc_venv/lib/python3.12/site-packages/sklea

KeyboardInterrupt: 

In [None]:
# Trial 67 finished with value: 0.6263345734944465 and parameters: {'window_length': 250, 'stride': 250, 'tmin': 0, 'ch_FZ': 1, 'ch_C3': 0, 'ch_CZ': 1, 'ch_C4': 0, 'ch_PZ': 1, 'ch_PO7': 1, 'ch_OZ': 1, 'ch_PO8': 0, 'n_bands': 4, 'min_freq': 11, 'max_freq': 40, 'filter_order': 3, 'fs': 125, 'n_estimators': 200, 'max_depth': None, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt'}. Best is trial 67 with value: 0.6263345734944465.

class FilterBankRTSClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, bands=None, fs=250, order=4, n_estimators=100, max_depth=None, min_samples_split=2, min_samples_leaf=1, max_features="sqrt", class_weight="balanced", n_jobs=-1):
        self.bands = bands if bands else [(8, 12), (12, 16), (16, 20), (20, 24), (24, 30)]
        self.fs = fs
        self.order = order
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_features
        self.class_weight = class_weight
        self.n_jobs = n_jobs

    def compute_fb_covs(self, X):
        """X: (n_trials, C, T) → fb_covs: (n_trials, B, C, C)"""
        # Pre-compute SOS filters if not done
        if not hasattr(self, "sos_bands"):
            self.sos_bands = [butter(self.order, (l / (self.fs / 2), h / (self.fs / 2)), btype="bandpass", output="sos") for l, h in self.bands]

        n, C, _ = X.shape
        B = len(self.sos_bands)
        fb_covs = np.zeros((n, B, C, C))
        for i, sos in enumerate(self.sos_bands):
            Xf = sosfiltfilt(sos, X, axis=2)
            fb_covs[:, i] = Covariances(estimator="lwf").transform(Xf)
        return fb_covs

    def fit(self, X, y):
        self.classes_ = np.unique(y)
        fb_covs = self.compute_fb_covs(X)
        n, B, C, _ = fb_covs.shape

        # Flatten for tangent space
        covs_flat = fb_covs.reshape(n * B, C, C)
        labels_rep = np.repeat(y, B)

        # Fit tangent space
        self.ts = TangentSpace(metric="riemann").fit(covs_flat, labels_rep)
        Z = self.ts.transform(covs_flat)
        Z = Z.reshape(n, B, -1)

        # Compute mutual information weights
        self.w = mutual_info_classif(Z.reshape(n, -1), y, discrete_features=False).reshape(B, -1).mean(axis=1)
        self.w = self.w / self.w.sum()

        # Weight features
        Z_weighted = np.concatenate([np.sqrt(self.w[i]) * Z[:, i, :] for i in range(B)], axis=1)

        # Train classifier
        self.clf = make_pipeline(
            StandardScaler(),
            RandomForestClassifier(
                n_estimators=self.n_estimators,
                max_depth=self.max_depth,
                min_samples_split=self.min_samples_split,
                min_samples_leaf=self.min_samples_leaf,
                max_features=self.max_features,
                class_weight=self.class_weight,
                n_jobs=self.n_jobs,
                random_state=42,
            ),
        )
        self.clf.fit(Z_weighted, y)
        return self

    def predict(self, X):
        fb_covs = self.compute_fb_covs(X)
        n, B, C, _ = fb_covs.shape

        covs_flat = fb_covs.reshape(n * B, C, C)
        Z = self.ts.transform(covs_flat).reshape(n, B, -1)
        Z_weighted = np.concatenate([np.sqrt(self.w[i]) * Z[:, i, :] for i in range(B)], axis=1)

        return self.clf.predict(Z_weighted)

    def predict_proba(self, X):
        fb_covs = self.compute_fb_covs(X)
        n, B, C, _ = fb_covs.shape

        covs_flat = fb_covs.reshape(n * B, C, C)
        Z = self.ts.transform(covs_flat).reshape(n, B, -1)
        Z_weighted = np.concatenate([np.sqrt(self.w[i]) * Z[:, i, :] for i in range(B)], axis=1)

        return self.clf.predict_proba(Z_weighted)

        
window_length = 1000
# Best parameters from Optuna trial 67
stride = 250
tmin = 0
eeg_channels = ['FZ', 'CZ', 'PZ', 'PO7', 'OZ']
n_bands = 4
min_freq = 11
max_freq = 40
filter_order = 3
fs = 125
n_estimators = 200
max_depth = None
min_samples_split = 6
min_samples_leaf = 2
max_features = 'sqrt'
data_path = "./data/mtcaic3"


# Load data with besjt parameters
ds_train = EEGDataset(
    data_path,
    window_length=window_length,
    stride=stride,
    task="mi",
    split="train",
    data_fraction=1,
    tmin=tmin,
    eeg_channels=eeg_channels,
)
X_train = np.stack([x.numpy() for x, _ in ds_train])
y_train = np.array([label[0] for _, label in ds_train])

# Load data with besjt parameters
ds_val = EEGDataset(
    data_path,
    window_length=window_length,
    stride=stride,
    task="mi",
    split="train",
    data_fraction=1,
    tmin=tmin,
    eeg_channels=eeg_channels,
)
X_val = np.stack([x.numpy() for x, _ in ds_val])
y_val = np.array([label[0] for _, label in ds_val])


# Create frequency bands

# Create FilterBank classifier with best parameters
clf = FilterBankRTSClassifier(
    fs=fs,
    order=filter_order,
    n_estimators=n_estimators,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
    min_samples_leaf=min_samples_leaf# Trial 67 finished with value: 0.6263345734944465 and parameters: {'window_length': 250, 'stride': 250, 'tmin': 0, 'ch_FZ': 1, 'ch_C3': 0, 'ch_CZ': 1, 'ch_C4': 0, 'ch_PZ': 1, 'ch_PO7': 1, 'ch_OZ': 1, 'ch_PO8': 0, 'n_bands': 4, 'min_freq': 11, 'max_freq': 40, 'filter_order': 3, 'fs': 125, 'n_estimators': 200, 'max_depth': None, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt'}. Best is trial 67 with value: 0.6263345734944465.
)

# Fit on training data
clf.fit(X_train, y_train)

# Calculate accuracy
y_pred = clf.predict(X_val)
val_acc = accuracy_score(y_val, y_pred)

print(f"Validation accuracy: {val_acc:.4f}")

# Classification report
print("\nClassification Report:")
print(classification_report(y_val, y_pred)) 