In [1]:
%load_ext autoreload
%autoreload 2
import os
if not os.path.exists('./modules') and not os.path.exists('modules.zip'):
    from google.colab import files
    uploaded = files.upload()
if not os.path.exists('./modules') and os.path.exists('modules.zip'):
    os.system('unzip modules.zip -d .')

!pip3 install optuna
import torch
import torch.nn as nn
import optuna
from modules import Trainer
from modules.competition_dataset import EEGDataset
from modules.utils import split_and_get_loaders, evaluate_model, get_closest_divisor
import matplotlib.pyplot as plt
import random
import numpy as np

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


  from .autonotebook import tqdm as notebook_tqdm


device(type='cpu')

In [2]:
data_path = './data/mtcaic3'
model_path = './checkpoints/ssvep/models/70_lstm_ssvep.pth'

In [3]:
# Add this at the beginning of your notebook, after imports
def set_random_seeds(seed=42):
    """Set random seeds for reproducibility"""

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# Call this function before creating datasets and models
set_random_seeds(42)

In [4]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, h0=None, c0=None):
        if h0 is None or c0 is None:
            h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(x.device)
            c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(x.device)

        out, (hn, cn) = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

class DepthWiseConv2D(nn.Module):
    def __init__(self, in_channels, kernel_size, dim_mult=1, padding=0, bias=False):
        super(DepthWiseConv2D, self).__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels * dim_mult, padding=padding, kernel_size=kernel_size, groups=in_channels, bias=bias)

    def forward(self, x: torch.Tensor):
        return self.depthwise(x)


class SeperableConv2D(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, padding, bias=False):
        super(SeperableConv2D, self).__init__()
        self.depthwise = DepthWiseConv2D(in_channels, kernel_size, padding=padding)
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias)

    def forward(self, x):
        out = self.depthwise(x)
        out = self.pointwise(out)
        return out

class SSVEPClassifier(nn.Module):
    # EEG Net Based
    # todo look at this https://paperswithcode.com/paper/a-transformer-based-deep-neural-network-model
    def __init__(self, n_electrodes=16, n_samples=128, out_dim=4, dropout=0.25, kernLength=256, F1=96, D=1, F2=96, hidden_dim=100, layer_dim=1):
        super().__init__()

        # B x C x T
        self.block_1 = nn.Sequential(
            nn.Conv2d(1, F1, (1, kernLength), padding='same', bias=False),
            nn.BatchNorm2d(F1),
            #
            DepthWiseConv2D(F1, (n_electrodes, 1), dim_mult=D, bias=False),
            nn.BatchNorm2d(F1*D),
            nn.ELU(),
            nn.MaxPool2d((1, 2)), # todo try making this max pool
            nn.Dropout(dropout),
            #
            SeperableConv2D(F1 * D, F2, kernel_size=(1, 16), padding='same', bias=False),
            nn.BatchNorm2d(F2),
            nn.ELU(),
            nn.MaxPool2d((1, 4)),
            nn.Dropout(dropout),
        )

        self.lstm_head = LSTMModel(F2, hidden_dim, layer_dim, out_dim)

    def forward(self, x: torch.Tensor):
        """expected input shape: BxCxT"""
        x = x.unsqueeze(1)
        y = self.block_1(x) # B x F1 x 1 x time_sub

        y = y.squeeze(2) # B x F1 x time_sub
        y = y.permute(0, 2, 1) # B x time_sub x F1
        y = self.lstm_head(y)

        return y


dummy_x = torch.randn(5, 14, 320)
model = SSVEPClassifier(n_electrodes=dummy_x.shape[1], n_samples=dummy_x.shape[2])
model(dummy_x)

  return F.conv2d(


tensor([[-0.1870, -0.0344,  0.1441,  0.0280],
        [-0.0968, -0.0085,  0.1039,  0.0302],
        [-0.1788,  0.0431,  0.0535,  0.0239],
        [-0.1983,  0.0379,  0.0793, -0.0029],
        [-0.2682, -0.0221,  0.1113, -0.0013]], grad_fn=<AddmmBackward0>)

In [None]:
window_length = get_closest_divisor(160)
print(window_length)
stride = window_length // 3
batch_size = 64

dataset = EEGDataset(data_path, window_length=window_length, stride=stride)
train_loader, val_loader, test_loader = split_and_get_loaders(dataset, batch_size)

175


In [None]:
model = SSVEPClassifier(
    n_electrodes=dummy_x.shape[1],
    n_samples=dummy_x.shape[2],
    dropout=0.33066508963955576,
    kernLength=256,
    F1 = 128,
    D = 2,
    F2 = 96,
    hidden_dim=256,
    layer_dim=3,
).to(device)

In [None]:
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.00030241790493218325)
avg_losses = []
val_accuracies = []

epochs = 200
for epoch in range(epochs):
    avg_loss = 0
    model.train()
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x).to(device)

        loss = criterion(y_pred, y)
        opt.zero_grad()
        loss.backward()
        opt.step()
        avg_loss += loss.item()

    avg_loss /= len(train_loader)
    avg_losses.append(avg_loss)

    evaluation = evaluate_model(model, val_loader, device)
    val_accuracies.append(evaluation)
    print(f'epoch: {epoch}, avg_loss: {avg_loss}, val_evaluation: {evaluation}')

In [None]:
# maxpool
plt.plot(range(len(avg_losses)), avg_losses, "b-", label="trainingg loss")
plt.plot(range(len(val_accuracies)), val_accuracies, "r-", label="validation accuracies")
plt.legend()
print(f"min avg_losses: {min(avg_losses)}")
print(f"max val_accuracies: {max(val_accuracies)}")

In [4]:
class CustomTrainer(Trainer):
    def _prepare_training(self, is_trial, stride_factor=2, do_not_modify_network=True):
        super()._prepare_training(is_trial, stride_factor, do_not_modify_network)
        assert self.dataset is not None
        
        if is_trial:
            assert isinstance(self.trial, optuna.Trial), "trial is none, cant' suggest params"

            if do_not_modify_network:
                best_params = self._get_study().best_params if do_not_modify_network else None
                assert best_params is not None, "best_params is None, can't use them"
                
                kernLength = best_params["kernLength"]
                F1 = best_params["F1"]
                D = best_params["D"]
                F2 = best_params["F2"]
                hidden_dim = best_params["hidden_dim"]
                layer_dim = best_params["layer_dim"]
                
            else:
                kernLength = self.trial.suggest_categorical("kernLength", [128, 256, 512])
                F1 = self.trial.suggest_categorical("F1", [64, 96, 128])
                D = self.trial.suggest_categorical("D", [1, 2, 3])
                F2 = self.trial.suggest_categorical("F2", [64, 96, 128])
                hidden_dim = self.trial.suggest_categorical("hidden_dim", [64, 128, 256])
                layer_dim = self.trial.suggest_categorical("layer_dim", [1, 2, 3, 4])

            dropout = self.trial.suggest_float("dropout", 0, 0.5)
            lr = self.trial.suggest_float("lr", 3e-4, 3e-2, log=True)

        else:
            best_params = self._get_study().best_params
            kernLength = best_params["kernLength"]
            F1 = best_params["F1"]
            D = best_params["D"]
            F2 = best_params["F2"]
            hidden_dim = best_params["hidden_dim"]
            layer_dim = best_params["layer_dim"]
            dropout = best_params["dropout"]
            lr = best_params["lr"]
            
        n_samples = self.dataset.data[0].shape[1]  # data[x] shape CxT
        n_electrodes = self.dataset.data[0].shape[0]

        n_samples = self.dataset.data[0].shape[1]  # data[x] shape CxT
        n_electrodes = self.dataset.data[0].shape[0]

        self.model = SSVEPClassifier(
            n_electrodes=n_electrodes, n_samples=n_samples, out_dim=4, dropout=dropout, kernLength=kernLength, F1=F1, D=D, F2=F1, hidden_dim=hidden_dim, layer_dim=layer_dim
        )
        if model_path is not None:
            self.model.load_state_dict(torch.load(model_path))
            print(f"loaded model weights from {model_path}")
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr)
        

# todo mke it accept stride size, model_path, database_path, should_load_model

trainer = CustomTrainer(data_path, train_epochs=10000, optuna_n_trials=35)

In [26]:
delete_existing = False
trainer.optimize(delete_existing)

[I 2025-06-19 20:47:11,718] Using an existing study with name 'ssvep_classifier_optimization' instead of creating a new one.
[I 2025-06-19 20:52:59,283] Trial 15 finished with value: 0.5607876712328768 and parameters: {'window_length': 250, 'batch_size': 64, 'dropout': 0.027864116927831084, 'kernLength': 256, 'F1': 64, 'D': 2, 'F2': 96, 'hidden_dim': 256, 'layer_dim': 4, 'lr': 0.00031228685999486617}. Best is trial 11 with value: 0.5625.
[I 2025-06-19 20:58:32,786] Trial 16 finished with value: 0.3264126712328767 and parameters: {'window_length': 250, 'batch_size': 64, 'dropout': 0.4821978058864569, 'kernLength': 256, 'F1': 64, 'D': 2, 'F2': 96, 'hidden_dim': 256, 'layer_dim': 4, 'lr': 0.0011883837399415897}. Best is trial 11 with value: 0.5625.
[I 2025-06-19 21:02:50,556] Trial 17 finished with value: 0.5834760273972602 and parameters: {'window_length': 250, 'batch_size': 64, 'dropout': 0.05276658823165456, 'kernLength': 256, 'F1': 64, 'D': 2, 'F2': 96, 'hidden_dim': 256, 'layer_dim':

Skipped, T.shape: (8, 348), self.window_length: 350


[I 2025-06-19 21:06:53,216] Trial 18 finished with value: 0.516875 and parameters: {'window_length': 350, 'batch_size': 64, 'dropout': 0.07601412338776302, 'kernLength': 256, 'F1': 64, 'D': 2, 'F2': 96, 'hidden_dim': 256, 'layer_dim': 2, 'lr': 0.001026060389437682}. Best is trial 17 with value: 0.5834760273972602.
[I 2025-06-19 21:11:08,615] Trial 19 finished with value: 0.5380993150684932 and parameters: {'window_length': 250, 'batch_size': 64, 'dropout': 0.0690609097641802, 'kernLength': 256, 'F1': 64, 'D': 2, 'F2': 128, 'hidden_dim': 256, 'layer_dim': 2, 'lr': 0.0021895134509308536}. Best is trial 17 with value: 0.5834760273972602.
[I 2025-06-19 21:18:38,071] Trial 20 finished with value: 0.5366010273972602 and parameters: {'window_length': 250, 'batch_size': 64, 'dropout': 0.21214002674930119, 'kernLength': 256, 'F1': 128, 'D': 2, 'F2': 96, 'hidden_dim': 256, 'layer_dim': 3, 'lr': 0.0007009681317028536}. Best is trial 17 with value: 0.5834760273972602.


Skipped, T.shape: (8, 348), self.window_length: 350


[I 2025-06-19 21:23:56,769] Trial 21 finished with value: 0.2834375 and parameters: {'window_length': 350, 'batch_size': 64, 'dropout': 0.07768407679589648, 'kernLength': 512, 'F1': 64, 'D': 3, 'F2': 96, 'hidden_dim': 256, 'layer_dim': 2, 'lr': 0.008744327559435102}. Best is trial 17 with value: 0.5834760273972602.
[I 2025-06-19 21:27:38,777] Trial 22 finished with value: 0.45601851851851855 and parameters: {'window_length': 175, 'batch_size': 64, 'dropout': 0.24733667912308432, 'kernLength': 256, 'F1': 64, 'D': 2, 'F2': 128, 'hidden_dim': 128, 'layer_dim': 2, 'lr': 0.0007590623631616397}. Best is trial 17 with value: 0.5834760273972602.
[I 2025-06-19 21:35:07,341] Trial 23 finished with value: 0.4835188356164384 and parameters: {'window_length': 250, 'batch_size': 64, 'dropout': 0.05033536560317159, 'kernLength': 256, 'F1': 128, 'D': 2, 'F2': 96, 'hidden_dim': 256, 'layer_dim': 3, 'lr': 0.001756712667776652}. Best is trial 17 with value: 0.5834760273972602.
[I 2025-06-19 21:40:40,714]

Skipped, T.shape: (8, 348), self.window_length: 350


[I 2025-06-19 22:23:15,114] Trial 31 finished with value: 0.49597772277227725 and parameters: {'window_length': 350, 'batch_size': 32, 'dropout': 0.13263392186529155, 'kernLength': 512, 'F1': 128, 'D': 3, 'F2': 128, 'hidden_dim': 128, 'layer_dim': 2, 'lr': 0.002221902346079088}. Best is trial 28 with value: 0.6127996575342466.
[I 2025-06-19 22:28:47,970] Trial 32 finished with value: 0.34288594470046085 and parameters: {'window_length': 175, 'batch_size': 32, 'dropout': 0.21382958645461678, 'kernLength': 512, 'F1': 64, 'D': 3, 'F2': 64, 'hidden_dim': 64, 'layer_dim': 2, 'lr': 0.004492754781135221}. Best is trial 28 with value: 0.6127996575342466.
[I 2025-06-19 22:34:21,202] Trial 33 finished with value: 0.30184331797235026 and parameters: {'window_length': 175, 'batch_size': 32, 'dropout': 0.10899856059028316, 'kernLength': 512, 'F1': 64, 'D': 3, 'F2': 128, 'hidden_dim': 64, 'layer_dim': 2, 'lr': 0.007719820042002409}. Best is trial 28 with value: 0.6127996575342466.
[I 2025-06-19 22:4

Skipped, T.shape: (8, 348), self.window_length: 350


[I 2025-06-19 23:38:40,672] Trial 43 finished with value: 0.5176361386138614 and parameters: {'window_length': 350, 'batch_size': 32, 'dropout': 0.11759007158529765, 'kernLength': 512, 'F1': 64, 'D': 3, 'F2': 64, 'hidden_dim': 128, 'layer_dim': 1, 'lr': 0.0009870293520543205}. Best is trial 28 with value: 0.6127996575342466.
[I 2025-06-19 23:42:46,750] Trial 44 finished with value: 0.5363869863013698 and parameters: {'window_length': 250, 'batch_size': 64, 'dropout': 0.09343791190460347, 'kernLength': 128, 'F1': 64, 'D': 3, 'F2': 96, 'hidden_dim': 256, 'layer_dim': 2, 'lr': 0.0009343370216390094}. Best is trial 28 with value: 0.6127996575342466.
[I 2025-06-19 23:47:37,625] Trial 45 finished with value: 0.5928938356164384 and parameters: {'window_length': 250, 'batch_size': 64, 'dropout': 0.18176937346202754, 'kernLength': 512, 'F1': 64, 'D': 1, 'F2': 96, 'hidden_dim': 256, 'layer_dim': 2, 'lr': 0.0006075298979437705}. Best is trial 28 with value: 0.6127996575342466.
[I 2025-06-19 23:52


Study statistics:
  Number of finished trials: 50
  Number of pruned trials: 0
  Number of complete trials: 47

Best trial:
  Value: 0.6127996575342466

Best hyperparameters:
  window_length: 250
  batch_size: 64
  dropout: 0.1103041385689191
  kernLength: 512
  F1: 64
  D: 3
  F2: 96
  hidden_dim: 256
  layer_dim: 2
  lr: 0.001446527896878632


{'window_length': 250,
 'batch_size': 64,
 'dropout': 0.1103041385689191,
 'kernLength': 512,
 'F1': 64,
 'D': 3,
 'F2': 96,
 'hidden_dim': 256,
 'layer_dim': 2,
 'lr': 0.001446527896878632}

In [27]:
# manual_write_study_params(trainer.study_name, trainer.storage)
trainer.train()

[I 2025-06-20 00:09:34,987] Using an existing study with name 'ssvep_classifier_optimization' instead of creating a new one.
[I 2025-06-20 00:09:46,329] Using an existing study with name 'ssvep_classifier_optimization' instead of creating a new one.


epoch 0, evaluation 0.2917380136986301, avg_loss 1.3841414694058694
epoch 1, evaluation 0.3073630136986301, avg_loss 1.3780235868389323
epoch 2, evaluation 0.3092893835616438, avg_loss 1.3685882847187882
epoch 3, evaluation 0.3210616438356164, avg_loss 1.3565816968174305
epoch 4, evaluation 0.3285530821917808, avg_loss 1.340835875171726
epoch 5, evaluation 0.3428938356164384, avg_loss 1.3247399742320431
epoch 6, evaluation 0.3589469178082192, avg_loss 1.3005372795007997
epoch 7, evaluation 0.3812071917808219, avg_loss 1.277446111177994
epoch 8, evaluation 0.3929794520547945, avg_loss 1.250127545049635
epoch 9, evaluation 0.4019691780821918, avg_loss 1.225795670687142
epoch 10, evaluation 0.4252996575342466, avg_loss 1.1947784702656632
epoch 11, evaluation 0.4353595890410959, avg_loss 1.1680164892794722
epoch 12, evaluation 0.4548373287671233, avg_loss 1.1408713910539272
epoch 13, evaluation 0.4674657534246575, avg_loss 1.108100170806303
epoch 14, evaluation 0.4713184931506849, avg_loss

KeyboardInterrupt: 

In [40]:
f"test accuracy: {evaluate_model(trainer.model, trainer.val_loader, device)}"

'test accuracy: 0.7007705479452054'

In [39]:
trainer.model

SSVEPClassifier(
  (block_1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(1, 512), stride=(1, 1), padding=same, bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): DepthWiseConv2D(
      (depthwise): Conv2d(64, 192, kernel_size=(8, 1), stride=(1, 1), groups=64, bias=False)
    )
    (3): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): ELU(alpha=1.0)
    (5): MaxPool2d(kernel_size=(1, 2), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
    (6): Dropout(p=0.1103041385689191, inplace=False)
    (7): SeperableConv2D(
      (depthwise): DepthWiseConv2D(
        (depthwise): Conv2d(192, 192, kernel_size=(1, 16), stride=(1, 1), padding=same, groups=192, bias=False)
      )
      (pointwise): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    )
    (8): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ELU(alpha=1.0)
    (10): Max

In [None]:
# to try:
# - reoptimize optuna for second round
# - extend data horizon to include validation data
# - lower learning rate