In [None]:

%load_ext autoreload
%autoreload 2
import os

import torch
import torch.nn as nn
import optuna
from modules import Trainer
from modules.competition_dataset import EEGDataset, LABELS
from modules.utils import evaluate_model
import random
import numpy as np
from torch.utils.data import ConcatDataset, random_split, DataLoader

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

  from .autonotebook import tqdm as notebook_tqdm


device(type='cuda')

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')
# data_path = '/content/drive/MyDrive/ai_data/eeg_detection/data/mtcaic3'
# model_path = '/content/drive/MyDrive/ai_data/eeg_detection/checkpoints/ssvep/models/ssvep.pth'
# optuna_db_path = '/content/drive/MyDrive/ai_data/eeg_detection/checkpoints/ssvep/optuna/optuna_studies.db'
data_path = './data/mtcaic3'
model_path = './checkpoints/ssvep/models/ssvep_the_honored_one.pth'
optuna_db_path = './checkpoints/ssvep/optuna/ssvep.db'

In [3]:
# Add this at the beginning of your notebook, after imports
def set_random_seeds(seed=42):
    """Set random seeds for reproducibility"""

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# Call this function before creating datasets and models
set_random_seeds(42)

In [17]:
window_length = 128 * 3 # ensure divisble by 64 the kernel size
print(window_length)
stride = window_length // 3
batch_size = 64

dataset_train = EEGDataset(
    data_path,
    window_length=window_length,
    stride=stride,
    domain="time",
    data_fraction=0.5,
    hardcoded_mean=True,
)

dataset_val = EEGDataset(
    data_path=data_path,
    window_length=window_length,
    stride=stride,
    task='ssvep',
    split='validation',
    read_labels=True,
    hardcoded_mean=True,
    data_fraction=1
)

combined = ConcatDataset([dataset_train, dataset_val])
train_len = int(len(combined) * 0.8)
val_len = len(combined) - train_len
train_ds, val_ds = random_split(combined, [train_len, val_len])

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=batch_size)

384
Using 50.0% of data: 1200/1200 samples
skipped: 1/1200
data shape: (12529, 2, 384), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (514, 2, 384), mean shape: (1, 2, 1)


In [11]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, h0=None, c0=None):
        if h0 is None or c0 is None:
            h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(x.device)
            c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(x.device)

        out, (hn, cn) = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

class DepthWiseConv2D(nn.Module):
    def __init__(self, in_channels, kernel_size, dim_mult=1, padding=0, bias=False):
        super(DepthWiseConv2D, self).__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels * dim_mult, padding=padding, kernel_size=kernel_size, groups=in_channels, bias=bias)

    def forward(self, x: torch.Tensor):
        return self.depthwise(x)


class SeperableConv2D(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, padding, bias=False):
        super(SeperableConv2D, self).__init__()
        self.depthwise = DepthWiseConv2D(in_channels, kernel_size, padding=padding)
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias)

    def forward(self, x):
        out = self.depthwise(x)
        out = self.pointwise(out)
        return out

class SSVEPClassifier(nn.Module):
    # EEG Net Based
    # todo look at this https://paperswithcode.com/paper/a-transformer-based-deep-neural-network-model
    def __init__(self, n_electrodes=16, out_dim=4, dropout=0.25, kernLength=256, F1=96, D=1, F2=96, hidden_dim=100, layer_dim=1):
        super().__init__()

        # B x C x T
        self.block_1 = nn.Sequential(
            nn.Conv2d(1, F1, (1, kernLength), padding='same', bias=False),
            nn.BatchNorm2d(F1),
            #
            DepthWiseConv2D(F1, (n_electrodes, 1), dim_mult=D, bias=False),
            nn.BatchNorm2d(F1*D),
            nn.ELU(),
            nn.MaxPool2d((1, 2)), # todo try making this max pool
            nn.Dropout(dropout),
            #
            SeperableConv2D(F1 * D, F2, kernel_size=(1, 16), padding='same', bias=False),
            nn.BatchNorm2d(F2),
            nn.ELU(),
            nn.MaxPool2d((1, 2)),
            nn.Dropout(dropout),
        )

        self.lstm_head = LSTMModel(F2, hidden_dim, layer_dim, out_dim)

    def forward(self, x: torch.Tensor):
        """expected input shape: BxCxT"""
        x = x.unsqueeze(1)
        y = self.block_1(x) # B x F1 x 1 x time_sub

        y = y.squeeze(2) # B x F1 x time_sub
        y = y.permute(0, 2, 1) # B x time_sub x F1
        y = self.lstm_head(y)

        return y

dummy_x = torch.randn(5, 2, 256).to(device)
model = SSVEPClassifier(
    n_electrodes=2,
    dropout=0.33066508963955576,
    kernLength=64,
    F1 = 128,
    D = 1,
    F2 = 256,
    hidden_dim=256,
    layer_dim=2,
).to(device)

model(dummy_x)

tensor([[-0.0235,  0.0492, -0.0168,  0.0635],
        [-0.0001,  0.0575, -0.0109,  0.0280],
        [-0.0014,  0.0460, -0.0001,  0.0372],
        [-0.0050,  0.0383, -0.0173,  0.0532],
        [ 0.0113,  0.0471, -0.0220,  0.0520]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [12]:
try:
    model.load_state_dict(torch.load(model_path, weights_only=True))
except Exception:
    print("skipping model loading...")


opt = torch.optim.Adam(model.parameters(), lr=0.00030241790493218325)
criterion = nn.CrossEntropyLoss()
avg_losses = []
val_accuracies = []

epochs = 4000
for epoch in range(epochs):
    avg_loss = 0
    model.train()
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device).to(torch.int64)
        y_pred = model(x).to(device)

        loss = criterion(y_pred, y)
        opt.zero_grad()
        loss.backward()
        opt.step()
        avg_loss += loss.item()

    avg_loss /= len(train_loader)
    avg_losses.append(avg_loss)

    if epoch % 5 == 0:
        evaluation = evaluate_model(model, val_loader, device)
        val_accuracies.append(evaluation)
        model.cpu()
        torch.save(model.state_dict(), model_path)
        model.to(device)
        print(f"epoch: {epoch}, avg_loss: {avg_loss}, val_evaluation: {evaluation}")

epoch: 0, avg_loss: 1.2615062517694908, val_evaluation: 0.44597126795752656
epoch: 5, avg_loss: 1.2056106163723634, val_evaluation: 0.4415990006246096
epoch: 10, avg_loss: 1.1648569112957115, val_evaluation: 0.46595877576514677


KeyboardInterrupt: 

In [36]:
class CustomTrainer(Trainer):
    # This method is called by _objective during an Optuna trial
    def prepare_trial_run(self):
        assert isinstance(self.trial, optuna.Trial), "Trial not set!"

        # 1. Define Hyperparameters for this trial
        #    a. Data/Loader parameters
        window_length = self.trial.suggest_categorical("window_length", [128, 256, 640]) # e.g. 64*2, 64*4, 64*10
        batch_size = self.trial.suggest_categorical("batch_size", [32, 64])

        #    b. Model architecture parameters
        kernLength = self.trial.suggest_categorical("kernLength", [64, 128, 256])
        F1 = self.trial.suggest_categorical("F1", [8, 16, 32])
        D = self.trial.suggest_categorical("D", [1, 2, 3])
        F2 = self.trial.suggest_categorical("F2", [16, 32, 64]) # F2 must be F1 * D
        hidden_dim = self.trial.suggest_categorical("hidden_dim", [64, 128, 256])
        layer_dim = self.trial.suggest_categorical("layer_dim", [1, 2, 3])
        dropout = self.trial.suggest_float("dropout", 0.1, 0.6)
        
        #    c. Optimizer parameters
        lr = self.trial.suggest_float("lr", 1e-4, 1e-2, log=True)

        # 2. Prepare the data using these parameters
        super()._prepare_data(is_trial=True, batch_size=batch_size, window_length=window_length)
        
        assert self.dataset is not None, "Dataset was not created correctly"
        n_electrodes = self.dataset.datasets[0].data[0].shape[0] # Get shape from underlying dataset

        # 3. Build the model and optimizer
        self.model = SSVEPClassifier(
            n_electrodes=n_electrodes, # Use value from data
            dropout=dropout,
            kernLength=kernLength,
            F1=F1,
            D=D,
            F2=F1 * D, # F2 is dependent on F1 and D
            hidden_dim=hidden_dim,
            layer_dim=layer_dim,
        ).to(self.device)
        
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)

    # This method is called by train() for the final run
    def prepare_final_run(self):
        # 1. Get the best hyperparameters from the completed study
        study = self._get_study()
        best_params = study.best_params
        
        # 2. Prepare data using the best params
        super()._prepare_data(is_trial=False) # is_trial=False handles getting params from study
        
        assert self.dataset is not None, "Dataset was not created correctly"
        n_electrodes = self.dataset.datasets[0].data[0].shape[0]

        # 3. Build the final model and optimizer
        self.model = SSVEPClassifier(
            n_electrodes=n_electrodes,
            dropout=best_params["dropout"],
            kernLength=best_params["kernLength"],
            F1=best_params["F1"],
            D=best_params["D"],
            F2=best_params["F1"] * best_params["D"],
            hidden_dim=best_params["hidden_dim"],
            layer_dim=best_params["layer_dim"],
        ).to(self.device)
        
        # Optional: Load pre-existing weights if you are resuming
        try:
            self.model.load_state_dict(torch.load(self.model_path))
            print(f"Loaded existing model weights from {self.model_path}")
        except Exception:
            print(f"No existing model weights found at {self.model_path}. Training from scratch.")
        
        lr = 0.00018182233882257615 # best_params["lr"]
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer,
            mode='max',        # “min” if you want to reduce lr when the quantity monitored has stopped decreasing
            factor=0.5,        # new_lr = lr * factor
            patience=20,        # number of epochs with no improvement after which lr will be reduced
            threshold=1e-4,    # threshold for measuring the new optimum, to only focus on significant changes
            threshold_mode='rel', # `'rel'` means compare change relative to best value. Could use `'abs'`.
            cooldown=0,        # epochs to wait before resuming normal operation after lr has been reduced
            min_lr=1e-6,       # lower bound on the lr
        )

trainer = CustomTrainer(
        data_path=data_path,
        optuna_db_path=optuna_db_path,
        model_path=model_path,
        train_epochs=500, # Final training epochs
        tune_epochs=50,   # Epochs per trial
        optuna_n_trials=50
    )

In [None]:
delete_existing = False
trainer.optimize(delete_existing)

[I 2025-06-23 21:15:19,301] A new study created in RDB with name: ssvep_classifier_optimization


Study 'ssvep_classifier_optimization' deleted.
Using 40.0% of data: 960/960 samples
skipped: 44/960
data shape: (5345, 2, 640), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (271, 2, 640), mean shape: (1, 2, 1)
Data prepared: Train batches=141, Val batches=36


[I 2025-06-23 21:16:54,230] Trial 0 finished with value: 0.3016014234875445 and parameters: {'window_length': 640, 'batch_size': 32, 'kernLength': 256, 'F1': 8, 'D': 3, 'F2': 64, 'hidden_dim': 128, 'layer_dim': 2, 'dropout': 0.5209686203255685, 'lr': 0.000396594163838901}. Best is trial 0 with value: 0.3016014234875445.


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=431, Val batches=108


[I 2025-06-23 21:18:51,308] Trial 1 finished with value: 0.4016274338854984 and parameters: {'window_length': 256, 'batch_size': 32, 'kernLength': 256, 'F1': 16, 'D': 2, 'F2': 16, 'hidden_dim': 64, 'layer_dim': 2, 'dropout': 0.34178090696330155, 'lr': 0.00012013939686290462}. Best is trial 1 with value: 0.4016274338854984.


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=431, Val batches=108


[I 2025-06-23 21:21:05,733] Trial 2 finished with value: 0.5047951176983435 and parameters: {'window_length': 256, 'batch_size': 32, 'kernLength': 256, 'F1': 32, 'D': 3, 'F2': 16, 'hidden_dim': 64, 'layer_dim': 1, 'dropout': 0.27072914067188847, 'lr': 0.003415544035465565}. Best is trial 2 with value: 0.5047951176983435.


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:22:37,744] Trial 3 finished with value: 0.34670154024992733 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 8, 'D': 3, 'F2': 64, 'hidden_dim': 64, 'layer_dim': 1, 'dropout': 0.2766800118548799, 'lr': 0.007986336796775765}. Best is trial 2 with value: 0.5047951176983435.


Using 40.0% of data: 960/960 samples
skipped: 44/960
data shape: (5345, 2, 640), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (271, 2, 640), mean shape: (1, 2, 1)
Data prepared: Train batches=141, Val batches=36


[I 2025-06-23 21:24:28,098] Trial 4 finished with value: 0.31583629893238435 and parameters: {'window_length': 640, 'batch_size': 32, 'kernLength': 64, 'F1': 8, 'D': 2, 'F2': 16, 'hidden_dim': 128, 'layer_dim': 2, 'dropout': 0.25794926040204724, 'lr': 0.005782682258467888}. Best is trial 2 with value: 0.5047951176983435.


Using 40.0% of data: 960/960 samples
skipped: 44/960
data shape: (5345, 2, 640), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (271, 2, 640), mean shape: (1, 2, 1)
Data prepared: Train batches=71, Val batches=18


[I 2025-06-23 21:25:03,842] Trial 5 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:25:19,853] Trial 6 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (35569, 2, 128), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (1834, 2, 128), mean shape: (1, 2, 1)
Data prepared: Train batches=468, Val batches=117


[I 2025-06-23 21:25:33,051] Trial 7 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (35569, 2, 128), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (1834, 2, 128), mean shape: (1, 2, 1)
Data prepared: Train batches=468, Val batches=117


[I 2025-06-23 21:25:50,105] Trial 8 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (35569, 2, 128), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (1834, 2, 128), mean shape: (1, 2, 1)
Data prepared: Train batches=936, Val batches=234


[I 2025-06-23 21:26:09,092] Trial 9 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=431, Val batches=108


[I 2025-06-23 21:26:23,742] Trial 10 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=431, Val batches=108


[I 2025-06-23 21:26:40,026] Trial 11 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=431, Val batches=108


[I 2025-06-23 21:26:54,318] Trial 12 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=431, Val batches=108


[I 2025-06-23 21:27:10,078] Trial 13 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=431, Val batches=108


[I 2025-06-23 21:27:23,887] Trial 14 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=431, Val batches=108


[I 2025-06-23 21:27:36,705] Trial 15 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=431, Val batches=108


[I 2025-06-23 21:27:49,205] Trial 16 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=431, Val batches=108


[I 2025-06-23 21:28:07,011] Trial 17 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (35569, 2, 128), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (1834, 2, 128), mean shape: (1, 2, 1)
Data prepared: Train batches=936, Val batches=234


[I 2025-06-23 21:28:22,532] Trial 18 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 44/960
data shape: (5345, 2, 640), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (271, 2, 640), mean shape: (1, 2, 1)
Data prepared: Train batches=141, Val batches=36


[I 2025-06-23 21:28:38,324] Trial 19 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=431, Val batches=108


[I 2025-06-23 21:28:53,974] Trial 20 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:29:06,169] Trial 21 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:29:19,033] Trial 22 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:30:56,168] Trial 23 finished with value: 0.5068294100552165 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 16, 'D': 3, 'F2': 64, 'hidden_dim': 64, 'layer_dim': 1, 'dropout': 0.23034280399057303, 'lr': 0.004496130654360325}. Best is trial 23 with value: 0.5068294100552165.


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:32:21,345] Trial 24 finished with value: 0.43591979075850046 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 16, 'D': 3, 'F2': 64, 'hidden_dim': 64, 'layer_dim': 1, 'dropout': 0.21824936853593288, 'lr': 0.0017365954870900704}. Best is trial 23 with value: 0.5068294100552165.


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:33:53,983] Trial 25 finished with value: 0.5260098808485906 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 16, 'D': 3, 'F2': 64, 'hidden_dim': 64, 'layer_dim': 1, 'dropout': 0.2038648541113718, 'lr': 0.0020667262541921487}. Best is trial 25 with value: 0.5260098808485906.


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:34:06,261] Trial 26 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:34:17,481] Trial 27 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 44/960
data shape: (5345, 2, 640), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (271, 2, 640), mean shape: (1, 2, 1)
Data prepared: Train batches=71, Val batches=18


[I 2025-06-23 21:34:28,525] Trial 28 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (35569, 2, 128), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (1834, 2, 128), mean shape: (1, 2, 1)
Data prepared: Train batches=468, Val batches=117


[I 2025-06-23 21:34:42,632] Trial 29 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 44/960
data shape: (5345, 2, 640), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (271, 2, 640), mean shape: (1, 2, 1)
Data prepared: Train batches=71, Val batches=18


[I 2025-06-23 21:34:58,248] Trial 30 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:36:18,930] Trial 31 finished with value: 0.5297878523684976 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 16, 'D': 3, 'F2': 64, 'hidden_dim': 64, 'layer_dim': 1, 'dropout': 0.2077357526503814, 'lr': 0.0017738156653704668}. Best is trial 31 with value: 0.5297878523684976.


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:36:29,857] Trial 32 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:37:54,836] Trial 33 finished with value: 0.5419936065097355 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 16, 'D': 3, 'F2': 64, 'hidden_dim': 64, 'layer_dim': 1, 'dropout': 0.1952344844575938, 'lr': 0.0014640183506411913}. Best is trial 33 with value: 0.5419936065097355.


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:38:11,199] Trial 34 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:38:29,947] Trial 35 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:39:53,896] Trial 36 finished with value: 0.5896541702993315 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 16, 'D': 3, 'F2': 64, 'hidden_dim': 64, 'layer_dim': 1, 'dropout': 0.10328170267309397, 'lr': 0.0018182233882257615}. Best is trial 36 with value: 0.5896541702993315.


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:40:05,723] Trial 37 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 44/960
data shape: (5345, 2, 640), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (271, 2, 640), mean shape: (1, 2, 1)
Data prepared: Train batches=71, Val batches=18


[I 2025-06-23 21:40:16,023] Trial 38 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:40:28,029] Trial 39 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (35569, 2, 128), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (1834, 2, 128), mean shape: (1, 2, 1)
Data prepared: Train batches=468, Val batches=117


[I 2025-06-23 21:40:52,728] Trial 40 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:41:07,417] Trial 41 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:41:21,409] Trial 42 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:41:32,238] Trial 43 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:41:45,558] Trial 44 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:41:56,680] Trial 45 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:42:13,000] Trial 46 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:42:26,327] Trial 47 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (16360, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=216, Val batches=54


[I 2025-06-23 21:42:39,318] Trial 48 pruned. 


Using 40.0% of data: 960/960 samples
skipped: 0/960
data shape: (35569, 2, 128), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (1834, 2, 128), mean shape: (1, 2, 1)
Data prepared: Train batches=468, Val batches=117


[I 2025-06-23 21:42:51,651] Trial 49 pruned. 



--- Optimization Finished ---
Study statistics: 
  Number of finished trials: 50
  Number of pruned trials: 39
  Number of complete trials: 11
Best trial:
  Value: 0.5896541702993315
  Best hyperparameters: 
    window_length: 256
    batch_size: 64
    kernLength: 256
    F1: 16
    D: 3
    F2: 64
    hidden_dim: 64
    layer_dim: 1
    dropout: 0.10328170267309397
    lr: 0.0018182233882257615


{'window_length': 256,
 'batch_size': 64,
 'kernLength': 256,
 'F1': 16,
 'D': 3,
 'F2': 64,
 'hidden_dim': 64,
 'layer_dim': 1,
 'dropout': 0.10328170267309397,
 'lr': 0.0018182233882257615}

In [37]:
# manual_write_study_params(trainer.study_name, trainer.storage)
trainer.train()

[I 2025-06-23 22:14:56,274] Using an existing study with name 'ssvep_classifier_optimization' instead of creating a new one.
[I 2025-06-23 22:14:56,313] Using an existing study with name 'ssvep_classifier_optimization' instead of creating a new one.


skipped: 0/2400
data shape: (41141, 2, 256), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (842, 2, 256), mean shape: (1, 2, 1)
Data prepared: Train batches=525, Val batches=132
Loaded existing model weights from ./checkpoints/ssvep/models/ssvep_the_honored_one.pth
Epoch 0/500, Validation Accuracy: 0.7243, Avg Loss: 0.7900, lr: 0.00018182233882257614
Model saved to ./checkpoints/ssvep/models/ssvep_the_honored_one.pth
Epoch 1/500, Validation Accuracy: 0.7163, Avg Loss: 0.7953, lr: 0.00018182233882257614
Epoch 2/500, Validation Accuracy: 0.7147, Avg Loss: 0.7896, lr: 0.00018182233882257614
Epoch 3/500, Validation Accuracy: 0.7153, Avg Loss: 0.7854, lr: 0.00018182233882257614
Epoch 4/500, Validation Accuracy: 0.6945, Avg Loss: 0.7832, lr: 0.00018182233882257614
Epoch 5/500, Validation Accuracy: 0.7080, Avg Loss: 0.7862, lr: 0.00018182233882257614
Model saved to ./checkpoints/ssvep/models/ssvep_the_honored_one.pth
Epoch 6/500, Validation Accuracy: 0.7123, Avg Loss: 0.7848, lr: 0.00018182

KeyboardInterrupt: 

In [None]:
trainer._prepare_training(False)
trainer.model.eval()
f"test accuracy: {evaluate_model(trainer.model, trainer.eval_loader, device)}"

lr: 1.0241790493218325e-05
loaded model weights


'test accuracy: 0.74484375'