In [None]:

%load_ext autoreload
%autoreload 2
import os

import torch
import torch.nn as nn
import optuna
from modules import Trainer
from modules.competition_dataset import EEGDataset, LABELS
from modules.utils import evaluate_model
import random
import numpy as np
from torch.utils.data import ConcatDataset, random_split, DataLoader

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

  from .autonotebook import tqdm as notebook_tqdm


device(type='cuda')

In [42]:
# from google.colab import drive
# drive.mount('/content/drive')
# data_path = '/content/drive/MyDrive/ai_data/eeg_detection/data/mtcaic3'
# model_path = '/content/drive/MyDrive/ai_data/eeg_detection/checkpoints/ssvep/models/ssvep.pth'
# optuna_db_path = '/content/drive/MyDrive/ai_data/eeg_detection/checkpoints/ssvep/optuna/optuna_studies.db'
data_path = './data/mtcaic3'
model_path = './checkpoints/ssvep/models/ssvep_PO8_OZ_PZ.pth'
optuna_db_path = './checkpoints/ssvep/optuna/ssvep_PO8_OZ_PZ.db'

In [43]:
# Add this at the beginning of your notebook, after imports
def set_random_seeds(seed=42):
    """Set random seeds for reproducibility"""

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# Call this function before creating datasets and models
set_random_seeds(42)

In [None]:
window_length = 128 * 3 # ensure divisble by 64 the kernel size
print(window_length)
stride = window_length // 3
batch_size = 64
eeg_channels = ["PO8", "OZ"]

dataset_train = EEGDataset(
    data_path,
    window_length=window_length,
    stride=stride,
    domain="time",
    data_fraction=0.5,
    hardcoded_mean=True,
    eeg_channels=eeg_channels,
)

dataset_val = EEGDataset(
    data_path=data_path,
    window_length=window_length,
    stride=stride,
    task='ssvep',
    split='validation',
    read_labels=True,
    hardcoded_mean=True,
    data_fraction=1
    eeg_channels=eeg_channels,
)

combined = ConcatDataset([dataset_train, dataset_val])
train_len = int(len(combined) * 0.8)
val_len = len(combined) - train_len
train_ds, val_ds = random_split(combined, [train_len, val_len])

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=batch_size)

384
Using 50.0% of data: 1200/1200 samples
skipped: 1/1200
data shape: (12529, 2, 384), mean shape: (1, 2, 1)
skipped: 0/50
data shape: (514, 2, 384), mean shape: (1, 2, 1)


In [44]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, h0=None, c0=None):
        if h0 is None or c0 is None:
            h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(x.device)
            c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(x.device)

        out, (hn, cn) = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

class DepthWiseConv2D(nn.Module):
    def __init__(self, in_channels, kernel_size, dim_mult=1, padding=0, bias=False):
        super(DepthWiseConv2D, self).__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels * dim_mult, padding=padding, kernel_size=kernel_size, groups=in_channels, bias=bias)

    def forward(self, x: torch.Tensor):
        return self.depthwise(x)


class SeperableConv2D(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, padding, bias=False):
        super(SeperableConv2D, self).__init__()
        self.depthwise = DepthWiseConv2D(in_channels, kernel_size, padding=padding)
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias)

    def forward(self, x):
        out = self.depthwise(x)
        out = self.pointwise(out)
        return out

class SSVEPClassifier(nn.Module):
    # EEG Net Based
    # todo look at this https://paperswithcode.com/paper/a-transformer-based-deep-neural-network-model
    def __init__(self, n_electrodes=16, out_dim=4, dropout=0.25, kernLength=256, F1=96, D=1, F2=96, hidden_dim=100, layer_dim=1):
        super().__init__()

        # B x C x T
        self.block_1 = nn.Sequential(
            nn.Conv2d(1, F1, (1, kernLength), padding='same', bias=False),
            nn.BatchNorm2d(F1),
            #
            DepthWiseConv2D(F1, (n_electrodes, 1), dim_mult=D, bias=False),
            nn.BatchNorm2d(F1*D),
            nn.ELU(),
            nn.MaxPool2d((1, 2)), # todo try making this max pool
            nn.Dropout(dropout),
            #
            SeperableConv2D(F1 * D, F2, kernel_size=(1, 16), padding='same', bias=False),
            nn.BatchNorm2d(F2),
            nn.ELU(),
            nn.MaxPool2d((1, 2)),
            nn.Dropout(dropout),
        )

        self.lstm_head = LSTMModel(F2, hidden_dim, layer_dim, out_dim)

    def forward(self, x: torch.Tensor):
        """expected input shape: BxCxT"""
        x = x.unsqueeze(1)
        y = self.block_1(x) # B x F1 x 1 x time_sub

        y = y.squeeze(2) # B x F1 x time_sub
        y = y.permute(0, 2, 1) # B x time_sub x F1
        y = self.lstm_head(y)

        return y

dummy_x = torch.randn(5, 2, 256).to(device)
model = SSVEPClassifier(
    n_electrodes=2,
    dropout=0.33066508963955576,
    kernLength=64,
    F1 = 128,
    D = 1,
    F2 = 256,
    hidden_dim=256,
    layer_dim=2,
).to(device)

model(dummy_x)

tensor([[ 0.0294, -0.0717, -0.0142,  0.0830],
        [-0.0014, -0.0591,  0.0052,  0.0621],
        [ 0.0201, -0.0940,  0.0026,  0.0807],
        [ 0.0282, -0.0920,  0.0106,  0.0707],
        [ 0.0430, -0.0657,  0.0302,  0.0347]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [12]:
try:
    model.load_state_dict(torch.load(model_path, weights_only=True))
except Exception:
    print("skipping model loading...")


opt = torch.optim.Adam(model.parameters(), lr=0.00030241790493218325)
criterion = nn.CrossEntropyLoss()
avg_losses = []
val_accuracies = []

epochs = 4000
for epoch in range(epochs):
    avg_loss = 0
    model.train()
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device).to(torch.int64)
        y_pred = model(x).to(device)

        loss = criterion(y_pred, y)
        opt.zero_grad()
        loss.backward()
        opt.step()
        avg_loss += loss.item()

    avg_loss /= len(train_loader)
    avg_losses.append(avg_loss)

    if epoch % 5 == 0:
        evaluation = evaluate_model(model, val_loader, device)
        val_accuracies.append(evaluation)
        model.cpu()
        torch.save(model.state_dict(), model_path)
        model.to(device)
        print(f"epoch: {epoch}, avg_loss: {avg_loss}, val_evaluation: {evaluation}")

epoch: 0, avg_loss: 1.2615062517694908, val_evaluation: 0.44597126795752656
epoch: 5, avg_loss: 1.2056106163723634, val_evaluation: 0.4415990006246096
epoch: 10, avg_loss: 1.1648569112957115, val_evaluation: 0.46595877576514677


KeyboardInterrupt: 

In [48]:
class CustomTrainer(Trainer):
    # This method is called by _objective during an Optuna trial
    def prepare_trial_run(self):
        assert isinstance(self.trial, optuna.Trial), "Trial not set!"

        # 1. Define Hyperparameters for this trial
        #    a. Data/Loader parameters
        window_length = self.trial.suggest_categorical("window_length", [128, 256, 640]) # e.g. 64*2, 64*4, 64*10
        batch_size = self.trial.suggest_categorical("batch_size", [32, 64])

        #    b. Model architecture parameters
        kernLength = self.trial.suggest_categorical("kernLength", [64, 128, 256])
        F1 = self.trial.suggest_categorical("F1", [8, 16, 32])
        D = self.trial.suggest_categorical("D", [1, 2, 3])
        F2 = self.trial.suggest_categorical("F2", [16, 32, 64]) # F2 must be F1 * D
        hidden_dim = self.trial.suggest_categorical("hidden_dim", [64, 128, 256])
        layer_dim = self.trial.suggest_categorical("layer_dim", [1, 2, 3])
        dropout = self.trial.suggest_float("dropout", 0.1, 0.6)
        
        #    c. Optimizer parameters
        lr = self.trial.suggest_float("lr", 1e-4, 1e-2, log=True)

        # 2. Prepare the data using these parameters
        super()._prepare_data(is_trial=True, batch_size=batch_size, window_length=window_length)
        
        assert self.dataset is not None, "Dataset was not created correctly"
        n_electrodes = self.dataset.datasets[0].data[0].shape[0] # Get shape from underlying dataset

        # 3. Build the model and optimizer
        self.model = SSVEPClassifier(
            n_electrodes=n_electrodes, # Use value from data
            dropout=dropout,
            kernLength=kernLength,
            F1=F1,
            D=D,
            F2=F1 * D, # F2 is dependent on F1 and D
            hidden_dim=hidden_dim,
            layer_dim=layer_dim,
        ).to(self.device)
        
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)

    # This method is called by train() for the final run
    def prepare_final_run(self):
        # 1. Get the best hyperparameters from the completed study
        study = self._get_study()
        best_params = study.best_params
        
        # 2. Prepare data using the best params
        self.data_fraction = 1
        super()._prepare_data(is_trial=False) # is_trial=False handles getting params from study
        
        assert self.dataset is not None, "Dataset was not created correctly"
        n_electrodes = self.dataset.datasets[0].data[0].shape[0]

        # 3. Build the final model and optimizer
        self.model = SSVEPClassifier(
            n_electrodes=n_electrodes,
            dropout=best_params["dropout"],
            kernLength=best_params["kernLength"],
            F1=best_params["F1"],
            D=best_params["D"],
            F2=best_params["F1"] * best_params["D"],
            hidden_dim=best_params["hidden_dim"],
            layer_dim=best_params["layer_dim"],
        ).to(self.device)
        
        # Optional: Load pre-existing weights if you are resuming
        try:
            self.model.load_state_dict(torch.load(self.model_path))
            print(f"Loaded existing model weights from {self.model_path}")
        except Exception:
            print(f"No existing model weights found at {self.model_path}. Training from scratch.")
        
        lr = best_params["lr"]
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer,
            mode='max',        # “min” if you want to reduce lr when the quantity monitored has stopped decreasing
            factor=0.5,        # new_lr = lr * factor
            patience=20,        # number of epochs with no improvement after which lr will be reduced
            threshold=1e-4,    # threshold for measuring the new optimum, to only focus on significant changes
            threshold_mode='rel', # `'rel'` means compare change relative to best value. Could use `'abs'`.
            cooldown=0,        # epochs to wait before resuming normal operation after lr has been reduced
            min_lr=1e-6,       # lower bound on the lr
        )

trainer = CustomTrainer(
        data_path=data_path,
        optuna_db_path=optuna_db_path,
        model_path=model_path,
        train_epochs=5000, # Final training epochs
        tune_epochs=50,   # Epochs per trial
        optuna_n_trials=50,
        data_fraction=0.5
    )

In [49]:
delete_existing = False
trainer.optimize(delete_existing)

[I 2025-06-23 22:27:47,936] Using an existing study with name 'ssvep_classifier_optimization' instead of creating a new one.


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=1163, Val batches=291


[I 2025-06-23 22:34:18,694] Trial 1 finished with value: 0.4591891601247446 and parameters: {'window_length': 128, 'batch_size': 32, 'kernLength': 128, 'F1': 8, 'D': 3, 'F2': 32, 'hidden_dim': 128, 'layer_dim': 3, 'dropout': 0.1976025314801271, 'lr': 0.00020513510803189754}. Best is trial 1 with value: 0.4591891601247446.


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-23 22:38:47,520] Trial 2 finished with value: 0.5167222281965803 and parameters: {'window_length': 128, 'batch_size': 64, 'kernLength': 256, 'F1': 8, 'D': 3, 'F2': 64, 'hidden_dim': 256, 'layer_dim': 1, 'dropout': 0.5556690158670263, 'lr': 0.0012244266468301728}. Best is trial 2 with value: 0.5167222281965803.


Using 50.0% of data: 1200/1200 samples
skipped: 50/1200
data shape: (6718, 3, 640), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (271, 3, 640), mean shape: (1, 3, 1)
Data prepared: Train batches=175, Val batches=44


[I 2025-06-23 22:40:01,939] Trial 3 finished with value: 0.2782546494992847 and parameters: {'window_length': 640, 'batch_size': 32, 'kernLength': 128, 'F1': 8, 'D': 2, 'F2': 16, 'hidden_dim': 64, 'layer_dim': 1, 'dropout': 0.5873602876657222, 'lr': 0.0005817555816884812}. Best is trial 2 with value: 0.5167222281965803.


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-23 22:50:02,324] Trial 4 finished with value: 0.5510269921496935 and parameters: {'window_length': 128, 'batch_size': 64, 'kernLength': 256, 'F1': 16, 'D': 3, 'F2': 32, 'hidden_dim': 256, 'layer_dim': 3, 'dropout': 0.36014646816149276, 'lr': 0.00018035013974104623}. Best is trial 4 with value: 0.5510269921496935.


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 22:51:48,598] Trial 5 finished with value: 0.5060776063581113 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 16, 'D': 1, 'F2': 16, 'hidden_dim': 64, 'layer_dim': 2, 'dropout': 0.3643607251146025, 'lr': 0.0006701883191789286}. Best is trial 4 with value: 0.5510269921496935.


Using 50.0% of data: 1200/1200 samples
skipped: 50/1200
data shape: (6718, 3, 640), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (271, 3, 640), mean shape: (1, 3, 1)
Data prepared: Train batches=88, Val batches=22


[I 2025-06-23 22:52:20,807] Trial 6 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 22:55:41,490] Trial 7 finished with value: 0.6884057971014492 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 32, 'D': 1, 'F2': 32, 'hidden_dim': 256, 'layer_dim': 1, 'dropout': 0.20835163861055508, 'lr': 0.0010227777802267056}. Best is trial 7 with value: 0.6884057971014492.


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 22:55:56,564] Trial 8 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 22:56:26,787] Trial 9 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 50/1200
data shape: (6718, 3, 640), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (271, 3, 640), mean shape: (1, 3, 1)
Data prepared: Train batches=175, Val batches=44


[I 2025-06-23 22:56:41,178] Trial 10 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=535, Val batches=134


[I 2025-06-23 22:56:58,409] Trial 11 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-23 23:02:05,376] Trial 12 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-23 23:19:14,669] Trial 13 finished with value: 0.6465211313044413 and parameters: {'window_length': 128, 'batch_size': 64, 'kernLength': 256, 'F1': 32, 'D': 3, 'F2': 32, 'hidden_dim': 256, 'layer_dim': 3, 'dropout': 0.2638979625805211, 'lr': 0.0003655977026355225}. Best is trial 7 with value: 0.6884057971014492.


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-23 23:19:44,350] Trial 14 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:20:19,579] Trial 15 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-23 23:20:45,102] Trial 16 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:33:32,774] Trial 17 finished with value: 0.7183263207106124 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 32, 'D': 3, 'F2': 32, 'hidden_dim': 256, 'layer_dim': 3, 'dropout': 0.26211635308091535, 'lr': 0.0003746351873334935}. Best is trial 17 with value: 0.7183263207106124.


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:33:52,131] Trial 18 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=535, Val batches=134


[I 2025-06-23 23:34:11,879] Trial 19 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:38:09,081] Trial 20 finished with value: 0.5542309490416082 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 32, 'D': 3, 'F2': 64, 'hidden_dim': 64, 'layer_dim': 1, 'dropout': 0.18400095243841755, 'lr': 0.0035220886319312214}. Best is trial 17 with value: 0.7183263207106124.


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:38:54,092] Trial 21 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:39:25,432] Trial 22 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-23 23:40:01,956] Trial 23 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 50/1200
data shape: (6718, 3, 640), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (271, 3, 640), mean shape: (1, 3, 1)
Data prepared: Train batches=88, Val batches=22


[I 2025-06-23 23:40:30,773] Trial 24 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:41:00,106] Trial 25 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-23 23:41:25,802] Trial 26 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=535, Val batches=134


[I 2025-06-23 23:42:00,334] Trial 27 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:42:54,420] Trial 28 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 50/1200
data shape: (6718, 3, 640), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (271, 3, 640), mean shape: (1, 3, 1)
Data prepared: Train batches=88, Val batches=22


[I 2025-06-23 23:43:11,535] Trial 29 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=1163, Val batches=291


[I 2025-06-23 23:43:36,942] Trial 30 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-23 23:43:59,833] Trial 31 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:47:39,186] Trial 32 finished with value: 0.5780738662926601 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 32, 'D': 3, 'F2': 64, 'hidden_dim': 64, 'layer_dim': 1, 'dropout': 0.18794369549565948, 'lr': 0.004416371805829308}. Best is trial 17 with value: 0.7183263207106124.


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:48:03,498] Trial 33 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:48:24,003] Trial 34 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:52:14,943] Trial 35 finished with value: 0.6187470780738663 and parameters: {'window_length': 256, 'batch_size': 64, 'kernLength': 256, 'F1': 32, 'D': 3, 'F2': 64, 'hidden_dim': 64, 'layer_dim': 1, 'dropout': 0.18946921713370796, 'lr': 0.0010898602411002927}. Best is trial 17 with value: 0.7183263207106124.


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-23 23:52:39,192] Trial 36 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:53:02,454] Trial 37 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 50/1200
data shape: (6718, 3, 640), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (271, 3, 640), mean shape: (1, 3, 1)
Data prepared: Train batches=175, Val batches=44


[I 2025-06-23 23:53:20,478] Trial 38 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:53:49,309] Trial 39 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-23 23:54:11,981] Trial 40 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:54:29,524] Trial 41 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:54:48,270] Trial 42 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:55:07,194] Trial 43 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:55:26,070] Trial 44 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:55:47,010] Trial 45 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=268, Val batches=67


[I 2025-06-23 23:56:06,773] Trial 46 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 50/1200
data shape: (6718, 3, 640), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (271, 3, 640), mean shape: (1, 3, 1)
Data prepared: Train batches=88, Val batches=22


[I 2025-06-23 23:56:30,520] Trial 47 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (20548, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=535, Val batches=134


[I 2025-06-23 23:56:51,052] Trial 48 pruned. 


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-24 00:07:24,200] Trial 49 finished with value: 0.652328207334122 and parameters: {'window_length': 128, 'batch_size': 64, 'kernLength': 256, 'F1': 32, 'D': 3, 'F2': 32, 'hidden_dim': 256, 'layer_dim': 2, 'dropout': 0.19613411677778766, 'lr': 0.0003771308018418921}. Best is trial 17 with value: 0.7183263207106124.


Using 50.0% of data: 1200/1200 samples
skipped: 0/1200
data shape: (44658, 3, 128), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (1834, 3, 128), mean shape: (1, 3, 1)
Data prepared: Train batches=582, Val batches=146


[I 2025-06-24 00:07:50,489] Trial 50 pruned. 



--- Optimization Finished ---
Study statistics: 
  Number of finished trials: 51
  Number of pruned trials: 38
  Number of complete trials: 12
Best trial:
  Value: 0.7183263207106124
  Best hyperparameters: 
    window_length: 256
    batch_size: 64
    kernLength: 256
    F1: 32
    D: 3
    F2: 32
    hidden_dim: 256
    layer_dim: 3
    dropout: 0.26211635308091535
    lr: 0.0003746351873334935


{'window_length': 256,
 'batch_size': 64,
 'kernLength': 256,
 'F1': 32,
 'D': 3,
 'F2': 32,
 'hidden_dim': 256,
 'layer_dim': 3,
 'dropout': 0.26211635308091535,
 'lr': 0.0003746351873334935}

In [50]:
# manual_write_study_params(trainer.study_name, trainer.storage)
trainer.train()

[I 2025-06-24 00:07:50,817] Using an existing study with name 'ssvep_classifier_optimization' instead of creating a new one.
[I 2025-06-24 00:07:50,859] Using an existing study with name 'ssvep_classifier_optimization' instead of creating a new one.


skipped: 0/2400
data shape: (41141, 3, 256), mean shape: (1, 3, 1)
skipped: 0/50
data shape: (842, 3, 256), mean shape: (1, 3, 1)
Data prepared: Train batches=525, Val batches=132
No existing model weights found at ./checkpoints/ssvep/models/ssvep_PO8_OZ_PZ.pth. Training from scratch.
Epoch 0/5000, Validation Accuracy: 0.2892, Avg Loss: 1.3848, lr: 0.0003746351873334935
Model saved to ./checkpoints/ssvep/models/ssvep_PO8_OZ_PZ.pth
Epoch 1/5000, Validation Accuracy: 0.2870, Avg Loss: 1.3804, lr: 0.0003746351873334935
Epoch 2/5000, Validation Accuracy: 0.3019, Avg Loss: 1.3759, lr: 0.0003746351873334935
Epoch 3/5000, Validation Accuracy: 0.3176, Avg Loss: 1.3714, lr: 0.0003746351873334935
Epoch 4/5000, Validation Accuracy: 0.3187, Avg Loss: 1.3631, lr: 0.0003746351873334935
Epoch 5/5000, Validation Accuracy: 0.3355, Avg Loss: 1.3536, lr: 0.0003746351873334935
Model saved to ./checkpoints/ssvep/models/ssvep_PO8_OZ_PZ.pth
Epoch 6/5000, Validation Accuracy: 0.3417, Avg Loss: 1.3422, lr: 0.0

KeyboardInterrupt: 

In [None]:
trainer._prepare_training(False)
trainer.model.eval()
f"test accuracy: {evaluate_model(trainer.model, trainer.eval_loader, device)}"

lr: 1.0241790493218325e-05
loaded model weights


'test accuracy: 0.74484375'