In [1]:
%load_ext autoreload
%autoreload 2
import os
if not os.path.exists('./modules') and not os.path.exists('modules.zip'):
    from google.colab import files
    uploaded = files.upload()
if not os.path.exists('./modules') and os.path.exists('modules.zip'):
    os.system('unzip modules.zip -d .')

!pip3 install optuna
import torch
import torch.nn as nn
import optuna
from modules import Trainer
from modules.competition_dataset import EEGDataset
from modules.utils import split_and_get_loaders, evaluate_model, get_closest_divisor
import matplotlib.pyplot as plt
import random
import numpy as np

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


  from .autonotebook import tqdm as notebook_tqdm


device(type='cpu')

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')
# data_path = '/content/drive/MyDrive/ai_data/eeg_detection/data/mtcaic3'
# model_path = '/content/drive/MyDrive/ai_data/eeg_detection/checkpoints/ssvep/models/ssvep.pth'
# optuna_db_path = '/content/drive/MyDrive/ai_data/eeg_detection/checkpoints/ssvep/optuna/optuna_studies.db'
data_path = './data/mtcaic3'
model_path = './checkpoints/ssvep/models/75_lstm.pth'
optuna_db_path = './checkpoints/ssvep/optuna/2_optuna_studies.db'

In [3]:
# Add this at the beginning of your notebook, after imports
def set_random_seeds(seed=42):
    """Set random seeds for reproducibility"""

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# Call this function before creating datasets and models
set_random_seeds(42)

In [4]:
class LConvNet(nn.Module):
    """
    PyTorch implementation of the LConvNet model described in the paper:
    "Enhancing EEG signals classification using LSTM-CNN architecture" (Omar et al., 2024).

    The architecture is based on the specifications in Table 1 of the paper.
    There are some inconsistencies between the paper's text/diagrams and Table 1.
    This implementation follows Table 1, as the parameter counts confirm its structure.
    For example, the final Conv2D layer has 68 filters (not 64), and the Dense/LSTM layers
    use 32 units (not 64).
    """
    def __init__(self, n_channels=25, n_samples=256, n_classes=1, dropout_rate=0.5, lstm_units=32, dense_units=32):
        super(LConvNet, self).__init__()

        self.n_channels = n_channels
        self.n_samples = n_samples
        self.n_classes = n_classes # Binary classification (epileptic vs healthy)

        # --- CNN Feature Extractor ---
        # This block corresponds to layers 1, 2, and 3 in Table 1.
        # It processes the input EEG data to extract spatial features.
        self.cnn_block = nn.Sequential(
            # Input shape for Conv2D: (B, 1, n_channels, n_samples)
            # Layer 1
            nn.Conv2d(1, 16, kernel_size=(3, 3), padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            
            # Layer 2
            nn.Conv2d(16, 32, kernel_size=(5, 5), padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),

            # Layer 3 - Note: Table 1's parameter count confirms 68 filters, not 64.
            nn.Conv2d(32, 68, kernel_size=(7, 7), padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            
            # Layer 3 also includes dropout
            nn.Dropout(dropout_rate)
        )

        # --- Time-Distributed Dense Layer ---
        # Corresponds to layer 5 in Table 1.
        # The output of the CNN block is flattened and fed into a dense layer,
        # applied to each "time step" from the CNN's output.
        # The input features are calculated from the CNN output shape: 68 filters * 3 * 32 = 6912.
        # Let's re-calculate the flattened dimension from the CNN output.
        # After MaxPool3: H_out = floor((H_in-2)/2 + 1), W_out = floor((W_in-2)/2 + 1)
        # H1 = floor(25/2) = 12, W1 = floor(256/2) = 128
        # H2 = floor(12/2) = 6, W2 = floor(128/2) = 64
        # H3 = floor(6/2) = 3, W3 = floor(64/2) = 32
        # So the flattened dimension is 68 * 3 * 32 = 6528. The paper's param count for layer 5 (69664)
        # implies an input of 2176, which is 68 * 32. This suggests they permuted the dimensions
        # so that the height (3) became the time dimension.
        # Input to this layer will be (B, 3, 6528/3) = (B, 3, 2176)
        self.time_distributed_dense = nn.Linear(68 * 32, dense_units)

        # --- LSTM for Temporal Dependencies ---
        # Corresponds to layer 6 in Table 1. It processes the sequence from the
        # Time-Distributed Dense layer. Table 1 parameter count confirms 32 units.
        self.lstm = nn.LSTM(
            input_size=dense_units,
            hidden_size=lstm_units,
            num_layers=1,
            batch_first=True # This is important for matching tensor shapes
        )

        # --- Global Average Pooling Branch ---
        # Corresponds to layer 7 in Table 1. This branch takes the original input
        # and computes the average over the time dimension.
        # It provides a summary of the spatial information.
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)

        # --- Final Classifier ---
        # Corresponds to layer 8 in Table 1. It concatenates the features from the
        # LSTM branch and the GAP branch and makes the final prediction.
        # Input features = lstm_units + n_channels = 32 + 25 = 57.
        self.classifier = nn.Sequential(
            nn.Linear(lstm_units + n_channels, self.n_classes),
            nn.Sigmoid() # Sigmoid for binary classification
        )

    def forward(self, x: torch.Tensor):
        """
        Expected input shape: (B, C, T) -> (Batch, Channels, Samples)
        Example from paper: (B, 25, 256)
        """
        # Keep a reference to the original input for the GAP branch
        gap_input = x

        # 1. CNN Block
        # Add a channel dimension for Conv2D: (B, C, T) -> (B, 1, C, T)
        x = x.unsqueeze(1)
        x = self.cnn_block(x) # Output shape: (B, 68, 3, 32)

        # 2. Time-Distributed Dense
        # Permute to make the height dimension (3) the time dimension for the sequence
        x = x.permute(0, 2, 1, 3) # (B, 3, 68, 32)
        # Flatten the spatial features for the dense layer
        x = x.reshape(x.size(0), x.size(1), -1) # (B, 3, 68 * 32) -> (B, 3, 2176)
        x = self.time_distributed_dense(x) # (B, 3, 32)
        
        # 3. LSTM Block
        # We only need the final hidden state for classification
        _, (hn, _) = self.lstm(x)
        lstm_features = hn.squeeze(0) # Shape: (B, 32)

        # 4. Global Average Pooling Block
        # Input shape: (B, 25, 256)
        pooled_features = self.global_avg_pool(gap_input) # Output shape: (B, 25, 1)
        pooled_features = pooled_features.squeeze(2) # Output shape: (B, 25)

        # 5. Concatenate and Classify
        combined_features = torch.cat((lstm_features, pooled_features), dim=1) # Shape: (B, 57)
        output = self.classifier(combined_features) # Shape: (B, 1)

        return output


dummy_x = torch.randn(5, 8, 256)
model = LConvNet(n_channels=8, n_classes=4).to(device)
model(dummy_x)

tensor([[0.4964, 0.4826, 0.4628, 0.5279],
        [0.4981, 0.4752, 0.4634, 0.5338],
        [0.5023, 0.4834, 0.4610, 0.5220],
        [0.4969, 0.4791, 0.4632, 0.5333],
        [0.4995, 0.4798, 0.4620, 0.5322]], grad_fn=<SigmoidBackward0>)

In [5]:
window_length = 256
stride = 128
learning_rate = 1e-4
batch_size = 64
opt = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [6]:
dataset = EEGDataset(data_path, window_length=window_length, stride=stride, split='train')
train_loader, val_loader, test_loader = split_and_get_loaders(dataset, batch_size)

mean: [[[  18.94653916]
  [  -4.79315914]
  [-155.33400078]
  ...
  [  -0.85366908]
  [ -21.21283221]
  [ -10.88007821]]

 [[   2.88797945]
  [  14.03066089]
  [ 158.92097775]
  ...
  [   0.64594806]
  [   9.30438465]
  [   4.13539372]]

 [[ -27.57951229]
  [ -16.84199493]
  [ -86.29044736]
  ...
  [   1.26474958]
  [   7.93034018]
  [   4.70334708]]

 ...

 [[   5.58468604]
  [ -38.77063466]
  [ -24.45822144]
  ...
  [ -12.18208337]
  [ -28.22180953]
  [  -9.72892152]]

 [[  -3.73300704]
  [  38.75802838]
  [  27.31917901]
  ...
  [  10.76662178]
  [  27.31073366]
  [   9.00676603]]

 [[   1.75415838]
  [ -36.39651433]
  [ -30.74731076]
  ...
  [  -9.18116839]
  [ -25.04968969]
  [  -8.3511859 ]]], std: [[[ 534.64056483]
  [ 247.66103768]
  [2263.63560173]
  ...
  [  22.00025365]
  [ 346.158536  ]
  [ 172.03318645]]

 [[ 562.32708628]
  [ 236.51261777]
  [2192.90329526]
  ...
  [  18.76510161]
  [ 333.84927349]
  [ 164.23415602]]

 [[ 562.43575935]
  [ 239.90310922]
  [2155.81437682]


In [47]:
criterion = nn.CrossEntropyLoss()
avg_losses = []
val_accuracies = []

epochs = 200
for epoch in range(epochs):
    avg_loss = 0
    model.train()
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x).to(device)


        loss = criterion(y_pred, y)
        opt.zero_grad()
        loss.backward()
        opt.step()
        avg_loss += loss.item()

    avg_loss /= len(train_loader)
    avg_losses.append(avg_loss)

    evaluation = evaluate_model(model, val_loader, device)
    val_accuracies.append(evaluation)
    print(f'epoch: {epoch}, avg_loss: {avg_loss}, val_evaluation: {evaluation}')

epoch: 0, avg_loss: 1.3845494298169212, val_evaluation: 0.26424632352941174
epoch: 1, avg_loss: 1.3834319915214595, val_evaluation: 0.2686121323529412
epoch: 2, avg_loss: 1.381959608436501, val_evaluation: 0.27090992647058826


KeyboardInterrupt: 

In [10]:
# maxpool
plt.plot(range(len(avg_losses)), avg_losses, "b-", label="trainingg loss")
plt.plot(range(len(val_accuracies)), val_accuracies, "r-", label="validation accuracies")
plt.legend()
print(f"min avg_losses: {min(avg_losses)}")
print(f"max val_accuracies: {max(val_accuracies)}")

NameError: name 'avg_losses' is not defined

In [11]:
batch_size = 64
window_length = 175
stride_factor=3

In [12]:
class CustomTrainer(Trainer):
    def _prepare_training(self, is_trial, do_not_modify_network=True, batch_size=batch_size, window_length=window_length, stride_factor=stride_factor):
        super()._prepare_training(is_trial, do_not_modify_network, batch_size=batch_size,
                                  window_length=window_length,
                                  stride_factor=stride_factor)
        assert self.dataset is not None

        if is_trial:
            assert isinstance(self.trial, optuna.Trial), "trial is none, cant' suggest params"

            if do_not_modify_network:
                best_params = self._get_study().best_params if do_not_modify_network else None
                assert best_params is not None, "best_params is None, can't use them"

                kernLength = best_params["kernLength"]
                F1 = best_params["F1"]
                D = best_params["D"]
                F2 = best_params["F2"]
                hidden_dim = best_params["hidden_dim"]
                layer_dim = best_params["layer_dim"]

            else:
                kernLength = self.trial.suggest_categorical("kernLength", [128, 256, 512])
                F1 = self.trial.suggest_categorical("F1", [64, 96, 128])
                D = self.trial.suggest_categorical("D", [1, 2, 3])
                F2 = self.trial.suggest_categorical("F2", [64, 96, 128])
                hidden_dim = self.trial.suggest_categorical("hidden_dim", [64, 128, 256])
                layer_dim = self.trial.suggest_categorical("layer_dim", [1, 2, 3, 4])

            dropout = self.trial.suggest_float("dropout", 0, 0.5)
            lr = self.trial.suggest_float("lr", 3e-4, 3e-2, log=True)

        else:
            # best_params = self._get_study().best_params
            # kernLength = best_params["kernLength"]
            # F1 = best_params["F1"]
            # D = best_params["D"]
            # F2 = best_params["F2"]
            # hidden_dim = best_params["hidden_dim"]
            # layer_dim = best_params["layer_dim"]
            # dropout = best_params["dropout"]
            # lr = best_params["lr"]
            dropout=0.33066508963955576
            kernLength=256
            F1 = 128
            D = 2
            F2 = F1 # OOPS 96
            hidden_dim=256
            layer_dim=3
            lr = 0.000010241790493218325

        n_samples = self.dataset.data[0].shape[1]  # data[x] shape CxT
        n_electrodes = self.dataset.data[0].shape[0]

        n_samples = self.dataset.data[0].shape[1]  # data[x] shape CxT
        n_electrodes = self.dataset.data[0].shape[0]

        self.model = SSVEPClassifier(
            n_electrodes=n_electrodes, n_samples=n_samples, out_dim=4, dropout=dropout, kernLength=kernLength, F1=F1, D=D, F2=F2, hidden_dim=hidden_dim, layer_dim=layer_dim
        )
        print(f"lr: {lr}")
        try:
            self.model.load_state_dict(torch.load(model_path))
            print(f"loaded model weights")
        except Exception:
            print(f"no model weights found at {model_path}")
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr)

trainer = CustomTrainer(data_path, optuna_db_path, model_path, train_epochs=10000, optuna_n_trials=35)

In [7]:
delete_existing = False
trainer.optimize(delete_existing)

[I 2025-06-20 18:42:30,753] Using an existing study with name 'ssvep_classifier_optimization' instead of creating a new one.
[I 2025-06-20 18:42:30,815] Using an existing study with name 'ssvep_classifier_optimization' instead of creating a new one.
[W 2025-06-20 18:42:30,839] Trial 0 failed with parameters: {} because of the following error: ValueError('Record does not exist.').
Traceback (most recent call last):
  File "/home/zeyadcode/.pyenv/versions/icmtc_venv/lib/python3.12/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/home/zeyadcode/Workspace/ai_projects/eeg_detection/modules/trainer.py", line 101, in _objective
    self._prepare_training(True)
  File "/tmp/ipykernel_306525/3387801006.py", line 3, in _prepare_training
    super()._prepare_training(is_trial, do_not_modify_network, batch_size=batch_size,
  File "/home/zeyadcode/Workspace/ai_projects/eeg_detection/modules/trainer.py", li

ValueError: Record does not exist.

In [None]:
# manual_write_study_params(trainer.study_name, trainer.storage)
trainer.train()

In [14]:
trainer._prepare_training(False)
trainer.model.eval()
f"test accuracy: {evaluate_model(trainer.model, trainer.test_loader, device)}"

lr: 1.0241790493218325e-05
loaded model weights


'test accuracy: 0.74484375'

In [6]:
# %%
from torch.utils.data import Dataset
import os
import numpy as np
import pandas as pd
import torch
from scipy.fft import fft, rfft
from scipy import signal
from numpy.lib.stride_tricks import sliding_window_view


LABELS = ['Backward', 'Forward', 'Left', 'Right']
LABEL_TO_IDX = {lbl: i for i, lbl in enumerate(LABELS)}
IDX_TO_LABEL = {idx: label for idx, label in enumerate(LABELS)}

# Precompute filter once
_SFREQ    = 256
_LOW, _HI = 3, 100
_NYQ      = _SFREQ / 2.0
_B, _A    = signal.butter(4, [_LOW/_NYQ, _HI/_NYQ], btype='bandpass')


In [13]:
fn = "./data/mtcaic3/SSVEP/validation/S35/1/EEGdata.csv"
torch.set_printoptions(sci_mode=False, precision=6)
#correct is backward, forwaard, left, right

def avg_refrencing(data: np.ndarray):
    return data - data.mean(axis=2, keepdims=True)

def band_pass_filter(data: np.ndarray):
    return signal.filtfilt(_B, _A, data, axis=2)

def normalize(data: np.ndarray):
    mean = data.mean(axis=2, keepdims=True)
    std  = data.std(axis=2, keepdims=True) + 1e-6
    data = (data - mean) / std
    return data

cols = ['FZ','C3','CZ','C4','PZ','PO7','OZ','PO8']

df = pd.read_csv(fn, usecols=cols)
df_valid = pd.read_csv(fn, usecols=['Validation'])
last175 = df

i = 7
tensor = torch.tensor(last175.values, dtype=torch.float32)[-1750 * i:-1750 * (i - 1), :].view(10, 175, 8).permute(0, 2, 1)
valid_tensor = torch.tensor(df_valid.values, dtype=torch.float32)[-1750 * i:-1750 * (i - 1), :].view(10, 175, 1)
print((valid_tensor == 0).sum())
print(tensor[-1][:, 0])
data_array = avg_refrencing(tensor.numpy())
data_array = band_pass_filter(data_array)
data_array = normalize(data_array)

tensor = torch.from_numpy(data_array.copy()).to(torch.float32)
print(tensor.shape)
trainer.model.eval()
print(torch.softmax(trainer.model(tensor), dim=1))
print(torch.argmax(trainer.model(tensor), dim=1))

tensor(0)
tensor([285661.468750, 299282.437500, 351748.000000, 387430.937500,
        429527.781250, 281785.593750, 272474.312500, 271807.593750])
torch.Size([10, 8, 175])


AttributeError: 'NoneType' object has no attribute 'eval'

In [98]:
# 1. Your target tensor (shape [8,175])
target = tensor[0]  

# 2. Grab the underlying Dataset
ds = trainer.train_loader.dataset

best_dist = float('inf')
best_label = None
best_data = None

# 3. Iterate over every sample in the Dataset
for i in range(len(ds)):
    item = ds[i]
    # unpack data & label
    if isinstance(item, (tuple, list)):
        data_i, label_i = item[0], item[1]
    elif isinstance(item, dict):
        # adjust key names if needed
        data_i, label_i = item.get('eeg', item.get('data')), item['label']
    else:
        continue

    # convert numpy → torch if necessary
    if isinstance(data_i, np.ndarray):
        data_i = torch.from_numpy(data_i)

    # ensure same shape
    data_i = data_i.view_as(target)

    # compute Euclidean distance
    dist = torch.norm(data_i - target).item()

    if dist < best_dist:
        best_dist = dist
        best_label = label_i
        best_data = data_i

print(best_data)
# 4. Print the label of the most similar sample
print(best_label)


tensor([[ 0.014051,  0.760070, -0.390054,  ...,  1.452694,  0.092904,
          0.152871],
        [ 0.080737, -1.386883, -1.813527,  ..., -2.237866, -1.017529,
          0.188362],
        [ 0.061857,  0.087862, -1.565250,  ...,  0.266991, -0.599239,
          0.293950],
        ...,
        [ 0.075408, -1.377178, -1.511927,  ..., -2.300731, -0.829118,
          0.133468],
        [ 0.075436, -1.367463, -1.623659,  ..., -2.319957, -0.983773,
          0.143160],
        [ 0.112787, -1.282061, -2.147263,  ..., -2.190778, -1.087937,
          0.217843]])
tensor(0)


In [100]:
tensor[0]

tensor([[     7.525880,    366.709625,   -187.037323,  ...,
            700.185608,     45.491299,     74.363281],
        [    -1.113108,   -418.527405,   -539.871704,  ...,
           -660.560547,   -313.477386,     29.497141],
        [     4.467845,     13.405240,   -554.742981,  ...,
             74.969322,   -222.740326,     84.234581],
        ...,
        [    -0.338264,   -185.773224,   -202.974960,  ...,
           -303.672516,   -115.808647,      7.073462],
        [    -1.511939,   -404.904968,   -476.529968,  ...,
           -671.194824,   -297.636383,     17.421761],
        [     1.490031,   -154.565750,   -251.364700,  ...,
           -256.233124,   -132.847198,     13.243663]])