In [1]:
import os

import numpy as np
import pandas as pd
import mne
import torch
import torch.nn as nn

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder

from braindecode.models import EEGNetv4
from skorch import NeuralNetClassifier

from moabb.datasets.base import BaseDataset
from moabb.paradigms import SSVEP
from moabb.pipelines import SSVEP_CCA
from moabb.pipelines.features import StandardScaler_Epoch
from moabb.evaluations import WithinSessionEvaluation


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
n_subjects = 30
n_sessions = 8
start_time = 1
end_time = 6
split = "train"

label_to_freq = {"Left": "10", "Right": "13", "Forward": "7", "Backward": "8"}
event_mapping = {"10": 0, "13": 1, "7": 2, "8": 3}
event_mapping_decoder = np.vectorize({0: "10", 1: "13", 2: "7", 3: "8"}.get)


class CompetitionDataset(BaseDataset):
    def __init__(self):
        super().__init__(
            subjects=list(range(1, n_subjects + 1)),
            sessions_per_subject=n_sessions,
            events=event_mapping,
            code="Competition",
            interval=[start_time, end_time],
            paradigm="ssvep",
        )

        self.base_path = "./data/mtcaic3/SSVEP"
        self.metadata_path = os.path.join('./data/mtcaic3', f"{split}.csv")

    def data_path(self, subject, path=None, force_update=False, update_path=None, verbose=None):  # type: ignore
        """Return list of CSV file paths for this subject."""
        subject_dir = os.path.join(self.base_path, split, f"S{int(subject)}")
        csv_files = []

        # Collect all 8 session files for this subject
        for session in range(1, n_sessions + 1):
            csv_file = os.path.join(subject_dir, str(session), "EEGdata.csv")
            if os.path.exists(csv_file):
                csv_files.append(csv_file)
            else:
                print(f"Warning: {csv_file} does not exist for subject {subject}, session {session}")

        return csv_files

    def _get_single_subject_data(self, subject):  # type: ignore
        """Load and process data for a single subject."""
        csv_files = self.data_path(subject)
        sessions = {}

        for session_idx, csv_file in enumerate(csv_files):
            # Load CSV data
            # todo neglect invalid cols
            eeg_columns = ["FZ", "C3", "CZ", "C4", "PZ", "PO7", "OZ", "PO8"]
            df = pd.read_csv(csv_file, usecols=eeg_columns + ["Validation"])

            mask = df['Validation'] == 1
            eeg_data = df[eeg_columns].values.T  # Shape: (n_channels, n_timepoints)

            # Create channel info
            ch_names = eeg_columns + ["stim"]
            ch_types = ["eeg"] * len(eeg_columns) + ["stim"]
            sfreq = 250  # Your sampling frequency

            info = mne.create_info(ch_names=ch_names, ch_types=ch_types, sfreq=sfreq)  # type: ignore

            # Create stimulus channel from event labels
            # Assuming you have 10 trials of 1750 samples each
            stim_data = np.zeros(len(df))

            for trial in range(10):
                trial_start = trial * 1750
                if trial_start < len(df):
                    trial_label = self._get_trial_label(subject, session_idx + 1, trial + 1)
                    stim_data[trial_start] = event_mapping.get(trial_label)

            # Combine EEG and stimulus data
            eeg_data = eeg_data[:, mask]
            full_data = np.vstack([eeg_data, stim_data[np.newaxis, :]])

            # Create Raw object
            raw = mne.io.RawArray(data=full_data, info=info, verbose=False)

            # Store in sessions dictionary
            session_name = str(session_idx)
            if session_name not in sessions:
                sessions[session_name] = {}
            sessions[session_name]["0"] = raw  # Single run per session

        return sessions

    def _get_trial_label(self, subject_id, session_id, trial_idx):
        """Extract the event label for a specific trial."""
        metadata_df = self._load_metadata()

        trial_number = trial_idx
        subject_str = f"S{subject_id}"

        # Filter the metadata for this specific trial
        trial_row = metadata_df[
            (metadata_df["subject_id"] == subject_str) & (metadata_df["trial_session"] == session_id) & (metadata_df["trial"] == trial_number) & (metadata_df["task"] == "SSVEP")  # Extra safety filter
        ]
        
        label = trial_row.iloc[0]["label"]
        return label_to_freq.get(label)

    def _load_metadata(self):
        """Load the metadata CSV file once and cache it."""
        if not hasattr(self, "_metadata_df"):
            self._metadata_df = pd.read_csv(self.metadata_path)
            self._metadata_df = self._metadata_df[self._metadata_df["task"] == "SSVEP"]

        return self._metadata_df


dataset = CompetitionDataset()
dataset._get_single_subject_data(1)

The dataset class name 'CompetitionDataset' must be an abbreviation of its code 'Competition'. See moabb.datasets.base.is_abbrev for more information.


ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 17495 and the array at index 1 has size 17500

In [None]:
paradigm = SSVEP(n_classes=4, tmin=start_time, tmax=end_time)

# # Get the data
X, y, metadata = paradigm.get_data(dataset, subjects=list(range(1, 5)))  # , subjects=[1, 2])
freqs = paradigm.used_events(dataset)
interval = [paradigm.tmin, paradigm.tmax]
# Create label encoder
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

Choosing the first 4 classes from all possible events
 '10': 0
 '13': 0
 '7': 1
 '8': 3>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 0
 '7': 1
 '8': 4>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 4
 '7': 1
 '8': 1>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 1
 '7': 1
 '8': 5>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 3
 '7': 0
 '8': 3>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 4
 '7': 1
 '8': 2>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 4
 '7': 1
 '8': 1>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 3
 '7': 2
 '8': 2>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 2
 '7': 0
 '8': 3>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 1
 '7': 1
 '8': 4>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 5
 '7': 2
 '8': 0>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 3
 '7': 1
 '8': 3>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 2
 '7': 0
 '8': 2>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 2
 '7': 2
 '8': 3>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 4
 '7'

In [None]:
# CCA 62.2%
cca_clf = Pipeline([    
    ('standardize', StandardScaler_Epoch()),  
    ("classifier", SSVEP_CCA(interval=interval, freqs=freqs, n_harmonics=4)),
    ])

# custom_splitter = WithinSessionSplitter(n_folds=2)
evaluation = WithinSessionEvaluation(paradigm=paradigm, datasets=[dataset], cv=custom_splitter)
# pipeline = {"SSVEP_CCA": Pipeline([("classifier", cca_clf)])}

# results = evaluation.process(pipeline)

cca_clf.fit(X, y_encoded)
y_pred = cca_clf.predict(X)

In [None]:
# Assuming X and y_encoded are already available from your MOABB data loading
# X.shape: ((191, 8, 1251), y_encoded.shape: (191,))

# Convert X to float32 tensor
X_tensor = torch.tensor(X, dtype=torch.float32)
mue = X_tensor.mean(dim=(0, 2), keepdim=True)
std = (X_tensor.std(dim=(0, 2), keepdim=True) + 1e-8)
X_tensor = (X_tensor - mue) / std
print(X_tensor.shape)

# Ensure y_encoded is a 1D array of integers (already done by LabelEncoder, but good to confirm)
# No need for y_reshaped = np.repeat(y_encoded[:, None], 9, axis=1)
y_target = torch.tensor(y_encoded, dtype=torch.int64)  # Use int64 for PyTorch labels

# Create EEGNet model
model = EEGNetv4(
    n_chans=8,  # Your number of channels
    n_classes=4,  # Your number of classes
    input_window_samples=1251,  # Your time samples (updated from 1001 to 1251 based on X.shape)
    F1=8,  # First filter parameter
    D=2,  # Depth multiplier
    F2=16,  # F1 * D
)

# Wrap in skorch for sklearn compatibility
net = NeuralNetClassifier(
    model,
    criterion=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam,
    lr=0.001,
    batch_size=32,
    max_epochs=100,
)

# Use with your correctly shaped data
net.fit(X_tensor, y_target)
y_pred = net.predict(X_tensor)  # Use X_tensor for prediction as well

torch.Size([100, 8, 1251])




AssertionError: 

In [None]:
decoded_y_pred = event_mapping_decoder(y_pred)
correct = (y == decoded_y_pred).sum()
print(correct / len(y))
# print((y == decoded_y_pred_mset).sum())

0.1099476439790576


In [12]:
import torch
import torch.nn as nn
from skorch import NeuralNetClassifier
from huggingface_hub import hf_hub_download
import pickle # Still needed for kwargs.pkl, as it's likely a standard pickle file

# --- Your existing data loading and preprocessing ---
# (Keep this as is)
import numpy as np
X = np.random.rand(100, 8, 1251) # Example: 100 samples, 8 channels, 1251 timepoints
y_encoded = np.random.randint(0, 4, 100) # Example: 100 labels for 4 classes

X_tensor = torch.tensor(X, dtype=torch.float32)
mue = X_tensor.mean(dim=(0, 2), keepdim=True)
std = (X_tensor.std(dim=(0, 2), keepdim=True) + 1e-8)
X_tensor = (X_tensor - mue) / std
X_tensor = X_tensor.unsqueeze(3)
y_target = torch.tensor(y_encoded, dtype=torch.int64)

# --- Hugging Face Model Loading ---
# Download the model architecture and parameters
path_kwargs = hf_hub_download(
    repo_id='PierreGtch/EEGNetv4',
    filename='EEGNetv4_Lee2019_MI/kwargs.pkl',
)
path_params = hf_hub_download(
    repo_id='PierreGtch/EEGNetv4',
    filename='EEGNetv4_Lee2019_MI/model-params.pkl',
)

with open(path_kwargs, 'rb') as f:
    kwargs = pickle.load(f) # kwargs.pkl is likely a standard pickle file

module_cls = kwargs['module_cls']
module_kwargs = kwargs['module_kwargs']

print(f"Pre-trained model kwargs: {module_kwargs}")

torch_module = module_cls(**module_kwargs)

# --- CHANGE HERE: Use torch.load() instead of pickle.load() for model-params.pkl ---
# The map_location='cpu' is important if you're not using a GPU,
# or if the model was trained on GPU but you want to load it on CPU.
pretrained_state_dict = torch.load(path_params, map_location='cpu')

torch_module.load_state_dict(pretrained_state_dict)

print("Pre-trained EEGNetv4 model loaded successfully!")

# --- Wrap in skorch for sklearn compatibility ---
net = NeuralNetClassifier(
    torch_module,
    criterion=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam,
    lr=0.001,
    batch_size=32,
    max_epochs=100,
)

print("Starting training with pre-trained model...")
print(X_tensor.shape, y_target.shape)
dummy_x = torch.rand(100, 3, 385, 1)
dummy_y = torch.zeros(100, dtype=torch.int64)

net.fit(dummy_x, dummy_y)
y_pred = net.predict(X_tensor)

print("Training complete.")
print(f"Predictions shape: {y_pred.shape}")

Pre-trained model kwargs: {'in_chans': 3, 'n_classes': 2, 'input_window_samples': 385, 'drop_prob': 0.25}
Pre-trained EEGNetv4 model loaded successfully!
Starting training with pre-trained model...
torch.Size([100, 8, 1251, 1]) torch.Size([100])
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.6651[0m       [32m1.0000[0m        [35m0.5189[0m  0.0655
      2        [36m0.5948[0m       1.0000        [35m0.4511[0m  0.0410
      3        [36m0.4666[0m       1.0000        [35m0.3953[0m  0.0421




      4        [36m0.3556[0m       1.0000        [35m0.3434[0m  0.0386
      5        [36m0.2536[0m       1.0000        [35m0.2932[0m  0.0426
      6        [36m0.2052[0m       1.0000        [35m0.2444[0m  0.0336
      7        [36m0.1246[0m       1.0000        [35m0.1994[0m  0.0439
      8        [36m0.0795[0m       1.0000        [35m0.1643[0m  0.0363
      9        [36m0.0575[0m       1.0000        [35m0.1383[0m  0.0404
     10        [36m0.0317[0m       1.0000        [35m0.1197[0m  0.0369
     11        [36m0.0262[0m       1.0000        [35m0.1053[0m  0.0354
     12        [36m0.0143[0m       1.0000        [35m0.0943[0m  0.0368
     13        [36m0.0130[0m       1.0000        [35m0.0854[0m  0.0379
     14        [36m0.0091[0m       1.0000        [35m0.0783[0m  0.0371
     15        [36m0.0088[0m       1.0000        [35m0.0727[0m  0.0372
     16        [36m0.0065[0m       1.0000        [35m0.0683[0m  0.0353
     17        [36m0.005

AssertionError: 

In [8]:
torch_module

EEGNetv4(
  (ensuredims): Ensure4d()
  (dimshuffle): Rearrange('batch ch t 1 -> batch 1 ch t')
  (conv_temporal): Conv2d(1, 8, kernel_size=(1, 64), stride=(1, 1), padding=(0, 32), bias=False)
  (bnorm_temporal): BatchNorm2d(8, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
  (conv_spatial): Conv2dWithConstraint(8, 16, kernel_size=(3, 1), stride=(1, 1), groups=8, bias=False)
  (bnorm_1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
  (elu_1): Expression(expression=elu) 
  (pool_1): AvgPool2d(kernel_size=(1, 4), stride=(1, 4), padding=0)
  (drop_1): Dropout(p=0.25, inplace=False)
  (conv_separable_depth): Conv2d(16, 16, kernel_size=(1, 16), stride=(1, 1), padding=(0, 8), groups=16, bias=False)
  (conv_separable_point): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bnorm_2): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
  (elu_2): Expression(expression=elu) 
  (pool_2): AvgPool2d(ke