In [2]:
from moabb.datasets.base import BaseDataset
from moabb.paradigms import SSVEP
from moabb.pipelines import SSVEP_CCA
from moabb.evaluations import WithinSessionEvaluation
from sklearn.pipeline import Pipeline
import os
import pandas as pd
import mne
import numpy as np
from moabb.evaluations.splitters import WithinSessionSplitter
from sklearn.preprocessing import LabelEncoder
from moabb.pipelines import SSVEP_TRCA, SSVEP_MsetCCA
from moabb.pipelines.utils import create_pipeline_from_config
from pyriemann.estimation import Covariances
from pyriemann.tangentspace import TangentSpace
from sklearn.svm import SVC
from pyriemann.classification import MDM  
from sklearn.linear_model import LogisticRegression
from moabb.pipelines import SSVEP_MsetCCA
from moabb.datasets import Wang2016
from moabb.pipelines.features import StandardScaler_Epoch  

In [None]:
n_subjects = 30
n_sessions = 8
start_time = 1
end_time = 6
split = "train"

label_to_freq = {"Left": "10", "Right": "13", "Forward": "7", "Backward": "8"}
event_mapping = {"10": 0, "13": 1, "7": 2, "8": 3}
event_mapping_decoder = np.vectorize({0: "10", 1: "13", 2: "7", 3: "8"}.get)


class CompetitionDataset(BaseDataset):
    def __init__(self):
        super().__init__(
            subjects=list(range(1, n_subjects + 1)),
            sessions_per_subject=n_sessions,
            events=event_mapping,
            code="Competition",
            interval=[start_time, end_time],
            paradigm="ssvep",
        )

        self.base_path = "./data/mtcaic3/SSVEP"
        self.metadata_path = os.path.join('./data/mtcaic3', f"{split}.csv")

    def data_path(self, subject, path=None, force_update=False, update_path=None, verbose=None):  # type: ignore
        """Return list of CSV file paths for this subject."""
        subject_dir = os.path.join(self.base_path, split, f"S{int(subject)}")
        csv_files = []

        # Collect all 8 session files for this subject
        for session in range(1, n_sessions + 1):
            csv_file = os.path.join(subject_dir, str(session), "EEGdata.csv")
            if os.path.exists(csv_file):
                csv_files.append(csv_file)
            else:
                print(f"Warning: {csv_file} does not exist for subject {subject}, session {session}")

        return csv_files

    def _get_single_subject_data(self, subject):  # type: ignore
        """Load and process data for a single subject."""
        csv_files = self.data_path(subject)
        sessions = {}

        for session_idx, csv_file in enumerate(csv_files):
            # Load CSV data
            # todo neglect invalid cols
            eeg_columns = ["FZ", "C3", "CZ", "C4", "PZ", "PO7", "OZ", "PO8"]
            df = pd.read_csv(csv_file, usecols=eeg_columns + ["Validation"])

            mask = df['Validation'] == 1
            eeg_data = df[eeg_columns].values.T  # Shape: (n_channels, n_timepoints)

            # Create channel info
            ch_names = eeg_columns + ["stim"]
            ch_types = ["eeg"] * len(eeg_columns) + ["stim"]
            sfreq = 250  # Your sampling frequency

            info = mne.create_info(ch_names=ch_names, ch_types=ch_types, sfreq=sfreq)  # type: ignore

            # Create stimulus channel from event labels
            # Assuming you have 10 trials of 1750 samples each
            stim_data = np.zeros(len(df))

            for trial in range(10):
                trial_start = trial * 1750
                if trial_start < len(df):
                    trial_label = self._get_trial_label(subject, session_idx + 1, trial + 1)
                    stim_data[trial_start] = event_mapping.get(trial_label)

            # Combine EEG and stimulus data
            eeg_data = eeg_data[:, mask]
            full_data = np.vstack([eeg_data, stim_data[np.newaxis, :]])

            # Create Raw object
            raw = mne.io.RawArray(data=full_data, info=info, verbose=False)

            # Store in sessions dictionary
            session_name = str(session_idx)
            if session_name not in sessions:
                sessions[session_name] = {}
            sessions[session_name]["0"] = raw  # Single run per session

        return sessions

    def _get_trial_label(self, subject_id, session_id, trial_idx):
        """Extract the event label for a specific trial."""
        metadata_df = self._load_metadata()

        trial_number = trial_idx
        subject_str = f"S{subject_id}"

        # Filter the metadata for this specific trial
        trial_row = metadata_df[
            (metadata_df["subject_id"] == subject_str) & (metadata_df["trial_session"] == session_id) & (metadata_df["trial"] == trial_number) & (metadata_df["task"] == "SSVEP")  # Extra safety filter
        ]
        
        label = trial_row.iloc[0]["label"]
        return label_to_freq.get(label)

    def _load_metadata(self):
        """Load the metadata CSV file once and cache it."""
        if not hasattr(self, "_metadata_df"):
            self._metadata_df = pd.read_csv(self.metadata_path)
            self._metadata_df = self._metadata_df[self._metadata_df["task"] == "SSVEP"]

        return self._metadata_df


dataset = CompetitionDataset()
dataset._get_single_subject_data(1)

The dataset class name 'CompetitionDataset' must be an abbreviation of its code 'Competition'. See moabb.datasets.base.is_abbrev for more information.


{'0': {'0': <RawArray | 9 x 17500 (70.0 s), ~1.2 MiB, data loaded>},
 '1': {'0': <RawArray | 9 x 17500 (70.0 s), ~1.2 MiB, data loaded>},
 '2': {'0': <RawArray | 9 x 17500 (70.0 s), ~1.2 MiB, data loaded>},
 '3': {'0': <RawArray | 9 x 17500 (70.0 s), ~1.2 MiB, data loaded>},
 '4': {'0': <RawArray | 9 x 17500 (70.0 s), ~1.2 MiB, data loaded>},
 '5': {'0': <RawArray | 9 x 17500 (70.0 s), ~1.2 MiB, data loaded>},
 '6': {'0': <RawArray | 9 x 17500 (70.0 s), ~1.2 MiB, data loaded>},
 '7': {'0': <RawArray | 9 x 17500 (70.0 s), ~1.2 MiB, data loaded>}}

In [54]:
paradigm = SSVEP(n_classes=4, tmin=start_time, tmax=end_time)

# # Get the data
X, y, metadata = paradigm.get_data(dataset, subjects=list(range(1, 5)))  # , subjects=[1, 2])
freqs = paradigm.used_events(dataset)
interval = [paradigm.tmin, paradigm.tmax]
# Create label encoder
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

Choosing the first 4 classes from all possible events
 '10': 0
 '13': 0
 '7': 1
 '8': 3>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 0
 '7': 1
 '8': 4>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 4
 '7': 1
 '8': 1>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 1
 '7': 1
 '8': 5>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 3
 '7': 0
 '8': 3>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 4
 '7': 1
 '8': 2>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 4
 '7': 1
 '8': 1>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 3
 '7': 2
 '8': 2>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 2
 '7': 0
 '8': 3>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 1
 '7': 1
 '8': 4>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 5
 '7': 2
 '8': 0>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 3
 '7': 1
 '8': 3>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 2
 '7': 0
 '8': 2>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 2
 '7': 2
 '8': 3>
  warn(f"warnEpochs {epochs}")
 '10': 0
 '13': 4
 '7'

In [None]:
# MDM 10% 5 Subject
ssvep_mdm = Pipeline([  
    ('covariances', Covariances(estimator='lwf')),  # Covariance estimation  
    ('classifier', MDM())                           # Minimum Distance to Mean classifier  
])  

ssvep_mdm.fit(X, y_encoded)  
y_pred = ssvep_mdm.predict(X)



In [None]:
# FilterBank + CSP + SVM 23% 5 subjects
from pyriemann.estimation import Covariances  
from pyriemann.spatialfilters import CSP  
from sklearn.svm import SVC  
from sklearn.pipeline import Pipeline  
  
filterbank_csp_svm = Pipeline([  
    ('covariances', Covariances(estimator='lwf')),  
    ('csp', CSP(nfilter=4)),  
    ('classifier', SVC(kernel='rbf', C=0.05))  
])  
filterbank_csp_svm.fit(X, y_encoded)  
y_pred = filterbank_csp_svm.predict(X)

In [None]:
# FilterBank + CSP + LDA  23% (5 Subjects)
from pyriemann.estimation import Covariances  
from pyriemann.spatialfilters import CSP  
from sklearn.svm import SVC  
from sklearn.pipeline import Pipeline  

filterbank_csp_svm = Pipeline([  
    ('covariances', Covariances(estimator='lwf')),  
    ('csp', CSP(nfilter=4)),  
    ('classifier', SVC(kernel='rbf', C=0.01))  
])  
filterbank_csp_svm.fit(X, y_encoded)  
y_pred = filterbank_csp_svm.predict(X)

In [None]:
# TRCA 21% 5 Subjects
trca_clf = SSVEP_TRCA(
    interval=interval,  # Use the same interval as the paradigm
    freqs=freqs,       # Use the frequencies from the paradigm
    downsample=1,
    is_ensemble=True,
    method="original",
    estimator="scm"
)

trca_clf.fit(X, y_encoded)
y_pred = trca_clf.predict(X)

In [None]:
# TS+SVM 23.0% (5 Subjects)
ssvep_ts_svm = Pipeline(
    [
        ("covariances", Covariances(estimator="lwf")),  # Covariance estimation
        ("tangentspace", TangentSpace()),  # Tangent Space projection
        ("classifier", SVC(kernel="rbf", C=0.05)),  # Support Vector Machine precomputed', 'poly', 'sigmoid', 'linear', 'rbf'} 
    ]
)

# Use the same encoded labels as you did with TRCA
ssvep_ts_svm.fit(X, y_encoded)
y_pred = ssvep_ts_svm.predict(X)



In [None]:
# TS+LR 11.5% 5 Subjs
ssvep_ts_lr = Pipeline([  
    ('covariances', Covariances(estimator='lwf')),  # Covariance estimation  
    ('tangentspace', TangentSpace()),               # Tangent Space projection  
    ('classifier', LogisticRegression(C=0.05))            # Logistic Regression  
])
ssvep_ts_lr.fit(X, y_encoded)
y_pred = ssvep_ts_lr.predict(X)



In [None]:
# CCA 62.2%
cca_clf = Pipeline([    
    ('standardize', StandardScaler_Epoch()),  
    ("classifier", SSVEP_CCA(interval=interval, freqs=freqs, n_harmonics=4)),
    ])

# custom_splitter = WithinSessionSplitter(n_folds=2)
evaluation = WithinSessionEvaluation(paradigm=paradigm, datasets=[dataset], cv=custom_splitter)
# pipeline = {"SSVEP_CCA": Pipeline([("classifier", cca_clf)])}

# results = evaluation.process(pipeline)

cca_clf.fit(X, y_encoded)
y_pred = cca_clf.predict(X)

In [64]:
import numpy as np
import torch
import torch.nn as nn
from braindecode.models import EEGNetv4
from skorch import NeuralNetClassifier
from sklearn.preprocessing import LabelEncoder  # Assuming you have this from previous steps

# Assuming X and y_encoded are already available from your MOABB data loading
# X.shape: ((191, 8, 1251), y_encoded.shape: (191,))

# Convert X to float32 tensor
X_tensor = torch.tensor(X, dtype=torch.float32)
print(X_tensor.shape)

# Ensure y_encoded is a 1D array of integers (already done by LabelEncoder, but good to confirm)
# No need for y_reshaped = np.repeat(y_encoded[:, None], 9, axis=1)
y_target = torch.tensor(y_encoded, dtype=torch.int64)  # Use int64 for PyTorch labels

# Create EEGNet model
model = EEGNetv4(
    n_chans=8,  # Your number of channels
    n_classes=4,  # Your number of classes
    input_window_samples=1251,  # Your time samples (updated from 1001 to 1251 based on X.shape)
    F1=8,  # First filter parameter
    D=2,  # Depth multiplier
    F2=16,  # F1 * D
)

# Wrap in skorch for sklearn compatibility
net = NeuralNetClassifier(
    model,
    criterion=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam,
    lr=0.001,
    batch_size=32,
    max_epochs=100,
)

# Use with your correctly shaped data
net.fit(X_tensor, y_target)
y_pred = net.predict(X_tensor)  # Use X_tensor for prediction as well

torch.Size([191, 8, 1251])




  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.4145[0m       [32m0.3846[0m        [35m1.3612[0m  0.4601
      2        [36m1.2623[0m       0.3590        1.3739  0.3223
      3        [36m1.2198[0m       0.3590        1.3883  0.2648
      4        [36m1.1475[0m       0.3333        1.3954  0.3316
      5        [36m1.0899[0m       0.3077        1.3960  0.2990
      6        [36m1.0789[0m       0.3333        1.3837  0.2708
      7        [36m1.0586[0m       0.3590        1.3631  0.2807
      8        [36m1.0268[0m       0.3846        [35m1.3398[0m  0.2673
      9        1.0290       0.3846        [35m1.3224[0m  0.2549
     10        [36m0.9957[0m       0.3846        [35m1.3051[0m  0.2676
     11        1.0311       0.3846        [35m1.2861[0m  0.2670
     12        [36m0.9951[0m       0.3846        [35m1.2685[0m  0.2567
     13        0.9994       0.3846        [35m1.

In [6]:
decoded_y_pred = event_mapping_decoder(y_pred)
correct = (y == decoded_y_pred).sum()

print(correct / len(y))
# print((y == decoded_y_pred_mset).sum())

0.1099476439790576
