In [121]:
import braindecode

In [122]:
import mne
from scipy.io import loadmat
import scipy
import sklearn
import numpy as np
import pandas as pd
import glob
from mne.decoding import CSP
import os

In [123]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, LeaveOneGroupOut, StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda

In [124]:
import warnings
# Silence every Python warning for the rest of the session. This also hides
# deprecation and numerical warnings, so drop this line while debugging.
warnings.filterwarnings('ignore') # to ignore warnings

In [125]:
# Notebook-wide verbosity flag; it is also the default `verbose` argument of
# get_mne_epochs_complete, so it must be defined before that function.
verbose = False                    # global variable to suppress output display of MNE functions
mne.set_log_level(verbose=verbose) # to suppress large info outputs

In [126]:
# Worker count for multi-core processing: set it to 1 if parallelism causes
# memory issues, or to -1 to use every core.
n_jobs = None

## Data Loading and Conversion to MNE Datatypes
[Mike Cohen Tutorials link for EEG Preprocessing](https://www.youtube.com/watch?v=uWB5tjhataY&list=PLn0OLiymPak2gDD-VDA90w9_iGDgOOb2o)

In [127]:
# Folder of the running notebook: IPython keeps its directory history in `_dh`.
# Fall back to the working directory when this code runs outside IPython.
current_folder = globals().get('_dh', [os.getcwd()])[0]
# Folder holding the .mat recordings. Set the EEG_DATA_DIR environment variable
# to point somewhere else; otherwise the original machine-specific folder is used.
# (The previous os.path.join(current_folder, <absolute path>) simply discarded
# current_folder on Windows, so the default location is unchanged.)
data_path = os.environ.get(
    'EEG_DATA_DIR',
    r'C:\Users\User\Documents\GitHub\Frequency-Adaptive-Temporal-Kernel-EEGNet\Data',
)

In [128]:
# Collect every training recording (file names ending in "T.mat").
# glob returns matches in arbitrary, OS-dependent order; sorting keeps the trial
# order -- and everything derived from it, such as per-subject group ids --
# reproducible across machines.
training_files   = sorted(glob.glob(data_path + '/*T.mat'))
len(training_files)     # a result of zero means no file was found

10

## Let's Append Epochs

In [129]:
def get_mne_epochs_complete(files_paths, verbose=verbose, t_start=2, fs=512, mode='train'):
    '''
    Build a single MNE ``EpochsArray`` from the trials stored in several .mat files.

    Parameters
    ----------
    files_paths : sequence of str
        Paths of the .mat files. Every file must contain a ``'RawEEGData'`` array
        (indexed below as trials x channels x samples) and, when
        ``mode == 'train'``, a ``'Labels'`` array holding one 1-based label per trial.
    verbose : bool
        Forwarded to ``mne.EpochsArray``.
    t_start : float
        Seconds discarded from the beginning of every trial.
    fs : int
        Sampling frequency in Hz.
    mode : str
        ``'train'`` attaches the labels as events; any other value leaves the
        default events created by MNE untouched.

    Returns
    -------
    mne.EpochsArray
        Trials of all files, in the order of ``files_paths``, high-pass filtered
        at 1 Hz and baseline corrected on (-0.25 s, 0 s).

    Raises
    ------
    ValueError
        If ``files_paths`` is empty or the number of labels differs from the
        number of trials.
    '''
    files_paths = list(files_paths)
    if not files_paths:
        raise ValueError('files_paths is empty: no .mat file to load')

    is_train = mode == 'train'  # labels only exist for training data
    eeg_data = []
    labels = []
    for filepath in files_paths:
        # Read each file once and take both the signal and (if needed) the labels.
        mat_data = loadmat(filepath)
        eeg_data.extend(mat_data['RawEEGData'])
        if is_train:
            labels.extend(mat_data['Labels'].ravel() - 1)  # 1-based -> 0-based

    if is_train and len(labels) != len(eeg_data):
        raise ValueError(
            'label/trial count mismatch: {} labels for {} trials'.format(len(labels), len(eeg_data))
        )

    idx_start = int(round(fs * t_start))  # integer sample index even for a float t_start
    eeg_data = np.array(eeg_data)
    eeg_data = eeg_data[:, :, idx_start:]
    event_id = {'left-hand': 0, 'right-hand': 1} # pytorch expects labels in [0, n_classes-1]
    channel_names = ['F3', 'FC3', 'C3', 'CP3', 'P3', 'FCz', 'CPz', 'F4', 'FC4', 'C4', 'CP4', 'P4']
    info = mne.create_info(ch_names=channel_names, sfreq=fs, ch_types='eeg')
    # NOTE(review): tmin = t_start - 3 presumably places time 0 at a cue shown 3 s
    # into each trial -- confirm against the dataset description.
    epochs = mne.EpochsArray(eeg_data, info, verbose=verbose, tmin=t_start-3.0)
    epochs.set_montage('standard_1020')
    epochs.filter(1., None) # 1 Hz high-pass; the 7-30 Hz band-pass is applied later
    epochs.apply_baseline(baseline=(-.250, 0)) # baseline correction on the 250 ms before time 0

    if is_train:
        epochs.event_id = event_id
        epochs.events[:, 2] = labels
    return epochs

### Data Loading with Band Pass Filtering

In [130]:
# Load every training recording into one Epochs object, then keep only the
# 7-32 Hz band.
training_epochs_all = get_mne_epochs_complete(training_files)
training_epochs_all = training_epochs_all.filter(l_freq=7, h_freq=32)

In [131]:
# Inspect a copy so the filtered master object stays untouched.
epochs = training_epochs_all.copy()
data = epochs.get_data()
labels = epochs.events[:, -1]  # the class id sits in the last events column
print('Shape of EEG Data: ', data.shape, '\t Shape of Labels: ', labels.shape)

Shape of EEG Data:  (800, 12, 3072) 	 Shape of Labels:  (800,)


## Deep Learning with Braindecode 

### It's Training Time with [0.5, 4.5] sec and 2sec window with 1 sec stride (using leave one group out cv)

In [132]:
# Keep the [0.5 s, 4.5 s) part of every trial; cropping a fresh copy leaves
# training_epochs_all available for other cells.
epochs = training_epochs_all.copy().crop(tmin=0.5, tmax=4.5, include_tmax=False)

In [133]:
from braindecode.datautil import create_from_mne_epochs

# Convert the epochs to a braindecode-compatible data structure.
# At the 512 Hz sampling rate used by the loader, 1024 samples are 2 s windows
# and 512 samples are a 1 s stride.
window_size = 1024 # samples per window (50 and 3072 were tried before)
window_stride = 512 # samples between window starts (256 and 50 were tried before)

windows_datasets = create_from_mne_epochs(
            [epochs], # expects list of epochs
            window_size_samples = window_size,
            window_stride_samples = window_stride,
            drop_last_window = False
)

In [134]:
def get_windows_datasets_labels(windows_dataset):
    """Return the targets of all sub-datasets as one flat NumPy array.

    Walks ``windows_dataset.datasets`` in order and concatenates the ``y``
    sequence of every entry.
    """
    all_targets = [target for sub_ds in windows_dataset.datasets for target in sub_ds.y]
    return np.array(all_targets)

# Attach the per-window labels to the dataset as a one-column DataFrame.
# NOTE(review): this assigns to the attribute name `update_description`; if the
# braindecode dataset class defines something under that name, this instance
# attribute shadows it -- confirm that is intended.
windows_datasets.update_description = pd.DataFrame(data=get_windows_datasets_labels(windows_datasets), 
                                           columns=['labels'])

In [135]:
# The label frame has one row per window, so its length is the window count.
print("Total Windows in a whole Dataset: ", len(windows_datasets.update_description))

Total Windows in a whole Dataset:  2400


In [136]:
from braindecode.preprocessing import exponential_moving_standardize
from braindecode.preprocessing import Preprocessor, preprocess

# Band-pass edges (Hz) handed to the 'filter' preprocessor.
# NOTE(review): the epochs were already band-passed to 7-32 Hz when they were
# loaded, so this is a second filtering pass -- confirm it is intended.
low_cut_hz = 8.  # low cut frequency for filtering
high_cut_hz = 32.  # high cut frequency for filtering
# Parameters for exponential moving standardization
factor_new = 1e-3
init_block_size = 1000

# FIXED: Function receives numpy array, not epochs object
def custom_exp_moving_std_fn(data, factor_new=factor_new, init_block_size=init_block_size):
    """
    Exponentially standardize EEG samples, trial by trial when trials are stacked.

    Parameters
    ----------
    data : np.ndarray
        Either a 3-D stack of trials, which is standardized one trial at a time
        and written back into the same array, or any other array, which is
        standardized in a single call.
    factor_new, init_block_size
        Forwarded unchanged to ``exponential_moving_standardize``.

    Returns
    -------
    np.ndarray
        ``data`` itself for 3-D input, otherwise the array produced by
        ``exponential_moving_standardize``.
    """
    if data.ndim != 3:
        # Single recording: one call covers the whole array.
        return exponential_moving_standardize(
            data,
            factor_new=factor_new,
            init_block_size=init_block_size,
        )

    # Stack of trials: overwrite every trial with its standardized version.
    for trial_idx, trial in enumerate(data):
        data[trial_idx] = exponential_moving_standardize(
            trial,
            factor_new=factor_new,
            init_block_size=init_block_size,
        )
    return data

# Preprocessing steps handed to braindecode's `preprocess`, listed in the order
# they should run.
preprocessors = [
    # keep only EEG sensors
    Preprocessor(fn='pick_types', eeg=True, meg=False, stim=False),
    # bandpass filter
    Preprocessor(fn='filter', l_freq=low_cut_hz, h_freq=high_cut_hz),
    # exponential moving standardization
    Preprocessor(fn=custom_exp_moving_std_fn, factor_new=factor_new,
        init_block_size=init_block_size)
]

# Apply preprocessing. As the last expression of the cell, the returned dataset
# object is displayed.
# NOTE(review): presumably this modifies windows_datasets in place, so running
# the cell twice would filter and standardize twice -- confirm before re-running.
preprocess(windows_datasets, preprocessors)

<braindecode.datasets.base.BaseConcatDataset at 0x1c072a28500>

In [137]:
# preprocess(windows_datasets, preprocessors)

In [138]:
batch_size = 32 # mini-batch size (64 was tried before)
n_epochs = 25 # number of training epochs; use fewer for quick verification

In [139]:
# Creating a model
import torch
from braindecode.util import set_random_seeds
from braindecode.models import ShallowFBCSPNet, EEGNetv4

# Use the GPU whenever one is available.
cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'
if cuda:
    torch.backends.cudnn.benchmark = True

# Fixed seed so that results can be reproduced.
seed = 20200220
set_random_seeds(seed=seed, cuda=cuda)

n_classes = 2
# Channel count and window length are read from the first window of the dataset.
_first_window = windows_datasets[0][0]
n_chans, input_window_samples = _first_window.shape[0], _first_window.shape[1]

# The window length is passed as `n_times` (formerly `input_window_samples`).
model = EEGNetv4(
    n_chans,
    n_classes,
    n_times=input_window_samples,
    final_conv_length='auto',
)

# Move the model to the GPU when one is used.
if cuda:
    model.cuda()

In [140]:
cv = LeaveOneGroupOut()
# Group ids for leave-one-group-out cross validation in sklearn: every window gets
# the id (1..n_subjects) of the recording it came from.
# The subject count is derived from the number of loaded files instead of a
# hard-coded 8, which put group boundaries in the wrong place whenever a
# different number of recordings was loaded (the file listing above reports 10).
# Assumes one file per subject, the same number of windows in every file, and
# windows ordered file by file -- TODO confirm when the data changes.
n_subjects = len(training_files)
if n_subjects == 0:
    raise ValueError('no training files loaded: cannot build cross-validation groups')
windows_per_subject, leftover_windows = divmod(len(windows_datasets), n_subjects)
if leftover_windows:
    raise ValueError(
        '{} windows cannot be split evenly over {} subjects'.format(len(windows_datasets), n_subjects)
    )
group_list = []
for subject in range(1, n_subjects + 1):
    group_list.extend([subject] * windows_per_subject)
groups = np.array(group_list)

In [144]:
# Training time
import skorch
from skorch.callbacks import LRScheduler
from skorch.helper import predefined_split
from braindecode import EEGClassifier

lr = 1 * 0.02  # initial learning rate (0.01 was tried before)
weight_decay = 0.5 * 0.001

# Option 1: Simple validation split (recommended for most cases)
# NOTE(review): the section heading announces leave-one-group-out CV, but this
# classifier is configured with a plain ValidSplit -- confirm which is intended.
clf = EEGClassifier(
    model,
    criterion=torch.nn.CrossEntropyLoss(),
    optimizer=torch.optim.AdamW,
    train_split=skorch.dataset.ValidSplit(cv=5),  # presumably holds out one of 5 folds for validation -- confirm
    optimizer__lr=lr,
    optimizer__weight_decay=weight_decay,
    batch_size=batch_size,
    callbacks=[
        "accuracy",  # accuracy scoring; the training log shows train_accuracy / valid_accuracy columns
        # Cosine schedule whose length is tied to the notebook-level n_epochs.
        ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
    ],
    device=device,
)

In [148]:
def training_function(windows_datasets, n_epochs=25, classifier=None):
    """
    Fit a classifier on the windowed dataset and report the best validation kappa.

    Parameters
    ----------
    windows_datasets : indexable dataset
        ``windows_datasets[i][1]`` must be the target of window ``i``.
    n_epochs : int
        Number of training epochs passed to ``fit``.
    classifier : object, optional
        Estimator exposing ``fit(X, y=..., epochs=...)`` and a ``history`` that
        supports ``history[:, 'valid_accuracy']``. Defaults to the notebook-level
        ``clf``.

    Returns
    -------
    The fitted classifier.
    """
    print('\n', '#'*25, 'Cross Subject Training:', '#'*25, '\n')
    if classifier is None:
        classifier = clf  # fall back to the classifier built earlier in the notebook
    dataset = windows_datasets

    # Collect the target of every window; it is the second item of each sample.
    y = np.array([dataset[i][1] for i in range(len(dataset))])

    print(f"Found {len(np.unique(y))} classes: {np.unique(y)}")

    classifier.fit(dataset, y=y, epochs=n_epochs)

    # Read the validation accuracy by name from the training history rather than
    # through a positional index into callbacks_, which depends on callback
    # ordering and can silently pick a different scorer.
    best_validation_acc = max(classifier.history[:, 'valid_accuracy'])
    # kappa = 2*acc - 1 only holds for two balanced classes.
    best_validation_kappa = (2*best_validation_acc)-1
    print("Best Cross Validation Kappa Score: {:.2f}".format(best_validation_kappa))

    return classifier

# Train on the complete windowed dataset with the notebook-level epoch count.
training_function(windows_datasets, n_epochs=n_epochs)


 ######################### Cross Subject Training: ######################### 

Found 2 classes: [0 1]
  epoch    train_accuracy    train_loss    valid_acc    valid_accuracy    valid_loss      lr     dur
-------  ----------------  ------------  -----------  ----------------  ------------  ------  ------
      1            [36m0.5005[0m        [32m0.7266[0m       [35m0.5000[0m            [31m0.5000[0m        [94m3.1844[0m  0.0200  1.8979
      2            [36m0.7547[0m        [32m0.5908[0m       [35m0.7688[0m            [31m0.7688[0m        [94m0.5207[0m  0.0199  1.7886
      3            0.7354        [32m0.5639[0m       0.7063            0.7063        0.7900  0.0197  1.8100
      4            0.5156        0.5754       0.5104            0.5104        2.0035  0.0192  1.7825
      5            [36m0.7562[0m        [32m0.5269[0m       0.7583            0.7583        [94m0.4736[0m  0.0187  1.7215
      6            0.7307        [32m0.5201[0m       0.6813   