### Clinical BCI Challenge-WCCI2020
- [website link](https://sites.google.com/view/bci-comp-wcci/?fbclid=IwAR37WLQ_xNd5qsZvktZCT8XJerHhmVb_bU5HDu69CnO85DE3iF0fs57vQ6M)


 - [Dataset Link](https://github.com/5anirban9/Clinical-Brain-Computer-Interfaces-Challenge-WCCI-2020-Glasgow)
 

In [1]:
import mne
from scipy.io import loadmat
import scipy
import sklearn
import numpy as np
import pandas as pd
import glob
from mne.decoding import CSP
import os

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.pipeline import make_pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda

In [18]:
import warnings
# NOTE: no category is given, so this silences every Python warning raised from here on
warnings.filterwarnings('ignore') # to ignore warnings

In [4]:
# Single switch for output verbosity: it sets the MNE log level here and is also the
# default of the `verbose` argument of get_mne_epochs defined further down.
verbose = False # to universally just change it to true/false for different output display
mne.set_log_level(verbose=verbose) # to suppress large info outputs

In [5]:
# using kappa as evaluation metric
# `scorer` is the object passed as `scoring=` to cross_val_score and GridSearchCV in the
# cells below, so every reported "score" is a Cohen's kappa unless `scorer` is reassigned.
kappa = sklearn.metrics.make_scorer(sklearn.metrics.cohen_kappa_score) # kappa scorer
acc = sklearn.metrics.make_scorer(sklearn.metrics.accuracy_score)      # accuracy scorer
scorer = kappa          # just assign another scorer (e.g. `acc`) to replace kappa scorer

In [6]:
# forwarded as `n_jobs=` to every GridSearchCV built in the grid-search section
n_jobs = None  # for multicore parallel processing, set it to 1 if cause memory issues, for full utilization set to -1

## Data Loading and Conversion to MNE Datatypes
[Mike Cohen Tutorials link for EEG Preprocessing](https://www.youtube.com/watch?v=uWB5tjhataY&list=PLn0OLiymPak2gDD-VDA90w9_iGDgOOb2o)

In [7]:
# Inside IPython/Jupyter `_dh` holds the directory history and `_dh[0]` is the folder the
# kernel started in (normally the folder of this notebook). `_dh` does not exist outside
# IPython, so fall back to the current working directory instead of raising a KeyError
# when this code is exported and run as a plain script.
current_folder = globals().get('_dh', [os.getcwd()])[0]
data_path = os.path.join(current_folder, 'Data')  # the .mat files are expected in ./Data

In [8]:
# glob returns its matches in arbitrary, OS-dependent order. Sort them so the lists are
# deterministic across runs and machines: later cells pair training and evaluation data
# purely by list position (the same subject_index is used for both lists).
# NOTE(review): position-based pairing assumes the file names sort by subject — confirm
# against the actual file names in the Data folder.
all_files        = sorted(glob.glob(data_path + '/*.mat'))
training_files   = sorted(glob.glob(data_path + '/*T.mat'))
evaluation_files = sorted(glob.glob(data_path + '/*E.mat'))
len(all_files), len(training_files), len(evaluation_files)     # if these return zero,then no file is loaded

(18, 8, 10)

In [9]:
def get_mne_epochs(filepath, verbose=verbose, t_start=2, fs=512, mode='train'):
    '''
    Read the EEG trials of one .mat file and convert them to an MNE-Python epochs object.

    The 'RawEEGData' array (indexed below as trials x channels x samples) is cropped so that
    the first `t_start` seconds of every trial are dropped. The time axis is then shifted so
    that t = 0 falls on the cue onset, 3 seconds into the trial; with the default arguments
    the returned epochs therefore start at -1.0 sec. The epochs are high-pass filtered at
    1 Hz and baseline-corrected over [-0.25, 0] sec. Details can be found in the
    preprocessing section of the attached document.

    Parameters
    ----------
    filepath : str
        Path of the .mat file to read.
    verbose : bool
        Forwarded to mne.EpochsArray.
    t_start : int or float
        Number of seconds to discard from the beginning of every trial.
    fs : int or float
        Sampling frequency in Hz.
    mode : str
        'train' attaches the class labels stored under 'Labels' in the file. Any other
        value leaves the events created by mne.EpochsArray untouched (this is how the
        evaluation files are loaded).

    Returns
    -------
    mne.EpochsArray
        The cropped, filtered and baseline-corrected epochs.
    '''
    mat_data = loadmat(filepath) # read .mat file
    eeg_data = mat_data['RawEEGData']
    # a slice index must be an integer; round so a fractional t_start (e.g. 2.5) also works
    idx_start = int(round(fs * t_start))
    eeg_data = eeg_data[:, :, idx_start:]
    event_id = {'left-hand': 1, 'right-hand': 2}
    channel_names = ['F3', 'FC3', 'C3', 'CP3', 'P3', 'FCz', 'CPz', 'F4', 'FC4', 'C4', 'CP4', 'P4']
    info = mne.create_info(ch_names=channel_names, sfreq=fs, ch_types='eeg')
    # tmin places t = 0 at the cue onset (3 s into the original trial)
    epochs = mne.EpochsArray(eeg_data, info, verbose=verbose, tmin=t_start-3.0)
    epochs.set_montage('standard_1020')
    epochs.filter(1., None)  # high-pass at 1 Hz, no low-pass
    epochs.apply_baseline(baseline=(-.250, 0)) # baseline correction over the 250 ms before the cue

    if mode == 'train': # this in only applicable for training data
        epochs.event_id = event_id
        epochs.events[:,2] = mat_data['Labels'].ravel()  # third events column holds the class id
    return epochs

def get_labels(filepath):
    '''Return the 'Labels' entry of the given .mat file flattened to a 1-D array.'''
    contents = loadmat(filepath)
    label_array = contents['Labels']
    return label_array.ravel()

In [19]:
# load the first training file once to check the array shapes
epochs = get_mne_epochs(training_files[0], verbose=verbose)
labels = get_labels(training_files[0])
data = epochs.get_data()
print('Shape of EEG Data: ', data.shape, '\t Shape of Labels: ', labels.shape)

Shape of EEG Data:  (80, 12, 3072) 	 Shape of Labels:  (80,)


### Training Data

In [11]:
# loading original data: one epochs object per training file, in training_files order
epochs_list_train = [get_mne_epochs(train_file, verbose=verbose) for train_file in training_files]

### Evaluation Data
The first 8 files are for single-subject evaluation and the last 2 are for cross-subject evaluation.

In [12]:
# evaluation files carry no labels here, so they are loaded with mode='test'
epochs_list_eval = [get_mne_epochs(eval_file, mode='test', verbose=verbose) for eval_file in evaluation_files]

### Bandpass filtering of data

In [13]:
# apply the same 7-32 Hz band-pass to every training and evaluation epochs object;
# the return value is not used, the objects stored in the two lists are filtered directly
for epochs in epochs_list_train + epochs_list_eval:
    epochs.filter(7.0, 32.0)

## Let's try doing some classification

In [14]:
# stratified shuffle-split cross-validation: 5 splits, fixed seed so the splits are repeatable
cv = StratifiedShuffleSplit(n_splits=5, random_state=0) 

In [15]:
# quick experiment on a single subject: the training file at list index 3
epochs = epochs_list_train[3]
# multitaper power spectral density restricted to 0.5-4.5 s and 8-30 Hz
# NOTE(review): psd_multitaper was deprecated in later MNE releases in favour of
# Epochs.compute_psd(method='multitaper') — confirm against the installed MNE version
psds, freqs = mne.time_frequency.psd_multitaper(epochs, tmin=0.5, tmax=4.5, fmin=8, fmax=30 ,n_jobs=1)
psds = 10 * np.log10(psds) # to convert powers to DB
labels = epochs.events[:,-1]  # last events column holds the class id written by get_mne_epochs

In [16]:
# hold out part of the trials as a test set (stratified by class, fixed seed)
x_trainVal, x_test, y_trainVal, y_test = train_test_split(psds, labels.ravel(), shuffle=True, stratify=labels, random_state=0)  # to avoid confusing names and reusing x_trainVal
print('train set:  features: ', x_trainVal.shape, 'labels: ', y_trainVal.shape)
print('Test  set:  features: ', x_test.shape, 'labels: ', y_test.shape)
y_train = y_trainVal  # alias used by the cross-validation cell below

train set:  features:  (60, 12, 88) labels:  (60,)
Test  set:  features:  (20, 12, 88) labels:  (20,)


In [17]:
# using all channels
trials, channels, eeg = x_trainVal.shape
x_train = x_trainVal.reshape(trials, -1)  # flatten each trial's (channel, frequency) PSD grid into one feature vector
print('*'*10, 'Classification Scores Comparison with default Parameters' ,'*'*10)
print('#'*15, 'Using All Channels', '#'*15)
# every classifier is preceded by a StandardScaler and scored with the same splitter and scorer
for _label, _classifier in [
    ('KNN           : ', KNeighborsClassifier()),
    ('Log-Regression: ', LogisticRegression(max_iter=1000)),
    ('Linear SVM    : ', LinearSVC(random_state=0)),
    ('kernal SVM    : ', SVC(gamma='scale')),
    ('LDA           : ', lda()),
]:
    _split_scores = cross_val_score(make_pipeline(StandardScaler(), _classifier), x_train, y_train, cv=cv, scoring=scorer)
    print(_label, np.mean(_split_scores))

********** Classification Scores Comparison with default Parameters **********
############### Using All Channels ###############
KNN           :  0.4666666666666667
Log-Regression:  0.8
Linear SVM    :  0.8
kernal SVM    :  0.6666666666666667
LDA           :  0.6000000000000001


## Grid Search
Using the [0.5, 4.5] s time window and the [8, 30] Hz frequency band.

In [106]:
# rebinds `cv` for the grid search: the first positional argument sets 10 splits instead of 5, same fixed seed
cv = StratifiedShuffleSplit(10, random_state=0)

In [107]:
# for linear svm: 15 log-spaced values of C between 1e-4 and 1e2
param_grid_linear_svm = {'linearsvc__C': np.logspace(-4, 2, 15)}

# lda, auto shrinkage performs pretty well mostly
# candidates are 0.1, 0.2, ..., 1.0 followed by the 'auto' setting
shrinkage = [*np.arange(0.1, 1.01, 0.1), 'auto']
param_grid_lda = {'lineardiscriminantanalysis__shrinkage': shrinkage}

In [108]:
# One independent GridSearchCV per training file, so each subject gets its own tuned model.
# The lists are indexed by subject_index in training_function / evaluation_function.
# Each search wraps a StandardScaler + classifier pipeline and uses the shared cv, n_jobs and scorer.
grids_linear_svm_list = [GridSearchCV(make_pipeline(StandardScaler(), LinearSVC(random_state=0)), 
                               param_grid=param_grid_linear_svm, cv=cv, n_jobs=n_jobs, scoring=scorer)
                        for _ in range(len(training_files))]

# LDA is built with solver='eigen'; the grid searches over its shrinkage values
grids_lda_list = [GridSearchCV(make_pipeline(StandardScaler(), lda(solver='eigen')), 
                        param_grid=param_grid_lda, cv=cv, n_jobs=n_jobs, scoring=scorer)
                 for _ in range(len(training_files))]

In [109]:
def training_function(subject_index=0):
    '''
    Fit the linear-SVM and LDA grid searches of one subject on its whole training set.

    Features are the multitaper PSD (0.5-4.5 s, 8-30 Hz) in dB, flattened to one vector
    per trial. The best cross-validation score of each grid search is printed.
    '''
    # this time training function trains on whole training set
    print('-'*25, 'Training for Subject:', subject_index+1, '-'*25)
    subject_epochs = epochs_list_train[subject_index]
    power, _ = mne.time_frequency.psd_multitaper(subject_epochs, tmin=0.5, tmax=4.5, fmin=8, fmax=30 ,n_jobs=1)
    features = (10 * np.log10(power)).reshape(len(power), -1)
    targets = subject_epochs.events[:, -1]

    # SVM first, then LDA, each reported right after it has been fitted
    for title, grid_list in (('LinearSVM: Maximum Cross Validation Score = ', grids_linear_svm_list),
                             ('LDA      : Maximum Cross Validation Score = ', grids_lda_list)):
        grid_list[subject_index].fit(features, targets)
        print(title, round(grid_list[subject_index].best_score_, 3))
    print()

In [110]:
def evaluation_function(subject_index=0):
    '''
    Predict the evaluation trials of one subject with its fitted SVM and LDA grid searches
    and print how many trials each model assigns to class 1 and to class 2.

    The features are computed exactly as in training_function.
    '''
    # prints the prediction counts for each class
    subject_epochs = epochs_list_eval[subject_index]
    power, _ = mne.time_frequency.psd_multitaper(subject_epochs, tmin=0.5, tmax=4.5, fmin=8, fmax=30 ,n_jobs=1)
    features = (10 * np.log10(power)).reshape(len(power), -1)

    svm_predictions = grids_linear_svm_list[subject_index].predict(features)
    lda_predictions = grids_lda_list[subject_index].predict(features)
    print('-'*25, 'Predictions Counts Subject:', subject_index+1, '-'*25)
    for title, predictions in (('Linear SVM: Class 1 =', svm_predictions),
                               ('LDA       : Class 1 =', lda_predictions)):
        print(title, sum(predictions==1), 'Class 2 =', sum(predictions==2))
    print()

### It's Training Time

In [111]:
# fit both grid searches for every subject that has a training file
for subject in range(len(training_files)):
    training_function(subject)

------------------------- Training for Subject: 1 -------------------------
LinearSVM: Maximum Cross Validation Score =  0.65
LDA      : Maximum Cross Validation Score =  0.625

------------------------- Training for Subject: 2 -------------------------
LinearSVM: Maximum Cross Validation Score =  0.75
LDA      : Maximum Cross Validation Score =  0.75

------------------------- Training for Subject: 3 -------------------------
LinearSVM: Maximum Cross Validation Score =  0.675
LDA      : Maximum Cross Validation Score =  0.675

------------------------- Training for Subject: 4 -------------------------
LinearSVM: Maximum Cross Validation Score =  0.775
LDA      : Maximum Cross Validation Score =  0.775

------------------------- Training for Subject: 5 -------------------------
LinearSVM: Maximum Cross Validation Score =  0.425
LDA      : Maximum Cross Validation Score =  0.425

------------------------- Training for Subject: 6 -------------------------
LinearSVM: Maximum Cross Validat

In [112]:
# Iterate over the number of *training* files on purpose: only those subjects have a fitted
# model. The extra evaluation files (described above as cross-subject) are not predicted here.
for subject in range(len(training_files)):
    evaluation_function(subject)

------------------------- Predictions Counts Subject: 1 -------------------------
Linear SVM: Class 1 = 29 Class 2 = 11
LDA       : Class 1 = 29 Class 2 = 11

------------------------- Predictions Counts Subject: 2 -------------------------
Linear SVM: Class 1 = 39 Class 2 = 1
LDA       : Class 1 = 38 Class 2 = 2

------------------------- Predictions Counts Subject: 3 -------------------------
Linear SVM: Class 1 = 27 Class 2 = 13
LDA       : Class 1 = 29 Class 2 = 11

------------------------- Predictions Counts Subject: 4 -------------------------
Linear SVM: Class 1 = 32 Class 2 = 8
LDA       : Class 1 = 30 Class 2 = 10

------------------------- Predictions Counts Subject: 5 -------------------------
Linear SVM: Class 1 = 36 Class 2 = 4
LDA       : Class 1 = 37 Class 2 = 3

------------------------- Predictions Counts Subject: 6 -------------------------
Linear SVM: Class 1 = 18 Class 2 = 22
LDA       : Class 1 = 23 Class 2 = 17

------------------------- Predictions Counts Subjec

### Results
SVM is always better except for the last subject, so the Excel file uses the LDA predictions only for the last entry and the SVM predictions for all others.