# Mount Google Drive (only needed when the data is read from Google Drive)

In [None]:
# Mount the user's Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd drive/MyDrive/Colab\ Notebooks
# !ls

/content/drive/MyDrive/Colab Notebooks


In [None]:
# Install libraries
!pip install mne
!pip install pyriemann
!pip install MOABB
!pip install  scipy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mne
  Downloading mne-1.1.1-py3-none-any.whl (7.5 MB)
[K     |████████████████████████████████| 7.5 MB 3.9 MB/s 
Installing collected packages: mne
Successfully installed mne-1.1.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyriemann
  Downloading pyriemann-0.3.tar.gz (365 kB)
[K     |████████████████████████████████| 365 kB 4.0 MB/s 
Building wheels for collected packages: pyriemann
  Building wheel for pyriemann (setup.py) ... [?25l[?25hdone
  Created wheel for pyriemann: filename=pyriemann-0.3-py2.py3-none-any.whl size=78033 sha256=1c467869f1e6ff6691b1373aca346fd7fa1ff2dd54670a359a9ffc06017d1c04
  Stored in directory: /root/.cache/pip/wheels/0b/1b/bf/a537f9e17e6c3490004ede419c72f863af1d0d765d25e532ef
Successfully built pyriemann
Installing collected packages: pyriemann
Successfully installed pyriemann-0.3

# Import libraries and read files

In [None]:
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mne
import seaborn as sns
from mne.decoding import CSP
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

import moabb
from moabb.datasets import BNCI2014004, Shin2017A
from moabb.evaluations import WithinSessionEvaluation, CrossSubjectEvaluation
from moabb.paradigms import LeftRightImagery


# Logging / warning verbosity for the rest of the notebook.
moabb.set_log_level("info")
mne.set_log_level("CRITICAL")
# Silences every Python warning from here on, not only the noisy library ones.
warnings.filterwarnings("ignore")

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.io
import mne
from mne import find_events, Epochs, pick_types, read_evokeds
from mne.preprocessing import ICA

import pywt
import scipy
from mne.preprocessing import (ICA, create_eog_epochs, create_ecg_epochs,
                               corrmap)
from sklearn.neighbors import KDTree

import seaborn as sns

import os
import re

import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

In [None]:
from collections import OrderedDict

# generic import
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# mne import
from mne import Epochs, pick_types, events_from_annotations
from mne.io import concatenate_raws
from mne.io.edf import read_raw_edf
from mne.datasets import eegbci
from mne.decoding import CSP

# pyriemann import
from pyriemann.classification import MDM, TSclassifier
from pyriemann.estimation import Covariances

# sklearn imports
from sklearn.model_selection import cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import classification_report

In [None]:
# ---- Experiment constants ----
n_event_ids = 20
selected_data_len = 2000

# Per subject: 20 events per run x 1 run per session x 3 sessions = 60 events,
# of which 2 sessions go to training and 1 session to evaluation.
batch_size = 10
n_subjs = 20
events_per_run = 20
runs_per_session = 1
sessions_for_train = 2
sessions_for_eval = 1

events_per_session = events_per_run * runs_per_session
events_per_subj_for_train = int(events_per_session * sessions_for_train)
events_per_subj_for_eval = int(events_per_session * sessions_for_eval)
# 80% of a subject's training events
train_split = int(events_per_subj_for_train * 0.8)

# Row labels: "Subject 1" .. "Subject <n_subjs>", followed by one pooled "All Subjects" entry.
list_subjs_names = [f"Subject {subj_no}" for subj_no in range(1, n_subjs + 1)]
list_subjs_names.append("All Subjects")

## Extract EEG from npy files

In [None]:
# Tags that select which saved EEG arrays to load
# (presumably the EOG / ECG artifact-removal variant — confirm).
EOG_ref = "EEGANet"
ECG_ref = None

npy_dir = 'Datasets/Shin2017A/npy_files'
# ECG_ref=None is formatted as the literal text "None" in the file name.
eeg_suffix = f'{EOG_ref}-{ECG_ref}'

# EEG arrays (file name depends on the tags above)
train_eeg = np.load(f'{npy_dir}/train_eeg_{eeg_suffix}.npy')
eval_eeg = np.load(f'{npy_dir}/eval_eeg_{eeg_suffix}.npy')
# Event arrays (shared by every variant)
train_events = np.load(f'{npy_dir}/train_events.npy')
eval_events = np.load(f'{npy_dir}/eval_events.npy')

# Classification

## Classification - Train-Test

- Find the best combination of hyperparameters for the CSP + SVM and CSP + LDA pipelines
    - Models are tuned on the training set and checked on the validation set

In [None]:
list_epochs_modeling = [train_eeg, eval_eeg]

# Labels live in the last column of each events array: show how many there are
# and the first ten values.
for events in (train_events, eval_events):
    event_labels = events[:, -1]
    print(event_labels.shape)
    print(event_labels[:10])

# Shape and value range of each EEG array.
separator = '-' * 100
for eeg_array in list_epochs_modeling:
    print(eeg_array.shape)
    print(np.max(eeg_array))
    print(np.min(eeg_array))
    print(separator)

(800,)
[1 2 1 2 1 2 1 2 2 1]
(400,)
[1 2 2 1 2 1 1 2 1 2]
(800, 30, 2000)
0.8101441264152527
-0.8276818990707397
----------------------------------------------------------------------------------------------------
(400, 30, 2000)
0.8605532646179199
-0.744243860244751
----------------------------------------------------------------------------------------------------


In [None]:
# Features and labels used for model selection; the label is the last column
# of the events array.
X, y = train_eeg.copy(), train_events[:, -1]
print(X.shape, y.shape)

# Hold out 20% of the training data as a validation split (fixed seed).
X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
)

# The eval arrays are kept aside as the final test set.
X_test = eval_eeg.copy()
y_test = eval_events[:, -1]

feature_splits = (X_train, X_val, X_test)
label_splits = (y_train, y_val, y_test)
print(*(arr.shape for arr in feature_splits))
print(*(arr.shape for arr in label_splits))

(800, 30, 2000) (800,)
(640, 30, 2000) (160, 30, 2000) (400, 30, 2000)
(640,) (160,) (400,)


In [None]:
# CSP feature extraction followed by an SVM classifier.
steps = [
    ('CSP', CSP()),
    ('SVM', SVC()),
]
pipeline = Pipeline(steps)

# Hyperparameter grid; keys are "<step name>__<parameter name>".
parameters = {
    "CSP__n_components": list(range(4, 11)),
    "CSP__reg": ["ledoit_wolf"],
    "CSP__log": [True, False],
    "SVM__decision_function_shape": ["ovr"],
    "SVM__C": [1, 10, 100],
    "SVM__gamma": [0.1, 0.01],
}

# 5-fold cross-validated grid search, fitted on the training split only.
cv = GridSearchCV(pipeline, parameters, cv=5)
cv.fit(X_train, y_train)

# Check the selected configuration on the validation split.
y_pred = cv.predict(X_val)
val_accuracy = cv.score(X_val, y_val)

print(f"Accuracy: {val_accuracy}")
print(classification_report(y_val, y_pred))
print(f"Tuned Model Parameters: {cv.best_params_}")


Accuracy: 0.5875
              precision    recall  f1-score   support

           1       0.59      0.58      0.59        81
           2       0.58      0.59      0.59        79

    accuracy                           0.59       160
   macro avg       0.59      0.59      0.59       160
weighted avg       0.59      0.59      0.59       160

Tuned Model Parameters: {'CSP__log': True, 'CSP__n_components': 10, 'CSP__reg': 'ledoit_wolf', 'SVM__C': 100, 'SVM__decision_function_shape': 'ovr', 'SVM__gamma': 0.1}


In [None]:
# Keep the winning CSP + SVM configuration and its refitted pipeline for later cells.
# NOTE: `cv` must still be the CSP + SVM grid search here — run this cell before
# the CSP + LDA search cell rebinds `cv`.
best_svm = cv.best_estimator_
best_svm_hyperparameters = cv.best_params_

print(best_svm_hyperparameters)
print(best_svm)

# Score the tuned search object on the held-out test (eval) set.
y_pred = cv.predict(X_test)
test_accuracy = cv.score(X_test, y_test)

print(f"Accuracy: {test_accuracy}")
print(classification_report(y_test, y_pred))
print(f"Tuned Model Parameters: {cv.best_params_}")

{'CSP__log': True, 'CSP__n_components': 10, 'CSP__reg': 'ledoit_wolf', 'SVM__C': 100, 'SVM__decision_function_shape': 'ovr', 'SVM__gamma': 0.1}
{'component_order': 'mutual_info',
 'cov_est': 'concat',
 'cov_method_params': None,
 'log': True,
 'n_components': 10,
 'norm_trace': False,
 'rank': None,
 'reg': 'ledoit_wolf',
 'transform_into': 'average_power'}
Pipeline(steps=[('CSP', CSP(None)), ('SVM', SVC(C=100, gamma=0.1))])
Accuracy: 0.5
              precision    recall  f1-score   support

           1       0.50      0.10      0.17       200
           2       0.50      0.90      0.64       200

    accuracy                           0.50       400
   macro avg       0.50      0.50      0.40       400
weighted avg       0.50      0.50      0.40       400

Tuned Model Parameters: {'CSP__log': True, 'CSP__n_components': 10, 'CSP__reg': 'ledoit_wolf', 'SVM__C': 100, 'SVM__decision_function_shape': 'ovr', 'SVM__gamma': 0.1}


In [None]:
# Display the tuned CSP + SVM pipeline selected by the grid search.
best_svm

{'component_order': 'mutual_info',
 'cov_est': 'concat',
 'cov_method_params': None,
 'log': True,
 'n_components': 10,
 'norm_trace': False,
 'rank': None,
 'reg': 'ledoit_wolf',
 'transform_into': 'average_power'}


Pipeline(steps=[('CSP', CSP(None)), ('SVM', SVC(C=100, gamma=0.1))])

In [None]:
# CSP feature extraction followed by a linear discriminant classifier.
steps = [
    ('CSP', CSP()),
    ('LDA', LDA()),
]
pipeline = Pipeline(steps)

# Hyperparameter grid; only the CSP step is tuned for this pipeline.
parameters = {
    "CSP__n_components": list(range(4, 11)),
    "CSP__reg": ["ledoit_wolf"],
    "CSP__log": [True, False],
}

# 5-fold cross-validated grid search, fitted on the training split only.
# This rebinds `cv`, which previously held the CSP + SVM search.
cv = GridSearchCV(pipeline, parameters, cv=5)
cv.fit(X_train, y_train)

# Check the selected configuration on the validation split.
y_pred = cv.predict(X_val)
val_accuracy = cv.score(X_val, y_val)

print(f"Accuracy: {val_accuracy}")
print(classification_report(y_val, y_pred))
print(f"Tuned Model Parameters: {cv.best_params_}")

Accuracy: 0.55
              precision    recall  f1-score   support

           1       0.55      0.60      0.58        81
           2       0.55      0.49      0.52        79

    accuracy                           0.55       160
   macro avg       0.55      0.55      0.55       160
weighted avg       0.55      0.55      0.55       160

Tuned Model Parameters: {'CSP__log': True, 'CSP__n_components': 9, 'CSP__reg': 'ledoit_wolf'}


In [None]:
# Keep the winning CSP + LDA configuration and its refitted pipeline for later cells.
# NOTE: `cv` must be the CSP + LDA grid search at this point.
best_lda = cv.best_estimator_
best_lda_hyperparameters = cv.best_params_

print(best_lda_hyperparameters)
print(best_lda)

# Score the tuned search object on the held-out test (eval) set.
y_pred = cv.predict(X_test)
test_accuracy = cv.score(X_test, y_test)

print(f"Accuracy: {test_accuracy}")
print(classification_report(y_test, y_pred))
print(f"Tuned Model Parameters: {cv.best_params_}")

{'CSP__log': True, 'CSP__n_components': 9, 'CSP__reg': 'ledoit_wolf'}
{'component_order': 'mutual_info',
 'cov_est': 'concat',
 'cov_method_params': None,
 'log': True,
 'n_components': 9,
 'norm_trace': False,
 'rank': None,
 'reg': 'ledoit_wolf',
 'transform_into': 'average_power'}
Pipeline(steps=[('CSP', CSP(None)), ('LDA', LinearDiscriminantAnalysis())])
Accuracy: 0.5875
              precision    recall  f1-score   support

           1       0.58      0.64      0.61       200
           2       0.60      0.54      0.57       200

    accuracy                           0.59       400
   macro avg       0.59      0.59      0.59       400
weighted avg       0.59      0.59      0.59       400

Tuned Model Parameters: {'CSP__log': True, 'CSP__n_components': 9, 'CSP__reg': 'ledoit_wolf'}


## Classification - CV

- Estimate each tuned model's accuracy with 5-fold cross-validation on the test (eval) set, per subject and for all subjects pooled

In [None]:
# 5-fold cross-validated accuracy of the tuned pipelines on the eval set,
# once per subject and once for all subjects pooled.
#
# Result layout: list_models_scores_subjs[i] == [svm_fold_scores, lda_fold_scores]
# for list_subjs_names[i]; the last entry ("All Subjects") uses every eval trial.
list_models_scores_subjs = []

# Pipelines to score, in the order later cells expect (SVM first, then LDA).
# cross_val_score refits a clone of each pipeline on every fold.
models_to_score = [("CSP + SVM", best_svm), ("CSP + LDA", best_lda)]

for idx_e_subj, e_subj in enumerate(list_subjs_names):
    print(e_subj)

    if e_subj == 'All Subjects':
        X = eval_eeg
        y = eval_events[:, -1]
    else:
        # assumes eval trials are stored subject by subject in consecutive blocks
        # of events_per_subj_for_eval trials — TODO confirm against the export script
        start_idx_eval = idx_e_subj * events_per_subj_for_eval
        end_idx_eval = start_idx_eval + events_per_subj_for_eval
        X = eval_eeg[start_idx_eval:end_idx_eval, :, :]
        y = eval_events[start_idx_eval:end_idx_eval, -1]

    print(X.shape)

    # Deliberately not named `cv`: that name holds the fitted GridSearchCV object,
    # and rebinding it here would break re-running the earlier evaluation cells.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    # Chance level = share of the larger group when labels are split into
    # "same as the first label" vs. "everything else".
    class_balance = np.mean(y == y[0])
    class_balance = max(class_balance, 1. - class_balance)

    list_e_model_score = []
    for model_name, model in models_to_score:
        scores = cross_val_score(model, X, y, cv=skf, n_jobs=1)
        list_e_model_score.append(scores)
        # nanmean: a fold whose score is NaN is left out of the average
        print("%s Classification accuracy: %f / Chance level: %f" %
              (model_name, np.nanmean(scores), class_balance))

    print('='*150)
    list_models_scores_subjs.append(list_e_model_score)

Subject 1
(20, 30, 2000)
CSP + SVM Classification accuracy: 0.600000 / Chance level: 0.500000
CSP + LDA Classification accuracy: 0.400000 / Chance level: 0.500000
Subject 2
(20, 30, 2000)
CSP + SVM Classification accuracy: 0.550000 / Chance level: 0.500000
CSP + LDA Classification accuracy: 0.500000 / Chance level: 0.500000
Subject 3
(20, 30, 2000)
CSP + SVM Classification accuracy: 0.650000 / Chance level: 0.500000
CSP + LDA Classification accuracy: 0.650000 / Chance level: 0.500000
Subject 4
(20, 30, 2000)
CSP + SVM Classification accuracy: 0.550000 / Chance level: 0.500000
CSP + LDA Classification accuracy: 0.550000 / Chance level: 0.500000
Subject 5
(20, 30, 2000)
CSP + SVM Classification accuracy: 0.300000 / Chance level: 0.500000
CSP + LDA Classification accuracy: 0.400000 / Chance level: 0.500000
Subject 6
(20, 30, 2000)
CSP + SVM Classification accuracy: 0.650000 / Chance level: 0.500000
CSP + LDA Classification accuracy: 0.600000 / Chance level: 0.500000
Subject 7
(20, 30, 200

In [None]:
# Raw per-fold scores: one [CSP + SVM, CSP + LDA] pair of arrays per entry of list_subjs_names.
list_models_scores_subjs

[[array([0.75, 0.25, 0.5 , 1.  , 0.5 ]),
  array([0.25, 0.  , 0.25, 1.  , 0.5 ])],
 [array([0.5 , 0.5 , 0.75, 0.75, 0.25]),
  array([0.5 , 0.5 , 0.75, 0.75, 0.  ])],
 [array([0.25, 1.  , 1.  , 0.25, 0.75]),
  array([0.25, 1.  , 0.5 , 1.  , 0.5 ])],
 [array([0.5 , 0.5 , 0.75, 0.75, 0.25]),
  array([0.25, 0.5 , 0.75, 0.75, 0.5 ])],
 [array([0.5 , 0.5 , 0.25, 0.  , 0.25]),
  array([0.5 , 0.25, 0.5 , 0.5 , 0.25])],
 [array([0.5 , 0.25, 0.75, 0.75, 1.  ]),
  array([0.75, 0.5 , 0.5 , 0.5 , 0.75])],
 [array([0.5 , 0.5 , 0.25, 0.5 , 0.25]),
  array([0.25, 1.  , 0.5 , 0.25, 0.  ])],
 [array([0.75, 0.5 , 0.5 , 0.75, 0.  ]),
  array([0.75, 0.75, 0.5 , 0.75, 0.  ])],
 [array([0.75, 0.25, 1.  , 1.  , 0.75]),
  array([1.  , 0.5 , 0.75, 0.75, 1.  ])],
 [array([0.5 , 0.5 , 0.75, 0.  , 0.5 ]),
  array([0.5 , 0.75, 0.25, 0.75, 0.5 ])],
 [array([0.5 , 0.75, 0.25, 0.5 , 0.5 ]),
  array([0.5 , 0.75, 0.5 , 0.5 , 0.75])],
 [array([1.  , 0.5 , 0.5 , 0.5 , 0.75]),
  array([0.75, 0.75, 0.5 , 0.5 , 0.25])],
 [ar

In [None]:
# Mean cross-validated accuracy for every entry of list_models_scores_subjs.
# Entries were appended in the order of list_subjs_names, so the names are zipped
# in to label the rows: subjects are numbered from 1 (matching the result table),
# and the last entry is reported as "All Subjects" rather than as a further subject.
model_labels = ("CSP + SVM", "CSP + LDA")

for subj_name, e_subj in zip(list_subjs_names, list_models_scores_subjs):
    print(subj_name)
    # e_subj holds the fold scores of the SVM pipeline first, then the LDA pipeline.
    for model_label, fold_scores in zip(model_labels, e_subj):
        mean_accuracy = np.mean(fold_scores)
        print(f"Cont EEG: {model_label} Classification accuracy: {mean_accuracy}")
    print('='*100)


Subject 00
Cont EEG: CSP + SVM Classification accuracy: 0.6
Cont EEG: CSP + LDA Classification accuracy: 0.4
Subject 01
Cont EEG: CSP + SVM Classification accuracy: 0.55
Cont EEG: CSP + LDA Classification accuracy: 0.5
Subject 02
Cont EEG: CSP + SVM Classification accuracy: 0.65
Cont EEG: CSP + LDA Classification accuracy: 0.65
Subject 03
Cont EEG: CSP + SVM Classification accuracy: 0.55
Cont EEG: CSP + LDA Classification accuracy: 0.55
Subject 04
Cont EEG: CSP + SVM Classification accuracy: 0.3
Cont EEG: CSP + LDA Classification accuracy: 0.4
Subject 05
Cont EEG: CSP + SVM Classification accuracy: 0.65
Cont EEG: CSP + LDA Classification accuracy: 0.6
Subject 06
Cont EEG: CSP + SVM Classification accuracy: 0.4
Cont EEG: CSP + LDA Classification accuracy: 0.4
Subject 07
Cont EEG: CSP + SVM Classification accuracy: 0.5
Cont EEG: CSP + LDA Classification accuracy: 0.55
Subject 08
Cont EEG: CSP + SVM Classification accuracy: 0.75
Cont EEG: CSP + LDA Classification accuracy: 0.8
Subject 09


# Save results

In [None]:
# Column labels carry the data-variant tag so that result files from different
# variants can be told apart.
run_tag = f"{EOG_ref}-{ECG_ref}"
col_names = [f"CSP + SVM_{run_tag}", f"CSP + LDA_{run_tag}"]
index_names = list_subjs_names

# Average over the last (fold) axis: one row per entry of list_subjs_names,
# one column per pipeline.
avg_score = np.asarray(list_models_scores_subjs).mean(axis=-1)

print(avg_score)

[[0.6  0.4 ]
 [0.55 0.5 ]
 [0.65 0.65]
 [0.55 0.55]
 [0.3  0.4 ]
 [0.65 0.6 ]
 [0.4  0.4 ]
 [0.5  0.55]
 [0.75 0.8 ]
 [0.45 0.55]
 [0.5  0.6 ]
 [0.65 0.55]
 [0.55 0.6 ]
 [0.75 0.7 ]
 [0.6  0.45]
 [0.85 0.85]
 [0.55 0.45]
 [0.55 0.55]
 [0.7  0.6 ]
 [0.45 0.4 ]
 [0.53 0.54]]


In [None]:
# Accuracy table: rows are the subjects plus the pooled "All Subjects" entry,
# columns are the two pipelines.
df_model_metrics = pd.DataFrame(avg_score, index=list_subjs_names, columns=col_names)
df_model_metrics

Unnamed: 0,CSP + SVM_EEGANet-None,CSP + LDA_EEGANet-None
Subject 1,0.6,0.4
Subject 2,0.55,0.5
Subject 3,0.65,0.65
Subject 4,0.55,0.55
Subject 5,0.3,0.4
Subject 6,0.65,0.6
Subject 7,0.4,0.4
Subject 8,0.5,0.55
Subject 9,0.75,0.8
Subject 10,0.45,0.55


In [None]:
# Show the variant tag that appears in the result column names and in the output file name.
print(f"{EOG_ref}-{ECG_ref}")

EEGANet-None


In [None]:
# Write the accuracy table to result/MI/MI_acc-<EOG_ref>-<ECG_ref>.csv
# (relative to the current working directory).
result_dir = 'result/MI'
# to_csv does not create missing folders; make sure the target directory exists first.
os.makedirs(result_dir, exist_ok=True)
df_model_metrics.to_csv(path_or_buf=f'{result_dir}/MI_acc-{EOG_ref}-{ECG_ref}.csv',
                        sep=',', float_format=None)