# EEG based Brain-Computer Interface using Visual Imagery 

## Computational Cognitive Neuroscience 2020/2021

### EEG Pre-Processing Analysis

1. Import libraries
2. Load dataset
3. Remove unwanted channels
4. Bandpass filter the data
5. Create Epochs
6. Artefact Correction with Independent Component Analysis (ICA)
7. Automatic rejection of bad epochs (AutoReject)
8. Save dataset

### 1. Import libraries

In [None]:
%%capture libraries   

import sys
import os
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install mne
!{sys.executable} -m pip install mne-features
import numpy as np
import matplotlib 
import pathlib
import mne
import seaborn as sns
import pandas as pd
from mne.io import concatenate_raws, read_raw_edf
from mne import Epochs, create_info, events_from_annotations
from mne.preprocessing import ICA, create_eog_epochs, create_ecg_epochs,corrmap
from mne.time_frequency import tfr_morlet, psd_multitaper, psd_welch, tfr_stockwell,tfr_multitaper,tfr_array_morlet,AverageTFR
from scipy import signal
from scipy.integrate import simps
matplotlib.use('Qt5Agg') #allow interactive plots
import matplotlib.pyplot as plt
from mne.decoding import GeneralizingEstimator, Scaler,cross_val_multiscore, LinearModel, get_coef, Vectorizer, CSP, SlidingEstimator
from mne.viz import centers_to_edges
from mne.baseline import rescale
from sklearn import metrics
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, StratifiedKFold, ShuffleSplit
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_recall_fscore_support, precision_recall_curve, average_precision_score, plot_precision_recall_curve, ConfusionMatrixDisplay, roc_curve
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.metrics import plot_roc_curve, accuracy_score,precision_score,recall_score,f1_score,roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from autoreject import get_rejection_threshold
from autoreject import AutoReject
%run SM1.ipynb import load_data, excl_chan, filter_data, make_epochs, plot_data, epochs_power

### 2. Load the raw EEG dataset

In [None]:
# Load the recordings with the load_data helper brought in from SM1.ipynb,
# passing it the current working directory.
raw_datasets = load_data(os.getcwd()); #30 sessions in .edf format are expected to be loaded

### 3. Exclude unwanted channels

In [None]:
# The 14 EEG channels kept for the analysis.
# NOTE(review): this list is not passed to excl_chan() below, so the helper
# presumably reads `include_channels` from the shared notebook namespace -- confirm in SM1.ipynb.
include_channels = ['AF3','F7','F3','FC5','T7','P7','O1','O2','P8','T8','FC6','F4','F8','AF4']; #reference_channels = ['CQ_CMS', 'CQ_DRL']
excl_chan(raw_datasets) #remove the channels not included in the above list

#### Plot the unfiltered power spectrum density (PSD) from one session:

In [None]:
# The session at index 1 of raw_datasets serves as the example;
# average=True requests a single channel-averaged spectrum.
raw_datasets[1].plot_psd(average=True)

### 4. Apply Band-Pass Filter between 1-30Hz

In [None]:
# Filter every session with the filter_data helper from SM1.ipynb.
# NOTE(review): the 1-30 Hz pass band is only stated in the section heading;
# presumably it is hard-coded inside the helper -- confirm.
filter_data(raw_datasets)

In [None]:
# Re-plot the power spectrum of the same example session (index 1) after filtering,
# to check whether the power-line noise has been filtered out.
raw_datasets[1].plot_psd(average=True)

### 5. Create Epochs 

Each epoch has a duration of 9.5 seconds. The first and last 250 milliseconds have been removed to avoid overlap between the events. 

In [None]:
# Cut the filtered sessions into epochs with the make_epochs helper from SM1.ipynb.
# NOTE(review): the meaning of the second argument (10) is defined in the helper --
# presumably an epoch length in seconds; check it against the 9.5 s stated in the text.
epoched_data=make_epochs(raw_datasets, 10) 
print(epoched_data.get_data().shape) #the final shape is n_epochs, chans, samples 

In [None]:
# Bare expression: the notebook shows the summary of epoched_data as the cell output.
epoched_data

### 6. Apply ICA to the epoched data

In [None]:
# Channels submitted to ICA: every channel name still present in the first
# session's info, i.e. all channels that survived the exclusion step.
picks = raw_datasets[0].info['ch_names']

# Estimate one independent component per picked channel. The following cells
# refer to component indices 7, 8 and 13, so at least 14 components must be
# fitted for those indices to exist.
# random_state is fixed so that the decomposition (and therefore the component
# numbering) is reproducible between runs.
ica = ICA(n_components=len(picks), method='fastica', max_iter=10000, random_state=89)

# Fit on the epoched data; `reject` is MNE's amplitude criterion (200e-6 V for
# EEG channels) for leaving out segments during fitting.
ica.fit(epoched_data, picks=picks, reject=dict(eeg=200e-6))

#### Plot ICA components

In [None]:
# Plot the scalp topography of every fitted component, so that each index
# referenced in the exclusion step can be inspected. n_components_ is set by
# ica.fit(), so this adapts to however many components were estimated.
ica.plot_components(picks=range(ica.n_components_), inst=epoched_data)

#### Identify the components to exclude:

In [None]:
# Mark three components for removal; according to the inline comment they capture
# eye movements, heartbeat and saccades (which index is which is not recorded here).
# NOTE(review): index 13 is only valid if the ICA above was fitted with at least
# 14 components -- verify against the n_components used in the fit.
ica.exclude=[0,1,13] #exclude eye movements, heartbeat and saccade

#### Before excluding the above identified ICA components, check also their time courses:

In [None]:
# Show the component time courses for the epoched data to double-check the selection.
ica.plot_sources(epoched_data)

#### ICA components 7 and 8 also look potentially problematic. Use plot_overlay to compare the original and cleaned signals and check whether excluding these two additional components makes a substantial difference:


In [None]:
# Average the epochs, then overlay the averaged signal before and after
# removing components 0, 1 and 13 (the same indices as ica.exclude).
epoevo=epoched_data.average() #create an evoked object
ica.plot_overlay(epoevo, exclude=[0,1,13], picks='eeg')  

In [None]:
# Same overlay, additionally removing components 7 and 8, for comparison with the previous plot.
ica.plot_overlay(epoevo, exclude=[0,1,13,7,8], picks='eeg')  

#### The signal didn't change after the removal of components 7 and 8. 
#### Exclude the components earlier identified:

In [None]:
# Remove the components listed in ica.exclude from the epochs.
# NOTE(review): the return value is not assigned, so this relies on MNE modifying
# epoched_data in place -- confirm for the installed MNE version.
ica.apply(epoched_data, exclude=ica.exclude)  

### 7. Reject Bad Epochs using AutoReject

In [None]:
# Fit AutoReject with its default settings on epoched_data and keep the transformed
# result as `epochs`, which the time-frequency and classification cells use.
# NOTE(review): no random_state is passed, so the outcome may differ between runs -- confirm.
ar = AutoReject()
epochs = ar.fit_transform(epoched_data)  

In [None]:
# Estimate a rejection threshold with autoreject's helper.
# NOTE(review): `reject` is computed from epoched_data (not the AutoReject output
# `epochs`) and is not used in the visible remainder of the notebook; the import
# repeats the one already present in the import cell.
from autoreject import get_rejection_threshold
reject = get_rejection_threshold(epoched_data)  

### 8. Time-Frequency Analysis using Morlet Wavelet

In [None]:
# 40 frequencies of interest, logarithmically spaced between 2 and 30 Hz.
freqs = np.logspace(*np.log10([2, 30]), num=40) # define frequencies of interest (log-spaced) 
n_cycles = freqs / 2.  # number of wavelet cycles grows linearly with frequency


# Compute Morlet-wavelet power for all epochs in `epochs` (no condition is selected here).
# average=False keeps the result per epoch instead of averaging across epochs;
# return_itc=False skips inter-trial coherence; decim=3 decimates the output in time.
power = mne.time_frequency.tfr_morlet(epochs, freqs=freqs, n_cycles=n_cycles, 
                                           use_fft=True, average=False,
                                           return_itc=False, decim=3, n_jobs=1)


In [None]:
#Extract TFR features in Alpha band (8-12Hz)

# power.data is indexed here as (epochs, channels, frequencies, times).
n_col = power.data.shape[3]   # number of time samples per epoch
n_chan = power.data.shape[1]  # number of channels
n_row = power.data.shape[0]   # number of epochs

# Frequencies inside the half-open alpha band [8, 12) Hz.
alpha_mask = (power.freqs >= 8) & (power.freqs < 12)

# Average the power over the selected frequencies for every epoch and channel
# in one vectorised step; the result has shape (n_row, n_chan, n_col).
alpha_pow = power.data[:, :, alpha_mask, :].mean(axis=2)

First, initialise the variables below, which store the accuracies and F1 scores of all classifiers:

In [None]:
# Two independent, initially empty lists: one collects the accuracy scores,
# the other the F1 scores, once the classifiers have been evaluated.
accuracies = []
f1_scores = []

Split the dataset into training and test sets with a 70:30 ratio (training:test):


In [None]:
# Features: the alpha-band power array built above (one entry per epoch, channel and time sample).
data= alpha_pow   
# Labels: the last column of the events array of `epochs`, one value per epoch.
labels = epochs.events[:,-1] #our labels

In [None]:
# Hold out 30% of the epochs as the test set; the fixed random_state keeps the
# split identical between runs.
train_data, test_data, labels_train, labels_test = train_test_split(
    data,
    labels,
    test_size=0.30,
    random_state=37,
)


### Support Vector Machine(SVM)

In [None]:
# Pipeline: flatten each epoch into a feature vector, standardise the features,
# then classify with an SVM (probability=True makes predict_proba available).
clf_svm_pip = make_pipeline(
    Vectorizer(),
    StandardScaler(),
    svm.SVC(probability=True),
)

# Hyper-parameter grid; keys are "<pipeline step name>__<parameter>".
parameters = {
    'svc__kernel': ['linear', 'rbf', 'sigmoid'],
    'svc__C': [0.1, 1, 10],
    'svc__gamma': [0.1, 0.01, 0.001],
}

# Exhaustive search over the grid, scored by accuracy with stratified 10-fold CV.
gs_cv_svm = GridSearchCV(
    clf_svm_pip,
    parameters,
    scoring='accuracy',
    cv=StratifiedKFold(n_splits=10),
    return_train_score=True,
)

# Train on the training split only.
gs_cv_svm.fit(train_data, labels_train)
print('Best Parameters: {}'.format(gs_cv_svm.best_params_))
print('Best Score: {}'.format(gs_cv_svm.best_score_))

# Predict the held-out test epochs with the tuned model.
predictions_svm = gs_cv_svm.predict(test_data)

# Compute all test-set metrics ...
report_svm = classification_report(
    labels_test, predictions_svm, target_names=['Relax', 'Push']
)
acc_svm = accuracy_score(labels_test, predictions_svm)
precision_svm, recall_svm, fscore_svm, support_svm = precision_recall_fscore_support(
    labels_test, predictions_svm, average='macro'
)

# ... then report them.
print('SVM Clasification Report:\n {}'.format(report_svm))
print("Accuracy of SVM model: {}".format(acc_svm))
print('Precision: {0}, Recall: {1}, f1-score:{2}'.format(precision_svm,recall_svm,fscore_svm))

In [None]:
#Area Under Curve (AUC) value
# ROC AUC is a ranking metric, so it is computed from the predicted probability
# of the positive class (second column of predict_proba, i.e. the larger label)
# rather than from hard class predictions, which reduce the ROC curve to a
# single operating point.
auc = roc_auc_score(labels_test, gs_cv_svm.predict_proba(test_data)[:, 1])
print('ROC AUC: %f' % auc)

In [None]:
# ROC curve of the tuned SVM on the test split; the returned display object is
# re-drawn in the ROC comparison plot.
# NOTE(review): plot_roc_curve was removed in scikit-learn 1.2
# (RocCurveDisplay.from_estimator replaces it) -- confirm the pinned version.
svm_roc = plot_roc_curve(gs_cv_svm, test_data, labels_test)  

In [None]:
# Precision-recall curve of the tuned SVM on the test split; the returned display
# object is re-drawn in the precision-recall comparison plot.
# NOTE(review): plot_precision_recall_curve was removed in scikit-learn 1.2
# (PrecisionRecallDisplay.from_estimator replaces it) -- confirm the pinned version.
svm_pr = plot_precision_recall_curve(gs_cv_svm, test_data, labels_test) #precision-recall curve

In [None]:
# Performance metrics
# For class labels the meaningful error measure is the share of test epochs whose
# predicted label differs from the true one (a unit-less fraction in [0, 1]).
errors_svc = predictions_svm != labels_test
print('Misclassification rate:', round(np.mean(errors_svc), 2))

### Linear Discriminant Analysis (LDA)

In [None]:
# Pipeline: flatten each epoch into a feature vector, standardise the features,
# then classify with Linear Discriminant Analysis.
clf_lda_pip = make_pipeline(
    Vectorizer(),
    StandardScaler(),
    LinearDiscriminantAnalysis(),
)

# Single-point grid (only the 'svd' solver), so the search reduces to a
# cross-validated score of this one configuration.
parameters = {
    'lineardiscriminantanalysis__solver': ['svd'],
}
gs_cv_lda = GridSearchCV(
    clf_lda_pip,
    parameters,
    scoring='accuracy',
    cv=StratifiedKFold(n_splits=10),
    return_train_score=True,
)

# Train on the training split only.
gs_cv_lda.fit(train_data, labels_train)

print('Best Parameters: {}'.format(gs_cv_lda.best_params_))
print('Best Score: {}'.format(gs_cv_lda.best_score_))

# Predict the held-out test epochs with the fitted model.
predictions_lda = gs_cv_lda.predict(test_data)

# Compute all test-set metrics ...
report_lda = classification_report(
    labels_test, predictions_lda, target_names=['Relax', 'Push']
)
acc_lda = accuracy_score(labels_test, predictions_lda)
precision_lda, recall_lda, fscore_lda, support_lda = precision_recall_fscore_support(
    labels_test, predictions_lda, average='macro'
)

# ... then report them.
print('LDA Clasification Report:\n {}'.format(report_lda))
print("Accuracy of LDA model: {}".format(acc_lda))
print('Precision: {0}, Recall: {1}, f1-score:{2}'.format(precision_lda,recall_lda,fscore_lda))

In [None]:
#Area Under Curve (AUC) value
# ROC AUC is a ranking metric, so it is computed from the predicted probability
# of the positive class (second column of predict_proba, i.e. the larger label)
# rather than from hard class predictions, which reduce the ROC curve to a
# single operating point.
auc = roc_auc_score(labels_test, gs_cv_lda.predict_proba(test_data)[:, 1])
print('ROC AUC: %f' % auc)

In [None]:
# ROC curve of the LDA model on the test split; the returned display object is
# re-drawn in the ROC comparison plot.
# NOTE(review): plot_roc_curve was removed in scikit-learn 1.2
# (RocCurveDisplay.from_estimator replaces it) -- confirm the pinned version.
lda_roc = plot_roc_curve(gs_cv_lda, test_data, labels_test) 

In [None]:
# Precision-recall curve of the LDA model on the test split; the returned display
# object is re-drawn in the precision-recall comparison plot.
# NOTE(review): plot_precision_recall_curve was removed in scikit-learn 1.2
# (PrecisionRecallDisplay.from_estimator replaces it) -- confirm the pinned version.
lda_pr = plot_precision_recall_curve(gs_cv_lda,test_data, labels_test)  

In [None]:
# Performance metrics
# For class labels the meaningful error measure is the share of test epochs whose
# predicted label differs from the true one (a unit-less fraction in [0, 1]).
errors_lda = predictions_lda != labels_test
print('Misclassification rate:', round(np.mean(errors_lda), 2))

### Logistic Regression (LR)

In [None]:
# Pipeline: flatten each epoch into a feature vector, standardise the features,
# then classify with logistic regression (max_iter raised to 5000).
clf_lr_pip = make_pipeline(
    Vectorizer(),
    StandardScaler(),
    LogisticRegression(max_iter=5000),
)

# 100 candidate values for the inverse regularisation strength C,
# log-spaced from 10**0 to 10**4.
parameters = {
    'logisticregression__C': np.logspace(0, 4, 100),
}

# Grid search scored by accuracy with stratified 10-fold cross-validation.
gs_cv_lr = GridSearchCV(
    clf_lr_pip,
    parameters,
    scoring='accuracy',
    cv=StratifiedKFold(n_splits=10),
)
gs_cv_lr.fit(train_data, labels_train)

print('Best Parameters: {}'.format(gs_cv_lr.best_params_))
print('Best Score: {}'.format(gs_cv_lr.best_score_))

# Predict the held-out test epochs with the tuned model.
predictions_lr = gs_cv_lr.predict(test_data)

# Compute all test-set metrics ...
report_lr = classification_report(
    labels_test, predictions_lr, target_names=['Relax', 'Push']
)
acc_lr = accuracy_score(labels_test, predictions_lr)
precision_lr, recall_lr, fscore_lr, support_lr = precision_recall_fscore_support(
    labels_test, predictions_lr, average='macro'
)

# ... then report them.
print('LR Clasification Report:\n {}'.format(report_lr))
print("Accuracy of LR model: {}".format(acc_lr))
print('Precision: {0}, Recall: {1}, f1-score:{2}'.format(precision_lr,recall_lr,fscore_lr))

In [None]:
#Area Under Curve (AUC) value
# ROC AUC is a ranking metric, so it is computed from the predicted probability
# of the positive class (second column of predict_proba, i.e. the larger label)
# rather than from hard class predictions, which reduce the ROC curve to a
# single operating point.
auc = roc_auc_score(labels_test, gs_cv_lr.predict_proba(test_data)[:, 1])
print('ROC AUC: %f' % auc)

In [None]:
# ROC curve of the logistic-regression model on the test split; the returned
# display object is re-drawn in the ROC comparison plot.
# NOTE(review): plot_roc_curve was removed in scikit-learn 1.2
# (RocCurveDisplay.from_estimator replaces it) -- confirm the pinned version.
lr_roc = plot_roc_curve(gs_cv_lr, test_data, labels_test)  

In [None]:
# Precision-recall curve of the logistic-regression model on the test split; the
# returned display object is re-drawn in the precision-recall comparison plot.
# NOTE(review): plot_precision_recall_curve was removed in scikit-learn 1.2
# (PrecisionRecallDisplay.from_estimator replaces it) -- confirm the pinned version.
lr_pr = plot_precision_recall_curve(gs_cv_lr, test_data, labels_test)  

In [None]:
# Performance metrics
# For class labels the meaningful error measure is the share of test epochs whose
# predicted label differs from the true one (a unit-less fraction in [0, 1]).
errors_lr = predictions_lr != labels_test
print('Misclassification rate:', round(np.mean(errors_lr), 2))

### Random Forest (RF)

In [None]:
# Pipeline: flatten each epoch into a feature vector, standardise the features,
# then classify with a random forest.
# NOTE(review): no random_state is given to the forest, so scores may vary between runs.
clf_rf_pip = make_pipeline(
    Vectorizer(),
    StandardScaler(),
    RandomForestClassifier(),
)

# Hyper-parameter grid: number of trees, split criterion and maximum tree depth.
parameters = {
    'randomforestclassifier__n_estimators': [100, 200, 300, 400, 500, 600, 700],
    'randomforestclassifier__criterion': ['gini', 'entropy'],
    'randomforestclassifier__max_depth': [1, 2, 3, 4, 5],
}

# Grid search scored by accuracy with stratified 10-fold cross-validation.
gs_cv_rf = GridSearchCV(
    clf_rf_pip,
    parameters,
    scoring='accuracy',
    cv=StratifiedKFold(n_splits=10),
    return_train_score=True,
)
gs_cv_rf.fit(train_data, labels_train)

print('Best Parameters: {}'.format(gs_cv_rf.best_params_))
print('Best Score: {}'.format(gs_cv_rf.best_score_))

# Predict the held-out test epochs with the tuned model.
predictions_rf = gs_cv_rf.predict(test_data)

# Compute all test-set metrics ...
report_rf = classification_report(
    labels_test, predictions_rf, target_names=['Relax', 'Push']
)
acc_rf = accuracy_score(labels_test, predictions_rf)
precision_rf, recall_rf, fscore_rf, support_rf = precision_recall_fscore_support(
    labels_test, predictions_rf, average='macro'
)

# ... then report them.
print('RF Clasification Report:\n {}'.format(report_rf))
print("Accuracy of RF model: {}".format(acc_rf))
print('Precision: {0}, Recall: {1}, f1-score:{2}'.format(precision_rf,recall_rf,fscore_rf))

In [None]:
#Area Under Curve (AUC) value
# ROC AUC is a ranking metric, so it is computed from the predicted probability
# of the positive class (second column of predict_proba, i.e. the larger label)
# rather than from hard class predictions, which reduce the ROC curve to a
# single operating point.
auc = roc_auc_score(labels_test, gs_cv_rf.predict_proba(test_data)[:, 1])
print('ROC AUC: %f' % auc)

In [None]:
# ROC curve of the random-forest model on the test split; the returned display
# object is re-drawn in the ROC comparison plot.
# NOTE(review): plot_roc_curve was removed in scikit-learn 1.2
# (RocCurveDisplay.from_estimator replaces it) -- confirm the pinned version.
rf_roc = plot_roc_curve(gs_cv_rf, test_data, labels_test) 

In [None]:
# Precision-recall curve of the random-forest model on the test split; the returned
# display object is re-drawn in the precision-recall comparison plot.
# NOTE(review): plot_precision_recall_curve was removed in scikit-learn 1.2
# (PrecisionRecallDisplay.from_estimator replaces it) -- confirm the pinned version.
rf_pr = plot_precision_recall_curve(gs_cv_rf, test_data, labels_test)  

In [None]:
# Performance metrics
# For class labels the meaningful error measure is the share of test epochs whose
# predicted label differs from the true one (a unit-less fraction in [0, 1]).
errors_rf = predictions_rf != labels_test
print('Misclassification rate:', round(np.mean(errors_rf), 2))

In [None]:
# Store the performance of all classifiers as one row per metric list.
# Column order is fixed: SVM, LDA, LR, RF (the bar plot below indexes rows by this order).
# NOTE: re-running this cell appends another row each time.
accuracies.append([acc_svm, acc_lda, acc_lr, acc_rf])
f1_scores.append([fscore_svm, fscore_lda, fscore_lr, fscore_rf ])

In [None]:
# ROC curve comparison: redraw the four stored ROC displays on one shared axes.

# Switch to the inline backend so the figure is rendered inside the notebook.
%matplotlib inline

# Current axes, shared by all four curves.
ax = plt.gca()

# label= sets the legend entry for each curve.
svm_roc.plot(ax=ax, alpha=0.8,label='SVM')
lda_roc.plot(ax=ax, alpha=0.8,label='LDA')    
lr_roc.plot(ax=ax, alpha=0.8,label='LR')       
rf_roc.plot(ax=ax, alpha=0.8,label='RF')

 
plt.legend()
plt.show()

In [None]:
# Precision-recall curve comparison: redraw the four stored displays on one shared axes.

#%matplotlib inline

# Current axes, shared by all four curves.
ax = plt.gca()

# label= sets the legend entry for each curve.
svm_pr.plot(ax=ax, alpha=0.8,label='SVM')
lda_pr.plot(ax=ax, alpha=0.8,label='LDA')
lr_pr.plot(ax=ax, alpha=0.8,label='LR')
rf_pr.plot(ax=ax, alpha=0.8,label='RF')

plt.legend()
plt.show()

In [None]:
# Sanity check of the stored scores: one row per appended run, four classifiers per row.
# NOTE(review): the visible notebook appends a single row, which gives shape (1, 4);
# the "(3,4)" in the comment below presumably refers to a workflow with three
# classification tasks -- confirm.
print(np.shape(accuracies)) #the final shape should be (3,4)
print(accuracies)

In [None]:
# Print the stored macro-averaged F1 scores (column order: SVM, LDA, LR, RF).
print(f1_scores)#check the values

In [None]:
# Plot Accuracy scores

# Horizontal offset between the bar groups of consecutive classifiers.
# (Each individual bar is drawn with width 1, see plt.bar below.)
barWidth = 3

# (legend label, colour) per classifier, in the column order of `accuracies`:
# SVM, LDA, LR, RF.
classifier_styles = [
    ('SVM', '#87CEFA'),
    ('LDA', '#FFE4E1'),
    ('LR', '#CD5C5C'),
    ('RF', '#C5E384'),
]

# Create the bars
ax = plt.axes()
for column, (name, colour) in enumerate(classifier_styles):
    # One score per stored row for this classifier.
    scores = [row[column] for row in accuracies]
    # Row i of classifier k is drawn at x = i + k * barWidth.
    positions = np.arange(len(scores)) + column * barWidth
    plt.bar(positions, scores, color=colour, width=1, edgecolor='white', label=name)

#plt.axhline(y=0.5, color='k', linestyle='--',linewidth=0.4)
plt.xlabel('Classification Tasks')
plt.ylabel(' Accuracies')
# Place the legend just outside the right edge of the axes.
plt.legend(loc="upper left", bbox_to_anchor=(0.98, 1))
# Hide the x ticks: the x positions carry no numeric meaning.
plt.xticks([], [])

plt.show()