ID Number: 33385806

# EEG based Brain-Computer Interface using Visual Imagery 

## MSc Project for Computational Cognitive Neuroscience 2020/2021

### Decoding analysis using time-frequency features with SVM, LR, LDA, RF
### In this notebook, features are extracted without applying noise or artifact removal. All 14 channels are selected.

### Import Libraries

In [None]:
%%capture libraries   

import sys
import os
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install mne
!{sys.executable} -m pip install mne-features
import numpy as np
import matplotlib 
import pathlib
import mne
import seaborn as sns
import pandas as pd
from mne.io import concatenate_raws, read_raw_edf
from mne import Epochs, create_info, events_from_annotations
from mne.preprocessing import ICA, create_eog_epochs, create_ecg_epochs,corrmap
from mne.time_frequency import tfr_morlet, psd_multitaper, psd_welch, tfr_stockwell,tfr_multitaper,tfr_array_morlet,AverageTFR
from scipy import signal
from scipy.integrate import simps
matplotlib.use('Qt5Agg') #allow interactive plots
import matplotlib.pyplot as plt
from mne.decoding import GeneralizingEstimator, Scaler,cross_val_multiscore, LinearModel, get_coef, Vectorizer, CSP, SlidingEstimator
from mne.viz import centers_to_edges
from mne.baseline import rescale
from sklearn import metrics
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, StratifiedKFold, ShuffleSplit
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_recall_fscore_support, precision_recall_curve, average_precision_score, plot_precision_recall_curve, ConfusionMatrixDisplay, roc_curve
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.metrics import plot_roc_curve, accuracy_score,precision_score,recall_score,f1_score,roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
%run EEG_functions_1.ipynb import load_data, excl_chan, filter_data, make_epochs, plot_data, epochs_power

### Compare the four classifiers when the features are extracted from non-preprocessed data (no ICA or bad-epoch rejection performed)

In [None]:
# Load the raw recordings found in the current working directory
# (original author's note: 30 raw sessions in .edf format).
raw_datasets = load_data(os.getcwd())

# The 14 channels kept for the analysis.
include_channels = [
    'AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1',
    'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4',
]
# Drop every channel that is not in the list above.
# NOTE(review): include_channels is not passed explicitly; presumably
# excl_chan reads it from the notebook namespace - confirm in
# EEG_functions_1.ipynb.
excl_chan(raw_datasets)

# Band-pass filter (1-30 Hz according to the original note; the actual
# cut-offs live inside the helper - TODO confirm).
filter_data(raw_datasets)

# Cut the continuous recordings into epochs.
# NOTE(review): the meaning of the second argument (10) is defined by
# make_epochs - confirm.
epoched_data = make_epochs(raw_datasets, 10)

# Expected layout per the original note: (n_epochs, n_channels, n_samples)
print(epoched_data.get_data().shape)

In [None]:
# Time-frequency decomposition of the (un-preprocessed) epochs.

# 40 log-spaced frequencies of interest between 2 and 30 Hz
freqs = np.logspace(np.log10(2), np.log10(30), num=40)
# Number of wavelet cycles grows linearly with frequency (freq / 2)
n_cycles = freqs / 2.

# Morlet-wavelet power per epoch (average=False keeps single trials),
# without inter-trial coherence, decimated by a factor of 3.
power_dec = tfr_morlet(
    epoched_data,
    freqs=freqs,
    n_cycles=n_cycles,
    use_fft=True,
    return_itc=False,
    decim=3,
    n_jobs=1,
    average=False,
)

print(power_dec.data.shape)
 

In [None]:
# Features in the theta band: frequency bins f with 4 <= f < 7 Hz
# (half-open interval, so a bin at exactly 7 Hz would be excluded).

# power_dec.data is indexed as [row (epoch), channel, frequency, column].
n_row, n_chan, _, n_col = power_dec.data.shape

# Boolean selector over the frequency axis for the theta band.
theta_mask = (power_dec.freqs >= 4) & (power_dec.freqs < 7)

# Average the power over the selected frequency bins for every epoch and
# channel in one vectorised step (replaces the nested Python loops and the
# unused `counter`). Result shape: (n_row, n_chan, n_col).
# Note: boolean indexing copies the selected band before averaging.
theta_pow_dec = power_dec.data[:, :, theta_mask, :].mean(axis=2)

In [None]:
# Features in the alpha band: frequency bins f with 8 <= f < 12 Hz
# (half-open interval, so a bin at exactly 12 Hz would be excluded).

# power_dec.data is indexed as [row (epoch), channel, frequency, column].
n_row, n_chan, _, n_col = power_dec.data.shape

# Boolean selector over the frequency axis for the alpha band.
alpha_mask = (power_dec.freqs >= 8) & (power_dec.freqs < 12)

# Average the power over the selected frequency bins for every epoch and
# channel in one vectorised step (replaces the nested Python loops and the
# unused `counter`). Result shape: (n_row, n_chan, n_col).
# Note: boolean indexing copies the selected band before averaging.
alpha_pow_dec = power_dec.data[:, :, alpha_mask, :].mean(axis=2)
        

In [None]:
# Features in the beta band: frequency bins f with 13 <= f < 30 Hz
# (half-open interval).
# NOTE(review): the strict `< 30` may drop the highest analysed frequency
# if it evaluates to exactly 30 Hz - confirm this is intended.

# power_dec.data is indexed as [row (epoch), channel, frequency, column].
n_row, n_chan, _, n_col = power_dec.data.shape

# Boolean selector over the frequency axis for the beta band.
beta_mask = (power_dec.freqs >= 13) & (power_dec.freqs < 30)

# Average the power over the selected frequency bins for every epoch and
# channel in one vectorised step (replaces the nested Python loops and the
# unused `counter`). Result shape: (n_row, n_chan, n_col).
# Note: boolean indexing copies the selected band before averaging.
beta_pow_dec = power_dec.data[:, :, beta_mask, :].mean(axis=2)

First, initialise the variables below to store the accuracies and F1-scores from all classifiers:

In [None]:
# Run this cell only once: re-running it discards the results already
# collected for previous feature sets.
accuracies = []  # one row of classifier accuracies per evaluated feature set
f1_scores = []   # one row of classifier F1-scores per evaluated feature set

In [None]:
# Choose the feature set to decode: theta_pow_dec, alpha_pow_dec or
# beta_pow_dec (re-run the cells below once per band).
data = alpha_pow_dec
# The last column of the events array holds the class label of each epoch.
labels = epoched_data.events[:, -1]

# Hold out 30% of the epochs for testing; fixed seed for a repeatable split.
# NOTE(review): the split is not stratified by label.
train_data, test_data, labels_train, labels_test = train_test_split(
    data,
    labels,
    test_size=0.3,
    random_state=173,
)

### Define the Support Vector Machine (SVM)

In [None]:
# Pipeline: flatten each epoch to a feature vector, standardise, then SVC.
# probability=True is required for the probability-based curves plotted later.
clf_svm_pip = make_pipeline(
    Vectorizer(),
    StandardScaler(),
    svm.SVC(probability=True),
)

# Hyper-parameter grid explored by the search
parameters = {
    'svc__kernel': ['rbf', 'sigmoid'],
    'svc__C': [0.1, 1, 10],
    'svc__gamma': [0.1, 0.01, 0.001],
}

# Exhaustive grid search scored by accuracy with 10-fold stratified CV
gs_cv_svm = GridSearchCV(
    estimator=clf_svm_pip,
    param_grid=parameters,
    scoring='accuracy',
    cv=StratifiedKFold(n_splits=10),
    return_train_score=True,
)

# Train on the training split only
gs_cv_svm.fit(train_data, labels_train)
print('Best Parameters: {}'.format(gs_cv_svm.best_params_))
print('Best Score: {}'.format(gs_cv_svm.best_score_))

# Predict the held-out epochs with the best configuration found
predictions_svm = gs_cv_svm.predict(test_data)

# Evaluate on the held-out split
# NOTE(review): target_names assumes the sorted labels map to Relax, Push.
report_svm = classification_report(
    y_true=labels_test,
    y_pred=predictions_svm,
    target_names=['Relax', 'Push'],
)
print('SVM Clasification Report:\n {}'.format(report_svm))

acc_svm = accuracy_score(y_true=labels_test, y_pred=predictions_svm)
print("Accuracy of SVM model: {}".format(acc_svm))

# Macro-averaged precision / recall / F1 across the classes
precision_svm, recall_svm, fscore_svm, support_svm = precision_recall_fscore_support(
    y_true=labels_test,
    y_pred=predictions_svm,
    average='macro',
)
print('Precision: {0}, Recall: {1}, f1-score:{2}'.format(precision_svm,recall_svm,fscore_svm))

In [None]:
# Area Under the ROC Curve (AUC) value
# AUC is a ranking metric, so it must be computed from continuous scores.
# Use the predicted probability of the greater-label class (second column of
# predict_proba) instead of the hard class predictions, which would collapse
# the ROC curve to a single operating point.
scores_svm = gs_cv_svm.predict_proba(test_data)[:, 1]
auc = roc_auc_score(labels_test, scores_svm)
print('ROC AUC: %f' % auc)

In [None]:
# ROC curve of the tuned SVM on the held-out split; the returned display
# object is kept for the classifier comparison plot.
svm_roc = plot_roc_curve(estimator=gs_cv_svm, X=test_data, y=labels_test)

In [None]:
# Precision-recall curve of the tuned SVM on the held-out split; the returned
# display object is kept for the classifier comparison plot.
svm_pr = plot_precision_recall_curve(estimator=gs_cv_svm, X=test_data, y=labels_test)

In [None]:
# Performance metrics
# Per-epoch error indicator: 1 where the predicted label differs from the
# true label, 0 otherwise. Comparing labels (rather than subtracting them)
# keeps the result independent of the numeric label codes.
errors_svc = (predictions_svm != labels_test).astype(int)
# The mean of the indicator is the fraction of misclassified test epochs
# (a unit-less rate, not "degrees").
print('Misclassification rate:', round(np.mean(errors_svc), 2))

### Linear Discriminant Analysis (LDA)

In [None]:
# Pipeline: flatten each epoch to a feature vector, standardise, then LDA.
clf_lda_pip = make_pipeline(
    Vectorizer(),
    StandardScaler(),
    LinearDiscriminantAnalysis(),
)

# Single-point grid: only the 'svd' solver is evaluated
parameters = {'lineardiscriminantanalysis__solver': ['svd']}

# Grid search scored by accuracy with 10-fold stratified CV
gs_cv_lda = GridSearchCV(
    estimator=clf_lda_pip,
    param_grid=parameters,
    scoring='accuracy',
    cv=StratifiedKFold(n_splits=10),
    return_train_score=True,
)
gs_cv_lda.fit(train_data, labels_train)

print('Best Parameters: {}'.format(gs_cv_lda.best_params_))
print('Best Score: {}'.format(gs_cv_lda.best_score_))

# Predict the held-out epochs
predictions_lda = gs_cv_lda.predict(test_data)

# Evaluate on the held-out split
# NOTE(review): target_names assumes the sorted labels map to Relax, Push.
report_lda = classification_report(
    y_true=labels_test,
    y_pred=predictions_lda,
    target_names=['Relax', 'Push'],
)
print('LDA Clasification Report:\n {}'.format(report_lda))

acc_lda = accuracy_score(y_true=labels_test, y_pred=predictions_lda)
print("Accuracy of LDA model: {}".format(acc_lda))

# Macro-averaged precision / recall / F1 across the classes
precision_lda, recall_lda, fscore_lda, support_lda = precision_recall_fscore_support(
    y_true=labels_test,
    y_pred=predictions_lda,
    average='macro',
)
print('Precision: {0}, Recall: {1}, f1-score:{2}'.format(precision_lda,recall_lda,fscore_lda))

In [None]:
# Area Under the ROC Curve (AUC) value
# AUC is a ranking metric, so it must be computed from continuous scores.
# Use the predicted probability of the greater-label class (second column of
# predict_proba) instead of the hard class predictions, which would collapse
# the ROC curve to a single operating point.
scores_lda = gs_cv_lda.predict_proba(test_data)[:, 1]
auc = roc_auc_score(labels_test, scores_lda)
print('ROC AUC: %f' % auc)

In [None]:
# ROC curve of the tuned LDA on the held-out split; the returned display
# object is kept for the classifier comparison plot.
lda_roc = plot_roc_curve(estimator=gs_cv_lda, X=test_data, y=labels_test)

In [None]:
# Precision-recall curve of the tuned LDA on the held-out split; the returned
# display object is kept for the classifier comparison plot.
lda_pr = plot_precision_recall_curve(estimator=gs_cv_lda, X=test_data, y=labels_test)

In [None]:
# Performance metrics
# Per-epoch error indicator: 1 where the predicted label differs from the
# true label, 0 otherwise. Comparing labels (rather than subtracting them)
# keeps the result independent of the numeric label codes.
errors_lda = (predictions_lda != labels_test).astype(int)
# The mean of the indicator is the fraction of misclassified test epochs
# (a unit-less rate, not "degrees").
print('Misclassification rate:', round(np.mean(errors_lda), 2))

### Logistic Regression (LR)

In [None]:
# Pipeline: flatten each epoch to a feature vector, standardise, then
# logistic regression (iteration cap raised to 5000).
clf_lr_pip = make_pipeline(
    Vectorizer(),
    StandardScaler(),
    LogisticRegression(max_iter=5000),
)

# 100 log-spaced values of C between 1 and 10^4
parameters = {'logisticregression__C': np.logspace(0, 4, 100)}

# Grid search scored by accuracy with 10-fold stratified CV
gs_cv_lr = GridSearchCV(
    estimator=clf_lr_pip,
    param_grid=parameters,
    scoring='accuracy',
    cv=StratifiedKFold(n_splits=10),
)
gs_cv_lr.fit(train_data, labels_train)

print('Best Parameters: {}'.format(gs_cv_lr.best_params_))
print('Best Score: {}'.format(gs_cv_lr.best_score_))

# Predict the held-out epochs
predictions_lr = gs_cv_lr.predict(test_data)

# Evaluate on the held-out split
# NOTE(review): target_names assumes the sorted labels map to Relax, Push.
report_lr = classification_report(
    y_true=labels_test,
    y_pred=predictions_lr,
    target_names=['Relax', 'Push'],
)
print('LR Clasification Report:\n {}'.format(report_lr))

acc_lr = accuracy_score(y_true=labels_test, y_pred=predictions_lr)
print("Accuracy of LR model: {}".format(acc_lr))

# Macro-averaged precision / recall / F1 across the classes
precision_lr, recall_lr, fscore_lr, support_lr = precision_recall_fscore_support(
    y_true=labels_test,
    y_pred=predictions_lr,
    average='macro',
)
print('Precision: {0}, Recall: {1}, f1-score:{2}'.format(precision_lr,recall_lr,fscore_lr))

In [None]:
# Area Under the ROC Curve (AUC) value
# AUC is a ranking metric, so it must be computed from continuous scores.
# Use the predicted probability of the greater-label class (second column of
# predict_proba) instead of the hard class predictions, which would collapse
# the ROC curve to a single operating point.
scores_lr = gs_cv_lr.predict_proba(test_data)[:, 1]
auc = roc_auc_score(labels_test, scores_lr)
print('ROC AUC: %f' % auc)

In [None]:
# ROC curve of the tuned logistic regression on the held-out split; the
# returned display object is kept for the classifier comparison plot.
lr_roc = plot_roc_curve(estimator=gs_cv_lr, X=test_data, y=labels_test)

In [None]:
# Precision-recall curve of the tuned logistic regression on the held-out
# split; the returned display object is kept for the comparison plot.
lr_pr = plot_precision_recall_curve(estimator=gs_cv_lr, X=test_data, y=labels_test)

In [None]:
# Performance metrics
# Per-epoch error indicator: 1 where the predicted label differs from the
# true label, 0 otherwise. Comparing labels (rather than subtracting them)
# keeps the result independent of the numeric label codes.
errors_lr = (predictions_lr != labels_test).astype(int)
# The mean of the indicator is the fraction of misclassified test epochs
# (a unit-less rate, not "degrees").
print('Misclassification rate:', round(np.mean(errors_lr), 2))

### Random Forest (RF)

In [None]:
# Pipeline: flatten each epoch to a feature vector, standardise, then a
# random forest.
# NOTE(review): no random_state is set on the forest, so results may vary
# between runs.
clf_rf_pip = make_pipeline(
    Vectorizer(),
    StandardScaler(),
    RandomForestClassifier(),
)

# Hyper-parameter grid explored by the search
parameters = {
    'randomforestclassifier__n_estimators': [100,200,300,400,500,600,700],
    'randomforestclassifier__criterion': ['gini','entropy'],
    'randomforestclassifier__max_depth': [1,2,3,4,5],
}

# Grid search scored by accuracy with 10-fold stratified CV
gs_cv_rf = GridSearchCV(
    estimator=clf_rf_pip,
    param_grid=parameters,
    scoring='accuracy',
    cv=StratifiedKFold(n_splits=10),
    return_train_score=True,
)
gs_cv_rf.fit(train_data, labels_train)

print('Best Parameters: {}'.format(gs_cv_rf.best_params_))
print('Best Score: {}'.format(gs_cv_rf.best_score_))

# Predict the held-out epochs
predictions_rf = gs_cv_rf.predict(test_data)

# Evaluate on the held-out split
# NOTE(review): target_names assumes the sorted labels map to Relax, Push.
report_rf = classification_report(
    y_true=labels_test,
    y_pred=predictions_rf,
    target_names=['Relax', 'Push'],
)
print('RF Clasification Report:\n {}'.format(report_rf))

acc_rf = accuracy_score(y_true=labels_test, y_pred=predictions_rf)
print("Accuracy of RF model: {}".format(acc_rf))

# Macro-averaged precision / recall / F1 across the classes
precision_rf, recall_rf, fscore_rf, support_rf = precision_recall_fscore_support(
    y_true=labels_test,
    y_pred=predictions_rf,
    average='macro',
)
print('Precision: {0}, Recall: {1}, f1-score:{2}'.format(precision_rf,recall_rf,fscore_rf))

In [None]:
# Area Under the ROC Curve (AUC) value
# AUC is a ranking metric, so it must be computed from continuous scores.
# Use the predicted probability of the greater-label class (second column of
# predict_proba) instead of the hard class predictions, which would collapse
# the ROC curve to a single operating point.
scores_rf = gs_cv_rf.predict_proba(test_data)[:, 1]
auc = roc_auc_score(labels_test, scores_rf)
print('ROC AUC: %f' % auc)

In [None]:
# ROC curve of the tuned random forest on the held-out split; the returned
# display object is kept for the classifier comparison plot.
rf_roc = plot_roc_curve(estimator=gs_cv_rf, X=test_data, y=labels_test)

In [None]:
# Precision-recall curve of the tuned random forest on the held-out split;
# the returned display object is kept for the classifier comparison plot.
rf_pr = plot_precision_recall_curve(estimator=gs_cv_rf, X=test_data, y=labels_test)

In [None]:
# Performance metrics
# Per-epoch error indicator: 1 where the predicted label differs from the
# true label, 0 otherwise. Comparing labels (rather than subtracting them)
# keeps the result independent of the numeric label codes.
errors_rf = (predictions_rf != labels_test).astype(int)
# The mean of the indicator is the fraction of misclassified test epochs
# (a unit-less rate, not "degrees").
print('Misclassification rate:', round(np.mean(errors_rf), 2))

Store the classifier performance:

In [None]:
# Record one row per evaluated feature set, in the fixed column order
# SVM, LDA, LR, RF (the bar charts index the columns in this order).
accuracy_row = [acc_svm, acc_lda, acc_lr, acc_rf]
f1_row = [fscore_svm, fscore_lda, fscore_lr, fscore_rf]

accuracies.append(accuracy_row)
f1_scores.append(f1_row)

### ROC Curve comparison


In [None]:
%matplotlib inline

ax = plt.gca()

svm_roc.plot(ax=ax, alpha=0.8,label='SVM')
lda_roc.plot(ax=ax, alpha=0.8,label='LDA')        
lr_roc.plot(ax=ax, alpha=0.8,label='LR')   
rf_roc.plot(ax=ax, alpha=0.8,label='RF')

 
plt.legend()
plt.show()

### Precision-Recall Curve comparison


In [None]:
# Overlay the precision-recall curves of all four classifiers on one set of
# axes (the inline backend is selected in the ROC comparison cell).
ax = plt.gca()

pr_displays = (
    (svm_pr, 'SVM'),
    (lda_pr, 'LDA'),
    (lr_pr, 'LR'),
    (rf_pr, 'RF'),
)
for pr_display, clf_label in pr_displays:
    pr_display.plot(ax=ax, alpha=0.8, label=clf_label)

plt.legend()
plt.show()

### Compare the performance of these four different models (Accuracy & F1-score)

In [None]:
# Sanity check: after evaluating all three bands the shape should be (3, 4),
# i.e. one row per band and one column per classifier.
print(np.asarray(accuracies).shape)
# Bare expression so the notebook displays the collected values
accuracies

In [None]:
# Plot accuracy scores as grouped bars: one group per stored row of
# `accuracies`, one bar per classifier.

# Width of each bar (four bars are drawn side by side in every group)
barWidth = 0.2

# (legend label, bar colour) per classifier, in the column order of the rows
# stored in `accuracies`: SVM, LDA, LR, RF
classifier_styles = [
    ('SVM', '#87CEFA'),
    ('LDA', '#FFE4E1'),
    ('LR', '#CD5C5C'),
    ('RF', '#C5E384'),
]

ax = plt.axes()

# x position of the first bar in each group; shifted right by one bar width
# after every classifier so the bars sit next to each other
bar_positions = np.arange(len(accuracies))
for column, (clf_label, clf_colour) in enumerate(classifier_styles):
    bar_heights = [row[column] for row in accuracies]
    plt.bar(bar_positions, bar_heights, color=clf_colour, width=barWidth,
            edgecolor='white', label=clf_label)
    bar_positions = [x + barWidth for x in bar_positions]

# Reference line at 0.5 (presumably chance level for the two-class task)
plt.axhline(y=0.5, color='k', linestyle='--',linewidth=0.4)
plt.xlabel('Classification Tasks')
plt.ylabel(' Accuracies')
# One tick under the centre of each four-bar group (first bar centre plus
# 1.5 bar widths). Assumes three rows appended in the order Theta, Alpha, Beta.
ax.set_xticks([0.3,1.3,2.3])
ax.set_xticklabels(['Theta','Alpha','Beta'])
# Place the legend just outside the right edge of the axes
plt.legend(loc="upper left", bbox_to_anchor=(0.98, 1))

plt.show()

In [None]:
# Plot F1-scores as grouped bars: one group per stored row of `f1_scores`,
# one bar per classifier.

# Width of each bar (four bars are drawn side by side in every group)
barWidth = 0.2

# (legend label, bar colour) per classifier, in the column order of the rows
# stored in `f1_scores`: SVM, LDA, LR, RF
classifier_styles = [
    ('SVM', '#87CEFA'),
    ('LDA', '#FFE4E1'),
    ('LR', '#CD5C5C'),
    ('RF', '#C5E384'),
]

ax = plt.axes()

# x position of the first bar in each group; shifted right by one bar width
# after every classifier so the bars sit next to each other
bar_positions = np.arange(len(f1_scores))
for column, (clf_label, clf_colour) in enumerate(classifier_styles):
    bar_heights = [row[column] for row in f1_scores]
    plt.bar(bar_positions, bar_heights, color=clf_colour, width=barWidth,
            edgecolor='white', label=clf_label)
    bar_positions = [x + barWidth for x in bar_positions]

# Reference line at 0.5
plt.axhline(y=0.5, color='k', linestyle='--',linewidth=0.4)
plt.xlabel('Classification Tasks')
plt.ylabel(' F1-scores')
# One tick under the centre of each four-bar group (first bar centre plus
# 1.5 bar widths). Assumes three rows appended in the order Theta, Alpha, Beta.
ax.set_xticks([0.3,1.3,2.3])
ax.set_xticklabels(['Theta','Alpha','Beta'])
# Place the legend just outside the right edge of the axes
plt.legend(loc="upper left", bbox_to_anchor=(0.98, 1))

plt.show()