This is the extra work on improving classification performance. The work and why we do that work is explained in the tutorial 2 original notebook, thus commentation will not commence in this one.


In [1]:
import os
import numpy as np
import mne

calibration_path = "../BSPM/data/calibration.set"
eeg = mne.io.read_raw_eeglab(calibration_path,preload=True)
data, times = eeg[:, :]
eeg.resample(eeg.info['sfreq']/8) # Downsample the data by 8.
channel_list = ['Fz', 'FC1', 'CP1', 'CP2', 'Cz', 'C4', 'FC2']
eeg.pick_channels(channel_list)
(events_from_annot,event_dict) = mne.events_from_annotations(eeg)
epochs_noError = mne.Epochs(eeg,events_from_annot,tmin=-0.2,tmax=0.8,event_id=7,preload=True) # S 4
epochs_Error = mne.Epochs(eeg,events_from_annot,tmin=-0.2,tmax=0.8,event_id=8,preload=True) # S 5
noError_data = epochs_noError.get_data()
error_data = epochs_Error.get_data()
noError_label = np.ones(190)
error_label = np.ones(110)*-1

Reading ../BSPM/data/calibration.fdt
Reading 0 ... 248153  =      0.000 ...   969.348 secs...
Used Annotations descriptions: ['R  1', 'R  2', 'R  3', 'S  1', 'S  2', 'S  3', 'S  4', 'S  5', 'S  7', 'S  8', 'S  9', 'empty']
190 matching events found
Applying baseline correction (mode: mean)
Not setting metadata
0 projection items activated
Loading data for 190 events and 33 original time points ...
0 bad epochs dropped
110 matching events found
Applying baseline correction (mode: mean)
Not setting metadata
0 projection items activated
Loading data for 110 events and 33 original time points ...
0 bad epochs dropped


Labels and epoch data have been generated.

In [2]:
# Get time index in between 0.2 and 0.6
noError_data = noError_data[:,:,12:26]
error_data = error_data[:,:,12:26]
noError_data = noError_data.reshape(190,len(eeg.ch_names)*noError_data.shape[2])
error_data = error_data.reshape(110,len(eeg.ch_names)*error_data.shape[2])
# Merge data
all_data = np.concatenate((noError_data,error_data),axis=0)
all_labels = np.concatenate((noError_label,error_label),axis=0)
# Min max scaling
for i in range(all_data.shape[1]):
    all_data[:,i] = (all_data[:,i] - all_data[:,i].min()) / (all_data[:,i].max() - all_data[:,i].min())

Data and labels are ready for analysis

In [3]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=10, shuffle=True)
kf.get_n_splits(X=all_data,y=all_labels)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
general_score = 0.0
for train_index, test_index in kf.split(all_data):
    clf = LinearDiscriminantAnalysis(solver='lsqr',shrinkage=0.4)
    clf.fit(all_data[train_index], all_labels[train_index])
    score = clf.score(all_data[test_index], all_labels[test_index])
    general_score += score
    print("Score is: " + str(score))
print("Mean score is: " +str(general_score/10.0))

Score is: 0.8666666666666667
Score is: 0.9
Score is: 0.9333333333333333
Score is: 0.8
Score is: 1.0
Score is: 0.8666666666666667
Score is: 0.9
Score is: 0.9666666666666667
Score is: 0.9
Score is: 0.8333333333333334
Mean score is: 0.8966666666666668


Try different classifier

In [6]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
general_score = 0.0
for train_index, test_index in kf.split(all_data):
    clf = make_pipeline(StandardScaler(), SVC(kernel='rbf',gamma='auto'))
    clf.fit(all_data[train_index], all_labels[train_index])
    score = clf.score(all_data[test_index], all_labels[test_index])
    general_score += score
    print("Score is: " + str(score))
print("Mean score is: " +str(general_score/10.0))

Score is: 0.8333333333333334
Score is: 0.9666666666666667
Score is: 0.8666666666666667
Score is: 0.8666666666666667
Score is: 0.8666666666666667
Score is: 0.8666666666666667
Score is: 0.9666666666666667
Score is: 0.8333333333333334
Score is: 1.0
Score is: 0.9333333333333333
Mean score is: 0.9


Apply the same transformations to the recall.set and generate vector output.

In [5]:
recall_path = "../BSPM/data/recall.set"
eeg_recall = mne.io.read_raw_eeglab(recall_path,preload=True)
data, times = eeg_recall[:, :]
eeg_recall.resample(eeg_recall.info['sfreq']/8) # Downsample the data by 8.
channel_list = ['Fz', 'FC1', 'CP1', 'CP2', 'Cz', 'C4', 'FC2']
eeg_recall.pick_channels(channel_list)
(events_from_annot,event_dict) = mne.events_from_annotations(eeg_recall)
print(event_dict)
#We require S 6: 7
#7 is the event id with which we need to epoch from
epochs_s6 = mne.Epochs(eeg_recall,events_from_annot,tmin=-0.2,tmax=0.8,event_id=7,preload=True, reject_by_annotation=False) # S 6
epochs_data = epochs_s6.get_data()
# Get the data between 0.2 and 0.6 seconds
epochs_data = epochs_data[:,:,12:26]
epochs_data = epochs_data.reshape(300,len(eeg_recall.ch_names)*epochs_data.shape[2])
print(epochs_data.shape)
#Min Max Scaling on the epochs on recall.set
for i in range(epochs_data.shape[1]):
    epochs_data[:,i] = (epochs_data[:,i] - epochs_data[:,i].min()) / (epochs_data[:,i].max() - epochs_data[:,i].min())
#Train a ShrinkLDA classifier on calibration.set
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
clf = LinearDiscriminantAnalysis(solver='lsqr',shrinkage=0.4)
clf.fit(all_data, all_labels)
predicted = clf.predict(epochs_data)
import scipy.io as sio
sio.savemat('./lda_extra.mat', {'vector_output': predicted})

Reading ../BSPM/data/recall.fdt
Reading 0 ... 268019  =      0.000 ...  1046.949 secs...
Used Annotations descriptions: ['R  1', 'R  2', 'R  3', 'S  1', 'S  2', 'S  3', 'S  6', 'S  7', 'S  8', 'S  9', 'empty']
{'R  1': 1, 'R  2': 2, 'R  3': 3, 'S  1': 4, 'S  2': 5, 'S  3': 6, 'S  6': 7, 'S  7': 8, 'S  8': 9, 'S  9': 10, 'empty': 11}
300 matching events found
Applying baseline correction (mode: mean)
Not setting metadata
0 projection items activated
Loading data for 300 events and 33 original time points ...
0 bad epochs dropped
(300, 98)
