In [47]:
"""
Format the dataset: read the file(s) for the desired subject(s) and parse the data to extract:
- samplingRate
- trialLength
- X, a M x N x K matrix, which stands for trial x chan x samples
                         the actual values are 160 x 15 x 2560
- y, a M vector containing the labels {0,1}

ref:
Dataset description: https://lampx.tugraz.at/~bci/database/002-2014/description.pdf
"""

import scipy.io as sio
import numpy as np


# Containers filled by the loading loop below: X collects one
# (chan x samples) array per trial, y collects one label array per run.
y = []
X = []

# Subjects to load. Subject NN is stored as a training file
# 'BBCIData/SNNT.mat' and a validation file 'BBCIData/SNNE.mat'.
# To pool every subject (14 down to 1) instead of a single one, use:
#     subjectIds = list(range(14, 0, -1))
subjectIds = [14]

# Both lists are generated from subjectIds so they always stay paired
# (entry k of each list belongs to the same subject).
trainingFileList = ['BBCIData/S%02dT.mat' % subjectId for subjectId in subjectIds]

validationFileList = ['BBCIData/S%02dE.mat' % subjectId for subjectId in subjectIds]

def _appendTrials(matData, nRuns, trialLength, trials, labels):
    """Cut every run of one loaded .mat file into fixed-length trials.

    matData     -- dict returned by sio.loadmat; for each run the record
                   matData['data'][0][run][0][0] is read as
                   [0] raw signal, [1] trial start timestamps, [2] labels
    nRuns       -- number of runs to read from the file
    trialLength -- number of samples kept per trial
    trials      -- list extended in place with one (chan x samples) slice
                   per trial
    labels      -- list extended in place with one label array per run
    """
    # Plain Python int: loadmat may return fixed-width NumPy scalars, and
    # adding those could wrap around if the stored type is narrow.
    nSamples = int(trialLength)

    for run in range(nRuns):
        runRecord = matData['data'][0][run][0][0]
        labels.append(runRecord[2][0]) # labels
        timestamps = runRecord[1][0] # timestamps
        rawData = runRecord[0].transpose() # chan x data

        # parse out data based on timestamps
        for start in timestamps:
            start = int(start)
            trials.append(rawData[:, start:start + nSamples]) # e.g. 15 x 2560


# Runs read from each training / validation file.
nTrainingRuns = 5
nValidationRuns = 3

# The two lists are consumed pairwise, so a length mismatch would either
# fail half-way through or silently skip files; reject it up front.
if len(trainingFileList) != len(validationFileList):
    raise ValueError("trainingFileList and validationFileList must have the "
                     "same length (%d != %d)"
                     % (len(trainingFileList), len(validationFileList)))

for trainingFile, validationFile in zip(trainingFileList, validationFileList):
    # read file
    d1T = sio.loadmat(trainingFile)
    d1E = sio.loadmat(validationFile)

    # Field 3 of the first training run holds the sampling rate; a trial
    # spans 5 x that many samples.
    samplingRate = d1T['data'][0][0][0][0][3][0][0]
    trialLength = 5*samplingRate

    # run through all training runs
    _appendTrials(d1T, nTrainingRuns, trialLength, X, y)

    # run through all validation runs (we do not discriminate at this point)
    _appendTrials(d1E, nValidationRuns, trialLength, X, y)

    # release the loaded files before reading the next subject
    del d1T
    del d1E

# arrange data into numpy arrays
# also torch expect float32 for samples
# and int64 for labels {0,1}
X = np.array(X).astype(np.float32)

# y holds one label array per run. Joining them end to end gives the same
# ordering as stacking + flatten, but also works when runs differ in length.
# The stored labels are shifted down by one before the int64 cast.
if len(y):
    y = (np.concatenate([np.ravel(runLabels) for runLabels in y]) - 1).astype(np.int64)
else:
    y = np.zeros(0, dtype=np.int64)

# Every trial needs exactly one label; fail here rather than deep inside
# the cross-validation loop if the two ever get out of step.
assert X.shape[0] == y.shape[0], (
    "trial/label count mismatch: %d trials vs %d labels"
    % (X.shape[0], y.shape[0]))

print(X.shape)
print(y.shape)

# erase unused references
# (the loading loop already del's these; rebinding only keeps the names defined)
d1T = []
d1E = []



(160, 15, 2560)
(160,)


In [48]:
"""
====================================================================
ERP EEG decoding in Tangent space.
====================================================================
Decoding applied to EEG data in sensor space decomposed using Xdawn.
After spatial filtering, covariance matrices are estimated, then projected into
the tangent space and classified with a logistic regression.
"""
# Authors: Alexandre Barachant <alexandre.barachant@gmail.com>
#
# License: BSD (3-clause)

import numpy as np

from pyriemann.estimation import XdawnCovariances
from pyriemann.tangentspace import TangentSpace
from pyriemann.utils.viz import plot_confusion_matrix

from sklearn.pipeline import make_pipeline
from sklearn.cross_validation import KFold
from sklearn.linear_model import LogisticRegression

from matplotlib import pyplot as plt

###############################################################################
# Decoding in tangent space with a logistic regression

# Value passed as the first argument to XdawnCovariances
n_components = 2

# Trials and labels as produced by the data-formatting cell
epochs_data = X
labels = y

# 10-fold cross-validation over shuffled trial indices with a fixed seed.
# NOTE(review): this is the legacy sklearn.cross_validation call style
# (KFold(n, n_folds) iterated directly). sklearn.model_selection.KFold takes
# n_splits and is iterated through .split(...) -- confirm the installed version.
n_trials = len(labels)
cv = KFold(n_trials, 10, shuffle=True, random_state=42)

print("epoch data:")
print(epochs_data.shape)

# Pipeline: Xdawn covariances -> tangent-space projection -> logistic regression
xdawn_cov = XdawnCovariances(n_components)
tangent_space = TangentSpace(metric='riemann')
clf = make_pipeline(xdawn_cov, tangent_space, LogisticRegression())

# One prediction slot per trial, filled fold by fold with held-out predictions
preds = np.zeros(n_trials)

print("labels:")
print(labels.shape)

for train_idx, test_idx in cv:
    y_train = labels[train_idx]
    y_test = labels[test_idx]

    # fit on the training folds, predict the held-out fold
    clf.fit(epochs_data[train_idx], y_train)
    preds[test_idx] = clf.predict(epochs_data[test_idx])

# Printing the results: fraction of trials whose held-out prediction
# matches the label
acc = (preds == labels).mean()
print("Classification accuracy: %f " % (acc))

# Confusion-matrix plot left disabled. The four class names below do not
# match the {0,1} labels documented for this dataset and would need
# replacing before re-enabling.
#names = ['audio left', 'audio right', 'vis left', 'vis right']
#plot_confusion_matrix(preds, labels, names)
#plt.show()

epoch data:
(160, 15, 2560)
labels:
(160,)
Classification accuracy: 0.568750 


In [None]:

# subject 1
# Classification accuracy: 0.531250 

# subject 2
# Classification accuracy: 0.656250 

# subject 3
# Classification accuracy: 0.843750 

# subject 4
# Classification accuracy: 0.625000 

# subject 5
# Classification accuracy: 0.568750 

# subject 6
# Classification accuracy: 0.656250 

# subject 7
# Classification accuracy: 0.793750 

# subject 8
# Classification accuracy: 0.587500 

# subject 9
# Classification accuracy: 0.556250

# subject 10
# Classification accuracy: 0.581250

# subject 11
# Classification accuracy: 0.593750

# subject 12
# Classification accuracy: 0.556250

# subject 13
# Classification accuracy: 0.581250

# subject 14
# Classification accuracy: 0.568750

# subject 1-14
# Classification accuracy: 0.542411 