In [1]:
%matplotlib inline


# Cross Subject SSVEP



In [2]:
import warnings

import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import scipy
from pyriemann.estimation import Covariances as COVs
from estimation import Covariances, mean, scm
from classifiers import MDM, TangentSpace

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

import moabb
from moabb.datasets import SSVEPExo,Nakanishi2015,Wang2016,MAMEM1,MAMEM2,MAMEM3
from moabb.evaluations import CrossSubjectEvaluation,CrossSessionEvaluation,WithinSessionEvaluation
from moabb.paradigms import SSVEP, FilterBankSSVEP
from moabb.pipelines import SSVEP_CCA, ExtendedSSVEPSignal


# Silence FutureWarning and RuntimeWarning messages for the rest of the notebook.
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=RuntimeWarning)
# Set MOABB's log level to "info".
moabb.set_log_level("info")

In [3]:
def huber_prime(x, r):
    """Return a hard-threshold weight: 1 below the threshold, 0 otherwise.

    Parameters
    ----------
    x : scalar
        Value compared against the threshold.
    r : scalar
        Threshold.

    Returns
    -------
    int
        ``1`` when ``x`` is strictly smaller than ``r``, ``0`` otherwise
        (including ``x == r``).

    NOTE(review): the textbook Huber weight is ``r / x`` above the threshold;
    this function returns 0 there instead — confirm this is intended.
    """
    return 1 if x < r else 0


In [4]:
def split(signal, p=32):
    """Build leave-one-block-out train/test index lists.

    The first axis of ``signal`` is cut into consecutive blocks of ``p``
    samples. Each block is used once as the test set, and every other index
    (including any trailing samples that do not fill a whole block) forms the
    matching training set.

    Parameters
    ----------
    signal : array-like with a ``shape`` attribute
        Only ``signal.shape[0]`` (the number of samples) is read.
    p : int, default 32
        Number of consecutive samples per test block.

    Returns
    -------
    trains_idx : list of list of int
        Training indices for each fold, in increasing order.
    tests_idx : list of list of int
        Test indices for each fold (``p`` consecutive indices).
    """
    n_samples = signal.shape[0]
    n_blocks = n_samples // p
    trains_idx, tests_idx = [], []
    for block in range(n_blocks):
        start, stop = block * p, (block + 1) * p
        tests_idx.append(list(range(start, stop)))
        # Everything before and after the test block; built directly from
        # ranges instead of testing membership in a list for every index.
        trains_idx.append(list(range(start)) + list(range(stop, n_samples)))
    return trains_idx, tests_idx


## Loading dataset



In [5]:
# Instantiate the MAMEM1 dataset with its default settings.
dataset = MAMEM1()
# Optional overrides, left disabled: a different epoch interval, or
# restricting the run to the first subject only.
#dataset.interval = [0.5,5]
#dataset.subject_list = dataset.subject_list[:1]

In [6]:
# Show which subjects and which epoch interval the dataset object exposes.
print(dataset.subject_list)
print(dataset.interval)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[1, 4]


In [None]:
# Fetch the recordings; later cells index the result as
# records[subject]['session_0'] to count the entries of each subject.
records = dataset.get_data()
records

## Choose paradigm




In [None]:
# Filter-bank SSVEP paradigm restricted to 5 classes. With filters=None the
# filters are presumably derived from the stimulation frequencies (as stated
# in this notebook's prose) — confirm against the MOABB documentation.
paradigm_fb = FilterBankSSVEP(filters=None, n_classes=5)

Classes are defined by the frequency of the stimulation; here we use
the first five frequencies of the dataset (`n_classes=5`).
The evaluation function uses a LabelEncoder, transforming them
to the integer labels 0 to 4.



In [None]:
# Events (stimulation frequencies) that the paradigm keeps for this dataset.
freqs = paradigm_fb.used_events(dataset)
freqs

## Create pipelines



In [None]:
# Dictionary of named scikit-learn pipelines compared in this notebook.
# Every pipeline starts with ExtendedSSVEPSignal, estimates covariance
# matrices, and classifies them with an MDM classifier.
pipeline = {}

# --- Plain MDM on different covariance estimators ---------------------------
pipeline["MDM + scm covs"] = make_pipeline(
    ExtendedSSVEPSignal(),
    COVs(estimator="scm"),
    MDM()
)

pipeline["MDM + lwf covs"] = make_pipeline(
    ExtendedSSVEPSignal(),
    COVs(estimator="lwf"),
    MDM()
)

pipeline["MDM + huber covs"] = make_pipeline(
    ExtendedSSVEPSignal(),
    Covariances(estimator="huber non adaptive"),
    MDM()
)

pipeline["MDM + tyler covs"] = make_pipeline(
    ExtendedSSVEPSignal(),
    Covariances(estimator="tyler adaptive"),
    MDM()
)

pipeline["MDM + student covs"] = make_pipeline(
    ExtendedSSVEPSignal(),
    Covariances(estimator="student"),
    MDM()
)

# --- MDM with a custom `u_prime` weight function on SCM covariances ---------
ddl = 5  # presumably the degrees of freedom of the Student weight — TODO confirm
n= 32 #adapt with number of classes
clean_prop = 0.9


# The weight functions below are built through small factories so that the
# constants are frozen when this cell runs. A bare `lambda x: n / x` looks up
# the global `n` each time it is *called*, and a later cell reuses the name
# `n` as a slicing cursor, which would silently change the weights.
def _huber_u_prime(threshold):
    """Return ``x -> huber_prime(x, threshold)`` with ``threshold`` frozen."""
    return lambda x: huber_prime(x, threshold)


def _student_u_prime(n_value, ddl_value):
    """Return ``x -> (n + ddl/2) / (ddl/2 + x)`` with both constants frozen."""
    return lambda x: (n_value + 0.5*ddl_value) / (0.5*ddl_value + x)


def _tyler_u_prime(n_value):
    """Return ``x -> n / x`` with ``n`` frozen."""
    return lambda x: n_value / x


# Huber threshold: half of the `clean_prop` quantile of a chi-square
# distribution with 2*n degrees of freedom.
param1 = 0.5*scipy.stats.chi2.ppf(clean_prop,2*n)
u_prime1 = _huber_u_prime(param1)
pipeline["rMDM with huber + scm covs"] = make_pipeline(
    ExtendedSSVEPSignal(),
    Covariances(estimator="scm"),
    MDM(u_prime= u_prime1 )
)

pipeline["rMDM with student + scm covs"] = make_pipeline(
    ExtendedSSVEPSignal(),
    COVs(estimator="scm"),
    MDM(u_prime = _student_u_prime(n, ddl))
)


pipeline["rMDM with tyler + scm covs"] = make_pipeline(
    ExtendedSSVEPSignal(),
    COVs(estimator="scm"),
    MDM(u_prime = _tyler_u_prime(n))
)

In [None]:
# Extract the epochs (X), their labels (y) and the accompanying metadata
# for the whole dataset through the filter-bank paradigm.
X, y, metadata = paradigm_fb.get_data(dataset)

In [None]:
# Inspect the dimensions of the extracted epochs.
X.shape

In [None]:
import numpy as np

# Map every event name in `freqs` to an integer class id starting at 1.
# `freqs` is iterated directly rather than indexed by position: this gives the
# same mapping for a sequence, and also works when `freqs` is a dict keyed by
# event name (iteration yields the keys), where `freqs[i]` would raise
# KeyError. NOTE(review): MOABB's `used_events` is expected to return such a
# dict — confirm against the installed version.
dictio = {event: class_id for class_id, event in enumerate(freqs, start=1)}
print(dictio)

# Replace each label by its integer class id.
# NOTE: `y` is overwritten, so this cell cannot be re-run on its own;
# re-run the cell that calls `paradigm_fb.get_data` first.
y_ = [dictio[label] for label in y]
y = np.asarray(y_)

In [None]:
# Cut the stacked trials (X, y) into one chunk per subject.
# Each subject is assumed to contribute 32 trials per entry of
# records[subject]['session_0'], stored contiguously in subject order.
trials_per_session = 32

signals = []      # per-subject slices of X
labels = []       # per-subject slices of y
n_sessions = []   # per-subject number of entries in 'session_0'

# The running cursor is deliberately NOT called `n`: the pipeline cell defines
# a global `n = 32` that its `u_prime` lambdas read when they are called, so
# reusing that name here would change the weights used during evaluation.
start = 0
for subject in dataset.subject_list:
    n_session = len(records[subject]['session_0'])
    stop = start + trials_per_session * n_session
    n_sessions.append(n_session)
    signals.append(X[start:stop])
    labels.append(y[start:stop])
    start = stop

## Evaluation

The evaluation below collects a single score for each subject of the dataset
and for each pipeline: the classification accuracy averaged over the folds.

When a MOABB evaluation class is used (see the last section), results are saved
into the database, so that if you add a new pipeline, the evaluation is not run
again for the existing ones unless a parameter has changed. Results can
be overwritten if necessary.



In [None]:
# Manual per-subject evaluation: for every pipeline and every subject, each
# session-sized block of trials is held out once, the pipeline is fitted on
# the remaining trials, and the fold accuracies are averaged into one score.
results = {"subject":[],"score":[],"train samples":[], "test samples":[], "pipeline":[]}
for k, clf in pipeline.items():
    print(clf)
    for i, subject in enumerate(tqdm(dataset.subject_list)):
        signal, n_session, label = signals[i], n_sessions[i], labels[i]
        # `split` expects the number of trials per block, not the number of
        # sessions: derive it so that each fold holds out one whole session.
        trials_per_session = signal.shape[0] // n_session
        trains_idx, tests_idx = split(signal, trials_per_session)

        accs = []
        for train_idx, test_idx in zip(trains_idx, tests_idx):
            X_train, X_test = signal[train_idx], signal[test_idx]
            y_train, y_test = label[train_idx], label[test_idx]
            clf.fit(X_train, y_train)
            preds = clf.predict(X_test)
            accs.append(np.mean(preds == y_test))

        results["subject"].append(subject)
        results["score"].append(np.mean(accs))
        results["pipeline"].append(k)
        # Fold sizes are reported from the last fold of this subject.
        results["train samples"].append(len(train_idx))
        results["test samples"].append(len(test_idx))

Filter bank processing: the filters are determined automatically from the
stimulation frequencies of the events.



## Plot Results

Here we plot the results.



In [None]:
# Turn the collected per-subject / per-pipeline scores into a DataFrame.
df = pd.DataFrame(results)
df

In [None]:
# Grouped bar chart: one group per subject, one bar per pipeline.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(data=df, x="subject", y="score", hue="pipeline", ax=ax)

plt.show()

In [None]:
# Average score of each pipeline over all subjects.
# Grouping by the "pipeline" column avoids assuming a fixed number of rows per
# pipeline (the dataset lists 10 subjects, not 12) or a particular row order.
# sort=False keeps the pipelines in their order of first appearance.
mean_scores = df.groupby("pipeline", sort=False)["score"].mean()
for pipeline_name, mean_ in mean_scores.items():
    print(pipeline_name, "=", mean_)

## Within-session evaluation

In [None]:
# Evaluate every pipeline with MOABB's WithinSessionEvaluation (note: this is
# the within-session scheme, not a cross-session one). overwrite=False asks
# MOABB not to overwrite previously stored results.

evaluation1 = WithinSessionEvaluation(
    paradigm=paradigm_fb, datasets=dataset, overwrite=False
)
results1 = evaluation1.process(pipeline)

results1

In [None]:
# Grouped bar chart of the MOABB evaluation: one group per subject,
# one bar per pipeline.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(data=results1, x="subject", y="score", hue="pipeline", ax=ax)

plt.show()

In [None]:
# Wrap the MOABB evaluation results in a DataFrame for the summary below.
df1 = pd.DataFrame(results1)
df1

In [None]:
# Average score of each pipeline over all rows of the MOABB results.
# Grouping by the "pipeline" column avoids assuming 12 rows per pipeline in a
# fixed order, which the evaluation output does not guarantee.
# sort=False keeps the pipelines in their order of first appearance.
mean_scores1 = df1.groupby("pipeline", sort=False)["score"].mean()
for pipeline_name, mean_ in mean_scores1.items():
    print(pipeline_name, "=", mean_)