In [None]:

# Adapted from Pedro L. C. Rodrigues, Sylvain Chevallier
#
# https://github.com/plcrodrigues/Workshop-MOABB-BCI-Graz-2019

import warnings

import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd
import numpy as np
import seaborn as sns
from mne.decoding import CSP
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score

import moabb
from moabb.evaluations import WithinSessionEvaluation
from moabb.paradigms import MotorImagery
import mne


# Emit MOABB log messages at the "info" level.
moabb.set_log_level("info")
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and pandas indexing warnings
# raised by the cells below — consider narrowing the filter.
warnings.filterwarnings("ignore")

# Data cleaning
Our data arrives raw, unshaped, and forward-filled.<br>
First we locate the individual trials, then reshape (and, where needed, further transform) the data before we can use it to fit a model.

In [None]:
# Load one recording and discard rows with missing values.
df = pd.read_csv('eeg_data/p2s1t18_b.csv').dropna()

# Remove rows carrying marker codes 86 and 99, THEN renumber the rows.
# Resetting the index after the filter (not before it) keeps index labels
# equal to row positions, which positional (.iloc) slicing relies on.
df = df[(df['markers'] != 86) & (df['markers'] != 99)].reset_index(drop=True)

# A trial starts on the first row of a run where `markers` equals
# `direction`: the current row matches and the previous row's marker did not.
marker_matches = df['markers'] == df['direction']
previous_differs = df['markers'].shift() != df['direction']
trial_indices = df.index[marker_matches & previous_differs]

# Append the last row's index as an end-of-recording sentinel so that
# consecutive pairs of entries delimit every trial, including the final one.
trial_indices = trial_indices.append(df.index[-1:])
trial_indices

In [None]:
# Split the recording into one frame per trial and stack them under a
# (trial, row) MultiIndex.
# `trial_indices` holds the start of every trial followed by a sentinel
# pointing at the last row of the recording.
assert len(trial_indices) >= 2, "need at least one trial start plus the end sentinel"

# `trial_indices` contains index labels; translate them to row positions so
# the .iloc slices below are correct even if labels and positions differ.
boundaries = df.index.get_indexer(trial_indices)
starts = boundaries[:-1]
stops = boundaries[1:].copy()
stops[-1] += 1  # the sentinel is the last row itself; keep it in the final trial

trial_frames = []
for start, stop in zip(starts, stops):
    trial = df.iloc[start:stop]
    # Keep only rows whose marker code is 4, masking on the slice itself so
    # the boolean mask is aligned with the rows being filtered.
    # NOTE(review): 4 is presumably the code for the period of interest — confirm.
    trial_frames.append(trial[trial['markers'] == 4])

data = pd.concat(trial_frames, keys=range(len(trial_frames)), names=['trial', 'row'])
data.head()

## Instantiating Dataset

The first thing to do is to instantiate the dataset that we want to analyze.
MOABB has a list of many different datasets, each one containing all the
necessary information for describing them, such as the number of subjects,
size of trials, names of classes, etc.

The dataset class has methods for:

- downloading its files from some online source (e.g. Zenodo)
- importing the data from the files, whatever their format
  (e.g. .mat, .gdf), and instantiating a Raw object from the MNE package



In [None]:
path = Path('eeg_data/1-4.csv')
# NOTE(review): absolute, machine-specific path that this cell never reads.
datafile = Path('/Volumes/EEGTRANSFER/melodies-recon/EEG/day1/ses-flute/eeg/sub-aaa_ses-flute_task-Default_run-001_eeg.xdf')

# Read the CSV, drop incomplete rows and keep only rows whose marker is 4.
df = pd.read_csv(path).dropna()
df = df.loc[df['markers'] == 4]

# The `direction` column becomes the label vector; the remaining columns
# (with the unnamed first CSV column renamed to `epoch`) stay in `df`.
labels = df['direction']
df = (
    df
    .drop(columns=['markers', 'direction'])
    .rename(columns={'Unnamed: 0': 'epoch'})
)

# Wrap the table in an MNE RawArray: one channel per remaining column,
# sampled at 500 Hz, values passed through unscaled.
# NOTE(review): `epoch` is registered as a channel too, and no unit
# conversion is applied — confirm both are intended.
info = mne.create_info(ch_names=list(df.columns), sfreq=500)
dataset = mne.io.RawArray(df.dropna().T, info)


## Choosing a Paradigm

Once we have instantiated a dataset, we have to choose a paradigm. This
object is responsible for filtering the data, epoching it, and extracting
the labels for each epoch. Note that each dataset comes with the names of
the paradigms to which it might be associated. It would not make sense to
process a P300 dataset with a MI paradigm object.



In [None]:
# paradigm = MotorImagery()

In [None]:
# X, labels, meta = paradigm.get_data(dataset=dataset, subjects=[1])

## Create Pipeline

Our goal is to evaluate the performance of a given classification pipeline
(or several of them) when it is applied to the epochs from the previously
chosen dataset. We will consider a very simple classification pipeline in
which the dimensionality of the epochs is reduced via a CSP (Common Spatial
Patterns) step and the result is then classified via linear discriminant analysis.



In [None]:
# Two-step classifier: a default-configured CSP (mne.decoding) followed by a
# default-configured linear discriminant analysis (scikit-learn).
pipeline = make_pipeline(CSP(), LDA())

## Evaluation

To evaluate the score of this pipeline, we use the `evaluation` class. When
instantiating it, we say which paradigm we want to consider, a list with the
datasets to analyze, and whether the scores should be recalculated each time
we run the evaluation or if MOABB should create a cache file.

Note that there are different ways of evaluating a classifier; in this
example, we choose `WithinSessionEvaluation`, which consists of doing a
cross-validation procedure where the training and testing partitions are from
the same recording session of the dataset. We could have used
`CrossSessionEvaluation`, which takes all but one session as training
partition and the remaining one as testing partition.



In [None]:
# evaluation = WithinSessionEvaluation(
#     paradigm=paradigm,
#     datasets=[dataset],
#     overwrite=True,
#     hdf5_path=None,
# )

In [None]:
# Display the `direction` column that was taken out of `df` when the dataset was built.
labels

In [None]:
# Shape of the remaining `df` columns once converted to a NumPy array.
df.to_numpy().shape

In [None]:

# Feature matrix as a NumPy array, one row per row of `df`.
# The original statement ended in a truncated `.res` attribute access
# (presumably an unfinished `.reshape(...)`), which raises AttributeError.
# NOTE(review): the CSP step used below expects a 3-D array
# (n_epochs, n_channels, n_times); the epoch length cannot be determined
# from this notebook, so the epoching/reshape step still has to be added here.
X = df.to_numpy()


In [None]:
# Fit a standalone, default-configured CSP on the transposed array.
# NOTE(review): mne's CSP expects X shaped (n_epochs, n_channels, n_times)
# with one label per epoch — confirm that X.T and `labels` satisfy this.
CSP().fit(X.T, labels)

In [None]:
# results = evaluation.process({"csp+lda": pipeline})
# Fit the CSP+LDA pipeline directly on X / labels rather than through the
# MOABB evaluation, which is commented out above.
clf = pipeline.fit(X, labels)

In [None]:
# Spot check: predict the class of a single item of X, wrapped in a list so
# the input keeps a leading batch axis.
# NOTE(review): index 123 looks arbitrary — confirm, and make sure X has more than 123 items.
clf.predict([X[123]])

In [None]:
# 5-fold cross-validation of the pipeline on X / labels, scored with
# scikit-learn's "roc_auc_ovr" (one-vs-rest ROC AUC) scorer.
scores = cross_val_score(pipeline, X, labels, cv=5, scoring="roc_auc_ovr")
scores

## Plotting Results

We create a figure with the seaborn package comparing the classification
score for each subject on each session. Note that the 'subject' field from
the `results` is given in terms of integers, but seaborn accepts only
strings for its labeling. This is why we create the field 'subj'.

