In [1]:
import sys
sys.path.append("..")

import numpy as np
from pathlib import Path
from ipywidgets import interact

# from conc_obj import EEGData
from classes.eeg_data_auto import EEGDataAuto

# MNE imports
import mne
from mne.io.edf import read_raw_edf
from mne.datasets import eegbci
from mne.decoding import CSP
# from csp.CSPObj import CSP

from sklearn.ensemble import VotingClassifier
from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.decomposition import PCA, FastICA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

import json
import warnings


***Macros***

>General use macros, importing JSON files to use as the configuration files

In [2]:
script_path = Path().resolve()
main_folder = (script_path / "../").resolve()
proj_folder = (script_path / "../../").resolve()

JSON_MAIN_PATH = main_folder / "config/config_main.json"
JSON_CSP_PATH = main_folder / "config/config_csp.json"
JSON_GRID_PATH = main_folder / "config/config_grid.json"
EVENTS_PATH = main_folder / "config/events.json"

with open(JSON_MAIN_PATH, "r") as f:
    config_main = json.load(f)

with open(JSON_CSP_PATH, "r") as f:
    config_csp = json.load(f)
    
with open(JSON_GRID_PATH, "r") as f:
    json_grid = json.load(f)

VERBOSE = config_main['verbose'].lower() == 'true'

L_FREQ = config_main['l_freq']
H_FREQ = config_main['h_freq']

N_SUBJECTS = config_main["n_subjects"]
N_COMPONENTS_ICA = config_main["n_components_ica"]
N_COMPONENTS_CSP = config_csp["n_components"]
N_COMPONENTS_PCA = N_COMPONENTS_CSP

"""
T0 corresponds to rest
T1 corresponds to onset of motion (real or imagined) of
the left fist (in runs 3, 4, 7, 8, 11, and 12)
both fists (in runs 5, 6, 9, 10, 13, and 14)
T2 corresponds to onset of motion (real or imagined) of
the right fist (in runs 3, 4, 7, 8, 11, and 12)
both feet (in runs 5, 6, 9, 10, 13, and 14)
"""

warnings.filterwarnings("ignore", category=UserWarning, module='mne')
mne.set_log_level("CRITICAL")

group_type = config_csp["group_type"]

if group_type == "hands":
    groupeve_dict = config_csp["event_dict_h"]
    freq_bands = config_csp["freq_exec_hands_01"]
elif group_type == "hands&feet":
    groupeve_dict = config_csp["event_dict_hf"]
    freq_bands = config_csp["freq_exec_hf"]
else:
    raise(ValueError("The given group type for the events in 'csp_config' is invalid, try one of these values:\n1. 'hands'\n2. 'hands&feet'"))

#** Modify this line in order to change the event types fed to the models
ev_list = config_csp["ev_blist_one"]
event_dict = {key: value for key, value in groupeve_dict.items() if value in ev_list[0]}


***Initialization of EEG object***

>***(If the files are not locally stored, it will download them to the user system automatically)***

>***Use of functions like .filter_data() also is obligatory if there is no data stored locally***

In [3]:

eeg_model = EEGDataAuto(config_main, config_csp, proj_folder, event_dict, freq_bands, verbose=VERBOSE)

features = eeg_model.get_features()
labels = eeg_model.get_labels()

#* Divide the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
X_train_flattened = X_train.reshape(X_train.shape[0], -1)
X_test_flattened = X_test.reshape(X_test.shape[0], -1)

print("Shape of features:", features.shape)


Event dict. :  {'do/left_hand': 1, 'do/right_hand': 2}
Shape of features: (900, 64, 401)


***K-Fold Training/Testing***

>Split the data to train over different data sets, improves generalization.

>Remove the last step of the pipeline to get the proccesed data without the ML CLF algorimth

In [4]:

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


****Common Spatial Patterns + Principal Component Analysis + VotingClassifier****

In [5]:
ensemble = VotingClassifier(estimators=[
    ('svm', SVC(kernel='rbf', C=15, gamma=0.5, probability=True)),
    ('mlp', RandomForestClassifier())],
    voting='soft')

csp_pca_vote = Pipeline([
    ('csp', CSP(n_components=N_COMPONENTS_CSP, reg='ledoit_wolf', log=True, norm_trace=False)),
    ('pca', PCA()),
	('clf', ensemble)
])

# Fit the pipeline to the transformed data
csp_pca_vote.fit(X_train, y_train)


In [6]:
# Perform tests
train_score = csp_pca_vote.score(X_train, y_train)
test_score = csp_pca_vote.score(X_test, y_test)
scores = cross_val_score(csp_pca_vote, X_train, y_train, cv=cv)

print("Train set accuracy:", train_score)
print("Test set accuracy:", test_score)
print("<------------------------->")
print("Cross-validation scores:", scores)
print("Mean Cross-Validation: ", np.mean(scores))

Train set accuracy: 1.0
Test set accuracy: 0.6277777777777778
<------------------------->
Cross-validation scores: [0.58333333 0.59027778 0.60416667 0.58333333 0.54861111]
Mean Cross-Validation:  0.5819444444444445


****Common Spatial Patterns + Principal Component Analysis + SVC****

In [7]:
csp_pca_svm = Pipeline([
    ('csp', CSP(n_components=N_COMPONENTS_CSP, reg='ledoit_wolf', log=True, norm_trace=False)),
    ('pca', PCA()),
    ('svm', SVC(kernel='rbf', C=15, gamma=0.5, probability=True))
])

csp_pca_svm.fit(X_train, y_train)


In [8]:

train_score = csp_pca_svm.score(X_train, y_train)
test_score = csp_pca_svm.score(X_test, y_test)
scores = cross_val_score(csp_pca_svm, X_train, y_train, cv=cv)

print("Train set accuracy:", train_score)
print("Test set accuracy:", test_score)
print("<------------------------->")
print("Cross-validation scores:", scores)
print("Mean Cross-Validation: ", np.mean(scores))

Train set accuracy: 1.0
Test set accuracy: 0.6
<------------------------->
Cross-validation scores: [0.56944444 0.63194444 0.56944444 0.57638889 0.52083333]
Mean Cross-Validation:  0.5736111111111112


****Common Spatial Patterns + Principal Component Analysis + MinMaxScaler + SVC****

In [9]:
csp_pca_minmax_svm = Pipeline([
    ('csp', CSP(n_components=N_COMPONENTS_CSP, reg='ledoit_wolf', log=True, norm_trace=False)),
    ('pca', PCA()),
    ('scaler', MinMaxScaler()),
    ('svm', SVC(kernel='rbf', C=15, gamma=0.5, probability=True))
])

csp_pca_minmax_svm.fit(X_train, y_train)


In [10]:

train_score = csp_pca_minmax_svm.score(X_train, y_train)
test_score = csp_pca_minmax_svm.score(X_test, y_test)
scores = cross_val_score(csp_pca_minmax_svm, X_train, y_train, cv=cv)

print("Train set accuracy:", train_score)
print("Test set accuracy:", test_score)
print("<------------------------->")
print("Cross-validation scores:", scores)
print("Mean Cross-Validation: ", np.mean(scores))

Train set accuracy: 0.9444444444444444
Test set accuracy: 0.6222222222222222
<------------------------->
Cross-validation scores: [0.625      0.65277778 0.56944444 0.59027778 0.5625    ]
Mean Cross-Validation:  0.6


****Common Spatial Patterns + Principal Component Analysis + StandardScaler + RandomForestClassifier****

In [11]:
csp_pca_std_rf = Pipeline([
    ('csp', CSP(n_components=64, reg='ledoit_wolf', log=True, norm_trace=False)),
    ('pca', PCA()),
    ('scaler', MinMaxScaler()),
	('rf', RandomForestClassifier(n_estimators=250, max_depth=25, min_samples_split=10, random_state=42))
])

csp_pca_std_rf.fit(X_train, y_train)


In [12]:

train_score = csp_pca_std_rf.score(X_train, y_train)
test_score = csp_pca_std_rf.score(X_test, y_test)
scores = cross_val_score(csp_pca_std_rf, X_train, y_train, cv=cv)

print("Train set accuracy:", train_score)
print("Test set accuracy:", test_score)
print("<------------------------->")
print("Cross-validation scores:", scores)
print("Mean Cross-Validation: ", np.mean(scores))

Train set accuracy: 1.0
Test set accuracy: 0.6222222222222222
<------------------------->
Cross-validation scores: [0.60416667 0.63194444 0.61805556 0.5625     0.61805556]
Mean Cross-Validation:  0.6069444444444445


****Common Spatial Patterns + Principal Component Analysis + MinMaxScaler + MLP****

In [13]:
csp_pca_mlp = Pipeline([
    ('csp', CSP(n_components=N_COMPONENTS_CSP, reg='ledoit_wolf', log=True, norm_trace=False)),
    ('pca', PCA()),
    ('scaler', MinMaxScaler()),
    ('mlp', MLPClassifier(hidden_layer_sizes=(200, 100), max_iter=500))
])

# Fit the 3D pipeline to the transformed data
csp_pca_mlp.fit(X_train, y_train)


In [14]:
# Test the model
train_score = csp_pca_mlp.score(X_train, y_train)
test_score = csp_pca_mlp.score(X_test, y_test)
scores = cross_val_score(csp_pca_mlp, X_train, y_train, cv=cv)

print("Train set accuracy:", train_score)
print("Test set accuracy:", test_score)
print("<------------------------->")
print("Cross-validation scores:", scores)
print("Mean Cross-Validation: ", np.mean(scores))



Train set accuracy: 1.0
Test set accuracy: 0.6111111111111112
<------------------------->
Cross-validation scores: [0.67361111 0.63888889 0.5625     0.61111111 0.59722222]
Mean Cross-Validation:  0.6166666666666667




****StandardScaler + MLP****

In [15]:
std_mlp = Pipeline([
    ('scaler', StandardScaler()),
    ('mlp', MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500))
])

# Fit the 3D pipeline to the transformed data
std_mlp.fit(X_train_flattened, y_train)


In [16]:
# Test the model
train_score = std_mlp.score(X_train_flattened, y_train)
test_score = std_mlp.score(X_test_flattened, y_test)
scores = cross_val_score(std_mlp, X_train_flattened, y_train, cv=cv)

print("Train set accuracy:", train_score)
print("Test set accuracy:", test_score)
print("<------------------------->")
print("Cross-validation scores:", scores)
print("Mean Cross-Validation: ", np.mean(scores))

Train set accuracy: 1.0
Test set accuracy: 0.49444444444444446
<------------------------->
Cross-validation scores: [0.49305556 0.55555556 0.47916667 0.46527778 0.4375    ]
Mean Cross-Validation:  0.48611111111111116


****CSP + PCA + LDA****

In [17]:
csp_pca_lda = Pipeline([
    ('csp', CSP(n_components=32, reg='ledoit_wolf', log=True, norm_trace=False)),
    ('pca', PCA()),
	('lda', LDA(solver='lsqr', shrinkage='auto'))
])

csp_pca_lda.fit(X_train, y_train)


In [18]:
# Test the model
train_score = csp_pca_lda.score(X_train, y_train)
test_score = csp_pca_lda.score(X_test, y_test)
scores = cross_val_score(csp_pca_lda, X_train, y_train, cv=cv)

print("Train set accuracy:", train_score)
print("Test set accuracy:", test_score)
print("<------------------------->")
print("Cross-validation scores:", scores)
print("Mean Cross-Validation: ", np.mean(scores))

Train set accuracy: 0.7152777777777778
Test set accuracy: 0.5944444444444444
<------------------------->
Cross-validation scores: [0.57638889 0.59027778 0.59722222 0.61805556 0.59722222]
Mean Cross-Validation:  0.5958333333333334


**Saving data locally**
> ***Saving and loading data locally saves time and computational power***

> ***Note that it is also needed to change the configuration at the JSON files to import local files***

In [19]:
#* Saves filtered and concatenated data for faster loading
# eeg_obj.save_type_data(type="raw")
# eeg_obj.save_type_data(type="filtered")
# eeg_obj.save_type_data(type="norm")
# eeg_obj.save_type_data(type="ica")
# eeg_obj.save_models()