In [9]:
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from ipywidgets import interact

import sys
sys.path.append("..")

# from conc_obj import EEGData
from eegdata_multi import EEGData
from utils.plt import plot_psd, plot_montage
from utils.ica import plot_ica_comp

# MNE imports
import mne
from mne.io.edf import read_raw_edf
from mne.datasets import eegbci

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

import json


***Macros***

>General-purpose constants, loaded from the JSON configuration files

In [15]:
#* Project locations, resolved relative to the notebook's working directory
script_path = Path().resolve()
main_folder = (script_path / "../").resolve()
proj_folder = (script_path / "../../").resolve()

#* Locations of the JSON configuration files
JSON_MAIN_PATH = main_folder / "config/config_main.json"
JSON_CSP_PATH = main_folder / "config/config_csp.json"
JSON_GRID_PATH = main_folder / "config/config_grid.json"
EVENTS_PATH = main_folder / "config/events.json"

#* JSON is UTF-8 by specification, so do not depend on the platform default encoding
with open(JSON_MAIN_PATH, "r", encoding="utf-8") as f:
    config_main = json.load(f)

with open(JSON_CSP_PATH, "r", encoding="utf-8") as f:
    config_csp = json.load(f)

#* Accepts the string "true"/"True" as well as a real JSON boolean:
#* str(True).lower() == 'true', whereas calling .lower() on a bool would raise.
VERBOSE = str(config_main['verbose']).lower() == 'true'

#* Frequency limits read from the main configuration
L_FREQ = config_main['l_freq']
H_FREQ = config_main['h_freq']

#* Dataset size and ICA settings read from the main configuration
N_SUBJECTS = config_main["n_subjects"]
N_COMPONENTS_ICA = config_main["n_components_ica"]



***Initialization of EEG object***

>***(If the files are not locally stored, it will download them to the user system automatically)***

>***Calling functions such as .filter_data() is also required when no data is stored locally***

In [16]:
#* Builds the EEG data object from both JSON configurations and the project root folder.
#* NOTE(review): per the notebook text, construction may download the recordings when
#* they are not stored locally -- confirm against EEGData.
eeg_obj = EEGData(config_main, config_csp, proj_folder, verbose=VERBOSE)
#* NOTE(review): `folder` below is not defined in any cell shown in this notebook;
#* define it (or use proj_folder) before re-enabling this line.
# eeg_obj.save_type_data(type="events", folder_path=folder, verbose=VERBOSE)

#* Filters data and plots PSD to see differences
# eeg_obj.filter_data()
# eeg_obj.plot_psd_ba_filt(verbose=VERBOSE)


Loaded data:
<Raw | sample_mmi_h_raw.fif, 64 x 2333120 (14582.0 s), ~1.11 GiB, data loaded> <Raw | sample_mmi_hf_raw.fif, 64 x 2333120 (14582.0 s), ~1.11 GiB, data loaded>
<Raw | sample_mmi_h_filt_raw.fif, 64 x 2333120 (14582.0 s), ~1.11 GiB, data loaded> <Raw | sample_mmi_hf_filt_raw.fif, 64 x 2333120 (14582.0 s), ~1.11 GiB, data loaded>
Reading /Users/Shared/42/ML/projects/total_perspective_vortex/data/ica/sample_mmi_h_ica.fif ...
Now restoring ICA solution ...
Ready.
Reading /Users/Shared/42/ML/projects/total_perspective_vortex/data/ica/sample_mmi_hf_ica.fif ...
Now restoring ICA solution ...
Ready.
<ICA | raw data decomposition, method: fastica (fit in 85 iterations on 2333120 samples), 64 ICA components (64 PCA components available), channel types: eeg, 3 sources marked for exclusion> <ICA | raw data decomposition, method: fastica (fit in 80 iterations on 2333120 samples), 64 ICA components (64 PCA components available), channel types: eeg, 3 sources marked for exclusion>
<ICA | r

***Prediction - Loading testing data & ML models***

>***If we don't want to train our model and just want to make predictions or inspect a metric, we can load the saved data and models and call the methods right away.***

>***Keep in mind that changing the events or any other parameter of the loaded tools/methods requires training and saving the entire model/data again!***

In [None]:

# X_test, y_test = eeg_obj.load_models()
# eeg_obj.pred(X_test, y_test)

**Basic information and montage plotting in 2D & 3D**
> ***The channel names can also be printed***

In [None]:

#* Plots different montages in 2D & 3D
# data = eeg_obj.get_raw_h()

# ch_names = data.info["ch_names"] 

# plot_montage(eeg_obj.montage, ch_names)

**ICA (Independent Component Analysis)**
> ***The number of components that ICA will try to separate can be changed; values in the range [16-64] are advised***

> ***Ocular artifacts are also removed, since they don't contribute to the muscular movement on this evaluation***

> ***The components can also be plotted and ocular artifacts, EOG, will be clearly visible***

In [None]:

#* Computes ICA components ( If loaded locally do not use! )
# eeg_obj.decomp_ica(n_components=N_COMPONENTS_ICA, plt_show=True, verbose=VERBOSE)

#* Plot components of ICA
# plot_ica_comp(folder / config_main["path_ica_h"])

**Specify events & create Epochs**
> ***The events used, along with the JSON configuration, are crucial for the ML algorithms to work properly***


In [17]:

#* Cleaned recordings and their event arrays (each call returns a pair)
data_h, data_hf = eeg_obj.get_clean()
events_h, events_hf = eeg_obj.get_events()

#* Event selection comes from the CSP configuration
ev_list = config_csp["ev_mlist_eight"]

#* Epochs and frequency bands are built from the first recording/event pair only
epochs, freq_bands = eeg_obj.crt_epochs(
    data_h,
    events_h,
    ev_list,
    "hands",
    verbose=VERBOSE,
)

#* Blank lines frame whatever get_data() logs
print()
epochs_data = epochs.get_data()
print()

#* The last column of the events array holds the label of each epoch
labels = epochs.events[:, -1]


Event dict. :  {'do/left_hand': 1, 'do/right_hand': 2, 'imagine/left_hand': 3, 'imagine/right_hand': 4, 'rest': 5}

Using data from preloaded Raw for 3540 events and 433 original time points ...
0 bad epochs dropped



***CSP - Common Spatial Pattern(s)***

>Separates multivariate signals into additive components which have maximum differences in variance between two windows.

>Specially used on MEG & EEG datasets for motor imagery decoding.

In [18]:
#* Component count from the CSP configuration (reused later by the PCA cell)
N_COMPONENTS_CSP = config_csp["n_components"]

#* Extract CSP features; the call returns the feature matrix and the CSP object
features, csp = eeg_obj.csp(
    epochs_data,
    labels,
    freq_bands,
    epochs.info,
    verbose=VERBOSE,
)

#* Only use plot_patterns if PCA is not applied beforehand
# csp.plot_patterns(epochs.info, ch_type="eeg", units="Patterns (AU)", size=1.5)

#* Displays the performance of CSP along classifiers through a timeline
# eeg_obj.csp_performance(epochs, labels, clf_type='svm', verbose=False)

#* Two step CSP
# features, labels = eeg_obj.two_step_csp(epochs1, epochs2, freq_bands, verbose=VERBOSE)

#* Sanity check on the resulting feature matrix
feature_shape = features.shape
print("Shape after CSP:", feature_shape)

(3540, 64, 433) (3540,)
Shape after CSP: (3540, 48)


**Normalize and apply PCA(Principal Component Analysis)**
> ***Faster computation, training, testing, ...***

> ***Reduces the risk of over-fitting***

> ***Improves the accuracy of classification ML algorithms***

In [19]:
#* NOTE(review): the scaler and PCA are fitted on every sample before any
#* train/test split, so later evaluation scores may be optimistic -- consider
#* fitting them inside the cross-validated pipeline instead.

#* Standardize each CSP feature
# features_norm = eeg_obj.normalize(features)
scaler = StandardScaler()
features_norm = scaler.fit_transform(features)

#* Project onto N_COMPONENTS_CSP principal components
# features_pca = eeg_obj.pca(features_norm)
pca = PCA(
    n_components=N_COMPONENTS_CSP,
)
features_pca = pca.fit_transform(features_norm)

***GridSearch - Parameter selection***

>Exhaustive search over specified parameter values for an estimator.

>The default values have been tested. Performance varies with the selected event types.

In [None]:
#* Grid-search helpers from the project's utils package
from utils.gridsearch import grid_finder, grid_search

#* NOTE(review): the commented example below cannot run as written in the cells shown:
#*   - `json_grid` is never created (JSON_GRID_PATH is defined but not loaded),
#*   - `data` is only assigned in a commented-out line,
#*   - `pipeline` is created in a later cell.
# grid = grid_finder(json_grid, 'svm', 'wide')
# print(grid)
# grid_search(data, labels, pipeline, grid)

***Pipeline***

>The dimensionality reduction tools, classification algorithms and signal processing (CSP) are included.

>The default values/functions have performed well in testing.

In [20]:
#* Pipeline factory from the project's utils package
from utils.pipeline import crt_pipeline

#* Builds the estimator that is later passed to cross-validation.
#* NOTE(review): presumably clf=True with voting='soft' selects a soft-voting
#* ensemble of classifiers -- confirm against utils/pipeline.py.
pipeline = crt_pipeline(clf=True, voting='soft')


***Cross Validation - The good ol' tester***

>We can choose n_splits along with the pipeline. (Both can be customized beforehand)

>It ensures that the training/testing datasets are not mixed & calculates the average score over the K-folds.

In [21]:
#* Fold scores from 5-fold cross-validation, then their average
cv_scores = eeg_obj.cross_val(features_pca, labels, pipeline, n_splits=5)
mean_score = np.mean(cv_scores)
print("Mean score:", mean_score)

#* Second run through cross_validate; its result is what `scores` ends up holding
scores = eeg_obj.cross_validate(
    features_pca,
    labels,
    pipeline,
    n_splits=5,
)

[0.89548023 0.88559322 0.89124294 0.87853107 0.91242938]
Mean score: 0.8926553672316384
{'fit_time': array([2.20092392, 2.25810194, 2.21403909, 2.19763708, 2.19787502]), 'score_time': array([0.05967522, 0.0770812 , 0.06047201, 0.05854392, 0.05881786]), 'test_score': array([0.89548023, 0.88559322, 0.89124294, 0.87853107, 0.91242938]), 'train_score': array([1., 1., 1., 1., 1.])}


In [22]:
from sklearn.model_selection import train_test_split

#* Hold out 20% of the samples for testing; the fixed seed keeps the shuffle reproducible
split_options = dict(test_size=0.2, random_state=42, shuffle=True)
X_train, X_test, y_train, y_test = train_test_split(
    features_pca,
    labels,
    **split_options,
)

#* Fit on the training part
eeg_obj.train_model(X_train, y_train)

#* Predict on the held-out part (n_preds=30 is forwarded to eeg_obj.pred)
eeg_obj.pred(X_test, y_test, n_preds=30)

Train Accuracy LDA: 0.5264830508474576
Train Accuracy SVM: 1.0
Train Accuracy RF: 1.0

epoch nb: [prediction] [truth] equal?
epoch 000:	[5]		[5]  True
epoch 001:	[1]		[1]  True
epoch 002:	[5]		[5]  True
epoch 003:	[5]		[5]  True
epoch 004:	[5]		[5]  True
epoch 005:	[5]		[5]  True
epoch 006:	[5]		[5]  True
epoch 007:	[5]		[5]  True
epoch 008:	[5]		[5]  True
epoch 009:	[1]		[1]  True
epoch 010:	[4]		[4]  True
epoch 011:	[5]		[5]  True
epoch 012:	[3]		[3]  True
epoch 013:	[5]		[5]  True
epoch 014:	[5]		[5]  True
epoch 015:	[3]		[3]  True
epoch 016:	[1]		[1]  True
epoch 017:	[5]		[5]  True
epoch 018:	[5]		[5]  True
epoch 019:	[5]		[5]  True
epoch 020:	[3]		[3]  True
epoch 021:	[2]		[2]  True
epoch 022:	[5]		[5]  True
epoch 023:	[5]		[5]  True
epoch 024:	[3]		[3]  True
epoch 025:	[5]		[3]  False
epoch 026:	[5]		[5]  True
epoch 027:	[5]		[5]  True
epoch 028:	[5]		[5]  True
epoch 029:	[5]		[5]  True
epoch 030:	[1]		[1]  True

LDA Accuracy: 0.5677966101694916
SVM Accuracy: 0.8983050847457628
R

In [None]:
#* Saves filtered and concatenated data for faster loading
# eeg_obj.save_type_data(type="raw")
# eeg_obj.save_type_data(type="filtered")
# eeg_obj.save_type_data(type="ica")
# eeg_obj.save_type_data(type="clean")
# eeg_obj.save_type_data(type="epochs")
# eeg_obj.save_models()