In [None]:
import pebl_lib as pl
import pickle
import pandas as pd

# Only run these cells if the whole pipeline needs to be rerun from scratch

Read PEBL and EEG files

In [None]:
# Build the PEBL container first, then pass it to the EEG preparation step.
# NOTE(review): going by this cell's heading, these constructors presumably
# read the PEBL and EEG files from disk — confirm in pebl_lib.
pebl = pl.PeblAll()
eeg = pl.EegAllPreparing(pebl)

Get ICA and PCA Matrices for all data (tutorial has been removed)

In [None]:
# Pool the EEG recordings of every participant, for levels 1-3, into one frame.
#
# The frames are collected first and concatenated once: `DataFrame.append` was
# removed in pandas 2.0, and appending inside a loop re-copies the accumulated
# data on every iteration.
# NOTE(review): assumes each 'eeg_data' entry is a DataFrame and that skipping
# level 0 is what "tutorial has been removed" refers to — confirm.
eeg_frames = [
    eeg.participant[i].level[j][k]['eeg_data']
    for i in range(len(eeg.participant))
    for j in range(1, 4)
    for k in range(len(eeg.participant[i].level[j]))
]
# `pd.concat` rejects an empty list, so keep the empty-frame result in that case.
alldata = pd.concat(eeg_frames) if eeg_frames else pd.DataFrame()

# Run ICA and PCA on the pooled EEG data.
peeg = pl.ProcessEeg(alldata)
peeg.ica()
peeg.pca()

# NOTE(review): `peeg.pca` / `peeg.ica` are read here right after being called
# as methods; this only stores fitted objects if those calls rebind the
# attributes on the instance — confirm in pebl_lib.
spat_struct = {'scaler': peeg.scaler, 'pca': peeg.pca, 'ica': peeg.ica}

# Checkpoint every intermediate object. Each file is opened in a `with` block
# so the handle is flushed and closed even if pickling fails part-way.
for pkl_path, pkl_obj in (
    ("alldata.pkl", alldata),
    ("eeg.pkl", eeg),
    ("pebl.pkl", pebl),
    ("peeg.pkl", peeg),
):
    with open(pkl_path, "wb") as handle:
        pickle.dump(pkl_obj, handle)

with open('spat_struct.pkl', 'wb') as handle:
    pickle.dump(spat_struct, handle, protocol=pickle.HIGHEST_PROTOCOL)

Checkpoint: All data, EEG, PEBL, processed EEG, and the spatial structure have been saved to .pkl files

Side note: spatial_structers.pkl is also included; it is an older version that still takes the tutorial part into account

In [None]:
# Reload the checkpointed EEG object and the spatial structure.
# `with` closes each file handle as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code — only load .pkl files that were
# produced by this project.
# NOTE(review): this reads "spatial_structers.pkl", not the "spat_struct.pkl"
# written by the preceding cell — confirm the older file is the intended input.
with open("eeg.pkl", "rb") as handle:
    eeg = pickle.load(handle)
with open("spatial_structers.pkl", "rb") as handle:
    spat_struct = pickle.load(handle)

Create features from the input data

In [None]:
# Extract FFT and PSD features, then checkpoint them.
feature_extractor = pl.FeatureExtractor(eeg, spat_struct)
feature_extractor.extract_features()

# Transpose the extractor's tables and replace their index with a fresh
# 0..n-1 range.
fft = feature_extractor.fft_features.T.reset_index(drop=True)
psd = feature_extractor.psd_features.T.reset_index(drop=True)

# `features` is bound to the same dict that gets pickled. Later cells index it
# as features['fft'] / features['psd'], so they now work whether or not the
# reload-from-checkpoint cell is run in between.
features = {'fft': fft, 'psd': psd}
with open('features.pkl', 'wb') as handle:
    pickle.dump(features, handle, protocol=pickle.HIGHEST_PROTOCOL)

Checkpoint: Features have been saved

In [None]:
# Reload the feature checkpoint; `with` closes the file handle after loading.
# NOTE: unpickling can execute arbitrary code — only load trusted files.
with open("features.pkl", "rb") as handle:
    features = pickle.load(handle)

Detect and remove outliers

In [None]:
# Build one Outlier object per (feature technique, removal strategy) pair,
# keyed as "<technique>_<removal>", e.g. "fft_max".
removals = ['max', '0', 'nan', 'mean']
imputed = {
    technique + '_' + removal: pl.Outlier(features, technique, removal)
    for technique in ['fft', 'psd']
    for removal in removals
}

# Keep only the processed feature tables from each Outlier object.
removed = {key: outlier.newfeatures for key, outlier in imputed.items()}

# Add the untouched feature tables and the label column.
# `.T[:-4].T` drops the last four columns: after transposing, `[:-4]` slices
# rows, which are the original columns.
# NOTE(review): presumably those four columns are label/metadata — confirm.
removed.update({
    'fft_raw': features['fft'].T[:-4].T,
    'psd_raw': features['psd'].T[:-4].T,
    'label': features['psd']['Label'],
})

# `with` guarantees the checkpoint file is flushed and closed.
with open("removed.pkl", "wb") as handle:
    pickle.dump(removed, handle)

Checkpoint: Outliers have been removed

In [None]:
# Reload the outlier-handling checkpoint written to removed.pkl.
# NOTE: pd.read_pickle unpickles the file, so only load trusted files.
removed = pd.read_pickle('removed.pkl')

Apply boosted trees and iterate over different hyperparameters

In [None]:
# NOTE(review): going by this section's heading, constructing BoostedTrees
# presumably runs the hyperparameter sweep over every table in `removed` and
# fills `bt.allresults` — confirm in pebl_lib (this may be slow).
bt = pl.BoostedTrees(removed)

In [None]:
# Show the boosted-trees result row with the highest test accuracy
# for the raw PSD features.
bt_psd_raw = bt.allresults['psd_raw']
bt_psd_raw.loc[bt_psd_raw['Test Accuracy'].idxmax()]

Apply random forest and iterate over n_estimators

In [None]:
# NOTE(review): going by this section's heading, constructing RandomForest
# presumably iterates over n_estimators for every table in `removed` and
# fills `rf.allresults` — confirm in pebl_lib (this may be slow).
rf = pl.RandomForest(removed)

In [None]:
# Show the random-forest result row with the highest test accuracy
# for the raw PSD features.
rf_psd_raw = rf.allresults['psd_raw']
rf_psd_raw.loc[rf_psd_raw['Test Accuracy'].idxmax()]

In [None]:
# Checkpoint both fitted model wrappers in a single file.
models_to_save = {'bt': bt, 'rf': rf}
with open('results.pkl', 'wb') as handle:
    pickle.dump(models_to_save, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Print the best boosted-trees test accuracy for each feature variant.
for key, results_table in bt.allresults.items():
    best_test_accuracy = max(results_table['Test Accuracy'])
    print(f"{key}: {best_test_accuracy}")

In [None]:
# Print the best random-forest test accuracy for each feature variant.
for key, results_table in rf.allresults.items():
    best_test_accuracy = max(results_table['Test Accuracy'])
    print(f"{key}: {best_test_accuracy}")

For reference, the old values were:

Train accuracy 0.881140350877193 <br>
Accuracy 0.660245183887916 <br>
Precision 0.671875 <br>
Recall 0.7914110429447853 <br>

In [None]:
# Baseline: a single XGBoost classifier on the raw PSD features with an
# 80/20 train/test split and fixed seeds.
# NOTE(review): train_test_split, XGBClassifier, accuracy_score,
# precision_score and recall_score are not imported anywhere in the visible
# notebook; on a fresh kernel this cell raises NameError unless they are
# imported elsewhere — confirm and add the imports to the first cell.
X, y = removed['psd_raw'], removed['label']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

clf = XGBClassifier(seed=42)
clf.fit(X_train, y_train)

# Despite its name, `y_pred_rf` holds the XGBoost test-set predictions.
y_pred_train = clf.predict(X_train)
y_pred_rf = clf.predict(X_test)

for metric_label, metric_value in (
    ('Train accuracy', accuracy_score(y_train, y_pred_train)),
    ('Accuracy', accuracy_score(y_test, y_pred_rf)),
    ('Precision', precision_score(y_test, y_pred_rf)),
    ('Recall', recall_score(y_test, y_pred_rf)),
):
    print(metric_label, metric_value)

In [None]:
import pickle

# Reload the saved model wrappers; `with` closes the file handle after loading.
# NOTE: unpickling can execute arbitrary code — only load trusted files.
with open("results.pkl", "rb") as handle:
    results = pickle.load(handle)

In [None]:
# Print, for each feature variant, the boosted-trees row with the highest
# test accuracy. The loop variable is deliberately not called `bt`: that name
# holds the fitted BoostedTrees object, and overwriting it with a results
# table would break any earlier cell that is re-run afterwards.
for key, bt_table in results["bt"].allresults.items():
    print(key)
    print(bt_table.loc[bt_table['Test Accuracy'].idxmax()])

In [None]:
# Print, for each feature variant, the random-forest row with the highest
# test accuracy. The loop variable is deliberately not called `rf`: that name
# holds the fitted RandomForest object, and overwriting it with a results
# table would break any earlier cell that is re-run afterwards.
for key, rf_table in results["rf"].allresults.items():
    print(key)
    print(rf_table.loc[rf_table['Test Accuracy'].idxmax()])