In [1]:
import os
import pickle
import numpy as np
from mne import viz
from scipy import io, stats
from matplotlib import colors, pyplot as plt
from sklearn import model_selection, neural_network, metrics

In [2]:
# Configuration: caffeine dose selecting the feature/result folders, data
# locations, and the stage keys used to index the per-stage dictionaries.
CAF_DOSE = 200  # presumably in mg -- TODO confirm unit

# NOTE(review): DATA_PATH and RESULTS_PATH are absolute, machine-specific
# Windows locations; adjust them when running on another machine.
DATA_PATH = 'C:\\Users\\Philipp\\Documents\\Caffeine\\Features{dose}\\Combined'.format(dose=CAF_DOSE)
RESULTS_PATH = 'C:\\Users\\Philipp\\GoogleDrive\\Caffeine\\results\\mlp{dose}'.format(dose=CAF_DOSE)
# Built with os.path.join so the relative path resolves on every platform
# (a hard-coded backslash is only a path separator on Windows).
PROJECT_PATH = os.path.join('..', 'data')

# Keys under which the loaded data, labels and groups are organised.
STAGES = ['AWA', 'AWSL', 'NREM', 'REM']

In [3]:
# Read the 'Cor' array from the MATLAB file 'Coo_caf' in the project data
# folder, then swap its first two columns (index 1 first, index 0 second).
# NOTE(review): presumably these are 2-D electrode coordinates and the swap
# exchanges x/y for plotting -- confirm against the .mat file.
coordinate_file = os.path.join(PROJECT_PATH, 'Coo_caf')
sensor_pos = io.loadmat(coordinate_file)['Cor'].T
sensor_pos = sensor_pos[[1, 0]].T

In [4]:
def _load_pickle(filename):
    """Unpickle `filename` from DATA_PATH and return the stored object."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(os.path.join(DATA_PATH, filename), 'rb') as handle:
        return pickle.load(handle)


# Features, labels and group identifiers; all three are indexed by stage
# further down in the notebook.
data = _load_pickle('data_avg.pickle')
y = _load_pickle('labels_avg.pickle')
groups = _load_pickle('groups_avg.pickle')

In [5]:
def _is_selected(feature):
    """Return True for features that are kept (no 'Perm' / 'SpecSamp' in the name)."""
    return 'Perm' not in feature and 'SpecSamp' not in feature


# One name per column of the feature matrix: '<feature>-<column index>'.
# The number of columns is read from the data of the first stage instead of
# being hard-coded, so the names always line up with the matrix built below.
feature_names = np.concatenate([
    [feature + '-' + str(i) for i in range(np.shape(data[STAGES[0]][feature])[1])]
    for feature in data[STAGES[0]].keys()
    if _is_selected(feature)
])

# Per stage, stack the selected feature arrays side by side (along axis 1).
# NOTE(review): column order follows each stage's own key order; it matches
# feature_names only if every stage stores its features in the same order.
x = {}
for stage in STAGES:
    x[stage] = np.concatenate(
        [data[stage][feature] for feature in data[stage].keys() if _is_selected(feature)],
        axis=1,
    )

In [6]:
# Nested, group-aware cross-validation of an MLP classifier, run per stage.
#
# Outer loop: up to `iterations` randomly chosen leave-4-groups-out splits.
# Inner loop: 10-fold GroupKFold grid search on the training part of the split.
# For every outer split the refitted best estimator, its test data and its
# test accuracy are stored; results are collected per stage in
# `estimator_dict` and `testing_data_dict`.
iterations = 100
RANDOM_SEED = 42  # makes the split selection and network initialisation reproducible

estimator_dict = {}
testing_data_dict = {}

# Local generator so seeding does not touch NumPy's global random state.
rng = np.random.RandomState(RANDOM_SEED)

for stage in STAGES:
    testing_data = []
    estimators = []
    avg_score = []

    cv = model_selection.LeavePGroupsOut(n_groups=4)
    cv_split = list(cv.split(x[stage], y[stage], groups[stage]))

    # Pick the splits up front; this way `train`/`test` are never overwritten
    # with a split that is not actually trained on (the previous break-inside-
    # the-loop left `test` pointing at an unused split).
    selected_splits = rng.permutation(len(cv_split))[:iterations]
    for counter, i in enumerate(selected_splits):
        train, test = cv_split[i]
        if counter % 25 == 0:
            print(f'{stage} iteration {counter}/{iterations}')

        # A fresh, reproducible seed per run so the networks differ between runs.
        clf = neural_network.MLPClassifier(max_iter=1000,
                                           random_state=int(rng.randint(2 ** 31 - 1)))

        params = {
            'hidden_layer_sizes': [(128, 128, 32,)],
        }

        kfold_inner = model_selection.GroupKFold(n_splits=10)

        # `iid` is no longer accepted by GridSearchCV (removed in scikit-learn
        # 0.24); with a single candidate it never influenced the chosen model.
        grid_search = model_selection.GridSearchCV(estimator=clf,
                                                   param_grid=params,
                                                   cv=kfold_inner,
                                                   refit=True,
                                                   n_jobs=-1)
        # `groups` must be passed by keyword; it is forwarded to the inner
        # GroupKFold splitter so groups stay intact within the inner folds.
        grid_search.fit(x[stage][train], y[stage][train], groups=groups[stage][train])

        testing_data.append((x[stage][test], y[stage][test]))
        estimators.append(grid_search.best_estimator_)
        avg_score.append(grid_search.best_estimator_.score(x[stage][test], y[stage][test]))

    testing_data_dict[stage] = testing_data
    estimator_dict[stage] = estimators

    # Majority vote of all estimators on the test fold of the last trained split.
    # Rounding the mean prediction assumes numeric binary labels (0/1) -- TODO confirm.
    # NOTE(review): every estimator except the last was fitted on a different
    # outer split whose training part may contain the groups in this `test`
    # fold, so the ensemble score is likely optimistic.
    ensemble_prediction = np.round(np.mean([est.predict(x[stage][test]) for est in estimators], axis=0))
    ensemble_score = metrics.accuracy_score(y[stage][test], ensemble_prediction)
    print(stage, 'mean score:', np.mean(avg_score), 'ensemble score:', ensemble_score, '\n')

AWA iteration 0/100


KeyboardInterrupt: 