# Run all train, all test and upload

In [1]:
import time
import numpy as np
from tools.csp import generate_projection, generate_eye, extract_feature
from tools.filters import load_filterbank
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from tools.data import DreemDatasets
from preprocessing import Compose, ExtractBands, ExtractSpectrum
from models.riemannian_multiscale import riemannian_multiscale
import pandas as pd

  from ._conv import register_converters as _register_converters


In [2]:
fs = 50.  # sampling frequency
NO_channels = 7  # number of EEG channels
# Number of distinct entries in a symmetric NO_channels x NO_channels matrix:
# C * (C + 1) / 2 = 28 for C = 7. The previous expression,
# int(C * C + 1) / 2, mis-placed the parenthesis and produced the float 25.0.
# Presumably the Riemannian feature count per band and time window — TODO confirm.
NO_riem = NO_channels * (NO_channels + 1) // 2
# --- Filter bank -----------------------------------------------------------
# Values handed to load_filterbank; presumably bandwidths — TODO confirm.
bw = np.array([2, 4, 8, 13, 22])
ftype = 'butter'  # 'fir', 'butter'
forder = 2  # 4
# get filterbank coeffs
filter_bank = load_filterbank(bw, fs, ftype=ftype, order=forder, max_freq=23)

# --- Time windows ----------------------------------------------------------
# Each row is a [start, stop] pair that is scaled by fs, so the literals are
# expressed in the unit that fs converts to samples (presumably seconds).
time_windows_flt = fs * np.array([
    [0, 30],
    [15, 30], [10, 25], [5, 20], [0, 15],
    [15, 25], [10, 20], [5, 15], [0, 10],
])
# Integer copy of the windows above.
time_windows = time_windows_flt.astype(int)
# Disabled option kept from the original notebook:
#time_windows = time_windows[0:1]  # use only largest timewindow

# --- Riemannian model options ---------------------------------------------
riem_opt = "No_Adaptation"  # {"Riemann","Riemann_Euclid","Whitened_Euclid","No_Adaptation"}
rho = 0.1

# --- CSP options -----------------------------------------------------------
NO_bands = filter_bank.shape[0]
NO_csp = 24  # Total number of CSP feature per band and timewindow
useCSP = False

In [3]:
def get_data(path, train=True):
    """Load the seven per-channel EEG arrays of one dataset and stack them.

    Reads ``eeg_1.npy`` .. ``eeg_7.npy`` from ``dataset/<path>/train_split``
    when ``train`` is true, otherwise from ``dataset/<path>/test``.

    Returns:
        ``(X, Y)`` when ``train`` is true, where ``Y`` is the content of
        ``dataset/<path>/train_split/targets.npy``; otherwise ``X`` alone.
        ``X`` is a float array of shape ``(d0, 7, d1)``, with ``(d0, d1)``
        the shape of ``eeg_1.npy``.
    """
    split_dir = "train_split" if train else "test"
    prefix = "dataset/" + path + "/" + split_dir + "/eeg_"

    stacked = None
    for channel in range(1, 8):
        signal = np.load(prefix + str(channel) + ".npy")
        if stacked is None:
            # The first file fixes the size of the shared buffer.
            stacked = np.zeros((7, signal.shape[0], signal.shape[1]))
        stacked[channel - 1] = signal

    if not train:
        return stacked.transpose((1, 0, 2))

    targets = np.load("dataset/" + path + "/train_split/targets.npy")
    # Move the channel axis from position 0 to position 1.
    return (stacked.transpose((1, 0, 2)), targets)
    
def get_data_extra_data_eeg(path, train=True):
    """Load the seven per-channel "extra_eeg" feature arrays and flatten them.

    Reads ``eeg_1.npy`` .. ``eeg_7.npy`` from
    ``dataset/<path>/extra_eeg/train_split`` (``train`` true) or
    ``dataset/<path>/extra_eeg/test`` (``train`` false). Each file must be
    3-D; its first two axes are swapped and its last two axes are then merged.

    Returns:
        ``(X, Y)`` when ``train`` is true, where ``Y`` comes from
        ``dataset/<path>/train_split/targets.npy`` (note: not from the
        ``extra_eeg`` folder); otherwise ``X`` alone. ``X`` is a float array
        of shape ``(n, 7, a * b)`` for on-disk arrays of shape ``(a, n, b)``.
    """
    split_dir = "train_split" if train else "test"
    prefix = "dataset/" + path + "/extra_eeg/" + split_dir + "/eeg_"

    stacked = None
    for idx in range(7):
        feats = np.load(prefix + str(idx + 1) + ".npy").transpose((1, 0, 2))
        rows = feats.shape[0]
        width = feats.shape[1] * feats.shape[2]
        if stacked is None:
            # The first file fixes the size of the shared buffer.
            stacked = np.zeros((7, rows, width))
        stacked[idx] = feats.reshape(rows, width)

    if not train:
        return stacked.transpose((1, 0, 2))

    targets = np.load("dataset/" + path + "/train_split/targets.npy")
    return (stacked.transpose((1, 0, 2)), targets)

def get_extra_data(path, train=True):
    """Load the accelerometer (x, y, z) and pulse-oximeter feature arrays.

    Reads ``accelerometer_x.npy``, ``accelerometer_y.npy``,
    ``accelerometer_z.npy`` and ``pulse_oximeter_infrared.npy`` from
    ``dataset/<path>/train_split`` (``train`` true) or ``dataset/<path>/test``
    (``train`` false). Each file must be 3-D; its first two axes are swapped
    and its last two axes are then merged. The shape of the first array,
    after the axis swap, is printed.

    Returns:
        ``(X, Y)`` when ``train`` is true, where ``Y`` comes from
        ``dataset/<path>/train_split/targets.npy``; otherwise ``X`` alone.
        ``X`` is a float array of shape ``(n, 4, a * b)`` for on-disk arrays
        of shape ``(a, n, b)``.
    """
    sensor_names = ["accelerometer_x", "accelerometer_y", "accelerometer_z", "pulse_oximeter_infrared"]
    folder = "dataset/" + path + ("/train_split/" if train else "/test/")

    stacked = None
    for row, sensor in enumerate(sensor_names):
        block = np.load(folder + sensor + ".npy").transpose((1, 0, 2))
        rows = block.shape[0]
        width = block.shape[1] * block.shape[2]
        if stacked is None:
            print(block.shape)
            # The first file fixes the size of the shared buffer.
            stacked = np.zeros((4, rows, width))
        stacked[row] = block.reshape(rows, width)

    if not train:
        return stacked.transpose((1, 0, 2))

    targets = np.load("dataset/" + path + "/train_split/targets.npy")
    return (stacked.transpose((1, 0, 2)), targets)
    
def get_data_val(path):
    """Load the seven per-channel EEG arrays of the validation split.

    Reads ``eeg_1.npy`` .. ``eeg_7.npy`` and ``targets.npy`` from
    ``dataset/<path>/val_split``.

    Returns:
        ``(X, Y)`` where ``X`` is a float array of shape ``(d0, 7, d1)``,
        ``(d0, d1)`` being the shape of ``eeg_1.npy``, and ``Y`` is the
        content of ``targets.npy``.
    """
    folder = "dataset/" + path + "/val_split/"

    stacked = None
    for channel in range(1, 8):
        signal = np.load(folder + "eeg_" + str(channel) + ".npy")
        if stacked is None:
            # The first file fixes the size of the shared buffer.
            stacked = np.zeros((7, signal.shape[0], signal.shape[1]))
        stacked[channel - 1] = signal

    targets = np.load(folder + "targets.npy")
    return (stacked.transpose((1, 0, 2)), targets)

def get_extrat_data_val_eeg(path):
    """Load the seven per-channel "extra_eeg" feature arrays of the validation split.

    Reads ``eeg_1.npy`` .. ``eeg_7.npy`` and ``targets.npy`` from
    ``dataset/<path>/extra_eeg/val_split``. Each feature file must be 3-D; its
    first two axes are swapped and its last two axes are then merged.

    NOTE(review): the targets are read from inside ``extra_eeg/val_split``,
    whereas get_data_extra_data_eeg reads its targets from
    ``dataset/<path>/train_split`` (outside ``extra_eeg``) — confirm that this
    file exists and that the difference is intended.

    Returns:
        ``(X, Y)`` where ``X`` is a float array of shape ``(n, 7, a * b)`` for
        on-disk arrays of shape ``(a, n, b)``, and ``Y`` is the content of
        ``targets.npy``.
    """
    folder = "dataset/" + path + "/extra_eeg/val_split/"

    stacked = None
    for idx in range(7):
        feats = np.load(folder + "eeg_" + str(idx + 1) + ".npy").transpose((1, 0, 2))
        rows = feats.shape[0]
        width = feats.shape[1] * feats.shape[2]
        if stacked is None:
            # The first file fixes the size of the shared buffer.
            stacked = np.zeros((7, rows, width))
        stacked[idx] = feats.reshape(rows, width)

    targets = np.load(folder + "targets.npy")
    return (stacked.transpose((1, 0, 2)), targets)

def get_extra_data_val(path):
    """Load the accelerometer and pulse-oximeter features of the validation split.

    Reads ``accelerometer_x.npy``, ``accelerometer_y.npy``,
    ``accelerometer_z.npy``, ``pulse_oximeter_infrared.npy`` and
    ``targets.npy`` from ``dataset/<path>/val_split``. Each feature file must
    be 3-D; its first two axes are swapped and its last two axes are then
    merged. The shape of the first array, after the axis swap, is printed.

    Returns:
        ``(X, Y)`` where ``X`` is a float array of shape ``(n, 4, a * b)`` for
        on-disk arrays of shape ``(a, n, b)``, and ``Y`` is the content of
        ``targets.npy``.
    """
    sensor_names = ["accelerometer_x", "accelerometer_y", "accelerometer_z", "pulse_oximeter_infrared"]
    folder = "dataset/" + path + "/val_split/"

    stacked = None
    for row, sensor in enumerate(sensor_names):
        block = np.load(folder + sensor + ".npy").transpose((1, 0, 2))
        rows = block.shape[0]
        width = block.shape[1] * block.shape[2]
        if stacked is None:
            print(block.shape)
            # The first file fixes the size of the shared buffer.
            stacked = np.zeros((4, rows, width))
        stacked[row] = block.reshape(rows, width)

    targets = np.load(folder + "targets.npy")
    return (stacked.transpose((1, 0, 2)), targets)

# Training arrays: train_split of the "balanced" dataset.
path = "balanced"
train_data, train_label = get_data(path, train=True)

# Evaluation arrays: get_data defaults to train=True, so these come from the
# train_split folder of the "all" dataset (not from its test folder).
# NOTE(review): confirm "balanced" is not a subset of this split; otherwise the
# scores reported further down are computed on samples seen during training.
path = "all"
test_data, test_label = get_data(path, train=True)

## Features simples

In [4]:
# Accelerometer / pulse-oximeter features: "balanced" for training, the
# train_split of "all" (get_extra_data defaults to train=True) for evaluation.
path = "balanced"
train_extra_data, train_extra_label = get_extra_data(path, train=True)
path = "all"
test_extra_data, _ = get_extra_data(path, train=True)

# Flatten the 4 sensors x 16 values of each sample into one 64-wide row.
train_extra_data = train_extra_data.reshape((-1, 4 * 16))
test_extra_data = test_extra_data.reshape((-1, 4 * 16))

(5412, 4, 4)
(30631, 4, 4)


In [5]:
# Fit a 700-tree random forest on the accelerometer / oximeter features and
# score it against test_label.
start = time.time()

# 'sqrt' is the explicit spelling of what max_features='auto' meant for
# classifiers; the 'auto' alias was deprecated in scikit-learn 1.1 and removed
# in 1.3, so the original call fails on current releases.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(train_extra_data, train_label)
print("trained")

labels_pred = clf.predict(test_extra_data)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the evaluation set.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[1990  482  123   62  227]
 [  54  950   39   18   32]
 [ 681 1321 6650 2290 2753]
 [ 130  182  420 3499  314]
 [ 518  841 1593  730 4732]] 0.5817962195161764 0.5638819160728847
time :  26.15091872215271


In [5]:
# Per-channel "extra_eeg" features: "balanced" for training, the train_split
# of "all" (get_data_extra_data_eeg defaults to train=True) for evaluation.
path = "balanced"
train_extra_data_eeg, train_extra_label = get_data_extra_data_eeg(path, train=True)
path = "all"
test_extra_data_eeg, _ = get_data_extra_data_eeg(path, train=True)
print(test_extra_data_eeg.shape)

# Flatten the 7 channels x 24 values of each sample into one 168-wide row.
train_extra_data_eeg = train_extra_data_eeg.reshape((-1, 7 * 24))
test_extra_data_eeg = test_extra_data_eeg.reshape((-1, 7 * 24))

(30631, 7, 24)


In [7]:
# Fit a 700-tree random forest on the flattened "extra_eeg" features and score
# it against test_label.
start = time.time()

# 'sqrt' is the explicit spelling of what max_features='auto' meant for
# classifiers; the 'auto' alias was deprecated in scikit-learn 1.1 and removed
# in 1.3, so the original call fails on current releases.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(train_extra_data_eeg, train_label)
print("trained")

labels_pred = clf.predict(test_extra_data_eeg)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the evaluation set.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[2316  338   53   51  126]
 [  59  958   29   18   29]
 [ 843 1291 7439 2067 2055]
 [ 151  167  363 3835   29]
 [ 638  803 1231  264 5478]] 0.6537821161568346 0.6254347640147918
time :  38.527586936950684


In [8]:
# Column-wise join: "extra_eeg" features followed by the accelerometer /
# oximeter features, built the same way for the training and evaluation sets.
all_final_features = np.concatenate(
    [train_extra_data_eeg, train_extra_data], axis=1
)
all_final_features_test = np.concatenate(
    [test_extra_data_eeg, test_extra_data], axis=1
)
print(all_final_features.shape)

(5412, 232)


In [9]:
# Fit a 700-tree random forest on whatever all_final_features currently holds
# (it is rebuilt by several cells, so run the intended one just before this)
# and score it against test_label.
start = time.time()

# 'sqrt' is the explicit spelling of what max_features='auto' meant for
# classifiers; the 'auto' alias was deprecated in scikit-learn 1.1 and removed
# in 1.3, so the original call fails on current releases.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the evaluation set.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[2294  367   52   41  130]
 [  56  963   30   16   28]
 [ 656 1218 7684 2051 2086]
 [ 118  162  328 3904   33]
 [ 518  788 1181  312 5615]] 0.6679507688289641 0.6397347891229647
time :  46.03363013267517


## Avec CSP

In [6]:
def get_features(data, label, time_windows, useCSP = True):
    """Build a projection for ``data`` and extract features through it.

    With ``useCSP`` true the projection comes from ``generate_projection``
    (using the notebook-level ``NO_csp`` and ``filter_bank``, with 5 classes);
    otherwise it comes from ``generate_eye``. In both cases the features are
    computed by ``extract_feature`` with the same ``filter_bank`` and
    ``time_windows``.

    Returns:
        ``(w, feature_mat)``: the projection and the extracted features.
    """
    projection = (
        generate_projection(data, label, NO_csp, filter_bank, time_windows, NO_classes=5)
        if useCSP
        else generate_eye(data, label, filter_bank, time_windows)
    )
    return (projection, extract_feature(data, projection, filter_bank, time_windows))

In [7]:
# Build the projection on the training data, then reuse it unchanged for the
# evaluation data. With useCSP = False (the value set in the configuration
# cell) get_features takes its generate_eye branch rather than
# generate_projection.
w, train_feat_CSP = get_features(
    train_data,
    train_label,
    time_windows,
    useCSP,
)
test_feature_CSP = extract_feature(
    test_data,
    w,
    filter_bank,
    time_windows,
)
# Disabled: no val_data is loaded anywhere in this notebook.
#val_feature_CSP = extract_feature(val_data, w, filter_bank, time_windows)

# The projection is not needed once both feature matrices exist.
del w

In [8]:
# Aliases under which the later cells refer to these feature matrices (no copy is made).
features_CSP_train, features_CSP_test = train_feat_CSP, test_feature_CSP

In [16]:
# Fit a 700-tree random forest on the filter-bank features
# (features_CSP_train) and score it against test_label.
start = time.time()

# 'sqrt' is the explicit spelling of what max_features='auto' meant for
# classifiers; the 'auto' alias was deprecated in scikit-learn 1.1 and removed
# in 1.3, so the original call fails on current releases.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(features_CSP_train, train_label)
print("trained")

labels_pred = clf.predict(features_CSP_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the evaluation set.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[2334  326   56   36  132]
 [  48  974   24    9   38]
 [ 832 1299 7080 2390 2094]
 [ 152  141  463 3738   51]
 [ 523  903 1207  351 5430]] 0.6384381835395514 0.6153261240810197
time :  124.42029213905334


In [17]:
# Column-wise join: filter-bank features followed by the accelerometer /
# oximeter features, for the training and evaluation sets alike.
all_final_features = np.concatenate(
    [features_CSP_train, train_extra_data], axis=1
)
all_final_features_test = np.concatenate(
    [features_CSP_test, test_extra_data], axis=1
)
print(all_final_features.shape)

(5412, 1198)


In [18]:
# Fit a 700-tree random forest on whatever all_final_features currently holds
# (it is rebuilt by several cells, so run the intended one just before this)
# and score it against test_label.
start = time.time()

# 'sqrt' is the explicit spelling of what max_features='auto' meant for
# classifiers; the 'auto' alias was deprecated in scikit-learn 1.1 and removed
# in 1.3, so the original call fails on current releases.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the evaluation set.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[2354  320   60   31  119]
 [  45  983   27    6   32]
 [ 796 1216 7214 2357 2112]
 [ 150  122  453 3764   56]
 [ 499  838 1238  361 5478]] 0.6461754431784793 0.624394433438417
time :  130.97637939453125


In [19]:
# Column-wise join: filter-bank features followed by the "extra_eeg" features,
# for the training and evaluation sets alike.
all_final_features = np.concatenate(
    [features_CSP_train, train_extra_data_eeg], axis=1
)
all_final_features_test = np.concatenate(
    [features_CSP_test, test_extra_data_eeg], axis=1
)
print(all_final_features.shape)

(5412, 1302)


In [20]:
# Fit a 700-tree random forest on whatever all_final_features currently holds
# (it is rebuilt by several cells, so run the intended one just before this)
# and score it against test_label.
start = time.time()

# 'sqrt' is the explicit spelling of what max_features='auto' meant for
# classifiers; the 'auto' alias was deprecated in scikit-learn 1.1 and removed
# in 1.3, so the original call fails on current releases.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the evaluation set.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[2338  314   64   37  131]
 [  43  979   25   11   35]
 [ 818 1238 7491 2052 2096]
 [ 148  133  434 3787   43]
 [ 523  867 1272  289 5463]] 0.6548268094414156 0.6298723684993595
time :  139.8023717403412


In [24]:
# Step 1: filter-bank features + accelerometer / oximeter features.
all_final_features = np.concatenate(
    [features_CSP_train, train_extra_data], axis=1
)
all_final_features_test = np.concatenate(
    [features_CSP_test, test_extra_data], axis=1
)
print(all_final_features.shape)

# Step 2: append the "extra_eeg" features to the result of step 1.
all_final_features = np.concatenate(
    [all_final_features, train_extra_data_eeg], axis=1
)
all_final_features_test = np.concatenate(
    [all_final_features_test, test_extra_data_eeg], axis=1
)
print(all_final_features.shape)

(5412, 1198)
(5412, 1366)


In [22]:
# Fit a 700-tree random forest on whatever all_final_features currently holds
# (it is rebuilt by several cells, so run the intended one just before this)
# and score it against test_label.
start = time.time()

# 'sqrt' is the explicit spelling of what max_features='auto' meant for
# classifiers; the 'auto' alias was deprecated in scikit-learn 1.1 and removed
# in 1.3, so the original call fails on current releases.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the evaluation set.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[2380  303   52   28  121]
 [  43  983   24   10   33]
 [ 790 1169 7604 2022 2110]
 [ 138  123  420 3819   45]
 [ 506  817 1268  298 5525]] 0.6630864157226339 0.639209140962282
time :  124.20704317092896


## Riemann

In [9]:
# Multiscale Riemannian feature extractor configured from the notebook-level
# filter bank, time windows, riem_opt and rho.
riemann = riemannian_multiscale(
    filter_bank,
    time_windows,
    riem_opt=riem_opt,
    rho=rho,
    vectorized=True,
)
# fit() is called on the training data and its return value kept as the
# training features; features() is then applied to the evaluation data.
features_CSP_train_R = riemann.fit(train_data)
features_CSP_test_R = riemann.features(test_data)

In [10]:
# Fit a 700-tree random forest on the Riemannian features
# (features_CSP_train_R) and score it against test_label.
start = time.time()

# 'sqrt' is the explicit spelling of what max_features='auto' meant for
# classifiers; the 'auto' alias was deprecated in scikit-learn 1.1 and removed
# in 1.3, so the original call fails on current releases.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(features_CSP_train_R, train_label)
print("trained")

labels_pred = clf.predict(features_CSP_test_R)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the evaluation set.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[2358  327   55   21  123]
 [  41  977   28    8   39]
 [ 780 1211 7219 2353 2132]
 [ 164  122  441 3771   47]
 [ 506  910 1139  362 5497]] 0.6471221964676308 0.6238878372262942
time :  266.51126074790955


In [11]:
# Column-wise join: Riemannian features followed by the accelerometer /
# oximeter features, for the training and evaluation sets alike.
all_final_features = np.concatenate(
    [features_CSP_train_R, train_extra_data], axis=1
)
all_final_features_test = np.concatenate(
    [features_CSP_test_R, test_extra_data], axis=1
)
print(all_final_features.shape)

(5412, 4600)


In [12]:
# Fit a 700-tree random forest on whatever all_final_features currently holds
# (it is rebuilt by several cells, so run the intended one just before this)
# and score it against test_label.
start = time.time()

# 'sqrt' is the explicit spelling of what max_features='auto' meant for
# classifiers; the 'auto' alias was deprecated in scikit-learn 1.1 and removed
# in 1.3, so the original call fails on current releases.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the evaluation set.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[2369  322   57   17  119]
 [  44  981   24    9   35]
 [ 788 1157 7334 2316 2100]
 [ 153  126  448 3774   44]
 [ 510  817 1158  369 5560]] 0.6535209428356893 0.6303864279955638
time :  286.47879576683044


In [13]:
# Column-wise join: Riemannian features followed by the "extra_eeg" features,
# for the training and evaluation sets alike.
all_final_features = np.concatenate(
    [features_CSP_train_R, train_extra_data_eeg], axis=1
)
all_final_features_test = np.concatenate(
    [features_CSP_test_R, test_extra_data_eeg], axis=1
)
print(all_final_features.shape)

(5412, 4704)


In [14]:
# Fit a 700-tree random forest on whatever all_final_features currently holds
# (it is rebuilt by several cells, so run the intended one just before this)
# and score it against test_label.
start = time.time()

# 'sqrt' is the explicit spelling of what max_features='auto' meant for
# classifiers; the 'auto' alias was deprecated in scikit-learn 1.1 and removed
# in 1.3, so the original call fails on current releases.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the evaluation set.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[2357  329   57   22  119]
 [  42  979   27    7   38]
 [ 772 1236 7455 2111 2121]
 [ 156  119  451 3779   40]
 [ 489  904 1219  296 5506]] 0.6554144494139924 0.6310620901244586
time :  269.5309417247772


In [23]:
# Step 1: Riemannian features + accelerometer / oximeter features.
all_final_features = np.concatenate(
    [features_CSP_train_R, train_extra_data], axis=1
)
all_final_features_test = np.concatenate(
    [features_CSP_test_R, test_extra_data], axis=1
)
print(all_final_features.shape)

# Step 2: append the "extra_eeg" features to the result of step 1.
all_final_features = np.concatenate(
    [all_final_features, train_extra_data_eeg], axis=1
)
all_final_features_test = np.concatenate(
    [all_final_features_test, test_extra_data_eeg], axis=1
)
print(all_final_features.shape)

(5412, 4600)
(5412, 4768)


In [16]:
# Fit a 700-tree random forest on whatever all_final_features currently holds
# (it is rebuilt by several cells, so run the intended one just before this)
# and score it against test_label.
start = time.time()

# 'sqrt' is the explicit spelling of what max_features='auto' meant for
# classifiers; the 'auto' alias was deprecated in scikit-learn 1.1 and removed
# in 1.3, so the original call fails on current releases.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the evaluation set.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[2365  324   59   18  118]
 [  47  983   23   10   30]
 [ 765 1139 7616 2062 2113]
 [ 143  121  441 3796   44]
 [ 494  822 1233  281 5584]] 0.6641637556723581 0.6397853700614984
time :  260.5643541812897


## Tout

In [21]:
# Step 1: Riemannian features + accelerometer / oximeter features.
all_final_features = np.concatenate(
    [features_CSP_train_R, train_extra_data], axis=1
)
all_final_features_test = np.concatenate(
    [features_CSP_test_R, test_extra_data], axis=1
)
print(all_final_features.shape)

# Step 2: append the "extra_eeg" features.
all_final_features = np.concatenate(
    [all_final_features, train_extra_data_eeg], axis=1
)
all_final_features_test = np.concatenate(
    [all_final_features_test, test_extra_data_eeg], axis=1
)
print(all_final_features.shape)

# Step 3: append the filter-bank features, giving every feature family at once.
all_final_features = np.concatenate(
    [all_final_features, features_CSP_train], axis=1
)
all_final_features_test = np.concatenate(
    [all_final_features_test, features_CSP_test], axis=1
)
print(all_final_features.shape)

(5412, 4600)
(5412, 4768)
(5412, 5902)


In [22]:
# Fit a 700-tree random forest on whatever all_final_features currently holds
# (it is rebuilt by several cells, so run the intended one just before this)
# and score it against test_label.
start = time.time()

# 'sqrt' is the explicit spelling of what max_features='auto' meant for
# classifiers; the 'auto' alias was deprecated in scikit-learn 1.1 and removed
# in 1.3, so the original call fails on current releases.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the evaluation set.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[2372  321   60   16  115]
 [  45  982   23    8   35]
 [ 777 1169 7533 2103 2113]
 [ 139  122  429 3810   45]
 [ 489  859 1196  302 5568]] 0.6615846691260487 0.6373075172953593
time :  287.50331234931946
