# Run all methods not balanced

In [1]:
import time
import numpy as np
from tools.csp import generate_projection, generate_eye, extract_feature
from tools.filters import load_filterbank
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from tools.data import DreemDatasets
from preprocessing import Compose, ExtractBands, ExtractSpectrum
from models.riemannian_multiscale import riemannian_multiscale
import pandas as pd

  from ._conv import register_converters as _register_converters


## Paramètres

In [2]:
fs = 50.  # sampling frequency
NO_channels = 7  # number of EEG channels
# Number of Riemannian features per band and time window: the distinct entries
# (upper triangle, diagonal included) of a symmetric NO_channels x NO_channels
# matrix, i.e. n * (n + 1) / 2 = 28 for 7 channels.
# The previous expression `int(n * n + 1) / 2` had misplaced parentheses and
# evaluated to the float 25.0.
NO_riem = NO_channels * (NO_channels + 1) // 2
bw = np.array([2, 4, 8, 13, 22])  # handed to load_filterbank (presumably bandwidths in Hz - TODO confirm)
ftype = 'butter'  # 'fir', 'butter'
forder = 2  # 4
filter_bank = load_filterbank(bw, fs, order=forder, max_freq=23, ftype=ftype)  # get filterbank coeffs
# [start, end] pairs scaled by fs, i.e. expressed in samples once multiplied
# (presumably given in seconds - TODO confirm).
time_windows_flt = np.array([[0, 30],
                             [15, 30],
                             [10, 25],
                             [5, 20],
                             [0, 15],
                             [15, 25],
                             [10, 20],
                             [5, 15],
                             [0, 10]
]) * fs

#time_windows = time_windows[0:1]  # use only largest timewindow


riem_opt = "No_Adaptation"  # {"Riemann","Riemann_Euclid","Whitened_Euclid","No_Adaptation"}
rho = 0.1

time_windows = time_windows_flt.astype(int)  # integer bounds derived from the scaled windows
NO_bands = filter_bank.shape[0]
NO_csp = 24  # Total number of CSP feature per band and timewindow
useCSP = False  # when False, get_features uses generate_eye instead of generate_projection

In [3]:
def get_data(path, train=True):
    """Load the 7 per-channel EEG arrays of one split and stack them.

    Reads ``dataset/<path>/train_split/eeg_<k>.npy`` (k = 1..7) when ``train``
    is truthy, otherwise ``dataset/<path>/test/eeg_<k>.npy``.

    Returns
    -------
    (X, Y) when ``train`` is truthy, where Y is loaded from
    ``dataset/<path>/train_split/targets.npy``; X alone otherwise.
    X is a float64 array whose axes are (first file axis, channel, second file axis).
    """
    split_dir = "dataset/" + path + ("/train_split/" if train else "/test/")

    X = None
    for channel in range(1, 8):
        signal = np.load(split_dir + "eeg_" + str(channel) + ".npy")
        if X is None:
            # The first channel fixes the size of the output buffer.
            X = np.zeros((7, signal.shape[0], signal.shape[1]))
        X[channel - 1] = signal

    # Move the channel axis from position 0 to position 1.
    X = X.transpose((1, 0, 2))
    if not train:
        return X
    Y = np.load(split_dir + "targets.npy")
    return (X, Y)
    
def get_data_extra_data_eeg(path, train=True):
    """Load the per-channel arrays stored under ``extra_eeg`` and flatten them.

    Each ``dataset/<path>/extra_eeg/<split>/eeg_<k>.npy`` file (k = 1..7, split
    is ``train_split`` when ``train`` is truthy, ``test`` otherwise) is a 3-D
    array; its first two axes are swapped and the last two are then merged.

    Returns
    -------
    (X, Y) when ``train`` is truthy, with Y read from
    ``dataset/<path>/train_split/targets.npy``; X alone otherwise.
    X is float64 with shape (file axis 1, 7, file axis 0 * file axis 2).
    """
    base = "dataset/" + path
    split = "train_split" if train else "test"

    X = None
    for channel in range(7):
        feats = np.load(base + "/extra_eeg/" + split + "/eeg_" + str(channel + 1) + ".npy").transpose((1, 0, 2))
        flat = feats.reshape(feats.shape[0], feats.shape[1] * feats.shape[2])
        if X is None:
            # Buffer dimensions come from the first channel.
            X = np.zeros((7, flat.shape[0], flat.shape[1]))
        X[channel] = flat

    X = X.transpose((1, 0, 2))
    if train:
        Y = np.load(base + "/train_split/targets.npy")
        return (X, Y)
    return X

def get_extra_data(path, train=True):
    """Load the accelerometer (x, y, z) and pulse-oximeter arrays of one split.

    Files are read from ``dataset/<path>/train_split/`` when ``train`` is
    truthy and from ``dataset/<path>/test/`` otherwise. Each file is a 3-D
    array; its first two axes are swapped and the last two are then merged.
    The shape of the first signal (after the swap) is printed.

    Returns
    -------
    (X, Y) when ``train`` is truthy, with Y read from
    ``dataset/<path>/train_split/targets.npy``; X alone otherwise.
    X is float64 with shape (file axis 1, 4, file axis 0 * file axis 2).
    """
    use_datasets = ["accelerometer_x", "accelerometer_y", "accelerometer_z", "pulse_oximeter_infrared"]
    folder = "dataset/" + path + ("/train_split/" if train else "/test/")

    X = None
    for idx in range(4):
        feats = np.load(folder + use_datasets[idx] + ".npy").transpose((1, 0, 2))
        if X is None:
            print(feats.shape)
            # Buffer dimensions come from the first signal.
            X = np.zeros((4, feats.shape[0], feats.shape[1] * feats.shape[2]))
        X[idx] = feats.reshape(feats.shape[0], feats.shape[1] * feats.shape[2])

    X = X.transpose((1, 0, 2))
    if not train:
        return X
    Y = np.load(folder + "targets.npy")
    return (X, Y)
    
def get_data_val(path):
    """Load the 7 per-channel EEG arrays and the targets of the validation split.

    Reads ``dataset/<path>/val_split/eeg_<k>.npy`` for k = 1..7 and
    ``dataset/<path>/val_split/targets.npy``.

    Returns
    -------
    (X, Y), X being float64 with axes (first file axis, channel, second file axis).
    """
    val_dir = "dataset/" + path + "/val_split/"

    X = None
    for channel in range(1, 8):
        signal = np.load(val_dir + "eeg_" + str(channel) + ".npy")
        if X is None:
            # The first channel fixes the size of the output buffer.
            X = np.zeros((7, signal.shape[0], signal.shape[1]))
        X[channel - 1] = signal

    Y = np.load(val_dir + "targets.npy")
    return (X.transpose((1, 0, 2)), Y)

def get_extrat_data_val_eeg(path):
    """Load the validation-split arrays stored under ``extra_eeg`` and flatten them.

    Each ``dataset/<path>/extra_eeg/val_split/eeg_<k>.npy`` file (k = 1..7) is
    a 3-D array; its first two axes are swapped and the last two are merged.

    Returns
    -------
    (X, Y), X being float64 with shape (file axis 1, 7, file axis 0 * file axis 2).
    """
    val_dir = "dataset/" + path + "/extra_eeg/val_split/"

    X = None
    for channel in range(7):
        feats = np.load(val_dir + "eeg_" + str(channel + 1) + ".npy").transpose((1, 0, 2))
        flat = feats.reshape(feats.shape[0], feats.shape[1] * feats.shape[2])
        if X is None:
            # Buffer dimensions come from the first channel.
            X = np.zeros((7, flat.shape[0], flat.shape[1]))
        X[channel] = flat

    # NOTE(review): targets are read from inside extra_eeg/ here, whereas
    # get_data_extra_data_eeg reads them from <path>/train_split/ - confirm
    # that extra_eeg/val_split/targets.npy is the intended file.
    Y = np.load(val_dir + "targets.npy")
    return (X.transpose((1, 0, 2)), Y)

def get_extra_data_val(path):
    """Load the accelerometer (x, y, z) and pulse-oximeter arrays of the validation split.

    Files are read from ``dataset/<path>/val_split/``. Each one is a 3-D array;
    its first two axes are swapped and the last two are then merged. The shape
    of the first signal (after the swap) is printed.

    Returns
    -------
    (X, Y), with Y read from ``dataset/<path>/val_split/targets.npy`` and X
    float64 with shape (file axis 1, 4, file axis 0 * file axis 2).
    """
    use_datasets = ["accelerometer_x", "accelerometer_y", "accelerometer_z", "pulse_oximeter_infrared"]
    val_dir = "dataset/" + path + "/val_split/"

    X = None
    for idx in range(4):
        feats = np.load(val_dir + use_datasets[idx] + ".npy").transpose((1, 0, 2))
        if X is None:
            print(feats.shape)
            # Buffer dimensions come from the first signal.
            X = np.zeros((4, feats.shape[0], feats.shape[1] * feats.shape[2]))
        X[idx] = feats.reshape(feats.shape[0], feats.shape[1] * feats.shape[2])

    Y = np.load(val_dir + "targets.npy")
    return (X.transpose((1, 0, 2)), Y)

# Dataset sub-folder under "dataset/" shared by every loader call below.
path = "all"
# Raw EEG signals of the training split (train=True is get_data's default).
train_data, train_label = get_data(path)
# The validation split plays the role of the "test" set in this notebook.
test_data, test_label = get_data_val(path)

## Features simples

### Min - max - freq - energy on pulse et accelerometre

In [4]:
path = "all"
# Accelerometer + pulse-oximeter features: training split, then validation
# split (whose labels are discarded).
train_extra_data, train_extra_label = get_extra_data(path, train=True)
test_extra_data, _ = get_extra_data_val(path)
# One row per sample: 4 signals x 16 values each.
train_extra_data = train_extra_data.reshape(-1, 4 * 16)
test_extra_data = test_extra_data.reshape(-1, 4 * 16)

(30631, 4, 4)
(7658, 4, 4)


In [5]:
start = time.time()

# Random forest on the accelerometer / pulse-oximeter features only.
# max_features='sqrt' replaces the deprecated 'auto' alias: for classifiers both
# select sqrt(n_features) candidates per split, but scikit-learn >= 1.3 rejects 'auto'.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(train_extra_data, train_label)
print("trained")

labels_pred = clf.predict(test_extra_data)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the validation split.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[ 381    2  250    4   98]
 [  77    1  154    1   27]
 [ 103    1 3050   66  217]
 [  26    0  432  679   49]
 [  98    2  871   14 1055]] 0.6745886654478976 0.518054836809841
time :  193.4650731086731


### Idem sur les eegs + mm et XXX

In [6]:
path = "all"
# Per-channel features stored under extra_eeg/: training split, then
# validation split (whose labels are discarded).
train_extra_data_eeg, train_extra_label = get_data_extra_data_eeg(path, train=True)
test_extra_data_eeg, _ = get_extrat_data_val_eeg(path)
print(test_extra_data_eeg.shape)
# One row per sample: 7 channels x 24 values each.
train_extra_data_eeg = train_extra_data_eeg.reshape(-1, 7 * 24)
test_extra_data_eeg = test_extra_data_eeg.reshape(-1, 7 * 24)

(7658, 7, 24)


In [7]:
start = time.time()

# Random forest on the per-channel features loaded from extra_eeg/.
# max_features='sqrt' replaces the deprecated 'auto' alias: for classifiers both
# select sqrt(n_features) candidates per split, but scikit-learn >= 1.3 rejects 'auto'.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(train_extra_data_eeg, train_label)
print("trained")

labels_pred = clf.predict(test_extra_data_eeg)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the validation split.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[ 453    0  202   10   70]
 [  59    1  146    0   54]
 [  96    0 2924  104  313]
 [  21    0  388  764   13]
 [  80    0  645    8 1307]] 0.7115434839383651 0.5632674016257432
time :  308.0689322948456


### concatener

In [8]:
# Column-wise concatenation: extra_eeg features followed by the
# accelerometer / pulse-oximeter features.
all_final_features = np.concatenate(
    [train_extra_data_eeg, train_extra_data],
    axis=1,
)
all_final_features_test = np.concatenate(
    [test_extra_data_eeg, test_extra_data],
    axis=1,
)
print(all_final_features.shape)

(30631, 232)


In [9]:
start = time.time()

# Random forest on the feature matrix assembled in the preceding cell
# (all_final_features / all_final_features_test).
# max_features='sqrt' replaces the deprecated 'auto' alias: for classifiers both
# select sqrt(n_features) candidates per split, but scikit-learn >= 1.3 rejects 'auto'.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the validation split.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[ 495    0  163    5   72]
 [  72    1  142    0   45]
 [  99    0 2992   79  267]
 [  25    0  339  810   12]
 [  83    3  564    8 1382]] 0.741708017759206 0.5893988863012602
time :  403.27120065689087


## Avec CSP

Juste CSP puis concatener avec les autres (csp-acc / csp-eeg / csp acc eeg)

In [10]:
def get_features(data, label, time_windows, useCSP = True):
    """Build a projection from ``data``/``label`` and extract features with it.

    The projection comes from ``generate_projection`` (with ``NO_csp`` and 5
    classes) when ``useCSP`` is truthy, and from ``generate_eye`` otherwise.
    Both use the module-level ``filter_bank``.

    Returns
    -------
    (w, feature_mat): the projection and the output of ``extract_feature``
    applied to ``data`` with that projection.
    """
    if not useCSP:
        w = generate_eye(data, label, filter_bank, time_windows)
    else:
        w = generate_projection(data, label, NO_csp, filter_bank, time_windows, NO_classes=5)
    return (w, extract_feature(data, w, filter_bank, time_windows))

In [11]:
# Build the projection on the training data, then reuse it unchanged to
# extract the features of the validation data.
w, train_feat_CSP = get_features(train_data, train_label, time_windows, useCSP=useCSP)
test_feature_CSP = extract_feature(test_data, w, filter_bank, time_windows)
# The projection is not needed once both feature matrices exist.
del w

In [12]:
# Aliases used by the cells below (no copy is made).
features_CSP_train, features_CSP_test = train_feat_CSP, test_feature_CSP

In [13]:
start = time.time()

# Random forest on the features returned by get_features / extract_feature.
# max_features='sqrt' replaces the deprecated 'auto' alias: for classifiers both
# select sqrt(n_features) candidates per split, but scikit-learn >= 1.3 rejects 'auto'.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(features_CSP_train, train_label)
print("trained")

labels_pred = clf.predict(features_CSP_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the validation split.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[ 470    9  179    3   74]
 [  43   19  130    0   68]
 [  71   10 2904  104  348]
 [  16    0  486  672   12]
 [  54    4  625   14 1343]] 0.7061896056411596 0.5842877621848805
time :  919.6256680488586


In [14]:
# Column-wise concatenation: CSP-pipeline features followed by the
# accelerometer / pulse-oximeter features.
all_final_features = np.concatenate(
    [features_CSP_train, train_extra_data],
    axis=1,
)
all_final_features_test = np.concatenate(
    [features_CSP_test, test_extra_data],
    axis=1,
)
print(all_final_features.shape)

(30631, 1198)


In [15]:
start = time.time()

# Random forest on the feature matrix assembled in the preceding cell
# (all_final_features / all_final_features_test).
# max_features='sqrt' replaces the deprecated 'auto' alias: for classifiers both
# select sqrt(n_features) candidates per split, but scikit-learn >= 1.3 rejects 'auto'.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the validation split.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[ 491   12  155    2   75]
 [  48   23  129    0   60]
 [  65   10 2930  108  324]
 [  19    0  457  696   14]
 [  51   11  596   14 1368]] 0.7192478453904414 0.6004105644463827
time :  1096.9947135448456


In [16]:
# Column-wise concatenation: CSP-pipeline features followed by the
# extra_eeg features.
all_final_features = np.concatenate(
    [features_CSP_train, train_extra_data_eeg],
    axis=1,
)
all_final_features_test = np.concatenate(
    [features_CSP_test, test_extra_data_eeg],
    axis=1,
)
print(all_final_features.shape)

(30631, 1302)


In [17]:
start = time.time()

# Random forest on the feature matrix assembled in the preceding cell
# (all_final_features / all_final_features_test).
# max_features='sqrt' replaces the deprecated 'auto' alias: for classifiers both
# select sqrt(n_features) candidates per split, but scikit-learn >= 1.3 rejects 'auto'.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the validation split.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[ 473   10  172    4   76]
 [  40   21  135    0   64]
 [  73    7 2926  100  331]
 [  15    0  419  741   11]
 [  47    8  614   10 1361]] 0.7210759989553408 0.6016196373934014
time :  1151.7565553188324


In [18]:
# Start from the CSP-pipeline features and append, column-wise, the
# accelerometer / pulse-oximeter features and then the extra_eeg features,
# printing the training-matrix shape after each step.
all_final_features = features_CSP_train
all_final_features_test = features_CSP_test
for block_train, block_test in (
    (train_extra_data, test_extra_data),
    (train_extra_data_eeg, test_extra_data_eeg),
):
    all_final_features = np.concatenate([all_final_features, block_train], axis=1)
    all_final_features_test = np.concatenate([all_final_features_test, block_test], axis=1)
    print(all_final_features.shape)

(30631, 1198)
(30631, 1366)


In [19]:
start = time.time()

# Random forest on the feature matrix assembled in the preceding cell
# (all_final_features / all_final_features_test).
# max_features='sqrt' replaces the deprecated 'auto' alias: for classifiers both
# select sqrt(n_features) candidates per split, but scikit-learn >= 1.3 rejects 'auto'.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the validation split.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[ 502    9  154    2   68]
 [  48   24  129    0   59]
 [  66   12 2952   86  321]
 [  19    0  384  773   10]
 [  56   11  590   10 1373]] 0.7343954034996083 0.6178183804270211
time :  1332.3991961479187


## Riemann

Idem avec Riemann au lieu de CSP

In [20]:
# Riemannian multiscale extractor configured with the notebook-level filter
# bank, time windows, adaptation option and rho.
riemann = riemannian_multiscale(
    filter_bank,
    time_windows,
    riem_opt=riem_opt,
    rho=rho,
    vectorized=True,
)
# fit() is called on the training data and its return value is kept as the
# training features; features() is then applied to the validation data.
features_CSP_train_R = riemann.fit(train_data)
features_CSP_test_R = riemann.features(test_data)

In [21]:
start = time.time()

# Random forest on the Riemannian multiscale features.
# max_features='sqrt' replaces the deprecated 'auto' alias: for classifiers both
# select sqrt(n_features) candidates per split, but scikit-learn >= 1.3 rejects 'auto'.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(features_CSP_train_R, train_label)
print("trained")

labels_pred = clf.predict(features_CSP_test_R)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the validation split.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[ 484    7  174    0   70]
 [  44   15  133    0   68]
 [  57    8 2949  107  316]
 [  21    0  490  665   10]
 [  50   10  596   15 1369]] 0.7158527030556281 0.5870286488868578
time :  2091.5074892044067


In [22]:
# Column-wise concatenation: Riemannian features followed by the
# accelerometer / pulse-oximeter features.
all_final_features = np.concatenate(
    [features_CSP_train_R, train_extra_data],
    axis=1,
)
all_final_features_test = np.concatenate(
    [features_CSP_test_R, test_extra_data],
    axis=1,
)
print(all_final_features.shape)

(30631, 4600)


In [23]:
start = time.time()

# Random forest on the feature matrix assembled in the preceding cell
# (all_final_features / all_final_features_test).
# max_features='sqrt' replaces the deprecated 'auto' alias: for classifiers both
# select sqrt(n_features) candidates per split, but scikit-learn >= 1.3 rejects 'auto'.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the validation split.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[ 495    6  165    0   69]
 [  52   19  129    0   60]
 [  66   13 2938  104  316]
 [  18    1  491  667    9]
 [  47   13  587   16 1377]] 0.7176808566205276 0.5936981532731522
time :  2749.4361786842346


In [24]:
# Column-wise concatenation: Riemannian features followed by the
# extra_eeg features.
all_final_features = np.concatenate(
    [features_CSP_train_R, train_extra_data_eeg],
    axis=1,
)
all_final_features_test = np.concatenate(
    [features_CSP_test_R, test_extra_data_eeg],
    axis=1,
)
print(all_final_features.shape)

(30631, 4704)


In [25]:
start = time.time()

# Random forest on the feature matrix assembled in the preceding cell
# (all_final_features / all_final_features_test).
# max_features='sqrt' replaces the deprecated 'auto' alias: for classifiers both
# select sqrt(n_features) candidates per split, but scikit-learn >= 1.3 rejects 'auto'.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the validation split.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[ 484    6  174    0   71]
 [  41   18  129    0   72]
 [  53   10 2970   87  317]
 [  18    0  452  705   11]
 [  52   10  602   11 1365]] 0.7236876469051972 0.5993303214694508
time :  2546.7298605442047


In [26]:
# Start from the Riemannian features and append, column-wise, the
# accelerometer / pulse-oximeter features and then the extra_eeg features,
# printing the training-matrix shape after each step.
all_final_features = features_CSP_train_R
all_final_features_test = features_CSP_test_R
for block_train, block_test in (
    (train_extra_data, test_extra_data),
    (train_extra_data_eeg, test_extra_data_eeg),
):
    all_final_features = np.concatenate([all_final_features, block_train], axis=1)
    all_final_features_test = np.concatenate([all_final_features_test, block_test], axis=1)
    print(all_final_features.shape)

(30631, 4600)
(30631, 4768)


In [27]:
start = time.time()

# Random forest on the feature matrix assembled in the preceding cell
# (all_final_features / all_final_features_test).
# max_features='sqrt' replaces the deprecated 'auto' alias: for classifiers both
# select sqrt(n_features) candidates per split, but scikit-learn >= 1.3 rejects 'auto'.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the validation split.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[ 498    8  162    0   67]
 [  50   19  130    0   61]
 [  65   10 2961   87  314]
 [  16    1  441  718   10]
 [  43   14  605    8 1370]] 0.7268216244450248 0.6046064049161487
time :  2495.229054927826


## Tout

In [28]:
# Start from the Riemannian features and append, column-wise and in this
# order: accelerometer / pulse-oximeter features, extra_eeg features and
# CSP-pipeline features. The training-matrix shape is printed after each step.
all_final_features = features_CSP_train_R
all_final_features_test = features_CSP_test_R
for block_train, block_test in (
    (train_extra_data, test_extra_data),
    (train_extra_data_eeg, test_extra_data_eeg),
    (features_CSP_train, features_CSP_test),
):
    all_final_features = np.concatenate([all_final_features, block_train], axis=1)
    all_final_features_test = np.concatenate([all_final_features_test, block_test], axis=1)
    print(all_final_features.shape)

(30631, 4600)
(30631, 4768)
(30631, 5902)


In [29]:
start = time.time()

# Random forest on the feature matrix assembled in the preceding cell
# (all_final_features / all_final_features_test).
# max_features='sqrt' replaces the deprecated 'auto' alias: for classifiers both
# select sqrt(n_features) candidates per split, but scikit-learn >= 1.3 rejects 'auto'.
clf = RandomForestClassifier(n_estimators=700, max_features='sqrt', random_state=0)
clf.fit(all_final_features, train_label)
print("trained")

labels_pred = clf.predict(all_final_features_test)
print("predicted")

# Confusion matrix, accuracy and macro-averaged F1 on the validation split.
cm = confusion_matrix(test_label, labels_pred)
acc = accuracy_score(test_label, labels_pred)
f1 = f1_score(test_label, labels_pred, average='macro')
print(cm, acc, f1)

print("time : ", time.time() - start)

trained
predicted
[[ 500    7  165    0   63]
 [  50   20  129    0   61]
 [  67   11 2960   84  315]
 [  18    0  437  722    9]
 [  42   11  599   12 1376]] 0.7283886132149386 0.6073168052488374
time :  2750.351104259491


In [31]:
# Persist the CSP-pipeline and Riemannian feature matrices (training and
# validation) as .npy files in the current working directory.
for filename, array in (
    ("features_CSP_false_allfenetres_sans0_train.npy", features_CSP_train),
    ("features_CSP_false_allfenetres_sans0_test.npy", features_CSP_test),
    ("features_R_allfenetres_sans0_train.npy", features_CSP_train_R),
    ("features_R_allfenetres_sans0_test.npy", features_CSP_test_R),
):
    np.save(filename, array)

TODO : 

Deal with unbalanced data et UP le label 1

Normaliser les données (avant et après le préprocessing ?)

Cross val et hyper parametres RF, SVM gradient boosting

https://stats.stackexchange.com/questions/260736/multiclass-classification-having-class-imbalance-with-gradient-boosting-classifi

https://towardsdatascience.com/hyperparameter-tuning-the-random-forest-in-python-using-scikit-learn-28d2aa77dd74

ya github 
