# Model for common spatial pattern (CSP) and Riemannian method feature calculation and classification for EEG data


In [1]:
import time
import numpy as np
from tools.csp import generate_projection, generate_eye, extract_feature
from tools.filters import load_filterbank
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from tools.data import DreemDatasets
from preprocessing import Compose, ExtractBands, ExtractSpectrum
from models.riemannian_multiscale import riemannian_multiscale

  from ._conv import register_converters as _register_converters


## Config

In [2]:
fs = 50.  # sampling frequency
NO_channels = 7  # number of EEG channels
NO_riem = int(NO_channels * NO_channels + 1) / 2  # Total number of CSP feature per band and timewindow
bw = np.array([2, 4, 8, 13, 22])
ftype = 'butter'  # 'fir', 'butter'
forder = 2  # 4
filter_bank = load_filterbank(bw, fs, order=forder, max_freq=23, ftype=ftype)  # get filterbank coeffs
time_windows_flt = np.array([[0, 30],
                            [5, 25],
                             [15, 30],
                             [10, 25],
                             [5, 20],
                             [5, 15],
                             [0, 10],
                             [5, 15],
                             [15, 25],
                             [10, 20],
                             [5, 15],
                             [5, 10]]) * fs
time_windows = time_windows_flt.astype(int)
# restrict time windows and frequency bands
#time_windows = time_windows[0:1]  # use only largest timewindow

NO_bands = filter_bank.shape[0]
riem_opt = "No_Adaptation"  # {"Riemann","Riemann_Euclid","Whitened_Euclid","No_Adaptation"}
rho = 0.1
NO_csp = 20  # Total number of CSP feature per band and timewindow
useCSP = True

## Generate dataset qui vont biens

On veut : 
    
    1- Tout le train sans split
    2- Tout le train avec split
    3- Tout le train équilibré sans split
    4- Tout le train équilibré avec split
    5- Tout le test 

In [3]:
use_datasets = ['accelerometer_x','accelerometer_y','accelerometer_z','eeg_1', 'eeg_2', 'eeg_3', 'eeg_4', 'eeg_5', 'eeg_6', 'eeg_7', 'pulse_oximeter_infrared']
seed = 1
"""train_set, val_set = DreemDatasets('dataset/train.h5', 'dataset/train_y.csv', 
                                   split_train_val=1, seed=seed, balance_data=False,keep_datasets=use_datasets).get()
train_set.save_data("dataset/all/train")
train_set.close()  # Ne ferme que les fichiers h5. Si mis en mémoire, on a toujours accès aux données !
val_set.close()
train_set, val_set = DreemDatasets('dataset/train.h5', 'dataset/train_y.csv', 
                                   split_train_val=0.8, seed=seed, balance_data=False,keep_datasets=use_datasets).get()
train_set.save_data("dataset/all/train_split")
val_set.save_data("dataset/all/val_split")
train_set.close()  # Ne ferme que les fichiers h5. Si mis en mémoire, on a toujours accès aux données !
val_set.close()
train_set, val_set = DreemDatasets('dataset/train.h5', 'dataset/train_y.csv', 
                                   split_train_val=1, seed=seed, balance_data=True,keep_datasets=use_datasets).get()
train_set.save_data("dataset/balanced/train")
train_set.close()  # Ne ferme que les fichiers h5. Si mis en mémoire, on a toujours accès aux données !
val_set.close()
train_set, val_set = DreemDatasets('dataset/train.h5', 'dataset/train_y.csv', 
                                   split_train_val=0.8, seed=seed, balance_data=True,keep_datasets=use_datasets).get()
train_set.save_data("dataset/balanced/train_split")
val_set.save_data("dataset/balanced/val_split")
train_set.close()  # Ne ferme que les fichiers h5. Si mis en mémoire, on a toujours accès aux données !
val_set.close()


from tools.data import DreemDataset
test_set = DreemDataset('dataset/test.h5', keep_datasets=use_datasets).init()
test_set.save_data("dataset/all/test")
test_set.close()"""

'train_set, val_set = DreemDatasets(\'dataset/train.h5\', \'dataset/train_y.csv\', \n                                   split_train_val=1, seed=seed, balance_data=False,keep_datasets=use_datasets).get()\ntrain_set.save_data("dataset/all/train")\ntrain_set.close()  # Ne ferme que les fichiers h5. Si mis en mémoire, on a toujours accès aux données !\nval_set.close()\ntrain_set, val_set = DreemDatasets(\'dataset/train.h5\', \'dataset/train_y.csv\', \n                                   split_train_val=0.8, seed=seed, balance_data=False,keep_datasets=use_datasets).get()\ntrain_set.save_data("dataset/all/train_split")\nval_set.save_data("dataset/all/val_split")\ntrain_set.close()  # Ne ferme que les fichiers h5. Si mis en mémoire, on a toujours accès aux données !\nval_set.close()\ntrain_set, val_set = DreemDatasets(\'dataset/train.h5\', \'dataset/train_y.csv\', \n                                   split_train_val=1, seed=seed, balance_data=True,keep_datasets=use_datasets).get()\ntrain_set.s

In [4]:
def get_data(path, train= True,  one_vs_all = False, limit= None):
    if train:
        for i in range(7):
            if i==0:
                feature_0 = np.load("dataset/"+path+"/train/eeg_" + str(i + 1) + ".npy")
                X = np.zeros((7, feature_0.shape[0], feature_0.shape[1]))
                X[0] = feature_0
                del feature_0
            else:
                X[i] = np.load("dataset/"+path+"/train/eeg_" + str(i + 1) + ".npy")
        Y = np.load("dataset/"+path+"/train/targets.npy")
        X = X.transpose((1, 0, 2))
    else:
        for i in range(7):
            if i==0:
                feature_0 = np.load("dataset/"+path+"/val_split/eeg_" + str(i + 1) + ".npy")
                X = np.zeros((7, feature_0.shape[0], feature_0.shape[1]))
                X[0] = feature_0
                del feature_0
            else:
                X[i] = np.load("dataset/"+path+"/val_split/eeg_" + str(i + 1) + ".npy")
        Y = np.load("dataset/"+path+"/val_split/targets.npy")
        X = X.transpose((1, 0, 2))
    if one_vs_all:
        Y[Y > 2] = 0
        Y[Y < 2] = 0
        Y[Y == 2] = 1
    if limit is not None:
        X = X[:limit]
        Y = Y[:limit]
    return(X, Y)



path = "balanced"
train_data, train_label = get_data(path, train = True, one_vs_all = False)
path = "all"
eval_data, eval_label = get_data(path, train = False, one_vs_all = False)
print(train_data.shape, train_label.shape)

(6765, 7, 1500) (6765,)


## Extraction des features par CSP

In [5]:
def get_features(data, label, time_windows, useCSP = True, NO_csp = 20):
    if useCSP:
        w = generate_projection(data, label, NO_csp, filter_bank, time_windows, NO_classes=5)
    else:
        w = generate_eye(data, label, filter_bank, time_windows)
    feature_mat = extract_feature(data, w, filter_bank, time_windows)
    return(w, feature_mat)

In [6]:
w, train_feat_CSP = get_features(train_data, train_label, time_windows, useCSP)
eval_feature_CSP = extract_feature(eval_data, w, filter_bank, time_windows)

## Extraction des features par Riemann

In [7]:
riemann = riemannian_multiscale(filter_bank, time_windows, riem_opt=riem_opt, rho=rho, vectorized=True)
train_feat_R = riemann.fit(train_data)
eval_feature_R = riemann.features(eval_data)

## Random Forest

In [10]:
RF_CSP = RandomForestClassifier(n_estimators=500, random_state=0)
RF_CSP.fit(train_feat_CSP, train_label)

RF_R = RandomForestClassifier(n_estimators=500, random_state=0)
RF_R.fit(train_feat_R, train_label)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=None,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

## Eval

In [11]:
labels_pred = RF_CSP.predict(eval_feature_CSP)
CM = confusion_matrix(eval_label, labels_pred)
Acc = accuracy_score(eval_label, labels_pred)
F1 = f1_score(eval_label, labels_pred, average='macro')

print(CM, Acc, F1)

labels_pred = RF_R.predict(eval_feature_R)
CM = confusion_matrix(eval_label, labels_pred)
Acc = accuracy_score(eval_label, labels_pred)
F1 = f1_score(eval_label, labels_pred, average='macro')

print(CM, Acc, F1)

[[ 627   62   21    8   17]
 [   0  260    0    0    0]
 [ 177  323 1864  546  527]
 [  28   37   85 1021   15]
 [ 115  222  256   83 1364]] 0.6706711935231131 0.6513909892605189
[[ 619   79   16    3   18]
 [   0  260    0    0    0]
 [ 146  363 1832  545  551]
 [  31   33   98 1014   10]
 [ 117  231  248   77 1367]] 0.6649255680334291 0.6459635244279128


In [17]:
path="balanced"
train_extra_data, train_extra_label = get_extra_data(path, train = True, one_vs_all = False)
path="all"
eval_extra_data, eval_extra_label = get_extra_data(path, train = False, one_vs_all = False)
X = train_extra_data.reshape(-1, 4*16)
X_val = eval_extra_data.reshape(-1,  4*16)

all_final_features = np.concatenate((train_feat_CSP, X), axis= 1)
all_final_features_R = np.concatenate((train_feat_R, X), axis= 1)
all_final_features_val = np.concatenate((eval_feature_CSP, X_val), axis= 1)
all_final_features_val_R = np.concatenate((eval_feature_R, X_val), axis= 1)


RF_CSP = RandomForestClassifier(n_estimators=500, random_state=0)
RF_CSP.fit(all_final_features, train_label)

RF_R = RandomForestClassifier(n_estimators=500, random_state=0)
RF_R.fit(all_final_features_R, train_label)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=None,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [18]:
labels_pred = RF_CSP.predict(all_final_features_val)
CM = confusion_matrix(eval_label, labels_pred)
Acc = accuracy_score(eval_label, labels_pred)
F1 = f1_score(eval_label, labels_pred, average='macro')

print(CM, Acc, F1)

labels_pred = RF_R.predict(all_final_features_val_R)
CM = confusion_matrix(eval_label, labels_pred)
Acc = accuracy_score(eval_label, labels_pred)
F1 = f1_score(eval_label, labels_pred, average='macro')

print(CM, Acc, F1)

[[ 630   69   18    4   14]
 [   0  260    0    0    0]
 [ 163  299 1904  540  531]
 [  25   35   86 1024   16]
 [ 108  187  261   93 1391]] 0.6802037085400888 0.6621325628028887
[[ 613   85   13    4   20]
 [   0  260    0    0    0]
 [ 132  358 1861  544  542]
 [  32   37   99 1004   14]
 [ 107  210  250   84 1389]] 0.6694959519456777 0.6499583710353787


Il faudrait : 

    1 - Comprendre pk N = 20 marche
    2 - Faire des stats sur les méthodes (temps et accuracy)      ok
    4 - Ajouter les features des 3 autres courbes + les probas    ok
    
    6 - Images + Resnet

## Stats

In [6]:
path = "all"
train_data, train_label = get_data(path, train = True, one_vs_all = False)
eval_data, eval_label = get_data(path, train = False, one_vs_all = False)
print(train_data.shape, train_label.shape)

(30631, 7, 1500) (30631,)


In [9]:
useCSP = True
NO_time_windows = int(time_windows.size / 2)
start = time.time()
w, train_feat_CSP = get_features(train_data, train_label, useCSP)
RF_CSP = RandomForestClassifier(n_estimators=1000, random_state=0)
RF_CSP.fit(train_feat_CSP, train_label)
eval_feature_CSP = extract_feature(eval_data, w, filter_bank, time_windows)
np.save("features_CSP_train_split_True.npy", train_feat_CSP)
np.save("features_CSP_val_split_True.npy", eval_feature_CSP)
labels_pred = RF_CSP.predict(eval_feature_CSP)
CM = confusion_matrix(eval_label, labels_pred)
Acc = accuracy_score(eval_label, labels_pred)
F1 = f1_score(eval_label, labels_pred, average='macro')
print(time.time()-start)
print(CM, Acc, F1)

useCSP = False
NO_time_windows = int(time_windows.size / 2)
start = time.time()
w, train_feat_CSP = get_features(train_data, train_label, useCSP)
RF_CSP = RandomForestClassifier(n_estimators=1000, random_state=0)
RF_CSP.fit(train_feat_CSP, train_label)
eval_feature_CSP = extract_feature(eval_data, w, filter_bank, time_windows)
np.save("features_CSP_train_split_False.npy", train_feat_CSP)
np.save("features_CSP_val_split_False.npy", eval_feature_CSP)
labels_pred = RF_CSP.predict(eval_feature_CSP)
CM = confusion_matrix(eval_label, labels_pred)
Acc = accuracy_score(eval_label, labels_pred)
F1 = f1_score(eval_label, labels_pred, average='macro')
print(time.time()-start)
print(CM, Acc, F1)

6050.65074467659
[[ 502    5  165    4   59]
 [  37   13  149    0   61]
 [  53    4 2995   79  306]
 [  15    0  425  722   24]
 [  59    6  546    9 1420]] 0.7380517106294071 0.6062919675553691
2918.735505580902
[[ 464    8  179    4   80]
 [  40   16  133    0   71]
 [  76    7 2902  107  345]
 [  13    0  502  659   12]
 [  53    6  622   17 1342]] 0.7029250457038391 0.5768417304725457


In [10]:
methods = ["No_Adaptation", "Riemann","Riemann_Euclid","Whitened_Euclid"]
NO_time_windows = time_windows.shape[0]
for riem_opt in methods:
    try:
        start = time.time()
        NO_time_windows = time_windows.shape[0]
        NO_features = NO_riem * NO_bands * NO_time_windows
        riemann = riemannian_multiscale(filter_bank, time_windows, riem_opt=riem_opt, rho=rho, vectorized=True)
        train_feat_R = riemann.fit(train_data)
        RF_R = RandomForestClassifier(n_estimators=1000, random_state=0)
        RF_R.fit(train_feat_R, train_label)
        eval_feature_R = riemann.features(eval_data)
        np.save("features_R_train_split_"+str(riem_opt), train_feat_CSP)
        np.save("features_R_val_split_"+str(riem_opt), eval_feature_CSP)
        labels_pred = RF_R.predict(eval_feature_R)
        CM = confusion_matrix(eval_label, labels_pred)
        Acc = accuracy_score(eval_label, labels_pred)
        F1 = f1_score(eval_label, labels_pred, average='macro')
        print(time.time()-start)
        print(CM, Acc, F1)
    except Exception as e:
        print(e)
        pass

5549.880460977554
[[ 480    5  179    0   71]
 [  44   12  140    0   64]
 [  55    6 2934  110  332]
 [  17    1  499  657   12]
 [  46    8  598   18 1370]] 0.7120658135283364 0.5808282267416092


  eigvals = numpy.diag(operator(eigvals))


Covariance matrices must be positive definite. Add regularization to avoid this error.
Input contains NaN, infinity or a value too large for dtype('float32').
4730.435000419617
[[ 480    2  196    5   52]
 [  48    6  159    3   44]
 [  52    3 3021   79  282]
 [  11    0  541  567   67]
 [  57    3  745   38 1197]] 0.6882998171846435 0.5475961588532392
5817.639447450638
[[ 480    5  179    0   71]
 [  44   12  140    0   64]
 [  55    6 2934  110  332]
 [  17    1  499  657   12]
 [  46    8  598   18 1370]] 0.7120658135283364 0.5808282267416092


## RF - Boosting - NN

## Add features of other signals

In [3]:
from preprocessing.features import ExtractFeatures
extract_features = ExtractFeatures(bands='*', features=['min', 'max', 'energy', 'frequency','mmd','esis'])
use_datasets = ["eeg_1","eeg_2","eeg_3","eeg_4","eeg_5","eeg_6","eeg_7"]

transformations = {
    "eeg_1": extract_features,
    "eeg_2": extract_features,
    "eeg_3": extract_features,
    "eeg_4":extract_features,
    "eeg_5":extract_features,
    "eeg_6":extract_features,
    "eeg_7":extract_features
}

seed = 1
"""
train_set, val_set = DreemDatasets('dataset/train.h5', 'dataset/train_y.csv', 
                                   split_train_val=1, seed=seed, balance_data=False,keep_datasets=use_datasets,transforms=transformations).get()
train_set.save_data("dataset/all/extra_eeg/train")
train_set.close()  # Ne ferme que les fichiers h5. Si mis en mémoire, on a toujours accès aux données !
val_set.close()
train_set, val_set = DreemDatasets('dataset/train.h5', 'dataset/train_y.csv', 
                                   split_train_val=0.8, seed=seed, balance_data=False,keep_datasets=use_datasets,transforms=transformations).get()
train_set.save_data("dataset/all/extra_eeg/train_split")
val_set.save_data("dataset/all/extra_eeg/val_split")
train_set.close()  # Ne ferme que les fichiers h5. Si mis en mémoire, on a toujours accès aux données !
val_set.close()
train_set, val_set = DreemDatasets('dataset/train.h5', 'dataset/train_y.csv', 
                                   split_train_val=1, seed=seed, balance_data=True,keep_datasets=use_datasets,transforms=transformations).get()
train_set.save_data("dataset/balanced/extra_eeg/train")
train_set.close()  # Ne ferme que les fichiers h5. Si mis en mémoire, on a toujours accès aux données !
val_set.close()
train_set, val_set = DreemDatasets('dataset/train.h5', 'dataset/train_y.csv', 
                                   split_train_val=0.8, seed=seed, balance_data=True,keep_datasets=use_datasets,transforms=transformations).get()
train_set.save_data("dataset/balanced/extra_eeg/train_split")
val_set.save_data("dataset/balanced/extra_eeg/val_split")
train_set.close()  # Ne ferme que les fichiers h5. Si mis en mémoire, on a toujours accès aux données !
val_set.close
"""
#from preprocessing.features import ExtractFeatures
#extract_features = ExtractFeatures(bands='*', features=['min', 'max', 'energy', 'frequency'])
#use_datasets = ["eeg_1","eeg_2","eeg_3","eeg_4","eeg_5","eeg_6","eeg_7"]




from tools.data import DreemDataset
test_set = DreemDataset('dataset/test.h5', keep_datasets=use_datasets,transforms=transformations).init()
test_set.save_data("dataset/all/extra_eeg/test")
test_set.close()

Saving into dataset/all/extra_eeg/test ...
Loading dataset eeg_1 ...
Apply transformations...
Applied.
Loading dataset eeg_2 ...
Apply transformations...
Applied.
Loading dataset eeg_3 ...
Apply transformations...
Applied.
Loading dataset eeg_4 ...
Apply transformations...
Applied.
Loading dataset eeg_5 ...
Apply transformations...
Applied.
Loading dataset eeg_6 ...
Apply transformations...
Applied.
Loading dataset eeg_7 ...
Apply transformations...
Applied.
Loading dataset accelerometer_x ...


ValueError: Not a dataset (not a dataset)

In [15]:
def get_extra_data(path, train= True,  one_vs_all = False, limit= None):
    use_datasets = ["accelerometer_x","accelerometer_y","accelerometer_z","pulse_oximeter_infrared"]
    if train:
        for i in range(4):
            if i==0:
                feature_0 = np.load("dataset/"+path+"/train/" + use_datasets[i] + ".npy").transpose((1, 0, 2))
                X = np.zeros((4, feature_0.shape[0], feature_0.shape[1]*feature_0.shape[2]))
                X[i] = feature_0.reshape(feature_0.shape[0], feature_0.shape[1]*feature_0.shape[2])
                del feature_0
            else:
                feature_0 = np.load("dataset/"+path+"/train/" + use_datasets[i] + ".npy").transpose((1, 0, 2))
                X[i] = feature_0.reshape(feature_0.shape[0], feature_0.shape[1]*feature_0.shape[2])
                del feature_0
        Y = np.load("dataset/"+path+"/train/targets.npy")
        X = X.transpose((1, 0, 2))
    else:
        for i in range(4):
            if i==0:
                feature_0 = np.load("dataset/"+path+"/val_split/" + use_datasets[i] + ".npy").transpose((1, 0, 2))
                X = np.zeros((4, feature_0.shape[0], feature_0.shape[1]*feature_0.shape[2]))
                X[i] = feature_0.reshape(feature_0.shape[0], feature_0.shape[1]*feature_0.shape[2])
            else:
                feature_0 = np.load("dataset/"+path+"/val_split/" + use_datasets[i] + ".npy").transpose((1, 0, 2))
                X[i] = feature_0.reshape(feature_0.shape[0], feature_0.shape[1]*feature_0.shape[2])
                del feature_0
        Y = np.load("dataset/"+path+"/val_split/targets.npy")
        X = X.transpose((1, 0, 2))
    if one_vs_all:
        Y[Y > 2] = 0
        Y[Y < 2] = 0
        Y[Y == 2] = 1
    if limit is not None:
        X = X[:limit]
        Y = Y[:limit]
    return(X, Y)

In [49]:
path = "balanced"
train_data, train_label = get_data(path, train = True, one_vs_all = False, limit= 1000)
eval_data, eval_label = get_data(path, train = False, one_vs_all = False, limit= 1000)
print(train_data.shape, train_label.shape)
train_extra_data, train_extra_label = get_extra_data(path, train = True, one_vs_all = False, limit= 1000)
eval_extra_data, eval_extra_label = get_extra_data(path, train = False, one_vs_all = False, limit= 1000)
print(train_extra_data.shape, train_extra_data.shape)

(1000, 7, 1500) (1000,)
(1000, 4, 16) (1000, 4, 16)


### RF with only extra data

In [54]:
clf = RandomForestClassifier(n_estimators=1000, random_state=0)
X = train_extra_data.reshape(-1, 4*16)
X_val = eval_extra_data.reshape(-1,  4*16)
clf.fit(X, train_extra_label)
labels_pred = clf.predict(X_val)
cm = confusion_matrix(eval_extra_label, labels_pred)
acc = accuracy_score(eval_extra_label, labels_pred)
f1 = f1_score(eval_extra_label, labels_pred, average='macro')
print(cm, acc, f1)

[[109  40  11  14  27]
 [ 65  70  19  34  22]
 [ 14  23  63  51  43]
 [  4  14  27 128  23]
 [ 19  25  33  39  83]] 0.453 0.44619586527672295


In [56]:
path = "all"
train_extra_data, train_extra_label = get_extra_data(path, train = True, one_vs_all = False)
eval_extra_data, eval_extra_label = get_extra_data(path, train = False, one_vs_all = False)
print(train_extra_data.shape, train_extra_data.shape)
clf = RandomForestClassifier(n_estimators=1000, random_state=0)
X = train_extra_data.reshape(-1, 4*16)
X_val = eval_extra_data.reshape(-1,  4*16)
clf.fit(X, train_extra_label)
labels_pred = clf.predict(X_val)
cm = confusion_matrix(eval_extra_label, labels_pred)
acc = accuracy_score(eval_extra_label, labels_pred)
f1 = f1_score(eval_extra_label, labels_pred, average='macro')
print(cm, acc, f1)

(30631, 4, 16) (30631, 4, 16)
[[ 379    2  249    4  101]
 [  76    2  153    2   27]
 [ 103    2 3048   64  220]
 [  26    0  434  677   49]
 [  99    2  853   14 1072]] 0.6761556542178114 0.5203379071054419


## Train and eval with all features

In [7]:
path = "all"
train_extra_data, train_extra_label = get_extra_data(path, train = True, one_vs_all = False)
eval_extra_data, eval_extra_label = get_extra_data(path, train = False, one_vs_all = False)
X = train_extra_data.reshape(-1, 4*16)
X_val = eval_extra_data.reshape(-1,  4*16)
features_CSP_train = np.load("dataset/all/features_CSP_train_split_False.npy")
features_CSP_val = np.load("dataset/all/features_CSP_val_split_False.npy")
all_final_features = np.concatenate((features_CSP_train, X), axis= 1)
all_final_features_val = np.concatenate((features_CSP_val, X_val), axis= 1)
print(all_final_features.shape)

(30631, 4384)


In [8]:
clf = RandomForestClassifier(n_estimators=1000, random_state=0)
clf.fit(all_final_features, train_extra_label)
labels_pred = clf.predict(all_final_features_val)
cm = confusion_matrix(eval_extra_label, labels_pred)
acc = accuracy_score(eval_extra_label, labels_pred)
f1 = f1_score(eval_extra_label, labels_pred, average='macro')
print(cm, acc, f1)

[[ 532    7  142    1   53]
 [  54   19  133    0   54]
 [  59    4 2992   85  297]
 [  17    0  410  741   18]
 [  57   10  520    9 1444]] 0.7479759728388613 0.6229231518045213


In [7]:
path = "all"
train_extra_data, train_extra_label = get_extra_data(path, train = True, one_vs_all = False)
eval_extra_data, eval_extra_label = get_extra_data(path, train = False, one_vs_all = False)
X = train_extra_data.reshape(-1, 4*16)
X_val = eval_extra_data.reshape(-1,  4*16)
features_CSP_train = np.load("dataset/all/features_CSP_train_split_False.npy")
features_CSP_val = np.load("dataset/all/features_CSP_val_split_False.npy")
all_final_features = np.concatenate((features_CSP_train, X), axis= 1)
all_final_features_val = np.concatenate((features_CSP_val, X_val), axis= 1)
print(all_final_features.shape)

clf = RandomForestClassifier(n_estimators=1000, random_state=0)
clf.fit(all_final_features, train_extra_label)
labels_pred = clf.predict(all_final_features_val)
cm = confusion_matrix(eval_extra_label, labels_pred)
acc = accuracy_score(eval_extra_label, labels_pred)
f1 = f1_score(eval_extra_label, labels_pred, average='macro')
print(cm, acc, f1)

(30631, 1576)
[[ 492    9  158    1   75]
 [  48   18  130    0   64]
 [  67    9 2923  103  335]
 [  18    0  469  685   14]
 [  50   10  607   15 1358]] 0.7150692086706711 0.5919485074191881


In [None]:
path = "all"
train_extra_data, train_extra_label = get_extra_data(path, train = True, one_vs_all = False)
eval_extra_data, eval_extra_label = get_extra_data(path, train = False, one_vs_all = False)
X = train_extra_data.reshape(-1, 4*16)
X_val = eval_extra_data.reshape(-1,  4*16)
features_CSP_train = np.load("dataset/all/features_R_train_split_No_Adaptation.npy")
features_CSP_val = np.load("dataset/all/features_R_val_split_No_Adaptation.npy")
all_final_features = np.concatenate((features_CSP_train, X), axis= 1)
all_final_features_val = np.concatenate((features_CSP_val, X_val), axis= 1)
print(all_final_features.shape)

clf = RandomForestClassifier(n_estimators=1000, random_state=0)
clf.fit(all_final_features, train_extra_label)
labels_pred = clf.predict(all_final_features_val)
cm = confusion_matrix(eval_extra_label, labels_pred)
acc = accuracy_score(eval_extra_label, labels_pred)
f1 = f1_score(eval_extra_label, labels_pred, average='macro')
print(cm, acc, f1)

(30631, 1576)


A tester : en supprimant des EEGS, en ajoutant les extrafeatures, en ajouant les extar features des EEGS, en mettant la filetr bank, pour 1000 et 5000 et 500 estimateurs

In [58]:
def get_data_f(momo, jojo,compteur, path, train= True,  one_vs_all = False, limit= None):
    if train:
        for i in range(jojo):
            print(i)
            if i==momo:
                feature_0 = np.load("dataset/"+path+"/train_split/eeg_" + str(i + 1) + ".npy")
                X = np.zeros((compteur, feature_0.shape[0], feature_0.shape[1]))
                X[0] = feature_0
                del feature_0
            else:
                X[i] = np.load("dataset/"+path+"/train_split/eeg_" + str(i + 1) + ".npy")
        Y = np.load("dataset/"+path+"/train_split/targets.npy")
        X = X.transpose((1, 0, 2))
    else:
        for i in range(momo,jojo):
            if i==momo:
                feature_0 = np.load("dataset/"+path+"/val_split/eeg_" + str(i + 1) + ".npy")
                X = np.zeros((compteur, feature_0.shape[0], feature_0.shape[1]))
                X[0] = feature_0
                del feature_0
            else:
                X[i] = np.load("dataset/"+path+"/val_split/eeg_" + str(i + 1) + ".npy")
        Y = np.load("dataset/"+path+"/val_split/targets.npy")
        X = X.transpose((1, 0, 2))
    if one_vs_all:
        Y[Y > 2] = 0
        Y[Y < 2] = 0
        Y[Y == 2] = 1
    if limit is not None:
        X = X[:limit]
        Y = Y[:limit]
    return(X, Y)

In [59]:
path = "balanced"
train_extra_data, train_extra_label = get_extra_data(path, train = True, one_vs_all = False)
eval_extra_data, eval_extra_label = get_extra_data(path, train = False, one_vs_all = False)
X = train_extra_data.reshape(-1, 4*16)
X_val = eval_extra_data.reshape(-1,  4*16)
#all_final_features = np.concatenate((train_feat_CSP, X), axis= 1)
#all_final_features_val = np.concatenate((eval_feature_CSP, X_val), axis= 1)
for k in [time_windows[0:1], time_windows]:
    for i in range(8):
        for j in range(i):
            try:
                print(i,j)
                train_data, train_label = get_data_f(i,j, i+j,path, train = True, one_vs_all = False)
                eval_data, eval_label = get_data_f(i,j,path, train = False, one_vs_all = False)
                start = time.time()
                print('ok')
                train_data = train_data.transpose((1,0,2))[j:i].transpose((1, 0, 2))
                eval_data = eval_data.transpose((1,0,2))[j:i].transpose((1, 0, 2))
                print(train_data.shape, train_label.shape)
                w, train_feat_CSP = get_features(train_data, train_label, k, useCSP=False)
                eval_feature_CSP = extract_feature(eval_data, w, k, time_windows)
                print("train")
                clf = RandomForestClassifier(n_estimators=1000, random_state=0)
                clf.fit(train_feat_CSP, train_label)
                labels_pred = clf.predict(eval_feature_CSP)
                CM = confusion_matrix(eval_label, labels_pred)
                Acc = accuracy_score(eval_label, labels_pred)
                F1 = f1_score(eval_label, labels_pred, average='macro')
                print(CM, Acc, F1)
                print(time.time() - start)
                start = time.time()
                print("add features")
                all_final_features = np.concatenate((train_feat_CSP, X), axis= 1)
                all_final_features_val = np.concatenate((eval_feature_CSP, X_val), axis= 1)
                print(all_final_features.shape, train_label.shape)
                clf = RandomForestClassifier(n_estimators=1000, random_state=0)
                clf.fit(all_final_features, train_label)
                labels_pred = clf.predict(all_final_features_val)
                CM = confusion_matrix(eval_label, labels_pred)
                Acc = accuracy_score(eval_label, labels_pred)
                F1 = f1_score(eval_label, labels_pred, average='macro')
                print(CM, Acc, F1)
                print(time.time() - start)
            except Exception as e:
                print(e)
                pass

1 0
local variable 'X' referenced before assignment
2 0
local variable 'X' referenced before assignment
2 1
0
local variable 'X' referenced before assignment
3 0
local variable 'X' referenced before assignment
3 1
0
local variable 'X' referenced before assignment
3 2
0
local variable 'X' referenced before assignment
4 0
local variable 'X' referenced before assignment
4 1
0
local variable 'X' referenced before assignment
4 2
0
local variable 'X' referenced before assignment
4 3
0
local variable 'X' referenced before assignment
5 0
local variable 'X' referenced before assignment
5 1
0
local variable 'X' referenced before assignment
5 2
0
local variable 'X' referenced before assignment
5 3
0
local variable 'X' referenced before assignment
5 4
0
local variable 'X' referenced before assignment
6 0
local variable 'X' referenced before assignment
6 1
0
local variable 'X' referenced before assignment
6 2
0
local variable 'X' referenced before assignment
6 3
0
local variable 'X' referenced befo