In [1]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
import pandas as pd
from scipy import signal as sg
from Visualizer import Visualizer
from FeatureManager import FeatureManager
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from tempfile import mkdtemp
from shutil import rmtree
from sklearn.externals.joblib import Memory
import pickle

%matplotlib inline

### Obtention des données, features et labels

In [2]:
feat = FeatureManager()
#feat.get_all_esis()
#feat.get_all_minmax()
#feat.get_argmax_tf('pulse_oximeter_infrared')
#feat.get_all_time_Hjorth('activity',excepted=['pulse_oximeter_infrared','accelerometer_x','accelerometer_y','accelerometer_z'])
#feat.get_all_time_Hjorth('mobility',excepted=['pulse_oximeter_infrared','accelerometer_x','accelerometer_y','accelerometer_z'])
#feat.get_all_time_Hjorth('complexity',excepted=['pulse_oximeter_infrared','accelerometer_x','accelerometer_y','accelerometer_z'])
#feat.get_all_freq_Hjorth('mean',excepted=['pulse_oximeter_infrared','accelerometer_x','accelerometer_y','accelerometer_z'])
#feat.get_all_freq_Hjorth('std',excepted=['pulse_oximeter_infrared','accelerometer_x','accelerometer_y','accelerometer_z'])
feat.get_labels()

In [3]:
file = open('Results/features_df.pickle','rb')
features_df = pickle.load(file)
file.close()

In [27]:
feat.data.shape

(38289, 92)

### Train test split

In [4]:
X_train, X_test, y_train, y_test = train_test_split(features_df,feat.labels, test_size=0.3)

### Scaling & svm

#### Memory

In [4]:
cachedir = mkdtemp()
memory = Memory(cachedir=cachedir, verbose=10)

In [5]:
pipeline = Pipeline([('scaler',StandardScaler()),('pca',PCA())\
                     ,('svm',svm.SVC(kernel='rbf',class_weight='balanced',C=10,gamma=6,decision_function_shape='ovr',verbose=True))]\
                   ,memory=memory)

### Fit & test

In [None]:
pipeline.fit(X_train,y_train)

________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(StandardScaler(copy=True, with_mean=True, with_std=True), None,        esis alpha eeg_1  esis alpha eeg_2  esis alpha eeg_3  esis alpha eeg_4  \
38149      1.645997e+07      4.072828e+07      1.145500e+07      1.226489e+08   
10852      5.023313e+06      9.921479e+06      4.366552e+06      3.001176e+07   
25970      1.047674e+07      1.961525e+07      8.492807e+06      1.090926e+08   
25644      4.352397e+06      8.196890e+06      3.665243e+06      3.384170e+07   
22089      3.587898e+07      7.156218e+06      3.150458e+07      9.196406e+07   
15246      4.449893e+06      5.562772e+06      3.273412e+06      1.255859e+07   
20147      8.563359e+06      1.544841e+07      5.662264e+06      9.473529e+07   
26924      3.240536e+06      4.086798e+06      3.073..., 
id
38149    3
10852    3
25970    0
25644    2
22089    4
15246    2
20147

In [25]:
y_pred_test = pipeline.predict(X_test)

In [26]:
print(f1_score(y_test,y_pred_test,average='macro'))

0.2581238622505673


### Grid Search

In [6]:
paramGrid={'pca__n_components':np.arange(5,20)}

In [7]:
grid = GridSearchCV(pipeline,paramGrid,cv=3,scoring='f1_macro')

In [None]:
grid.fit(features_df,feat.labels)

________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(StandardScaler(copy=True, with_mean=True, with_std=True), None,        esis alpha eeg_1  esis alpha eeg_2  esis alpha eeg_3  esis alpha eeg_4  \
12419      1.920279e+06      3.562400e+06      2.951758e+06      1.046340e+07   
12420      2.922446e+10      2.849931e+10      9.148503e+08      5.392722e+14   
12422      2.570998e+06      6.454411e+06      3.690224e+06      2.978813e+07   
12423      1.372674e+07      1.573976e+07      6.170383e+06      6.454161e+07   
12428      2.634718e+06      3.907489e+06      2.954893e+06      1.786511e+07   
12434      3.429428e+06      4.846539e+06      3.416694e+06      1.592699e+07   
12436      2.797656e+06      2.665974e+06      2.840865e+06      1.616888e+07   
12438      8.571367e+07      2.305022e+08      1.406..., 
id
12419    4
12420    4
12422    4
12423    4
12428    4
12434    4
12436

#### Deleting cache

In [None]:
rmtree(cachedir)

### Cross_val_score

In [10]:
score = cross_val_score(pipeline,feat.data,feat.labels,cv=3,scoring='f1_macro')

In [11]:
print(score)

[0.25591232 0.30247879 0.27913686]


### Sauvegarde de la grid

In [None]:
file = open('Results/grid.pickle','wb')
pickle.dump(grid,file)
file.close()

### Base de test

In [None]:
feat_train = FeatureManager(path='Data/test.h5')
feat_train.get_all_esis()
feat_train.get_all_minmax()

In [None]:
pred = Pipeline.predict(feat_train.data)

### Csv pour évaluation

In [None]:
serie = pd.Series(pred,index=pd.Index(range(len(pred)),name='id'),name='sleep_stage')
serie.to_csv('Results/SVC_base.csv',header=True)