# Importation des librairies

In [None]:
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import svm

# Load the project's helper modules directly from their source files under
# ../utils (relative to the current working directory).
import importlib.util  # `import importlib` alone does not guarantee `importlib.util` is loaded
from pathlib import Path


def _load_module(name, path):
    """Import the source file at *path* as a module named *name* and return it.

    Raises ImportError when no import spec/loader can be built for *path*;
    errors raised while executing the module itself propagate unchanged.
    """
    spec = importlib.util.spec_from_file_location(name, str(path))
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load module {name!r} from {path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


# Built with pathlib so the separator is right on every OS (the hard-coded
# "..\\utils\\..." form only resolves on Windows).
_UTILS_DIR = Path("..") / "utils"

preprocessing = _load_module("preprocessing", _UTILS_DIR / "preprocessing.py")
fspliter = _load_module("fspliter", _UTILS_DIR / "files_spliter.py")
results = _load_module("results", _UTILS_DIR / "results.py")

# Lecture des données et preprocessing

In [None]:
# Pipeline: fetch mice data (argument 0), keep day 3, then clean missing
# values and remove outliers, in that order.
data = preprocessing.remove_outliers(
    preprocessing.clean_missing_values(
        fspliter.retrieve_day(fspliter.get_mice(0), 3)
    )
)

# Séparation en sets d'entrainement et de test

In [None]:
# Hold out 20% of the rows for testing (seeded with random_state=42) and drop
# the 'filename' column from both partitions.
train, test = (
    subset.drop(columns=['filename'])
    for subset in train_test_split(data, test_size=0.2, random_state=42)
)

# Derive the two binary datasets from each partition:
# wake/sleep first, then rem/nrem.
train_wake_sleep = preprocessing.wake_sleep_data(train)
test_wake_sleep = preprocessing.wake_sleep_data(test)

train_rem_nrem = preprocessing.rem_nrem_data(train)
test_rem_nrem = preprocessing.rem_nrem_data(test)

In [None]:
def _features_and_labels(frame):
    """Return (*frame* without its 'state' column, the 'state' column itself)."""
    return frame.drop(columns=['state']), frame['state']


# Wake/sleep problem: 'state' is the label, every other column is a feature.
train_wake_sleep_features, train_wake_sleep_labels = _features_and_labels(train_wake_sleep)
test_wake_sleep_features, test_wake_sleep_labels = _features_and_labels(test_wake_sleep)

# REM/NREM problem: same layout.
train_rem_nrem_features, train_rem_nrem_labels = _features_and_labels(train_rem_nrem)
test_rem_nrem_features, test_rem_nrem_labels = _features_and_labels(test_rem_nrem)

# Models

In [None]:
# One default-parameter SVC per binary problem; `fit` returns the estimator,
# so construction and training are chained.
clf_wake_sleep = svm.SVC().fit(train_wake_sleep_features, train_wake_sleep_labels.values)
clf_rem_nrem = svm.SVC().fit(train_rem_nrem_features, train_rem_nrem_labels.values)

# Predict each problem's own test set.
predictions_wake_sleep = clf_wake_sleep.predict(test_wake_sleep_features)
predictions_rem_nrem = clf_rem_nrem.predict(test_rem_nrem_features)

# Résultats

In [None]:
# Score the wake/sleep classifier, using the states present in its test set as the label list.
results.scores(
    test_wake_sleep_labels,
    predictions_wake_sleep,
    test_wake_sleep['state'].unique(),
)

In [None]:
# Score the REM/NREM classifier, using the states present in its test set as the label list.
results.scores(
    test_rem_nrem_labels,
    predictions_rem_nrem,
    test_rem_nrem['state'].unique(),
)

# Prédiction avec hiérarchie

In [None]:
# Two-stage (hierarchical) prediction: the wake/sleep model runs first, then
# every row it labelled 's' is re-labelled by the REM/NREM model.
predict_first = clf_wake_sleep.predict(test_wake_sleep_features)

# Refine all sleep rows in one batched call instead of building a one-row
# DataFrame and calling predict() once per row.
sleep_mask = predict_first == 's'
if sleep_mask.any():  # predict() rejects an empty input, so skip when no row is 's'
    predict_first[sleep_mask] = clf_rem_nrem.predict(test_wake_sleep_features.loc[sleep_mask])

# Per-label counts after refinement (same output lines as before).
for label in ('w', 's', 'r', 'n'):
    print(f"Number of {label} in predict_first: ", int((predict_first == label).sum()))

# NOTE(review): this assumes test_wake_sleep_features keeps the same rows, in
# the same order, as `test` — confirm against preprocessing.wake_sleep_data.
results.scores(test['state'], predict_first, ['n', 'r', 'w'])

# Explication

- On constate que la classification de façon hiérarchique n'est pas très efficace.
- Cela s'explique par le fait que les états `wake` et `rem` sont assez similaires : lors de la première séparation en `wake` et `sleep`, notre modèle a déjà classifié la moitié des `rem` comme étant des `wake`.
- En conséquence, lors de la seconde classification en `rem` et `nrem`, il ne peut déjà plus trouver que la moitié des `rem`, et on constate qu'il en trouve quand même un peu plus des 2/3 parmi ceux restants.