# Projet CentraleSupelec - Dreem 
# Machine Learning course 3A OBT
## Alexis Tuil et Adil Bousfiha
### Approche Deep Learning

# Load useful libraries

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit, train_test_split

from keras.models import Sequential
from keras.layers import Activation, Flatten, Dense, Input, Dropout, Convolution1D, MaxPool1D, GlobalMaxPool1D, GlobalAveragePooling1D, \
    concatenate, SpatialDropout1D, TimeDistributed, Bidirectional, LSTM, Reshape, Conv1D, MaxPooling1D
from keras.optimizers import SGD
from keras.utils import np_utils

import h5py

from sklearn.metrics import f1_score

import operator

# Define the network architecture

In [None]:
# Build a 1D convolutional classifier: three stages of two Conv1D layers each
# (16, 32, then 64 filters, kernel size 3), max-pooling after the first two
# stages, global average pooling, and a dense head ending in 5 softmax outputs.
model = Sequential()

# Depending on the way you decide to stack the input data (cf report part 3.2) you may have
# to change the Reshape layer
# NOTE(review): Reshape fills the (1500, 7) target in row-major order, so each
# "time step" receives 7 consecutive values of the flat 10500-long input. If the
# input is 7 signals of 1500 samples concatenated end to end (presumably what
# np.hstack produces here -- TODO confirm dataset shapes), the 7 columns are NOT
# the 7 signals. Confirm this layout is the intended one.
model.add(Reshape((1500, 7), input_shape=(10500,)))

# Stage 1: 16 filters.
model.add(Conv1D(16, 3, activation='relu'))
model.add(Conv1D(16, 3, activation='relu'))
model.add(MaxPooling1D(3))

# Stage 2: 32 filters.
model.add(Conv1D(32, 3, activation='relu'))
model.add(Conv1D(32, 3, activation='relu'))
model.add(MaxPooling1D(3))

# Stage 3: 64 filters, then collapse the time axis by averaging.
model.add(Conv1D(64, 3, activation='relu'))
model.add(Conv1D(64, 3, activation='relu'))
model.add(GlobalAveragePooling1D())

# Classification head: 5 output classes.
model.add(Dense(64, activation='relu'))
model.add(Dense(5, activation='softmax'))

# summary() prints the table itself and returns None, so wrapping it in
# print() would emit an extra "None" line after the summary.
model.summary()

# Extract the training set

In [None]:
# Open the training signals (HDF5, read-only) and load the sleep-stage labels.
# The HDF5 handle is left open because a later cell reads its datasets.
data = h5py.File('train.h5', mode='r')
y = pd.read_csv('train_y.csv')['sleep_stage']

### Transform labels to One Hot Encoding representation

In [None]:
# One-hot encode the labels over 5 classes (matches the 5-unit softmax output).
y_ohe = np_utils.to_categorical(y, num_classes=5)

### Stack the signals following the approach you choose

In [None]:
# Concatenate the datasets eeg_1 ... eeg_7 horizontally, in channel order.
X = np.hstack([data['eeg_%d' % channel] for channel in range(1, 8)])

### Compile the model

In [None]:
# Compile the network, train it with checkpointing and early stopping on a
# 20% validation split, then save the final model.

# ModelCheckpoint and EarlyStopping are not imported in the notebook's import
# cell; import them here so this cell does not fail with a NameError.
from keras.callbacks import EarlyStopping, ModelCheckpoint

model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])

BATCH_SIZE = 32
EPOCHS = 30

callbacks_list = [
    # Write a checkpoint every time the validation loss improves.
    ModelCheckpoint(
        filepath='best_model.{epoch:02d}-{val_loss:.2f}.h5',
        monitor='val_loss', save_best_only=True),
    # Stop when the validation loss has not improved for 10 epochs. The
    # previous monitor, 'acc', tracked *training* accuracy, and that log key
    # is named 'acc' or 'accuracy' depending on the Keras version; 'val_loss'
    # is always available here because validation_split is set below.
    EarlyStopping(monitor='val_loss', patience=10),
]

history = model.fit(X,
                    y_ohe,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    callbacks=callbacks_list,
                    validation_split=0.2,
                    verbose=1)

# Saves the model as it is after the last epoch run (the best-by-val_loss
# weights are in the 'best_model.*.h5' checkpoints above).
model.save('my_model.h5')  # creates a HDF5 file 'my_model.h5'

# Extract the test set

In [None]:
# Open the test signals (HDF5, read-only); the handle is read by the next cell.
data_test = h5py.File('test.h5', mode='r')

In [None]:
# Concatenate the test datasets eeg_1 ... eeg_7 horizontally, in channel order
# (same stacking as the training matrix).
X_test = np.hstack([data_test['eeg_%d' % channel] for channel in range(1, 8)])

### Predict the test labels

In [None]:
# Run the trained network on the stacked test matrix; the final layer is a
# 5-unit softmax, so each row of the result holds 5 class scores.
y_predicted = model.predict(x=X_test)

### Convert the predicted class scores (softmax outputs) to scalar class labels

In [None]:
# For each row of class scores keep the index of the highest score (the first
# one on ties, as before). np.argmax does this in one vectorized call instead
# of a Python-level max() per row; tolist() keeps the result a plain list of ints.
y_predicted_r = np.argmax(y_predicted, axis=1).tolist()

### Write the submission CSV

In [None]:
# Build the submission table: one row per prediction, with a 0-based 'id'
# column and the predicted 'sleep_stage', then write it to 'submit.csv' with
# the header "id,sleep_stage".
res = pd.DataFrame({
    'id': list(range(len(y_predicted_r))),
    'sleep_stage': y_predicted_r,
})
res.to_csv('submit.csv', index=False)