# Import

### Librerie

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from keras.models import Sequential, Model
from keras.layers import Dense, Bidirectional, LSTM, SimpleRNN, Dropout, TimeDistributed, Layer, Input, Conv1D, MaxPooling1D, GlobalMaxPool1D
from sklearn.metrics import *
from keras.optimizers import Adam

### Dataset

In [None]:
# Load the records; the CSV must provide the 'user_id' and 'date' columns,
# which are turned into the index in a later cell.
dataset = pd.read_csv('dati_continui_con_id.csv')

In [None]:
# Will hold one DataFrame per user (the rows recorded for that user);
# it is filled in a later cell.
utenti = []
# Load the user registry so that every user id can be walked through.
elenco_utenti = pd.read_csv('../dataset/userinfo.csv')
# Discard the columns at positions 0 and 3, then name the five that remain.
elenco_utenti.drop(columns=elenco_utenti.columns[[0, 3]], inplace=True)
elenco_utenti.columns = ['user_id', 'timezone', 'sex', 'age', 'height']
elenco_utenti.head()

In [None]:
# Index the records by (user_id, date) so that dataset.loc[user_id] selects
# all rows belonging to one user.
dataset.set_index(['user_id','date'], inplace=True)

In [None]:
# For every registered user that also appears in the dataset, add the
# DataFrame holding that user's rows to the list (registry order is kept).
utenti.extend(
    dataset.loc[riga['user_id']]
    for _, riga in elenco_utenti.iterrows()
    if riga['user_id'] in dataset.index
)

In [None]:
# Min-max normalisation helper
scaler = MinMaxScaler(feature_range=(0, 1))


def normalizza(dat):
    """Fit the shared scaler on *dat* and return *dat* rescaled to [0, 1].

    NOTE(review): the module-level ``scaler`` is re-fitted on every call, so
    after several calls it only remembers the min/max of the most recent
    input. Any later ``scaler.inverse_transform`` therefore uses that last
    input's range -- confirm this is intended.
    """
    return scaler.fit_transform(dat)

In [None]:
# Normalise every per-user dataset, keeping the original order.
utenti_normalizzati = [normalizza(u) for u in utenti]

In [None]:
# Sanity check: number of per-user arrays that were normalised.
len(utenti_normalizzati)

In [None]:
# n_past is the number of steps we look back in the past to predict the next target value.
col2pred = 4  # 0:steps, 1:bedin, 2:bedout, 3:sleep duration, 4:deep duration, 5:lightduration
window_size = 7


def createXY(dataset, n_past, target_col=None):
    """Build sliding-window samples and their next-step targets.

    For every row index ``i`` from ``n_past`` up to ``len(dataset) - 1`` the
    sample is the block of the ``n_past`` preceding rows (all columns) and
    the target is the value of one column at row ``i``.

    Args:
        dataset: 2-D array indexed as ``dataset[row, column]``.
        n_past: number of preceding rows that make up one sample.
        target_col: column index of the value to predict. When ``None``
            (the default) the module-level ``col2pred`` is used, so existing
            two-argument calls behave exactly as before.

    Returns:
        ``(X, Y)`` as NumPy arrays. When at least one window exists, ``X``
        has shape ``(len(dataset) - n_past, n_past, dataset.shape[1])`` and
        ``Y`` has shape ``(len(dataset) - n_past,)``. When ``dataset`` has
        ``n_past`` rows or fewer, both arrays are empty with shape ``(0,)``;
        callers rely on that to detect and skip too-short inputs.
    """
    if target_col is None:
        target_col = col2pred
    row_indices = range(n_past, len(dataset))
    # Window i holds rows [i - n_past, i) with every feature column.
    dataX = [dataset[i - n_past:i, :] for i in row_indices]
    # The target is the chosen column of the row right after the window.
    dataY = [dataset[i, target_col] for i in row_indices]
    return np.array(dataX), np.array(dataY)

In [None]:
# Build the training set from the first 7000 normalised users
train_lista = utenti_normalizzati[:7000]

trainX_lista, trainY_lista = [], []

for t in train_lista:
    trainX_temp, trainY_temp = createXY(t, window_size)
    # Skip users whose windows do not come back as a 3-D sample array with a
    # 1-D target array (createXY returns flat empty arrays when a user has
    # too few rows to form a single window).
    if trainX_temp.ndim != 3 or trainY_temp.ndim != 1:
        continue
    trainX_lista.append(trainX_temp)
    trainY_lista.append(trainY_temp)

# Stack the per-user windows and targets along the first axis.
trainX = np.concatenate(trainX_lista)
trainY = np.concatenate(trainY_lista)

In [None]:
# Build the test set from the users after the first 7000
test_lista = utenti_normalizzati[7000:]

testX_lista, testY_lista = [], []

for t in test_lista:
    testX_temp, testY_temp = createXY(t, window_size)
    # Skip users whose windows do not come back as a 3-D sample array with a
    # 1-D target array (createXY returns flat empty arrays when a user has
    # too few rows to form a single window).
    if testX_temp.ndim != 3 or testY_temp.ndim != 1:
        continue
    testX_lista.append(testX_temp)
    testY_lista.append(testY_temp)

# Stack the per-user windows and targets along the first axis.
testX = np.concatenate(testX_lista)
testY = np.concatenate(testY_lista)

In [None]:
# Show the shapes of the train/test inputs followed by the train/test targets.
print(trainX.shape, testX.shape, trainY.shape, testY.shape)

In [None]:
# Number of feature columns in the dataset. 'user_id' and 'date' were moved
# into the index by set_index, so they are not counted by shape[1].
features_num = dataset.shape[1]
features_num

In [None]:
# Append a trailing axis of length 1 to the 3-D training windows, giving the
# 4-D input that is fed to the model.
trainX_cnn = trainX.reshape(trainX.shape + (1,))


def create_cnn_BiLSTM():
    """Build and compile the CNN + bidirectional-LSTM regressor.

    Every time step of the input goes through two Conv1D layers followed by
    pooling (all wrapped in TimeDistributed). The per-step feature vectors
    are then summarised by a bidirectional LSTM, and a single linear unit
    produces the prediction.

    The network emits ONE value per input window, i.e. shape (batch, 1).
    The targets built by createXY are one scalar per window, and the
    post-processing repeats each prediction features_num times and reshapes
    the result to (n_samples, features_num). Returning the full sequence
    from the LSTM would give one value per time step, which matches neither
    the targets nor that reshape.

    Returns:
        The compiled Sequential model (MSE loss, Adam, learning rate 1e-4).
    """
    model = Sequential()
    # Convolutional feature extractor applied independently to each time step.
    model.add(TimeDistributed(Conv1D(filters=128, kernel_size=4, activation='tanh')))
    model.add(TimeDistributed(Conv1D(filters=64, kernel_size=2, activation='tanh')))
    model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
    model.add(TimeDistributed(GlobalMaxPool1D()))
    # return_sequences=False keeps only the final LSTM output, so the model
    # yields one vector (and then one scalar) per window.
    model.add(Bidirectional(LSTM(128, return_sequences=False, activation='tanh')))
    model.add(Dense(1, activation='linear'))

    opt = Adam(learning_rate=0.0001)
    model.compile(loss='mse', optimizer=opt)
    return model


# Instantiate the compiled network that is trained in the next cell.
model = create_cnn_BiLSTM()


In [None]:
# Train for 5 epochs and keep the returned history object.
# The trailing 8192 is presumably an alternative batch size that was tried.
history = model.fit(trainX_cnn, trainY, epochs = 5, verbose = 1, batch_size = 4096) # 8192

In [None]:
# Predict on the training windows and show the raw (still scaled) output.
prediction_train = model.predict(trainX_cnn)
print("prediction\n", prediction_train)
print("\nPrediction Shape-", prediction_train.shape)

# inverse_transform works on rows of features_num columns, so every scalar is
# copied across a full-width row and only column col2pred is kept afterwards.
# NOTE(review): normalizza() re-fits `scaler` on each call, so the scaling is
# undone with the min/max of whichever input was normalised last -- confirm
# that this is intended.
prediction_train_copies_array = np.repeat(prediction_train, features_num, axis=-1)
pred_train_rows = prediction_train_copies_array.reshape(len(prediction_train), features_num)
pred_train = scaler.inverse_transform(pred_train_rows)[:, col2pred]

true_value_train_copies_array = np.repeat(trainY, features_num, axis=-1)
true_value_train_rows = true_value_train_copies_array.reshape(len(trainY), features_num)
true_value_train = scaler.inverse_transform(true_value_train_rows)[:, col2pred]

In [None]:
def metrics(true_value_train, pred_value_train):
    """Print MAE, MSE and RMSE of the predictions against the true values.

    Args:
        true_value_train: ground-truth target values.
        pred_value_train: predicted values, aligned with ``true_value_train``.

    Returns:
        None. The three metrics are printed with three decimals.
    """
    print("**********TRAIN-SET***********")
    print("MAE: {:.3f}".format(mean_absolute_error(
        true_value_train, pred_value_train)))
    mse = mean_squared_error(true_value_train, pred_value_train)
    print("MSE: {:.3f}".format(mse))
    # RMSE is the square root of the MSE. It is derived from the MSE directly
    # rather than through mean_squared_error's `squared` flag: squared=True
    # returns the plain MSE, and the flag is deprecated in recent
    # scikit-learn releases.
    print("RMSE: {:.3f}".format(np.sqrt(mse)))


metrics(true_value_train, pred_train)