### Imports

In [5]:
#imports 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import os
from datetime import datetime
 
from sklearn.model_selection import train_test_split # to split the data into two parts
from sklearn.cross_validation import KFold # use for cross validation
from sklearn.preprocessing import StandardScaler # for normalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline # pipeline making
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import SelectFromModel
from sklearn import metrics # for the check the error and accuracy of the model
from sklearn.metrics import mean_squared_error,r2_score

## for Deep-learning:
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.layers import TimeDistributed
from keras.utils import to_categorical
from keras.optimizers import SGD 
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
import itertools
from keras.layers import LSTM
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import Dropout

import warnings
warnings.filterwarnings('ignore')

### Useful Keras tools

#### TimeDistributed Wrapper Layer
This wrapper applies a layer to every temporal slice of an input.
The input should be at least 3D, and the dimension of index one will be considered to be the temporal dimension.
Consider a batch of 32 samples, where each sample is a sequence of 10 vectors of 16 dimensions. The batch input shape of the layer is then (32, 10, 16), and the input_shape, not including the samples dimension, is  (10, 16).

In [6]:
# TimeDistributed(Dense) used as the first layer of a model.
# input_shape excludes the samples axis: 10 timesteps, 16 features per timestep.
model = Sequential([
    TimeDistributed(Dense(8), input_shape=(10, 16)),
])
# at this point model.output_shape == (None, 10, 8)

#### TimeSeriesGenerator
Utility class for generating batches of temporal data.
This class takes in a sequence of data-points gathered at equal intervals, along with time series parameters such as stride, length of history, etc., to produce batches for training/validation.

In [8]:
from keras.preprocessing.sequence import TimeseriesGenerator
import numpy as np

# Toy series: the integers 0..49 as a single-feature column.
# The targets are the very same series (built separately so the two arrays are independent).
data = np.arange(50).reshape(-1, 1)
targets = np.arange(50).reshape(-1, 1)

# Each window spans 10 timesteps sampled every 2nd step (5 points per window);
# two consecutive windows are grouped into one batch.
data_gen = TimeseriesGenerator(
    data,
    targets,
    length=10,
    sampling_rate=2,
    batch_size=2,
)
assert len(data_gen) == 20

# Inspect the first batch: inputs are the sub-sampled windows,
# targets are the values that immediately follow each window.
x, y = batch_0 = data_gen[0]
assert np.array_equal(
    x,
    np.array([
        [[0], [2], [4], [6], [8]],
        [[1], [3], [5], [7], [9]],
    ]),
)
assert np.array_equal(y, np.array([[10], [11]]))

#### Recurrent Layers
https://keras.io/layers/recurrent/

#### Using TensorFlow optimisers (Irwan optimiser)
keras.optimizers.TFOptimizer(optimizer)

#### Other things that could be used: 
https://github.com/keras-team/keras/issues/5348

### TimeDistributed usage for many-to-many sequence prediction
source: https://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/

In [None]:
from numpy import array
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import TimeDistributed
from keras.layers import LSTM

# Prepare the sequence.
# The input must be a 3D array: (number of samples, number of timesteps, number of features).
length = 5
seq = array([i / float(length) for i in range(length)])
X = seq.reshape(1, length, 1)  # one sample, `length` timesteps, one feature
y = seq.reshape(1, length, 1)  # the target is the same sequence (many-to-many echo task)

# Define the LSTM configuration.
n_neurons = length
n_batch = 1
n_epoch = 1000

# Create the LSTM.
model = Sequential()
# return_sequences=True makes the LSTM emit one output per timestep instead of only the last one.
model.add(LSTM(n_neurons, input_shape=(length, 1), return_sequences=True))
model.add(TimeDistributed(Dense(1)))  # 1 is the number of features you want to return.
# TimeDistributed applies a layer (can be Dense, LSTM, conv...) to every output timestep.
# To be used for many-to-many sequence prediction and for one-to-many sequence prediction.
# See post here: https://github.com/keras-team/keras/issues/1029

model.compile(loss='mean_squared_error', optimizer='adam')
# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that would emit a stray "None" after the table).
model.summary()

# Train the LSTM.
model.fit(X, y, epochs=n_epoch, batch_size=n_batch, verbose=2)
# Author's note: if you have multiple time-series, you set the batch_size to the number of your time-series.

# Evaluate: print the predicted value for each timestep of the single sample.
result = model.predict(X, batch_size=n_batch, verbose=0)
for value in result[0, :, 0]:
    print('%.1f' % value)

# source: https://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/

Look at this post as well: https://groups.google.com/forum/#!topic/keras-users/9GsDwkSdqBg

### Data Preparation steps: 
0. use diagfeat_EADL exported csv file. 
1. do some padding so that you have sequences of same length
1.bis: do not forget normalization!
2. Have a clean training 3D-array of shape (number_of_patients,number_of_visits,number_of_features)
3. Have a clean target 3D-array of shape (number_of_patients,timesteps_to_predict,1)

### Models to try
1. LSTM/GRUs on subset of features
2. seq2seq model (multivariate with only the target)
3. LSTM-MFCN: see github repo and paper

### Functions to define

In [13]:
def load_data():
    """Placeholder for reshaping the data into a padded 3D array.

    Not implemented yet: the function takes no arguments and returns None.
    """
    return None

In [14]:
def train_test_split():
    """Placeholder for the train/test splitting step.

    Not implemented yet: the function takes no arguments and returns None.
    """
    # NOTE(review): this definition rebinds the name `train_test_split` that the
    # imports cell brings in from sklearn.model_selection; after this cell runs,
    # the sklearn helper is no longer reachable under that name.
    return None

In [15]:
def RNN_model():
    """Placeholder for building the recurrent model.

    Not implemented yet: the function takes no arguments and returns None.
    """
    return None