In [5]:
import os
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import matplotlib.patches as patches
import matplotlib.gridspec as gridspec

import warnings

# Quiet mode: install a filter that ignores every warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# Apply the seaborn look (grid background, palette, font scaling) for plotting.
sns.set(
    font_scale=1.2,
    palette='deep',
    style='whitegrid',
)

In [10]:
# Tensorflow imports
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed, Input, BatchNormalization, \
    multiply, concatenate, Flatten, Activation, dot
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping
# Graphviz / project-path setup (plot_model needs GraphViz on PATH).
import sys
from IPython.display import HTML

# Use the tf.keras visualisation helpers so that every Keras object in this
# notebook comes from the same package. The previous import pulled them from
# standalone `keras.utils.vis_utils`, which shadowed the tf.keras `plot_model`
# imported above and relies on a module path that newer Keras releases no
# longer expose.
from tensorflow.keras.utils import plot_model, model_to_dot

# NOTE(review): machine-specific Windows install location of Graphviz -- adjust
# it (or remove this block) when `dot` is already on PATH.
GRAPHVIZ_BIN = 'C:/Program Files (x86)/Graphviz2.38/bin/'
# Guarded so that re-running the cell does not keep growing PATH.
if GRAPHVIZ_BIN not in os.environ["PATH"].split(os.pathsep):
    os.environ["PATH"] += os.pathsep + GRAPHVIZ_BIN

# Make the sibling `utils` package importable from the parent directory.
# `sys.path` is used directly instead of the undocumented `os.sys` attribute,
# and the entry is added only once even if the cell is executed repeatedly.
if '..' not in sys.path:
    sys.path.append('..')
from utils.model_utils import serialize_model, split_sequences_multivariate

In [7]:
# Block to load file depending if you are in Colab or in Jupyter
if 'google.colab' in str(get_ipython()):
    print('Running on CoLab')
    !wget --no-check-certificate \
        https://www.dropbox.com/s/qbn9y5ooqxipxki/single_feature.csv?dl=0 \
        -O /tmp/Features.csv
        
    data = pd.read_csv('/tmp/Features.csv', parse_dates=['Timestamp'], index_col='Timestamp')
else:
    path_of_file = '../Data/single_feature.csv'
    data = pd.read_csv(path_of_file, parse_dates=['Timestamp'], index_col='Timestamp')

In [8]:
# The data is highly irregular, so resample it onto a regular 30-minute grid
# (mean of the readings in each bin) and back-fill empty bins from the next
# available value. `.bfill()` alone gives the same result as the previous
# `fillna(resampled.bfill())`.
resampled = data.resample('30Min').mean().bfill()

# Now take only a slice of it. Partial date strings select whole periods, so
# the window runs from the start of March 2019 through the end of 2019-04-20.
init_date = '2019-03'
end_date = '2019-04-20'
# NOTE(review): train_end_date is not used in this cell and falls after
# end_date -- confirm whether it is still needed.
train_end_date = '2019-10-25'

# Working window only; no train/test split is made in this cell.
train_data = resampled.loc[init_date:end_date]

In [9]:
from sklearn.preprocessing import MinMaxScaler

# Chronological split point: the first `train_ratio` share of the rows is used
# for training, the remaining rows are held out.
train_ratio = 0.8
train_len = int(train_ratio * len(train_data))

# Fit the scaler on the training rows only, so the min/max of the held-out
# rows do not leak into the normalisation, then scale the whole window with
# those training statistics. Held-out values may therefore fall slightly
# outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(train_data.iloc[:train_len])
normalized_train = scaler.transform(train_data)

print('Lenght of Data {}'.format(len(normalized_train)))
print('Lenght of Train {}'.format(train_len))

Lenght of Data 2448
Lenght of Train 1958


In [None]:
# Grid search over the look-back window length and the batch size for a
# two-layer LSTM regression model. One model is trained per (timestep, batch)
# pair; models and training histories are collected in iteration order.
n_timesteps = [32, 64, 128, 256]
n_features = normalized_train.shape[-1]
batches = [16, 32, 64, 128]
# Upper bound on epochs per run; the EarlyStopping callback below may end a
# run sooner. NOTE(review): `epochs` was not defined anywhere before this
# cell -- adjust the value as needed.
epochs = 100

history_list = []
model_list = []

# Result containers; nothing in this cell fills them.
train_predictions_list = []
test_predictions_list = []
MAE_train = []
MAE_test = []
mae_overall_train_list = []
mae_overall_test_list = []

for timestep in n_timesteps:
    # The windows depend only on the look-back length, so they are built once
    # per timestep rather than once per batch size.
    # Presumably split_sequences_multivariate returns (X, y) windows of
    # `n_steps` rows each -- see utils.model_utils to confirm.
    train_X_lstm, train_y_lstm = split_sequences_multivariate(
        normalized_train[:train_len], n_steps=timestep)
    test_X_lstm, test_y_lstm = split_sequences_multivariate(
        normalized_train[train_len:], n_steps=timestep)

    for batch in batches:
        model = tf.keras.models.Sequential()
        # The input shape must use the current window length (`timestep`),
        # not the list of all candidate lengths.
        model.add(tf.keras.layers.LSTM(units=128, input_shape=(timestep, n_features),
                                       return_sequences=True, dropout=0.2))
        # Only the first layer needs an input shape.
        model.add(tf.keras.layers.LSTM(units=128, return_sequences=False, dropout=0.2))
        model.add(tf.keras.layers.Dense(n_features))

        # `learning_rate` replaces the deprecated `lr` argument.
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
        model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mae'])
        model_list.append(model)

        # Stop a run once the validation loss has not improved for 3 epochs.
        early_stop = EarlyStopping(monitor='val_loss', patience=3)

        history = model.fit(train_X_lstm, train_y_lstm,
                            validation_split=0.2,
                            epochs=epochs, verbose=1,
                            callbacks=[early_stop],
                            batch_size=batch)

        # Keep the per-epoch MAE curves of this run (train and validation).
        history_list.append(pd.DataFrame(history.history, columns=['mae', 'val_mae']))