In [1]:
# Importing libraries
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
plt.rc('font', size=16)
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')

# Short aliases for the Keras namespaces used in the rest of the notebook
tfk = tf.keras
tfkl = tfk.layers

# Seed every random number generator the notebook touches (Python `random`,
# NumPy, TensorFlow) with the same value, for reproducibility.
# NOTE(review): PYTHONHASHSEED is set after the interpreter has started, so it
# presumably only affects subprocesses - confirm if hash ordering matters.
seed = 42

os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

# Load the training CSV into a DataFrame
dataset = pd.read_csv('../input/an2dl-homework-2/Training.csv')
print(dataset.shape)
# A bare expression is only rendered when it is the last statement of a cell,
# so the preview has to be printed explicitly to show up here.
print(dataset.head())

# Printing dataset info
dataset.info()

In [2]:
def inspect_dataframe(df, columns):
    """Plot every requested column of `df` in its own vertically stacked subplot.

    Args:
        df: Object indexable by column name (called with a pandas DataFrame).
        columns: Iterable of column names; one subplot is drawn per name and
            the name is used as the subplot title.
    """
    columns = list(columns)
    # squeeze=False makes subplots() always return a 2-D array of Axes, so the
    # function also works when a single column is requested.
    _, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for ax, col in zip(axs[:, 0], columns):
        ax.plot(df[col])
        ax.set_title(col)
    plt.show()

# Plotting time series
inspect_dataframe(dataset, dataset.columns)

In [3]:
# Constants controlling how the series is split and cut into sequences
window = 400  # number of rows in each encoder input sequence (see build_sequences)
stride = 4  # step between the start indices of consecutive sequences
validation_size = 13700 # number of trailing rows held out for validation (stated as 20% - not verified here)
target_labels = dataset.columns  # every column is used as a prediction target
telescope = 864 # Direct method: number of future rows in each decoder sequence

In [4]:
# Chronological hold-out: the last `validation_size` rows become the validation
# split, everything before them is the training split
X_validation_raw = dataset.iloc[-validation_size:]
X_train_raw = dataset.iloc[:-validation_size]

print(X_train_raw.shape, X_validation_raw.shape)

# Min-max scaling of every column (features and labels alike); the statistics
# are computed on the training split only and reused for the validation split
X_min = X_train_raw.min()
X_max = X_train_raw.max()

X_train_raw = X_train_raw.sub(X_min).div(X_max - X_min)
X_validation_raw = X_validation_raw.sub(X_min).div(X_max - X_min)

In [5]:
def printplots():
    """Draw one figure per column showing the normalized train and validation splits.

    Reads the module-level `X_train_raw`, `X_validation_raw` and `target_labels`;
    takes no arguments and returns nothing.
    """
    n_columns = X_train_raw.shape[1]
    for position in range(n_columns):
        label = target_labels[position]
        _, ax = plt.subplots(figsize=(17,5))
        ax.plot(X_train_raw[label], label='Train')
        # The validation split is shown under the legend entry 'Test'
        ax.plot(X_validation_raw[label], label='Test')
        ax.set_title(label)
        ax.legend()
        plt.show()

printplots()

In [6]:
# Last `window` rows of the full series, scaled with the training min/max and
# given a leading axis of size 1
future = dataset.iloc[-window:]
future = future.sub(X_min).div(X_max - X_min)
future = future.to_numpy()[np.newaxis, ...]
future.shape

In [7]:
def build_sequences(df, target_labels, window=window, stride=stride, telescope=telescope):
    """Cut a frame into overlapping encoder inputs and decoder input/output sequences.

    If `len(df)` is not a multiple of `window`, rows of zeros are prepended
    until it is.

    Args:
        df: DataFrame holding the series, one row per time step.
        target_labels: Columns of `df` used as labels.
        window: Number of rows in each encoder input; must be a multiple of `stride`.
        stride: Step between the start rows of consecutive sequences.
        telescope: Number of rows in each decoder sequence.

    Returns:
        Tuple `(dataset, labels_out, labels_in)` of NumPy arrays. For a start
        row `s`, `dataset` holds rows `[s, s + window)`, `labels_out` holds the
        label rows `[s + window, s + window + telescope)` and `labels_in` holds
        the same span shifted one row earlier (it begins with the last row of
        the encoder window).

    NOTE(review): the range of start rows excludes
    `len - window - telescope`, although a sequence starting there would still
    fit - confirm whether dropping it is intended.
    """
    assert window % stride == 0

    features = df.copy().values
    targets = df[target_labels].copy().values

    remainder = len(df) % window
    if remainder != 0:
        # Number of zero rows needed in front to reach a multiple of `window`
        pad_rows = window - remainder
        feature_pad = np.zeros((pad_rows, features.shape[1]), dtype='float32')
        target_pad = np.zeros((pad_rows, targets.shape[1]), dtype='float32')
        features = np.concatenate((feature_pad, features))
        targets = np.concatenate((target_pad, targets))
        assert len(features) % window == 0

    starts = np.arange(0, len(features) - window - telescope, stride)

    # Encoder input for training [Ex: 0 1 2 ... 199]
    dataset = np.array([features[s:s + window] for s in starts])
    # Decoder input for training [Ex: 199 200 201 ...]
    labels_in = np.array([targets[s + window - 1:s + window + telescope - 1] for s in starts])
    # Decoder output for training [Ex: 200 201 202 ...]
    labels_out = np.array([targets[s + window:s + window + telescope] for s in starts])

    return dataset, labels_out, labels_in

In [8]:
# Turn each normalized split into (encoder input, decoder output, decoder input) arrays
X_train, y_train_dec_out, y_train_dec_in = build_sequences(
    X_train_raw, target_labels, window=window, stride=stride, telescope=telescope
)
X_val, y_val_dec_out, y_val_dec_in = build_sequences(
    X_validation_raw, target_labels, window=window, stride=stride, telescope=telescope
)

In [9]:
# Shapes of the three training arrays followed by the three validation arrays
for _array in (X_train, y_train_dec_out, y_train_dec_in, X_val, y_val_dec_out, y_val_dec_in):
    print(_array.shape)

In [10]:
# Per-sample shapes (first axis dropped) handed to the model builder
input_shape = X_train.shape[1:]
output_shape = y_train_dec_out.shape[1:]
# Training settings passed to general_model.fit
batch_size = 32
epochs = 200

In [11]:
def SEQ2SEQ_3_Models(input_shape, output_shape, memelem):
    """Build an LSTM encoder-decoder plus the two sub-models used for step-by-step forecasting.

    The decoder LSTM and the output Dense layer are created once and reused, so
    the three returned models share those layers: training `general_model`
    also updates what `encoder_only` and `decoder_only` use.

    Args:
        input_shape: Shape of one encoder input sample, without the batch axis.
        output_shape: Shape of one decoder sequence, without the batch axis.
            Its last entry is taken as the number of features fed to and
            produced by the decoder.
        memelem: Number of units of both the encoder and the decoder LSTM.

    Returns:
        Tuple `(general_model, encoder_only, decoder_only)`:
        - `general_model` maps [encoder input, decoder input] to the whole
          output sequence (used for training, not for forecasting);
        - `encoder_only` maps an encoder input to the state `[h, c]`;
        - `decoder_only` maps [one-step input, h, c] to
          [one-step output, h, c].
    """
    # Number of predicted features, derived from the data instead of hard-coded
    n_features = output_shape[-1]

    # Encoder - only the final states are needed, the output itself is discarded
    encoder_input = tfkl.Input(shape=input_shape, name='encoder_in')
    _, enc_h, enc_c = tfkl.LSTM(memelem, return_state=True, name='enc_lstm')(encoder_input)
    encoder_state = [enc_h, enc_c]

    # Decoder - layers kept in variables because they are called again below
    decoder_input = tfkl.Input(shape=output_shape, name='decoder_in')
    decoder_lstm = tfkl.LSTM(memelem, return_sequences=True, return_state=True, name='decoder_lstm')
    decoder_sequence, _, _ = decoder_lstm(decoder_input, initial_state=encoder_state)

    # Dense output - kept in a variable because it is called again below.
    # NOTE(review): relu clips outputs at 0; fine for min-max scaled targets,
    # confirm if targets can be negative.
    dense_out = tfkl.Dense(n_features, activation='relu', name='dense_out')
    out = dense_out(decoder_sequence)

    # Model (Not for forecasting)
    general_model = tfk.Model([encoder_input, decoder_input], out)

    # ----------------------------------------------------------------------------------
    # One step-at-a-time prediction (Like real time translation - last exercise session)
    # ----------------------------------------------------------------------------------

    # Encoder
    encoder_only = tfk.Model(encoder_input, encoder_state)

    # Decoder state inputs; the shape is given as a tuple, the form documented for Input
    dec_h = tfkl.Input(shape=(memelem,))
    dec_c = tfkl.Input(shape=(memelem,))
    decoder_state_in = [dec_h, dec_c]

    # New version of the decoder - Takes in input previous prediction and previous state
    decoder_input_single = tfk.Input(shape=(1, n_features))
    step_sequence, h, c = decoder_lstm(decoder_input_single, initial_state=decoder_state_in)

    decoder_state = [h, c]
    step_output = dense_out(step_sequence)
    # Takes in input previous prediction and previous state
    decoder_only = tfk.Model([decoder_input_single] + decoder_state_in, [step_output] + decoder_state)

    return general_model, encoder_only, decoder_only

In [12]:
# Build the training model and the two forecasting sub-models with 256 LSTM
# units, then show the architecture of the training model
general_model, encoder_for_forecasting, decoder_for_forecasting = SEQ2SEQ_3_Models(
    input_shape=input_shape,
    output_shape=output_shape,
    memelem=256,
)
general_model.summary()

In [13]:
# Architecture of the encoder-only model returned by SEQ2SEQ_3_Models
encoder_for_forecasting.summary()

In [14]:
# Architecture of the single-step decoder model returned by SEQ2SEQ_3_Models
decoder_for_forecasting.summary()

In [15]:
# Mean squared error as the loss, Adam with a 5e-4 learning rate, and RMSE
# reported as an additional metric
general_model.compile(
    optimizer=tfk.optimizers.Adam(5e-4),
    loss=tfk.losses.MeanSquaredError(),
    metrics=[tfk.metrics.RootMeanSquaredError()],
)

In [16]:
# Training
# Both callbacks watch the validation loss: one stops after 10 epochs without
# improvement (restoring the best weights), the other halves the learning rate
# after 5 such epochs, never going below 1e-5
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_loss', mode='min', patience=10, restore_best_weights=True
)
reduce_lr = tfk.callbacks.ReduceLROnPlateau(
    monitor='val_loss', mode='min', patience=5, factor=0.5, min_lr=1e-5
)

# Only the per-epoch metrics dictionary of the returned History object is kept
history = general_model.fit(
    x=[X_train, y_train_dec_in],
    y=y_train_dec_out,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=([X_val, y_val_dec_in], y_val_dec_out),
    callbacks=[early_stopping, reduce_lr],
).history

In [17]:
# Index of the epoch with the lowest validation loss, marked in every figure
best_epoch = np.argmin(history['val_loss'])

# Loss curves - the model is compiled with MeanSquaredError, so this is MSE
plt.figure(figsize=(17,4))
plt.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
plt.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
plt.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
plt.title('Mean Squared Error (Loss)')
plt.legend()
plt.grid(alpha=.3)
plt.show()

# RMSE metric curves
plt.figure(figsize=(17,4))
plt.plot(history['root_mean_squared_error'], label='Training RMSE', alpha=.8, color='#ff7f0e')
plt.plot(history['val_root_mean_squared_error'], label='Validation RMSE', alpha=.9, color='#5a9aa5')
plt.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
plt.title('Root Mean Squared Error')
plt.legend()
plt.grid(alpha=.3)
plt.show()

# Learning-rate schedule; the key is 'lr' or 'learning_rate' depending on the
# Keras version, so look up both and skip the figure if neither was logged
lr_history = history.get('lr', history.get('learning_rate'))
if lr_history is not None:
    plt.figure(figsize=(18,3))
    plt.plot(lr_history, label='Learning Rate', alpha=.8, color='#ff7f0e')
    plt.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
    plt.legend()
    plt.grid(alpha=.3)
    plt.show()

In [18]:
# Save the training model and both forecasting sub-models, each to its own path
for _model, _target in (
    (general_model, './general'),
    (encoder_for_forecasting, './encoder'),
    (decoder_for_forecasting, './decoder'),
):
    _model.save(_target)

In [19]:
# Predict the test set
# The decoder must receive the shifted sequence (y_val_dec_in), exactly as in
# fit(); feeding y_val_dec_out would hand the model the very values it is
# scored against.
# Note this is still teacher-forced: at every step the decoder sees the true
# previous value, so these errors do not measure a free-running forecast.
predictions = general_model.predict([X_val, y_val_dec_in])
print(predictions.shape)

# Errors computed over all samples, time steps and features at once
mean_squared_error = tfk.metrics.mse(y_val_dec_out.flatten(),predictions.flatten())
mean_absolute_error = tfk.metrics.mae(y_val_dec_out.flatten(),predictions.flatten())
mean_squared_error, mean_absolute_error

In [20]:
def inspect_multivariate_prediction(X, y, pred, columns, telescope, idx=None):
    """Plot one sample's input window, true continuation and predicted continuation.

    One subplot is drawn per entry of `columns`; the y-axis is fixed to [0, 1].

    Args:
        X: Array indexed as `X[sample, step, feature]` with the input windows.
        y: Array indexed as `y[sample, step, feature]` with the true
            continuations (`telescope` steps, drawn in orange).
        pred: Array indexed as `pred[sample, step, feature]` with the predicted
            continuations (`telescope` steps, drawn in green).
        columns: Feature names, in feature-axis order, used as subplot titles.
        telescope: Number of steps in the continuation.
        idx: Sample to plot; a random sample is drawn when None.
    """
    if idx is None:
        idx = np.random.randint(0, len(X))

    # The x-axis is derived from the array being plotted, not from a global
    window_len = X.shape[1]
    past_steps = np.arange(window_len)
    future_steps = np.arange(window_len, window_len + telescope)

    # squeeze=False keeps `axs` 2-D so a single column can be plotted too
    _, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for i, col in enumerate(columns):
        ax = axs[i, 0]
        ax.plot(past_steps, X[idx,:,i])
        ax.plot(future_steps, y[idx,:,i], color='orange')
        ax.plot(future_steps, pred[idx,:,i], color='green')
        ax.set_title(col)
        ax.set_ylim(0,1)
    plt.show()


inspect_multivariate_prediction(X_val, y_val_dec_out[:,:telescope,:], predictions, target_labels, telescope)

In [21]:
# Number of steps generated by the step-by-step forecasting loop below; kept
# separate from `telescope` so that it does not always have to be 864 samples
temp_telescope = 864 # To not work always with 864 samples

In [22]:
def inspect_multivariate_prediction2(X, y, pred, columns, telescope, idx=None):
    """Plot one sample's input window and true continuation against a single forecast.

    Unlike `inspect_multivariate_prediction`, `pred` has no sample axis: it is
    the forecast for the one sample being plotted.

    Args:
        X: Array indexed as `X[sample, step, feature]` with the input windows.
        y: Array indexed as `y[sample, step, feature]` with the true
            continuations (`telescope` steps, drawn in orange).
        pred: Array indexed as `pred[step, feature]` with the forecast
            (`telescope` steps, drawn in green).
        columns: Feature names, in feature-axis order, used as subplot titles.
        telescope: Number of steps in the continuation.
        idx: Sample to plot; a random sample is drawn when None.
    """
    if idx is None:
        idx = np.random.randint(0, len(X))

    # The x-axis is derived from the array being plotted, not from a global
    window_len = X.shape[1]
    past_steps = np.arange(window_len)
    future_steps = np.arange(window_len, window_len + telescope)

    # squeeze=False keeps `axs` 2-D so a single column can be plotted too
    _, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for i, col in enumerate(columns):
        ax = axs[i, 0]
        ax.plot(past_steps, X[idx,:,i])
        ax.plot(future_steps, y[idx,:,i], color='orange')
        ax.plot(future_steps, pred[:,i], color='green')
        ax.set_title(col)
        ax.set_ylim(0,1)
    plt.show()

In [23]:
for seq in range (100, 1100, 100): # To plot more than one prediction

    # Encode the input window once; the returned [h, c] seeds the decoder state
    states_value = encoder_for_forecasting.predict(np.expand_dims(X_val[seq,:,:], axis=0), verbose=0)

    # First decoder input: the last row of the input window, kept 3-D as
    # (1, 1, features) - the same value the training decoder input starts with
    curr_value = X_val[seq:seq+1, window-1:window, :]

    # Collect the one-step outputs and join them once after the loop instead of
    # re-concatenating a growing array at every step
    step_predictions = []
    for _ in range(temp_telescope):
        # verbose=0: one progress bar per step would flood the cell output
        preds, h_, c_ = decoder_for_forecasting.predict([curr_value] + states_value, verbose=0)
        # Feed the prediction and the updated state back in for the next step
        curr_value = preds
        states_value = [h_, c_]
        step_predictions.append(preds[0])

    # (temp_telescope, features) forecast for this sample
    predictions = np.concatenate(step_predictions, axis=0)

    inspect_multivariate_prediction2(X_val, y_val_dec_out[:,:temp_telescope,:], predictions, target_labels, temp_telescope, seq)