________________________________________________________________________________________________________________________
# Experiment comparing the predictions of an LSTM model with those of an RNN with an attention layer for predicting temperature sequences
## - Import Dependencies
## - Define Dataset Class
## - Define LSTM Class
## - Define RNN with Attention Class
## - Train and make predictions for both models in a main function
## - Save each model's predictions and actuals to CSV files for use in analysis using Tableau
________________________________________________________________________________________________________________________

________________________________________________________________________________________________________________________
### References:
________________________________________________________________________________________________________________________
- #### Video: LSTM Time Series Forecasting Tutorial in Python
- #### Author: Greg Hogg
- #### Link: https://www.youtube.com/watch?v=c0k-YLQGKjY
________________________________________________________________________________________________________________________
- #### Video: Neural Transformer Encoders for Timeseries Data in Keras (10.5)
- #### Author: Jeff Heaton
- #### Link: https://www.youtube.com/watch?v=SX67Mni0Or4


________________________________________________________________________________________________________________________

________________________________________________________________________________________________________________________
#### Dependencies
________________________________________________________________________________________________________________________

In [1]:
import tensorflow as tf  # NN Library

import os  # For working with dataset
import pandas as pd  # Data management
import numpy as np  # Lin Alg

from keras.models import Sequential  # Model initialization and format
from keras.layers import *  # For setting up architecture
from keras.callbacks import ModelCheckpoint  # For saving model that does best on val set
from keras.losses import MeanSquaredError  # MSE works well for our loss function given the problem
from keras.metrics import RootMeanSquaredError  # For extra evaluation
from keras.optimizers import Adam  # Optimizer
from keras.models import load_model  # To load saved models
from tensorflow import keras
from tensorflow.keras import layers

________________________________________________________________________________________________________________________
#### Dataset Class
________________________________________________________________________________________________________________________

In [2]:
# Defining Dataset class
class Dataset():
    """Jena climate temperature series prepared for one-step-ahead forecasting.

    On construction the class downloads the Jena climate CSV, keeps the
    "T (degC)" column, turns it into sliding windows (X) with the value that
    follows each window as the label (y), and splits the samples
    chronologically into training, validation and test sets.

    Args:
        window_size: Number of consecutive readings in each input window.
        train_end: Index of the first sample that is NOT part of the training
            set (defaults to the original 400,000).
        val_end: Index of the first sample that is NOT part of the validation
            set (defaults to the original 410,000); everything from here on is
            the test set.
    """

    # Class-level defaults so the split also works on instances whose
    # boundaries were never set explicitly.
    train_end = 400000
    val_end = 410000

    # Define attributes for the model
    def __init__(self, window_size=5, train_end=400000, val_end=410000):
        # Window size for the input sequence
        self.window_size = window_size

        # Chronological split boundaries (sample indices into X / y)
        self.train_end = train_end
        self.val_end = val_end

        # Load data and keep only the temperature column
        self.df = self.load_data()
        self.df = self.df["T (degC)"]

        # Convert the temperature column to a numpy array
        self.df_as_np = self.df.to_numpy()

        # Transform data to format that works for supervised learning
        self.X, self.y = self.to_X_y()

        # Split data into training, validation and testing datasets
        (self.X_train, self.y_train,
         self.X_val, self.y_val,
         self.X_test, self.y_test) = self.train_test_split()

    def load_data(self):
        """Download the Jena climate archive and return it as a DataFrame.

        Returns:
            DataFrame read from the extracted CSV, indexed by the parsed
            'Date Time' column.
        """
        # Download and extract dataset
        zip_path = tf.keras.utils.get_file(
                        origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
                        fname='jena_climate_2009_2016.csv.zip',
                        extract=True)

        # Locate the CSV file by stripping the '.zip' extension.
        # NOTE(review): assumes get_file returns the path of the downloaded
        # archive and that the CSV is extracted next to it - confirm for the
        # installed Keras version.
        csv_path, _ = os.path.splitext(zip_path)

        # Load data into a DataFrame and set the index to datetime
        df = pd.read_csv(csv_path)
        df.index = pd.to_datetime(df['Date Time'], format='%d.%m.%Y %H:%M:%S')

        return df

    def to_X_y(self):
        """Turn the 1-D series into (window, next value) training pairs.

        Returns:
            Tuple (X, y): X has shape (n_samples, window_size, 1) and keeps
            the dtype of the series; y has shape (n_samples,) and dtype
            float32. n_samples is len(series) - window_size, or 0 when the
            series is not longer than the window.

        Raises:
            ValueError: If window_size is smaller than 1.
        """
        window = self.window_size
        if window < 1:
            raise ValueError("window_size must be a positive integer")

        series = np.asarray(self.df_as_np)
        n_samples = max(len(series) - window, 0)

        # Row i of `positions` holds the indices i .. i + window - 1, so a
        # single fancy-indexing step gathers every window without a Python
        # loop over hundreds of thousands of samples.
        positions = np.arange(n_samples)[:, np.newaxis] + np.arange(window)[np.newaxis, :]

        # Trailing axis of length 1 is the (single) feature dimension
        X = series[positions][..., np.newaxis]

        # The label of window i is the value right after it: series[i + window]
        y = series[window:].astype('float32')

        return X, y

    def train_test_split(self, train_end=None, val_end=None):
        """Split X and y chronologically into train, validation and test sets.

        Args:
            train_end: First sample index after the training set. Defaults to
                the instance's train_end.
            val_end: First sample index after the validation set. Defaults to
                the instance's val_end.

        Returns:
            Tuple (X_train, y_train, X_val, y_val, X_test, y_test).

        Raises:
            ValueError: If the boundaries are negative or out of order.
        """
        train_end = self.train_end if train_end is None else train_end
        val_end = self.val_end if val_end is None else val_end
        if train_end < 0 or val_end < train_end:
            raise ValueError("split boundaries must satisfy 0 <= train_end <= val_end")

        # Samples before train_end are used for training
        X_train, y_train = self.X[:train_end], self.y[:train_end]

        # Samples between train_end and val_end are used for validation
        X_val, y_val = self.X[train_end:val_end], self.y[train_end:val_end]

        # The remaining samples are used for testing
        X_test, y_test = self.X[val_end:], self.y[val_end:]

        return X_train, y_train, X_val, y_val, X_test, y_test

________________________________________________________________________________________________________________________
#### LSTM Class
________________________________________________________________________________________________________________________

In [3]:
# Defining LSTM model for time series prediction
class LSTM_for_timeseries():
    """Single-layer LSTM regressor predicting the value that follows a window.

    Architecture: InputLayer((window_size, 1)) -> LSTM(64) -> Dense(8, relu)
    -> Dense(1, linear). During training the best model according to the
    checkpoint callback is written to CHECKPOINT_PATH and reloaded for testing.

    Args:
        window_size: Number of time steps in each input sample. Must match the
            window size used to build the dataset. Defaults to 5.
    """

    # Single source of truth for the artefacts this class writes and reads,
    # so training and testing can never point at different checkpoints.
    CHECKPOINT_PATH = 'best_model_LSTM'
    RESULTS_CSV = "Predictions_and_Actuals_for_test_set_LSTM_best_model.csv"

    def __init__(self, window_size=5):
        self.window_size = window_size

        # Initializing model with Sequential API, which stacks layers sequentially
        self.model = Sequential()

        # Input layer: window_size time steps with 1 feature each
        self.model.add(InputLayer((window_size, 1)))

        # Adding LSTM layer with 64 units
        self.model.add(LSTM(64))

        # Dense layer with 8 units and a Rectified Linear Unit activation function
        self.model.add(Dense(8, 'relu'))

        # Output layer with single unit for regression task
        self.model.add(Dense(1, 'linear'))

        # Checkpoint callback; save_best_only keeps only the best model seen so
        # far according to the callback's monitored quantity (Keras default).
        self.checkpoint = ModelCheckpoint(self.CHECKPOINT_PATH, save_best_only=True)

        # Variables to store the best model, the predictions it makes, and the results
        self.model_best = None
        self.predictions = None
        self.results = None

    def train_model(self, X_train, y_train, X_val, y_val, epochs=10, learning_rate=0.01):
        """Compile and fit the model, checkpointing the best epoch.

        Args:
            X_train, y_train: Training windows and labels.
            X_val, y_val: Validation windows and labels, evaluated after each epoch.
            epochs: Number of training epochs. Defaults to 10.
            learning_rate: Adam learning rate. Defaults to 0.01.
        """
        # Mean Squared Error as loss, Adam optimizer, RMSE as an extra metric
        self.model.compile(loss=MeanSquaredError(),
                           optimizer=Adam(learning_rate=learning_rate),
                           metrics=[RootMeanSquaredError()])

        # The checkpoint callback saves the best model observed during training
        self.model.fit(X_train, y_train,
                       validation_data=(X_val, y_val),
                       epochs=epochs,
                       callbacks=[self.checkpoint])

    def test_model(self, X_test, y_test):
        """Reload the checkpointed model and predict on the test set.

        Stores the flattened predictions in self.predictions and a DataFrame
        with 'Predictions' and 'Actuals' columns in self.results.
        """
        # Loading the best saved model
        self.model_best = load_model(self.CHECKPOINT_PATH)

        # Making predictions using the best model and flattening the predictions array
        self.predictions = self.model_best.predict(X_test).flatten()

        # Creating a dataframe with predictions and actual values
        self.results = pd.DataFrame(data={"Predictions": self.predictions, 'Actuals': y_test})

    def save_results_to_csv(self):
        """Write the predictions/actuals table to RESULTS_CSV.

        Raises:
            RuntimeError: If test_model() has not produced results yet.
        """
        if self.results is None:
            raise RuntimeError("No results to save: call test_model() before save_results_to_csv()")
        self.results.to_csv(self.RESULTS_CSV)


________________________________________________________________________________________________________________________
#### RNN with Attention Mechanism Class
________________________________________________________________________________________________________________________

In [4]:
# Defining an attention-based (transformer encoder) model for time series prediction; the class keeps the "RNN with Attention" name used throughout this notebook
class RNN_with_Attention_for_timeseries():
    """Attention-based regressor predicting the value that follows a window.

    Despite the class name, the network built here contains no recurrent
    layer: it stacks transformer encoder blocks (layer normalization,
    multi-head self-attention and a 1x1-convolution feed-forward part, each
    with a residual connection), followed by global average pooling, a dense
    head and a single linear output unit.

    Args:
        window_size: Number of time steps in each input sample. Must match the
            window size used to build the dataset. Defaults to 5.
    """

    # Single source of truth for the artefacts this class writes and reads,
    # so training and testing can never point at different checkpoints.
    CHECKPOINT_PATH = 'best_model_RNN_with_Attention'
    RESULTS_CSV = "Predictions_and_Actuals_for_test_set_RNN_with_Attention_best_model.csv"

    def __init__(self, window_size=5):
        self.window_size = window_size

        # Build and initialize the model with predefined parameters
        self.model = self.build_model(
                    (window_size, 1),           # Input shape: window_size time steps with 1 feature
                    head_size=256,              # key_dim passed to each attention head
                    num_heads=4,                # Number of attention heads
                    ff_dim=4,                   # Filters of the first feed-forward Conv1D
                    num_transformer_blocks=4,   # Number of transformer blocks
                    mlp_units=[128],            # Units in the dense layer
                    mlp_dropout=0.4,            # Dropout rate for the dense layer
                    dropout=0.25                # Dropout rate for attention and feed-forward network
                    )

        # Checkpoint callback; save_best_only keeps only the best model seen so
        # far according to the callback's monitored quantity (Keras default).
        self.checkpoint = ModelCheckpoint(self.CHECKPOINT_PATH, save_best_only=True)

        # Variables to store the best model, the predictions it makes, and the results
        self.model_best = None
        self.predictions = None
        self.results = None

    def transformer_encoder(self, inputs, head_size, num_heads, ff_dim, dropout=0):
        """Apply one transformer encoder block to `inputs`.

        Args:
            inputs: Input tensor of the block.
            head_size: key_dim of the multi-head attention layer.
            num_heads: Number of attention heads.
            ff_dim: Number of filters of the first feed-forward convolution.
            dropout: Dropout rate used inside attention and after each sub-layer.

        Returns:
            Tensor whose last dimension equals that of `inputs` (required by
            the residual additions).
        """
        # Layer Normalization and Multihead self-attention (query and value are both x)
        x = layers.LayerNormalization(epsilon=1e-6)(inputs)
        x = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
        x = layers.Dropout(dropout)(x)
        res = x + inputs  # first residual connection

        # Feed-Forward network implemented with kernel-size-1 convolutions
        x = layers.LayerNormalization(epsilon=1e-6)(res)
        x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
        x = layers.Dropout(dropout)(x)
        # Project back to the input's feature width so the residual sum is valid
        x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
        return x + res

    def build_model(self, input_shape,
                    head_size,
                    num_heads,
                    ff_dim,
                    num_transformer_blocks,
                    mlp_units,
                    dropout=0,
                    mlp_dropout=0):
        """Assemble the full Keras model.

        Args:
            input_shape: Shape of one sample, e.g. (window_size, 1).
            head_size, num_heads, ff_dim, dropout: Forwarded to every
                transformer_encoder block.
            num_transformer_blocks: How many encoder blocks to stack.
            mlp_units: Iterable with the width of each dense layer of the head.
            mlp_dropout: Dropout rate applied after each dense layer of the head.

        Returns:
            An uncompiled keras.Model with a single-unit output.
        """
        inputs = layers.Input(shape=input_shape)
        x = inputs
        # Add the specified number of transformer blocks
        for _ in range(num_transformer_blocks):
            x = self.transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

        # Apply Global Average Pooling.
        # NOTE(review): with data_format="channels_first" Keras presumably
        # averages over the last (feature) axis and keeps one value per time
        # step - confirm this is the intended reduction.
        x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)

        # Dense layers
        for dim in mlp_units:
            x = layers.Dense(dim, activation="relu")(x)
            x = layers.Dropout(mlp_dropout)(x)

        # Output layer
        outputs = layers.Dense(1)(x)

        return keras.Model(inputs, outputs)

    def train_model(self, X_train, y_train, X_val, y_val, epochs=10, learning_rate=0.01):
        """Compile and fit the model, checkpointing the best epoch.

        Args:
            X_train, y_train: Training windows and labels.
            X_val, y_val: Validation windows and labels, evaluated after each epoch.
            epochs: Number of training epochs. Defaults to 10.
            learning_rate: Adam learning rate. Defaults to 0.01.
        """
        # Mean Squared Error as loss, Adam optimizer, RMSE as an extra metric
        self.model.compile(loss=MeanSquaredError(),
                           optimizer=Adam(learning_rate=learning_rate),
                           metrics=[RootMeanSquaredError()])

        # The checkpoint callback saves the best model observed during training
        self.model.fit(X_train, y_train,
                       validation_data=(X_val, y_val),
                       epochs=epochs,
                       callbacks=[self.checkpoint])

    def test_model(self, X_test, y_test):
        """Reload the checkpointed model and predict on the test set.

        Stores the flattened predictions in self.predictions and a DataFrame
        with 'Predictions' and 'Actuals' columns in self.results.
        """
        # Loading the best saved model
        self.model_best = load_model(self.CHECKPOINT_PATH)

        # Making predictions using the best model and flattening the predictions array
        self.predictions = self.model_best.predict(X_test).flatten()

        # Creating a dataframe with predictions and actual values
        self.results = pd.DataFrame(data={"Predictions": self.predictions, 'Actuals': y_test})

    def save_results_to_csv(self):
        """Write the predictions/actuals table to RESULTS_CSV.

        Raises:
            RuntimeError: If test_model() has not produced results yet.
        """
        if self.results is None:
            raise RuntimeError("No results to save: call test_model() before save_results_to_csv()")
        self.results.to_csv(self.RESULTS_CSV)

________________________________________________________________________________________________________________________
#### Class to Calculate RMSE score for either model
________________________________________________________________________________________________________________________

In [5]:
# Define a class for calculating and displaying Root Mean Square Error (RMSE)
class Calculate_RMSE():
    """Helper that reports the Root Mean Square Error (RMSE) of a set of predictions."""

    def __init__(self):
        # Reusable Mean Squared Error (MSE) loss object from TensorFlow
        self.mse_loss = tf.keras.losses.MeanSquaredError()

    def display_RMSE(self, y_test, test_predictions, modelname):
        """Print the RMSE between `y_test` and `test_predictions`.

        Args:
            y_test: Actual target values.
            test_predictions: Values predicted by the model.
            modelname: Label inserted into the printed message.
        """
        # RMSE is the square root of the mean squared error
        rmse_score = tf.sqrt(self.mse_loss(y_test, test_predictions))
        print(f"Best {modelname}'s RMSE on test dataset: {rmse_score}")

________________________________________________________________________________________________________________________
#### Set up dataset, train models, use best model to make predictions, save predictions and actuals for apples-to-apples model comparison for predicting temperature sequences in a time-series dataset used for monitoring climate change
________________________________________________________________________________________________________________________

In [6]:
def main():
    """Train both models on the same data and report their test performance.

    For each model: train, reload the best checkpoint, predict on the test
    set, write predictions and actuals to a CSV file and print the test RMSE.
    """
    # Creating the Dataset already downloads the data, builds the windows and
    # performs the train/validation/test split, so no further preparation
    # calls are needed (repeating them would only redo the work and discard
    # the result).
    data = Dataset()

    # One RMSE helper is enough for both models
    rmse = Calculate_RMSE()

    # --- LSTM ---------------------------------------------------------------
    LSTM_model = LSTM_for_timeseries()

    # Train the LSTM model
    LSTM_model.train_model(data.X_train, data.y_train, data.X_val, data.y_val)

    # Test the LSTM model on the testing dataset
    LSTM_model.test_model(data.X_test, data.y_test)

    # Save the results to a CSV file
    LSTM_model.save_results_to_csv()

    # Calculate RMSE for LSTM model
    rmse.display_RMSE(data.y_test, LSTM_model.predictions, "LSTM model")

    # --- RNN with Attention -------------------------------------------------
    RNN_with_Attention_model = RNN_with_Attention_for_timeseries()

    # Train the RNN_with_Attention model
    RNN_with_Attention_model.train_model(data.X_train, data.y_train, data.X_val, data.y_val)

    # Test the RNN_with_Attention model on the testing dataset
    RNN_with_Attention_model.test_model(data.X_test, data.y_test)

    # Save the results to a CSV file
    RNN_with_Attention_model.save_results_to_csv()

    # Calculate RMSE for RNN_with_Attention model
    rmse.display_RMSE(data.y_test, RNN_with_Attention_model.predictions, "RNN with Attention model")

In [7]:
# Run the experiment only when executed directly (notebook cell or script),
# not when this file is imported as a module.
if __name__ == "__main__":
    main()

Epoch 1/10



INFO:tensorflow:Assets written to: best_model_LSTM\assets


INFO:tensorflow:Assets written to: best_model_LSTM\assets


Epoch 2/10



INFO:tensorflow:Assets written to: best_model_LSTM\assets


INFO:tensorflow:Assets written to: best_model_LSTM\assets


Epoch 3/10



INFO:tensorflow:Assets written to: best_model_LSTM\assets


INFO:tensorflow:Assets written to: best_model_LSTM\assets


Epoch 4/10



INFO:tensorflow:Assets written to: best_model_LSTM\assets


INFO:tensorflow:Assets written to: best_model_LSTM\assets


Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Best LSTM model's RMSE on test dataset: 0.15906491875648499
Epoch 1/10



INFO:tensorflow:Assets written to: best_model_RNN_with_Attention\assets


INFO:tensorflow:Assets written to: best_model_RNN_with_Attention\assets


Epoch 2/10



INFO:tensorflow:Assets written to: best_model_RNN_with_Attention\assets


INFO:tensorflow:Assets written to: best_model_RNN_with_Attention\assets


Epoch 3/10



INFO:tensorflow:Assets written to: best_model_RNN_with_Attention\assets


INFO:tensorflow:Assets written to: best_model_RNN_with_Attention\assets


Epoch 4/10



INFO:tensorflow:Assets written to: best_model_RNN_with_Attention\assets


INFO:tensorflow:Assets written to: best_model_RNN_with_Attention\assets


Epoch 5/10



INFO:tensorflow:Assets written to: best_model_RNN_with_Attention\assets


INFO:tensorflow:Assets written to: best_model_RNN_with_Attention\assets


Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Best RNN with Attention model's RMSE on test dataset: 0.19057580828666687
