# Deep Learning Models (LSTM & GRU)

### Libraries

In [474]:
#Tensorflow
import tensorflow as tf
import keras
from keras.layers import LSTM, Dense, InputLayer, GRU
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.losses import MeanAbsoluteError, MeanSquaredError
from keras.metrics import RootMeanSquaredError
from keras.optimizers import Adam, SGD

#Data Manipulation
import numpy as np
import pandas as pd

#Functionalities
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

#Plots
import plotly.graph_objects as go
import matplotlib.pyplot as plt
%matplotlib inline

# Fix the global random seed through Keras before any model is built so runs are repeatable.
keras.utils.set_random_seed(812)
# Request deterministic TensorFlow op implementations for the rest of the session.
tf.config.experimental.enable_op_determinism()

# Central experiment configuration read by the cells below.
parameters = dict(
    # CSV location and the fractions used to split the rows into train/validation/test.
    dataset=dict(
        path="../data/data_casal_montserratina/LaMonserratina_CLEAN.csv",
        trainingSize=0.70,
        validationSize=0.10,
        testSize=0.20,
    ),
    validation=dict(n_splits=10),
    # Training-loop settings; despite the key name they are also used for the GRU run.
    lstm=dict(epochs=50, batch_size=64),
)

In [475]:
# Report the runtime environment: active Keras backend, TensorFlow version and visible GPUs.
for label, value in (
    ("Keras backend:", keras.backend.backend()),
    ("TensorFlow version:", tf.__version__),
    ("GPUs Available:", tf.config.list_physical_devices('GPU')),
):
    print(label, value)

Keras backend: tensorflow
TensorFlow version: 2.16.1
GPUs Available: []


In [476]:
# Enable on-demand GPU memory allocation on every visible GPU (the loop is a no-op on CPU-only hosts).
# TensorFlow expects the memory-growth setting to be the same on all GPUs, so configuring
# only the first device can fail on multi-GPU machines.
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

### Read and Split Dataset

In [477]:
# Load the dataset from the configured CSV path and print its column/dtype summary.
df = pd.read_csv(filepath_or_buffer=parameters["dataset"]["path"])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12046 entries, 0 to 12045
Data columns (total 41 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Date                           12046 non-null  object 
 1   temperature_2m_C               12046 non-null  float64
 2   relative_humidity_2m_%         12046 non-null  float64
 3   dew_point_2m_C                 12046 non-null  float64
 4   apparent_temperature_C         12046 non-null  float64
 5   pressure_msl_hPa               12046 non-null  float64
 6   surface_pressure_hPa           12046 non-null  float64
 7   cloud_cover_%                  12046 non-null  float64
 8   cloud_cover_high_%             12046 non-null  float64
 9   et0_fao_evapotranspiration_mm  12046 non-null  float64
 10  vapour_pressure_deficit_kPa    12046 non-null  float64
 11  wind_speed_10m_km/h            12046 non-null  float64
 12  wind_direction_10m_º           12046 non-null 

In [478]:
#Change Data Types
def parseData(df):
    """Return a copy of ``df`` with every column cast to ``float``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns can all be converted to float.

    Returns
    -------
    pandas.DataFrame
        New frame with float columns; the caller's frame is left untouched.

    Raises
    ------
    ValueError or TypeError
        Propagated from pandas when a column cannot be converted to float.
    """
    # One vectorised cast replaces the per-column loop, which rewrote the caller's frame in place.
    return df.astype(float)

#Normalize Data
def normalizeData(data, scaler=None):
    """Scale the features of ``data`` and return the transformed array.

    Parameters
    ----------
    data : array-like exposing ``.copy()`` (NumPy array or DataFrame)
        Samples to scale; a copy is transformed so the input is not modified here.
    scaler : object with a ``transform`` method, optional
        An already-fitted scaler (for example one fitted on the training split).
        When given, it is applied as-is so validation/test data share the
        training scale. When omitted, a new ``MinMaxScaler`` is fitted on
        ``data`` itself, which is the original behaviour.

    Returns
    -------
    The scaled data as returned by the scaler.
    """
    values = data.copy()
    if scaler is not None:
        # Reuse the caller's fitted statistics instead of re-fitting on this split.
        return scaler.transform(values)
    return MinMaxScaler().fit_transform(values)

#Add N Lags
def addNLags(df, lags_list):
    """Return a copy of ``df`` extended with lagged versions of ``Demanda_kWh``.

    For every ``lag`` in ``lags_list`` a column ``lag_<lag>`` is added holding
    the target shifted down by ``lag * 24`` rows. Every NaN in the resulting
    frame (including the rows with no history) is replaced by 0.
    """
    target = df["Demanda_kWh"]
    lagged = {f'lag_{lag}': target.shift(lag * 24) for lag in lags_list}
    return df.assign(**lagged).fillna(0)

In [479]:
# Keep the timestamps aside as datetimes, then remove the "Date" column from the feature frame.
dates = pd.to_datetime(df["Date"])
df = df.drop(columns="Date")

In [480]:
# Cast every column to float and discard rows with missing values.
df = parseData(df)
df = df.dropna()
#df["Demanda_kWh"] = np.log1p(df["Demanda_kWh"].values)
# Append lagged copies of the target as additional features (see addNLags).
df = addNLags(df, [1,2,3,5,7])

# Target vector and feature matrix; the columns listed here are excluded from the model inputs.
y = df["Demanda_kWh"]
X = df.drop(columns=["Demanda_kWh", "pressure_msl_hPa", "dew_point_2m_C", "surface_pressure_hPa",
    "cloud_cover_%", "cloud_cover_high_%", "et0_fao_evapotranspiration_mm", "vapour_pressure_deficit_kPa",
    "wind_speed_10m_km/h", "wind_direction_10m_º", "wind_gusts_10m", "terrestrial_radiation_W/m2",
    "DayOfYear", "DayOfWeek", "temp_cluster", "Season", "Month", "relative_humidity_2m_%", "Month_sin", "is_day", "DayOfYear_sin", "sunshine_duration_sec",
    "direct_radiation_W/m2", "global_tilted_irradiance_W/m2", "DayOfYear_cos", "DayOfWeek_cos", "apparent_temperature_C", "Temp_interna_Modulo_C"])
col_names = list(X.columns)

#Convert df to np
X = X.to_numpy()
y = y.to_numpy()

# Row positions where the contiguous train / validation / test segments end.
trainingSize = int(parameters["dataset"]["trainingSize"] * df.shape[0])
validationSize = trainingSize + int(parameters["dataset"]["validationSize"] * df.shape[0])

X_train, y_train = X[:trainingSize, :].copy(), y[:trainingSize].copy()
X_val, y_val = X[trainingSize:validationSize, :].copy(), y[trainingSize:validationSize].copy()
X_test, y_test = X[validationSize:, :].copy(), y[validationSize:].copy()

# Slice the timestamps positionally (.iloc) so each date range has exactly as many entries as
# its feature split; label-based .loc slices include the end label and overlap by one row.
# Aligning on df.index first keeps dates matched to the rows that survived dropna().
aligned_dates = dates.loc[df.index]
X_train_dates = aligned_dates.iloc[:trainingSize].copy()
X_val_dates = aligned_dates.iloc[trainingSize:validationSize].copy()
X_test_dates = aligned_dates.iloc[validationSize:].copy()
assert len(X_train_dates) == len(X_train)
assert len(X_val_dates) == len(X_val)
assert len(X_test_dates) == len(X_test)

print(f"Total Lags: {len(df)}")
print(f"Training Size:{len(X_train)}")
print(f"Validation Size:{len(X_val)}")
print(f"Test Size: {len(X_test)}")

#X_train = normalizeData(X_train)
#X_val = normalizeData(X_val)
#X_test = normalizeData(X_test)

#Reshape Datasets
# Features become (samples, n_features, 1) and targets (samples, 1).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
y_train = y_train.reshape((y_train.shape[0], 1))
y_val = y_val.reshape((y_val.shape[0], 1))
y_test = y_test.reshape((y_test.shape[0], 1))

print(f"Training ==> X Shape: {X_train.shape}, y Shape: {y_train.shape}")
print(f"Validation ==> X Shape: {X_val.shape}, y Shape: {y_val.shape}")
print(f"Test ==> X Shape: {X_test.shape}, y Shape: {y_test.shape}")


Total Lags: 12046
Training Size:8432
Validation Size:1204
Test Size: 2410
Training ==> X Shape: (8432, 17, 1), y Shape: (8432, 1)
Validation ==> X Shape: (1204, 17, 1), y Shape: (1204, 1)
Test ==> X Shape: (2410, 17, 1), y Shape: (2410, 1)


### <b>Plots</b>

#### Predictions & Real Values

In [481]:
def plotPredictions(dates, y_pred, y_test, filename=None):
    """Draw real and predicted values against ``dates`` as two line traces.

    Parameters
    ----------
    dates : sequence used for the x axis.
    y_pred : predicted values (trace named "predictions").
    y_test : real values (trace named "test").
    filename : str, optional
        When given, the figure is also written to ``../results/<filename>.png``
        before being shown.
    """
    fig = go.Figure()
    # Real values are added first so the trace order matches the legend order.
    for series, label in ((y_test, "test"), (y_pred, "predictions")):
        fig.add_trace(go.Scatter(x=dates, y=series, name=label, mode="lines"))

    legend_style = {
        "orientation": "h",
        "yanchor": "top",
        "y": 1.1,
        "xanchor": "left",
        "x": 0.76,
    }
    fig.update_layout(
        title="Real value vs Predicted in Test Data",
        xaxis_title="Date Time",
        yaxis_title="Demand",
        width=1020,
        height=450,
        margin={"l": 70, "r": 20, "t": 55, "b": 20},
        legend=legend_style,
    )

    if filename is not None:
        fig.write_image(f"../results/{filename}.png", format="png", scale=2, width=1000)
    fig.show()

#### Plot Losses 

In [482]:
def plotLosses(train_loss, validation_loss):
    """Plot the per-epoch training and validation loss curves.

    Parameters
    ----------
    train_loss : sequence of float
        Training loss, one value per epoch.
    validation_loss : sequence of float
        Validation loss, one value per epoch.
    """
    fig = go.Figure()
    for label, losses in (("Training Loss", train_loss), ("Validation Loss", validation_loss)):
        # The x axis follows the history length instead of a fixed 50 epochs, so runs
        # with any number of epochs (e.g. the 100-epoch fit) are plotted in full.
        epochs_list = np.arange(len(losses))
        fig.add_trace(go.Scatter(x=epochs_list, y=losses, name=label, mode="lines"))
    fig.update_layout(
        title = "Training and Validation Losses",
        xaxis_title = "Epoch",
        yaxis_title = "MSE Loss Value",
        width = 800,
        height = 400,
        margin = dict(l=40, r=30, t=50, b=30),
        legend = dict(
            orientation = "h",
            yanchor = "top",
            y = .98,
            xanchor = "right",
            x = .98
        )
    )
    fig.show()

#### Plot Metrics

In [483]:
def plotMetrics(train_rmse, validation_rmse, train_mae, validation_mae):
    """Plot per-epoch RMSE and MAE for the training and validation sets.

    Parameters
    ----------
    train_rmse, validation_rmse : sequence of float
        RMSE per epoch on the training / validation data.
    train_mae, validation_mae : sequence of float
        MAE per epoch on the training / validation data.
    """
    curves = (
        ("Training RMSE", train_rmse),
        ("Validation RMSE", validation_rmse),
        ("Training MAE", train_mae),
        ("Validation MAE", validation_mae),
    )
    fig = go.Figure()
    for label, values in curves:
        # The x axis follows the history length instead of a fixed 50 epochs, so runs
        # with any number of epochs are plotted in full.
        epochs_list = np.arange(len(values))
        fig.add_trace(go.Scatter(x=epochs_list, y=values, name=label, mode="lines"))
    fig.update_layout(
        title = "Metrics in Training/Validation",
        xaxis_title = "Epoch",
        yaxis_title = "RMSE / MAE",
        width = 800,
        height = 400,
        margin = dict(l=40, r=30, t=50, b=30),
        legend = dict(
            orientation = "h",
            yanchor = "top",
            y = .98,
            xanchor = "right",
            x = .98
        )
    )
    fig.show()

### Find Best Models

In [484]:
def _buildRecurrentModel(layer_cls, units, layers, dropout):
    """Stack ``layers`` recurrent layers of type ``layer_cls`` followed by a single-unit Dense head."""
    model = Sequential()
    # Every recurrent layer except the last must emit the full sequence for the next one.
    for _ in range(layers - 1):
        model.add(layer_cls(units=units, return_sequences=True, dropout=dropout))
    model.add(layer_cls(units=units, dropout=dropout))
    model.add(Dense(1))
    return model


def selectModels(model_params, data, parameters):
    """Grid-search LSTM and GRU architectures and rank them by test error.

    Parameters
    ----------
    model_params : dict
        Grid passed to ``ParameterGrid``; the keys ``nUnits``, ``nLayers`` and
        ``dropout`` are read from every combination.
    data : dict
        ``{"train"|"validation"|"test": {"X": ..., "y": ...}}``.
    parameters : dict
        Global configuration; ``parameters["lstm"]["batch_size"]`` is used for
        both fitting and evaluation.

    Returns
    -------
    pandas.DataFrame
        One row per (model type, combination) with columns ``Model``,
        ``nUnits``, ``nLayers``, ``dropout``, ``Loss_All``, ``MAE``, ``RMSE``,
        sorted ascending by ``RMSE`` then ``MAE``.

    NOTE(review): candidates are ranked on test-set metrics, so the test set
    takes part in model selection; consider ranking on the validation split.
    """
    X_train, y_train = data["train"]["X"], data["train"]["y"]
    X_val, y_val = data["validation"]["X"], data["validation"]["y"]
    X_test, y_test = data["test"]["X"], data["test"]["y"]
    batch_size = parameters["lstm"]["batch_size"]

    grid = ParameterGrid(model_params)
    rows = []
    for param in tqdm(grid, total=len(grid)):
        units = param.get('nUnits')
        layers = param.get("nLayers")
        dropout = param.get("dropout")
        # LSTM is trained before GRU for each combination, as in the original sequential code.
        for model_name, layer_cls in (("LSTM", LSTM), ("GRU", GRU)):
            candidate = _buildRecurrentModel(layer_cls, units, layers, dropout)
            candidate.compile(
                loss=MeanSquaredError(),
                optimizer=Adam(learning_rate=0.0001),
                metrics=[MeanAbsoluteError(), RootMeanSquaredError()],
            )
            candidate.fit(
                X_train,
                y_train,
                epochs=40,
                batch_size=batch_size,
                validation_data=(X_val, y_val),
                verbose=True,
            )
            loss, mae, rmse = candidate.evaluate(
                x=X_test,
                y=y_test,
                batch_size=batch_size,
                verbose=False,
            )
            rows.append([model_name, units, layers, dropout, loss, mae, rmse])

    # Build the frame once from the collected rows instead of growing it row by row.
    results = pd.DataFrame(rows, columns=["Model", "nUnits", "nLayers", "dropout", "Loss_All", "MAE", "RMSE"])
    return results.sort_values(by=["RMSE", "MAE"])

# Bundle the three splits in the layout selectModels expects.
data = {
    split: {"X": features, "y": targets}
    for split, features, targets in (
        ("train", X_train, y_train),
        ("validation", X_val, y_val),
        ("test", X_test, y_test),
    )
}

# Hyper-parameter grid explored by selectModels.
models_params = dict(
    nUnits=[64, 128],
    nLayers=[1, 2],
    dropout=[0.0, 0.1],
)

#results = selectModels(models_params, data, parameters)
#results

### <b>Train One Model</b>

#### Select Model

In [485]:
#Choose Model
def get_LSTM_Model(num_features, n_units, n_layers, dropout):
    """Build an uncompiled stacked-LSTM regressor.

    Parameters
    ----------
    num_features : int
        Length of the input sequence; inputs have shape ``(num_features, 1)``.
    n_units : int
        Units in every LSTM layer.
    n_layers : int
        Total number of LSTM layers.
    dropout : float
        Input dropout rate applied to every LSTM layer.

    Returns
    -------
    keras.Sequential
        ``n_layers`` LSTM layers followed by a single linear output unit.
    """
    model = Sequential()
    model.add(InputLayer((num_features, 1)))
    # Intermediate layers return full sequences so the next LSTM receives 3-D input.
    for _ in range(n_layers - 1):
        model.add(LSTM(units=n_units, activation='tanh',kernel_initializer='glorot_normal', return_sequences=True, dropout=dropout))
    # The final layer now honours `dropout` too; previously the argument was ignored here,
    # so it had no effect at all when n_layers == 1.
    model.add(LSTM(n_units, activation='relu', kernel_initializer='glorot_normal', dropout=dropout))
    model.add(Dense(1, activation='linear'))
    return model

def get_GRU_Model(num_features, n_units, n_layers, dropout):
    """Build an uncompiled stacked-GRU regressor.

    Parameters
    ----------
    num_features : int
        Length of the input sequence; inputs have shape ``(num_features, 1)``.
    n_units : int
        Units in every GRU layer.
    n_layers : int
        Total number of GRU layers.
    dropout : float
        Input dropout rate applied to every GRU layer.

    Returns
    -------
    keras.Sequential
        ``n_layers`` GRU layers followed by a single linear output unit.
    """
    model = Sequential()
    model.add(InputLayer((num_features, 1)))
    # Intermediate layers return full sequences so the next GRU receives 3-D input.
    for _ in range(n_layers - 1):
        model.add(GRU(units=n_units, kernel_initializer='glorot_normal', return_sequences=True, dropout=dropout))
    # The final layer now honours `dropout` too; previously the argument was ignored here,
    # so it had no effect at all when n_layers == 1.
    model.add(GRU(n_units, activation='relu', kernel_initializer='glorot_normal', dropout=dropout))
    model.add(Dense(1, activation='linear'))
    return model

# Two-layer LSTM with 128 units per layer and no dropout; print the layer summary.
model = get_LSTM_Model(
    num_features=X_train.shape[1],
    n_units=128,
    n_layers=2,
    dropout=0,
)
model.summary()

#### Compile Model

In [486]:
# Configure training: MSE objective, Adam with a fixed 1e-4 learning rate,
# and MAE / RMSE reported for every epoch.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss="mse",
    metrics=[MeanAbsoluteError(), RootMeanSquaredError()],
)

#### Train Model

In [487]:
# Fit for 100 epochs, tracking the validation split after each epoch; the returned
# History object feeds the loss/metric plots further down.
train_history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=parameters["lstm"]["batch_size"],
    epochs=100,
    verbose=1,
)

Epoch 1/100
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 30ms/step - loss: 45.1327 - mean_absolute_error: 4.3391 - root_mean_squared_error: 6.6739 - val_loss: 47.5257 - val_mean_absolute_error: 3.9289 - val_root_mean_squared_error: 6.8939
Epoch 2/100
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - loss: 22.8695 - mean_absolute_error: 2.8648 - root_mean_squared_error: 4.7786 - val_loss: 41.2715 - val_mean_absolute_error: 3.5669 - val_root_mean_squared_error: 6.4243
Epoch 3/100
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: 21.2963 - mean_absolute_error: 2.6976 - root_mean_squared_error: 4.6115 - val_loss: 40.3968 - val_mean_absolute_error: 3.5523 - val_root_mean_squared_error: 6.3558
Epoch 4/100
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 21.0576 - mean_absolute_error: 2.6674 - root_mean_squared_error: 4.5855 - val_loss: 39.9005 - val_mean_absolute_error: 3.54

KeyboardInterrupt: 

#### Evaluate Model 

In [None]:
# Predict on the test inputs, then flatten predictions and targets to 1-D
# for the metric and plotting helpers.
y_pred = model.predict(x=X_test, batch_size=parameters["lstm"]["batch_size"])
y_pred = y_pred.flatten()
#y_pred = np.expm1(y_pred)
#y_test = np.expm1(y_test.flatten())
y_test = y_test.flatten()


[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step


In [None]:
# Test-set MAE and RMSE of the predictions against the real values.
mae, rmse = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, root_mean_squared_error)
)
print(f"MAE: {mae}, RMSE: {rmse}")

MAE: 1.6309332636428648, RMSE: 3.168885440024275


In [None]:
# Full test period: real vs predicted demand.
plotPredictions(dates=X_test_dates, y_pred=y_pred, y_test=y_test)

In [None]:
# Zoom on the first 96*7 test samples (presumably one week of 15-minute readings — TODO confirm).
plotPredictions(
    dates=X_test_dates[:96*7],
    y_pred=y_pred.flatten()[:96*7],
    y_test=y_test.flatten()[:96*7],
)

In [None]:
# Loss curves recorded by the last fit.
plotLosses(
    train_loss=train_history.history["loss"],
    validation_loss=train_history.history["val_loss"],
)

In [None]:
# RMSE / MAE curves recorded by the last fit.
plotMetrics(
    train_rmse=train_history.history["root_mean_squared_error"],
    validation_rmse=train_history.history["val_root_mean_squared_error"],
    train_mae=train_history.history["mean_absolute_error"],
    validation_mae=train_history.history["val_mean_absolute_error"],
)

In [None]:
def scheduler(epoch, lr):
    """Learning-rate schedule: unchanged for epochs 0-4, then multiplied by exp(-0.1) each epoch.

    Parameters
    ----------
    epoch : int
        Index of the epoch about to start.
    lr : float
        Current learning rate.

    Returns
    -------
    The learning rate to use for ``epoch``.
    """
    return lr * np.exp(-0.1) if epoch >= 5 else lr
# Callback that asks `scheduler` for the learning rate of each epoch.
scheduler_callback = LearningRateScheduler(schedule=scheduler)

# Single-layer GRU with 64 units and no dropout; this replaces the LSTM held in `model`.
model = get_GRU_Model(num_features=X_train.shape[1], n_units=64, n_layers=1, dropout=0.0)

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss=MeanSquaredError(),
    metrics=[MeanAbsoluteError(), RootMeanSquaredError()],
)

# Train with the configured epoch count and batch size, decaying the learning rate via the callback.
train_history = model.fit(
    x=X_train,
    y=y_train,
    epochs=parameters["lstm"]["epochs"],
    batch_size=parameters["lstm"]["batch_size"],
    validation_data=(X_val, y_val),
    callbacks=[scheduler_callback],
)

# Test-set predictions and Keras-side evaluation (loss plus compiled metrics).
predictions = model.predict(x=X_test, batch_size=parameters["lstm"]["batch_size"])
test_history = model.evaluate(x=X_test, y=y_test, batch_size=parameters["lstm"]["batch_size"])

plotPredictions(dates=X_test_dates, y_pred=predictions.flatten(), y_test=y_test.flatten())
print(
    "MAE: ",
    mean_absolute_error(y_true=y_test.flatten(), y_pred=predictions.flatten()),
    ", RMSE: ",
    root_mean_squared_error(y_true=y_test.flatten(), y_pred=predictions.flatten()),
)

Epoch 1/50
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - loss: 4039.1772 - mean_absolute_error: 58.0082 - root_mean_squared_error: 62.2400 - val_loss: 56.4131 - val_mean_absolute_error: 4.8455 - val_root_mean_squared_error: 7.5109 - learning_rate: 1.0000e-04
Epoch 2/50
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 26.5444 - mean_absolute_error: 3.2144 - root_mean_squared_error: 5.1446 - val_loss: 45.7746 - val_mean_absolute_error: 4.1394 - val_root_mean_squared_error: 6.7657 - learning_rate: 1.0000e-04
Epoch 3/50
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 23.3506 - mean_absolute_error: 2.9091 - root_mean_squared_error: 4.8296 - val_loss: 44.4558 - val_mean_absolute_error: 4.0399 - val_root_mean_squared_error: 6.6675 - learning_rate: 1.0000e-04
Epoch 4/50
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 22.8594 - mean_absolute_error: 2.8454 - 

ValueError: Cannot take the length of shape with unknown rank.

In [None]:
# RMSE / MAE curves recorded by the most recent fit.
plotMetrics(
    train_rmse=train_history.history["root_mean_squared_error"],
    validation_rmse=train_history.history["val_root_mean_squared_error"],
    train_mae=train_history.history["mean_absolute_error"],
    validation_mae=train_history.history["val_mean_absolute_error"],
)

In [None]:
# NOTE(review): neither `permutation_feature_importance` nor `X_columns` is defined or imported
# in the visible part of this notebook, so this cell would raise NameError on a fresh
# Restart & Run All unless they come from elsewhere. The feature names are stored in
# `col_names` above — presumably that is what `X_columns` should be; confirm.
permutation_feature_importance(model, X_test, y_test, X_columns)