<a href="https://colab.research.google.com/github/Nasim-Amani/Attention-based-seq2seq-/blob/main/BO_MHA_seq2seq_LSTM_24_Hour_Ahead_Forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install keras_tuner

Collecting keras_tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras_tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras_tuner
Successfully installed keras_tuner-1.4.7 kt-legacy-1.0.5


In [None]:

import keras_tuner
from keras_tuner import RandomSearch
from keras_tuner import HyperModel
from keras_tuner import HyperParameters

In [None]:
import pandas as pd
import numpy as np
import os
import random
import math
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import (mean_squared_error, mean_absolute_error,
                             r2_score, mean_absolute_percentage_error)
from keras.models import Model, Sequential
from keras.layers import (Input, TimeDistributed, LSTM, Concatenate,
                          Dense, Dropout, Attention)
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam, SGD, RMSprop
import matplotlib.pyplot as plt

In [None]:
# Load the data
data = pd.read_csv("metro_all6.csv")
columns_to_keep = ['datetime', 'Load',
                   'Load_previous_hour','Load_same_hour_previous_day',
                   'day_part_encoded',
                   'ALLSKY_SFC_UV_INDEX',
                   'month_sin','month_cos','day_of_week_num_sin' , 'day_of_week_num_cos']

# Every kept column except the timestamp is scaled and fed to the model.
# 'Load' stays first: create_seq2seq_data reads the target from column 0.
feature_columns = columns_to_keep[1:]

# Build the working frame without mutating a slice of `data`; assigning into
# `data[columns_to_keep]` directly raises pandas' SettingWithCopyWarning.
df = (
    data[columns_to_keep]
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
    .set_index('datetime')
)

# Split the data into train, validation, and test sets (50% / 20% / remainder),
# taken in row order. NOTE(review): presumably the CSV is sorted by datetime so
# that this is a chronological split -- confirm.
total_size = len(df)
train_size = int(total_size * 0.5)
valid_size = int(total_size * 0.2)
test_size = total_size - train_size - valid_size

train = df.iloc[:train_size]
valid = df.iloc[train_size:train_size+valid_size]
test = df.iloc[train_size+valid_size:]

# Create scaler and fit on the training data only, so that no statistics from
# the validation/test rows influence the scaling.
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train[feature_columns])

# Apply the same (train-fitted) scaler to the validation and test sets
valid_scaled = scaler.transform(valid[feature_columns])
test_scaled = scaler.transform(test[feature_columns])




def create_seq2seq_data(data, sequence_length, num_prediction_steps):
    """Slice a 2-D array into sliding encoder / decoder / target windows.

    One sample is produced for every anchor row ``t`` in
    ``range(sequence_length, len(data) - num_prediction_steps)``:

    * encoder input  -- rows ``[t - sequence_length, t)``, all columns
    * decoder input  -- rows ``[t - sequence_length + 1, t + 1)``, column 0
      only, kept as a trailing axis of size 1
    * decoder target -- rows ``[t + 1, t + 1 + num_prediction_steps)``,
      column 0 only

    Args:
        data: 2-D array indexed as ``data[row, column]``; column 0 is the
            series used for the decoder input and the target.
        sequence_length: number of rows in each encoder/decoder window.
        num_prediction_steps: number of rows in each target window.

    Returns:
        Tuple ``(X_enc, X_dec, y_dec)`` of NumPy arrays shaped
        ``(n, sequence_length, n_columns)``, ``(n, sequence_length, 1)`` and
        ``(n, num_prediction_steps)``. When there are no valid anchors, each
        array is empty with shape ``(0,)``.
    """
    anchors = range(sequence_length, len(data) - num_prediction_steps)

    # Encoder sees every column of the history window.
    encoder_windows = [data[t - sequence_length:t, :] for t in anchors]

    # Decoder sees the first column only, shifted one row later than the encoder.
    decoder_windows = [data[t - sequence_length + 1:t + 1, 0:1] for t in anchors]

    # Targets are the first column over the rows that follow the decoder window.
    target_windows = [data[t + 1:t + 1 + num_prediction_steps, 0] for t in anchors]

    return np.array(encoder_windows), np.array(decoder_windows), np.array(target_windows)



# Window sizes in rows: history length per sample and forecast horizon
# (presumably hourly rows, i.e. 24 h in / 24 h out -- confirm against the CSV).
sequence_length = 24
num_prediction = 24

# Create the sequence data for the train and validation sets
X_enc_train, X_dec_train, y_train = create_seq2seq_data(train_scaled, sequence_length, num_prediction)
X_enc_valid, X_dec_valid, y_valid = create_seq2seq_data(valid_scaled, sequence_length, num_prediction)

# Swap the last two axes of the inputs so they become (samples, columns, window),
# and give the targets a singleton middle axis: (samples, 1, horizon).
X_enc_train = X_enc_train.swapaxes(1, 2)
X_dec_train = X_dec_train.swapaxes(1, 2)
y_train = y_train[:, np.newaxis, :]

X_enc_valid = X_enc_valid.swapaxes(1, 2)
X_dec_valid = X_dec_valid.swapaxes(1, 2)
y_valid = y_valid[:, np.newaxis, :]

# Print the reshaped data shapes
for label, arr in [
    ("Encoder input shape (train):", X_enc_train),
    ("Decoder input shape (train):", X_dec_train),
    ("Output shape (train):", y_train),
    ("Encoder input shape (valid):", X_enc_valid),
    ("Decoder input shape (valid):", X_dec_valid),
    ("Output shape (valid):", y_valid),
]:
    print(label, arr.shape)

Encoder input shape (train): (10859, 9, 24)
Decoder input shape (train): (10859, 1, 24)
Output shape (train): (10859, 1, 24)
Encoder input shape (valid): (4315, 9, 24)
Decoder input shape (valid): (4315, 1, 24)
Output shape (valid): (4315, 1, 24)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['datetime'] = pd.to_datetime(df['datetime'])


In [None]:
def build_model(hp):
    """Build and compile the attention-based seq2seq LSTM for one tuner trial.

    The architecture is a stack of encoder LSTMs (each followed by dropout)
    and a stack of decoder LSTMs. Each decoder LSTM output goes through
    dropout, attends over the last encoder layer's output with multi-head
    attention, and is concatenated with the attention result. A
    time-distributed dense layer produces the prediction.

    Input and output sizes are read from the notebook globals
    ``X_enc_train``, ``X_dec_train`` and ``y_train``.

    Args:
        hp: keras_tuner ``HyperParameters`` object used to sample the number
            of encoder/decoder layers, LSTM units, per-layer dropout rates,
            number of attention heads and the Adam learning rate.

    Returns:
        A compiled Keras ``Model`` taking ``[encoder_input, decoder_input]``
        and trained with MSE loss.
    """
    # Hyperparameter search
    num_layers_enc = hp.Choice('num_layers_enc', values=[1, 2])
    num_layers_dec = hp.Choice('num_layers_dec', values=[1, 2])
    units = hp.Int('units', 16, 256, step=16)
    encoder_dropout_rates = [hp.Choice('encoder_dropout_rates_' + str(i), values=[0.0, 0.1, 0.2, 0.4, 0.6]) for i in range(num_layers_enc)]
    decoder_dropout_rates = [hp.Choice('decoder_dropout_rates_' + str(i), values=[0.0, 0.1, 0.2, 0.4, 0.6]) for i in range(num_layers_dec)]
    num_heads = hp.Int('num_heads', 1, 8, step=1)
    #batch_size = hp.Choice('batch_size', values=[16, 32, 64, 128, 256])
    learning_rate = hp.Choice('learning_rate', values=[0.0001, 0.001, 0.01, 0.1, 0.2])

    # Encoder
    encoder_inputs = Input(shape=(X_enc_train.shape[1], X_enc_train.shape[2]))
    encoder_layers = []
    encoder_states = []
    for i in range(num_layers_enc):
        encoder_layer = LSTM(units, return_state=True, return_sequences=True)
        # The first layer reads the raw input; deeper layers read the previous
        # layer's dropped-out sequence.
        if i == 0:
            encoder_layer_output, encoder_layer_state_h, encoder_layer_state_c = encoder_layer(encoder_inputs)
        else:
            encoder_layer_output, encoder_layer_state_h, encoder_layer_state_c = encoder_layer(encoder_layers[-1])
        encoder_layer_dropout = Dropout(encoder_dropout_rates[i])(encoder_layer_output)
        encoder_layers.append(encoder_layer_dropout)
        encoder_states.append([encoder_layer_state_h, encoder_layer_state_c])

    # Decoder
    decoder_input = Input(shape=(X_dec_train.shape[1], X_dec_train.shape[2]))
    decoder_layers = []
    for i in range(num_layers_dec):
        decoder_layer = LSTM(units, return_sequences=True, return_state=True)
        if i == 0:
            # First decoder layer starts from the last encoder layer's final state.
            decoder_layer_output, decoder_state_h, decoder_state_c = decoder_layer(
                decoder_input, initial_state=encoder_states[-1])
        else:
            # Deeper decoder layers start from the previous decoder layer's final state.
            decoder_layer_output, decoder_state_h, decoder_state_c = decoder_layer(
                concatenated_input, initial_state=[decoder_state_h, decoder_state_c])

        # Apply the sampled decoder dropout. Previously these rates were tuned
        # but never used, so they had no effect on the model.
        decoder_layer_output = Dropout(decoder_dropout_rates[i])(decoder_layer_output)

        # Apply attention mechanism: decoder output is the query, the last
        # encoder layer's output is the value (and key).
        attention_output = tf.keras.layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=units // num_heads
        )(decoder_layer_output, encoder_layers[-1])
        concatenated_input = Concatenate()([decoder_layer_output, attention_output])
        decoder_layers.append(concatenated_input)

    prediction = TimeDistributed(Dense(y_train.shape[2]))(decoder_layers[-1])

    model = Model([encoder_inputs, decoder_input], prediction)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    return model

In [None]:
from keras_tuner import BayesianOptimization

# Bayesian optimisation over the search space declared in build_model,
# minimising the validation loss.
tuner = BayesianOptimization(
    build_model,
    objective='val_loss',
    max_trials=50,
    executions_per_trial=1,
    # Seed the tuner's own hyperparameter sampling so the search is repeatable
    # on that side; model weight initialisation is not seeded by this argument.
    seed=42,
)

tuner.search_space_summary()

# Create an instance of EarlyStopping callback
custom_early_stopping = EarlyStopping(
    monitor='val_loss',  # Quantity to monitor for early stopping (validation loss)
    patience=2,  # Number of epochs with no improvement after which training will be stopped
)

# Start the hyperparameter search. `epochs` is only an upper bound: each trial
# is expected to be cut short by the early-stopping callback.
tuner.search([X_enc_train, X_dec_train], y_train, epochs=500, batch_size=64,
             validation_data=([X_enc_valid, X_dec_valid], y_valid),
             callbacks=[custom_early_stopping])


Trial 50 Complete [00h 01m 38s]
val_loss: 0.031167559325695038

Best val_loss So Far: 0.0012072663521394134
Total elapsed time: 02h 01m 17s


In [None]:
# Report the trials ranked by the search objective
tuner.results_summary()

# Ask the tuner for its single top-ranked model and keep it as `model`
best_models = tuner.get_best_models(num_models=1)
model = best_models[0]

Results summary
Results in ./untitled_project
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 39 summary
Hyperparameters:
num_layers_enc: 2
num_layers_dec: 2
units: 96
encoder_dropout_rates_0: 0.2
decoder_dropout_rates_0: 0.2
num_heads: 2
learning_rate: 0.001
decoder_dropout_rates_1: 0.6
encoder_dropout_rates_1: 0.2
Score: 0.0012072663521394134

Trial 02 summary
Hyperparameters:
num_layers_enc: 1
num_layers_dec: 2
units: 128
encoder_dropout_rates_0: 0.0
decoder_dropout_rates_0: 0.1
num_heads: 6
learning_rate: 0.01
decoder_dropout_rates_1: 0.2
Score: 0.0012395554222166538

Trial 26 summary
Hyperparameters:
num_layers_enc: 2
num_layers_dec: 1
units: 160
encoder_dropout_rates_0: 0.2
decoder_dropout_rates_0: 0.0
num_heads: 2
learning_rate: 0.001
decoder_dropout_rates_1: 0.4
encoder_dropout_rates_1: 0.1
Score: 0.0013087757397443056

Trial 33 summary
Hyperparameters:
num_layers_enc: 2
num_layers_dec: 2
units: 48
encoder_dropout_rates_0: 0.4
decoder_dropout_rates_0: 

In [None]:
# Print the layer-by-layer structure (output shapes, parameter counts and
# connections) of `model`, the best model taken from the tuner.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 9, 24)]              0         []                            
                                                                                                  
 lstm (LSTM)                 [(None, 9, 96),              46464     ['input_1[0][0]']             
                              (None, 96),                                                         
                              (None, 96)]                                                         
                                                                                                  
 dropout (Dropout)           (None, 9, 96)                0         ['lstm[0][0]']                
                                                                                              

In [None]:
# Fetch the tuner's top-ranked model and its serialised architecture
best_model = tuner.get_best_models()[0]
config = best_model.get_config()

# NOTE: despite its name this holds the list of per-layer config dicts,
# not a layer count.
num_layers = config['layers']

# Walk the layer configs once, collecting LSTM unit counts and Dropout rates
# in the order the layers appear in the model config.
units = []
dropout_rates = []
for layer in config['layers']:
    layer_kind = layer['class_name']
    if layer_kind == 'LSTM':
        units.append(layer['config']['units'])
    elif layer_kind == 'Dropout':
        dropout_rates.append(layer['config']['rate'])

# Print the units in each LSTM layer (numbered among LSTM layers only)
for i, unit in enumerate(units):
    print(f"Layer {i+1} - Units: {unit}")

# Print the rate of each Dropout layer (numbered among Dropout layers only)
for i, rate in enumerate(dropout_rates):
    print(f"Layer {i+1} - Dropout Rate: {rate}")

Layer 1 - Units: 96
Layer 2 - Units: 96
Layer 3 - Units: 96
Layer 4 - Units: 96
Layer 1 - Dropout Rate: 0.2
Layer 2 - Dropout Rate: 0.2
