<a href="https://colab.research.google.com/github/21020718/KLTN_2025_TrangNTT/blob/main/%5BFinal%5DDataset1_fix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Bộ dữ liệu thứ nhất**

In [None]:
%autosave 60
import warnings
warnings.filterwarnings('ignore')

Autosaving every 60 seconds


## Preprocessing

In [None]:
# Set random seeds for reproducibility
import random
import numpy as np
import torch

# Define a fixed seed value shared by every random number generator below
SEED = 42

# Seed Python's, NumPy's and PyTorch's generators so runs are repeatable
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # For GPU if available
# Ask cuDNN for deterministic kernels and switch off its auto-tuner
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Register the private module sklearn.preprocessing._data under the name
# "sklearn.preprocessing.data" so that imports of that name resolve.
# NOTE(review): presumably needed to load objects pickled with an older
# scikit-learn release - confirm it is still required.
# The alias `data` is rebound to the loaded DataFrame further down.
import sklearn.preprocessing
import sklearn.preprocessing._data as data
import sys
sys.modules["sklearn.preprocessing.data"] = data

import pandas as pd
from numpy import concatenate
from math import sqrt

from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import normalized_mutual_info_score

def Scaler(data):
    """
        Rescale every column of a DataFrame to the range [0, 1].
        Arguments:
          data:  Pandas DataFrame whose values can be cast to float32
        Return:
          scaler: the MinMaxScaler fitted on ``data``
          scaledDf: Pandas DataFrame of the scaled values, carrying the
                    column labels of ``data`` and a fresh default index
    """
    fitted = MinMaxScaler(feature_range=(0, 1))
    as_float = data.values.astype('float32')
    rescaled = pd.DataFrame(fitted.fit_transform(as_float), columns=data.columns)
    return fitted, rescaled


def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.

    Each output row holds the ``n_in`` lagged observations of every variable
    followed by the forecast columns. After assembling the frame, the last
    ``n_vars - 1`` columns are removed; with the default ``n_out=1`` this
    leaves ``var1(t)`` as the single target column.

    Arguments:
        data: Sequence of observations as a list (flat or nested), NumPy
            array or Pandas DataFrame.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning. Rows that
        survive ``dropnan`` keep their original index labels.
    """
    df = pd.DataFrame(data)
    # Take the variable count from the frame itself so that nested lists and
    # 1-D inputs are named correctly.
    n_vars = df.shape[1]
    cols, names = [], []

    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += [f'var{j + 1}(t-{lag})' for j in range(n_vars)]

    # forecast sequence (t, t+1, ... t+n)
    for step in range(n_out):
        cols.append(df.shift(-step))
        label = 't' if step == 0 else f't+{step}'
        names += [f'var{j + 1}({label})' for j in range(n_vars)]

    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names

    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)

    # Remove the trailing non-target variables. Guard the univariate case:
    # a slice of ``-0:`` would select (and drop) every column.
    surplus = n_vars - 1
    if surplus > 0:
        agg.drop(agg.columns[-surplus:], axis=1, inplace=True)
    return agg



# Load the raw CSV, scale it and frame it as 12-lag supervised windows.
if __name__ == '__main__':

    # Load data with date column preserved
    data = pd.read_csv("/content/drive/MyDrive/VN2008-2020.csv", encoding='utf-8-sig')

    # Keep the dates aside: only the remaining columns are scaled
    dates = data["date"].copy()
    data.drop(columns=["date"], inplace=True)
    # Missing values are replaced by 0 before scaling
    data.fillna(0, inplace=True)

    scaler, scaledDf = Scaler(data)

    scaledDf_with_dates = scaledDf.copy()
    scaledDf_with_dates['date'] = dates

    # Each row of `reframed` holds 12 lagged observations plus the target
    reframed = series_to_supervised(scaledDf, n_in=12)
    reframed_dates = dates.reset_index(drop=True)
    reframed_with_dates = reframed.copy()
    # Column assignment aligns on index labels; `reframed` keeps the labels of
    # the rows that survived dropna, so each window gets the date stored under
    # the same label (assumes the CSV was read with a default 0..N-1 index).
    reframed_with_dates['date'] = reframed_dates

## Build model

In [None]:
# # Reduce the number of GRU units, dense units and epochs, and use a smaller batch size, so training runs faster.
# (Alternative, currently disabled grid.)
# model_types = ['hybrid']
# lstm_unit = [128, 256, 512]
# gru_unit = [8, 16, 32]
# drop_rate = [0.1, 0.2]
# dense_unit = [16, 32, 64]
# batch_size_num = [2, 4]
# epochs = [100]

# Active search grid: every combination of the lists below is trained.
model_types = ['lstm','gru','hybrid']  # architectures to evaluate
lstm_unit = [256,512]                  # hidden sizes tried for LSTM layers
gru_unit = [8,16]                      # hidden sizes tried for GRU layers
drop_rate = [0.1,0.2]                  # dropout probabilities
dense_unit = [32,64]                   # widths of the first dense layer
batch_size_num = [4]                   # mini-batch sizes
epochs = [100]                         # training epochs per model

import pandas as pd
import numpy as np
from numpy import concatenate
import itertools
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import time

class AttentionGRU(nn.Module):
    """GRU regressor whose input is re-weighted by a learned attention map.

    The forward pass expects ``x`` shaped (batch, time_steps, input_dim) and
    returns a (batch, 1) tensor squashed into (0, 1) by a final sigmoid.

    Arguments:
        input_dim: number of features per time step.
        time_steps: length of every input window.
        gru_units: hidden size of the GRU layer.
        dropout_rate: dropout probability applied before the dense head.
        dense_units: width of the first dense layer.
    """

    def __init__(self, input_dim, time_steps, gru_units, dropout_rate, dense_units):
        super().__init__()
        self.input_dim = input_dim
        self.time_steps = time_steps

        # Attention scorer: two linear maps applied along the time axis.
        self.attention1 = nn.Linear(time_steps, time_steps)
        self.attention2 = nn.Linear(time_steps, time_steps)
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

        # Recurrent encoder.
        self.gru = nn.GRU(input_dim, gru_units, batch_first=True)

        # Dense head fed with the hidden states of all time steps.
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(dropout_rate)
        self.dense1 = nn.Linear(gru_units * time_steps, dense_units)
        self.tanh = nn.Tanh()
        self.dense2 = nn.Linear(dense_units, 1)
        self.final_activation = nn.Sigmoid()

    def forward(self, x):
        """Return the (batch, 1) prediction for a batch of input windows."""
        # Score on the transposed input so the linear layers act on time.
        scores = self.attention1(x.permute(0, 2, 1))
        scores = self.attention2(self.sigmoid(scores))
        # NOTE(review): dim=1 of the transposed tensor is the feature axis,
        # so the weights sum to 1 across features - confirm this is intended.
        weights = self.softmax(scores).permute(0, 2, 1)

        # Element-wise re-weighting of the input, then the GRU
        # (output: batch, seq, hidden_size).
        seq_out, _ = self.gru(x * weights)

        hidden = self.dropout(self.flatten(seq_out))
        hidden = self.tanh(self.dense1(hidden))
        return self.final_activation(self.dense2(hidden))

class AttentionLSTM(nn.Module):
    """LSTM regressor whose input is re-weighted by a learned attention map.

    The forward pass expects ``x`` shaped (batch, time_steps, input_dim) and
    returns a (batch, 1) tensor squashed into (0, 1) by a final sigmoid.

    Arguments:
        input_dim: number of features per time step.
        time_steps: length of every input window.
        lstm_units: hidden size of the LSTM layer.
        dropout_rate: dropout probability applied before the dense head.
        dense_units: width of the first dense layer.
    """

    def __init__(self, input_dim, time_steps, lstm_units, dropout_rate, dense_units):
        super().__init__()
        self.input_dim = input_dim
        self.time_steps = time_steps

        # Attention scorer: two linear maps applied along the time axis.
        self.attention1 = nn.Linear(time_steps, time_steps)
        self.attention2 = nn.Linear(time_steps, time_steps)
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

        # Recurrent encoder.
        self.lstm = nn.LSTM(input_dim, lstm_units, batch_first=True)

        # Dense head fed with the hidden states of all time steps.
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(dropout_rate)
        self.dense1 = nn.Linear(lstm_units * time_steps, dense_units)
        self.tanh = nn.Tanh()
        self.dense2 = nn.Linear(dense_units, 1)
        self.final_activation = nn.Sigmoid()

    def forward(self, x):
        """Return the (batch, 1) prediction for a batch of input windows."""
        # Score on the transposed input so the linear layers act on time.
        scores = self.attention1(x.permute(0, 2, 1))
        scores = self.attention2(self.sigmoid(scores))
        # NOTE(review): dim=1 of the transposed tensor is the feature axis,
        # so the weights sum to 1 across features - confirm this is intended.
        weights = self.softmax(scores).permute(0, 2, 1)

        # Element-wise re-weighting of the input, then the LSTM
        # (output: batch, seq, hidden_size).
        seq_out, _ = self.lstm(x * weights)

        hidden = self.dropout(self.flatten(seq_out))
        hidden = self.tanh(self.dense1(hidden))
        return self.final_activation(self.dense2(hidden))

class HybridLSTM_GRU(nn.Module):
    """Parallel LSTM and GRU encoders sharing one attention-weighted input.

    Both recurrent layers read the same re-weighted window; their per-step
    hidden states are concatenated along the feature axis and passed to a
    dense head. The forward pass expects ``x`` shaped
    (batch, time_steps, input_dim) and returns a (batch, 1) tensor in (0, 1).

    Arguments:
        input_dim: number of features per time step.
        time_steps: length of every input window.
        lstm_units: hidden size of the LSTM layer.
        gru_units: hidden size of the GRU layer.
        dropout_rate: dropout probability applied before the dense head.
        dense_units: width of the first dense layer.
    """

    def __init__(self, input_dim, time_steps, lstm_units, gru_units, dropout_rate, dense_units):
        super().__init__()
        self.input_dim = input_dim
        self.time_steps = time_steps

        # Attention scorer: two linear maps applied along the time axis.
        self.attention1 = nn.Linear(time_steps, time_steps)
        self.attention2 = nn.Linear(time_steps, time_steps)
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

        # The two recurrent encoders.
        self.lstm = nn.LSTM(input_dim, lstm_units, batch_first=True)
        self.gru = nn.GRU(input_dim, gru_units, batch_first=True)

        # Dense head; its input width covers both encoders at every step.
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(dropout_rate)
        self.dense1 = nn.Linear((lstm_units + gru_units) * time_steps, dense_units)
        self.tanh = nn.Tanh()
        self.dense2 = nn.Linear(dense_units, 1)
        self.final_activation = nn.Sigmoid()

    def forward(self, x):
        """Return the (batch, 1) prediction for a batch of input windows."""
        # Score on the transposed input so the linear layers act on time.
        scores = self.attention1(x.permute(0, 2, 1))
        scores = self.attention2(self.sigmoid(scores))
        # NOTE(review): dim=1 of the transposed tensor is the feature axis,
        # so the weights sum to 1 across features - confirm this is intended.
        weights = self.softmax(scores).permute(0, 2, 1)
        weighted = x * weights

        # Run both encoders on the same weighted input.
        from_lstm, _ = self.lstm(weighted)
        from_gru, _ = self.gru(weighted)

        # Join the hidden states step by step, then apply the dense head.
        merged = torch.cat((from_lstm, from_gru), dim=2)
        hidden = self.dropout(self.flatten(merged))
        hidden = self.tanh(self.dense1(hidden))
        return self.final_activation(self.dense2(hidden))

class MultiScaleHybrid(nn.Module):
    """LSTM on the raw window combined with a GRU on an attention-weighted copy.

    The LSTM reads the unmodified input, while the GRU reads the input after
    element-wise multiplication with learned attention weights. Their
    per-step hidden states are concatenated and passed to a dense head. The
    forward pass expects ``x`` shaped (batch, time_steps, input_dim) and
    returns a (batch, 1) tensor in (0, 1).

    Arguments:
        input_dim: number of features per time step.
        time_steps: length of every input window.
        lstm_units: hidden size of the LSTM layer.
        gru_units: hidden size of the GRU layer.
        dropout_rate: dropout probability applied before the dense head.
        dense_units: width of the first dense layer.
    """

    def __init__(self, input_dim, time_steps, lstm_units, gru_units, dropout_rate, dense_units):
        super().__init__()
        self.input_dim = input_dim
        self.time_steps = time_steps

        # Encoder that sees the full, unweighted sequence.
        self.lstm = nn.LSTM(input_dim, lstm_units, batch_first=True)

        # Encoder that sees the attention-weighted sequence.
        self.gru = nn.GRU(input_dim, gru_units, batch_first=True)

        # Attention scorer: two linear maps applied along the time axis.
        self.attention = nn.Linear(time_steps, time_steps)
        self.attention2 = nn.Linear(time_steps, time_steps)
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

        # Dense head; its input width covers both encoders at every step.
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(dropout_rate)
        self.dense1 = nn.Linear((lstm_units + gru_units) * time_steps, dense_units)
        self.tanh = nn.Tanh()
        self.dense2 = nn.Linear(dense_units, 1)
        self.final_activation = nn.Sigmoid()

    def forward(self, x):
        """Return the (batch, 1) prediction for a batch of input windows."""
        # LSTM branch: raw input.
        from_lstm, _ = self.lstm(x)

        # GRU branch: score on the transposed input, then re-weight x.
        scores = self.attention(x.permute(0, 2, 1))
        scores = self.attention2(self.sigmoid(scores))
        # NOTE(review): dim=1 of the transposed tensor is the feature axis,
        # so the weights sum to 1 across features - confirm this is intended.
        weights = self.softmax(scores).permute(0, 2, 1)
        from_gru, _ = self.gru(x * weights)

        # Join the hidden states step by step, then apply the dense head.
        merged = torch.cat((from_lstm, from_gru), dim=2)
        hidden = self.dropout(self.flatten(merged))
        hidden = self.tanh(self.dense1(hidden))
        return self.final_activation(self.dense2(hidden))

def build_model(train_X, train_Y, val_X, val_Y, model_type='gru', lstm_units=128, gru_units=128, drop_rate=0.3, dense_unit=64, batch_size=32, epochs=100):
    """Train one attention model and return it in its final-epoch state.

    Arguments:
        train_X, val_X: arrays indexed as (samples, time_steps, features).
        train_Y, val_Y: NumPy arrays of targets; reshaped to column vectors.
        model_type: 'gru', 'lstm' or 'hybrid'.
        lstm_units, gru_units: hidden sizes handed to the chosen model.
        drop_rate: dropout probability.
        dense_unit: width of the model's first dense layer.
        batch_size: mini-batch size for both loaders.
        epochs: number of passes over the training data.
    Returns:
        The trained model (on CUDA when available, otherwise on CPU). No
        best-epoch checkpoint is restored.
    Raises:
        ValueError: if ``model_type`` is not one of the supported names.

    Relies on the module-level ``SEED`` for the loader generators and for the
    weight initialisation.
    """
    # Wrap the arrays as float tensors; targets become (n, 1) columns.
    train_set = TensorDataset(torch.FloatTensor(train_X), torch.FloatTensor(train_Y.reshape(-1, 1)))
    val_set = TensorDataset(torch.FloatTensor(val_X), torch.FloatTensor(val_Y.reshape(-1, 1)))

    # Dedicated, identically seeded generators keep batch order repeatable.
    shuffle_rng = torch.Generator()
    shuffle_rng.manual_seed(SEED)
    val_rng = torch.Generator()
    val_rng.manual_seed(SEED)

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, generator=shuffle_rng)
    val_loader = DataLoader(val_set, batch_size=batch_size, generator=val_rng)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    time_steps = train_X.shape[1]
    input_dim = train_X.shape[2]

    # Re-seed right before construction so initial weights are repeatable.
    torch.manual_seed(SEED)

    if model_type == 'hybrid':
        net = HybridLSTM_GRU(input_dim, time_steps, lstm_units, gru_units, drop_rate, dense_unit)
    elif model_type == 'lstm':
        net = AttentionLSTM(input_dim, time_steps, lstm_units, drop_rate, dense_unit)
    elif model_type == 'gru':
        net = AttentionGRU(input_dim, time_steps, gru_units, drop_rate, dense_unit)
    else:
        raise ValueError(f"Unknown model type: {model_type}")
    net = net.to(device)

    optimizer = optim.Adam(net.parameters())
    criterion = nn.L1Loss()  # MAE loss

    # Plateau bookkeeping: the learning rate is multiplied by lr_factor after
    # `patience` epochs in a row without a new best validation loss.
    lowest_val_total = float('inf')
    stale_epochs = 0
    patience = 5
    lr_factor = 0.01

    for epoch_idx in range(epochs):
        # Training pass
        net.train()
        train_total = 0.0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(net(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()

            train_total += batch_loss.item()

        # Validation pass (no gradients, dropout disabled)
        net.eval()
        val_total = 0.0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)
                val_total += criterion(net(batch_x), batch_y).item()

        print(f'Epoch {epoch_idx+1}/{epochs}, Train Loss: {train_total/len(train_loader):.4f}, Val Loss: {val_total/len(val_loader):.4f}')

        # The summed (not averaged) validation loss drives the schedule.
        if val_total < lowest_val_total:
            lowest_val_total = val_total
            stale_epochs = 0
            continue

        stale_epochs += 1
        if stale_epochs < patience:
            continue

        for group in optimizer.param_groups:
            group['lr'] *= lr_factor
        stale_epochs = 0
        print(f'Reducing learning rate by factor of {lr_factor}')

    return net

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Positions where ``y_true`` is zero are excluded, since the relative
    error is undefined there.

    Arguments:
        y_true: ground-truth values (list, tuple or NumPy array).
        y_pred: predicted values, same length as ``y_true``.
    Returns:
        The MAPE over the non-zero targets, or NaN when every target is zero.
    """
    # Coerce to arrays so plain Python sequences support boolean masking.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    mask = y_true != 0
    if not mask.any():
        # Nothing to average; return NaN explicitly instead of relying on
        # the mean of an empty array.
        return float('nan')

    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

def _tag_metrics(metrics, model_type, units, drop, dense, batch, epoch, total_time):
    """Attach one run's hyper-parameters and elapsed time to its metrics frame."""
    metrics['model_type'] = model_type
    metrics['units'] = units
    metrics['drop_rate'] = drop
    metrics['dense_unit'] = dense
    metrics['batch_size'] = batch
    metrics['epochs'] = epoch
    metrics['training_time'] = total_time  # Add training time to results
    return metrics


def walk_forward(train_X, train_Y, val_X, val_Y, test_X, test_Y, grid_search, scaler, test_dates):
    """Evaluate every grid configuration by retraining before each test point.

    For each configuration the test samples are visited in date order; a new
    model is trained for every sample, the sample is predicted, and the
    sample is then appended to a running history.

    Arguments:
        train_X, train_Y: initial training windows and targets.
        val_X, val_Y: validation windows and targets (also used by
            ``build_model`` for its learning-rate schedule).
        test_X, test_Y: test windows (samples, time_steps, features) and targets.
        grid_search: iterable of tuples; 7 fields for 'hybrid'
            (type, lstm units, gru units, dropout, dense, batch, epochs),
            6 fields otherwise (type, units, dropout, dense, batch, epochs).
        scaler: fitted scaler passed on to ``inverscale``.
        test_dates: Pandas Series of dates, positionally matching ``test_X``.
    Returns:
        Tuple of (metrics for raw predictions, metrics for adjusted
        predictions, raw predictions per config, ground truths per config,
        adjusted predictions per config, elapsed seconds per config).

    Side effect: writes 'model_training_times.csv' to the working directory.
    """
    _, f, c = test_X.shape
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Constants to ensure reproducibility
    seed = 42
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Position of the value used to zero out predictions: last time step
    # (t-1) of the window, feature column 5.
    # NOTE(review): column 5 is presumably the lockdown indicator - verify
    # against the column order of the input CSV.
    last_step = f - 1
    lockdown_feature = 5

    all_predictions = {}
    all_adjusted_predictions = {}
    all_ground_truths = {}

    # Per-configuration evaluation frames
    original_valuelists = []
    adjusted_valuelists = []

    # Elapsed wall-clock seconds per configuration
    training_times = {}

    # Chronological visiting order; identical for every configuration, so it
    # is computed once here (stable sort on the parsed dates).
    test_dates_array = pd.to_datetime(test_dates).values
    sorted_test_indices = sorted(range(len(test_dates_array)), key=lambda idx: test_dates_array[idx])

    for x in grid_search:
        model_type = x[0]

        if model_type == 'hybrid':
            _, lstm_unit_val, gru_unit_val, drop, dense, batch, epoch = x
            units = f"L{lstm_unit_val}_G{gru_unit_val}"
            config_key = f"{model_type}_lstmUnit{lstm_unit_val}_gruUnit{gru_unit_val}_drop{drop}_dense{dense}_batch{batch}_epochs{epoch}"
        else:
            _, units, drop, dense, batch, epoch = x
            config_key = f"{model_type}_unit{units}_drop{drop}_dense{dense}_batch{batch}_epochs{epoch}"
            # Only the matching recurrent layer uses `units`; the other size
            # is a placeholder that the single-encoder models ignore.
            lstm_unit_val = units if model_type == 'lstm' else 128
            gru_unit_val = units if model_type == 'gru' else 128

        print("\n" + "*"*50)
        print(f"Starting walk-forward validation with parameters:")
        print(f"Model Type: {model_type}, Units: {units}, Dropout: {drop}, Dense Units: {dense}")
        print(f"Batch Size: {batch}, Epochs: {epoch}")
        print(f"Device: {device}")
        print(f"Total test samples: {len(test_X)}")
        print("*"*50 + "\n")

        # Start timing for this configuration
        start_time = time.time()

        predictions = []
        adjusted_predictions = []
        groundtrue = []

        # The history starts from the original training data only.
        # NOTE(review): val_X/val_Y are never added to the history, so the
        # non-2019 branch below trains without them - confirm this is intended.
        history_x = train_X.copy()
        history_y = train_Y.copy()

        for i in sorted_test_indices:
            test_date = pd.to_datetime(test_dates.iloc[i])
            print(f"\nTest iteration for date: {test_date}")

            if test_date.year == 2019:
                # 2019 test points: fixed training set of train + validation.
                # train_X is used as passed in; no date filtering happens here.
                current_train_x = np.concatenate([train_X, val_X], axis=0)
                current_train_y = np.concatenate([train_Y, val_Y], axis=0)
            else:
                # Other test points: training data plus every test sample
                # already visited in this configuration.
                current_train_x = history_x
                current_train_y = history_y

            print(f"Current training set size: {len(current_train_x)} samples")

            # Skip if not enough training data
            if len(current_train_x) < 10:
                print("Not enough training data, skipping this test point")
                continue

            # Train a fresh model for this test point
            model = build_model(current_train_x, current_train_y, val_X, val_Y, model_type=model_type,
                                lstm_units=lstm_unit_val, gru_units=gru_unit_val, drop_rate=drop,
                                dense_unit=dense, batch_size=batch, epochs=epoch)

            # Predict the single test window
            model.eval()
            test_tensor = torch.FloatTensor(test_X[i].reshape(1, f, c)).to(device)
            with torch.no_grad():
                yhat = model(test_tensor).cpu().numpy()

            inv_yhat, inv_y = inverscale(yhat, test_X[i], test_Y[i], scaler)
            # Scale the prediction by (1 - value): a scaled value of 1 forces
            # the adjusted prediction to 0.
            prev_month_lockdown = test_X[i][last_step][lockdown_feature]
            adjusted_inv_yhat = inv_yhat * (1 - prev_month_lockdown)
            predictions.append(inv_yhat)
            adjusted_predictions.append(adjusted_inv_yhat)
            groundtrue.append(inv_y)

            # Add this observation to history for the next iteration
            history_x = np.append(history_x, [test_X[i]], axis=0)
            history_y = np.append(history_y, test_Y[i])

        # Calculate total training time
        total_time = time.time() - start_time
        training_times[config_key] = total_time
        print(f"\nTotal training time for {config_key}: {total_time:.2f} seconds ({total_time/60:.2f} minutes)")

        flat_predictions = np.array(predictions).flatten()
        flat_adjusted = np.array(adjusted_predictions).flatten()
        flat_truth = np.array(groundtrue).flatten()

        all_predictions[config_key] = flat_predictions
        all_adjusted_predictions[config_key] = flat_adjusted
        all_ground_truths[config_key] = flat_truth

        original_valuelists.append(
            _tag_metrics(evalue(flat_predictions, flat_truth),
                         model_type, units, drop, dense, batch, epoch, total_time)
        )
        adjusted_valuelists.append(
            _tag_metrics(evalue(flat_adjusted, flat_truth),
                         model_type, units, drop, dense, batch, epoch, total_time)
        )

    all_original_valuelist = pd.concat(original_valuelists, ignore_index=True)
    all_adjusted_valuelist = pd.concat(adjusted_valuelists, ignore_index=True)

    # Save training times to CSV
    pd.DataFrame(list(training_times.items()), columns=['Model', 'Training Time (s)']).to_csv('model_training_times.csv', index=False)

    # Also add the time results to the return values
    return all_original_valuelist, all_adjusted_valuelist, all_predictions, all_ground_truths, all_adjusted_predictions, training_times

def evalue(yhat, inv_y):
    """Score predictions against ground truth with RMSE, MAE and MAPE.

    Arguments:
        yhat: predicted values.
        inv_y: ground-truth values.
    Returns:
        One-row Pandas DataFrame indexed by 'DLM' with the columns
        'RMSE', 'MAE' and 'MAPE'.
    """
    metrics = {
        'RMSE': {'DLM': sqrt(mean_squared_error(inv_y, yhat))},
        'MAE': {'DLM': mean_absolute_error(inv_y, yhat)},
        'MAPE': {'DLM': mean_absolute_percentage_error(inv_y, yhat)},
    }
    return pd.DataFrame(metrics)

def inverscale(yhat, test_X, test_Y, scaler):
    """Map a scaled prediction and its scaled target back to original units.

    The scaler works on full feature rows, so the value of interest is put
    in column 0 and the remaining columns are filled from the window; only
    column 0 of the inverse-transformed row is returned.

    Arguments:
        yhat: model output, 1-D or 2-D array holding one prediction.
        test_X: the 2-D input window (time steps x features) of the sample.
        test_Y: the scaled target of the sample.
        scaler: fitted scaler exposing ``scale_`` and ``inverse_transform``.
    Returns:
        (inv_yhat, inv_y): 1-D arrays with the prediction and the target in
        original units.
    """
    n_features = len(scaler.scale_)

    # Filler columns: row 1 of the window, minus its last feature, so that
    # one value plus the filler has exactly n_features columns.
    row = test_X[1, 0:n_features]
    filler = row.reshape(1, row.shape[0])[:, :-1]

    def to_original_units(first_column):
        padded = concatenate((first_column, filler), axis=1)
        return scaler.inverse_transform(padded)[:, 0]

    if yhat.ndim == 1:
        yhat = yhat.reshape(len(yhat), 1)
    inv_yhat = to_original_units(yhat)

    inv_y = to_original_units(np.array(test_Y).reshape(1, 1))
    return inv_yhat, inv_y

# Split the framed data by date, build the hyper-parameter grid, run the
# walk-forward evaluation and save the adjusted-prediction metrics.
if __name__ == '__main__':
    # Raw value arrays; neither is read again in this cell
    values = reframed.values
    reframed_with_dates_values = reframed_with_dates.values

    # Convert date strings to datetime objects
    dates = pd.to_datetime(reframed_with_dates['date'])

    # Create masks for each split according to the specified date ranges
    # NOTE(review): the training mask includes 2020-01..2020-05 while the test
    # mask includes all of 2019, so models evaluated on 2019 are trained on
    # later observations - confirm this is intended.
    train_mask = ((dates >= '2008-01-01') & (dates <= '2017-12-31')) | ((dates >= '2020-01-01') & (dates <= '2020-05-31'))
    val_mask = (dates >= '2018-01-01') & (dates <= '2018-12-31')
    test_mask = ((dates >= '2019-01-01') & (dates <= '2019-12-31')) | ((dates >= '2020-06-01') & (dates <= '2020-07-31'))

    # Extract values for train, validation, and test sets (excluding the date column)
    train_data = reframed.loc[train_mask].values
    val_data = reframed.loc[val_mask].values
    test_data = reframed.loc[test_mask].values

    # Split into X and Y: the last column is the target
    train_X, train_Y = train_data[:, :-1], train_data[:, -1]
    val_X, val_Y = val_data[:, :-1], val_data[:, -1]
    test_X, test_Y = test_data[:, :-1], test_data[:, -1]

    # Reshape input to be 3D [samples, timesteps, features]; 12 matches the
    # n_in=12 lags used when framing the series
    train_X = train_X.reshape(train_X.shape[0], 12, int(train_X.shape[1]/12))
    val_X = val_X.reshape(val_X.shape[0], 12, int(val_X.shape[1]/12))
    test_X = test_X.reshape(test_X.shape[0], 12, int(test_X.shape[1]/12))

    # Modified grid search creation for all model types
    grid_search = []
    for model_type in model_types:
        if model_type == 'lstm':
            # 6-field tuples: (type, lstm units, dropout, dense, batch, epochs)
            grid_search.extend(
                list(itertools.product([model_type], lstm_unit, drop_rate, dense_unit, batch_size_num, epochs))
            )
        elif model_type == 'gru':
            # 6-field tuples: (type, gru units, dropout, dense, batch, epochs)
            grid_search.extend(
                list(itertools.product([model_type], gru_unit, drop_rate, dense_unit, batch_size_num, epochs))
            )
        else:
            # Any other type gets 7-field tuples with both LSTM and GRU units.
            # walk_forward unpacks 7 fields only for 'hybrid', and build_model
            # accepts only 'gru', 'lstm' and 'hybrid'.
            grid_search.extend(
                list(itertools.product([model_type], lstm_unit, gru_unit, drop_rate, dense_unit, batch_size_num, epochs))
            )

    # Run the walk-forward evaluation over the whole grid
    original_valuelist, adjusted_valuelist, all_predictions, all_ground_truths, all_adjusted_predictions, training_times = walk_forward(
        train_X, train_Y, val_X, val_Y, test_X, test_Y, grid_search, scaler,
        dates[test_mask]  # Pass the test dates
    )

    # Print training times summary
    print("\nTraining Times Summary:")
    for model, time_taken in training_times.items():
        print(f"{model}: {time_taken:.2f} seconds ({time_taken/60:.2f} minutes)")

    # Save results with training times included (only the metrics of the
    # adjusted predictions are written; original_valuelist is not saved here)
    print("\nSaving results with training times...")
    adjusted_valuelist.to_csv('model_results_with_times.csv', index=False)

    print("Results:")
    print(adjusted_valuelist)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 50/100, Train Loss: 0.0360, Val Loss: 0.0274
Epoch 51/100, Train Loss: 0.0346, Val Loss: 0.0274
Epoch 52/100, Train Loss: 0.0342, Val Loss: 0.0274
Reducing learning rate by factor of 0.01
Epoch 53/100, Train Loss: 0.0346, Val Loss: 0.0274
Epoch 54/100, Train Loss: 0.0347, Val Loss: 0.0274
Epoch 55/100, Train Loss: 0.0355, Val Loss: 0.0274
Epoch 56/100, Train Loss: 0.0350, Val Loss: 0.0274
Epoch 57/100, Train Loss: 0.0351, Val Loss: 0.0274
Reducing learning rate by factor of 0.01
Epoch 58/100, Train Loss: 0.0344, Val Loss: 0.0274
Epoch 59/100, Train Loss: 0.0350, Val Loss: 0.0274
Epoch 60/100, Train Loss: 0.0358, Val Loss: 0.0274
Epoch 61/100, Train Loss: 0.0358, Val Loss: 0.0274
Epoch 62/100, Train Loss: 0.0335, Val Loss: 0.0274
Reducing learning rate by factor of 0.01
Epoch 63/100, Train Loss: 0.0342, Val Loss: 0.0274
Epoch 64/100, Train Loss: 0.0342, Val Loss: 0.0274
Epoch 65/100, Train Loss: 0.0339, Val Loss: 0.0

## Result

In [None]:
adjusted_valuelist

Unnamed: 0,RMSE,MAE,MAPE,model_type,units,drop_rate,dense_unit,batch_size,epochs,training_time
0,182253.718184,147104.125,11.074097,lstm,256,0.1,32,4,100,167.300521
1,123565.964505,93664.953125,7.049941,lstm,256,0.1,64,4,100,160.835905
2,101792.513045,77620.203125,5.938788,lstm,256,0.2,32,4,100,160.822445
3,113857.785259,85579.351562,6.451847,lstm,256,0.2,64,4,100,160.804775
4,105087.576126,78026.804688,5.912697,lstm,512,0.1,32,4,100,159.396672
5,127416.883779,98962.289062,7.480051,lstm,512,0.1,64,4,100,159.106784
6,109556.438642,85183.257812,6.588063,lstm,512,0.2,32,4,100,159.333252
7,123701.698485,93392.453125,7.061137,lstm,512,0.2,64,4,100,158.90029
8,197476.63323,154682.28125,11.442617,gru,8,0.1,32,4,100,137.411107
9,210967.309392,168735.859375,12.508427,gru,8,0.1,64,4,100,137.632552


In [None]:
all_adjusted_predictions

{'lstm_unit256_drop0.1_dense32_batch4_epochs100': array([1457751.1, 1414111. , 1310062. , 1202888.6, 1162393. , 1196769.6,
        1232553.2, 1279402.2, 1303493.9, 1359491.1, 1472966.1, 1579900.5,
              0. ,       0. ], dtype=float32),
 'lstm_unit256_drop0.1_dense64_batch4_epochs100': array([1515680.8, 1498410.6, 1428545.6, 1325597.2, 1265521.2, 1262136.4,
        1286336.6, 1328971.6, 1366635.2, 1436741.6, 1553205.4, 1647428.8,
              0. ,       0. ], dtype=float32),
 'lstm_unit256_drop0.2_dense32_batch4_epochs100': array([1518744.6, 1501224.4, 1441179.8, 1371678.1, 1326203. , 1335538. ,
        1358408.1, 1398536.6, 1434133.4, 1496476.4, 1576867.1, 1644279.6,
              0. ,       0. ], dtype=float32),
 'lstm_unit256_drop0.2_dense64_batch4_epochs100': array([1521142.9, 1494560.6, 1414871.4, 1316949.9, 1269541.4, 1282476.6,
        1316659.9, 1364574.9, 1397952.9, 1461431.6, 1565235.8, 1649003.9,
              0. ,       0. ], dtype=float32),
 'lstm_unit512_drop0.1_d

In [None]:
all_predictions

{'lstm_unit256_drop0.1_dense32_batch4_epochs100': array([1457751.1 , 1414111.  , 1310062.  , 1202888.6 , 1162393.  ,
        1196769.6 , 1232553.2 , 1279402.2 , 1303493.9 , 1359491.1 ,
        1472966.1 , 1579900.5 ,  565184.75,   80532.74], dtype=float32),
 'lstm_unit256_drop0.1_dense64_batch4_epochs100': array([1515680.8  , 1498410.6  , 1428545.6  , 1325597.2  , 1265521.2  ,
        1262136.4  , 1286336.6  , 1328971.6  , 1366635.2  , 1436741.6  ,
        1553205.4  , 1647428.8  ,  100009.63 ,   42122.082], dtype=float32),
 'lstm_unit256_drop0.2_dense32_batch4_epochs100': array([1518744.6 , 1501224.4 , 1441179.8 , 1371678.1 , 1326203.  ,
        1335538.  , 1358408.1 , 1398536.6 , 1434133.4 , 1496476.4 ,
        1576867.1 , 1644279.6 ,  674244.6 ,   82374.77], dtype=float32),
 'lstm_unit256_drop0.2_dense64_batch4_epochs100': array([1521142.9  , 1494560.6  , 1414871.4  , 1316949.9  , 1269541.4  ,
        1282476.6  , 1316659.9  , 1364574.9  , 1397952.9  , 1461431.6  ,
        1565235.8