In [1]:
import sys
import os

PROJECT_ROOT = os.path.abspath("..")
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Dense, Input, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, mean_absolute_error
from src.data_loader import load_processed_data

Load Processed Data

In [3]:
processed_folder = os.path.join(PROJECT_ROOT, "data/processed")
X_train, y_train, X_test, y_test, scaler = load_processed_data(processed_folder)

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)

Train shape: (112132, 48, 1)
Test shape: (27980, 48, 1)


Define Baseline LSTM Builder

In [4]:
def build_lstm(units, dropout, lr, input_shape, output_steps):
    inputs = Input(shape=input_shape)
    x = LSTM(units)(inputs)
    x = Dropout(dropout)(x)
    outputs = Dense(output_steps)(x)
    
    model = Model(inputs, outputs)
    model.compile(
        optimizer=Adam(learning_rate=lr),
        loss="mse")
    return model


Hyperparameter Grid

In [5]:
param_grid = {
    "units": [32, 64],
    "dropout": [0.2, 0.3],
    "lr": [0.001, 0.0005],
    "batch_size": [32, 64]
}


Grid Search Loop (Baseline LSTM)

In [6]:
results = []

for units in param_grid["units"]:
    for dropout in param_grid["dropout"]:
        for lr in param_grid["lr"]:
            for batch_size in param_grid["batch_size"]:
                
                print(f"Training: units={units}, dropout={dropout}, lr={lr}, batch={batch_size}")
                
                model = build_lstm(
                    units=units,
                    dropout=dropout,
                    lr=lr,
                    input_shape=X_train.shape[1:],
                    output_steps=y_train.shape[1]
                )
                
                es = EarlyStopping(patience=5, restore_best_weights=True)
                
                model.fit(
                    X_train, y_train,
                    epochs=30,
                    batch_size=batch_size,
                    validation_split=0.2,
                    callbacks=[es],
                    verbose=0
                )
                
                y_pred = model.predict(X_test)
                
                rmse = np.sqrt(mean_squared_error(y_test.flatten(), y_pred.flatten()))
                mae = mean_absolute_error(y_test.flatten(), y_pred.flatten())
                
                results.append({
                    "units": units,
                    "dropout": dropout,
                    "lr": lr,
                    "batch_size": batch_size,
                    "RMSE": rmse,
                    "MAE": mae
                })


Training: units=32, dropout=0.2, lr=0.001, batch=32
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 13ms/step
Training: units=32, dropout=0.2, lr=0.001, batch=64
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step
Training: units=32, dropout=0.2, lr=0.0005, batch=32
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 14ms/step
Training: units=32, dropout=0.2, lr=0.0005, batch=64
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step
Training: units=32, dropout=0.3, lr=0.001, batch=32
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 14ms/step
Training: units=32, dropout=0.3, lr=0.001, batch=64
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step
Training: units=32, dropout=0.3, lr=0.0005, batch=32
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step
Training: units=32, dropout=0.3, lr=0.0005, batch=64
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━

Results Table

In [7]:
results_df = pd.DataFrame(results)
results_df.sort_values("RMSE")

Unnamed: 0,units,dropout,lr,batch_size,RMSE,MAE
10,64,0.2,0.0005,32,0.07022,0.03608
12,64,0.3,0.001,32,0.070239,0.035984
6,32,0.3,0.0005,32,0.070301,0.035423
0,32,0.2,0.001,32,0.070446,0.035077
13,64,0.3,0.001,64,0.070489,0.036557
2,32,0.2,0.0005,32,0.070577,0.036109
15,64,0.3,0.0005,64,0.07063,0.036644
9,64,0.2,0.001,64,0.070717,0.036585
7,32,0.3,0.0005,64,0.07074,0.037099
8,64,0.2,0.001,32,0.07078,0.036458


Best Configuration 

In [8]:
best_params = results_df.sort_values("RMSE").iloc[0]
best_params

units         64.00000
dropout        0.20000
lr             0.00050
batch_size    32.00000
RMSE           0.07022
MAE            0.03608
Name: 10, dtype: float64