This notebook is dedicated to the hyper parameter estimation of the Neural Networks and Gradient Boosting Decision Tree Algorithms.
I use Otpuna for computational efficiency.This is more efficient than grid search algorithms as it uses performs Bayesian optimisation through the tree-structured parzen estimator, making it a realistic algorithm to preform, giving computational limitations. **Note:** due to limited computational power, I was unable to run an Optuna for CNN. Instead I used parameters from [Nyanpn's Kaggle Solution](https://www.kaggle.com/code/nyanpn/1st-place-public-2nd-place-solution?scriptVersionId=85907908)


Firstly reusing the same settings as used in Nearest Neighbours.

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
import ipywidgets as widgets
import gc
import glob
import os
import time
import traceback
from contextlib import contextmanager
from enum import Enum
from typing import Dict, List, Optional, Tuple
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
from joblib import delayed, Parallel
from sklearn.decomposition import LatentDirichletAllocation
import statsmodels.api as sm
from sklearn.manifold import TSNE
from sklearn.model_selection import GroupKFold
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import minmax_scale
from tqdm import tqdm_notebook as tqdm
import pickle
%matplotlib inline

@contextmanager
def timer(name):
    s = time.time()
    yield
    e = time.time() - s
    print(f"[{name}] {e:.3f}sec")

def print_trace(name: str = ''):
    print(f'ERROR RAISED IN {name or "anonymous"}')
    print(traceback.format_exc())

DATA_DIR = 'data'
# model & ensemble configurations
PREDICT_CNN = True
PREDICT_MLP = True
PREDICT_GBDT = True
PREDICT_TABNET = False

GBDT_NUM_MODELS = 5 #3
GBDT_LR = 0.02  # 0.1

NN_VALID_TH = 0.25
NN_MODEL_TOP_N = 3
TAB_MODEL_TOP_N = 3
ENSEMBLE_METHOD = 'mean'
NN_NUM_MODELS = 10
TABNET_NUM_MODELS = 5

# data configurations
USE_PRECOMPUTE_FEATURES = True  # Load precomputed features for train.csv from private dataset (just for speed up)


# for saving quota
IS_1ST_STAGE = True
SHORTCUT_NN_IN_1ST_STAGE = False  # early-stop training to save GPU quota
SHORTCUT_GBDT_IN_1ST_STAGE = False
MEMORY_TEST_MODE = False

# for ablation studies
CV_SPLIT = 'time'  # 'time': time-series KFold 'group': GroupKFold by stock-id
USE_PRICE_NN_FEATURES = True  # Use nearest neighbor features that rely on tick size
USE_VOL_NN_FEATURES = True  # Use nearest neighbor features that can be calculated without tick size
USE_SIZE_NN_FEATURES = True  # Use nearest neighbor features that can be calculated without tick size
USE_RANDOM_NN_FEATURES = False  # Use random index to aggregate neighbors

USE_TIME_ID_NN = True  # Use time-id based neighbors
USE_STOCK_ID_NN = True  # Use stock-id based neighbors

ENABLE_RANK_NORMALIZATION = True  # Enable rank-normalization

train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
stock_ids = set(train['stock_id'])

Loading Training Dataset after performing Nearest Neighbours Feature Engineering 

In [2]:
def get_X(df_src):
    cols = [c for c in df_src.columns if c not in ['time_id', 'target', 'tick_size']]
    return df_src[cols]
    
df_train = pd.read_csv("df_train.csv")
df_test = pd.read_csv("df_test.csv")
X = get_X(df_train)            #remove non-feature columns (time_id, target, tick_size).
y = 2*np.log(df_train['target'] +1e-9) #will train models and cross validate models using log-rv as this allows for better optimisation 
                                        #and allows me to compute error metrics backed by academics    

del df_train
folds = np.load("folds.npy", allow_pickle=True)

# Load from Pickle (.pkl)
with open("folds.pkl", "rb") as f:
    folds = pickle.load(f)

A couple more definitions to ensure code runs efficiently

In [10]:
def rmspe(y_true, y_pred):
    return  (np.sqrt(np.mean(np.square((y_true - y_pred) / y_true))))


def feval_RMSPE(preds, train_data):
    labels = train_data.get_label()
    return 'RMSPE', round(rmspe(y_true = labels, y_pred = preds),5), False


# from: https://blog.amedama.jp/entry/lightgbm-cv-feature-importance
def plot_importance(cvbooster, figsize=(10, 10)):
    raw_importances = cvbooster.feature_importance(importance_type='gain')
    feature_name = cvbooster.boosters[0].feature_name()
    importance_df = pd.DataFrame(data=raw_importances,
                                 columns=feature_name)
    # order by average importance across folds
    sorted_indices = importance_df.mean(axis=0).sort_values(ascending=False).index
    sorted_importance_df = importance_df.loc[:, sorted_indices]
    # plot top-n
    PLOT_TOP_N = 50
    plot_cols = sorted_importance_df.columns[:PLOT_TOP_N]
    _, ax = plt.subplots(figsize=figsize)
    ax.grid()
    ax.set_xscale('log')
    ax.set_ylabel('Feature')
    ax.set_xlabel('Importance')
    sns.boxplot(data=sorted_importance_df[plot_cols],
                orient='h',
                ax=ax)
    plt.show()


def get_X(df_src):
    cols = [c for c in df_src.columns if c not in ['time_id', 'target', 'tick_size']]
    return df_src[cols]


class EnsembleModel:
    def __init__(self, models: List[lgb.Booster], weights: Optional[List[float]] = None):
        self.models = models
        self.weights = weights

        features = list(self.models[0].feature_name())

        for m in self.models[1:]:
            assert features == list(m.feature_name())
#Checks that all models use the same features.
    def predict(self, x):
        predicted = np.zeros((len(x), len(self.models)))

        for i, m in enumerate(self.models):
            w = self.weights[i] if self.weights is not None else 1
            predicted[:, i] = w * m.predict(x)

        ttl = np.sum(self.weights) if self.weights is not None else len(self.models)
        return np.sum(predicted, axis=1) / ttl

    #Runs predictions for each model and stores them in predicted[:, i].
    #If weights are given, normalizes predictions using np.sum(self.weights). Otherwise, averages predictions across models.
    
    def feature_name(self) -> List[str]:
        return self.models[0].feature_name()



Installing Optuna Package

In [5]:
!pip install --upgrade xgboost
!pip install optuna-integration[xgboost]
!pip install optuna
!pip install lightgbm
import lightgbm as lgb





# XGBoost Optuna Run

In [6]:
import xgboost as xgb
import optuna
import numpy as np
import pickle
from sklearn.metrics import mean_squared_error

# Load predefined folds
with open("folds.pkl", "rb") as f:
    folds = pickle.load(f)

# Define Q-Likelihood metric
def qlike_from_rv(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # Avoid divide-by-zero or log(0)
    eps = 1e-9

    exp_y_true = np.exp(y_true) + eps
    exp_y_pred = np.exp(y_pred) + eps

    qlike = (exp_y_true / exp_y_pred) - (y_true - y_pred) - 1
    return np.mean(qlike)

# Define Optuna objective
def objective(trial):
    # Suggest hyperparameters
    params = {
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',  # still needed for early stopping
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'n_jobs': -1,
        'random_state': 42,
        'early_stopping_rounds': 50
    }

    qlike_scores = []
    for fold_idx, (train_idx, valid_idx) in enumerate(folds):
        X_train, X_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]

        model = xgb.XGBRegressor(**params)

        model.fit(
            X_train, y_train,
            eval_set=[(X_valid, y_valid)],
            verbose=False
        )

        y_pred = model.predict(X_valid)
        qlike_score = qlike_from_rv(y_valid, y_pred)
        qlike_scores.append(qlike_score)

    return np.mean(qlike_scores)  # Optuna will minimize this

# Run Optuna optimization
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=30, timeout=3600*10)  # 30 trials, max 10 hours

# Best parameters
best_params = study.best_params
print("\n🔹 Best Hyperparameters (Q-Likelihood Optimized):", best_params)

# Train final model with best parameters on full data
final_model = xgb.XGBRegressor(**best_params, random_state=42)
final_model.fit(X, y)

# Save final model
final_model.save_model("xgboost_optuna_qlike.json")

print("\n✅ XGBoost training completed (QLIKE minimized)!")



[I 2025-03-28 18:22:44,316] A new study created in memory with name: no-name-16b9d046-d459-4c1e-abbe-7c39abb01e75
[I 2025-03-28 18:38:51,723] Trial 0 finished with value: 0.08236368103678064 and parameters: {'learning_rate': 0.033195054155555, 'max_depth': 9, 'n_estimators': 501, 'subsample': 0.7298267937721498, 'colsample_bytree': 0.7982340514709187, 'lambda': 0.0012848548354308237, 'alpha': 0.06346749040163628}. Best is trial 0 with value: 0.08236368103678064.
[I 2025-03-28 18:51:11,647] Trial 1 finished with value: 0.08723894339464308 and parameters: {'learning_rate': 0.0540179714516467, 'max_depth': 3, 'n_estimators': 976, 'subsample': 0.6654845066640638, 'colsample_bytree': 0.8581888676761157, 'lambda': 0.6721732709454367, 'alpha': 0.0016163265824155825}. Best is trial 0 with value: 0.08236368103678064.
[I 2025-03-28 18:55:26,106] Trial 2 finished with value: 0.0933578547476957 and parameters: {'learning_rate': 0.02446264104484393, 'max_depth': 8, 'n_estimators': 112, 'subsample':


🔹 Best Hyperparameters (Q-Likelihood Optimized): {'learning_rate': 0.09971287766973493, 'max_depth': 7, 'n_estimators': 801, 'subsample': 0.8763360101347328, 'colsample_bytree': 0.698114590592158, 'lambda': 0.042531472780104954, 'alpha': 6.997454407950201}

✅ XGBoost training completed (QLIKE minimized)!


# LightGBM Optuna run

In [8]:
import optuna
# -----------------------------------------------
# LIGHTGBM OPTUNA-  COMPLETE
# -----------------------------------------------
def qlike_from_rv(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # Avoid divide-by-zero or log(0)
    eps = 1e-9

    exp_y_true = np.exp(y_true) + eps
    exp_y_pred = np.exp(y_pred) + eps

    qlike = (exp_y_true / exp_y_pred) - (y_true - y_pred) - 1
    return np.mean(qlike)


def feval_qlike(preds, train_data):
    y_true = train_data.get_label()
    score = qlike_from_rv(y_true, preds)
    return 'QLIKE', abs(score), False  # False = lower is better


def objective(trial):
    learning_rate = trial.suggest_float('learning_rate', 0.005, 0.2, log=True)
    early_stopping = int(40 * 0.1 / learning_rate)

    params = {
        'objective': 'regression',
        'metric': 'None',  # We'll use feval instead
        'verbose': -1,
        'boosting_type': 'gbdt',
        'device': 'gpu',
        'gpu_platform_id': 0,
        'gpu_device_id': 0,
        'learning_rate': learning_rate,
        'num_leaves': trial.suggest_int('num_leaves', 31, 512),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 100, 2000),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.1, 10.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.1, 10.0),
        'early_stopping_rounds': early_stopping,
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.1, 1.0),
        'max_depth': -1
    }

    qlike_scores = []

    for fold_idx, (train_idx, valid_idx) in enumerate(folds):
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_valid, y_valid = X.iloc[valid_idx], y.iloc[valid_idx]

        train_data = lgb.Dataset(X_train, y_train, weight=1 / np.power(y_train, 2))
        valid_data = lgb.Dataset(X_valid, y_valid, weight=1 / np.power(y_valid, 2))

        booster = lgb.train(
            params,
            train_data,
            valid_sets=[valid_data],
            num_boost_round=5000,
            feval=feval_qlike,
                    )

        y_pred = booster.predict(X_valid, num_iteration=booster.best_iteration)
        qlike_score = qlike_from_rv(y_valid, y_pred)
        qlike_scores.append(qlike_score)

    return abs(np.mean(qlike_scores))


print("\n🔍 Starting Optuna hyperparameter tuning...")

study = optuna.create_study(direction="minimize", study_name="LightGBM_QLIKE_GPU")
study.optimize(objective, n_trials=50)

print("\n✅ Best hyperparameters found:")
print(study.best_trial.params)
print(f"⭐ Best QLIKE: {study.best_value:.5f}")

print("\n🔁 Retraining final model on full data...")

best_params = study.best_trial.params
best_params.update({
    'objective': 'regression',
    'metric': 'None',
    'boosting_type': 'gbdt',
    'device': 'gpu',
    'gpu_platform_id': 0,
    'gpu_device_id': 0,
    'verbosity': -1
})

ds_full = lgb.Dataset(X, y, weight=1 / np.power(y, 2))

final_model = lgb.train(
    best_params,
    ds_full,
    num_boost_round=1000,
    valid_sets=[ds_full],
    feval=feval_qlike,
    
)

# Save final model
final_model.save_model("lightgbm_qlike_gpu_final_model.txt")
print("\n💾 Final model saved to 'lightgbm_qlike_gpu_final_model.txt'")


[I 2025-03-28 23:37:03,997] A new study created in memory with name: LightGBM_QLIKE_GPU



🔍 Starting Optuna hyperparameter tuning...


[I 2025-03-28 23:40:30,283] Trial 0 finished with value: 0.08633851203061542 and parameters: {'learning_rate': 0.15078934712281716, 'num_leaves': 254, 'min_data_in_leaf': 373, 'reg_alpha': 2.931123275823684, 'reg_lambda': 2.6441165567498457, 'colsample_bytree': 0.3395837701497705}. Best is trial 0 with value: 0.08633851203061542.
[I 2025-03-29 00:04:29,558] Trial 1 finished with value: 0.09003240930857781 and parameters: {'learning_rate': 0.011830027015974607, 'num_leaves': 274, 'min_data_in_leaf': 335, 'reg_alpha': 8.258269636322225, 'reg_lambda': 9.731154574846657, 'colsample_bytree': 0.48429967603906177}. Best is trial 0 with value: 0.08633851203061542.
[I 2025-03-29 00:10:31,586] Trial 2 finished with value: 0.08693826578184258 and parameters: {'learning_rate': 0.07123504214479567, 'num_leaves': 324, 'min_data_in_leaf': 1725, 'reg_alpha': 3.880713991101804, 'reg_lambda': 1.7068787655623392, 'colsample_bytree': 0.7690905870548737}. Best is trial 0 with value: 0.08633851203061542.
[I


✅ Best hyperparameters found:
{'learning_rate': 0.02414570218752482, 'num_leaves': 492, 'min_data_in_leaf': 249, 'reg_alpha': 0.12698809196376012, 'reg_lambda': 0.357320402227454, 'colsample_bytree': 0.2552186891402247}
⭐ Best QLIKE: 0.07718

🔁 Retraining final model on full data...


TypeError: train() got an unexpected keyword argument 'verbose_eval'

# MLP Optuna Run

Applying the same format as used in actual prediction notebook:

In [6]:
import gc
import os
import random
from typing import List, Tuple, Optional, Union

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from joblib import Parallel, delayed
from sklearn.decomposition import PCA
from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetRegressor
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts


null_check_cols = [
    'book.log_return1.realized_volatility',
    'book_150.log_return1.realized_volatility',
    'book_300.log_return1.realized_volatility',
    'book_450.log_return1.realized_volatility',
    'trade.log_return.realized_volatility',
    'trade_150.log_return.realized_volatility',
    'trade_300.log_return.realized_volatility',
    'trade_450.log_return.realized_volatility'
]


def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


def rmspe_metric(y_true, y_pred):
    rmspe = np.sqrt(np.mean(np.square((y_true - y_pred) / y_true)))
    return rmspe


def rmspe_loss(y_true, y_pred):
    rmspe = torch.sqrt(torch.mean(torch.square((y_true - y_pred) / y_true)))
    return rmspe
#This is the same RMSPE metric but implemented as a loss function in PyTorch for backpropagation.

class RMSPE(Metric):
    def __init__(self):
        self._name = "rmspe"
        self._maximize = False

    def __call__(self, y_true, y_score):
        return np.sqrt(np.mean(np.square((y_true - y_score) / y_true)))
#This class wraps the RMSPE metric into a format that can be used in PyTorch TabNet for evaluation during training.


def RMSPELoss_Tabnet(y_pred, y_true):
    return torch.sqrt(torch.mean( ((y_true - y_pred) / y_true) ** 2 )).clone()


class AverageMeter:
    """Computes and stores the average and current value"""

    def __init__(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
#used to track running averages of metrics (e.g., loss) during training and evaluation. It's useful for monitoring training progress.

class TabularDataset(Dataset):
    def __init__(self, x_num: np.ndarray, x_cat: np.ndarray, y: Optional[np.ndarray]):
        super().__init__()
        self.x_num = x_num
        self.x_cat = x_cat
        self.y = y

    def __len__(self):
        return len(self.x_num)

    def __getitem__(self, idx):
        if self.y is None:
            return self.x_num[idx], torch.LongTensor(self.x_cat[idx])
        else:
            return self.x_num[idx], torch.LongTensor(self.x_cat[idx]), self.y[idx]

#splits the data into numeric (x_num) and categorical (x_cat) features and optionally returns the target values (y) for training.

class MLP(nn.Module):
    def __init__(self,
                 src_num_dim: int,
                 n_categories: List[int],
                 dropout: float = 0.0,
                 hidden: int = 50,
                 emb_dim: int = 10,
                 dropout_cat: float = 0.2,
                 bn: bool = False):
        super().__init__()

        self.embs = nn.ModuleList([
            nn.Embedding(x, emb_dim) for x in n_categories])
        self.cat_dim = emb_dim * len(n_categories)
        self.dropout_cat = nn.Dropout(dropout_cat)

        if bn:
            self.sequence = nn.Sequential(
                nn.Linear(src_num_dim + self.cat_dim, hidden),
                nn.Dropout(dropout),
                nn.BatchNorm1d(hidden),
                nn.ReLU(),
                nn.Linear(hidden, hidden),
                nn.Dropout(dropout),
                nn.BatchNorm1d(hidden),
                nn.ReLU(),
                nn.Linear(hidden, 1)
            )
        else:
            self.sequence = nn.Sequential(
                nn.Linear(src_num_dim + self.cat_dim, hidden),
                nn.Dropout(dropout),
                nn.ReLU(),
                nn.Linear(hidden, hidden),
                nn.Dropout(dropout),
                nn.ReLU(),
                nn.Linear(hidden, 1)
            )

    def forward(self, x_num, x_cat):
        embs = [embedding(x_cat[:, i]) for i, embedding in enumerate(self.embs)]
        x_cat_emb = self.dropout_cat(torch.cat(embs, 1))
        x_all = torch.cat([x_num, x_cat_emb], 1)
        x = self.sequence(x_all)
        return torch.squeeze(x)

#Embeddings for categorical variables (nn.Embedding).
#Dropout to prevent overfitting.
#Batch Normalization (optional) for faster convergence and better training stability.
#ReLU activation function for non-linearity.
#This model is used for regression, where the final output is a single predicted value.

    
class CNN(nn.Module):
    def __init__(self,
                 num_features: int,
                 hidden_size: int,
                 n_categories: List[int],
                 emb_dim: int = 10,
                 dropout_cat: float = 0.2,
                 channel_1: int = 256,
                 channel_2: int = 512,
                 channel_3: int = 512,
                 dropout_top: float = 0.1,
                 dropout_mid: float = 0.3,
                 dropout_bottom: float = 0.2,
                 weight_norm: bool = True,
                 two_stage: bool = True,
                 celu: bool = True,
                 kernel1: int = 5,
                 leaky_relu: bool = False):
        super().__init__()

        num_targets = 1

        cha_1_reshape = int(hidden_size / channel_1)
        cha_po_1 = int(hidden_size / channel_1 / 2)
        cha_po_2 = int(hidden_size / channel_1 / 2 / 2) * channel_3

        self.cat_dim = emb_dim * len(n_categories)
        self.cha_1 = channel_1
        self.cha_2 = channel_2
        self.cha_3 = channel_3
        self.cha_1_reshape = cha_1_reshape
        self.cha_po_1 = cha_po_1
        self.cha_po_2 = cha_po_2
        self.two_stage = two_stage

        self.expand = nn.Sequential(
            nn.BatchNorm1d(num_features + self.cat_dim),
            nn.Dropout(dropout_top),
            nn.utils.weight_norm(nn.Linear(num_features + self.cat_dim, hidden_size), dim=None),
            nn.CELU(0.06) if celu else nn.ReLU()
        )

        def _norm(layer, dim=None):
            return nn.utils.weight_norm(layer, dim=dim) if weight_norm else layer

        self.conv1 = nn.Sequential(
            nn.BatchNorm1d(channel_1),
            nn.Dropout(dropout_top),
            _norm(nn.Conv1d(channel_1, channel_2, kernel_size=kernel1, stride=1, padding=kernel1 // 2, bias=False)),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(output_size=cha_po_1),
            nn.BatchNorm1d(channel_2),
            nn.Dropout(dropout_top),
            _norm(nn.Conv1d(channel_2, channel_2, kernel_size=3, stride=1, padding=1, bias=True)),
            nn.ReLU()
        )

        if self.two_stage:
            self.conv2 = nn.Sequential(
                nn.BatchNorm1d(channel_2),
                nn.Dropout(dropout_mid),
                _norm(nn.Conv1d(channel_2, channel_2, kernel_size=3, stride=1, padding=1, bias=True)),
                nn.ReLU(),
                nn.BatchNorm1d(channel_2),
                nn.Dropout(dropout_bottom),
                _norm(nn.Conv1d(channel_2, channel_3, kernel_size=5, stride=1, padding=2, bias=True)),
                nn.ReLU()
            )

        self.max_po_c2 = nn.MaxPool1d(kernel_size=4, stride=2, padding=1)

        self.flt = nn.Flatten()

        if leaky_relu:
            self.dense = nn.Sequential(
                nn.BatchNorm1d(cha_po_2),
                nn.Dropout(dropout_bottom),
                _norm(nn.Linear(cha_po_2, num_targets), dim=0),
                nn.LeakyReLU()
            )
        else:
            self.dense = nn.Sequential(
                nn.BatchNorm1d(cha_po_2),
                nn.Dropout(dropout_bottom),
                _norm(nn.Linear(cha_po_2, num_targets), dim=0)
            )

        self.embs = nn.ModuleList([nn.Embedding(x, emb_dim) for x in n_categories])
        self.cat_dim = emb_dim * len(n_categories)
        self.dropout_cat = nn.Dropout(dropout_cat)

    def forward(self, x_num, x_cat):
        embs = [embedding(x_cat[:, i]) for i, embedding in enumerate(self.embs)]
        x_cat_emb = self.dropout_cat(torch.cat(embs, 1))
        x = torch.cat([x_num, x_cat_emb], 1)

        x = self.expand(x)

        x = x.reshape(x.shape[0], self.cha_1, self.cha_1_reshape)

        x = self.conv1(x)

        if self.two_stage:
            x = self.conv2(x) * x

        x = self.max_po_c2(x)
        x = self.flt(x)
        x = self.dense(x)

        return torch.squeeze(x)
#The CNN class defines a convolutional neural network (CNN) designed for tabular data. Similar to the MLP, it uses:
    #Embedding layers for categorical features.
    #Convolutional layers for learning spatial hierarchies in data (commonly used for image data but also applied here).
    #Pooling layers for downsampling the data.
    #This CNN can handle tabular data and learn complex interactions between features.

def preprocess_nn(
        X: pd.DataFrame,
        scaler: Optional[StandardScaler] = None,
        scaler_type: str = 'standard',
        n_pca: int = -1,
        na_cols: bool = True):
    if na_cols:
        #for c in X.columns:
        for c in null_check_cols:
            if c in X.columns:
                X[f"{c}_isnull"] = X[c].isnull().astype(int)

    cat_cols = [c for c in X.columns if c in ['time_id', 'stock_id']]
    num_cols = [c for c in X.columns if c not in cat_cols]

    X_num = X[num_cols].values.astype(np.float32)
    X_cat = np.nan_to_num(X[cat_cols].values.astype(np.int32))

    def _pca(X_num_):
        if n_pca > 0:
            pca = PCA(n_components=n_pca, random_state=0)
            return pca.fit_transform(X_num)
        return X_num

    if scaler is None:
        scaler = StandardScaler()
        X_num = scaler.fit_transform(X_num)
        X_num = np.nan_to_num(X_num, posinf=0, neginf=0)
        return _pca(X_num), X_cat, cat_cols, scaler
    else:
        X_num = scaler.transform(X_num) 
        X_num = np.nan_to_num(X_num, posinf=0, neginf=0)
        return _pca(X_num), X_cat, cat_cols


#This function preprocesses the tabular data. It does the following:
    #Creates binary columns indicating whether specific features have null values.
    #Separates numerical and categorical columns.
    #Scales the numerical features using StandardScaler or any other scaling method specified.
    #Optionally reduces dimensionality using PCA if n_pca > 0.

def train_epoch(data_loader: DataLoader,
                model: nn.Module,
                optimizer,
                scheduler,
                device,
                clip_grad: float = 1.5):
    model.train()
    losses = AverageMeter()
    step = 0

    for x_num, x_cat, y in tqdm(data_loader, position=0, leave=True, desc='Training'):
        batch_size = x_num.size(0)
        x_num = x_num.to(device, dtype=torch.float)
        x_cat = x_cat.to(device)
        y = y.to(device, dtype=torch.float)
        y_pred = model(x_num, x_cat)
        
        loss = rmspe_loss(y, model(x_num, x_cat))
        losses.update(loss.detach().cpu().numpy(), batch_size)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad)
        optimizer.step()
        optimizer.zero_grad()

        # Scheduler step needs a metric to adjust the learning rate
        if scheduler is not None:
            scheduler.step(loss)  # Pass validation loss or any relevant metric here

        step += 1

    return losses.avg
#This function trains the model for one epoch. It:
    #Loads batches of data.
    #Computes the loss using rmspe_loss.
    #Performs backpropagation and updates the model weights.
    #Optionally applies gradient clipping to prevent exploding gradients.

def evaluate(data_loader: DataLoader, model, device):
    model.eval()

    losses = AverageMeter()

    final_targets = []
    final_outputs = []

    with torch.no_grad():
        for x_num, x_cat, y in tqdm(data_loader, position=0, leave=True, desc='Evaluating'):
            batch_size = x_num.size(0)
            x_num = x_num.to(device, dtype=torch.float)
            x_cat = x_cat.to(device)
            y = y.to(device, dtype=torch.float)

            with torch.no_grad():
                output = model(x_num, x_cat)

            loss = rmspe_loss(y, output)
            # record loss
            losses.update(loss.detach().cpu().numpy(), batch_size)

            targets = y.detach().cpu().numpy()
            output = output.detach().cpu().numpy()

            final_targets.append(targets)
            final_outputs.append(output)

    final_targets = np.concatenate(final_targets)
    final_outputs = np.concatenate(final_outputs)

    try:
        metric = rmspe_metric(final_targets, final_outputs)
    except:
        metric = None

    return final_outputs, final_targets, losses.avg, metric

#evaluates the model's performance on the validation set. It computes the RMSPE for the predictions and returns the final predictions, targets and loss.

def predict_nn(X: pd.DataFrame,
               model: Union[List[MLP], MLP],
               scaler: StandardScaler,
               device,
               ensemble_method='mean'):
    if not isinstance(model, list):
        model = [model]

    for m in model:
        m.eval()
    X_num, X_cat, cat_cols = preprocess_nn(X.copy(), scaler=scaler)
    valid_dataset = TabularDataset(X_num, X_cat, None)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=512,
                                               shuffle=False,
                                               num_workers=4)

    final_outputs = []

    with torch.no_grad():
        for x_num, x_cat in tqdm(valid_loader, position=0, leave=True, desc='Evaluating'):
            x_num = x_num.to(device, dtype=torch.float)
            x_cat = x_cat.to(device)

            outputs = []
            with torch.no_grad():
                for m in model:
                    output = m(x_num, x_cat)
                    outputs.append(output.detach().cpu().numpy())

            if ensemble_method == 'median':
                pred = np.nanmedian(np.array(outputs), axis=0)
            else:
                pred = np.array(outputs).mean(axis=0)
            final_outputs.append(pred)

    final_outputs = np.concatenate(final_outputs)
    return final_outputs


#This function makes predictions using the trained MLP (or an ensemble of MLPs). It takes the input data, preprocesses it, and computes predictions. 
#The predictions can be averaged or take the median from an ensemble of models.


def predict_tabnet(X: pd.DataFrame,
                   model: Union[List[TabNetRegressor], TabNetRegressor],
                   scaler: StandardScaler,
                   ensemble_method='mean'):
    if not isinstance(model, list):
        model = [model]

    X_num, X_cat, cat_cols = preprocess_nn(X.copy(), scaler=scaler)
    X_processed = np.concatenate([X_cat, X_num], axis=1)

    predicted = []
    for m in model:
        predicted.append(m.predict(X_processed))

    if ensemble_method == 'median':
        pred = np.nanmedian(np.array(predicted), axis=0)
    else:
        pred = np.array(predicted).mean(axis=0)

    return pred


def train_tabnet(X: pd.DataFrame,
                 y: pd.DataFrame,
                 folds: List[Tuple],
                 batch_size: int = 1024,
                 lr: float = 1e-3,
                 model_path: str = 'fold_{}.pth',
                 scaler_type: str = 'standard',
                 output_dir: str = 'artifacts',
                 epochs: int = 250,
                 seed: int = 42,
                 n_pca: int = -1,
                 na_cols: bool = True,
                 patience: int = 10,
                 factor: float = 0.5,
                 gamma: float = 2.0,
                 lambda_sparse: float = 8.0,
                 n_steps: int = 2,
                 scheduler_type: str = 'cosine',
                 n_a: int = 16):
    seed_everything(seed)

    os.makedirs(output_dir, exist_ok=True)

    y = y.values.astype(np.float32)
    X_num, X_cat, cat_cols, scaler = preprocess_nn(X.copy(), scaler_type=scaler_type, n_pca=n_pca, na_cols=na_cols)

    best_losses = []
    best_predictions = []

    for cv_idx, (train_idx, valid_idx) in enumerate(folds):
        X_tr, X_va = X_num[train_idx], X_num[valid_idx]
        X_tr_cat, X_va_cat = X_cat[train_idx], X_cat[valid_idx]
        y_tr, y_va = y[train_idx], y[valid_idx]
        y_tr = y_tr.reshape(-1,1)
        y_va = y_va.reshape(-1,1)
        X_tr = np.concatenate([X_tr_cat, X_tr], axis=1)
        X_va = np.concatenate([X_va_cat, X_va], axis=1)

        cat_idxs = [0]
        cat_dims = [128]

        if scheduler_type == 'cosine':
            scheduler_params = dict(T_0=200, T_mult=1, eta_min=1e-4, last_epoch=-1, verbose=False)
            scheduler_fn = CosineAnnealingWarmRestarts
        else:
            scheduler_params = {'mode': 'min', 'min_lr': 1e-7, 'patience': patience, 'factor': factor, 'verbose': True}
            scheduler_fn = torch.optim.lr_scheduler.ReduceLROnPlateau

        model = TabNetRegressor(
            cat_idxs=cat_idxs,
            cat_dims=cat_dims,
            cat_emb_dim=1,
            n_d=n_a,
            n_a=n_a,
            n_steps=n_steps,
            gamma=gamma,
            n_independent=2,
            n_shared=2,
            lambda_sparse=lambda_sparse,
            optimizer_fn=torch.optim.Adam,
            optimizer_params={'lr': lr},
            mask_type="entmax",
            scheduler_fn=scheduler_fn,
            scheduler_params=scheduler_params,
            seed=seed,
            verbose=10
            #device_name=device,
            #clip_value=1.5
        )

        model.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], max_epochs=epochs, patience=50, batch_size=1024*20,
                  virtual_batch_size=batch_size, num_workers=4, drop_last=False, eval_metric=[RMSPE], loss_fn=RMSPELoss_Tabnet)

        path = os.path.join(output_dir, model_path.format(cv_idx))
        model.save_model(path)

        predicted = model.predict(X_va)

        rmspe = rmspe_metric(y_va, predicted)
        best_losses.append(rmspe)
        best_predictions.append(predicted)

    return best_losses, best_predictions, scaler, model
    
#This function trains a TabNet model on the training data. It uses cross-validation and stores the best models and their predictions. 
#It also applies learning rate schedulers to adjust the learning rate dynamically.




def train_nn(X: pd.DataFrame,
             y: pd.DataFrame,
             folds: List[Tuple],
             device,
             emb_dim: int = 25,
             batch_size: int = 1024,  # Lower batch size
             model_type: str = 'mlp',
             mlp_dropout: float = 0.1,  # Increased dropout
             mlp_hidden: int = 128,  # Reduced hidden units
             cnn_hidden: int = 128,  # Reduced CNN hidden layers
             lr: float = 1e-3,
             weight_decay: float = 0.0,
             model_path: str = 'fold_{}.pth',
             output_dir: str = 'artifacts',
             epochs: int = 10,  # Reduced epochs
             seed: int = 42,
             patience: int = 5,  # Lower patience
             factor: float = 0.5):
    seed_everything(seed)
    os.makedirs(output_dir, exist_ok=True)

    y = y.values.astype(np.float16)  # Lower precision
    X_num, X_cat, cat_cols, scaler = preprocess_nn(X.copy(), na_cols=True)
    
    best_losses = []
    best_predictions = []
    
    for cv_idx, (train_idx, valid_idx) in enumerate(folds):
        X_tr, X_va = X_num[train_idx], X_num[valid_idx]
        y_tr, y_va = y[train_idx], y[valid_idx]
        
        train_dataset = TabularDataset(X_tr, X_cat[train_idx] if X_cat is not None else None, y_tr)
        valid_dataset = TabularDataset(X_va, X_cat[valid_idx] if X_cat is not None else None, y_va)
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
        valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

        # Handle n_categories correctly for MLP
        n_categories = [X_cat[:, i].max() + 1 for i in range(X_cat.shape[1])] if X_cat is not None else []
        
        model = MLP(X_tr.shape[1], n_categories=n_categories, hidden=mlp_hidden, dropout=mlp_dropout, emb_dim=emb_dim).to(device)
        opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', patience=patience, factor=factor)

        best_loss = float('inf')
        best_prediction = None
        
        for epoch in range(epochs):
            train_loss = train_epoch(train_loader, model, opt, scheduler, device)
            with torch.no_grad():  # Disable gradients in evaluation
                predictions, valid_targets, valid_loss, rmspe = evaluate(valid_loader, model, device=device)
            
            if rmspe < best_loss:
                best_loss = rmspe
                best_prediction = predictions
                torch.save(model, os.path.join(output_dir, model_path.format(cv_idx)))

        best_predictions.append(best_prediction)
        best_losses.append(best_loss)
        
        del model, train_loader, valid_loader, train_dataset, valid_dataset
        gc.collect()

    return best_losses, best_predictions, scaler


#This function trains a neural network (either MLP or CNN) on the training data. It involves:
    #Initializing the model.
    #Using an optimizer (Adam, AdamW).
    #Training for multiple epochs and evaluating the performance using RMSPE.
    #Optionally using learning rate schedulers (like CosineAnnealingWarmRestarts) for training dynamics.

In [7]:
def get_top_n_models(models, scores, top_n):
    if len(models) <= top_n:
        print('Number of models are less than top_n. All models will be used.')
        return models
    sorted_ = [(y, x) for y, x in sorted(zip(scores, models), key=lambda pair: pair[0])]
    print(f'Scores (sorted): {[y for y, _ in sorted_]}')
    return [x for _, x in sorted_][:top_n]

In [10]:
import os
import gc
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import optuna
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# ----------------------------
# QLIKE METRIC
# ----------------------------
def qlike_from_rv(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    eps = 1e-9
    exp_y_true = np.exp(y_true) + eps
    exp_y_pred = np.exp(y_pred) + eps
    qlike = (exp_y_true / exp_y_pred) - (y_true - y_pred) - 1
    return np.mean(qlike)

# ----------------------------
# MLP Model Definition
# ----------------------------
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, dropout_rate):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, x):
        return self.model(x).squeeze()

# ----------------------------
# Dataset Wrapper
# ----------------------------
class TabularDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ----------------------------
# Prepare Data
# ----------------------------
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

scaler_y = StandardScaler()
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1)).flatten()

X_scaled = np.array(X_scaled, dtype=np.float32)
y_scaled = np.array(y_scaled, dtype=np.float32)

# ----------------------------
# Optuna Hyperparameter Search
# ----------------------------
def objective(trial):
    hidden_dim = trial.suggest_int('hidden_dim', 64, 256)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])

    model = MLP(X_scaled.shape[1], hidden_dim, dropout_rate).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    dataset = TabularDataset(torch.tensor(X_scaled), torch.tensor(y_scaled))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    model.train()
    for epoch in range(10):
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            pred = model(xb)
            loss = criterion(pred, yb)
            loss.backward()
            optimizer.step()

    model.eval()
    with torch.no_grad():
        preds = model(torch.tensor(X_scaled).to(device)).cpu().numpy()
    return qlike_from_rv(y_scaled, preds)

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)

best_params = study.best_params
print("Best Parameters from Optuna:", best_params)

# ----------------------------
# Train Ensemble of MLP Models
# ----------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_models = 10
ensemble_models = []

for seed in range(n_models):
    print(f"Training model {seed+1}/{n_models}")
    torch.manual_seed(seed)
    np.random.seed(seed)

    model = MLP(X_scaled.shape[1], best_params["hidden_dim"], best_params["dropout_rate"]).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=best_params["lr"])
    criterion = nn.MSELoss()

    train_loader = DataLoader(TabularDataset(torch.tensor(X_scaled), torch.tensor(y_scaled)),
                              batch_size=best_params["batch_size"], shuffle=True)

    model.train()
    for epoch in range(50):
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            pred = model(xb)
            loss = criterion(pred, yb)
            loss.backward()
            optimizer.step()

    model_path = f"mlp_ensemble_model_seed{seed}.pt"
    torch.save(model.state_dict(), model_path)
    ensemble_models.append(model_path)
    print(f"Saved model {seed+1} to {model_path}")

print("All ensemble models trained and saved.")


[I 2025-04-01 22:26:07,117] A new study created in memory with name: no-name-5efe0de0-87a4-4672-9af8-84d42c34bf3f
[I 2025-04-01 22:28:50,383] Trial 0 finished with value: 0.05634798854589462 and parameters: {'hidden_dim': 78, 'dropout_rate': 0.36664184682667167, 'lr': 0.00011773611629994392, 'batch_size': 32}. Best is trial 0 with value: 0.05634798854589462.
[I 2025-04-01 22:31:58,305] Trial 1 finished with value: 0.05276545509696007 and parameters: {'hidden_dim': 205, 'dropout_rate': 0.1124315267701379, 'lr': 0.00015805704074040478, 'batch_size': 32}. Best is trial 1 with value: 0.05276545509696007.
[I 2025-04-01 22:35:52,950] Trial 2 finished with value: 0.2025771290063858 and parameters: {'hidden_dim': 232, 'dropout_rate': 0.4956638204164292, 'lr': 0.002118689676858749, 'batch_size': 32}. Best is trial 1 with value: 0.05276545509696007.
[I 2025-04-01 22:38:10,484] Trial 3 finished with value: 0.09652992337942123 and parameters: {'hidden_dim': 202, 'dropout_rate': 0.24896090210486177

Best Parameters from Optuna: {'hidden_dim': 170, 'dropout_rate': 0.10368936480428612, 'lr': 0.00045183922948921024, 'batch_size': 128}
Training model 1/10
Saved model 1 to mlp_ensemble_model_seed0.pt
Training model 2/10
Saved model 2 to mlp_ensemble_model_seed1.pt
Training model 3/10
Saved model 3 to mlp_ensemble_model_seed2.pt
Training model 4/10
Saved model 4 to mlp_ensemble_model_seed3.pt
Training model 5/10
Saved model 5 to mlp_ensemble_model_seed4.pt
Training model 6/10
Saved model 6 to mlp_ensemble_model_seed5.pt
Training model 7/10
Saved model 7 to mlp_ensemble_model_seed6.pt
Training model 8/10
Saved model 8 to mlp_ensemble_model_seed7.pt
Training model 9/10
Saved model 9 to mlp_ensemble_model_seed8.pt
Training model 10/10
Saved model 10 to mlp_ensemble_model_seed9.pt
All ensemble models trained and saved.


# LSTM Optuna Run

In [25]:
import numpy as np
import tensorflow as tf
import optuna
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler

# QLIKE metric
def qlike_from_rv(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    eps = 1e-9
    exp_y_true = np.exp(y_true) + eps
    exp_y_pred = np.exp(y_pred) + eps
    qlike = (exp_y_true / exp_y_pred) - (y_true - y_pred) - 1
    return np.mean(qlike)

# Prepare data
X_np = np.array(X, dtype=np.float32)
if np.isnan(X_np).any():
    X = X.fillna(method='ffill')
if np.isnan(y).any():
    print("Warning: NaN values detected in y!")

scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X_np)

scaler_y = StandardScaler()
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1)).flatten()

X_np = np.array(X_scaled, dtype=np.float32)
y_np = np.array(y_scaled, dtype=np.float32)
X_np = X_np.reshape((X_np.shape[0], 1, X_np.shape[1]))

# Optuna objective using QLIKE
def objective(trial):
    lstm_units_1 = trial.suggest_int("lstm_units_1", 32, 128)
    lstm_units_2 = trial.suggest_int("lstm_units_2", 16, 64)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    epochs = 20

    fold_scores = []

    for train_idx, valid_idx in folds:
        X_train, X_valid = X_np[train_idx], X_np[valid_idx]
        y_train, y_valid = y_np[train_idx], y_np[valid_idx]

        model = Sequential([
            tf.keras.layers.Input(shape=(1, X_train.shape[2])),
            LSTM(lstm_units_1, return_sequences=True),
            Dropout(dropout_rate),
            LSTM(lstm_units_2, return_sequences=False),
            Dense(16, activation="relu"),
            Dense(1, activation="linear")
        ])

        optimizer = Adam(learning_rate=learning_rate)
        model.compile(optimizer=optimizer, loss="mse")

        model.fit(
            X_train, y_train,
            validation_data=(X_valid, y_valid),
            epochs=epochs,
            batch_size=batch_size,
            verbose=0
        )

        y_pred = model.predict(X_valid).flatten()
        score = qlike_from_rv(y_valid, y_pred)
        fold_scores.append(score)

    return np.mean(fold_scores)

# Run Optuna tuning
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=30)

# Best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Train final model on full data
final_model = Sequential([
    tf.keras.layers.Input(shape=(1, X_np.shape[2])),
    LSTM(best_params["lstm_units_1"], return_sequences=True),
    Dropout(best_params["dropout_rate"]),
    LSTM(best_params["lstm_units_2"], return_sequences=False),
    Dense(16, activation="relu"),
    Dense(1, activation="linear")
])

final_optimizer = Adam(learning_rate=best_params["learning_rate"])
final_model.compile(optimizer=final_optimizer, loss="mse")
final_model.fit(X_np, y_np, epochs=50, batch_size=best_params["batch_size"], verbose=1)

# Save model
final_model.save("lstm_optuna_model.h5")
print("LSTM Training Completed and Model Saved.")


[I 2025-03-30 20:26:58,095] A new study created in memory with name: no-name-d80447f4-e110-4bdd-b0c9-83e8074b78f5


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 995us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step


[I 2025-03-30 20:50:08,030] Trial 0 finished with value: 0.0508580282330513 and parameters: {'lstm_units_1': 116, 'lstm_units_2': 21, 'dropout_rate': 0.1735857065047698, 'learning_rate': 0.00026493359085830637, 'batch_size': 64}. Best is trial 0 with value: 0.0508580282330513.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step


[I 2025-03-30 21:06:11,605] Trial 1 finished with value: 0.0527987964451313 and parameters: {'lstm_units_1': 114, 'lstm_units_2': 37, 'dropout_rate': 0.39883280196430393, 'learning_rate': 0.0008593721322829184, 'batch_size': 128}. Best is trial 0 with value: 0.0508580282330513.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 986us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 980us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 992us/step


[I 2025-03-30 21:37:42,716] Trial 2 finished with value: 0.06253718584775925 and parameters: {'lstm_units_1': 72, 'lstm_units_2': 54, 'dropout_rate': 0.4017988770435714, 'learning_rate': 0.0037526567714114105, 'batch_size': 32}. Best is trial 0 with value: 0.0508580282330513.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 941us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 922us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 972us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 916us/step


[I 2025-03-30 21:47:12,267] Trial 3 finished with value: 0.05378023907542229 and parameters: {'lstm_units_1': 58, 'lstm_units_2': 36, 'dropout_rate': 0.3596619776122173, 'learning_rate': 0.0003223752539092304, 'batch_size': 128}. Best is trial 0 with value: 0.0508580282330513.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 964us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 967us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 960us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 946us/step


[I 2025-03-30 22:25:19,749] Trial 4 finished with value: 0.05303065478801727 and parameters: {'lstm_units_1': 110, 'lstm_units_2': 25, 'dropout_rate': 0.38366104446507043, 'learning_rate': 0.0006051654681774414, 'batch_size': 32}. Best is trial 0 with value: 0.0508580282330513.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 993us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 979us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step  
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 996us/step


[I 2025-03-30 23:10:43,537] Trial 5 finished with value: 0.050663575530052185 and parameters: {'lstm_units_1': 109, 'lstm_units_2': 47, 'dropout_rate': 0.11768495972706439, 'learning_rate': 0.00017818644728731514, 'batch_size': 32}. Best is trial 5 with value: 0.050663575530052185.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 967us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 955us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 966us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 983us/step


[I 2025-03-30 23:41:55,780] Trial 6 finished with value: 0.052009325474500656 and parameters: {'lstm_units_1': 98, 'lstm_units_2': 16, 'dropout_rate': 0.4180573050430745, 'learning_rate': 0.00027965666854605416, 'batch_size': 32}. Best is trial 5 with value: 0.050663575530052185.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 872us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 886us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 887us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 906us/step


[I 2025-03-30 23:50:31,156] Trial 7 finished with value: 0.0501602441072464 and parameters: {'lstm_units_1': 40, 'lstm_units_2': 51, 'dropout_rate': 0.16302829280601264, 'learning_rate': 0.00011268118341176139, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step


[I 2025-03-31 00:03:10,126] Trial 8 finished with value: 0.052093956619501114 and parameters: {'lstm_units_1': 101, 'lstm_units_2': 40, 'dropout_rate': 0.38429715066057113, 'learning_rate': 0.0008516513649835571, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 970us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 921us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 896us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 927us/step


[I 2025-03-31 00:13:40,608] Trial 9 finished with value: 0.07908257842063904 and parameters: {'lstm_units_1': 65, 'lstm_units_2': 29, 'dropout_rate': 0.48824532162745615, 'learning_rate': 0.0006582552448147639, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 925us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 893us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 880us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 930us/step


[I 2025-03-31 00:27:45,516] Trial 10 finished with value: 0.05267965793609619 and parameters: {'lstm_units_1': 39, 'lstm_units_2': 63, 'dropout_rate': 0.2381433835710563, 'learning_rate': 0.00010865769026922112, 'batch_size': 64}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 955us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 899us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 895us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 879us/step


[I 2025-03-31 00:51:46,960] Trial 11 finished with value: 0.050772227346897125 and parameters: {'lstm_units_1': 34, 'lstm_units_2': 47, 'dropout_rate': 0.10756636734831823, 'learning_rate': 0.00011205252406316834, 'batch_size': 32}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 963us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 982us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 982us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 982us/step


[I 2025-03-31 01:04:08,171] Trial 12 finished with value: 0.056907590478658676 and parameters: {'lstm_units_1': 86, 'lstm_units_2': 50, 'dropout_rate': 0.1018344628826725, 'learning_rate': 0.003323565591945337, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 913us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 922us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 917us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 905us/step


[I 2025-03-31 01:30:38,427] Trial 13 finished with value: 0.07079530507326126 and parameters: {'lstm_units_1': 52, 'lstm_units_2': 60, 'dropout_rate': 0.20762949592295615, 'learning_rate': 0.007833733863957564, 'batch_size': 32}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step


[I 2025-03-31 02:03:05,976] Trial 14 finished with value: 0.05071312189102173 and parameters: {'lstm_units_1': 128, 'lstm_units_2': 47, 'dropout_rate': 0.27407816667767815, 'learning_rate': 0.00017487160575908555, 'batch_size': 64}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 985us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 949us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 976us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 968us/step


[I 2025-03-31 02:15:16,396] Trial 15 finished with value: 0.05388972908258438 and parameters: {'lstm_units_1': 79, 'lstm_units_2': 55, 'dropout_rate': 0.1589835134747519, 'learning_rate': 0.0016021672069807538, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 956us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 972us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 984us/step


[I 2025-03-31 02:46:45,159] Trial 16 finished with value: 0.05037800222635269 and parameters: {'lstm_units_1': 92, 'lstm_units_2': 46, 'dropout_rate': 0.1477277387199562, 'learning_rate': 0.00017091486267337708, 'batch_size': 32}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 978us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 961us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 986us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 967us/step


[I 2025-03-31 02:58:14,740] Trial 17 finished with value: 0.051070116460323334 and parameters: {'lstm_units_1': 90, 'lstm_units_2': 31, 'dropout_rate': 0.3086800988363981, 'learning_rate': 0.00043974485431527434, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 923us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 892us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 901us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 910us/step


[I 2025-03-31 03:24:29,546] Trial 18 finished with value: 0.05146744102239609 and parameters: {'lstm_units_1': 46, 'lstm_units_2': 42, 'dropout_rate': 0.17866962794910687, 'learning_rate': 0.00018335780159762106, 'batch_size': 32}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 977us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 972us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 987us/step


[I 2025-03-31 03:41:58,458] Trial 19 finished with value: 0.058032866567373276 and parameters: {'lstm_units_1': 73, 'lstm_units_2': 57, 'dropout_rate': 0.24225491748882563, 'learning_rate': 0.001606747151615237, 'batch_size': 64}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 933us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 927us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 939us/step


[I 2025-03-31 03:52:03,538] Trial 20 finished with value: 0.05021350085735321 and parameters: {'lstm_units_1': 60, 'lstm_units_2': 52, 'dropout_rate': 0.3116454554244704, 'learning_rate': 0.00011088432241813655, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 941us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 931us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 959us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step  


[I 2025-03-31 04:02:22,075] Trial 21 finished with value: 0.05034686625003815 and parameters: {'lstm_units_1': 59, 'lstm_units_2': 51, 'dropout_rate': 0.30896637428726176, 'learning_rate': 0.00011523237820084088, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 923us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 940us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 936us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 955us/step


[I 2025-03-31 04:12:38,003] Trial 22 finished with value: 0.05134715512394905 and parameters: {'lstm_units_1': 60, 'lstm_units_2': 53, 'dropout_rate': 0.32229190649468686, 'learning_rate': 0.00010226068131120033, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 927us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 939us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 946us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 940us/step


[I 2025-03-31 04:22:19,085] Trial 23 finished with value: 0.05175969749689102 and parameters: {'lstm_units_1': 49, 'lstm_units_2': 60, 'dropout_rate': 0.3384919504864672, 'learning_rate': 0.00013960304310834325, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 908us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 947us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 927us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 934us/step


[I 2025-03-31 04:31:13,720] Trial 24 finished with value: 0.057025182992219925 and parameters: {'lstm_units_1': 41, 'lstm_units_2': 51, 'dropout_rate': 0.29163778705873633, 'learning_rate': 0.00025903255900694677, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 945us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 955us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 948us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 961us/step


[I 2025-03-31 04:41:42,636] Trial 25 finished with value: 0.051118403673172 and parameters: {'lstm_units_1': 64, 'lstm_units_2': 42, 'dropout_rate': 0.2642447514876116, 'learning_rate': 0.0003539457503847089, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 968us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 934us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 944us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 937us/step


[I 2025-03-31 04:51:57,069] Trial 26 finished with value: 0.05328996479511261 and parameters: {'lstm_units_1': 55, 'lstm_units_2': 59, 'dropout_rate': 0.2120786473584095, 'learning_rate': 0.000472032576462358, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 902us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 901us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 882us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 897us/step


[I 2025-03-31 05:00:02,984] Trial 27 finished with value: 0.08142340928316116 and parameters: {'lstm_units_1': 32, 'lstm_units_2': 63, 'dropout_rate': 0.35020199088534704, 'learning_rate': 0.00021904738962278444, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 942us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 921us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 942us/step


[I 2025-03-31 05:09:22,070] Trial 28 finished with value: 0.06322826445102692 and parameters: {'lstm_units_1': 43, 'lstm_units_2': 51, 'dropout_rate': 0.45855888601964784, 'learning_rate': 0.00014043955562628588, 'batch_size': 128}. Best is trial 7 with value: 0.0501602441072464.


[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 947us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 944us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 953us/step
[1m1341/1341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 952us/step


[I 2025-03-31 05:26:07,779] Trial 29 finished with value: 0.050079721957445145 and parameters: {'lstm_units_1': 69, 'lstm_units_2': 44, 'dropout_rate': 0.3020399589467795, 'learning_rate': 0.00013315950716143908, 'batch_size': 64}. Best is trial 29 with value: 0.050079721957445145.


Best Hyperparameters: {'lstm_units_1': 69, 'lstm_units_2': 44, 'dropout_rate': 0.3020399589467795, 'learning_rate': 0.00013315950716143908, 'batch_size': 64}
Epoch 1/50
[1m6703/6703[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - loss: 0.1625
Epoch 2/50
[1m6703/6703[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.1096
Epoch 3/50
[1m6703/6703[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.1065
Epoch 4/50
[1m6703/6703[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.1051
Epoch 5/50
[1m6703/6703[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.1034
Epoch 6/50
[1m6703/6703[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.1031
Epoch 7/50
[1m6703/6703[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.1019
Epoch 8/50
[1m6703/6703[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.1020
Epoch 9/50



LSTM Training Completed and Model Saved.
