In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, make_scorer, mean_absolute_percentage_error
from scipy.stats import loguniform
import joblib
import tensorflow as tf
from sklearn.model_selection import ParameterSampler

# Expand display options
pd.set_option("display.max_rows", None)  # Show all rows
pd.set_option("display.max_columns", None)  # Show all columns
pd.set_option("display.max_colwidth", None)  # Do not truncate column text
pd.set_option("display.expand_frame_repr", False)  # Avoid line wrapping

tf.config.threading.set_intra_op_parallelism_threads(18)
tf.config.threading.set_inter_op_parallelism_threads(18)

import optuna
from optuna.integration import TFKerasPruningCallback

In [2]:
df_OHE = pd.read_csv('../DataSet/RegressionData/healthinsurance_OHE.csv')
print(df_OHE.shape)

(13648, 146)


In [3]:
X = df_OHE.drop(columns='claim')
y = df_OHE['claim']

# First split: 70% Training, 30% Temporary (Validation + Test)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=69)

# Second split: Splitting the 30% temporary set into 15% Validation and 15% Test
X_eval, X_test, y_eval, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=69)

# Standardization
scaler = StandardScaler()
scaler.fit(X_train)

# Save the scaler
joblib.dump(scaler, 'scaler.pkl')

# Transform the datasets
X_train_scaled = scaler.transform(X_train)
X_eval_scaled = scaler.transform(X_eval)
X_test_scaled = scaler.transform(X_test)

#print dist.
print('Training set: ', X_train_scaled.shape)
print('Validation set: ', X_eval_scaled.shape)
print('Test set: ', X_test_scaled.shape)

Training set:  (9553, 145)
Validation set:  (2047, 145)
Test set:  (2048, 145)


In [12]:
import tensorflow as tf
import optuna
from optuna.integration import TFKerasPruningCallback

# Assume these are defined elsewhere:
# X, y, X_train_scaled, y_train, X_eval_scaled, y_eval, X_test, y_test
input_dim = X.shape[1]

def preprocess(x, y):
    """Optional preprocessing before batching."""
    return x, y

def create_tf_dataset(X, y, batch_size=64, shuffle=True, cache_data=True):
    """Creates a tf.data.Dataset with shuffling, optional caching, batching, and prefetching."""
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(X), reshuffle_each_iteration=True)
    dataset = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    if cache_data:
        dataset = dataset.cache()  # Cache data in memory to speed up later epochs
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

def create_model(hidden_layers, neurons, optimizer, learning_rate, regularization, dropout_rate):
    """Builds and compiles a Keras model using an explicit Input layer and best practices."""
    tf.keras.backend.clear_session()
    
    model = tf.keras.Sequential([tf.keras.Input(shape=(input_dim,))])
    
    # First Dense layer with batch normalization, activation, and dropout.
    model.add(tf.keras.layers.Dense(
        neurons,
        kernel_initializer=tf.keras.initializers.HeNormal(),
        kernel_regularizer=tf.keras.regularizers.l2(regularization)
    ))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Dropout(dropout_rate))
    
    # Additional hidden layers.
    for _ in range(hidden_layers - 1):
        model.add(tf.keras.layers.Dense(
            neurons,
            kernel_initializer=tf.keras.initializers.HeNormal(),
            kernel_regularizer=tf.keras.regularizers.l2(regularization)
        ))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.LeakyReLU())
        model.add(tf.keras.layers.Dropout(dropout_rate))
        
    # Output layer.
    model.add(tf.keras.layers.Dense(
        1,
        activation='linear',
        kernel_initializer=tf.keras.initializers.HeNormal(),
        kernel_regularizer=tf.keras.regularizers.l2(regularization)
    ))
    
    # Select and configure the optimizer.
    optimizers = {
        'adamW': tf.keras.optimizers.AdamW(learning_rate=learning_rate),
        'Adam': tf.keras.optimizers.Adam(learning_rate=learning_rate),
        'Nadam': tf.keras.optimizers.Nadam(learning_rate=learning_rate)
    }
    opt = optimizers.get(optimizer, None)
    if opt is None:
        raise ValueError(f"Unsupported optimizer: {optimizer}")

    model.compile(optimizer=opt,
                  loss='mse',
                  metrics=[
                      'mae',
                      tf.keras.metrics.RootMeanSquaredError(),
                      tf.keras.metrics.MeanAbsolutePercentageError()
                  ])
    return model

# Create datasets (only once, outside objective).
train_dataset = create_tf_dataset(X_train_scaled, y_train, batch_size=64, shuffle=True, cache_data=True)
eval_dataset  = create_tf_dataset(X_eval_scaled, y_eval, batch_size=64, shuffle=False, cache_data=True)
test_dataset  = create_tf_dataset(X_test, y_test, batch_size=64, shuffle=False, cache_data=True)

def objective(trial):
    """Objective function for Optuna hyperparameter tuning."""
    # ---------------------
    # 1) Define hyperparameter search space
    # ---------------------
    hidden_layers = trial.suggest_int("hidden_layers", 5, 30)
    neurons = trial.suggest_categorical("neurons", [20, 40, 60, 80, 100])
    optimizer_choice = trial.suggest_categorical("optimizer", ["adamW", "Nadam", "Adam"])
    learning_rate = trial.suggest_float("learning_rate", 0.0005, 0.005, log=True)
    regularization = trial.suggest_categorical("regularization", [0.0001, 0.001, 0.01])
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.3)
    
    # ---------------------
    # 2) Build the model
    # ---------------------
    model = create_model(hidden_layers, neurons, optimizer_choice, learning_rate, regularization, dropout_rate)
    
    # ---------------------
    # 3) Define callbacks (each trial gets its own set)
    # ---------------------
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_mean_absolute_percentage_error',
        patience=20,
        restore_best_weights=True,
        verbose=0
    )
    lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_mean_absolute_percentage_error',
        factor=0.5,
        patience=10,
        verbose=0
    )
    # Only the pruning callback is used for trial-level evaluation.
    pruning_callback = TFKerasPruningCallback(trial, monitor='val_mean_absolute_percentage_error')
    
    # ---------------------
    # 4) Train the model
    # ---------------------
    model.fit(
        train_dataset,
        validation_data=eval_dataset,
        epochs=200,
        verbose=1,  
        callbacks=[early_stop, lr_scheduler, pruning_callback]
    )
    
    # ---------------------
    # 5) Evaluate the model
    # ---------------------
    eval_score = model.evaluate(eval_dataset, verbose=0)
    eval_mape = eval_score[3]  # [loss, mae, rmse, mape]
    
    return eval_mape

# ---------------------
# Create & run the Optuna study
# ---------------------
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# ---------------------
# Retrieve the best 10 hyperparameter sets and the best trial overall
# ---------------------
top_trials = sorted(study.trials, key=lambda t: t.value)[:10]
best_params_list = [trial.params for trial in top_trials]

print("\nTop 10 Hyperparameter Sets:")
for i, params in enumerate(best_params_list, 1):
    print(f"Rank {i}: {params}")

best_trial = study.best_trial
best_params = best_trial.params

print("\nBest Hyperparameters:")
print(best_params)
print(f"Best Eval MAPE: {best_trial.value:.2f}%")

# ---------------------
# Build best model using best parameters
# ---------------------
best_model = create_model(**best_params)

# ---------------------
# Define final callbacks for training the best model
# ---------------------
final_early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_mean_absolute_percentage_error',
    patience=10,
    restore_best_weights=True,
    verbose=1
)
final_lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_mean_absolute_percentage_error',
    factor=0.5,
    patience=5,
    verbose=1
)
# Save only the best model during final training.
final_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_model_final.keras',
    monitor='val_mean_absolute_percentage_error',
    save_best_only=True,
    verbose=1
)

# ---------------------
# Train best model fully (only the best final model is saved)
# ---------------------
best_model.fit(
    train_dataset,
    validation_data=eval_dataset,
    epochs=200,
    verbose=0,
    callbacks=[final_early_stop, final_lr_scheduler, final_checkpoint]
)

# ---------------------
# Evaluate final model
# ---------------------
train_score = best_model.evaluate(train_dataset, verbose=0)
eval_score  = best_model.evaluate(eval_dataset, verbose=0)
test_score  = best_model.evaluate(test_dataset, verbose=0)

print("\n--- Performance of the Best Model ---")
print(f"Train  -> Loss: {train_score[0]:.4f}, MAE: {train_score[1]:.4f}, "
      f"RMSE: {train_score[2]:.4f}, MAPE: {train_score[3]:.2f}%")
print(f"Eval   -> Loss: {eval_score[0]:.4f}, MAE: {eval_score[1]:.4f}, "
      f"RMSE: {eval_score[2]:.4f}, MAPE: {eval_score[3]:.2f}%")
print(f"Test   -> Loss: {test_score[0]:.4f}, MAE: {test_score[1]:.4f}, "
      f"RMSE: {test_score[2]:.4f}, MAPE: {test_score[3]:.2f}%")



[I 2025-02-17 21:06:30,480] A new study created in memory with name: no-name-62a5b0e6-28b8-4932-89f7-6d4bc21b55b8


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 64ms/step - loss: 321381984.0000 - mae: 13319.4541 - mean_absolute_percentage_error: 99.9977 - root_mean_squared_error: 17924.4766 - val_loss: 326992672.0000 - val_mae: 13445.4932 - val_mean_absolute_percentage_error: 99.9946 - val_root_mean_squared_error: 18082.9395 - learning_rate: 6.2353e-04
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 37ms/step - loss: 321334528.0000 - mae: 13317.7002 - mean_absolute_percentage_error: 99.9688 - root_mean_squared_error: 17923.1523 - val_loss: 326977472.0000 - val_mae: 13444.9521 - val_mean_absolute_percentage_error: 99.9859 - val_root_mean_squared_error: 18082.5176 - learning_rate: 6.2353e-04
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - loss: 321272512.0000 - mae: 13315.3496 - mean_absolute_percentage_error: 99.9297 - root_mean_squared_error: 17921.4238 - val_loss: 326975264.0000 - val_mae: 1344

[I 2025-02-17 21:20:26,220] Trial 0 finished with value: 26.995189666748047 and parameters: {'hidden_layers': 25, 'neurons': 40, 'optimizer': 'Adam', 'learning_rate': 0.0006235320682528795, 'regularization': 0.0001, 'dropout_rate': 0.24200452741377398}. Best is trial 0 with value: 26.995189666748047.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 15ms/step - loss: 321357632.0000 - mae: 13319.3496 - mean_absolute_percentage_error: 100.0039 - root_mean_squared_error: 17923.8008 - val_loss: 325098432.0000 - val_mae: 13420.0264 - val_mean_absolute_percentage_error: 99.9821 - val_root_mean_squared_error: 18030.4863 - learning_rate: 0.0011
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 321070080.0000 - mae: 13315.7168 - mean_absolute_percentage_error: 100.0138 - root_mean_squared_error: 17915.7812 - val_loss: 325385600.0000 - val_mae: 13426.4678 - val_mean_absolute_percentage_error: 100.0534 - val_root_mean_squared_error: 18038.4473 - learning_rate: 0.0011
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 320666880.0000 - mae: 13309.0537 - mean_absolute_percentage_error: 99.9927 - root_mean_squared_error: 17904.5293 - val_loss: 325144768.0000 - val_mae: 13421.6943 

[I 2025-02-17 21:23:37,940] Trial 1 finished with value: 22.102413177490234 and parameters: {'hidden_layers': 12, 'neurons': 40, 'optimizer': 'adamW', 'learning_rate': 0.0011462773920085047, 'regularization': 0.01, 'dropout_rate': 0.11283462245892283}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 15ms/step - loss: 321380544.0000 - mae: 13319.4092 - mean_absolute_percentage_error: 99.9967 - root_mean_squared_error: 17924.4355 - val_loss: 326989664.0000 - val_mae: 13445.4580 - val_mean_absolute_percentage_error: 99.9947 - val_root_mean_squared_error: 18082.8555 - learning_rate: 7.0136e-04
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 321312512.0000 - mae: 13317.2607 - mean_absolute_percentage_error: 99.9657 - root_mean_squared_error: 17922.5391 - val_loss: 326671520.0000 - val_mae: 13442.7344 - val_mean_absolute_percentage_error: 100.0188 - val_root_mean_squared_error: 18074.0566 - learning_rate: 7.0136e-04
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 321127520.0000 - mae: 13314.5273 - mean_absolute_percentage_error: 99.9590 - root_mean_squared_error: 17917.3828 - val_loss: 325624608.0000 - val_mae: 13431

[I 2025-02-17 21:28:41,323] Trial 2 finished with value: 25.553466796875 and parameters: {'hidden_layers': 15, 'neurons': 40, 'optimizer': 'Adam', 'learning_rate': 0.000701359244173059, 'regularization': 0.01, 'dropout_rate': 0.24332207092197924}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 22ms/step - loss: 321356096.0000 - mae: 13318.4941 - mean_absolute_percentage_error: 99.9814 - root_mean_squared_error: 17923.7559 - val_loss: 327002784.0000 - val_mae: 13445.9756 - val_mean_absolute_percentage_error: 100.0037 - val_root_mean_squared_error: 18083.2188 - learning_rate: 0.0012
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 321165696.0000 - mae: 13312.4648 - mean_absolute_percentage_error: 99.8933 - root_mean_squared_error: 17918.4492 - val_loss: 307279200.0000 - val_mae: 13191.1416 - val_mean_absolute_percentage_error: 99.5441 - val_root_mean_squared_error: 17529.3809 - learning_rate: 0.0012
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 320328608.0000 - mae: 13299.5859 - mean_absolute_percentage_error: 99.8458 - root_mean_squared_error: 17895.0957 - val_loss: 319629184.0000 - val_mae: 13374.6943 

[I 2025-02-17 21:33:50,448] Trial 3 finished with value: 25.470481872558594 and parameters: {'hidden_layers': 26, 'neurons': 60, 'optimizer': 'adamW', 'learning_rate': 0.0012219641237962825, 'regularization': 0.01, 'dropout_rate': 0.25670461021929925}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step - loss: 321382944.0000 - mae: 13319.5488 - mean_absolute_percentage_error: 99.9994 - root_mean_squared_error: 17924.5039 - val_loss: 326870336.0000 - val_mae: 13443.4668 - val_mean_absolute_percentage_error: 99.9880 - val_root_mean_squared_error: 18079.5566 - learning_rate: 6.6362e-04
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 321256896.0000 - mae: 13317.4648 - mean_absolute_percentage_error: 99.9932 - root_mean_squared_error: 17920.9902 - val_loss: 326388640.0000 - val_mae: 13438.6074 - val_mean_absolute_percentage_error: 100.0318 - val_root_mean_squared_error: 18066.2285 - learning_rate: 6.6362e-04
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 321025792.0000 - mae: 13314.5342 - mean_absolute_percentage_error: 100.0039 - root_mean_squared_error: 17914.5410 - val_loss: 326144928.0000 - val_mae: 13435

[I 2025-02-17 21:39:43,256] Trial 4 finished with value: 26.014522552490234 and parameters: {'hidden_layers': 6, 'neurons': 40, 'optimizer': 'Nadam', 'learning_rate': 0.0006636218921879619, 'regularization': 0.0001, 'dropout_rate': 0.264737992680745}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 321127648.0000 - mae: 13313.5068 - mean_absolute_percentage_error: 99.9382 - root_mean_squared_error: 17917.3945

[I 2025-02-17 21:40:53,875] Trial 5 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 18ms/step - loss: 321215200.0000 - mae: 13315.4131 - mean_absolute_percentage_error: 99.9513 - root_mean_squared_error: 17919.8438 - val_loss: 196112736.0000 - val_mae: 11265.8252 - val_mean_absolute_percentage_error: 97.4996 - val_root_mean_squared_error: 14004.0254 - learning_rate: 0.0041
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 318579296.0000 - mae: 13269.1836 - mean_absolute_percentage_error: 99.7353 - root_mean_squared_error: 17846.2246 - val_loss: 273952256.0000 - val_mae: 12743.6240 - val_mean_absolute_percentage_error: 100.7490 - val_root_mean_squared_error: 16551.5039 - learning_rate: 0.0041
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 312338208.0000 - mae: 13145.8174 - mean_absolute_percentage_error: 99.0049 - root_mean_squared_error: 17670.5645 - val_loss: 281567904.0000 - val_mae: 12838.5664 - 

[I 2025-02-17 21:43:33,292] Trial 6 finished with value: 26.415491104125977 and parameters: {'hidden_layers': 18, 'neurons': 40, 'optimizer': 'Adam', 'learning_rate': 0.0041490826562426315, 'regularization': 0.001, 'dropout_rate': 0.19160794315459162}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m144/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 15ms/step - loss: 321136096.0000 - mae: 13313.9072 - mean_absolute_percentage_error: 99.9996 - root_mean_squared_error: 17917.5156

[I 2025-02-17 21:44:29,062] Trial 7 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m135/150[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 7ms/step - loss: 320664032.0000 - mae: 13304.6338 - mean_absolute_percentage_error: 100.0093 - root_mean_squared_error: 17904.2090

[I 2025-02-17 21:44:46,784] Trial 8 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 17ms/step - loss: 321354848.0000 - mae: 13318.4619 - mean_absolute_percentage_error: 99.9810 - root_mean_squared_error: 17923.7227 - val_loss: 325997120.0000 - val_mae: 13426.7617 - val_mean_absolute_percentage_error: 99.8295 - val_root_mean_squared_error: 18055.3906 - learning_rate: 0.0020
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 320858496.0000 - mae: 13308.1846 - mean_absolute_percentage_error: 99.8865 - root_mean_squared_error: 17909.8965 - val_loss: 302952320.0000 - val_mae: 13140.1055 - val_mean_absolute_percentage_error: 99.8092 - val_root_mean_squared_error: 17405.5254 - learning_rate: 0.0020
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 319074336.0000 - mae: 13278.1924 - mean_absolute_percentage_error: 99.7502 - root_mean_squared_error: 17860.0508 - val_loss: 315737600.0000 - val_mae: 13313.7441 - 

[I 2025-02-17 21:49:33,533] Trial 9 finished with value: 27.09002685546875 and parameters: {'hidden_layers': 20, 'neurons': 40, 'optimizer': 'Nadam', 'learning_rate': 0.0020205068347771485, 'regularization': 0.0001, 'dropout_rate': 0.22555275990215712}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 26ms/step - loss: 321351040.0000 - mae: 13318.7627 - mean_absolute_percentage_error: 99.9915 - root_mean_squared_error: 17923.6152 - val_loss: 325412928.0000 - val_mae: 13423.7090 - val_mean_absolute_percentage_error: 99.9733 - val_root_mean_squared_error: 18039.2051 - learning_rate: 0.0021
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - loss: 320954752.0000 - mae: 13312.4756 - mean_absolute_percentage_error: 99.9755 - root_mean_squared_error: 17912.5684 - val_loss: 324949248.0000 - val_mae: 13419.4961 - val_mean_absolute_percentage_error: 100.0229 - val_root_mean_squared_error: 18026.3496 - learning_rate: 0.0021
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 320217824.0000 - mae: 13299.1611 - mean_absolute_percentage_error: 99.9122 - root_mean_squared_error: 17891.9922 - val_loss: 324051744.0000 - val_mae: 13402.5947 

[I 2025-02-17 21:55:16,518] Trial 10 finished with value: 25.34774398803711 and parameters: {'hidden_layers': 12, 'neurons': 20, 'optimizer': 'adamW', 'learning_rate': 0.0021244387619638065, 'regularization': 0.01, 'dropout_rate': 0.11210120361892426}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 28ms/step - loss: 321342688.0000 - mae: 13318.4248 - mean_absolute_percentage_error: 99.9846 - root_mean_squared_error: 17923.3828 - val_loss: 325315712.0000 - val_mae: 13423.4463 - val_mean_absolute_percentage_error: 99.9663 - val_root_mean_squared_error: 18036.5098 - learning_rate: 0.0020
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - loss: 320935456.0000 - mae: 13312.2783 - mean_absolute_percentage_error: 99.9712 - root_mean_squared_error: 17912.0293 - val_loss: 324792736.0000 - val_mae: 13416.9863 - val_mean_absolute_percentage_error: 100.0110 - val_root_mean_squared_error: 18022.0078 - learning_rate: 0.0020
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 320207392.0000 - mae: 13299.1924 - mean_absolute_percentage_error: 99.9125 - root_mean_squared_error: 17891.7031 - val_loss: 324140224.0000 - val_mae: 13404.0000 

[I 2025-02-17 22:00:39,017] Trial 11 finished with value: 26.804065704345703 and parameters: {'hidden_layers': 12, 'neurons': 20, 'optimizer': 'adamW', 'learning_rate': 0.0019937176348353816, 'regularization': 0.01, 'dropout_rate': 0.10236695353133214}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m146/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - loss: 321234464.0000 - mae: 13315.9219 - mean_absolute_percentage_error: 99.9980 - root_mean_squared_error: 17920.2852

[I 2025-02-17 22:01:29,358] Trial 12 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 31ms/step - loss: 321342528.0000 - mae: 13318.1699 - mean_absolute_percentage_error: 99.9786 - root_mean_squared_error: 17923.3789 - val_loss: 321912448.0000 - val_mae: 13369.0693 - val_mean_absolute_percentage_error: 99.7139 - val_root_mean_squared_error: 17941.9180 - learning_rate: 0.0026
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - loss: 320792992.0000 - mae: 13308.6875 - mean_absolute_percentage_error: 99.9337 - root_mean_squared_error: 17908.0625 - val_loss: 322638624.0000 - val_mae: 13391.3965 - val_mean_absolute_percentage_error: 100.0439 - val_root_mean_squared_error: 17962.1445 - learning_rate: 0.0026
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - loss: 319522112.0000 - mae: 13285.7256 - mean_absolute_percentage_error: 99.8116 - root_mean_squared_error: 17872.5547 - val_loss: 321849408.0000 - val_mae: 13376.9873 

[I 2025-02-17 22:06:37,274] Trial 13 finished with value: 26.44919776916504 and parameters: {'hidden_layers': 15, 'neurons': 20, 'optimizer': 'adamW', 'learning_rate': 0.0026097709079675774, 'regularization': 0.01, 'dropout_rate': 0.15494726605168582}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 43ms/step - loss: 321351488.0000 - mae: 13318.3252 - mean_absolute_percentage_error: 99.9790 - root_mean_squared_error: 17923.6289 - val_loss: 326906496.0000 - val_mae: 13443.3867 - val_mean_absolute_percentage_error: 99.9680 - val_root_mean_squared_error: 18080.5566 - learning_rate: 0.0016
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 320999904.0000 - mae: 13308.9697 - mean_absolute_percentage_error: 99.8564 - root_mean_squared_error: 17913.8320 - val_loss: 296110976.0000 - val_mae: 13029.9932 - val_mean_absolute_percentage_error: 99.0503 - val_root_mean_squared_error: 17207.8750 - learning_rate: 0.0016
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 19ms/step - loss: 319545568.0000 - mae: 13285.6582 - mean_absolute_percentage_error: 99.7591 - root_mean_squared_error: 17873.2266 - val_loss: 313684288.0000 - val_mae: 13289.6484 -

[I 2025-02-17 22:14:55,357] Trial 14 finished with value: 25.86534309387207 and parameters: {'hidden_layers': 22, 'neurons': 60, 'optimizer': 'adamW', 'learning_rate': 0.0015715029303370633, 'regularization': 0.01, 'dropout_rate': 0.2988616144334861}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 321218048.0000 - mae: 13316.7539 - mean_absolute_percentage_error: 99.9934 - root_mean_squared_error: 17919.8984

[I 2025-02-17 22:15:45,267] Trial 15 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m147/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 14ms/step - loss: 321257600.0000 - mae: 13316.1641 - mean_absolute_percentage_error: 99.9881 - root_mean_squared_error: 17920.9453

[I 2025-02-17 22:16:46,561] Trial 16 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 36ms/step - loss: 321347104.0000 - mae: 13318.5869 - mean_absolute_percentage_error: 99.9874 - root_mean_squared_error: 17923.5059 - val_loss: 322180352.0000 - val_mae: 13381.4189 - val_mean_absolute_percentage_error: 99.8943 - val_root_mean_squared_error: 17949.3828 - learning_rate: 0.0028
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - loss: 320773376.0000 - mae: 13308.8467 - mean_absolute_percentage_error: 99.9454 - root_mean_squared_error: 17907.5156 - val_loss: 322007840.0000 - val_mae: 13385.6211 - val_mean_absolute_percentage_error: 100.0991 - val_root_mean_squared_error: 17944.5762 - learning_rate: 0.0028
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - loss: 319505792.0000 - mae: 13284.8066 - mean_absolute_percentage_error: 99.8094 - root_mean_squared_error: 17872.1035 - val_loss: 320994496.0000 - val_mae: 13356.9375 

[I 2025-02-17 22:23:10,671] Trial 17 finished with value: 24.48186683654785 and parameters: {'hidden_layers': 17, 'neurons': 20, 'optimizer': 'adamW', 'learning_rate': 0.0027899811845098128, 'regularization': 0.01, 'dropout_rate': 0.10371259837232008}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - loss: 321041344.0000 - mae: 13312.4385 - mean_absolute_percentage_error: 99.9458 - root_mean_squared_error: 17914.9980

[I 2025-02-17 22:25:15,389] Trial 18 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 43ms/step - loss: 321202336.0000 - mae: 13316.4990 - mean_absolute_percentage_error: 99.9837 - root_mean_squared_error: 17919.4863 - val_loss: 261022032.0000 - val_mae: 12497.1484 - val_mean_absolute_percentage_error: 98.7566 - val_root_mean_squared_error: 16156.1768 - learning_rate: 0.0031
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 26ms/step - loss: 319091328.0000 - mae: 13280.8301 - mean_absolute_percentage_error: 99.8548 - root_mean_squared_error: 17860.5410 - val_loss: 282299488.0000 - val_mae: 12829.5234 - val_mean_absolute_percentage_error: 99.4324 - val_root_mean_squared_error: 16801.7695 - learning_rate: 0.0031
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 26ms/step - loss: 314551840.0000 - mae: 13193.1689 - mean_absolute_percentage_error: 99.3911 - root_mean_squared_error: 17733.0508 - val_loss: 287617344.0000 - val_mae: 12896.2715 -

[I 2025-02-17 22:30:28,262] Trial 19 finished with value: 24.062082290649414 and parameters: {'hidden_layers': 18, 'neurons': 60, 'optimizer': 'Nadam', 'learning_rate': 0.003101236576604122, 'regularization': 0.001, 'dropout_rate': 0.16979591122174972}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 321350016.0000 - mae: 13318.6064 - mean_absolute_percentage_error: 99.9944 - root_mean_squared_error: 17923.5703

[I 2025-02-17 22:32:01,852] Trial 20 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 20ms/step - loss: 321196768.0000 - mae: 13316.5049 - mean_absolute_percentage_error: 99.9877 - root_mean_squared_error: 17919.3320 - val_loss: 270028736.0000 - val_mae: 12600.0820 - val_mean_absolute_percentage_error: 98.1970 - val_root_mean_squared_error: 16432.5508 - learning_rate: 0.0032
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 319142688.0000 - mae: 13281.4219 - mean_absolute_percentage_error: 99.8525 - root_mean_squared_error: 17861.9766 - val_loss: 295141856.0000 - val_mae: 13033.2666 - val_mean_absolute_percentage_error: 100.1827 - val_root_mean_squared_error: 17179.6934 - learning_rate: 0.0032
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 314486656.0000 - mae: 13191.1865 - mean_absolute_percentage_error: 99.3652 - root_mean_squared_error: 17731.2188 - val_loss: 287599424.0000 - val_mae: 12863.6396 -

[I 2025-02-17 22:36:31,402] Trial 21 finished with value: 23.19792938232422 and parameters: {'hidden_layers': 17, 'neurons': 60, 'optimizer': 'Nadam', 'learning_rate': 0.003241877758917267, 'regularization': 0.001, 'dropout_rate': 0.165476210596809}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 19ms/step - loss: 320909440.0000 - mae: 13311.7305 - mean_absolute_percentage_error: 99.9672 - root_mean_squared_error: 17911.3438 - val_loss: 148151328.0000 - val_mae: 10110.8164 - val_mean_absolute_percentage_error: 98.4990 - val_root_mean_squared_error: 12171.7432 - learning_rate: 0.0049
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 315489376.0000 - mae: 13211.3125 - mean_absolute_percentage_error: 99.4854 - root_mean_squared_error: 17759.5156 - val_loss: 204610480.0000 - val_mae: 11585.5508 - val_mean_absolute_percentage_error: 100.8525 - val_root_mean_squared_error: 14304.2119 - learning_rate: 0.0049
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 303174912.0000 - mae: 12961.9375 - mean_absolute_percentage_error: 98.0061 - root_mean_squared_error: 17409.4727 - val_loss: 218134672.0000 - val_mae: 11845.4961 -

[I 2025-02-17 22:38:41,049] Trial 22 finished with value: 24.423980712890625 and parameters: {'hidden_layers': 17, 'neurons': 60, 'optimizer': 'Nadam', 'learning_rate': 0.004946713343290107, 'regularization': 0.001, 'dropout_rate': 0.1645317805243952}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 21ms/step - loss: 321195648.0000 - mae: 13316.2266 - mean_absolute_percentage_error: 99.9778 - root_mean_squared_error: 17919.3027 - val_loss: 146643728.0000 - val_mae: 9777.6250 - val_mean_absolute_percentage_error: 87.9729 - val_root_mean_squared_error: 12109.6543 - learning_rate: 0.0034
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - loss: 318920352.0000 - mae: 13276.9404 - mean_absolute_percentage_error: 99.8125 - root_mean_squared_error: 17855.7656 - val_loss: 261443904.0000 - val_mae: 12549.4883 - val_mean_absolute_percentage_error: 99.6607 - val_root_mean_squared_error: 16169.2275 - learning_rate: 0.0034
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 313775040.0000 - mae: 13176.2646 - mean_absolute_percentage_error: 99.2217 - root_mean_squared_error: 17711.1387 - val_loss: 271225664.0000 - val_mae: 12689.3604 - 

[I 2025-02-17 22:41:36,805] Trial 23 finished with value: 23.0010929107666 and parameters: {'hidden_layers': 22, 'neurons': 60, 'optimizer': 'Nadam', 'learning_rate': 0.003383904488865713, 'regularization': 0.001, 'dropout_rate': 0.16964364066119972}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m148/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - loss: 321136768.0000 - mae: 13313.7471 - mean_absolute_percentage_error: 99.9586 - root_mean_squared_error: 17917.6113

[I 2025-02-17 22:42:30,794] Trial 24 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 20ms/step - loss: 321275648.0000 - mae: 13317.4746 - mean_absolute_percentage_error: 99.9830 - root_mean_squared_error: 17921.5234 - val_loss: 258784016.0000 - val_mae: 12401.4121 - val_mean_absolute_percentage_error: 97.5455 - val_root_mean_squared_error: 16086.7656 - learning_rate: 0.0024
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 319846496.0000 - mae: 13294.1992 - mean_absolute_percentage_error: 99.9087 - root_mean_squared_error: 17881.6426 - val_loss: 292425440.0000 - val_mae: 12967.7051 - val_mean_absolute_percentage_error: 99.3376 - val_root_mean_squared_error: 17100.4512 - learning_rate: 0.0024
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 19ms/step - loss: 316760608.0000 - mae: 13236.9082 - mean_absolute_percentage_error: 99.6273 - root_mean_squared_error: 17795.1797 - val_loss: 297464928.0000 - val_mae: 13042.0840 -

[I 2025-02-17 22:46:15,949] Trial 25 finished with value: 23.544775009155273 and parameters: {'hidden_layers': 22, 'neurons': 60, 'optimizer': 'Nadam', 'learning_rate': 0.0024339073664579863, 'regularization': 0.001, 'dropout_rate': 0.13796670313595205}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m142/150[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 9ms/step - loss: 321025728.0000 - mae: 13311.8906 - mean_absolute_percentage_error: 100.0061 - root_mean_squared_error: 17914.4102

[I 2025-02-17 22:46:45,888] Trial 26 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 65ms/step - loss: 320938240.0000 - mae: 13313.2256 - mean_absolute_percentage_error: 99.9943 - root_mean_squared_error: 17912.1406 - val_loss: 231695856.0000 - val_mae: 11934.2500 - val_mean_absolute_percentage_error: 97.2261 - val_root_mean_squared_error: 15221.5586 - learning_rate: 0.0034
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 316864320.0000 - mae: 13242.0039 - mean_absolute_percentage_error: 99.7112 - root_mean_squared_error: 17798.1426 - val_loss: 258431888.0000 - val_mae: 12484.2686 - val_mean_absolute_percentage_error: 100.1489 - val_root_mean_squared_error: 16075.8164 - learning_rate: 0.0034
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - loss: 307952032.0000 - mae: 13069.9736 - mean_absolute_percentage_error: 98.7785 - root_mean_squared_error: 17546.0566 - val_loss: 251434176.0000 - val_mae: 12373.1543 

[I 2025-02-17 22:52:10,277] Trial 27 finished with value: 22.29725456237793 and parameters: {'hidden_layers': 14, 'neurons': 100, 'optimizer': 'Nadam', 'learning_rate': 0.0034392907953942997, 'regularization': 0.001, 'dropout_rate': 0.14696151778307404}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 24ms/step - loss: 321124448.0000 - mae: 13315.7471 - mean_absolute_percentage_error: 100.0063 - root_mean_squared_error: 17917.2773

[I 2025-02-17 22:53:05,429] Trial 28 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 33ms/step - loss: 320584576.0000 - mae: 13305.8027 - mean_absolute_percentage_error: 99.9501 - root_mean_squared_error: 17902.2676

[I 2025-02-17 22:53:49,700] Trial 29 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 13ms/step - loss: 321230368.0000 - mae: 13317.0938 - mean_absolute_percentage_error: 100.0080 - root_mean_squared_error: 17920.2246

[I 2025-02-17 22:54:15,955] Trial 30 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 16ms/step - loss: 321232064.0000 - mae: 13316.8857 - mean_absolute_percentage_error: 99.9848 - root_mean_squared_error: 17920.3125 - val_loss: 300758048.0000 - val_mae: 13056.4219 - val_mean_absolute_percentage_error: 99.0342 - val_root_mean_squared_error: 17342.3770 - learning_rate: 0.0035
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 319569824.0000 - mae: 13288.1143 - mean_absolute_percentage_error: 99.8666 - root_mean_squared_error: 17873.9141 - val_loss: 303144640.0000 - val_mae: 13130.6562 - val_mean_absolute_percentage_error: 99.8997 - val_root_mean_squared_error: 17411.0488 - learning_rate: 0.0035
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 315850848.0000 - mae: 13215.0566 - mean_absolute_percentage_error: 99.4511 - root_mean_squared_error: 17769.6152 - val_loss: 304343168.0000 - val_mae: 13117.1094 - v

[I 2025-02-17 22:56:39,718] Trial 31 finished with value: 24.18043327331543 and parameters: {'hidden_layers': 16, 'neurons': 40, 'optimizer': 'Nadam', 'learning_rate': 0.0035126125539387955, 'regularization': 0.001, 'dropout_rate': 0.1623423578506306}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 13ms/step - loss: 321246304.0000 - mae: 13317.1260 - mean_absolute_percentage_error: 99.9844 - root_mean_squared_error: 17920.7070 - val_loss: 312314752.0000 - val_mae: 13239.6992 - val_mean_absolute_percentage_error: 99.6651 - val_root_mean_squared_error: 17672.4297 - learning_rate: 0.0030
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 319843936.0000 - mae: 13293.8594 - mean_absolute_percentage_error: 99.9078 - root_mean_squared_error: 17881.5703 - val_loss: 311001632.0000 - val_mae: 13227.1250 - val_mean_absolute_percentage_error: 99.7684 - val_root_mean_squared_error: 17635.2383 - learning_rate: 0.0030
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 316880672.0000 - mae: 13237.2881 - mean_absolute_percentage_error: 99.6112 - root_mean_squared_error: 17798.5547 - val_loss: 310366848.0000 - val_mae: 13201.9277 - v

[I 2025-02-17 22:58:56,947] Trial 32 finished with value: 23.65427589416504 and parameters: {'hidden_layers': 15, 'neurons': 40, 'optimizer': 'Nadam', 'learning_rate': 0.0029654423375780384, 'regularization': 0.001, 'dropout_rate': 0.1330289930410676}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 22ms/step - loss: 321278304.0000 - mae: 13317.2773 - mean_absolute_percentage_error: 99.9767 - root_mean_squared_error: 17921.5957 - val_loss: 272732928.0000 - val_mae: 12619.4424 - val_mean_absolute_percentage_error: 97.5206 - val_root_mean_squared_error: 16514.6270 - learning_rate: 0.0024
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 319957920.0000 - mae: 13296.0986 - mean_absolute_percentage_error: 99.8972 - root_mean_squared_error: 17884.7559 - val_loss: 296890816.0000 - val_mae: 13063.3154 - val_mean_absolute_percentage_error: 99.9668 - val_root_mean_squared_error: 17230.5195 - learning_rate: 0.0024
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 317092704.0000 - mae: 13243.0352 - mean_absolute_percentage_error: 99.6071 - root_mean_squared_error: 17804.5059 - val_loss: 293479456.0000 - val_mae: 12997.6992 -

[I 2025-02-17 23:02:59,260] Trial 33 finished with value: 23.542098999023438 and parameters: {'hidden_layers': 22, 'neurons': 60, 'optimizer': 'Nadam', 'learning_rate': 0.0023796827774929916, 'regularization': 0.001, 'dropout_rate': 0.19621928632034646}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 39ms/step - loss: 320877216.0000 - mae: 13312.4766 - mean_absolute_percentage_error: 99.9947 - root_mean_squared_error: 17910.4375 - val_loss: 247810432.0000 - val_mae: 12204.4180 - val_mean_absolute_percentage_error: 97.9809 - val_root_mean_squared_error: 15741.9961 - learning_rate: 0.0036
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 34ms/step - loss: 316762656.0000 - mae: 13240.3115 - mean_absolute_percentage_error: 99.7049 - root_mean_squared_error: 17795.2871 - val_loss: 236439376.0000 - val_mae: 12071.0020 - val_mean_absolute_percentage_error: 98.5641 - val_root_mean_squared_error: 15376.5850 - learning_rate: 0.0036
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 31ms/step - loss: 307615328.0000 - mae: 13063.6025 - mean_absolute_percentage_error: 98.7463 - root_mean_squared_error: 17536.4590 - val_loss: 248421120.0000 - val_mae: 12314.5898 -

[I 2025-02-17 23:09:44,011] Trial 34 finished with value: 22.48834228515625 and parameters: {'hidden_layers': 13, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.003607639946220003, 'regularization': 0.001, 'dropout_rate': 0.1182706740916589}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 86ms/step - loss: 320794144.0000 - mae: 13311.1299 - mean_absolute_percentage_error: 99.9935 - root_mean_squared_error: 17908.1289 - val_loss: 194298864.0000 - val_mae: 11274.0986 - val_mean_absolute_percentage_error: 98.2177 - val_root_mean_squared_error: 13939.1123 - learning_rate: 0.0042
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 69ms/step - loss: 315269184.0000 - mae: 13211.4805 - mean_absolute_percentage_error: 99.5487 - root_mean_squared_error: 17753.3184 - val_loss: 193953648.0000 - val_mae: 11428.6748 - val_mean_absolute_percentage_error: 103.4339 - val_root_mean_squared_error: 13926.7246 - learning_rate: 0.0042
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 68ms/step - loss: 302635520.0000 - mae: 12962.6143 - mean_absolute_percentage_error: 98.1460 - root_mean_squared_error: 17393.9492 - val_loss: 214251328.0000 - val_mae: 11708.499

[I 2025-02-17 23:17:48,593] Trial 35 finished with value: 23.286054611206055 and parameters: {'hidden_layers': 14, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.0041573473640436195, 'regularization': 0.0001, 'dropout_rate': 0.11942247295900069}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 321208000.0000 - mae: 13317.3389 - mean_absolute_percentage_error: 100.0131 - root_mean_squared_error: 17919.6211

[I 2025-02-17 23:18:39,188] Trial 36 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 75ms/step - loss: 320851488.0000 - mae: 13311.7568 - mean_absolute_percentage_error: 99.9885 - root_mean_squared_error: 17909.7227 - val_loss: 264158400.0000 - val_mae: 12529.1230 - val_mean_absolute_percentage_error: 98.9460 - val_root_mean_squared_error: 16252.9502 - learning_rate: 0.0036
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 64ms/step - loss: 316397536.0000 - mae: 13233.5605 - mean_absolute_percentage_error: 99.6733 - root_mean_squared_error: 17785.0293 - val_loss: 251485536.0000 - val_mae: 12363.4092 - val_mean_absolute_percentage_error: 100.0245 - val_root_mean_squared_error: 15858.2959 - learning_rate: 0.0036
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 63ms/step - loss: 306579616.0000 - mae: 13042.3125 - mean_absolute_percentage_error: 98.6375 - root_mean_squared_error: 17506.9375 - val_loss: 247318720.0000 - val_mae: 12260.7725

[I 2025-02-17 23:26:57,073] Trial 37 finished with value: 22.791723251342773 and parameters: {'hidden_layers': 13, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.003629796616953451, 'regularization': 0.001, 'dropout_rate': 0.14582828558172395}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 65ms/step - loss: 321121856.0000 - mae: 13315.4580 - mean_absolute_percentage_error: 99.9991 - root_mean_squared_error: 17917.2070

[I 2025-02-17 23:28:22,781] Trial 38 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 44ms/step - loss: 320848416.0000 - mae: 13311.7852 - mean_absolute_percentage_error: 100.0057 - root_mean_squared_error: 17909.6035

[I 2025-02-17 23:29:18,220] Trial 39 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - loss: 321319296.0000 - mae: 13318.2412 - mean_absolute_percentage_error: 100.0044 - root_mean_squared_error: 17922.6973

[I 2025-02-17 23:29:55,736] Trial 40 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 81ms/step - loss: 320927648.0000 - mae: 13313.1670 - mean_absolute_percentage_error: 99.9961 - root_mean_squared_error: 17911.8418 - val_loss: 257433232.0000 - val_mae: 12393.5518 - val_mean_absolute_percentage_error: 98.0219 - val_root_mean_squared_error: 16044.7256 - learning_rate: 0.0034
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 64ms/step - loss: 317039200.0000 - mae: 13245.6465 - mean_absolute_percentage_error: 99.7277 - root_mean_squared_error: 17803.0488 - val_loss: 254819376.0000 - val_mae: 12411.9971 - val_mean_absolute_percentage_error: 99.9433 - val_root_mean_squared_error: 15963.0625 - learning_rate: 0.0034
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 64ms/step - loss: 308425888.0000 - mae: 13079.4277 - mean_absolute_percentage_error: 98.8270 - root_mean_squared_error: 17559.5488 - val_loss: 266782880.0000 - val_mae: 12588.4404

[I 2025-02-17 23:38:47,864] Trial 41 finished with value: 22.616931915283203 and parameters: {'hidden_layers': 13, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.0034114118822873626, 'regularization': 0.001, 'dropout_rate': 0.13685195466975933}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 82ms/step - loss: 320660480.0000 - mae: 13309.0410 - mean_absolute_percentage_error: 99.9856 - root_mean_squared_error: 17904.4062 - val_loss: 226873904.0000 - val_mae: 11887.2695 - val_mean_absolute_percentage_error: 97.8796 - val_root_mean_squared_error: 15062.3340 - learning_rate: 0.0046
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 66ms/step - loss: 314152704.0000 - mae: 13190.6738 - mean_absolute_percentage_error: 99.4448 - root_mean_squared_error: 17721.8652 - val_loss: 227707040.0000 - val_mae: 12038.7002 - val_mean_absolute_percentage_error: 102.3544 - val_root_mean_squared_error: 15089.9648 - learning_rate: 0.0046
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 64ms/step - loss: 299295392.0000 - mae: 12895.3379 - mean_absolute_percentage_error: 97.7108 - root_mean_squared_error: 17297.6895 - val_loss: 232812608.0000 - val_mae: 12066.723

[I 2025-02-17 23:49:04,640] Trial 42 finished with value: 22.706912994384766 and parameters: {'hidden_layers': 12, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.004580839776429419, 'regularization': 0.001, 'dropout_rate': 0.12760625204711257}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 54ms/step - loss: 320707008.0000 - mae: 13309.3145 - mean_absolute_percentage_error: 99.9918 - root_mean_squared_error: 17905.6699

[I 2025-02-17 23:50:06,674] Trial 43 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 80ms/step - loss: 320726656.0000 - mae: 13309.9326 - mean_absolute_percentage_error: 99.9878 - root_mean_squared_error: 17906.2500 - val_loss: 235364992.0000 - val_mae: 11984.5117 - val_mean_absolute_percentage_error: 96.1968 - val_root_mean_squared_error: 15341.6094 - learning_rate: 0.0045
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 62ms/step - loss: 314454432.0000 - mae: 13196.1934 - mean_absolute_percentage_error: 99.4555 - root_mean_squared_error: 17730.3672 - val_loss: 219721696.0000 - val_mae: 11828.7393 - val_mean_absolute_percentage_error: 102.0260 - val_root_mean_squared_error: 14823.0127 - learning_rate: 0.0045
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 63ms/step - loss: 300125408.0000 - mae: 12910.9561 - mean_absolute_percentage_error: 97.8552 - root_mean_squared_error: 17321.6855 - val_loss: 231813216.0000 - val_mae: 12023.1963 

[I 2025-02-17 23:57:24,833] Trial 44 finished with value: 23.008878707885742 and parameters: {'hidden_layers': 12, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.004526650421967499, 'regularization': 0.001, 'dropout_rate': 0.11201193633475534}. Best is trial 1 with value: 22.102413177490234.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 58ms/step - loss: 321069536.0000 - mae: 13315.4932 - mean_absolute_percentage_error: 100.0040 - root_mean_squared_error: 17915.7891 - val_loss: 270573184.0000 - val_mae: 12607.7461 - val_mean_absolute_percentage_error: 98.2954 - val_root_mean_squared_error: 16449.1094 - learning_rate: 0.0027
Epoch 2/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 75ms/step - loss: 318601088.0000 - mae: 13274.2686 - mean_absolute_percentage_error: 99.8618 - root_mean_squared_error: 17846.8164 - val_loss: 282155616.0000 - val_mae: 12814.2383 - val_mean_absolute_percentage_error: 99.3760 - val_root_mean_squared_error: 16797.4883 - learning_rate: 0.0027
Epoch 3/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 79ms/step - loss: 313345472.0000 - mae: 13175.8887 - mean_absolute_percentage_error: 99.3560 - root_mean_squared_error: 17699.0176 - val_loss: 278141632.0000 - val_mae: 12765.413

[I 2025-02-18 00:09:12,823] Trial 45 finished with value: 21.916889190673828 and parameters: {'hidden_layers': 15, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.0027447053581130367, 'regularization': 0.001, 'dropout_rate': 0.10666397301582889}. Best is trial 45 with value: 21.916889190673828.


Epoch 1/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 321245856.0000 - mae: 13316.7188 - mean_absolute_percentage_error: 99.9896 - root_mean_squared_error: 17920.6582

[I 2025-02-18 00:10:41,388] Trial 46 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 321095968.0000 - mae: 13315.5078 - mean_absolute_percentage_error: 100.0060 - root_mean_squared_error: 17916.5059

[I 2025-02-18 00:11:45,233] Trial 47 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - loss: 321110464.0000 - mae: 13315.6270 - mean_absolute_percentage_error: 100.0014 - root_mean_squared_error: 17916.9102

[I 2025-02-18 00:13:12,196] Trial 48 pruned. Trial was pruned at epoch 0.


Epoch 1/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 57ms/step - loss: 321139168.0000 - mae: 13315.4863 - mean_absolute_percentage_error: 99.9956 - root_mean_squared_error: 17917.6895

[I 2025-02-18 00:14:07,837] Trial 49 pruned. Trial was pruned at epoch 0.



Top 10 Hyperparameter Sets:
Rank 1: {'hidden_layers': 15, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.0027447053581130367, 'regularization': 0.001, 'dropout_rate': 0.10666397301582889}
Rank 2: {'hidden_layers': 12, 'neurons': 40, 'optimizer': 'adamW', 'learning_rate': 0.0011462773920085047, 'regularization': 0.01, 'dropout_rate': 0.11283462245892283}
Rank 3: {'hidden_layers': 14, 'neurons': 100, 'optimizer': 'Nadam', 'learning_rate': 0.0034392907953942997, 'regularization': 0.001, 'dropout_rate': 0.14696151778307404}
Rank 4: {'hidden_layers': 13, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.003607639946220003, 'regularization': 0.001, 'dropout_rate': 0.1182706740916589}
Rank 5: {'hidden_layers': 13, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.0034114118822873626, 'regularization': 0.001, 'dropout_rate': 0.13685195466975933}
Rank 6: {'hidden_layers': 12, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.004580839776429419, 'regularization': 0.00

In [13]:

print("\nTop 10 Hyperparameter Sets:")
for i, params in enumerate(best_params_list, 1):
    print(f"Rank {i}: {params}")


Top 10 Hyperparameter Sets:
Rank 1: {'hidden_layers': 15, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.0027447053581130367, 'regularization': 0.001, 'dropout_rate': 0.10666397301582889}
Rank 2: {'hidden_layers': 12, 'neurons': 40, 'optimizer': 'adamW', 'learning_rate': 0.0011462773920085047, 'regularization': 0.01, 'dropout_rate': 0.11283462245892283}
Rank 3: {'hidden_layers': 14, 'neurons': 100, 'optimizer': 'Nadam', 'learning_rate': 0.0034392907953942997, 'regularization': 0.001, 'dropout_rate': 0.14696151778307404}
Rank 4: {'hidden_layers': 13, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.003607639946220003, 'regularization': 0.001, 'dropout_rate': 0.1182706740916589}
Rank 5: {'hidden_layers': 13, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.0034114118822873626, 'regularization': 0.001, 'dropout_rate': 0.13685195466975933}
Rank 6: {'hidden_layers': 12, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.004580839776429419, 'regularization': 0.00

In [None]:
import tensorflow as tf
import numpy as np

# ------------------------------------------------------------------
# Assume these are defined externally:
# X, y: the full dataset (used only for inferring input_dim)
# X_train_scaled, y_train: training data (70%)
# X_eval_scaled, y_eval: validation data (15%)
# X_test_scaled, y_test: test data (15%)
# ------------------------------------------------------------------

# Derive input dimension from X
input_dim = X.shape[1]

def preprocess(x, y):
    """Optional preprocessing before batching."""
    return x, y

def create_tf_dataset(X, y, batch_size=64, shuffle=True, cache_data=True):
    """Creates a tf.data.Dataset with shuffling, optional caching, batching, and prefetching."""
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(X), reshuffle_each_iteration=True)
    dataset = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    if cache_data:
        dataset = dataset.cache()
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

def create_model(hidden_layers, neurons, optimizer, learning_rate, regularization, dropout_rate):
    """Builds and compiles a Keras model using an explicit Input layer and best practices."""
    tf.keras.backend.clear_session()
    model = tf.keras.Sequential([tf.keras.Input(shape=(input_dim,))])
    
    # Add hidden layers: each layer = Dense -> BatchNorm -> LeakyReLU -> Dropout.
    for _ in range(hidden_layers):
        model.add(tf.keras.layers.Dense(
            neurons,
            kernel_initializer=tf.keras.initializers.HeNormal(),
            kernel_regularizer=tf.keras.regularizers.l2(regularization)
        ))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.LeakyReLU())
        model.add(tf.keras.layers.Dropout(dropout_rate))
    
    # Output layer for regression.
    model.add(tf.keras.layers.Dense(
        1,
        activation='linear',
        kernel_initializer=tf.keras.initializers.HeNormal(),
        kernel_regularizer=tf.keras.regularizers.l2(regularization)
    ))
    
    # Select optimizer.
    optimizers = {
        'adamW': tf.keras.optimizers.AdamW(learning_rate=learning_rate),
        'Adam': tf.keras.optimizers.Adam(learning_rate=learning_rate),
        'Nadam': tf.keras.optimizers.Nadam(learning_rate=learning_rate)
    }
    opt = optimizers.get(optimizer, None)
    if opt is None:
        raise ValueError(f"Unsupported optimizer: {optimizer}")
    
    model.compile(optimizer=opt,
                  loss='mse',
                  metrics=[
                      'mae',
                      tf.keras.metrics.RootMeanSquaredError(),
                      tf.keras.metrics.MeanAbsolutePercentageError()
                  ])
    return model

# Create datasets from your predefined splits.
train_dataset = create_tf_dataset(X_train_scaled, y_train, batch_size=64, shuffle=True, cache_data=True)
eval_dataset  = create_tf_dataset(X_eval_scaled, y_eval, batch_size=64, shuffle=False, cache_data=True)
test_dataset  = create_tf_dataset(X_test_scaled, y_test, batch_size=64, shuffle=False, cache_data=True)

# ------------------------------------------------------------------
# Define the top 10 parameter combinations (exactly as provided):
# ------------------------------------------------------------------
top_params_list = best_params_list

# ------------------------------------------------------------------
# Final Training for Each of the Top 10 Combinations.
# For each combination, train a model on the full training set (train_dataset)
# with early stopping (patience = 15 epochs) using the validation set (eval_dataset).
# Then evaluate on train, eval, and test sets.
# ------------------------------------------------------------------
for i, params in enumerate(top_params_list):
    print(f"--- Final Training for Combination {i} ---")
    print("Params:", params)
    
    model = create_model(**params)
    
    final_early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_mean_absolute_percentage_error',
        patience=15,  # Stop if no improvement for 15 epochs
        restore_best_weights=True,
        verbose=1
    )
    final_lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_mean_absolute_percentage_error',
        factor=0.5,
        patience=10,
        verbose=1
    )
    final_checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=f'final_model_combo_{i}.keras',
        monitor='val_mean_absolute_percentage_error',
        save_best_only=True,
        verbose=1
    )
    
    model.fit(
        train_dataset,
        validation_data=eval_dataset,
        epochs=200,
        verbose=1,
        callbacks=[final_early_stop, final_lr_scheduler, final_checkpoint]
    )
    
    train_score = model.evaluate(train_dataset, verbose=0)
    eval_score  = model.evaluate(eval_dataset, verbose=0)
    test_score  = model.evaluate(test_dataset, verbose=0)
    
    print(f"\n--- Performance for Combination {i} ---")
    print(f"Train  -> Loss: {train_score[0]:.4f}, MAE: {train_score[1]:.4f}, RMSE: {train_score[2]:.4f}, MAPE: {train_score[3]:.2f}%")
    print(f"Eval   -> Loss: {eval_score[0]:.4f}, MAE: {eval_score[1]:.4f}, RMSE: {eval_score[2]:.4f}, MAPE: {eval_score[3]:.2f}%")
    print(f"Test   -> Loss: {test_score[0]:.4f}, MAE: {test_score[1]:.4f}, RMSE: {test_score[2]:.4f}, MAPE: {test_score[3]:.2f}%\n")


--- Final Training for Combination 0 ---
Params: {'hidden_layers': 15, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.0027447053581130367, 'regularization': 0.001, 'dropout_rate': 0.10666397301582889}
Epoch 1/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 41ms/step - loss: 323167200.0000 - mae: 13334.0303 - mean_absolute_percentage_error: 100.0020 - root_mean_squared_error: 17974.9727
Epoch 1: val_mean_absolute_percentage_error improved from inf to 98.33588, saving model to final_model_combo_0.keras
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 55ms/step - loss: 323184640.0000 - mae: 13334.9883 - mean_absolute_percentage_error: 100.0019 - root_mean_squared_error: 17975.4805 - val_loss: 268664416.0000 - val_mae: 12581.4609 - val_mean_absolute_percentage_error: 98.3359 - val_root_mean_squared_error: 16390.9863 - learning_rate: 0.0027
Epoch 2/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 42ms/step - loss: 3205701

In [4]:
import tensorflow as tf
import numpy as np

# ------------------------------------------------------------------
# Assume these are defined externally:
# X, y: the full dataset (used only for inferring input_dim)
# X_train_scaled, y_train: training data (70%)
# X_eval_scaled, y_eval: validation data (15%)
# X_test_scaled, y_test: test data (15%)
# ------------------------------------------------------------------

# Derive input dimension from X
input_dim = X.shape[1]

def preprocess(x, y):
    """Optional preprocessing before batching."""
    return x, y

def create_tf_dataset(X, y, batch_size=64, shuffle=True, cache_data=True):
    """Creates a tf.data.Dataset with shuffling, optional caching, batching, and prefetching."""
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(X), reshuffle_each_iteration=True)
    dataset = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    if cache_data:
        dataset = dataset.cache()
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

def create_model(hidden_layers, neurons, optimizer, learning_rate, regularization, dropout_rate):
    """Builds and compiles a Keras model using an explicit Input layer and best practices."""
    tf.keras.backend.clear_session()
    model = tf.keras.Sequential([tf.keras.Input(shape=(input_dim,))])
    
    # Add hidden layers: each layer = Dense -> BatchNorm -> LeakyReLU -> Dropout.
    for _ in range(hidden_layers):
        model.add(tf.keras.layers.Dense(
            neurons,
            kernel_initializer=tf.keras.initializers.HeNormal(),
            kernel_regularizer=tf.keras.regularizers.l2(regularization)
        ))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.LeakyReLU())
        model.add(tf.keras.layers.Dropout(dropout_rate))
    
    # Output layer for regression.
    model.add(tf.keras.layers.Dense(
        1,
        activation='linear',
        kernel_initializer=tf.keras.initializers.HeNormal(),
        kernel_regularizer=tf.keras.regularizers.l2(regularization)
    ))
    
    # Select optimizer.
    optimizers = {
        'adamW': tf.keras.optimizers.AdamW(learning_rate=learning_rate),
        'Adam': tf.keras.optimizers.Adam(learning_rate=learning_rate),
        'Nadam': tf.keras.optimizers.Nadam(learning_rate=learning_rate)
    }
    opt = optimizers.get(optimizer, None)
    if opt is None:
        raise ValueError(f"Unsupported optimizer: {optimizer}")
    
    model.compile(optimizer=opt,
                  loss='mse',
                  metrics=[
                      'mae',
                      tf.keras.metrics.RootMeanSquaredError(),
                      tf.keras.metrics.MeanAbsolutePercentageError()
                  ])
    return model

# Create datasets from your predefined splits.
train_dataset = create_tf_dataset(X_train_scaled, y_train, batch_size=64, shuffle=True, cache_data=True)
eval_dataset  = create_tf_dataset(X_eval_scaled, y_eval, batch_size=64, shuffle=False, cache_data=True)
test_dataset  = create_tf_dataset(X_test_scaled, y_test, batch_size=64, shuffle=False, cache_data=True)

# ------------------------------------------------------------------
# Define the top 10 parameter combinations (exactly as provided):
# ------------------------------------------------------------------
top_params_list = [
    {'hidden_layers': 15, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.0027447053581130367, 'regularization': 0.001, 'dropout_rate': 0.10666397301582889}
]

# ------------------------------------------------------------------
# Final Training for Each of the Top 10 Combinations.
# For each combination, train a model on the full training set (train_dataset)
# with early stopping (patience = 15 epochs) using the validation set (eval_dataset).
# Then evaluate on train, eval, and test sets.
# ------------------------------------------------------------------
for i, params in enumerate(top_params_list):
    print(f"--- Final Training for Combination {i} ---")
    print("Params:", params)
    
    model = create_model(**params)
    
    final_early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_mean_absolute_percentage_error',
        patience=15,  # Stop if no improvement for 15 epochs
        restore_best_weights=True,
        verbose=1
    )
    final_lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_mean_absolute_percentage_error',
        factor=0.5,
        patience=10,
        verbose=1
    )
    final_checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=f'final_model_combo_{i}.keras',
        monitor='val_mean_absolute_percentage_error',
        save_best_only=True,
        verbose=1
    )
    
    model.fit(
        train_dataset,
        validation_data=eval_dataset,
        epochs=200,
        verbose=1,
        callbacks=[final_early_stop, final_lr_scheduler, final_checkpoint]
    )
    
    train_score = model.evaluate(train_dataset, verbose=0)
    eval_score  = model.evaluate(eval_dataset, verbose=0)
    test_score  = model.evaluate(test_dataset, verbose=0)
    
    print(f"\n--- Performance for Combination {i} ---")
    print(f"Train  -> Loss: {train_score[0]:.4f}, MAE: {train_score[1]:.4f}, RMSE: {train_score[2]:.4f}, MAPE: {train_score[3]:.2f}%")
    print(f"Eval   -> Loss: {eval_score[0]:.4f}, MAE: {eval_score[1]:.4f}, RMSE: {eval_score[2]:.4f}, MAPE: {eval_score[3]:.2f}%")
    print(f"Test   -> Loss: {test_score[0]:.4f}, MAE: {test_score[1]:.4f}, RMSE: {test_score[2]:.4f}, MAPE: {test_score[3]:.2f}%\n")


--- Final Training for Combination 0 ---
Params: {'hidden_layers': 15, 'neurons': 100, 'optimizer': 'Adam', 'learning_rate': 0.0027447053581130367, 'regularization': 0.001, 'dropout_rate': 0.10666397301582889}

Epoch 1/200
[1m148/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 18ms/step - loss: 325092192.0000 - mae: 13421.6494 - mean_absolute_percentage_error: 99.9969 - root_mean_squared_error: 18028.1914
Epoch 1: val_mean_absolute_percentage_error improved from inf to 98.11957, saving model to final_model_combo_0.keras
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 34ms/step - loss: 325081280.0000 - mae: 13421.3750 - mean_absolute_percentage_error: 99.9968 - root_mean_squared_error: 18027.9316 - val_loss: 270214432.0000 - val_mae: 12577.7656 - val_mean_absolute_percentage_error: 98.1196 - val_root_mean_squared_error: 16438.2012 - learning_rate: 0.0027
Epoch 2/200
[1m148/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 18ms/step - loss: 32234112

## Summary

1. ANNs require extensive hyperparameter tuning to achieve okay performance.
2. ANNs lack in terms of speed, precision and simplicity behind tree boosting/ensemble methods.