In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, make_scorer, mean_absolute_percentage_error
from scipy.stats import loguniform
import joblib
import tensorflow as tf
from sklearn.model_selection import ParameterSampler

# Pandas display: never truncate rows, columns or cell text, and keep wide
# frames on a single line instead of wrapping them.
for _option_name, _option_value in (
    ("display.max_rows", None),
    ("display.max_columns", None),
    ("display.max_colwidth", None),
    ("display.expand_frame_repr", False),
):
    pd.set_option(_option_name, _option_value)

# TensorFlow CPU thread pools: 18 threads inside a single op (intra) and
# 18 threads across independent ops (inter).
_tf_threading = tf.config.threading
_tf_threading.set_intra_op_parallelism_threads(18)
_tf_threading.set_inter_op_parallelism_threads(18)

import optuna
from optuna.integration import TFKerasPruningCallback

In [23]:
# Load the regression table from disk and report its (rows, columns) shape.
# The "_OHE" suffix suggests the categorical columns are already one-hot encoded.
ohe_csv_path = '../DataSet/RegressionData/healthinsurance_OHE.csv'
df_OHE = pd.read_csv(ohe_csv_path)
print(df_OHE.shape)

(13648, 146)


In [24]:
# Target is the 'claim' column; every other column is a predictor.
X = df_OHE.drop(columns='claim')
y = df_OHE['claim']

# 70 / 15 / 15 split: hold out 30% first, then cut that hold-out in half
# to obtain the validation and test partitions. Both cuts use the same seed.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, random_state=69
)
X_eval, X_test, y_eval, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, random_state=69
)

# Fit the scaler on the training partition only, so validation and test
# statistics never influence the transform, then persist it for reuse.
scaler = StandardScaler().fit(X_train)
joblib.dump(scaler, 'scaler.pkl')

# Apply the training-fitted transform to all three partitions.
X_train_scaled, X_eval_scaled, X_test_scaled = (
    scaler.transform(partition) for partition in (X_train, X_eval, X_test)
)

# Report the size of each partition.
for _label, _scaled in (
    ('Training set: ', X_train_scaled),
    ('Validation set: ', X_eval_scaled),
    ('Test set: ', X_test_scaled),
):
    print(_label, _scaled.shape)

Training set:  (9553, 145)
Validation set:  (2047, 145)
Test set:  (2048, 145)


In [25]:
import tensorflow as tf
import optuna
from optuna.integration import TFKerasPruningCallback

# This cell relies on names created by earlier cells: X and y (the full
# feature table and target) plus the train / eval / test partitions and
# their scaled feature arrays.
#
# Number of feature columns in X. create_model() reads this module-level
# value to size the network's Input layer.
input_dim = X.shape[1]

def preprocess(x, y):
    """Identity hook applied to each (features, target) element before batching.

    Returns the pair unchanged; it exists as the single place to add
    per-element transformations later.
    """
    element = (x, y)
    return element

def create_tf_dataset(X, y, batch_size=64, shuffle=True, cache_data=True):
    """Build a batched, prefetched tf.data.Dataset from in-memory features and targets.

    Pipeline order: slice -> preprocess -> cache -> shuffle -> batch -> prefetch.

    Args:
        X: Feature array; sliced along its first axis. ``len(X)`` is used as the
            shuffle buffer size so the whole dataset is shuffled.
        y: Targets aligned with ``X`` along the first axis.
        batch_size: Number of elements per batch.
        shuffle: If True, reshuffle the elements at the start of every epoch.
        cache_data: If True, cache the preprocessed elements in memory.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, targets)`` batches.
    """
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    dataset = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    if cache_data:
        # Cache BEFORE shuffling: a cache replays exactly the elements (and order)
        # it recorded on the first pass, so shuffling upstream of it would freeze
        # the first epoch's order and defeat reshuffle_each_iteration.
        dataset = dataset.cache()
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(X), reshuffle_each_iteration=True)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

def create_model(hidden_layers, neurons, optimizer, learning_rate, regularization, dropout_rate):
    """Build and compile a fully connected regression network.

    Architecture: Input(input_dim) -> N x [Dense(neurons) -> BatchNormalization
    -> LeakyReLU -> Dropout] -> Dense(1, linear). Every Dense layer uses He-normal
    initialisation and an L2 kernel penalty. The input width is read from the
    module-level ``input_dim``.

    Args:
        hidden_layers: Number of hidden blocks. At least one block is always
            built, even for values below 1.
        neurons: Units in each hidden Dense layer.
        optimizer: One of 'adamW', 'Adam' or 'Nadam'.
        learning_rate: Learning rate passed to the chosen optimizer.
        regularization: L2 factor applied to every Dense kernel.
        dropout_rate: Dropout rate used after every hidden block.

    Returns:
        A compiled ``tf.keras.Sequential`` model (loss: MSE; metrics: MAE, RMSE, MAPE).

    Raises:
        ValueError: If ``optimizer`` is not one of the supported names.
    """
    # Drop state left over from previously built models (this runs once per trial).
    tf.keras.backend.clear_session()

    # Map names to optimizer classes so that only the requested optimizer is
    # instantiated, and reject unknown names before any layer is built.
    optimizer_classes = {
        'adamW': tf.keras.optimizers.AdamW,
        'Adam': tf.keras.optimizers.Adam,
        'Nadam': tf.keras.optimizers.Nadam,
    }
    if optimizer not in optimizer_classes:
        raise ValueError(f"Unsupported optimizer: {optimizer}")

    model = tf.keras.Sequential([
        tf.keras.Input(shape=(input_dim,))
    ])

    # Hidden blocks. max(..., 1) keeps the previous guarantee of one unconditional
    # first block followed by `hidden_layers - 1` additional ones.
    for _ in range(max(hidden_layers, 1)):
        model.add(tf.keras.layers.Dense(
            neurons,
            kernel_initializer=tf.keras.initializers.HeNormal(),
            kernel_regularizer=tf.keras.regularizers.l2(regularization)
        ))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.LeakyReLU())
        model.add(tf.keras.layers.Dropout(dropout_rate))

    # Output layer: a single linear unit for regression.
    model.add(tf.keras.layers.Dense(
        1,
        activation='linear',
        kernel_initializer=tf.keras.initializers.HeNormal(),
        kernel_regularizer=tf.keras.regularizers.l2(regularization)
    ))

    opt = optimizer_classes[optimizer](learning_rate=learning_rate)

    # Metric order matters to callers that index evaluate() results positionally:
    # [loss, mae, rmse, mape].
    model.compile(optimizer=opt,
                  loss='mse',
                  metrics=[
                      'mae',
                      tf.keras.metrics.RootMeanSquaredError(),
                      tf.keras.metrics.MeanAbsolutePercentageError()
                  ])
    return model

# Create datasets (only once, outside objective) so every trial reuses them.
train_dataset = create_tf_dataset(X_train_scaled, y_train, batch_size=64, shuffle=True, cache_data=True)
eval_dataset  = create_tf_dataset(X_eval_scaled, y_eval, batch_size=64, shuffle=False, cache_data=True)
# The test set must use the standardized features, like train and eval: the network
# is only ever fit on scaled inputs, so raw X_test would give meaningless test metrics.
test_dataset  = create_tf_dataset(X_test_scaled, y_test, batch_size=64, shuffle=False, cache_data=True)

def objective(trial):
    """Optuna objective: train one candidate network and return its validation MAPE.

    Samples the hyperparameters from the trial, builds the model with
    create_model(), trains it on the module-level ``train_dataset`` while
    validating on ``eval_dataset``, and reports the final validation MAPE.

    Args:
        trial: The ``optuna.trial.Trial`` supplying hyperparameter suggestions.

    Returns:
        Validation mean absolute percentage error (in percent) as a float;
        the study minimises this value.

    Raises:
        optuna.TrialPruned: Raised from inside ``fit`` by the pruning callback
            when Optuna decides the trial is unpromising.
    """
    # All callbacks watch the same validation metric.
    monitor = 'val_mean_absolute_percentage_error'

    # ---------------------
    # 1) Define hyperparameter search space
    # ---------------------
    hidden_layers = trial.suggest_int("hidden_layers", 5, 30)
    neurons = trial.suggest_categorical("neurons", [20, 40, 60, 80, 100])
    optimizer_choice = trial.suggest_categorical("optimizer", ["adamW", "Nadam", "Adam"])
    learning_rate = trial.suggest_float("learning_rate", 0.0005, 0.005, log=True)
    regularization = trial.suggest_categorical("regularization", [0.0001, 0.001, 0.01])
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.3)

    # ---------------------
    # 2) Build the model
    # ---------------------
    model = create_model(hidden_layers, neurons, optimizer_choice, learning_rate, regularization, dropout_rate)

    # ---------------------
    # 3) Define fresh callbacks (callbacks are stateful, so never share them across trials)
    # ---------------------
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor=monitor,
            patience=20,
            restore_best_weights=True,
            verbose=0
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor=monitor,
            factor=0.5,
            patience=10,
            verbose=0
        ),
        tf.keras.callbacks.ModelCheckpoint(
            # Use a unique checkpoint filename per trial
            filepath=f'best_model_trial_{trial.number}.keras',
            monitor=monitor,
            save_best_only=True,
            verbose=0
        ),
        # Lets Optuna stop unpromising trials early.
        TFKerasPruningCallback(trial, monitor=monitor),
    ]

    # ---------------------
    # 4) Train the model
    # ---------------------
    model.fit(
        train_dataset,
        validation_data=eval_dataset,
        epochs=200,
        verbose=0,  # no per-epoch output: up to 50 trials x 200 epochs would flood the cell
        callbacks=callbacks
    )

    # ---------------------
    # 5) Evaluate the model
    # ---------------------
    # Look the metric up by name rather than by position, so the result does not
    # depend on the order in which metrics were passed to compile().
    eval_metrics = model.evaluate(eval_dataset, verbose=0, return_dict=True)
    return float(eval_metrics['mean_absolute_percentage_error'])

# ---------------------
# Run the Optuna search: 50 trials, lower objective value is better
# ---------------------
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# ---------------------
# Report the best trial found
# ---------------------
best_trial = study.best_trial
best_params = best_trial.params
print("\nBest Hyperparameters:")
print(best_params)
print(f"Best Eval MAPE: {best_trial.value:.2f}%")

# ---------------------
# Rebuild an untrained network from the best hyperparameters
# (the trial's parameter names match create_model's keyword arguments)
# ---------------------
best_model = create_model(**best_params)

# Fresh callbacks for the final fit; all three watch the validation MAPE.
# Note the patience is shorter here (10 / 5 epochs) than in objective() (20 / 10).
final_callback_kwargs = {'monitor': 'val_mean_absolute_percentage_error', 'verbose': 1}
final_early_stop = tf.keras.callbacks.EarlyStopping(
    patience=10, restore_best_weights=True, **final_callback_kwargs
)
final_lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    factor=0.5, patience=5, **final_callback_kwargs
)
final_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_model_final.keras', save_best_only=True, **final_callback_kwargs
)

# ---------------------
# Train the best configuration (silently; the callbacks print their own events)
# ---------------------
best_model.fit(
    train_dataset,
    validation_data=eval_dataset,
    epochs=200,
    verbose=0,
    callbacks=[final_early_stop, final_lr_scheduler, final_checkpoint],
)

# ---------------------
# Score the trained model on every partition.
# Each result is indexed positionally below: [loss, mae, rmse, mape].
# ---------------------
train_score, eval_score, test_score = (
    best_model.evaluate(split, verbose=0)
    for split in (train_dataset, eval_dataset, test_dataset)
)

print("\n--- Performance of the Best Model ---")
print(f"Train  -> Loss: {train_score[0]:.4f}, MAE: {train_score[1]:.4f}, "
      f"RMSE: {train_score[2]:.4f}, MAPE: {train_score[3]:.2f}%")
print(f"Eval   -> Loss: {eval_score[0]:.4f}, MAE: {eval_score[1]:.4f}, "
      f"RMSE: {eval_score[2]:.4f}, MAPE: {eval_score[3]:.2f}%")
print(f"Test   -> Loss: {test_score[0]:.4f}, MAE: {test_score[1]:.4f}, "
      f"RMSE: {test_score[2]:.4f}, MAPE: {test_score[3]:.2f}%")


[I 2025-02-16 23:12:50,920] A new study created in memory with name: no-name-14083e96-018a-4734-bd77-5995c6129c73


Epoch 1/200


[W 2025-02-16 23:13:14,726] Trial 0 failed with parameters: {'hidden_layers': 13, 'neurons': 80, 'optimizer': 'adamW', 'learning_rate': 0.0007843895312959535, 'regularization': 0.01, 'dropout_rate': 0.1361515009142667} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "C:\Users\kobra\AppData\Roaming\Python\Python311\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\kobra\AppData\Local\Temp\ipykernel_35444\4055246380.py", line 130, in objective
    model.fit(
  File "C:\Users\kobra\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\kobra\AppData\Roaming\Python\Python311\site-packages\keras\src\backend\tensorflow\trainer.py", line 314, in fit
    logs = self.train_function(iterator)
           ^^^^^^^^^^^^^^

KeyboardInterrupt: 

In [None]:
import pandas as pd

# Rank only trials that ran to completion. A failed trial (for example one that
# was interrupted) has value=None, which makes sorted() raise TypeError, and a
# pruned trial may carry only an intermediate value that is not comparable with
# a fully trained model's score.

# Option 1: Using the trial objects directly.
completed_trials = [
    t for t in study.trials
    if t.state == optuna.trial.TrialState.COMPLETE and t.value is not None
]
top_trials = sorted(completed_trials, key=lambda t: t.value)[:10]
print("\nTop 10 Trials:")
for trial in top_trials:
    print(f"Trial {trial.number}: Eval MAPE = {trial.value:.2f}%")
    print("Params:", trial.params)
    print()

# Option 2: Using the trials dataframe (its "state" column holds the state name).
df = study.trials_dataframe()
top10_df = df[df["state"] == "COMPLETE"].sort_values("value", ascending=True).head(10)
print("\nTop 10 Trials DataFrame:")
print(top10_df)



Top 10 Trials:
Trial 0: Eval MAPE = 18.85%
Params: {'hidden_layers': 23, 'neurons': 100, 'optimizer': 'adamW', 'learning_rate': 0.0005334995730188107, 'regularization': 0.0001, 'dropout_rate': 0.15138876513967242}

Trial 6: Eval MAPE = 21.83%
Params: {'hidden_layers': 7, 'neurons': 80, 'optimizer': 'Nadam', 'learning_rate': 0.0025928812217160123, 'regularization': 0.0001, 'dropout_rate': 0.17230719502465075}

Trial 4: Eval MAPE = 22.03%
Params: {'hidden_layers': 9, 'neurons': 100, 'optimizer': 'Nadam', 'learning_rate': 0.0015401175856105049, 'regularization': 0.001, 'dropout_rate': 0.20845154764128387}

Trial 42: Eval MAPE = 23.31%
Params: {'hidden_layers': 12, 'neurons': 80, 'optimizer': 'Nadam', 'learning_rate': 0.004464723150421391, 'regularization': 0.0001, 'dropout_rate': 0.14150532073670605}

Trial 45: Eval MAPE = 24.35%
Params: {'hidden_layers': 12, 'neurons': 80, 'optimizer': 'Nadam', 'learning_rate': 0.00356427831060853, 'regularization': 0.0001, 'dropout_rate': 0.13651109375

In [26]:
import tensorflow as tf
import numpy as np

# ------------------------------------------------------------------
# Names this cell expects from earlier cells:
# X, y: the full feature table and target (X is used here only for input_dim)
# X_train_scaled, y_train: training partition (70%)
# X_eval_scaled, y_eval: validation partition (15%)
# X_test_scaled, y_test: test partition (15%)
# ------------------------------------------------------------------

# Number of feature columns in X; create_model() reads this module-level
# value to size the network's Input layer.
input_dim = X.shape[1]

def preprocess(x, y):
    """Identity hook applied to each (features, target) element before batching.

    Returns the pair unchanged. This redefines (and shadows) the function of
    the same name from an earlier cell.
    """
    element = (x, y)
    return element

def create_tf_dataset(X, y, batch_size=64, shuffle=True, cache_data=True):
    """Build a batched, prefetched tf.data.Dataset from in-memory features and targets.

    Pipeline order: slice -> preprocess -> cache -> shuffle -> batch -> prefetch.
    This redefines (and shadows) the function of the same name from an earlier cell.

    Args:
        X: Feature array; sliced along its first axis. ``len(X)`` is used as the
            shuffle buffer size so the whole dataset is shuffled.
        y: Targets aligned with ``X`` along the first axis.
        batch_size: Number of elements per batch.
        shuffle: If True, reshuffle the elements at the start of every epoch.
        cache_data: If True, cache the preprocessed elements in memory.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, targets)`` batches.
    """
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    dataset = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    if cache_data:
        # Cache BEFORE shuffling: a cache replays exactly the elements (and order)
        # it recorded on the first pass, so shuffling upstream of it would freeze
        # the first epoch's order and defeat reshuffle_each_iteration.
        dataset = dataset.cache()
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(X), reshuffle_each_iteration=True)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

def create_model(hidden_layers, neurons, optimizer, learning_rate, regularization, dropout_rate):
    """Build and compile a fully connected regression network.

    Architecture: Input(input_dim) -> hidden_layers x [Dense(neurons) ->
    BatchNormalization -> LeakyReLU -> Dropout] -> Dense(1, linear). Every Dense
    layer uses He-normal initialisation and an L2 kernel penalty. The input width
    is read from the module-level ``input_dim``. This redefines (and shadows) the
    function of the same name from an earlier cell.

    Args:
        hidden_layers: Number of hidden blocks.
        neurons: Units in each hidden Dense layer.
        optimizer: One of 'adamW', 'Adam' or 'Nadam'.
        learning_rate: Learning rate passed to the chosen optimizer.
        regularization: L2 factor applied to every Dense kernel.
        dropout_rate: Dropout rate used after every hidden block.

    Returns:
        A compiled ``tf.keras.Sequential`` model (loss: MSE; metrics: MAE, RMSE, MAPE).

    Raises:
        ValueError: If ``optimizer`` is not one of the supported names.
    """
    # Drop state left over from previously built models (this runs once per combination).
    tf.keras.backend.clear_session()

    # Map names to optimizer classes so that only the requested optimizer is
    # instantiated, and reject unknown names before any layer is built.
    optimizer_classes = {
        'adamW': tf.keras.optimizers.AdamW,
        'Adam': tf.keras.optimizers.Adam,
        'Nadam': tf.keras.optimizers.Nadam,
    }
    if optimizer not in optimizer_classes:
        raise ValueError(f"Unsupported optimizer: {optimizer}")

    model = tf.keras.Sequential([tf.keras.Input(shape=(input_dim,))])

    # Add hidden layers: each layer = Dense -> BatchNorm -> LeakyReLU -> Dropout.
    for _ in range(hidden_layers):
        model.add(tf.keras.layers.Dense(
            neurons,
            kernel_initializer=tf.keras.initializers.HeNormal(),
            kernel_regularizer=tf.keras.regularizers.l2(regularization)
        ))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.LeakyReLU())
        model.add(tf.keras.layers.Dropout(dropout_rate))

    # Output layer: a single linear unit for regression.
    model.add(tf.keras.layers.Dense(
        1,
        activation='linear',
        kernel_initializer=tf.keras.initializers.HeNormal(),
        kernel_regularizer=tf.keras.regularizers.l2(regularization)
    ))

    opt = optimizer_classes[optimizer](learning_rate=learning_rate)

    # Metric order matters to callers that index evaluate() results positionally:
    # [loss, mae, rmse, mape].
    model.compile(optimizer=opt,
                  loss='mse',
                  metrics=[
                      'mae',
                      tf.keras.metrics.RootMeanSquaredError(),
                      tf.keras.metrics.MeanAbsolutePercentageError()
                  ])
    return model

# Wrap each scaled partition in a tf.data pipeline. Only the training data is
# shuffled; all three use the same batch size and in-memory caching.
_dataset_kwargs = dict(batch_size=64, cache_data=True)
train_dataset = create_tf_dataset(X_train_scaled, y_train, shuffle=True, **_dataset_kwargs)
eval_dataset = create_tf_dataset(X_eval_scaled, y_eval, shuffle=False, **_dataset_kwargs)
test_dataset = create_tf_dataset(X_test_scaled, y_test, shuffle=False, **_dataset_kwargs)

# ------------------------------------------------------------------
# Ten hyperparameter combinations to retrain, hand-copied from the
# "Top 10 Trials" report of the Optuna study. Each dict's keys match the
# keyword arguments of create_model().
#
# NOTE(review): the first entry's learning_rate (0.0025334995730188107) is
# not what the report printed for Trial 0 (0.0005334995730188107); every
# other value of that entry matches. Confirm whether this is a deliberate
# tweak or a copy error.
# ------------------------------------------------------------------
top_params_list = [
    {'hidden_layers': 23, 'neurons': 100, 'optimizer': 'adamW', 'learning_rate': 0.0025334995730188107, 'regularization': 0.0001, 'dropout_rate': 0.15138876513967242},
    {'hidden_layers': 7,  'neurons': 80,  'optimizer': 'Nadam', 'learning_rate': 0.0025928812217160123, 'regularization': 0.0001, 'dropout_rate': 0.17230719502465075},
    {'hidden_layers': 9,  'neurons': 100, 'optimizer': 'Nadam', 'learning_rate': 0.0015401175856105049, 'regularization': 0.001,   'dropout_rate': 0.20845154764128387},
    {'hidden_layers': 12, 'neurons': 80,  'optimizer': 'Nadam', 'learning_rate': 0.004464723150421391,  'regularization': 0.0001, 'dropout_rate': 0.14150532073670605},
    {'hidden_layers': 12, 'neurons': 80,  'optimizer': 'Nadam', 'learning_rate': 0.00356427831060853,   'regularization': 0.0001, 'dropout_rate': 0.13651109375374137},
    {'hidden_layers': 9,  'neurons': 60,  'optimizer': 'adamW', 'learning_rate': 0.003704347268549383,  'regularization': 0.0001, 'dropout_rate': 0.21667427276282802},
    {'hidden_layers': 13, 'neurons': 80,  'optimizer': 'Nadam', 'learning_rate': 0.00481253317014211,  'regularization': 0.0001, 'dropout_rate': 0.16865578211114332},
    {'hidden_layers': 15, 'neurons': 100, 'optimizer': 'Nadam', 'learning_rate': 0.0028429396647837836,   'regularization': 0.0001, 'dropout_rate': 0.2046873150029404},
    {'hidden_layers': 24, 'neurons': 100, 'optimizer': 'Adam',  'learning_rate': 0.0023260428486328415,  'regularization': 0.0001, 'dropout_rate': 0.27704282022356197},
    {'hidden_layers': 13, 'neurons': 80,  'optimizer': 'Nadam', 'learning_rate': 0.0048873876092685635,  'regularization': 0.0001, 'dropout_rate': 0.2093953622066992}
]

# ------------------------------------------------------------------
# Retrain every combination in top_params_list from scratch.
# Each model is fit on train_dataset and validated on eval_dataset, with
# early stopping (patience 15), LR halving on plateau (patience 10) and a
# per-combination best-only checkpoint, all driven by the validation MAPE.
# The restored-best model is then scored on the train, eval and test sets.
# ------------------------------------------------------------------
final_monitor = 'val_mean_absolute_percentage_error'

for i, params in enumerate(top_params_list):
    print(f"--- Final Training for Combination {i} ---")
    print("Params:", params)

    model = create_model(**params)

    # Callbacks are stateful, so build a fresh set for every combination.
    final_early_stop = tf.keras.callbacks.EarlyStopping(
        monitor=final_monitor, patience=15, restore_best_weights=True, verbose=1
    )
    final_lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
        monitor=final_monitor, factor=0.5, patience=10, verbose=1
    )
    final_checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=f'final_model_combo_{i}.keras',
        monitor=final_monitor,
        save_best_only=True,
        verbose=1,
    )

    model.fit(
        train_dataset,
        validation_data=eval_dataset,
        epochs=200,
        verbose=1,
        callbacks=[final_early_stop, final_lr_scheduler, final_checkpoint],
    )

    # Each score list is indexed positionally below: [loss, mae, rmse, mape].
    train_score, eval_score, test_score = (
        model.evaluate(split, verbose=0)
        for split in (train_dataset, eval_dataset, test_dataset)
    )

    print(f"\n--- Performance for Combination {i} ---")
    print(f"Train  -> Loss: {train_score[0]:.4f}, MAE: {train_score[1]:.4f}, RMSE: {train_score[2]:.4f}, MAPE: {train_score[3]:.2f}%")
    print(f"Eval   -> Loss: {eval_score[0]:.4f}, MAE: {eval_score[1]:.4f}, RMSE: {eval_score[2]:.4f}, MAPE: {eval_score[3]:.2f}%")
    print(f"Test   -> Loss: {test_score[0]:.4f}, MAE: {test_score[1]:.4f}, RMSE: {test_score[2]:.4f}, MAPE: {test_score[3]:.2f}%\n")


--- Final Training for Combination 0 ---
Params: {'hidden_layers': 23, 'neurons': 100, 'optimizer': 'adamW', 'learning_rate': 0.0025334995730188106, 'regularization': 0.0001, 'dropout_rate': 0.15138876513967242}
Epoch 1/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 28ms/step - loss: 321359936.0000 - mae: 13295.5889 - mean_absolute_percentage_error: 99.9702 - root_mean_squared_error: 17924.9297
Epoch 1: val_mean_absolute_percentage_error improved from inf to 84.64451, saving model to final_model_combo_0.keras
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 41ms/step - loss: 321403200.0000 - mae: 13297.0645 - mean_absolute_percentage_error: 99.9701 - root_mean_squared_error: 17926.1543 - val_loss: 112729408.0000 - val_mae: 8582.2744 - val_mean_absolute_percentage_error: 84.6445 - val_root_mean_squared_error: 10617.4102 - learning_rate: 0.0025
Epoch 2/200
[1m149/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 30ms/step - loss: 31876064

In [29]:
import tensorflow as tf
import numpy as np

# ------------------------------------------------------------------
# Names this cell expects from earlier cells:
# X, y: the full feature table and target (X is used here only for input_dim)
# X_train_scaled, y_train: training partition (70%)
# X_eval_scaled, y_eval: validation partition (15%)
# X_test_scaled, y_test: test partition (15%)
# ------------------------------------------------------------------

# Number of feature columns in X; create_model() reads this module-level
# value to size the network's Input layer.
input_dim = X.shape[1]

def preprocess(x, y):
    """Identity hook applied to each (features, target) element before batching.

    Returns the pair unchanged. This redefines (and shadows) the function of
    the same name from earlier cells.
    """
    element = (x, y)
    return element

def create_tf_dataset(X, y, batch_size=64, shuffle=True, cache_data=True):
    """Build a batched, prefetched tf.data.Dataset from in-memory features and targets.

    Pipeline order: slice -> preprocess -> cache -> shuffle -> batch -> prefetch.
    This redefines (and shadows) the function of the same name from earlier cells.

    Args:
        X: Feature array; sliced along its first axis. ``len(X)`` is used as the
            shuffle buffer size so the whole dataset is shuffled.
        y: Targets aligned with ``X`` along the first axis.
        batch_size: Number of elements per batch.
        shuffle: If True, reshuffle the elements at the start of every epoch.
        cache_data: If True, cache the preprocessed elements in memory.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, targets)`` batches.
    """
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    dataset = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    if cache_data:
        # Cache BEFORE shuffling: a cache replays exactly the elements (and order)
        # it recorded on the first pass, so shuffling upstream of it would freeze
        # the first epoch's order and defeat reshuffle_each_iteration.
        dataset = dataset.cache()
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(X), reshuffle_each_iteration=True)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

def create_model(hidden_layers, neurons, optimizer, learning_rate, regularization, dropout_rate):
    """Build and compile a fully connected regression network.

    Architecture: Input(input_dim) -> hidden_layers x [Dense(neurons) ->
    BatchNormalization -> LeakyReLU -> Dropout] -> Dense(1, linear). Every Dense
    layer uses He-normal initialisation and an L2 kernel penalty. The input width
    is read from the module-level ``input_dim``. This redefines (and shadows) the
    function of the same name from earlier cells.

    Args:
        hidden_layers: Number of hidden blocks.
        neurons: Units in each hidden Dense layer.
        optimizer: One of 'adamW', 'Adam' or 'Nadam'.
        learning_rate: Learning rate passed to the chosen optimizer.
        regularization: L2 factor applied to every Dense kernel.
        dropout_rate: Dropout rate used after every hidden block.

    Returns:
        A compiled ``tf.keras.Sequential`` model (loss: MSE; metrics: MAE, RMSE, MAPE).

    Raises:
        ValueError: If ``optimizer`` is not one of the supported names.
    """
    # Drop state left over from previously built models (this runs once per combination).
    tf.keras.backend.clear_session()

    # Map names to optimizer classes so that only the requested optimizer is
    # instantiated, and reject unknown names before any layer is built.
    optimizer_classes = {
        'adamW': tf.keras.optimizers.AdamW,
        'Adam': tf.keras.optimizers.Adam,
        'Nadam': tf.keras.optimizers.Nadam,
    }
    if optimizer not in optimizer_classes:
        raise ValueError(f"Unsupported optimizer: {optimizer}")

    model = tf.keras.Sequential([tf.keras.Input(shape=(input_dim,))])

    # Add hidden layers: each layer = Dense -> BatchNorm -> LeakyReLU -> Dropout.
    for _ in range(hidden_layers):
        model.add(tf.keras.layers.Dense(
            neurons,
            kernel_initializer=tf.keras.initializers.HeNormal(),
            kernel_regularizer=tf.keras.regularizers.l2(regularization)
        ))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.LeakyReLU())
        model.add(tf.keras.layers.Dropout(dropout_rate))

    # Output layer: a single linear unit for regression.
    model.add(tf.keras.layers.Dense(
        1,
        activation='linear',
        kernel_initializer=tf.keras.initializers.HeNormal(),
        kernel_regularizer=tf.keras.regularizers.l2(regularization)
    ))

    opt = optimizer_classes[optimizer](learning_rate=learning_rate)

    # Metric order matters to callers that index evaluate() results positionally:
    # [loss, mae, rmse, mape].
    model.compile(optimizer=opt,
                  loss='mse',
                  metrics=[
                      'mae',
                      tf.keras.metrics.RootMeanSquaredError(),
                      tf.keras.metrics.MeanAbsolutePercentageError()
                  ])
    return model

# Wrap each scaled partition in a tf.data pipeline. Only the training data is
# shuffled; all three use the same batch size and in-memory caching.
_dataset_kwargs = dict(batch_size=64, cache_data=True)
train_dataset = create_tf_dataset(X_train_scaled, y_train, shuffle=True, **_dataset_kwargs)
eval_dataset = create_tf_dataset(X_eval_scaled, y_eval, shuffle=False, **_dataset_kwargs)
test_dataset = create_tf_dataset(X_test_scaled, y_test, shuffle=False, **_dataset_kwargs)

# ------------------------------------------------------------------
# A single hand-edited hyperparameter combination to retrain. Its keys match
# the keyword arguments of create_model().
#
# It equals the fourth entry of the earlier ten-combination list except for
# 'regularization', which is 0.0002 here instead of 0.0001.
# NOTE(review): 0.0002 is not one of the values the Optuna search sampled
# from ([0.0001, 0.001, 0.01]) -- presumably a manual experiment; confirm.
# ------------------------------------------------------------------
top_params_list = [
    {'hidden_layers': 12, 'neurons': 80,  'optimizer': 'Nadam', 'learning_rate': 0.004464723150421391,  'regularization': 0.0002, 'dropout_rate': 0.14150532073670605},

]

# ------------------------------------------------------------------
# Retrain every combination in top_params_list from scratch.
# Each model is fit on train_dataset and validated on eval_dataset, with
# early stopping (patience 15), LR halving on plateau (patience 10) and a
# per-combination best-only checkpoint, all driven by the validation MAPE.
# The restored-best model is then scored on the train, eval and test sets.
#
# NOTE: the checkpoint filename pattern is the same as in the earlier
# ten-combination loop, so index 0 here overwrites that loop's
# final_model_combo_0.keras.
# ------------------------------------------------------------------
final_monitor = 'val_mean_absolute_percentage_error'

for i, params in enumerate(top_params_list):
    print(f"--- Final Training for Combination {i} ---")
    print("Params:", params)

    model = create_model(**params)

    # Callbacks are stateful, so build a fresh set for every combination.
    final_early_stop = tf.keras.callbacks.EarlyStopping(
        monitor=final_monitor, patience=15, restore_best_weights=True, verbose=1
    )
    final_lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
        monitor=final_monitor, factor=0.5, patience=10, verbose=1
    )
    final_checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=f'final_model_combo_{i}.keras',
        monitor=final_monitor,
        save_best_only=True,
        verbose=1,
    )

    model.fit(
        train_dataset,
        validation_data=eval_dataset,
        epochs=200,
        verbose=1,
        callbacks=[final_early_stop, final_lr_scheduler, final_checkpoint],
    )

    # Each score list is indexed positionally below: [loss, mae, rmse, mape].
    train_score, eval_score, test_score = (
        model.evaluate(split, verbose=0)
        for split in (train_dataset, eval_dataset, test_dataset)
    )

    print(f"\n--- Performance for Combination {i} ---")
    print(f"Train  -> Loss: {train_score[0]:.4f}, MAE: {train_score[1]:.4f}, RMSE: {train_score[2]:.4f}, MAPE: {train_score[3]:.2f}%")
    print(f"Eval   -> Loss: {eval_score[0]:.4f}, MAE: {eval_score[1]:.4f}, RMSE: {eval_score[2]:.4f}, MAPE: {eval_score[3]:.2f}%")
    print(f"Test   -> Loss: {test_score[0]:.4f}, MAE: {test_score[1]:.4f}, RMSE: {test_score[2]:.4f}, MAPE: {test_score[3]:.2f}%\n")


--- Final Training for Combination 0 ---
Params: {'hidden_layers': 12, 'neurons': 80, 'optimizer': 'Nadam', 'learning_rate': 0.004464723150421391, 'regularization': 0.0002, 'dropout_rate': 0.14150532073670605}
Epoch 1/200
[1m147/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - loss: 330489440.0000 - mae: 13591.5234 - mean_absolute_percentage_error: 99.9896 - root_mean_squared_error: 18176.4902
Epoch 1: val_mean_absolute_percentage_error improved from inf to 96.04765, saving model to final_model_combo_0.keras
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 18ms/step - loss: 330313952.0000 - mae: 13586.3262 - mean_absolute_percentage_error: 99.9885 - root_mean_squared_error: 18171.7148 - val_loss: 235792864.0000 - val_mae: 11966.9395 - val_mean_absolute_percentage_error: 96.0477 - val_root_mean_squared_error: 15355.5479 - learning_rate: 0.0045
Epoch 2/200
[1m147/150[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - loss: 325257824