# Hyperparameter Optimization

In [None]:
import keras
import tensorflow as tf
# Report the installed Keras and TensorFlow versions so results can be tied to an environment.
print("Keras Current Version:", keras.__version__, "Tensorflow Current Version:", tf.__version__)

Keras Current Version: 3.3.3 Tensorflow Current Version: 2.16.1


In [None]:
# !pip uninstall tf-keras

In [None]:
# !pip install keras-tuner

In [None]:
# !pip install tensorflow==2.16.1

# Imports

In [None]:
import time
import numpy as np
import pandas as pd
from joblib import dump, load
import random
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import SGD, RMSprop, Adam
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.initializers import RandomNormal, RandomUniform, GlorotUniform, GlorotNormal, HeNormal
from keras.optimizers.schedules import ExponentialDecay
from keras_tuner import RandomSearch, GridSearch, BayesianOptimization
from keras_tuner.engine.hyperparameters import HyperParameters

# Single source of truth for the seed shared by Python, NumPy and TensorFlow.
SEED = 46

random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)


# Functions

In [None]:
def preprocess_data(filepath, scaler_path='scaler.joblib', target='Outcome'):
    """Load a CSV, standardize its features and persist the fitted scaler.

    Args:
        filepath: Path of the CSV file to read.
        scaler_path: Where the fitted ``StandardScaler`` is written with
            ``joblib.dump``. Defaults to ``'scaler.joblib'``.
        target: Name of the label column. Defaults to ``'Outcome'``.

    Returns:
        Tuple ``(X, y)``: the standardized feature matrix and the label values.

    Raises:
        KeyError: If ``target`` is not a column of the CSV.
    """
    data = pd.read_csv(filepath)
    if target not in data.columns:
        raise KeyError(f"Target column {target!r} not found in {filepath!r}")

    # NOTE: the scaler is fit on every row of the file, so any later
    # train/validation split shares scaling statistics.
    scaler = StandardScaler()
    X = scaler.fit_transform(data.drop(target, axis=1))
    y = data[target].values
    dump(scaler, scaler_path)
    return X, y

def prepare_datasets(X_train, X_val, y_train, y_val, batch_size=None):
    """Wrap train/validation arrays in batched ``tf.data`` datasets.

    The training dataset is shuffled with a buffer covering the whole training
    set; the validation dataset is only batched. When ``batch_size`` is None
    both datasets use ``len(X_train)`` as the batch size.

    Returns:
        Tuple ``(train_dataset, val_dataset)``.
    """
    n_train = len(X_train)
    effective_batch = n_train if batch_size is None else batch_size

    def _batched(features, labels, shuffle):
        ds = tf.data.Dataset.from_tensor_slices((features, labels))
        if shuffle:
            ds = ds.shuffle(buffer_size=n_train)
        return ds.batch(effective_batch)

    training = _batched(X_train, y_train, True)
    validation = _batched(X_val, y_val, False)
    return training, validation

def plot_training_history(history, train_loss='loss', train_metric='accuracy', val_loss='val_loss', val_metric='val_accuracy'):
    """Plot training/validation loss and metric curves from a Keras history.

    Two figures are drawn: one for the loss pair and one for the metric pair.

    Args:
        history: Object exposing a ``history`` dict of per-epoch value lists.
        train_loss: Key of the training loss series.
        train_metric: Key of the training metric series.
        val_loss: Key of the validation loss series.
        val_metric: Key of the validation metric series.

    Raises:
        KeyError: If any of the requested keys is absent from ``history.history``.
    """
    records = history.history

    # Loss
    plt.figure(figsize=(10, 5))
    plt.plot(records[train_loss], label='Training Loss')
    plt.plot(records[val_loss], label='Validation Loss')
    plt.title('Training and Validation Loss Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    # Metrics
    plt.figure(figsize=(10, 5))
    plt.plot(records[train_metric], label=f"Training: {train_metric}")
    plt.plot(records[val_metric], label=f"Validation: {val_metric}")
    plt.title(f'Training and Validation {train_metric} Over Epochs')
    plt.xlabel('Epochs')
    # Label the axis with the metric's name (previously the literal text 'train_metric').
    plt.ylabel(train_metric)
    plt.legend()
    plt.show()

def get_best_epoch_details(history, monitor='val_loss'):
    """Print every recorded value at the epoch where ``monitor`` is lowest.

    Args:
        history: Object exposing a ``history`` dict of per-epoch value lists.
        monitor: Key of the series to minimize. Defaults to ``'val_loss'``;
            pass ``'loss'`` for runs trained without validation data.

    Raises:
        KeyError: If ``monitor`` is not a key of ``history.history``.
        ValueError: If the monitored series is empty.
    """
    records = history.history
    monitored = list(records[monitor])
    best_index = monitored.index(min(monitored))

    epoch_details = {key: values[best_index] for key, values in records.items()}
    # Epochs are reported 1-based, list indices are 0-based.
    epoch_details['best_epoch'] = best_index + 1
    print(f"Best epoch details: {epoch_details}")

# Data Preparation

In [None]:
# Load the CSV, standardize the features and persist the fitted scaler.
# NOTE(review): preprocess_data fits the scaler on all rows before the split
# below, so validation rows contribute to the scaling statistics.
X, y = preprocess_data('/content/diabetes.csv')

# Hold out 20% of the rows for validation.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Batched tf.data pipelines reused by every training run in the notebook.
train_ds, val_ds = prepare_datasets(X_train, X_val, y_train, y_val, batch_size=32)

# Base Model

In [None]:
# Baseline: one hidden ReLU layer with L2, batch norm and dropout, trained with
# plain SGD. The input width is read from the training dataset's element spec.
base_model = Sequential([
    Input(shape=(train_ds.element_spec[0].shape[1],)),
    Dense(50, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='sigmoid')])

optimizer = SGD(learning_rate=0.01, momentum=0.0)

base_model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])

# Stop after 200 epochs without val_loss improvement and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=200, verbose=1, restore_best_weights=True)

# `callbacks` is documented as a list of callbacks, so pass one explicitly.
base_model_history = base_model.fit(train_ds, epochs=1000, validation_data=val_ds, verbose=0, callbacks=[early_stopping])


Epoch 235: early stopping
Restoring model weights from the end of the best epoch: 35.


In [None]:
# Print the metrics recorded at the baseline's lowest-val_loss epoch.
get_best_epoch_details(base_model_history)

Best epoch details: {'accuracy': 0.767100989818573, 'loss': 0.4984630346298218, 'val_accuracy': 0.7792207598686218, 'val_loss': 0.49623173475265503, 'best_epoch': 35}


# Initialization of Weights and Biases


In [None]:
def try_initializers(train_ds, val_ds):
    """Train the baseline architecture once per kernel initializer and report results.

    For each initializer a fresh model is built, trained with early stopping on
    ``val_loss`` and summarized via ``get_best_epoch_details``; the elapsed
    wall-clock time (model construction included) is printed as well.

    Args:
        train_ds: Batched training dataset; its element spec supplies the input width.
        val_ds: Batched validation dataset.
    """
    # Store classes rather than instances: every layer gets its own initializer
    # object, so the two Dense layers do not share one initializer's seed.
    initializer_factories = {
        'RandomNormal': RandomNormal,
        'RandomUniform': RandomUniform,
        'GlorotUniform': GlorotUniform,
        'GlorotNormal': GlorotNormal,
        'HeNormal': HeNormal
    }

    n_features = train_ds.element_spec[0].shape[1]

    for name, make_initializer in initializer_factories.items():
        print()
        print(f"Training model with {name} initialization...")

        start_time = time.perf_counter()

        model = Sequential([
            Input(shape=(n_features,)),
            Dense(50, activation='relu', kernel_initializer=make_initializer(), kernel_regularizer=l2(0.001)),
            BatchNormalization(),
            Dropout(0.5),
            Dense(1, activation='sigmoid', kernel_initializer=make_initializer())])
        model.compile(optimizer="SGD", loss="binary_crossentropy", metrics=["accuracy"])
        early_stopping = EarlyStopping(monitor='val_loss', patience=200, verbose=1, restore_best_weights=True)
        # `callbacks` is documented as a list of callbacks.
        history = model.fit(train_ds, epochs=1000, validation_data=val_ds, verbose=0, callbacks=[early_stopping])
        get_best_epoch_details(history)

        training_time = time.perf_counter() - start_time
        print("Training time:", training_time, "seconds")


In [None]:
# Compare the initializers on the shared train/validation datasets.
try_initializers(train_ds, val_ds)


Training model with RandomNormal initialization...
Epoch 229: early stopping
Restoring model weights from the end of the best epoch: 29.
Best epoch details: {'accuracy': 0.7801302671432495, 'loss': 0.46342968940734863, 'val_accuracy': 0.7727272510528564, 'val_loss': 0.5068941712379456, 'best_epoch': 29}
Training time: 11.719818830490112 seconds

Training model with RandomUniform initialization...
Epoch 275: early stopping
Restoring model weights from the end of the best epoch: 75.
Best epoch details: {'accuracy': 0.7899022698402405, 'loss': 0.42231056094169617, 'val_accuracy': 0.7792207598686218, 'val_loss': 0.49487897753715515, 'best_epoch': 75}
Training time: 13.28676962852478 seconds

Training model with GlorotUniform initialization...
Epoch 213: early stopping
Restoring model weights from the end of the best epoch: 13.
Best epoch details: {'accuracy': 0.7410423159599304, 'loss': 0.5925973057746887, 'val_accuracy': 0.7857142686843872, 'val_loss': 0.48324257135391235, 'best_epoch': 

 # Layers, Units, Dropout

 ## Create Searching Space



In [None]:
def build_model(hp):
    """Build a tunable classifier: depth, layer widths and dropout rates are searched.

    The input width is read from the notebook-level ``train_ds``. A later cell
    in this notebook redefines ``build_model`` with a larger search space.

    Args:
        hp: KerasTuner hyperparameter container used to sample the search space.

    Returns:
        A compiled ``Sequential`` model with a sigmoid output unit.
    """
    model = Sequential()
    model.add(Input(shape=(train_ds.element_spec[0].shape[1],)))

    for i in range(hp.Int('num_layers', 1, 5)):
        # ReLU matches the baseline model; without an activation the stacked
        # Dense layers compose into a single linear map, making depth/width
        # tuning meaningless.
        model.add(Dense(units=hp.Int('units_' + str(i), min_value=32, max_value=512, step=32),
                        activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(hp.Float('dropout_' + str(i), min_value=0.0, max_value=0.5, step=0.1)))

    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer="SGD", loss="binary_crossentropy", metrics=["accuracy"])

    return model


## Random Search Tuner

In [None]:
# Random search over the layers/units/dropout space, minimizing validation loss.
# A dedicated project_name keeps this search's trial files out of KerasTuner's
# default "untitled_project" directory, which any other tuner created with
# overwrite=True and no project_name would wipe.
random_search_tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=20,
    executions_per_trial=1,
    project_name='layers_units_dropout_search',
    overwrite=True)

# Each trial stops after 20 epochs without val_loss improvement.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    verbose=1,
    restore_best_weights=True)


In [None]:
# Run the search; extra arguments are forwarded to each trial's training run.
random_search_tuner.search(train_ds,
                           epochs=100,
                           validation_data=val_ds,
                           callbacks=[early_stopping])

Trial 20 Complete [00h 00m 17s]
val_loss: 0.4850340485572815

Best val_loss So Far: 0.45973536372184753
Total elapsed time: 00h 03m 42s


In [None]:
# List every hyperparameter registered during the search, with its range.
random_search_tuner.search_space_summary()

Search space summary
Default search space size: 11
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 5, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
dropout_0 (Float)
{'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 0.5, 'step': 0.1, 'sampling': 'linear'}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
dropout_1 (Float)
{'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 0.5, 'step': 0.1, 'sampling': 'linear'}
units_2 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
dropout_2 (Float)
{'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 0.5, 'step': 0.1, 'sampling': 'linear'}
units_3 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'samp

In [None]:
# Show the tuner's summary of completed trials.
random_search_tuner.results_summary()

## Best Hyperparameters

In [None]:
# Take the single best hyperparameter set found by the search.
best_hps = random_search_tuner.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# `.values` is the name -> value mapping of the selected hyperparameters.
print(f"Best Hyperparameters: {best_hps.values}")

Best Hyperparameters: {'num_layers': 5, 'units_0': 480, 'dropout_0': 0.1, 'units_1': 448, 'dropout_1': 0.4, 'units_2': 96, 'dropout_2': 0.0, 'units_3': 480, 'dropout_3': 0.0, 'units_4': 384, 'dropout_4': 0.0}


## Best Model

In [None]:
# Retrieve the top-ranked model from the tuner.
best_model = random_search_tuner.get_best_models(num_models=1)[0]

In [None]:
# Inspect the architecture selected by the search.
best_model.summary()

## Model Performance

In [None]:
# Score the best model on the validation dataset that also drove the search.
loss, acc = best_model.evaluate(val_ds)
print(f"Validation loss: {loss}, Accuracy: {acc}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.7996 - loss: 0.4658
Validation loss: 0.45973536372184753, Accuracy: 0.8051947951316833


# All Together

In [None]:
def build_model(hp):
    """Build a tunable classifier covering architecture, regularization and optimizer.

    Searched per hidden layer: units, activation, L2 strength and dropout rate.
    Searched globally: number of layers, initial learning rate (fed to an
    exponential-decay schedule) and the optimizer with its own settings.
    The input width is read from the notebook-level ``train_ds``.

    Hyperparameter names must be unique per range: KerasTuner returns the
    already-registered value when a name is reused, ignoring the new bounds.
    Adam and RMSprop therefore use ``'adam_epsilon'`` and ``'rmsprop_epsilon'``
    instead of one shared ``'epsilon'``. Hyperparameter sets saved under the
    old ``'epsilon'`` name fall back to the default for these two entries.
    ``'momentum'`` is intentionally shared by SGD and RMSprop (identical range).

    Args:
        hp: KerasTuner hyperparameter container used to sample the search space.

    Returns:
        A compiled ``Sequential`` model with a sigmoid output unit.

    Raises:
        ValueError: If the sampled optimizer name is not one of the supported choices.
    """
    model = Sequential()
    model.add(Input(shape=(train_ds.element_spec[0].shape[1],)))

    # Hidden layers, activation functions, l2, Dropout
    for i in range(hp.Int('num_layers', 1, 5)):
        model.add(Dense(
            units=hp.Int('units_' + str(i), min_value=32, max_value=512, step=32),
            activation=hp.Choice('activation_' + str(i), values=['relu', 'tanh', 'sigmoid']),
            kernel_regularizer=l2(hp.Float('l2_' + str(i), min_value=0.0001, max_value=0.01, sampling='log'))))
        model.add(BatchNormalization())
        model.add(Dropout(hp.Float('dropout_' + str(i), min_value=0.0, max_value=0.5, step=0.1)))

    model.add(Dense(1, activation='sigmoid'))

    # Learning rate schedule
    initial_learning_rate = hp.Float('initial_learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
    lr_schedule = ExponentialDecay(
        initial_learning_rate=initial_learning_rate,
        decay_steps=100,
        decay_rate=0.96,
        staircase=True
    )

    # Optimizers
    optimizer_choice = hp.Choice('optimizer', values=['sgd', 'adam', "rmsprop"])
    if optimizer_choice == 'sgd':
        optimizer = SGD(
            learning_rate=lr_schedule,
            momentum=hp.Float('momentum', min_value=0.0, max_value=0.9, step=0.1)
        )
    elif optimizer_choice == 'adam':
        optimizer = Adam(
            learning_rate=lr_schedule,
            beta_1=hp.Float('beta1', min_value=0.85, max_value=0.99, step=0.01),
            beta_2=hp.Float('beta2', min_value=0.999, max_value=0.9999, step=0.0001),
            epsilon=hp.Float('adam_epsilon', min_value=1e-8, max_value=1e-7, step=1e-8)
        )
    elif optimizer_choice == 'rmsprop':
        optimizer = RMSprop(
            learning_rate=lr_schedule,
            rho=hp.Float('rho', min_value=0.8, max_value=0.99, step=0.01),  # Decay rate for moving average of squared gradients
            epsilon=hp.Float('rmsprop_epsilon', min_value=1e-10, max_value=1e-8, step=1e-10),
            momentum=hp.Float('momentum', min_value=0.0, max_value=0.9, step=0.1)
        )
    else:
        # Guards against the choice list and the branches drifting apart.
        raise ValueError(f"Unsupported optimizer choice: {optimizer_choice!r}")

    model.compile(optimizer=optimizer,
                  loss="binary_crossentropy",
                  metrics=["accuracy"])

    return model

## Random Search

In [None]:
# Random search over the combined architecture/regularization/optimizer space.
# A dedicated project_name keeps this search from sharing (and, with
# overwrite=True, deleting) KerasTuner's default "untitled_project" directory
# used by any tuner created without a project_name.
random_search_tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=20,
    executions_per_trial=1,
    project_name='all_together_search',
    overwrite=True)

# Each trial stops after 20 epochs without val_loss improvement.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    verbose=1,
    restore_best_weights=True)


In [None]:
# Run the search; extra arguments are forwarded to each trial's training run.
random_search_tuner.search(train_ds,
                           epochs=100,
                           validation_data=val_ds,
                           callbacks=[early_stopping])


Trial 20 Complete [00h 00m 06s]
val_loss: 0.5788618326187134

Best val_loss So Far: 0.5018632411956787
Total elapsed time: 00h 04m 45s


## Best Hyperparameters

In [None]:
# Take the single best hyperparameter set found by the combined search.
best_hps = random_search_tuner.get_best_hyperparameters(num_trials=1)[0]

# `.values` is the name -> value mapping of the selected hyperparameters.
print(f"Best hyperparameters: {best_hps.values}")

Best hyperparameters: {'num_layers': 1, 'units_0': 224, 'activation_0': 'sigmoid', 'l2_0': 0.0004954647636620538, 'dropout_0': 0.0, 'initial_learning_rate': 0.004265340098831145, 'optimizer': 'sgd', 'momentum': 0.7000000000000001, 'units_1': 224, 'activation_1': 'tanh', 'l2_1': 0.00222222746808837, 'dropout_1': 0.0, 'units_2': 288, 'activation_2': 'sigmoid', 'l2_2': 0.000889531227382006, 'dropout_2': 0.0, 'units_3': 128, 'activation_3': 'tanh', 'l2_3': 0.0019355289844175982, 'dropout_3': 0.4, 'units_4': 128, 'activation_4': 'tanh', 'l2_4': 0.000421873393590671, 'dropout_4': 0.2, 'rho': 0.8600000000000001, 'epsilon': 6.1e-09, 'beta1': 0.9, 'beta2': 0.9994999999999999}


In [None]:
# Persist the winning hyperparameters so the retraining section can reload them.
dump(best_hps, 'best_hps.joblib')

['best_hps.joblib']

## Best Model

In [None]:
# Retrieve the top-ranked model from the combined search.
best_model = random_search_tuner.get_best_models(num_models=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Inspect the architecture selected by the combined search.
best_model.summary()

In [None]:
# Score the best model on the validation dataset that also drove the search.
loss, acc = best_model.evaluate(val_ds)
print(f"Validation set üzerinde loss: {loss}, Accuracy: {acc}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.7610 - loss: 0.5125
Validation set üzerinde loss: 0.5018632411956787, Accuracy: 0.7792207598686218


## Saving the Best Model

In [None]:
# Save the tuned model in the native Keras format.
best_model.save('tuned_model.keras')

# Retrain Model for Entire Dataset

## Load Best Hyperparameters and Dataset

In [None]:
# Reload and re-standardize the full CSV; this also rewrites the scaler file.
X, y = preprocess_data('/content/diabetes.csv')

# One shuffled batch containing every row, i.e. a single gradient step per epoch.
# NOTE(review): the hyperparameters were searched with batch_size=32; confirm
# that full-batch training is intended for the final fit.
dataset = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(buffer_size=len(X)).batch(len(X))

## Model Build

In [None]:
# Reload the hyperparameters persisted after the search.
best_hps = load('best_hps.joblib')

# Rebuild an untrained model from them. build_model reads the input width from
# the notebook-level `train_ds`, so the data-preparation cell must have run.
final_tuned_model = build_model(best_hps)

## Training

In [None]:
# No validation data is passed to fit below, so both callbacks monitor the
# training loss.
early_stopping = EarlyStopping(
    monitor='loss',
    patience=5,
    verbose=1,
    restore_best_weights=True)

# Keep only the checkpoint with the lowest training loss seen so far.
model_checkpoint = ModelCheckpoint(
    'final_tuned_model.keras',
    monitor='loss',
    verbose=0,
    save_best_only=True)

final_history = final_tuned_model.fit(dataset,
            epochs=500,
            verbose=1,
            callbacks=[early_stopping, model_checkpoint])

In [None]:
def get_best_epoch_details(history, monitor='loss'):
    """Print every recorded value at the epoch where ``monitor`` is lowest.

    This redefinition defaults to the training loss because the final model is
    trained without validation data.

    Args:
        history: Object exposing a ``history`` dict of per-epoch value lists.
        monitor: Key of the series to minimize. Defaults to ``'loss'``.

    Raises:
        KeyError: If ``monitor`` is not a key of ``history.history``.
        ValueError: If the monitored series is empty.
    """
    records = history.history
    monitored = list(records[monitor])
    best_index = monitored.index(min(monitored))

    epoch_details = {key: values[best_index] for key, values in records.items()}
    # Epochs are reported 1-based, list indices are 0-based.
    epoch_details['best_epoch'] = best_index + 1
    print(f"Best epoch details: {epoch_details}")

In [None]:
# Print the metrics recorded at the final run's lowest-loss epoch.
get_best_epoch_details(final_history)

Best epoch details: {'accuracy': 0.7838541865348816, 'loss': 0.47668328881263733, 'best_epoch': 500}


## Prediction

In [None]:
# Raw (unscaled) data used to draw prediction inputs.
diabetes_data = pd.read_csv('/content/diabetes.csv')

# Scaler persisted by preprocess_data.
scaler = load('scaler.joblib')

# Load from the same relative path ModelCheckpoint wrote to, so the cell does
# not depend on the working directory being /content. compile=False: the model
# is only used for inference here.
loaded_final_tuned_model = load_model("final_tuned_model.keras", compile=False)

In [None]:
def random_samples(scaler, data, num_samples=100):
    """Draw random rows from ``data`` and return them scaled as a one-batch dataset.

    Args:
        scaler: An already fitted scaler exposing ``transform``; it is applied
            as-is and never re-fitted, so the persisted training statistics
            are the ones used.
        data: DataFrame containing the feature columns and an ``'Outcome'``
            column, which is dropped.
        num_samples: Number of rows to draw without replacement; capped at the
            number of rows available.

    Returns:
        A ``tf.data.Dataset`` holding the scaled sampled rows in a single batch.
    """
    features = data.drop('Outcome', axis=1)
    n = min(num_samples, len(features))
    sampled = features.sample(n=n)
    # transform, not fit_transform: re-fitting would discard the loaded statistics.
    X_scaled = scaler.transform(sampled)
    # Batch size must be positive even when nothing was sampled.
    new_dataset = tf.data.Dataset.from_tensor_slices(X_scaled).batch(max(n, 1))
    return new_dataset

In [None]:
# NOTE(review): this rebinds the name `random_samples` from the function to its
# result, so running this cell a second time fails (the dataset is not
# callable). A distinct variable name would fix it, but the prediction cell
# reads the dataset under this name.
random_samples = random_samples(scaler, diabetes_data)

In [None]:
# Show the first ten predicted probabilities from the sigmoid output.
loaded_final_tuned_model.predict(random_samples)[:10]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step


array([[0.6710845 ],
       [0.07178123],
       [0.724851  ],
       [0.06417733],
       [0.86507714],
       [0.17141622],
       [0.09141503],
       [0.557759  ],
       [0.65300614],
       [0.05481444]], dtype=float32)