In [1]:
import kerastuner as kt
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Load the pre-split dataset. An .npz archive is read lazily and keeps its
# file handle open until closed, so every array is materialised as a tensor
# inside the context manager and the handle is released right after.
with np.load('data/audiobooks_data_split.npz') as npz_data:
    # Features are stored as float64 tensors.
    X_train = tf.convert_to_tensor(npz_data['X_train'], dtype=tf.float64)
    X_val = tf.convert_to_tensor(npz_data['X_val'], dtype=tf.float64)
    X_test = tf.convert_to_tensor(npz_data['X_test'], dtype=tf.float64)
    # Targets are integer class labels (consumed by a sparse categorical loss).
    y_train = tf.convert_to_tensor(npz_data['y_train'], dtype=tf.int8)
    y_val = tf.convert_to_tensor(npz_data['y_val'], dtype=tf.int8)
    y_test = tf.convert_to_tensor(npz_data['y_test'], dtype=tf.int8)

# Sanity check: report the shape of every split
print('Data successfully loaded:')
print(f'{X_train.shape=}, {X_val.shape=}, {X_test.shape=}')
print(f'{y_train.shape=}, {y_val.shape=}, {y_test.shape=}')

  import kerastuner as kt


Data successfully loaded:
X_train.shape=TensorShape([3579, 10]), X_val.shape=TensorShape([447, 10]), X_test.shape=TensorShape([448, 10])
y_train.shape=TensorShape([3579]), y_val.shape=TensorShape([447]), y_test.shape=TensorShape([448])


In [2]:
def build_model(hp):
    """Build and compile a dense classifier from tuner-supplied hyperparameters.

    The network is a stack of identical hidden blocks (Dense with L2 kernel
    regularisation followed by Dropout) and a 2-unit softmax output layer.

    Hyperparameters registered on ``hp``:
        num_layers: number of hidden blocks, 2 to 5.
        hidden_layer_size: units per hidden Dense layer, 16 to 128 in steps of 16.
        activation: hidden activation, one of 'relu', 'tanh', 'selu'.
        dropout_rate: dropout after each hidden layer, 0.0 to 0.5 in steps of 0.05.
        l2_strength: L2 kernel penalty, 1e-5 to 1e-2 in steps of 1e-5.
        learning_rate: Adam learning rate, 1e-4 to 1e-2 in steps of 1e-4.

    Args:
        hp: Hyperparameter container passed in by the tuner; it is queried
            through ``hp.Int``, ``hp.Choice`` and ``hp.Float``.

    Returns:
        A ``tf.keras.Sequential`` model compiled with Adam, sparse categorical
        cross-entropy loss and an accuracy metric.
    """
    # Define hyperparameters to tune
    num_hidden_layers = hp.Int('num_layers', min_value=2, max_value=5, default=3)
    hidden_layer_size = hp.Int('hidden_layer_size', min_value=16, max_value=128, step=16, default=64)
    activation = hp.Choice('activation', values=['relu', 'tanh', 'selu'], default='relu')
    dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, default=0.2, step=0.05)
    l2_strength = hp.Float('l2_strength', min_value=1e-5, max_value=1e-2, default=0.001, step=1e-5)
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, default=1e-3, step=1e-4)

    # Build the model
    model = tf.keras.Sequential()
    # tf.keras.Input(shape=...) is used instead of InputLayer(input_shape=...):
    # the `input_shape` argument of InputLayer is deprecated in Keras 3, while
    # Input(shape=...) is accepted by Sequential.add on both Keras 2 and Keras 3.
    model.add(tf.keras.Input(shape=(10,)))
    for _ in range(num_hidden_layers):
        model.add(
            tf.keras.layers.Dense(
                hidden_layer_size,
                activation=activation,
                kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
            )
        )
        # Dropout layer after each Dense layer
        model.add(tf.keras.layers.Dropout(rate=dropout_rate))
    # Two-way softmax output; index 1 is read downstream as the positive-class probability
    model.add(tf.keras.layers.Dense(2, activation='softmax'))

    # Compile the model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model


In [None]:
### Create and run the tuner ###

MAX_EPOCHS = 200
SEED = 42

# Seed Python, NumPy and TensorFlow so that weight initialisation, dropout
# masks and data shuffling are repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(SEED)

# Stop a trial once val_loss has not improved for 5 consecutive epochs and
# roll the model back to its best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Hyperband search minimising validation loss; trial state is stored under
# tuning_results/undersample_balance_approach.
tuner = kt.Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=MAX_EPOCHS,
    factor=2,
    seed=SEED,  # makes the hyperparameter sampling repeatable
    directory='tuning_results',
    project_name='undersample_balance_approach'
)

tuner.search(
    X_train, y_train,
    epochs=MAX_EPOCHS,
    batch_size=128,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=2
)


Trial 689 Complete [00h 00m 11s]
val_loss: 0.43276312947273254

Best val_loss So Far: 0.33202415704727173
Total elapsed time: 01h 04m 55s


In [58]:
### Get the best model, hyperparameters, and metrics ###

# Top-ranked model and the hyperparameter set that produced it
best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best Hyperparameters: ", best_hyperparameters.values)

# Metrics tracked for the top-ranked trial
best_trial = tuner.oracle.get_best_trials(num_trials=1)[0]
best_metrics = best_trial.metrics

# For each metric, take the first value of its last recorded observation
last_values = {
    metric_name: best_metrics.get_history(metric_name)[-1].value[0]
    for metric_name in ('loss', 'accuracy', 'val_loss', 'val_accuracy')
}
train_loss = last_values['loss']
train_accuracy = last_values['accuracy']
val_loss = last_values['val_loss']
val_accuracy = last_values['val_accuracy']

# Step attached to the last recorded training-loss observation
last_epoch = best_metrics.get_history('loss')[-1].step

# Summary report
print(f'Final Results after {last_epoch} epochs:')
print(f'Accuracy: Train={train_accuracy * 100:.2f}% | Val={val_accuracy * 100:.2f}%')
print(f'Loss: Train={train_loss:.4f} | Val={val_loss:.4f}')

Best Hyperparameters:  {'num_layers': 3, 'hidden_layer_size': 80, 'activation': 'selu', 'dropout_rate': 0.05, 'l2_strength': 5e-05, 'learning_rate': 0.0074, 'tuner/epochs': 200, 'tuner/initial_epoch': 100, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0637'}
Final Results after 8 epochs:
Accuracy: Train=79.74% | Val=81.66%
Loss: Train=0.3709 | Val=0.3320


In [80]:
### Exploring model viabilities for ensembling ###

num_to_explore = 10

# Top-N models, trials and hyperparameter sets, each requested in best-first order.
# `best_models` is not inspected here; it is kept for the ensembling step.
best_models = tuner.get_best_models(num_models=num_to_explore)
best_trials = tuner.oracle.get_best_trials(num_trials=num_to_explore)
best_hps = tuner.get_best_hyperparameters(num_trials=num_to_explore)

# Iterate over what was actually returned rather than range(num_to_explore),
# so a tuner holding fewer than `num_to_explore` trials does not raise IndexError.
for rank, (trial, trial_hps) in enumerate(zip(best_trials, best_hps), start=1):
    # First value of the last recorded observation of each loss
    train_loss_i = trial.metrics.get_history('loss')[-1].value[0]
    val_loss_i = trial.metrics.get_history('val_loss')[-1].value[0]
    print('-------------------------')
    print(f'#{rank} Best Trial Results')
    print(f'Training Loss: {train_loss_i:.4f}')
    print(f'Validation Loss: {val_loss_i:.4f}')
    # A large positive gap means training loss sits well above validation loss
    print(f'Train - Val Loss: {train_loss_i - val_loss_i:.4f}')
    print(f'Hyperparameters: {trial_hps.values}')


  saveable.load_own_variables(weights_store.get(inner_path))
  saveable.load_own_variables(weights_store.get(inner_path))


-------------------------
#1 Best Trial Results
Training Loss: 0.3709
Validation Loss: 0.3320
Train - Val Loss: 0.0388
Hyperparameters: {'num_layers': 3, 'hidden_layer_size': 80, 'activation': 'selu', 'dropout_rate': 0.05, 'l2_strength': 5e-05, 'learning_rate': 0.0074, 'tuner/epochs': 200, 'tuner/initial_epoch': 100, 'tuner/bracket': 3, 'tuner/round': 3, 'tuner/trial_id': '0637'}
-------------------------
#2 Best Trial Results
Training Loss: 0.3605
Validation Loss: 0.3345
Train - Val Loss: 0.0260
Hyperparameters: {'num_layers': 2, 'hidden_layer_size': 48, 'activation': 'relu', 'dropout_rate': 0.1, 'l2_strength': 0.00028000000000000003, 'learning_rate': 0.0047, 'tuner/epochs': 200, 'tuner/initial_epoch': 100, 'tuner/bracket': 6, 'tuner/round': 6, 'tuner/trial_id': '0452'}
-------------------------
#3 Best Trial Results
Training Loss: 0.3632
Validation Loss: 0.3356
Train - Val Loss: 0.0276
Hyperparameters: {'num_layers': 3, 'hidden_layer_size': 80, 'activation': 'selu', 'dropout_rate': 0

In [None]:
### Ensemble models ###

num_to_ensemble = 4

# Labels as a plain NumPy array, so the accuracy comparison below is
# NumPy-vs-NumPy instead of relying on implicit NumPy/TensorFlow dispatch
# between arrays of different integer dtypes.
y_val_np = np.asarray(y_val)

# Positive-class probability (softmax column 1) from each of the top models,
# stacked to shape (num_models, num_samples). verbose=0 hides the per-model
# progress bars.
probabilities = [
    model.predict(X_val, verbose=0)[:, 1]
    for model in best_models[:num_to_ensemble]
]
probabilities_stack = np.stack(probabilities, axis=0)

# Soft voting: average the probabilities across models, then threshold at 0.5
avg_probabilities = np.mean(probabilities_stack, axis=0)
final_predictions = (avg_probabilities > 0.5).astype(int)

# Compute the ensemble's accuracy
# NOTE(review): the ensembled models were selected by val_loss on this same
# validation split, so this figure is likely optimistic; confirm on X_test.
ensemble_accuracy = np.mean(final_predictions == y_val_np)
print(f"Ensemble Accuracy: {ensemble_accuracy:.4f}")

# Calculate ensemble's val_loss using sparse categorical crossentropy.
# Rebuild a two-column [P(class 0), P(class 1)] array from the averaged
# positive-class probability so it matches the loss function's expected input.
# NOTE(review): this is pure cross-entropy; the val_loss reported during tuning
# presumably also includes the L2 penalty terms from build_model, so the two
# numbers are not directly comparable.
ensemble_predictions = np.stack([1 - avg_probabilities, avg_probabilities], axis=-1)
ensemble_loss = tf.keras.losses.sparse_categorical_crossentropy(
    y_val, ensemble_predictions
).numpy()
avg_val_loss = np.mean(ensemble_loss)
print(f"Ensemble Validation Loss: {avg_val_loss:.4f}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Ensemble Accuracy: 0.8300
Ensemble Validation Loss: 0.3211
