In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models, Input
import keras_tuner as kt
import numpy as np

2025-02-16 22:00:20.136034: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-16 22:00:20.143813: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1739739620.152496   27282 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1739739620.155052   27282 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-16 22:00:20.165160: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
print("Num GPUs Available:", len(tf.config.experimental.list_physical_devices('GPU')))
print("Physical Devices:", tf.config.list_physical_devices())

Num GPUs Available: 1
Physical Devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
class MyHyperband(kt.Hyperband):
    """
    A custom Hyperband tuner that, after each trial, builds the model
    from the trial’s hyperparameters and logs its parameter count.
    
    (This is one approach to letting the tuner “see” both validation accuracy
     and model size so that it can search for an architecture that is both
     accurate and compact.)
    """
    def run_trial(self, trial, *args, **kwargs):
        # Run the standard training for this trial.
        logs = {}
        super(MyHyperband, self).run_trial(
            trial, *args, 
            callbacks=[tf.keras.callbacks.LambdaCallback(
                on_epoch_end=lambda epoch, log: logs.update(log)
            )], 
            **kwargs
        )
        
        # Ensure val_accuracy is present
        if "val_accuracy" in logs:
            val_accuracy = logs["val_accuracy"]
        else:
            val_accuracy = 0.0  # Default to zero if not found (prevents crash)

        # Build the model from the trial’s hyperparameters (we don’t need to train it here)
        model = self.hypermodel.build(trial.hyperparameters)
        param_count = model.count_params()
        
        # Update the trial with the parameter count metric.
        # (We assume that the tuner was set up to track a metric named "param_count".)
        goodness=val_accuracy-0.00001*param_count
        self.oracle.update_trial(
            trial.trial_id, {
                "goodness": goodness
            }
        )
        return {
            "goodness": goodness
        }

In [4]:
def build_model(hp):
    """
    Build a ResNet-style model with hyperparameters:
      - num_blocks: number of residual blocks
      - filters_i: number of filters in each block (for block i)
      - learning_rate: learning rate for Adam optimizer
    """
    inputs = Input(shape=(32, 32, 3))
    
    # Initial convolution layer
    x = layers.Conv2D(32, kernel_size=3, padding="same", activation="relu")(inputs)
    
    # Choose number of residual blocks (e.g., 2 to 4)
    num_blocks = hp.Int("num_blocks", min_value=2, max_value=4, step=1, default=3)
    for i in range(num_blocks):
        # For each block, choose the number of filters (e.g., 32, 64, 96, or 128)
        filters = hp.Int(f"filters_{i}", min_value=32, max_value=128, step=32, default=64)
        shortcut = x  # save input for the skip connection
        
        # First convolution in block
        y = layers.Conv2D(filters, kernel_size=3, padding="same", activation="relu")(x)
        y = layers.BatchNormalization()(y)
        # Second convolution in block (no activation until after adding the shortcut)
        y = layers.Conv2D(filters, kernel_size=3, padding="same")(y)
        y = layers.BatchNormalization()(y)
        
        # If the number of channels does not match, adjust the shortcut
        if shortcut.shape[-1] != filters:
            shortcut = layers.Conv2D(filters, kernel_size=1, padding="same")(shortcut)
        # Add the shortcut (residual connection)
        x = layers.Add()([shortcut, y])
        x = layers.Activation("relu")(x)
    
    # Global pooling and output classification layer
    x = layers.GlobalAveragePooling2D()(x)
    outputs = layers.Dense(10, activation="softmax")(x)
    
    model = models.Model(inputs, outputs)
    
    # Compile the model – note that the learning rate is also a hyperparameter
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])
        ),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

In [5]:
tuner = MyHyperband(
    build_model,
    objective=kt.Objective("goodness", direction="max"),
    max_epochs=10,
    factor=3,
    directory="my_dir",
    project_name="resnet_tuner"
)

# -------------------------------------------
# 4. Load and Prepare the Dataset (CIFAR-10)
# -------------------------------------------
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Normalize pixel values
x_train = x_train.astype("float32") / 255.0
x_test  = x_test.astype("float32") / 255.0

# Create a validation set from the tail of the training data
x_val = x_train[-5000:]
y_val = y_train[-5000:]
x_train = x_train[:-5000]
y_train = y_train[:-5000]

# -------------------------------------------
# 5. Run the Hyperparameter Search
# -------------------------------------------
tuner.search(x_train, y_train, epochs=10, validation_data=(x_val, y_val))

# Retrieve the best hyperparameters and build the best model
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
best_model = tuner.hypermodel.build(best_hp)

# Show the model summary (including parameter count)
best_model.summary()

# Optionally, retrain the best model for more epochs
history = best_model.fit(x_train, y_train, epochs=20, validation_data=(x_val, y_val))

# -------------------------------------------
# 6. Evaluate the Model on Test Data
# -------------------------------------------
test_loss, test_acc = best_model.evaluate(x_test, y_test)
print("Test accuracy:", test_acc)

# -------------------------------------------
# 7. Inference: Predict on a New Sample
# -------------------------------------------
# For demonstration, take one sample from the test set.
sample = x_test[0:1]  # shape: (1, 32, 32, 3)
predictions = best_model.predict(sample)
predicted_class = np.argmax(predictions, axis=1)
print("Predicted class:", predicted_class)

Trial 30 Complete [00h 01m 04s]
goodness: -0.7721799963760376

Best goodness So Far: 0.18409998674392697
Total elapsed time: 00h 26m 52s


Epoch 1/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.2782 - loss: 1.9940 - val_accuracy: 0.3694 - val_loss: 1.6629
Epoch 2/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.4347 - loss: 1.6152 - val_accuracy: 0.4768 - val_loss: 1.4802
Epoch 3/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.4795 - loss: 1.4901 - val_accuracy: 0.4854 - val_loss: 1.4387
Epoch 4/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.5060 - loss: 1.4042 - val_accuracy: 0.4882 - val_loss: 1.4648
Epoch 5/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.5317 - loss: 1.3513 - val_accuracy: 0.4762 - val_loss: 1.4771
Epoch 6/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.5485 - loss: 1.2984 - val_accuracy: 0.5320 - val_loss: 1.3071
Epoch 7/20
[1m1

In [6]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.values)


{'num_blocks': 2, 'filters_0': 32, 'filters_1': 32, 'filters_2': 32, 'learning_rate': 0.0001, 'filters_3': 64, 'tuner/epochs': 10, 'tuner/initial_epoch': 4, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0012'}
