In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import keras_tuner as kt


In [2]:
def residual_block(x, filters, kernel_size=3, stride=1):
    shortcut = x  # Save input for skip connection

    if x.shape[-1] != filters:
        shortcut = layers.Conv2D(filters, kernel_size=1, strides=stride, padding="same")(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)

    x = layers.Conv2D(filters, kernel_size=kernel_size, strides=stride, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    x = layers.Conv2D(filters, kernel_size=kernel_size, strides=1, padding="same")(x)
    x = layers.BatchNormalization()(x)

    x = layers.add([x, shortcut])
    x = layers.ReLU()(x)

    return x


In [3]:
def build_resnet_tuned(hp):
    inputs = layers.Input(shape=(32, 32, 1))

    # Initial convolution layer
    x = layers.Conv2D(
        filters=hp.Choice('initial_filters', [32, 64, 128]),
        kernel_size=7,
        strides=2,
        padding="same"
    )(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPooling2D(pool_size=3, strides=2, padding="same")(x)

    # Add residual blocks
    num_blocks = hp.Int('num_blocks', 2, 6)
    filters = hp.Choice('block_filters', [64, 128, 256])
    for _ in range(num_blocks):
        x = residual_block(x, filters=filters)

    # Global average pooling
    x = layers.GlobalAveragePooling2D()(x)

    # Fully connected output layer
    outputs = layers.Dense(10, activation="softmax")(x)

    # Create the model
    model = models.Model(inputs, outputs)

    # Compile the model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Choice('learning_rate', [1e-3, 1e-4, 1e-5])
        ),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model


In [4]:
tuner = kt.Hyperband(
    build_resnet_tuned,
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    directory='resnet_tuning',
    project_name='mnist_resnet'
)


In [5]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))
x_train = tf.image.resize(x_train, (32, 32))
x_test = tf.image.resize(x_test, (32, 32))

x_train, x_test = x_train / 255.0, x_test / 255.0
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)


In [6]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=3, restore_best_weights=True
)

tuner.search(
    x_train, y_train,
    epochs=15,
    validation_data=(x_test, y_test),
    callbacks=[early_stopping]
)


Trial 30 Complete [00h 16m 26s]
val_accuracy: 0.9907000064849854

Best val_accuracy So Far: 0.9950000047683716
Total elapsed time: 01h 10m 35s


In [7]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best hyperparameters: {best_hps.values}")


Best hyperparameters: {'initial_filters': 128, 'num_blocks': 3, 'block_filters': 256, 'learning_rate': 0.001, 'tuner/epochs': 10, 'tuner/initial_epoch': 0, 'tuner/bracket': 0, 'tuner/round': 0}


In [8]:
best_model = tuner.hypermodel.build(best_hps)

best_model.fit(
    x_train, y_train,
    epochs=15,
    batch_size=64,
    validation_data=(x_test, y_test),
    callbacks=[early_stopping]
)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15


<keras.callbacks.History at 0x1e08be16fd0>

In [9]:
best_model.save('best_resnet_model.h5')
test_loss, test_acc = best_model.evaluate(x_test, y_test)
print(f"Test accuracy with tuned model: {test_acc}")


Test accuracy with tuned model: 0.9909999966621399
