## Download and prepare the dataset

In [1]:
from tensorflow import keras

In [2]:
# Load Fashion-MNIST; Keras returns it already split into train and test pairs
fashion_mnist = keras.datasets.fashion_mnist
(img_train, label_train), (img_test, label_test) = fashion_mnist.load_data()

In [3]:
# Convert both image sets to float32 and divide by 255 so values fall in the 0-1 range.
# NOTE: the arrays are rebound to their scaled versions, so running this cell a second
# time divides by 255 again — re-run the download cell first if you need to repeat it.
img_train, img_test = (
    images.astype("float32") / 255.0 for images in (img_train, img_test)
)

## Baseline Performance

In [4]:
# Baseline network: flatten -> 512-unit ReLU layer -> dropout -> 10-way softmax
b_model = keras.Sequential(
    [
        keras.layers.Flatten(input_shape=(28, 28)),
        # The width of this layer is what gets tuned later
        keras.layers.Dense(units=512, activation="relu", name="dense_1"),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(10, activation="softmax"),
    ]
)

# Show layer output shapes and parameter counts
b_model.summary()

Metal device set to: Apple M1 Max
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense_1 (Dense)             (None, 512)               401920    
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 10)                5130      
                                                                 
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


2023-12-03 11:25:48.783392: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-12-03 11:25:48.783528: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [5]:
# Training configuration: Adam at lr=0.001, sparse categorical cross-entropy, accuracy metric
b_optimizer = keras.optimizers.Adam(learning_rate=0.001)
b_loss = keras.losses.SparseCategoricalCrossentropy()
b_model.compile(optimizer=b_optimizer, loss=b_loss, metrics=["accuracy"])

In [6]:
# Number of training epochs.
NUM_EPOCHS = 10

# Train the model, holding out 20% of the training data for validation.
# The returned History is bound to a name so its bare repr is not echoed as the
# cell output and the training record stays available for later inspection.
b_history = b_model.fit(
    img_train, label_train, epochs=NUM_EPOCHS, validation_split=0.2
)

Epoch 1/10


2023-12-03 11:25:48.914700: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-12-03 11:25:49.042746: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-12-03 11:25:56.623082: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x29cccbee0>

In [7]:
# Score the baseline on the test split; return_dict=True yields a {metric_name: value} dict
b_eval_dict = b_model.evaluate(x=img_test, y=label_test, return_dict=True)

 37/313 [==>...........................] - ETA: 1s - loss: 0.3434 - accuracy: 0.8792

2023-12-03 11:27:15.484434: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




In [8]:
# Define helper function
def print_results(model, model_name, layer_name, eval_dict):
    """
    Prints the values of the hyperparameters to tune, and the results of model evaluation

    Args:
      model (Model) - compiled Keras model to report on; its optimizer must be set
      model_name (string) - arbitrary string to be used in identifying the model
      layer_name (string) - name of the layer to tune
      eval_dict (dict) - results of model.evaluate, as a {metric_name: value} dict
    """
    print(f"\n{model_name}:")

    print(f"number of units in 1st Dense layer: {model.get_layer(layer_name).units}")
    # Read `learning_rate` rather than `lr`: `lr` is only a deprecated alias that
    # newer Keras optimizers no longer expose.
    print(f"learning rate for the optimizer: {model.optimizer.learning_rate.numpy()}")

    # One "name: value" line per evaluation metric, in the dict's own order
    for key, value in eval_dict.items():
        print(f"{key}: {value}")


# Report the baseline's tunable settings alongside its test-set metrics
print_results(
    model=b_model,
    model_name="BASELINE MODEL",
    layer_name="dense_1",
    eval_dict=b_eval_dict,
)


BASELINE MODEL:
number of units in 1st Dense layer: 512
learning rate for the optimizer: 0.0010000000474974513
loss: 0.339042067527771
accuracy: 0.8808000683784485


## Keras Tuner

To perform hypertuning with Keras Tuner, you will need to:

* Define the model
* Select which hyperparameters to tune
* Define the search space for each hyperparameter
* Define the search strategy

In [9]:
import keras_tuner as kt
import tensorflow as tf

In [10]:
def model_builder(hp):
    """
    Creates a compiled classifier whose hidden-layer width and learning rate
    are taken from the tuner's search space.

    Args:
      hp - hyperparameter object passed in by Keras Tuner; its `Int` and
        `Choice` methods declare the tunable values used below

    Returns:
      compiled Sequential model built with those hyperparameter values
    """

    # Width of the tunable hidden layer: 32 to 512 in steps of 32
    hidden_units = hp.Int("units", min_value=32, max_value=512, step=32)

    # Flatten -> tunable ReLU layer -> dropout -> 10-way softmax
    model = keras.Sequential(
        [
            keras.layers.Flatten(input_shape=(28, 28)),
            keras.layers.Dense(
                units=hidden_units, activation="relu", name="tuned_dense_1"
            ),
            keras.layers.Dropout(0.2),
            keras.layers.Dense(10, activation="softmax"),
        ]
    )

    # Adam learning rate: one of 0.01, 0.001 or 0.0001
    adam_lr = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=adam_lr),
        loss=keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )

    return model

In [11]:
# Hyperband tuner over model_builder's search space, ranking trials by validation accuracy
tuner = kt.Hyperband(
    hypermodel=model_builder,
    objective="val_accuracy",
    max_epochs=10,
    factor=3,
    # presumably where trial results are written (kt_dir/kt_hyperband) — confirm
    directory="kt_dir",
    project_name="kt_hyperband",
)

In [12]:
# List the hyperparameters registered with the tuner, with their ranges / choices
tuner.search_space_summary()

Search space summary
Default search space size: 2
units (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [13]:
# Early-stopping callback for the search: monitors val_loss with a patience of 5 epochs
stop_early = keras.callbacks.EarlyStopping(monitor="val_loss", patience=5)

In [14]:
# Perform hypertuning. Originally estimated at around 10 minutes; the run recorded
# in this notebook's output took about 17.5 minutes, so expect it to vary by machine.
# NOTE(review): the Hyperband tuner is configured with its own max_epochs and
# presumably assigns each trial its own epoch budget — confirm whether `epochs`
# passed here has any effect.
tuner.search(
    img_train,
    label_train,
    epochs=NUM_EPOCHS,
    validation_split=0.2,
    callbacks=[stop_early],
)

Trial 30 Complete [00h 01m 13s]
val_accuracy: 0.8519166707992554

Best val_accuracy So Far: 0.89041668176651
Total elapsed time: 00h 17m 35s


In [15]:
# Take the top-ranked hyperparameter set from the finished search
best_hps = tuner.get_best_hyperparameters()[0]
best_units = best_hps.get("units")
best_learning_rate = best_hps.get("learning_rate")

print(
    f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_units} and the optimal learning rate for the optimizer
is {best_learning_rate}.
"""
)


The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is 320 and the optimal learning rate for the optimizer
is 0.001.



In [16]:
# Build a new model from the best hyperparameters found by the search
h_model = tuner.hypermodel.build(best_hps)
# Show the resulting layer shapes and parameter counts
h_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 tuned_dense_1 (Dense)       (None, 320)               251200    
                                                                 
 dropout_1 (Dropout)         (None, 320)               0         
                                                                 
 dense_1 (Dense)             (None, 10)                3210      
                                                                 
Total params: 254,410
Trainable params: 254,410
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Train the hypertuned model with the same epoch count and validation split as the baseline.
# The returned History is bound to a name so its bare repr is not echoed as the
# cell output and the training record stays available for later inspection.
h_history = h_model.fit(
    img_train, label_train, epochs=NUM_EPOCHS, validation_split=0.2
)

Epoch 1/10
  20/1500 [..............................] - ETA: 8s - loss: 1.4582 - accuracy: 0.4984

2023-12-03 11:44:52.493372: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-12-03 11:44:59.990029: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2f68dd820>

In [18]:
# Score the hypertuned model on the test split; return_dict=True yields a {metric_name: value} dict
h_eval_dict = h_model.evaluate(x=img_test, y=label_test, return_dict=True)

 37/313 [==>...........................] - ETA: 1s - loss: 0.3640 - accuracy: 0.8750

2023-12-03 11:46:17.219840: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




In [19]:
# Side-by-side report: baseline first, then the hypertuned model
print_results(
    model=b_model,
    model_name="BASELINE MODEL",
    layer_name="dense_1",
    eval_dict=b_eval_dict,
)
print_results(
    model=h_model,
    model_name="HYPERTUNED MODEL",
    layer_name="tuned_dense_1",
    eval_dict=h_eval_dict,
)


BASELINE MODEL:
number of units in 1st Dense layer: 512
learning rate for the optimizer: 0.0010000000474974513
loss: 0.339042067527771
accuracy: 0.8808000683784485

HYPERTUNED MODEL:
number of units in 1st Dense layer: 320
learning rate for the optimizer: 0.0010000000474974513
loss: 0.3593907654285431
accuracy: 0.876300036907196
