In [1]:
!pip install keras-tuner





In [2]:
import tensorflow as tf
import keras_tuner as kt
from tensorflow import keras
print(f"TensorFlow Version: {tf.__version__}")
print(f"KerasTuner Version: {kt.__version__}")

TensorFlow Version: 2.9.1
KerasTuner Version: 1.1.2


In [4]:
# Load and split data into train and test sets
(X_train, y_train), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [5]:
# Normalize pixels to values between 0 and 1
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Baseline Performance

As mentioned, we will first train a shallow dense neural network (DNN) with preselected
hyperparameters giving us a baseline performance. We'll see later on how simple models, like this
our shallow DNN, can take some time to tune.

In [6]:
# Build baseline model with Sequential API
b_model = keras.Sequential()
b_model.add(keras.layers.Flatten(input_shape=(28,28)))
b_model.add(keras.layers.Dense(units=512, activation='relu', name='dense_1'))
b_model.add(keras.layers.Dropout(0.2))
b_model.add(keras.layers.Dense(10, activation='softmax'))
# Print model summary
b_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense_1 (Dense)             (None, 512)               401920    
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 10)                5130      
                                                                 
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Set training parameters
b_model.compile(optimizer=keras.optimizers.Adam(lr=0.001),
loss=keras.losses.SparseCategoricalCrossentropy(),
metrics=['accuracy'])

  super(Adam, self).__init__(name, **kwargs)


In [9]:
# Number of epochs
NUM_EPOCHS = 20
# Early stopping set after 5 epochs
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
# Train model
b_model.fit(X_train, y_train, epochs=NUM_EPOCHS, validation_split=0.2, callbacks=[stop_early])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2b3cdeb3820>

In [10]:
b_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense_1 (Dense)             (None, 512)               401920    
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 10)                5130      
                                                                 
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


In [11]:
import pandas as pd
def evaluate_model(model, X_test, y_test):
    eval_dict = model.evaluate(X_test, y_test, return_dict=True)
    display_df = pd.DataFrame([eval_dict.values()], columns=[list(eval_dict.keys())])
    return display_df

results = evaluate_model(b_model, X_test, y_test)
results.index = ['Baseline']
results.head()



Unnamed: 0,loss,accuracy
Baseline,0.364314,0.8849


There's the results for a single set of hyperparameters. Imagine trying out different learning rates,
dropout percentages, number of hidden layers, and number of neurons in each hidden layer. As you
can see, manual hypertuning is simply not feasible nor scalable. In the next section you'll see how
Keras Tuner solves these problems simply by automating the process and searching the
hyperparameter space in an efficient way.

# Keras Tuner
Keras Tuner is a simple, distributable hyperparameter optimization framework that automates the
painful process of manually searching for optimal hyperparameters. Keras Tuner comes with
Random Search, Hyperband, and Bayesian Optimization built-in search algorithms, and is designed
to fit many use cases including:
* Distributed tuning
* Custom training loops (e.g., GANs, reinforcement learning, etc.)
* Adding hyperparameters outside of the model building function (preprocessing, data
augmentation, test time augmentation, etc.)

These processes are outside the scope of this write-up, but feel free to read more in the official
documentation. There are four steps to hypertune our shallow DNN using Keras Tuner:
1. Define the model
2. Specify which hyperparameters to tune
3. Define the search space
4. Define the search algorithm

In [13]:
def build_model(hp):

    # Initialize sequential API and start building model.
    model = keras.Sequential()
    model.add(keras.layers.Flatten(input_shape=(28,28)))
    # Tune the number of hidden layers and units in each.
    # Number of hidden layers: 1 - 5
    # Number of Units: 32 - 512 with stepsize of 32
    for i in range(1, hp.Int("num_layers", 2, 6)):
        model.add(
        keras.layers.Dense(
        units=hp.Int("units_" + str(i), min_value=32, max_value=512, step=32),
        activation="relu")
    )
        # Tune dropout layer with values from 0 - 0.3.
        model.add(keras.layers.Dropout(hp.Float("dropout_" + str(i), 0, 0.3, step=0.1)))
    # Add output layer.
    model.add(keras.layers.Dense(units=10, activation="softmax"))
    # Tune learning rate for Adam optimizer with values from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                loss=keras.losses.SparseCategoricalCrossentropy(),
                metrics=["accuracy"])
    return model

In [14]:
# Instantiate the tuner
tuner = kt.Hyperband(build_model,
objective="val_accuracy",
max_epochs=20,
factor=3,
hyperband_iterations=1,
directory="kt_dir",
project_name="kt_hyperband",
overwrite=True)

In [15]:
# Display search space summary
tuner.search_space_summary()

Search space summary
Default search space size: 4
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 6, 'step': 1, 'sampling': None}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
dropout_1 (Float)
{'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 0.3, 'step': 0.1, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [16]:
# This cell takes a long time to run when hyperband_iterations is large
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
tuner.search(X_train, y_train, epochs=NUM_EPOCHS, validation_split=0.2, callbacks=[stop_early])

Trial 30 Complete [00h 00m 48s]
val_accuracy: 0.8523333072662354

Best val_accuracy So Far: 0.8952500224113464
Total elapsed time: 00h 26m 27s
INFO:tensorflow:Oracle triggered exit


In [17]:
# Get the optimal hyperparameters from the results
best_hps=tuner.get_best_hyperparameters()[0]

h_model = tuner.hypermodel.build(best_hps)
h_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_3 (Dense)             (None, 480)               376800    
                                                                 
 dropout_2 (Dropout)         (None, 480)               0         
                                                                 
 dense_4 (Dense)             (None, 160)               76960     
                                                                 
 dropout_3 (Dropout)         (None, 160)               0         
                                                                 
 dense_5 (Dense)             (None, 448)               72128     
                                                                 
 dropout_4 (Dropout)         (None, 448)              

In [18]:
# Train the hypertuned model
h_model.fit(X_train, y_train, epochs=NUM_EPOCHS, validation_split=0.2, callbacks=[stop_early])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20


<keras.callbacks.History at 0x2b3d44b5490>

In [19]:
hyper_df = evaluate_model(h_model, X_test, y_test)
hyper_df.index = ["Hypertuned"]
results.append(hyper_df)



  results.append(hyper_df)


Unnamed: 0,loss,accuracy
Baseline,0.364314,0.8849
Hypertuned,0.330538,0.8859
