In [1]:
# Importing the relevant packages
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorboard.plugins.hparams import api as hp

## Downloading and preprocessing the data

In [3]:
# Defining some constants/hyperparameters
BUFFER_SIZE = 70_000 # for reshuffling
BATCH_SIZE = 128
NUM_EPOCHS = 10

In [4]:
# Downloading the MNIST dataset
mnist_dataset, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)

In [5]:
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

In [6]:
# Creating a function to scale our data
def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255.

    return image, label

In [7]:
# Scaling the data
train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

In [8]:
# Defining the size of the validation set
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

In [9]:
# Defining the size of the test set
num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

In [10]:
# Reshuffling the dataset
train_and_validation_data = train_and_validation_data.shuffle(BUFFER_SIZE)

In [11]:
# Splitting the dataset into trainig + validation
train_data = train_and_validation_data.skip(num_validation_samples)
validation_data = train_and_validation_data.take(num_validation_samples)

In [12]:
# Batching the data
train_data = train_data.batch(BATCH_SIZE)
validation_data = validation_data.batch(num_validation_samples)
test_data = mnist_test.map(scale).batch(num_test_samples)

## Defining hyperparameters

In [14]:
# Defining the hypermatarest we would test and their range
HP_FILTER_SIZE = hp.HParam('filter_size', hp.Discrete([3,5]))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam']))

METRIC_ACCURACY = 'accuracy'

# Logging setup info
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_FILTER_SIZE, HP_OPTIMIZER],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
    )

## Cerating functions for training our model and for logging purposes

In [16]:
# Wrapping our model and training in a function
def train_test_model(hparams):
    
    # Outlining the model/architecture of our CNN
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(50, hparams[HP_FILTER_SIZE], activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
        tf.keras.layers.Conv2D(50, hparams[HP_FILTER_SIZE], activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2,2)), 
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10)
    ])
    
    # Defining the loss function
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    # Compiling the model with parameter value for the optimizer
    model.compile(optimizer=hparams[HP_OPTIMIZER], loss=loss_fn, metrics=['accuracy'])
    
    # Defining early stopping to prevent overfitting
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor = 'val_loss',
        mode = 'auto',
        min_delta = 0,
        patience = 2,
        verbose = 0, 
        restore_best_weights = True
    )
    
    # Training the model
    model.fit(
        train_data, 
        epochs = NUM_EPOCHS,
        callbacks = [early_stopping],
        validation_data = validation_data,
        verbose = 2
    )
    
    _, accuracy = model.evaluate(test_data)
    
    return accuracy

In [17]:
# Creating a function to log the resuls
def run(log_dir, hparams):
    
    with tf.summary.create_file_writer(log_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        accuracy = train_test_model(hparams)
        tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)

## Training the model with the different hyperparameters

In [19]:
# Performing a grid search on the hyperparameters we need to test
session_num = 0

for filter_size in HP_FILTER_SIZE.domain.values:
    for optimizer in HP_OPTIMIZER.domain.values:
    
        hparams = {
            HP_FILTER_SIZE: filter_size,
            HP_OPTIMIZER: optimizer
        }
        run_name = "run-%d" % session_num
        print('--- Starting trial: %s' % run_name)
        print({h.name: hparams[h] for h in hparams})
        run('logs/hparam_tuning/' + run_name, hparams)

        session_num += 1

--- Starting trial: run-0
{'filter_size': 3, 'optimizer': 'adam'}
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


422/422 - 22s - 51ms/step - accuracy: 0.9137 - loss: 0.3006 - val_accuracy: 0.9772 - val_loss: 0.0854
Epoch 2/10
422/422 - 20s - 47ms/step - accuracy: 0.9763 - loss: 0.0776 - val_accuracy: 0.9833 - val_loss: 0.0550
Epoch 3/10
422/422 - 18s - 42ms/step - accuracy: 0.9826 - loss: 0.0570 - val_accuracy: 0.9880 - val_loss: 0.0382
Epoch 4/10
422/422 - 18s - 42ms/step - accuracy: 0.9852 - loss: 0.0479 - val_accuracy: 0.9885 - val_loss: 0.0431
Epoch 5/10
422/422 - 17s - 41ms/step - accuracy: 0.9878 - loss: 0.0394 - val_accuracy: 0.9902 - val_loss: 0.0342
Epoch 6/10
422/422 - 18s - 43ms/step - accuracy: 0.9893 - loss: 0.0349 - val_accuracy: 0.9892 - val_loss: 0.0339
Epoch 7/10
422/422 - 18s - 42ms/step - accuracy: 0.9902 - loss: 0.0308 - val_accuracy: 0.9923 - val_loss: 0.0283
Epoch 8/10
422/422 - 19s - 44ms/step - accuracy: 0.9913 - loss: 0.0276 - val_accuracy: 0.9948 - val_loss: 0.0185
Epoch 9/10
422/422 - 19s - 46ms/step - accuracy: 0.9919 - loss: 0.0249 - val_accuracy: 0.9955 - val_loss: 0

## Visualizing the hyperparameter results with Tensorboard

In [21]:
# Loading the Tensorboard extension
%load_ext tensorboard
%tensorboard --logdir logs/hparam_tuning --port 6006


ERROR: Failed to launch TensorBoard (exited with 4294967295).
Contents of stderr:
2025-01-26 13:39:12.486421: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-26 13:39:13.727727: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
E0126 13:39:18.142985 13220 program.py:300] TensorBoard could not bind to port 6006, it was already in use
ERROR: TensorBoard could not bind to port 6006, it was already in use