In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import optuna

Load and preprocess the MNIST dataset

In [None]:
# Fetch the MNIST train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# For each split: append a trailing channel axis and divide the pixel values
# by 255.0. Both operations are element-wise independent, so they are combined
# into a single expression per split.
x_train = x_train[..., tf.newaxis] / 255.0
x_test = x_test[..., tf.newaxis] / 255.0

Define the CNN model

In [None]:
# Baseline CNN, assembled layer by layer: two conv/pool stages followed by a
# small dense classifier head with one softmax output per class (10 units).
basemodel = tf.keras.Sequential()

# Feature extractor: 3x3 convolutions with 32 and then 64 filters, each
# followed by a max-pooling layer with default settings.
basemodel.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu',
                                     input_shape=(28, 28, 1)))
basemodel.add(tf.keras.layers.MaxPooling2D())
basemodel.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
basemodel.add(tf.keras.layers.MaxPooling2D())

# Classifier head: flatten the feature maps, one hidden dense layer, softmax output.
basemodel.add(tf.keras.layers.Flatten())
basemodel.add(tf.keras.layers.Dense(units=64, activation='relu'))
basemodel.add(tf.keras.layers.Dense(units=10, activation='softmax'))

Hyperparameter optimisation with Optuna

In [None]:

def _build_cnn(num_filters, kernel_size, activation):
    """Build and compile a CNN with the same layer layout as ``basemodel``.

    Args:
        num_filters: Number of filters in the first convolution; the second
            convolution uses twice as many (the baseline uses 32 then 64).
        kernel_size: Side length of the square convolution kernels.
        activation: Activation name used by the convolutional layers and the
            hidden dense layer. The output layer always uses softmax.

    Returns:
        A freshly initialised ``tf.keras.Sequential`` model, compiled with the
        Adam optimizer, sparse categorical cross-entropy loss and an accuracy
        metric.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(num_filters, kernel_size, activation=activation,
                               input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(num_filters * 2, kernel_size, activation=activation),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation=activation),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])
    # The sparse variant of the loss is used, so labels are expected to be
    # integer class ids rather than one-hot vectors.
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def optimize_model(x_train, y_train, n_trials=100, epochs=10):
    """Search CNN hyperparameters with Optuna and return the best architecture.

    Each trial builds a new model (an already-built ``Sequential`` instance
    cannot be called as a factory), trains it with a 20% validation split and
    reports the validation accuracy of the final epoch.

    Args:
        x_train: Training images passed to ``model.fit``; the builder fixes the
            input shape to ``(28, 28, 1)`` per sample.
        y_train: Training labels passed to ``model.fit`` (integer class ids,
            as required by the sparse cross-entropy loss).
        n_trials: Number of Optuna trials to run. Defaults to 100.
        epochs: Training epochs per trial. Defaults to 10.

    Returns:
        A new, compiled but untrained model built from the hyperparameters of
        the best trial.
    """
    def objective(trial):
        # Hyperparameters to optimise.
        num_filters = trial.suggest_int('num_filters', 16, 128)
        kernel_size = trial.suggest_int('kernel_size', 3, 5)
        activation = trial.suggest_categorical('activation', ['relu', 'sigmoid'])

        # Fresh weights for every trial so results are not contaminated by
        # training carried over from a previous trial.
        model = _build_cnn(num_filters, kernel_size, activation)

        # Train on the training data, holding out 20% for validation.
        history = model.fit(x_train, y_train, epochs=epochs, validation_split=0.2)

        # Score the trial by the validation accuracy of the last epoch.
        return float(history.history['val_accuracy'][-1])

    # The objective is an accuracy, so it must be maximised; Optuna's default
    # direction is 'minimize', which would select the worst configuration.
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Rebuild the architecture of the best trial.
    best_params = study.best_params
    return _build_cnn(best_params['num_filters'],
                      best_params['kernel_size'],
                      best_params['activation'])

Define a callback to save the model's weights after each epoch

In [None]:
# Weights-only checkpoint written by the callback during training.
# Keras 3 rejects weights-only checkpoint paths that do not end in
# `.weights.h5`; the HDF5 suffix is also accepted by older tf.keras releases,
# so this path works across versions.
checkpoint_path = 'mnist_cnn.weights.h5'
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    save_weights_only=True,  # store weights only, not the full model
    verbose=1,               # log a message each time the file is written
)

Train the model on the training data, using a validation split and the checkpoint callback

In [None]:
# A Keras model must be compiled before fit() can be called; basemodel is only
# constructed above, so configure optimizer, loss and metrics here.
# The sparse loss assumes y_train holds integer class ids (not one-hot vectors).
basemodel.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

# Train for 10 epochs, holding out 20% of the training data for validation and
# invoking the checkpoint callback during training.
history = basemodel.fit(x_train, y_train, epochs=10, validation_split=0.2, callbacks=[checkpoint])

Plot the training and validation accuracy over time

In [None]:
# Per-epoch accuracy curves recorded by fit() for the training and validation data.
plt.plot(history.history['accuracy'], label='Training accuracy')
plt.plot(history.history['val_accuracy'], label='Validation accuracy')

# Label the figure so it can be read on its own; without legend() the
# `label=` arguments above are never displayed.
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and validation accuracy')
plt.legend();