In [37]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [38]:
mnist_dataset, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)

mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)


def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255.
    return image, label

scaled_train_and_validation_data = mnist_train.map(scale)

test_data = mnist_test.map(scale)


BUFFER_SIZE = 10000

shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(BUFFER_SIZE)

validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)


BATCH_SIZE = 100

train_data = train_data.batch(BATCH_SIZE)
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

validation_inputs, validation_targets = next(iter(validation_data))

In [59]:
# Define the create_model() function
def create_model(hidden_layer_size, dropout_rate, learning_rate):
    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
        tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [60]:
# Define the hyperparameters to search over
hidden_layer_sizes = [100, 200]
dropout_rates = [0.2, 0.5]
learning_rates = [0.001, 0.01]

In [61]:
# Define variables to store the best hyperparameters and resulting accuracy
best_hidden_layer_size = None
best_dropout_rate = None
best_learning_rate = None
best_accuracy = 0

In [64]:
# Perform a grid search over the hyperparameters
for hidden_layer_size in hidden_layer_sizes:
    for dropout_rate in dropout_rates:
        for learning_rate in learning_rates:
            # Create the model
            model = create_model(hidden_layer_size, dropout_rate, learning_rate)
            # Train the model
            NUM_EPOCHS = 5
            model.fit(train_data, epochs=NUM_EPOCHS, validation_data=(validation_inputs, validation_targets), validation_steps=10, verbose=0)
            # Evaluate the model on the test data
            test_loss, test_accuracy = model.evaluate(test_data)
             # If the accuracy is better than the current best, update the best accuracy and the best hyperparameters
            if test_accuracy > best_accuracy:
                best_accuracy = test_accuracy
                best_hidden_layer_size = hidden_layer_size
                best_dropout_rate = dropout_rate
                best_learning_rate = learning_rate




In [65]:
#Print the best hyperparameters and accuracy
print('Best accuracy: {}'.format(best_accuracy))
print('Best hidden layer size: {}'.format(best_hidden_layer_size))
print('Best dropout rate: {}'.format(best_dropout_rate))
print('Best learning rate: {}'.format(best_learning_rate))

Best accuracy: 0.984000027179718
Best hidden layer size: 500
Best dropout rate: 0.2
Best learning rate: 0.001
Best accuracy: 0.9807999730110168
Best hidden layer size: 500
Best dropout rate: 0.5
Best learning rate: 0.001
Best accuracy: 0.9843000173568726
Best hidden layer size: 500
Best dropout rate: 0.5
Best learning rate: 0.001
