# MNIST with a Deep MLP

## Step 1: Load and Preprocess the MNIST Data

In [None]:
import tensorflow as tf
import numpy as np

In [None]:
# Load the data and check shape and data type
# `load_data()` is unpacked as a (train, test) pair of (images, labels) tuples.
(x_train_full, y_train_full), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Last expression of the cell: shows the array shapes and the image dtype for inspection.
x_train_full.shape, y_train_full.shape, x_train_full.dtype

In [None]:
# Rescale the data
# Divide the pixel values by 255 so they fall in [0, 1] (checked in the next
# cell). Casting to float32 first keeps the arrays in Keras's default float
# type; dividing the raw integer arrays directly would produce float64 and
# double their memory footprint for no benefit.
x_train_full = x_train_full.astype(np.float32) / 255.
x_test = x_test.astype(np.float32) / 255.
# Last expression of the cell: shows the resulting dtypes for inspection.
x_train_full.dtype, x_test.dtype

In [None]:
# Sanity check: smallest and largest pixel value after rescaling
x_train_full.min(), x_train_full.max()

In [None]:
# Hold out the last 10,000 training examples as the validation set;
# everything before them stays in the training set.
x_train, x_val = x_train_full[:-10_000], x_train_full[-10_000:]
y_train, y_val = y_train_full[:-10_000], y_train_full[-10_000:]
# Show all six shapes to confirm the split.
x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape

In [None]:
# Reshape the tensors
# Flatten every 28x28 image into a single 784-element feature vector, which is
# the input size the dense network below expects. The -1 lets NumPy infer the
# number of samples, so this cell keeps working if the split sizes change.
x_train = x_train.reshape(-1, 28*28)
x_val = x_val.reshape(-1, 28*28)
x_test = x_test.reshape(-1, 28*28)

## Step 2: Build an MLP

In [None]:
def get_model():
    """Build the (uncompiled) MLP classifier.

    Returns:
        A Keras ``Sequential`` model taking 784-feature inputs, with two ReLU
        hidden layers (300 and 100 units) and a 10-unit softmax output layer.
    """
    dense = tf.keras.layers.Dense
    return tf.keras.models.Sequential([
        dense(units=300, activation='relu', input_shape=[28*28]),
        dense(units=100, activation='relu'),
        dense(units=10, activation='softmax'),
    ])

In [None]:
# Reset Keras's global state before building, so re-running this cell starts
# from a clean slate.
tf.keras.backend.clear_session()
model = get_model()
# Print the layer-by-layer overview of the freshly built model.
model.summary()

## Step 3: Compile the Model

In [None]:
# Plain stochastic gradient descent with a learning rate of 0.01.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
model.compile(
    optimizer=optimizer,
    # Sparse cross-entropy takes integer class labels rather than one-hot
    # vectors — assumes y_train / y_val / y_test hold integer digit labels.
    loss='sparse_categorical_crossentropy',
    # Track classification accuracy alongside the loss.
    metrics=['accuracy'],
)

## Step 4: Train the Model

In [None]:
# Stop once the validation loss has gone 5 epochs without improving, and roll
# the model back to the weights of its best epoch.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Save a checkpoint under "check_points", keeping only the best model so far.
model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath="check_points",
    save_best_only=True,
)

In [None]:
from pathlib import Path
from time import strftime

def get_run_logdir(root_logdir="my_logs"):
    """Return a timestamped log directory path for one training run.

    Args:
        root_logdir: Parent directory that collects the logs of all runs.

    Returns:
        A ``Path`` of the form ``<root_logdir>/run_YYYY_MM_DD_HH_MM_SS``,
        built from the current local time.
    """
    run_name = strftime("run_%Y_%m_%d_%H_%M_%S")
    return Path(root_logdir, run_name)

# Give this training run its own timestamped log directory.
run_logdir = get_run_logdir()

# Log training to TensorBoard, profiling batches 100 through 200.
tensorboard_cb = tf.keras.callbacks.TensorBoard(
    log_dir=run_logdir, profile_batch=(100, 200))

In [None]:
# Train with early stopping, checkpointing and TensorBoard logging attached.
# `epochs=5000` only acts as an upper bound: the early-stopping callback is
# expected to end training once the validation loss stops improving.
history = model.fit(x_train, y_train,
                    validation_data=(x_val, y_val),
                    batch_size=256,
                    epochs=5000,
                    callbacks=[early_stopping_cb, model_checkpoint_cb, tensorboard_cb])

In [None]:
# Load the TensorBoard notebook extension and open it on the root log
# directory, so every run written under ./my_logs shows up.
%load_ext tensorboard
%tensorboard --logdir=./my_logs

## Step 5: Evaluate the Model

In [None]:
# Loss and accuracy on the training set.
model.evaluate(x_train, y_train, batch_size=512)

In [None]:
# Loss and accuracy on the validation set.
model.evaluate(x_val, y_val, batch_size=512)

In [None]:
# Loss and accuracy on the held-out test set.
model.evaluate(x_test, y_test, batch_size=512)

## Step 6: Try Different Learning Rates and Batch Sizes

In [None]:
def get_run_logdir(root_logdir="my_logs", lr=None, batch_size=None):
    """Return a timestamped log directory path tagged with hyperparameters.

    Args:
        root_logdir: Parent directory that collects the logs of all runs.
        lr: Learning rate to append as ``_lr_<lr>``; omitted when ``None``.
        batch_size: Batch size to append as ``_batch_size_<batch_size>``;
            omitted when ``None``.

    Returns:
        A ``Path`` of the form
        ``<root_logdir>/run_YYYY_MM_DD_HH_MM_SS[_lr_<lr>][_batch_size_<n>]``.
    """
    run_name = strftime("run_%Y_%m_%d_%H_%M_%S")
    if lr is not None:
        run_name += "_lr_" + str(lr)
    if batch_size is not None:
        run_name += "_batch_size_" + str(batch_size)
    return Path(root_logdir) / run_name

In [None]:
# Grid search over learning rate and batch size. Every trained model is kept
# in `models`, keyed by its (lr, batch_size) pair, for comparison afterwards.
models = dict()
for lr in [0.01, 0.05, 0.1, 0.5]:
    for batch_size in [64, 128, 256, 512]:
        # Reset Keras's global state before building the next model.
        tf.keras.backend.clear_session()
        model = get_model()
        optimizer = tf.keras.optimizers.SGD(learning_rate=lr)

        model.compile(
            optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )

        # One log directory per configuration, tagged with its hyperparameters.
        run_logdir = get_run_logdir(lr=lr, batch_size=batch_size)

        tensorboard_cb = tf.keras.callbacks.TensorBoard(
            run_logdir,
            profile_batch=(100,200),
        )

        history = model.fit(x_train, y_train,
                            validation_data=(x_val, y_val),
                            # Train with the batch size under test. A fixed
                            # value here would make every batch-size run for a
                            # given learning rate identical.
                            batch_size=batch_size,
                            epochs=5000,
                            callbacks=[early_stopping_cb, tensorboard_cb],
                            verbose=0)

        models[(lr, batch_size)] = model

        print(f"Trained model for {lr} with batch_size {batch_size}")




In [None]:
# Pick the configuration with the highest validation accuracy
best_key, best_model, best_acc = None, None, 0.0
for candidate_key, candidate in models.items():
    # evaluate() returns [loss, accuracy]; only the accuracy is compared.
    val_metrics = candidate.evaluate(x_val, y_val, verbose=0)
    val_acc = val_metrics[1]
    if val_acc > best_acc:
        best_key, best_model, best_acc = candidate_key, candidate, val_acc

# Show the winning (lr, batch_size) pair and its validation accuracy.
best_key, best_acc

In [None]:
# Loss and accuracy of the selected model on the validation set.
best_model.evaluate(x_val, y_val)

In [None]:
# Evaluate the best model on the test set
# The test data played no part in training or model selection above.
best_model.evaluate(x_test, y_test)