# MNIST Digit Classification
We will be training a CNN to classify images of handwritten digits from the MNIST dataset. Then, we will use this on our own handwritten digits.

First, let's import some libraries

In [None]:
# let's keep our Tensorflow logging to a minimum
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

from IPython.display import Image 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from tensorflow import keras
from tensorflow.keras import layers

## Importing dataset

The MNIST dataset is built into Keras. From the documentation [here](https://keras.io/api/datasets/mnist/), we can get some important info. The MNIST dataset is:
> a dataset of 60,000 28x28 grayscale images of the 10 digits, along with a test set of 10,000 images.

From the [MNIST homepage](http://yann.lecun.com/exdb/mnist/),
> The digits have been size-normalized and centered in a fixed-size image. The images were centered in a 28x28 image by computing the center of mass of the pixels, and translating the image so as to position this point at the center of the 28x28 field.

Internally, the dataset is stored as numpy array files.

The pixel values of each image range from 0 to 255, so we will scale them to `[0, 1]` to normalize the inputs.

Additionally, since the images are grayscale (single channel), we should reshape the data to add an explicit colour-channel axis of size 1. (Colour images used in a CNN normally have 3 channels, one each for red, green and blue.)

In [None]:
# Model / data parameters
num_classes = 10
# shape of one sample, excluding the batch axis: (height, width, channels)
input_shape = (28, 28, 1)

# Load the train/test split that ships with Keras
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Rescale pixel intensities from [0, 255] to [0, 1]
x_train = x_train.astype(np.float32) / 255
x_test = x_test.astype(np.float32) / 255
#TODO maybe move image normalization to Keras Model?

# Sanity-check the raw array shapes before adding the channel axis
assert x_train.shape == (60000, 28, 28)
assert x_test.shape == (10000, 28, 28)
# labels are 1-D NumPy arrays of digits (0-9)
assert y_train.shape == (60000,)
assert y_test.shape == (10000,)

# Append a trailing channel axis: (28, 28) -> (28, 28, 1) per image.
# Only needed when the Keras model does not use a Reshape layer.
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]
print("x_train shape:", x_train.shape)
print(len(x_train), "training samples")
print(len(x_test), "test samples")

Let's also view some of the images

In [None]:
# Show the first 25 training digits in a 5x5 grid, each titled with its label.
fig, axes = plt.subplots(5, 5, figsize=(8, 8), dpi=80)
for i, ax in enumerate(axes.flat):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    # plot raw pixel data
    ax.imshow(x_train[i], cmap='gray_r', interpolation='none')
    ax.set_title(f"Digit: {y_train[i]}")
# Lay the grid out once, after every image and title exists, rather than on
# each loop iteration.
fig.tight_layout()

## Model definition
Next, let's define the CNN model using Keras' sequential API

In [None]:
# CNN: two Conv -> MaxPool -> BatchNorm stages, then a small dense classifier.
model = keras.Sequential(
    [
        # The data already carries its channel axis, so the input is declared
        # directly with `input_shape`. keras.Input(shape=...) is used because
        # newer Keras releases warn about InputLayer(input_shape=...).
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), strides=(1, 1), activation="relu", padding="same"),
        layers.MaxPooling2D(pool_size=(2, 2), padding='valid'),
        layers.BatchNormalization(),
        layers.Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation="relu", padding="same"),
        layers.MaxPooling2D(pool_size=(2, 2), padding='valid'),
        layers.BatchNormalization(),
        layers.Flatten(),
        layers.Dense(100, activation='relu'),
        layers.Dropout(0.1),
        # one softmax output per digit class
        layers.Dense(num_classes, activation="softmax"),
    ]
)

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

# Render the architecture diagram to disk, then display the saved file.
keras.utils.plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
    show_layer_names=True,
)
Image(retina=True, filename='model.png')

In [None]:
# sparse_categorical_crossentropy accepts integer digit labels for y_true,
# so the labels do not need to be one-hot encoded.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Evaluated at the end of every epoch: stop training once `val_loss` has not
# improved by at least `min_delta` for `patience` epochs.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=1e-2,
    patience=2,
    verbose=1,
)

# Write TensorBoard logs, including histogram visualizations, once per epoch.
tensorboard_logger = keras.callbacks.TensorBoard(
    log_dir="tensorboard_logs",
    histogram_freq=1,
    update_freq="epoch",
)

callbacks = [early_stopping, tensorboard_logger]

## Training

We can also load tensorboard, to view live plots as our model trains.

In [None]:
# Load the TensorBoard notebook extension and view it within this notebook,
# reading from the same log directory the TensorBoard callback writes to.
%load_ext tensorboard
%tensorboard --logdir tensorboard_logs

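Now, let's train the model. The cell below sets `epochs = 2` for a quick run; raise it to 15 for a full training run (the early-stopping callback may end training sooner).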

In [None]:
batch_size = 64
epochs = 2  # 15

# Train with 10% of the training samples held out for validation; the
# callbacks handle early stopping and TensorBoard logging.
history = model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
    callbacks=callbacks,
)

# NOTE(review): newer Keras releases may require a `.keras` or `.h5`
# extension on this path — confirm against the installed version.
model.save('saved_models')

In [None]:
# Tabulate the metrics recorded during training, one column per metric
history_df = pd.DataFrame(data=history.history)
# Quick alternative: plot straight from the dataframe with pandas
# history_df.loc[:, ['loss', 'val_loss']].plot();
# history_df.loc[:, ['accuracy', 'val_accuracy']].plot();

In [None]:
# Plot the training curves. The `val_*` series come from the validation split
# held out by model.fit, not from the test set, so they are labelled
# "validation" rather than "test".
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8), dpi=80)

ax_acc.plot(history.history['accuracy'])
ax_acc.plot(history.history['val_accuracy'])
ax_acc.set_title('model accuracy')
ax_acc.set_ylabel('accuracy')
ax_acc.set_xlabel('epoch')
ax_acc.legend(['train', 'validation'], loc='lower right')

ax_loss.plot(history.history['loss'])
ax_loss.plot(history.history['val_loss'])
ax_loss.set_title('model loss')
ax_loss.set_ylabel('loss')
ax_loss.set_xlabel('epoch')
ax_loss.legend(['train', 'validation'], loc='upper right')

fig.tight_layout()

# Evaluate on Test data

In [None]:
# Score the trained model on the test set; with the single "accuracy" metric
# compiled above, evaluate() yields the loss followed by the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

# Make predictions on Test data

In [None]:
# Run the model over the whole test set
print("Generate predictions for test samples")
predictions = model.predict(x_test)
print("predictions shape (samples, probabilities):", predictions.shape)

# Each row of `predictions` lists the probabilities of the digit being 0, 1, ... 9;
# display the row for the first test sample.
predictions[0]

In [None]:
# Compare one prediction with its ground truth: the predicted digit is the
# index of the largest probability.
predicted_digit = np.argmax(predictions[0])
actual_digit = y_test[0]
print(predicted_digit)
print(actual_digit)

In [None]:
# Helper function from https://www.tensorflow.org/tutorials/keras/classification#verify_predictions
def plot_image(i, predictions_array, true_label, img):
    """Draw sample ``i`` with a caption comparing prediction and ground truth.

    Args:
        i: Index into ``true_label`` and ``img``.
        predictions_array: Per-class scores for sample ``i``. The argmax is
            used as the predicted label and the max as the confidence.
        true_label: Indexable collection of ground-truth labels.
        img: Indexable collection of images.

    The caption reads "<predicted> <confidence>% (<true>)" and is coloured
    blue when the predicted label equals the true label, red otherwise.
    Draws on the current matplotlib axes.
    """
    actual, picture = true_label[i], img[i]
    predicted = np.argmax(predictions_array)
    caption_color = 'blue' if predicted == actual else 'red'

    # Bare image: no grid and no tick marks.
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(picture, cmap=plt.cm.binary)

    plt.xlabel(
        "{} {:2.0f}% ({})".format(predicted, 100*np.max(predictions_array), actual),
        color=caption_color,
    )

def plot_value_array(i, predictions_array, true_label):
    """Bar-chart the per-class scores for sample ``i`` on the current axes.

    Args:
        i: Index into ``true_label``.
        predictions_array: Per-class scores for sample ``i``, one entry per
            class. The number of bars follows its length instead of a
            hard-coded class count.
        true_label: Indexable collection of ground-truth labels.

    The bar of the predicted class is coloured red and the bar of the true
    class blue; every other bar is grey.
    """
    true_label = true_label[i]
    class_ids = range(len(predictions_array))
    plt.grid(False)
    plt.xticks(class_ids)
    plt.yticks([])
    thisplot = plt.bar(class_ids, predictions_array, color="#777777")
    plt.ylim([0, 1])
    predicted_label = np.argmax(predictions_array)

    # The true bar is coloured last, so a correct prediction ends up blue.
    thisplot[predicted_label].set_color('red')
    thisplot[true_label].set_color('blue')
  

In [None]:
# Show one test sample: the image on the left, its class scores on the right.
i = 0
plt.figure(figsize=(6, 3))

plt.subplot(121)
plot_image(i, predictions[i], y_test, x_test)

plt.subplot(122)
plot_value_array(i, predictions[i], y_test)

plt.show()

In [None]:
# Plot the first num_rows * num_cols test images with their predicted and
# true labels. Correct predictions are coloured blue, incorrect ones red.
num_rows, num_cols = 5, 3
num_images = num_rows * num_cols
# every sample occupies two panels: the image and its bar chart
panels_per_row = 2 * num_cols

plt.figure(figsize=(2 * panels_per_row, 2 * num_rows))
for i in range(num_images):
    image_slot = 2 * i + 1
    plt.subplot(num_rows, panels_per_row, image_slot)
    plot_image(i, predictions[i], y_test, x_test)
    plt.subplot(num_rows, panels_per_row, image_slot + 1)
    plot_value_array(i, predictions[i], y_test)
plt.tight_layout()
plt.show()

# Using the trained model on a single test image

In [None]:
# Take one sample from the test dataset.
img = x_test[1]
print(img.shape)

# tf.keras models predict on a batch of samples at once, so prepend a batch
# axis to make this image the only member of a batch.
img = img[np.newaxis, ...]
print(img.shape)

In [None]:
# Predict on the one-image batch; index 1 is this image's position in the
# test set, so y_test[1] is its true label.
predictions_single = model.predict(img)
single_scores = predictions_single[0]
plot_value_array(1, single_scores, y_test)
np.argmax(single_scores)

In [None]:
#TODO: use model.predict() on our own image