In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf

%matplotlib inline

# Load data

In [2]:
from tensorflow.keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [3]:
X_train.shape

(60000, 28, 28)

In [4]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1] * X_train.shape[2])
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1] * X_test.shape[2])
# reshaping into a 1D array

In [7]:
from tensorflow.keras.utils import to_categorical

# change the label from single int to an array 
# in accordance to the label
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

y_test

array([[0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

# Build the model

## Starting with a really simple model

In [8]:
from datetime import datetime
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import TensorBoard

In [9]:
model_simple = Sequential([
    Dense(50, activation='relu', input_shape=(28*28,)),
    Dense(10, activation='softmax')
])
model_simple.compile(loss='categorical_crossentropy',
                     optimizer=SGD(lr=1e-3),
                     metrics=['accuracy'])

Here we use `softmax` activation function for a multilabel classification as well as the loss of `categorical_crossentropy`.

In [11]:
logdir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M")
tensorboard_callback = TensorBoard(log_dir=logdir)

In [12]:
training_history = model_simple.fit(
    X_train,
    y_train,
    batch_size=32,
    verbose=1,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback]
)

Epoch 1/10
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## More layers in the model

In [13]:
model = Sequential(
    [
        Dense(10, activation='relu', input_shape=(28*28,)),
        Dense(512, activation='relu'),
        Dense(512, activation='relu'),
        Dense(512, activation='relu'),
        Dense(512, activation='relu'),
        Dense(512, activation='relu'),
        Dense(512, activation='relu'),
        Dense(10, activation='softmax'),
    ]
)

model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=1e-3),
              metrics=['accuracy'])

In [14]:
logdir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M")
tensorboard_callback = TensorBoard(log_dir=logdir)

In [15]:
training_history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    verbose=1,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Early Stopping

In [17]:
from tensorflow.keras.callbacks import EarlyStopping

es = EarlyStopping(monitor='val_loss', mode='min',
                   verbose=1, patience=5)

In [18]:
model = Sequential(
    [
        Dense(128, activation='relu', input_shape=(28*28,)),
        Dense(256, activation='relu'),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax'),
    ]
)

model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=1e-1),
              metrics=['accuracy'])

logdir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M")
tensorboard_callback = TensorBoard(log_dir=logdir)

training_history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    verbose=1,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback, es]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 00005: early stopping


## Dropouts

In [20]:
from tensorflow.keras.layers import Dropout

model = Sequential(
    [
        Dense(10, activation='relu', input_shape=(28*28,)),
        Dropout(0.2),
        Dense(256, activation='relu'),
        Dropout(0.2),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(10, activation='softmax'),
    ]
)

model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=1e-3),
              metrics=['accuracy'])

logdir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M")
tensorboard_callback = TensorBoard(log_dir=logdir)

training_history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    verbose=1,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback, es]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Regularization

In [21]:
model = Sequential(
    [
        Dense(10, activation='relu', input_shape=(28*28,)),
        Dense(128, activation='relu', kernel_regularizer='l2'),
        Dense(256, activation='relu', kernel_regularizer='l2'),
        Dense(128, activation='relu', kernel_regularizer='l2'),
        Dense(10, activation='softmax'),
    ]
)

model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=1e-3),
              metrics=['accuracy'])

logdir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M")
tensorboard_callback = TensorBoard(log_dir=logdir)

training_history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    verbose=1,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback, es]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
