# Image Classification - MNIST Dataset
Solve the same problem as MLP\3_ImageClassification\example_MNIST.py but using a CNN.

New Code
- Reshaping to input into a Conv Layer.
- Convolutional and Pooling Layers. Flatten operation.

# Keras

In [1]:
# %% --------------------------------------- Imports -------------------------------------------------------------------
import os
import random
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization, Conv2D, Flatten, MaxPooling2D, AveragePooling2D
from keras.optimizers import Adam
from keras.datasets import mnist
from keras.utils import to_categorical
from keras import backend as K
import keras
# %% --------------------------------------- Set-Up --------------------------------------------------------------------
# SEED = 42
# os.environ['PYTHONHASHSEED'] = str(SEED)
# random.seed(SEED)
# np.random.seed(SEED)
# tf.random.set_seed(SEED)

# %% ----------------------------------- Hyper Parameters --------------------------------------------------------------
LR = 1e-3
N_EPOCHS = 20
BATCH_SIZE = 512
DROPOUT = 0.5
num_classes = 10
img_rows, img_cols = 28, 28
# %% -------------------------------------- Data Prep ------------------------------------------------------------------
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Reshapes to (n_examples, n_channels, height_pixels, width_pixels)
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
# x_train, x_test = x_train.reshape(len(x_train), 1, 28, 28), x_test.reshape(len(x_test), 1, 28, 28)
# y_train, y_test = to_categorical(y_train, num_classes=10), to_categorical(y_test, num_classes=10)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
# %% -------------------------------------- Training Prep ----------------------------------------------------------
model = Sequential([
    Conv2D(16, (3,3),  activation="relu"),
    BatchNormalization(),
    MaxPooling2D((2,2)),
    Conv2D(32, (3,3), activation="relu"),
    #The Batch Normalization normalizes the outputs from the hidden activation functions. 
    # This helps with neuron imbalance and can speed training significantly.
    BatchNormalization(),
    AveragePooling2D((2,2)),
    Flatten(),
    Dense(400, activation="tanh"),
    Dropout(DROPOUT),
    BatchNormalization(),
    Dense(10, activation="softmax")
])
model.compile(optimizer=Adam(lr=LR), loss="categorical_crossentropy", metrics=["accuracy"])

# %% -------------------------------------- Training Loop ----------------------------------------------------------
model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=N_EPOCHS, validation_data=(x_test, y_test))

# %% ------------------------------------------ Final test -------------------------------------------------------------
print("Final accuracy on validations set:", 100*model.evaluate(x_test, y_test)[1], "%")

Using TensorFlow backend.


x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Final accuracy on validations set: 98.83000254631042 %


# TensorFlow

In [2]:
# %% --------------------------------------- Imports -------------------------------------------------------------------
import tensorflow as tf


# %% --------------------------------------- Set-Up --------------------------------------------------------------------
tf.random.set_seed(42)
np.random.seed(42)

# %% ----------------------------------- Hyper Parameters --------------------------------------------------------------
LR = 5e-2
N_EPOCHS = 20
BATCH_SIZE = 512
DROPOUT = 0.5


# %% -------------------------------------- CNN Class ------------------------------------------------------------------
class CNN(tf.keras.Model):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(16, 3)  # output (n_examples, 26, 26, 16)
        self.convnorm1 = tf.keras.layers.BatchNormalization()
        self.pool1 = tf.keras.layers.MaxPool2D(2)  # output (n_examples, 13, 13, 16)
        self.conv2 = tf.keras.layers.Conv2D(32, 3)  # output (n_examples, 11, 11, 32)
        self.convnorm2 = tf.keras.layers.BatchNormalization()
        self.pool2 = tf.keras.layers.AveragePooling2D(2)  # output (n_examples, 5, 5, 32)
        self.flatten = tf.keras.layers.Flatten()  # input will be flattened to (n_examples, 32 * 5 * 5)
        self.linear1 = tf.keras.layers.Dense(400)
        self.linear1_bn = tf.keras.layers.BatchNormalization()
        self.linear2 = tf.keras.layers.Dense(10)
        self.act = tf.nn.relu
        self.drop = DROPOUT
        self.training = True

    def call(self, x):
        x = self.pool1(self.convnorm1(self.act(self.conv1(x)), training=self.training))
        x = self.flatten(self.pool2(self.convnorm2(self.act(self.conv2(x)), training=self.training)))
        x = tf.nn.dropout(self.linear1_bn(self.act(self.linear1(x)), training=self.training), self.drop)
        return self.linear2(x)


# %% -------------------------------------- Data Prep ------------------------------------------------------------------
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Reshapes to (n_examples, height, width, n_channels)
x_train, x_test = tf.reshape(x_train, (len(x_train), 28, 28, 1), ), tf.reshape(x_test, (len(x_test), 28, 28, 1))
x_train, x_test = tf.dtypes.cast(x_train, tf.float32), tf.dtypes.cast(x_test, tf.float32)
y_train, y_test = tf.convert_to_tensor(y_train), tf.convert_to_tensor(y_test)

# %% -------------------------------------- Training Prep ----------------------------------------------------------
model = CNN()
optimizer = tf.keras.optimizers.Adam(learning_rate=LR)
criterion = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
@tf.function
def train(x, y):
    model.training = True
    model.drop = DROPOUT
    with tf.GradientTape() as tape:
        logits = model(x)
        loss = criterion(y, logits)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(y, logits)

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
@tf.function
def eval(x, y):
    model.training = False
    model.drop = 0
    logits = model(x)
    loss = criterion(y, logits)
    test_loss(loss)
    test_accuracy(y, logits)


# %% -------------------------------------- Training Loop ----------------------------------------------------------
print("Starting training loop...")
for epoch in range(N_EPOCHS):

    for batch in range(len(x_train)//BATCH_SIZE + 1):
        inds = slice(batch*BATCH_SIZE, (batch+1)*BATCH_SIZE)
        train(x_train[inds], y_train[inds])

    eval(x_test, y_test)

    print("Epoch {} | Train Loss {:.5f}, Train Acc {:.2f} - Test Loss {:.5f}, Test Acc {:.2f}".format(
        epoch, train_loss.result(), train_accuracy.result()*100, test_loss.result(), test_accuracy.result()*100))
    train_loss.reset_states(); train_accuracy.reset_states(); test_loss.reset_states(); test_accuracy.reset_states()

Starting training loop...
Epoch 0 | Train Loss 0.31078, Train Acc 92.58 - Test Loss 0.64011, Test Acc 89.90
Epoch 1 | Train Loss 0.08680, Train Acc 97.53 - Test Loss 0.10478, Test Acc 96.94
Epoch 2 | Train Loss 0.06868, Train Acc 98.03 - Test Loss 0.04896, Test Acc 98.59
Epoch 3 | Train Loss 0.06568, Train Acc 98.16 - Test Loss 0.10479, Test Acc 97.21
Epoch 4 | Train Loss 0.06351, Train Acc 98.22 - Test Loss 1.27813, Test Acc 78.31
Epoch 5 | Train Loss 0.07397, Train Acc 97.97 - Test Loss 0.65000, Test Acc 89.57
Epoch 6 | Train Loss 0.07231, Train Acc 98.10 - Test Loss 0.09979, Test Acc 98.23
Epoch 7 | Train Loss 0.06954, Train Acc 98.31 - Test Loss 0.07762, Test Acc 98.51
Epoch 8 | Train Loss 0.10667, Train Acc 97.53 - Test Loss 0.56933, Test Acc 91.55
Epoch 9 | Train Loss 0.08778, Train Acc 97.96 - Test Loss 0.06872, Test Acc 98.44
Epoch 10 | Train Loss 0.06481, Train Acc 98.42 - Test Loss 0.11595, Test Acc 97.56
Epoch 11 | Train Loss 0.07399, Train Acc 98.26 - Test Loss 0.08427, Tes

# PyTorch

In [3]:
# %% --------------------------------------- Imports -------------------------------------------------------------------
import torch
import torch.nn as nn
from torchvision import datasets
from sklearn.metrics import accuracy_score


# %% --------------------------------------- Set-Up --------------------------------------------------------------------
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)
np.random.seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# %% ----------------------------------- Hyper Parameters --------------------------------------------------------------
LR = 5e-2
N_EPOCHS = 20
BATCH_SIZE = 512
DROPOUT = 0.5


# %% ----------------------------------- Helper Functions --------------------------------------------------------------
def acc(x, y, return_labels=False):
    with torch.no_grad():
        logits = model(x)
        pred_labels = np.argmax(logits.cpu().numpy(), axis=1)
    if return_labels:
        return pred_labels
    else:
        return 100*accuracy_score(y.cpu().numpy(), pred_labels)


# %% -------------------------------------- CNN Class ------------------------------------------------------------------
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, (3, 3))  # output (n_examples, 16, 26, 26)
        self.convnorm1 = nn.BatchNorm2d(16)
        self.pool1 = nn.MaxPool2d((2, 2))  # output (n_examples, 16, 13, 13)
        self.conv2 = nn.Conv2d(16, 32, (3, 3))  # output (n_examples, 32, 11, 11)
        self.convnorm2 = nn.BatchNorm2d(32)
        self.pool2 = nn.AvgPool2d((2, 2))  # output (n_examples, 32, 5, 5)
        self.linear1 = nn.Linear(32*5*5, 400)  # input will be flattened to (n_examples, 32 * 5 * 5)
        self.linear1_bn = nn.BatchNorm1d(400)
        self.drop = nn.Dropout(DROPOUT)
        self.linear2 = nn.Linear(400, 10)
        self.act = torch.relu

    def forward(self, x):
        x = self.pool1(self.convnorm1(self.act(self.conv1(x))))
        x = self.pool2(self.convnorm2(self.act(self.conv2(x))))
        x = self.drop(self.linear1_bn(self.act(self.linear1(x.view(len(x), -1)))))
        return self.linear2(x)


# %% -------------------------------------- Data Prep ------------------------------------------------------------------
data_train = datasets.MNIST(root='.', train=True, download=True)
# Reshapes to (n_examples, n_channels, height_pixels, width_pixels)
x_train, y_train = data_train.data.view(len(data_train), 1, 28, 28).float().to(device), data_train.targets.to(device)
x_train.requires_grad = True
data_test = datasets.MNIST(root='.', train=False, download=True)
x_test, y_test = data_test.data.view(len(data_test), 1, 28, 28).float().to(device), data_test.targets.to(device)

# %% -------------------------------------- Training Prep ----------------------------------------------------------
model = CNN().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()

# %% -------------------------------------- Training Loop ----------------------------------------------------------
print("Starting training loop...")
for epoch in range(N_EPOCHS):

    loss_train = 0
    model.train()
    for batch in range(len(x_train)//BATCH_SIZE + 1):
        inds = slice(batch*BATCH_SIZE, (batch+1)*BATCH_SIZE)
        optimizer.zero_grad()
        logits = model(x_train[inds])
        loss = criterion(logits, y_train[inds])
        loss.backward()
        optimizer.step()
        loss_train += loss.item()

    model.eval()
    with torch.no_grad():
        y_test_pred = model(x_test)
        loss = criterion(y_test_pred, y_test)
        loss_test = loss.item()

    print("Epoch {} | Train Loss {:.5f}, Train Acc {:.2f} - Test Loss {:.5f}, Test Acc {:.2f}".format(
        epoch, loss_train/BATCH_SIZE, acc(x_train, y_train), loss_test, acc(x_test, y_test)))

Starting training loop...
Epoch 0 | Train Loss 0.06107, Train Acc 97.23 - Test Loss 0.08982, Test Acc 97.41
Epoch 1 | Train Loss 0.02204, Train Acc 98.22 - Test Loss 0.06169, Test Acc 98.16
Epoch 2 | Train Loss 0.01656, Train Acc 98.68 - Test Loss 0.05040, Test Acc 98.45
Epoch 3 | Train Loss 0.01354, Train Acc 98.89 - Test Loss 0.04367, Test Acc 98.62
Epoch 4 | Train Loss 0.01199, Train Acc 99.05 - Test Loss 0.03996, Test Acc 98.68
Epoch 5 | Train Loss 0.01040, Train Acc 99.15 - Test Loss 0.03718, Test Acc 98.79
Epoch 6 | Train Loss 0.00926, Train Acc 99.30 - Test Loss 0.03570, Test Acc 98.86
Epoch 7 | Train Loss 0.00830, Train Acc 99.40 - Test Loss 0.03363, Test Acc 99.03
Epoch 8 | Train Loss 0.00764, Train Acc 99.48 - Test Loss 0.03258, Test Acc 99.04
Epoch 9 | Train Loss 0.00698, Train Acc 99.49 - Test Loss 0.03209, Test Acc 99.08
Epoch 10 | Train Loss 0.00665, Train Acc 99.56 - Test Loss 0.03093, Test Acc 99.01
Epoch 11 | Train Loss 0.00594, Train Acc 99.60 - Test Loss 0.03072, Tes