In [1]:
# Date    : 19/02/2023
# Author  : Sivuyile Sifuba
# Email   : sivuyilesifuba@gmail.com

# References
# [1] Simonyan, K., & Zisserman, A. (2015). Very deep convolutional networks for large-scale image recognition. 3rd International Conference on Learning Representations (ICLR 2015), 1–14.


In [2]:
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras import Input
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense 
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import set_random_seed, to_categorical

In [3]:
# load MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [4]:
# from sparse label to categorical
num_labels = len(np.unique(y_train))
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# reshape input images
image_size = x_train.shape[1]
x_train = np.reshape(x_train,[-1, image_size, image_size, 1])
x_test = np.reshape(x_test,[-1, image_size, image_size, 1])

In [5]:
# This ensures that the model is deterministic
set_random_seed(seed=6)

# The model implementations is as described in the paper Very Deep Convolutional Networks for Large-Scale Image Recognition. The Model is a 16 weight layer 
# architecture also known as VGG-16. We have chosen to implement the 16 weight layer architecture that includes the 1x1 convolution layer in order to 
# enhance the non-linear characteristics of the decision function [1]. The network will be used to classify MNIST digits. In contrast to the original network
# we have opted for using the ADAM optimizer instead of SGD.

# VGG-16
model_input = Input(shape=(image_size, image_size, 1))
x = Conv2D(filters=64, kernel_size=(3,3), strides=(1, 1), activation='relu')(model_input)
x = Conv2D(filters=64, kernel_size=(3,3), strides=(1, 1), padding='same', activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2), strides=2)(x)

x = Conv2D(filters=128, kernel_size=(3,3), strides=(1, 1), padding='same', activation='relu')(x)
x = Conv2D(filters=128, kernel_size=(3,3), strides=(1, 1), padding='same', activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2), strides=2)(x)

x = Conv2D(filters=256, kernel_size=(3,3), strides=(1, 1), padding='same', activation='relu')(x)
x = Conv2D(filters=256, kernel_size=(3,3), strides=(1, 1), padding='same', activation='relu')(x)
x = Conv2D(filters=256, kernel_size=(1,1), strides=(1, 1), padding='same', activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2), strides=2)(x)

x = Conv2D(filters=512, kernel_size=(3,3), strides=(1, 1), padding='same', activation='relu')(x)
x = Conv2D(filters=512, kernel_size=(3,3), strides=(1, 1), padding='same', activation='relu')(x)
x = Conv2D(filters=512, kernel_size=(1,1), strides=(1, 1), padding='same', activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2), strides=2)(x)

x = Conv2D(filters=512, kernel_size=(3,3), strides=(1, 1), padding='same', activation='relu')(x)
x = Conv2D(filters=512, kernel_size=(3,3), strides=(1, 1), padding='same', activation='relu')(x)
x = Conv2D(filters=512, kernel_size=(1,1), strides=(1, 1), padding='same', activation='relu')(x)
#x = MaxPool2D(pool_size=(2, 2), strides=2)(x)

x = Flatten()(x)

x = Dense(units=128, activation='relu')(x)
x = Dense(units=128, activation='relu')(x)
model_output  = Dense(units=10, activation='softmax')(x)

# Create a Keras Model
VGG16 = Model(inputs=model_input , outputs=model_output)
VGG16.summary()
VGG16.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Train the model with input images and labels
VGG16.fit(x=x_train,
          y=y_train,
          validation_data=(x_test, y_test),
          epochs=20,
          batch_size=32)

# model accuracy on test dataset
score = VGG16.evaluate(x_test,
                       y_test,
                       batch_size=32,
                       verbose=0)
print("\nTest accuracy: %.1f%%" % (100.0 * score[1]))

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 26, 26, 64)        640       
                                                                 
 conv2d_1 (Conv2D)           (None, 26, 26, 64)        36928     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 64)       0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 13, 13, 128)       73856     
                                                                 
 conv2d_3 (Conv2D)           (None, 13, 13, 128)       147584    
                                                             