<a href="https://colab.research.google.com/github/AlbertFlorinus/mnistcnn/blob/master/notebook/trainingcode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
from scipy.ndimage import rotate
import numpy as np
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.optimizers import Adam, Adadelta
from keras.losses import categorical_crossentropy
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.advanced_activations import LeakyReLU 
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.callbacks import LearningRateScheduler, CSVLogger
from cv2 import cv2

In [10]:
# Load the MNIST dataset. The train split is what the network is fitted on;
# the test split holds images that are never trained on, so it can be used to
# check that the model has not overfitted.
# The X arrays are the images and the Y arrays are their labels.

train_split, test_split = mnist.load_data()
X_traine, Y_train = train_split
X_teste, Y_test = test_split

In [11]:
# Upscale the MNIST images from 28x28 pixels to 112x112.
# This is a band-aid fix, but it helps when preprocessing real images from outside MNIST.
def upscale_images(images, size=(112, 112)):
    """Resize every image in ``images`` to ``size`` with cv2.INTER_AREA.

    Parameters
    ----------
    images : array of shape (n, height, width)
        Stack of single-channel images.
    size : tuple of int
        Target size as (width, height), the order cv2.resize expects.

    Returns
    -------
    numpy.ndarray of shape (n, size[1], size[0]) with the same dtype as
    ``images``.
    """
    # Allocate in the source dtype instead of NumPy's float64 default: for
    # 60000 images of 112x112 a float64 buffer needs roughly 6 GB, eight times
    # more than a one-byte-per-pixel buffer, without holding any extra
    # information. The cast to float32 happens later in the notebook.
    upscaled = np.empty((len(images), size[1], size[0]), dtype=images.dtype)
    for index, image in enumerate(images):
        upscaled[index] = cv2.resize(image, size, interpolation=cv2.INTER_AREA)
    return upscaled


# The sample counts are taken from the arrays themselves rather than being
# hard-coded, so the cell also works on a subset of the data.
X_train = upscale_images(X_traine)
X_test = upscale_images(X_teste)

In [12]:
# Keras convolution layers expect a trailing channel axis, so both image
# stacks become (samples, 112, 112, 1).
X_train, X_test = (
    images.reshape((len(images), 112, 112, 1)) for images in (X_train, X_test)
)


In [13]:
# Cast the images to float32 so the pixel values can be
# scaled into the range 0 to 1.
X_train, X_test = (images.astype("float32") for images in (X_train, X_test))

# Normalise the pixels in place (0-255 -> 0-1).
np.divide(X_train, 255, out=X_train)
np.divide(X_test, 255, out=X_test)


In [14]:
# One-hot encode the labels: every label becomes a vector of length 10
# (one entry per digit class).
number_of_classes = 10
Y_train, Y_test = (
    np_utils.to_categorical(labels, number_of_classes)
    for labels in (Y_train, Y_test)
)


In [15]:
# Sanity check: shapes of the one-hot encoded label arrays.
label_shapes = (Y_train.shape, Y_test.shape)
print(*label_shapes)

(60000, 10) (10000, 10)


In [16]:
# Sanity check: shapes of the image arrays fed to the network.
image_shapes = (X_train.shape, X_test.shape)
print(*image_shapes)

(60000, 112, 112, 1) (10000, 112, 112, 1)


In [17]:
# Empty Sequential container; the layers are appended to it with model.add().
model = Sequential()

In [18]:
# The network is described as an ordered list of layers which are appended to
# the Sequential model one by one at the end of this cell.

convolutional_layers = [
    # Two large strided convolutions shrink the 112x112 input to 28x28.
    # Only the first layer needs input_shape; the 16 is the number of filters
    # (and therefore feature maps), followed by the filter size.
    Conv2D(16, (20, 20), strides=2, activation="relu", input_shape=(112, 112, 1), padding="same"),
    Conv2D(16, (10, 10), strides=2, activation="relu", padding="same"),

    # 3x3 convolutions with 32 filters and the default stride of 1, each
    # followed by batch normalisation to reduce covariate shift between batches.
    Conv2D(32, (3, 3), activation="relu"),
    BatchNormalization(),
    Conv2D(32, (3, 3), activation="relu"),
    BatchNormalization(),

    # A 5x5 convolution with stride 2 is used instead of max pooling: it
    # downsamples while retaining data that is important for classification.
    Conv2D(32, (5, 5), strides=2, padding="same", activation="relu"),
    BatchNormalization(),

    # Dropout with a rate of 0.4 randomly zeroes 40% of the activations on
    # each iteration, which helps prevent overfitting.
    Dropout(0.4),

    # Same pattern again, with the filter count raised from 32 to 64.
    Conv2D(64, (3, 3), activation="relu"),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation="relu"),
    BatchNormalization(),
    Conv2D(64, (5, 5), strides=2, padding="same", activation="relu"),
    BatchNormalization(),
    Dropout(0.4),
]

classifier_layers = [
    # End of the convolutional part: the 64 feature maps of 4x4 are flattened
    # into a 1d array of 1024 values. These are outputs of the convolutional
    # layers, not pixels of the original image.
    Flatten(),

    # Fully connected layer of 128 neurons.
    Dense(128, activation="relu"),
    BatchNormalization(),
    Dropout(0.4),

    # Final layer of 10 neurons with softmax activation; the prediction is
    # the class with the highest activation value.
    Dense(number_of_classes, activation="softmax"),
]

for layer in convolutional_layers + classifier_layers:
    model.add(layer)


In [19]:
# Configure training: categorical cross-entropy as the loss function,
# Adam (an enhancement of SGD) as the optimizer, and the accuracy metric
# so there are results for evaluating the model.
model.compile(
    optimizer=Adam(),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [20]:
# Number of images processed together before each weight update.
# Passed as batch_size to the data generators and used to derive
# steps_per_epoch for training.
batchsize = 64

In [21]:
# Hold out everything from index 9000 onwards (the last 1000 test samples).
# This slice has to be taken while X_test / Y_test still contain all samples,
# i.e. before they are truncated to the first 9000.
X_val, Y_val = X_test[9000:], Y_test[9000:]

In [22]:
# Keep only the first 9000 samples in X_test / Y_test; they are fed to the
# generator that supplies validation_data during training.
X_test, Y_test = X_test[:9000], Y_test[:9000]


In [23]:
# Sanity check: shapes of both splits, images first and labels second.
for images, labels in ((X_test, Y_test), (X_val, Y_val)):
    print(images.shape, labels.shape)

(9000, 112, 112, 1) (9000, 10)
(1000, 112, 112, 1) (1000, 10)


In [24]:
# Augment the training data for better generalisation and to prevent
# overfitting: random rotations, horizontal/vertical shifts and zoom.
gen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.15,
)
train_generator = gen.flow(
    X_train,
    Y_train,
    batch_size=batchsize,
)

In [25]:
# Validation batches must not be augmented. Drawing them from the augmenting
# generator would apply random rotations, shifts and zooms, so the reported
# val_loss / val_accuracy would be measured on distorted images and would
# vary from run to run. A plain ImageDataGenerator() only batches the data;
# shuffle=False keeps the batch order fixed between epochs.
testing_generator = ImageDataGenerator().flow(
    X_test,
    Y_test,
    batch_size=batchsize,
    shuffle=False,
)

In [26]:
# Learning-rate annealing: every epoch uses 95% of the previous epoch's rate,
# which keeps the weight updates smaller as the model approaches convergence.
def _annealed_learning_rate(epoch):
    """Return the learning rate for ``epoch``: 1e-3 scaled by 0.95 per epoch."""
    return 1e-3 * 0.95 ** epoch


annealer = LearningRateScheduler(_annealed_learning_rate)

In [27]:
# CSV log of the training metrics; results are appended to the existing
# file and the columns are separated by semicolons.
csv_logger = CSVLogger(
    "training_test.log",
    separator=";",
    append=True,
)

In [28]:
# Train for 10 epochs. validation_data is MNIST data that is not trained on,
# which shows whether the model generalises instead of overfitting to the
# training set.
history = model.fit(
    train_generator,
    steps_per_epoch=len(X_train) // batchsize,
    epochs=10,
    validation_data=testing_generator,
    callbacks=[annealer, csv_logger],
    verbose=1,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [29]:
# Persist the trained model to disk as ALnet-3.0.h5
model.save("ALnet-3.0.h5")

In [30]:
# Evaluate on the held-out X_val / Y_val samples; score holds the loss
# followed by the accuracy metric.
score = model.evaluate(x=X_val, y=Y_val, verbose=1)
for label, value in zip(("Test loss: ", "Test accuracy: "), score):
    print(label, value)

Test loss:  0.04459251090884209
Test accuracy:  0.9919999837875366


In [31]:
# Print the ALnet-3.0 structure: the output shape of every layer, in order.
layer_output_shapes = [layer.output_shape for layer in model.layers]
for output_shape in layer_output_shapes:
    print(output_shape)

(None, 56, 56, 16)
(None, 28, 28, 16)
(None, 26, 26, 32)
(None, 26, 26, 32)
(None, 24, 24, 32)
(None, 24, 24, 32)
(None, 12, 12, 32)
(None, 12, 12, 32)
(None, 12, 12, 32)
(None, 10, 10, 64)
(None, 10, 10, 64)
(None, 8, 8, 64)
(None, 8, 8, 64)
(None, 4, 4, 64)
(None, 4, 4, 64)
(None, 4, 4, 64)
(None, 1024)
(None, 128)
(None, 128)
(None, 128)
(None, 10)
