## Import Libraries

In [None]:
import numpy as np
from numpy import mean
from numpy import std
from matplotlib import pyplot
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.optimizers import SGD,RMSprop
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau
import cv2

from google.colab import drive
drive.mount('/content/drive')

import os
# Use an absolute path below the mount point so this cell can be re-run:
# a relative chdir only resolves while the working directory is still the
# initial one (assumed to be /content, the Colab default — TODO confirm).
os.chdir("/content/drive/My Drive/Colab Notebooks/ML/")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Load MNIST dataset

- Loading data from keras library
- Reshaping dataset to have a single channel
- Converting the integer class labels to one-hot vectors (one hot encoding)

In [None]:
# Fetch the MNIST train/test splits that ship with Keras
(trainX, trainY), (testX, testY) = mnist.load_data()

# Give every 28x28 image an explicit single-channel axis: (samples, 28, 28, 1)
trainX = trainX.reshape(len(trainX), 28, 28, 1)
testX = testX.reshape(len(testX), 28, 28, 1)

# Replace the class labels with their one-hot encoded form
trainY, testY = to_categorical(trainY), to_categorical(testY)

### Pixels Scaling

**Normalization**: Pixel values are scaled to the range 0-1

Neural network models often cannot be trained on raw pixel values, such as pixel values in the range of 0 to 255.

The reason is that the network uses a weighted sum of inputs, and for the network to both be stable and train effectively, weights should be kept small.

Instead, the pixel values must be scaled prior to training.

Normalization is often the default approach as we can assume pixel values are always in the range 0-255, making the procedure very simple and efficient to implement.

In [None]:
# Cast to float32 and divide by 255 in a single step (normalization to 0-1)
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

### CNN model definition

- #### Configuration
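  - **1 2D Convolution Layer**: 32 filters with a (3,3) kernel; ReLU is the activation function. The He initializer was selected because it performs better than the normal initializer.
  - **1 Max Pooling Layer**: (2,2) pooling that down-samples the feature maps produced by the convolution.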
  - **1 Flatten Layer**: Flatten the data so they can be passed to dense layer (keeping 1 dimension)
  - **2 Dense Layers**: Dense layers are used when an association can exist between any feature and any other feature in a data point. Between two layers of size n1 and n2 there can be n1∗n2 connections, which is why they are referred to as dense. The first one uses a ReLU activation function while the second is the softmax output layer.

- #### Compilation
  - **Optimizer**: RMSprop, an adaptive variant of gradient descent, is used here as a good general-purpose choice ( plain SGD or Adam can be used as well )  
  - **Loss function**: Since we’re using a Softmax output layer, we’ll use the Cross-Entropy loss

In [None]:
def define_model():
    """Build and compile the CNN used to classify the digit images.

    Layers, in order: Conv2D (32 filters, 3x3 kernel, ReLU, He-uniform init,
    input shape 28x28x1), MaxPooling2D (2x2), Flatten, Dense (100 units, ReLU,
    He-uniform init) and Dense (10 units, softmax).

    Returns:
        The compiled ``Sequential`` model, using the RMSprop optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(10, activation='softmax'))

    # model compilation
    # `learning_rate` is the supported spelling of the deprecated `lr` alias.
    # The legacy `decay` argument is not passed: 0.0 meant "no decay" and
    # newer optimizer implementations reject the keyword altogether.
    opt = RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08, centered=False, name="RMSprop")
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Callback that multiplies the learning rate by 0.5 once 'val_accuracy' has
# gone 3 epochs without improving, with 1e-5 as the lower bound.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

## Data Augmentation

To avoid the over-fitting problem, we need to artificially expand our handwritten digit dataset.
Data augmentation is a strategy that makes it possible to significantly increase the diversity of the data available for training our model. 

In [None]:
def adjust_gamma(image, gamma=1.5):
    """Scale *image* by its maximum value and apply gamma correction.

    Args:
        image: Array-like of pixel intensities.
        gamma: Exponent applied to the scaled image; defaults to 1.5.

    Returns:
        An array with the shape of *image* holding
        ``(image / max(image)) ** gamma``. When the maximum is 0 (for example
        an entirely black image) an all-zero array is returned instead of
        dividing by zero and filling the result with NaNs.
    """
    image = np.asarray(image)
    peak = float(np.max(image))

    if peak == 0.0:
        # Mirror the dtype true division would give: floats keep their
        # precision, everything else becomes float64.
        if np.issubdtype(image.dtype, np.floating):
            out_dtype = image.dtype
        else:
            out_dtype = np.float64
        return np.zeros(image.shape, dtype=out_dtype)

    return np.power(image / peak, gamma)

def my_preprocessing_func(img):
    """Gamma-correct *img* via ``adjust_gamma`` and divide the result by 255.

    NOTE(review): ``adjust_gamma`` already divides by the image maximum, so
    the additional division by 255 leaves values far below 1 — confirm that
    this second scaling is intended.
    """
    corrected = np.array(adjust_gamma(img))
    return corrected / 255

# Generator producing randomly transformed, standardized copies of the images.
datagen = ImageDataGenerator(
    # -- standardization --
    featurewise_center=True,             # zero the input mean over the dataset
    featurewise_std_normalization=True,  # divide inputs by the dataset std
    samplewise_center=True,              # zero the mean of each sample
    samplewise_std_normalization=True,   # divide each sample by its own std
    zca_whitening=False,                 # ZCA whitening is switched off
    # -- random geometric transforms --
    rotation_range=10,                   # rotation range in degrees
    zoom_range=0.1,                      # random zoom
    width_shift_range=0.1,               # horizontal shift, fraction of total width
    height_shift_range=0.1,              # vertical shift, fraction of total height
    horizontal_flip=False,               # no horizontal flipping
    vertical_flip=False,                 # no vertical flipping
    # -- custom per-image hook --
    preprocessing_function=my_preprocessing_func,
)
# Let the generator compute the dataset statistics from the training images
datagen.fit(trainX)

## Dataset Expansion

Expand the original dataset with the augmented MNIST images.

In [None]:
batch_size = 9
last_batch_index = 1000  # batches 0..1000 inclusive are appended (1001 batches)
original_samples = trainX.shape[0]

# Gather the pieces in lists and concatenate once at the end. Concatenating
# inside the loop would re-copy the whole, ever-growing training set for
# every single batch.
image_parts = [trainX]
label_parts = [trainY]

counter = 0
# zip() stops as soon as the range is exhausted, which bounds the otherwise
# endless generator without an explicit counter/break.
for counter, (X_batch, y_batch) in zip(
        range(last_batch_index + 1),
        datagen.flow(trainX, trainY, batch_size=batch_size)):
    image_parts.append(X_batch)
    label_parts.append(y_batch)

trainX = np.concatenate(image_parts, axis=0)
trainY = np.concatenate(label_parts, axis=0)

# To preview a batch as a 3x3 grid, plot X_batch[i].reshape(28, 28) for
# i in range(9) with pyplot.subplot(330 + 1 + i) / pyplot.imshow(..., cmap='gray').


### Model Evaluation

The model was finally fitted to the original dataset merged with the augmented samples.
Validation was performed on the MNIST test set.

The number of steps per epoch was calculated as train-length / batch-size, so that each epoch uses all of the data points, one batch at a time.

When the metric had stopped improving the learning rate was reduced.

In [None]:
fit_batch_size = 32
# steps_per_epoch has to be a whole number of batches; round up so the final,
# partially filled batch is still part of every epoch.
steps_per_epoch = int(np.ceil(len(trainX) / fit_batch_size))

model = define_model()
history = model.fit(
    trainX, trainY,
    batch_size=fit_batch_size,
    steps_per_epoch=steps_per_epoch,
    epochs=10,
    validation_data=(testX, testY),
    callbacks=[learning_rate_reduction],
    verbose=1,
)

# Report the accuracy on the test set as a percentage
_, acc = model.evaluate(testX, testY, verbose=0)
print('> %.3f' % (acc * 100.0))

model.save("./mymodel.h5")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
> 98.360
