## Load libraries and modules

In [None]:
# LOAD LIBRARIES
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical                                               
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator                                      
from keras.callbacks import ReduceLROnPlateau

## Reading the data 

In [None]:
# Locations of the competition CSV files inside the Kaggle input directory.
train_filepath = '/kaggle/input/digit-recognizer/train.csv'
test_filepath = '/kaggle/input/digit-recognizer/test.csv'

# Load both files into DataFrames (train first, then test).
train, test = (pd.read_csv(csv_path) for csv_path in (train_filepath, test_filepath))

In [None]:
# Report the (rows, columns) shape of each DataFrame.
for split_name, frame in (('train', train), ('test', test)):
    print(f'Size of {split_name} data: ', frame.shape)

In [None]:
# Show the first rows of each DataFrame; a ruler line separates the two previews.
ruler = '|' + '-' * 140 + '|'

print('Train Data')
display(train.head())
print(ruler)
print('\n')

print('Test Data')
display(test.head())
print('\n')

> * We have 42,000 training samples and 28,000 test samples.
> * Each row has 784 pixel values, i.e. one 28-by-28-pixel image.  
> * We will use data augmentation to expand the 42,000 training samples into many more. 
> * The test data will not be augmented; it will be evaluated as is. 

## Data Preparation
> * Separate y_train from train data.
> * Scale X_train and X_test. 
> * Reshape the X's into 28×28 single-channel images (a 4-D array: samples, height, width, channels).
> * one-hot encode the y_train. 

In [None]:
# PREPARE DATA FOR NEURAL NETWORK
# Labels: one-hot encode the "label" column into 10 classes.
y_train = to_categorical(train["label"], num_classes=10)

# Features: every column except the label holds one pixel value.
pixel_columns = train.drop(columns=["label"])

# Divide the pixel values by 255, then reshape each row to
# (automatically_adjust_size, height, width, channels).
X_train = (pixel_columns / 255.0).values.reshape(-1, 28, 28, 1)
X_test = (test / 255.0).values.reshape(-1, 28, 28, 1)

In [None]:
# Let's see what y_train looks like after one-hot encoding.
y_train

## Data visualization 

In [None]:
# LET'S LOOK AT SOME OF THE IMAGES
print("30 Sample Handwritten Digits from training set" )
plt.figure(figsize=(30, 9))
for sample_idx in range(30):
    # Only the even-numbered cells of the 6x10 grid are filled, which leaves
    # an empty cell between neighbouring digits.
    grid_slot = 2 * (sample_idx + 1)
    digit_image = X_train[sample_idx].reshape((28, 28))
    plt.subplot(6, 10, grid_slot)
    plt.imshow(digit_image, cmap=plt.cm.Blues)
plt.subplots_adjust(wspace=-0.1, hspace=-0.1)
plt.show()

### Data Augmentation 
* We have only 42,000 training samples against 28,000 test samples, so we will augment the training data before feeding it to the CNN models. 

In [None]:
# LET'S CREATE AN ImageDataGenerator INSTANCE TO AUGMENT THE TRAINING DATA.
# Random transforms applied to each generated image:
augmentation_settings = {
    'rotation_range': 15,        # rotation range, in degrees
    'zoom_range': 0.10,          # zoom range
    'width_shift_range': 0.15,   # horizontal shift range
    'height_shift_range': 0.15,  # vertical shift range
}
image_data_gen = ImageDataGenerator(**augmentation_settings)

### Visualizing Augmented Digits.

In [None]:
def plot_augmentations(number, index):
    """Plot ten randomly augmented versions of one training image.

    Args:
        number: Digit label shown in the printed caption (display only; it is
            not checked against ``y_train``).
        index: Row of ``X_train`` / ``y_train`` to augment.
    """
    # The generator expects a batch, so wrap the single sample in a batch of one.
    X_train_i = X_train[index,].reshape((1, 28, 28, 1))
    y_train_i = y_train[index,].reshape((1, 10))

    # Build the iterator once: every draw from it applies a fresh random
    # transform, so it does not need to be recreated for each subplot.
    augmented_batches = image_data_gen.flow(X_train_i, y_train_i)

    print(f'Sample {number} and augmented')
    plt.figure(figsize=(15, 4.5))
    for i in range(10):
        # Only the even-numbered cells of the 2x10 grid are used.
        plt.subplot(2, 10, (i + 1) * 2)
        # Use the iterator protocol (built-in next()) rather than the
        # iterator's own `.next()` method; the label batch is not needed here.
        X_train_, _ = next(augmented_batches)
        plt.imshow(X_train_[0].reshape((28, 28)), cmap=plt.cm.Blues.reversed())
        plt.axis('off')
    plt.subplots_adjust(wspace=-0.1, hspace=-0.1)
    plt.show()

In [None]:
# LET'S LOOK AT AUGMENTED DIGITS.
# (digit label shown in the caption, row index in the training set)
augmentation_examples = [(2, 16), (6, 21), (4, 3)]
for position, (digit, row) in enumerate(augmentation_examples):
    if position:
        # Blank lines between consecutive figures.
        print('\n')
    plot_augmentations(digit, index=row)

## Model
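* Each image is only 28 by 28 pixels, so we can use custom models. 
* We will use an ensemble method, the **soft voting classifier technique** (summing the class probabilities predicted by every network and picking the most probable class), to predict the digits.  
* Let's build 5 neural networks. 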

In [None]:
# BUILD CONVOLUTIONAL NEURAL NETWORKS
def _conv_bn_relu(filters, kernel_size, **conv_kwargs):
    """Return the layers Conv2D -> BatchNormalization -> ReLU as a list."""
    return [
        Conv2D(filters, kernel_size=kernel_size, **conv_kwargs),
        BatchNormalization(),
        Activation('relu'),
    ]


def _build_cnn():
    """Create and compile one CNN for 28x28x1 inputs and 10 output classes."""
    layers = [
        # Stage 1: 32 filters; the strided convolution halves the feature map.
        *_conv_bn_relu(32, 3, input_shape=(28, 28, 1)),      # 26*26
        *_conv_bn_relu(32, 3),                               # 24*24
        *_conv_bn_relu(32, 5, strides=2, padding='same'),    # 12*12
        Dropout(0.4),

        # Stage 2: 64 filters, same pattern.
        *_conv_bn_relu(64, 3),                               # 10*10
        *_conv_bn_relu(64, 3),                               # 8*8
        *_conv_bn_relu(64, 5, strides=2, padding='same'),    # 4*4
        Dropout(0.4),

        # Stage 3: 128 filters at constant spatial size.
        *_conv_bn_relu(128, 3, padding='same'),              # 4*4
        Dropout(0.4),

        # Head: the 4x4 convolution collapses the map to 1*1; here the ReLU
        # is applied inside the convolution, before batch normalization.
        Conv2D(128, kernel_size=4, activation='relu'),       # 1*1
        BatchNormalization(),
        Flatten(),
        Dropout(0.4),
        Dense(10, activation='softmax'),
    ]
    network = Sequential(layers)

    # COMPILE WITH ADAM OPTIMIZER AND CROSS ENTROPY COST
    network.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    return network


cnn_nets = 5
# One independently initialized network per ensemble member.
model = [_build_cnn() for _ in range(cnn_nets)]

In [None]:
# DECREASE LEARNING RATE IF PERFORMANCE DOESN'T IMPROVE.
# The callback monitors the training accuracy.
lr_patience = 1
factor = 0.95
lr_reduction_on_plateau = ReduceLROnPlateau(
    monitor='accuracy', factor=factor, patience=lr_patience)

# TRAIN NETWORKS
epochs = 40
history = [0] * cnn_nets
banner = '|' + '-' * 60 + '|'

for j in range(cnn_nets):
    # Every network gets its own random 90/10 train/validation split.
    X_train_, X_val_, y_train_, y_val_ = train_test_split(X_train, y_train, test_size=0.1)

    # Train on augmented batches; validate on the untouched hold-out part.
    augmented_batches = image_data_gen.flow(X_train_, y_train_, batch_size=32)
    history[j] = model[j].fit(
        augmented_batches,
        epochs=epochs,
        steps_per_epoch=X_train_.shape[0] // 32,
        validation_data=(X_val_, y_val_),
        callbacks=[lr_reduction_on_plateau],
        verbose=1,
    )

    # The reported accuracies are the best values reached over all epochs.
    best_train_acc = max(history[j].history['accuracy'])
    best_val_acc = max(history[j].history['val_accuracy'])
    print(banner)
    print("CNN model no: {0:d}:\n Number of epochs={1:d}, Training accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
        j + 1, epochs, best_train_acc, best_val_acc))
    print(banner)

## Make Predictions

In [None]:
# ENSEMBLE PREDICTIONS
# Sum the per-class outputs of every network, then take the class with the
# highest total score for each test image.
preds = np.zeros((X_test.shape[0], 10))
for j in range(cnn_nets):
    preds += model[j].predict(X_test)
y_pred = preds.argmax(axis=1)


## Preview Predictions

In [None]:
# PREVIEW PREDICTIONS
# Show the first 40 test images, each titled with its predicted digit.
plt.figure(figsize=(15, 10))
for sample_idx in range(40):
    # Only the even-numbered cells of the 8x10 grid are filled.
    grid_slot = 2 * (sample_idx + 1)
    test_image = X_test[sample_idx].reshape((28, 28))
    plt.subplot(8, 10, grid_slot)
    plt.imshow(test_image, cmap=plt.cm.Blues)
    plt.title("Prediction: %d" % y_pred[sample_idx], y=0.9)
    plt.axis('off')
plt.subplots_adjust(wspace=0.0, hspace=0.5)
plt.show()