# Kaggle Competition - Digit Recognizer

Resources:
- [Digit Recognizer Competition](https://www.kaggle.com/competitions/digit-recognizer)
- [Introduction to CNN Keras - 0.997 (top 6%)](https://www.kaggle.com/code/yassineghouzam/introduction-to-cnn-keras-0-997-top-6)
- [Deep Neural Network Keras way](https://www.kaggle.com/code/lingjian/deep-neural-network-keras-way/notebook)


## Imports

In [None]:
# Libraries
import itertools
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.callbacks import ReduceLROnPlateau
from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPool2D
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Locations are resolved from the parent of the current working directory
root = Path.cwd().parent
train_filename = root.joinpath("data", "train.csv")
test_filename = root.joinpath("data", "test.csv")
submission_filepath = root.joinpath("data", "dr_submission.csv")
model_filename = root.joinpath("app", "model.pkl")

# Seed NumPy's global random generator so NumPy-driven steps repeat across runs
random_seed = 2
np.random.seed(random_seed)

## Train and Test data

In [None]:
# Load the training and test CSV files into data frames
train = pd.read_csv(train_filename)
test = pd.read_csv(test_filename)

# Split the training frame into a feature matrix and a label vector
X_train = train.iloc[:,1:].values.astype('float32')  # every column after the first, as float32 features
y_train = train.iloc[:,0].values.astype('int32')  # first column only, used as the integer labels
X_test = test.values.astype('float32')  # all columns of the test frame are used as features

# Drop the data frames now that the arrays have been extracted
del train, test

## Pre-process data

In [None]:
# Divide every pixel value by 255, then reshape to 4 axes:
# (num_images, img_rows, img_cols, channels) = (-1, 28, 28, 1)
X_train = (X_train / 255).reshape(-1, 28, 28, 1)
X_test = (X_test / 255).reshape(-1, 28, 28, 1)

In [None]:
# Preview training images 6-8 side by side, each titled with its label
for position, i in enumerate(range(6, 9), start=1):
    # Explicit (rows, cols, index) form: one row of three panels
    plt.subplot(1, 3, position)
    # Drop the trailing channel axis so imshow receives a plain 2-D image,
    # matching how the training-split preview slices its image
    plt.imshow(X_train[i][:, :, 0], cmap=plt.get_cmap('gray'))
    plt.title(y_train[i])
plt.show()

## One Hot encode labels

In [None]:
# Encode each integer label as a length-10 indicator vector
num_classes = 10
y_train = to_categorical(y_train, num_classes=num_classes)

In [None]:
# Draw the first encoded label as a line plot, titled with the vector itself,
# with one x tick per class
plt.plot(y_train[0])
plt.title(y_train[0])
plt.xticks(np.arange(num_classes))
plt.show()

## Split training and validation set

In [None]:
# Hold out 10% of the training data for validation; the split is seeded
x_sub_train, x_sub_val, y_sub_train, y_sub_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=random_seed,
)

In [None]:
# Show the first image of the training split with its channel axis dropped
plt.imshow(x_sub_train[0, :, :, 0], cmap=plt.get_cmap('gray'))
plt.show()

## Design neural network architecture

In [None]:
# Architecture is:
# In -> [[Conv2D->relu]*2 -> MaxPool2D -> Dropout]*2 -> Flatten -> Dense -> Dropout -> Out
model = Sequential([
    # Stage 1: two 5x5 convolutions with 32 filters, 2x2 max-pool, 25% dropout
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same', activation='relu',
           input_shape=(28, 28, 1)),
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same', activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: two 3x3 convolutions with 64 filters, 2x2 max-pool, 25% dropout
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.25),

    # Head: flatten, 256-unit dense layer, 50% dropout, 10-way softmax output
    Flatten(),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(10, activation="softmax"),
])

## Set the optimizer and annealer

In [None]:
# RMSprop with explicit hyper-parameters. The legacy `decay` argument is left
# out: it was set to 0.0 (no decay), and newer Keras optimizers no longer
# accept it, so omitting it keeps this cell working across versions.
optimizer = RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08)

# Annealer: halve the learning rate when validation accuracy has not improved
# for 3 epochs, never going below 1e-5
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy', patience=3, verbose=1, factor=0.5, min_lr=0.00001)

## Compile network

In [None]:
# One-hot targets, so use categorical cross-entropy; report accuracy per epoch
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

## Data augmentation

In [None]:
# Augmentation pipeline for the training images
datagen = ImageDataGenerator(
    # Random geometric perturbations
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    # Flips are switched off
    horizontal_flip=False,
    vertical_flip=False,
    # Centering, std-normalisation and whitening are all switched off
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

# NOTE(review): fit() presumably only matters for the featurewise/ZCA options,
# which are disabled above - confirm before relying on or removing this call
datagen.fit(x_sub_train)

## Fit the model

In [None]:
# Training schedule
batch_size = 86
epochs = 1
# Number of whole batches in one pass over the training split
steps_per_epoch = x_sub_train.shape[0] // batch_size

# Train on augmented batches - approx. 2min depending on machine
history = model.fit(
    datagen.flow(x_sub_train, y_sub_train, batch_size=batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=(x_sub_val, y_sub_val),
    callbacks=[learning_rate_reduction],
    verbose=2,
)

# # Alternative: train without data augmentation
# history = model.fit(x_sub_train, y_sub_train, batch_size=batch_size, epochs=epochs, validation_data=(x_sub_val, y_sub_val), verbose=2)

## Evaluate the model

In [None]:
# Plot the loss and accuracy curves for training and validation
fig, ax = plt.subplots(2, 1)

# Top panel: loss per epoch. Both curves are drawn through ax[0] directly, so
# no extra `axes=` keyword is needed on the plot call.
ax[0].plot(history.history['loss'], color='b', marker='+', label="Training loss")
ax[0].plot(history.history['val_loss'], color='r', marker='+', label="Validation loss")
ax[0].legend(loc='best', shadow=True)

# Bottom panel: accuracy per epoch
ax[1].plot(history.history['accuracy'], color='b', marker='+', label="Training accuracy")
ax[1].plot(history.history['val_accuracy'], color='r', marker='+', label="Validation accuracy")
ax[1].legend(loc='best', shadow=True)

plt.show()

In [None]:
# Look at confusion matrix
def plot_confusion_matrix(
    cm, classes, title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D array of counts, indexed as ``cm[true, predicted]``.
        classes: Sequence of tick labels, one per row/column of ``cm``.
        title: Title placed above the plot.
        cmap: Matplotlib colormap used for the cells.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # One tick per class on both axes
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Write each count into its cell; use white text on cells above half the
    # maximum count so the number stays readable on the darker background
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Tighten the layout last so the axis labels are taken into account
    plt.tight_layout()

# Model outputs for every validation image (one score per class)
Y_pred = model.predict(x_sub_val)
# Index of the highest-scoring class for each prediction
Y_pred_classes = Y_pred.argmax(axis=1)
# Recover the integer class from each one-hot validation target
Y_true = y_sub_val.argmax(axis=1)
# Tabulate true against predicted classes
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)
# Draw the table with one tick per digit
plot_confusion_matrix(confusion_mtx, classes=range(10))

In [None]:
# Display some error results

# Boolean mask: True where the predicted class differs from the true class
errors = Y_pred_classes != Y_true

# Keep only the misclassified validation samples
X_val_errors = x_sub_val[errors]
Y_true_errors = Y_true[errors]
Y_pred_errors = Y_pred[errors]
Y_pred_classes_errors = Y_pred_classes[errors]

def display_errors(errors_index, img_errors, pred_errors, obs_errors):
    """Show up to 6 images with their predicted and real labels.

    The images are laid out on a 2x3 grid, filled row by row in the order
    given by ``errors_index``. When fewer than 6 indices are supplied, only
    those are drawn and the unused panels are hidden.

    Args:
        errors_index: Sequence of indices into the three arrays below.
        img_errors: Images; each selected entry is reshaped to (28, 28).
        pred_errors: Predicted label for each image.
        obs_errors: True label for each image.
    """
    nrows = 2
    ncols = 3
    fig, ax = plt.subplots(nrows, ncols, sharex=True, sharey=True)
    panels = ax.ravel()  # row-major order: left to right, top to bottom

    shown = 0
    # zip stops at the shorter input, so this never reads past errors_index
    for panel, error in zip(panels, errors_index):
        panel.imshow((img_errors[error]).reshape((28, 28)))
        panel.set_title("Predicted label :{}\nTrue label :{}".format(pred_errors[error], obs_errors[error]))
        shown += 1

    # Hide any panel that received no image
    for panel in panels[shown:]:
        panel.set_visible(False)

# Score the model gave to its (wrong) predicted class, per misclassified sample
Y_pred_errors_prob = np.max(Y_pred_errors, axis=1)

# Score the model gave to the true class of each misclassified sample.
# Pairing every row index with its true-class column reads exactly one value
# per row, without building an n x n intermediate matrix.
true_prob_errors = Y_pred_errors[np.arange(len(Y_true_errors)), Y_true_errors]

# Gap between the predicted-class score and the true-class score
delta_pred_true_errors = Y_pred_errors_prob - true_prob_errors

# Indices of the misclassified samples, ordered by increasing gap
sorted_dela_errors = np.argsort(delta_pred_true_errors)

# The 6 largest gaps (fewer if there are fewer misclassified samples)
most_important_errors = sorted_dela_errors[-6:]

# Show those errors
display_errors(most_important_errors, X_val_errors, Y_pred_classes_errors, Y_true_errors)

## Prediction

In [None]:
# Predict the test set silently and keep the highest-scoring class per image
predictions = model.predict(X_test, verbose=0).argmax(axis=1)

## Submission

In [None]:
# Two-column table: 1-based image ids alongside the predicted labels
submission = pd.DataFrame(
    {
        "ImageId": range(1, len(predictions) + 1),
        "Label": predictions,
    }
)
# Write the table with a header row and without the frame index
submission.to_csv(submission_filepath, header=True, index=False)

## Save model to disk

In [None]:
# Serialise the trained model to disk. The context manager guarantees the
# file handle is flushed and closed, even if pickling raises.
# NOTE(review): whether a Keras model can be pickled depends on the installed
# Keras version - confirm against whatever loads app/model.pkl.
with open(model_filename, 'wb') as model_file:
    pickle.dump(model, model_file)