# **Imports**
First of all, we import all the libraries and functions that we will use throughout the notebook.

In [39]:
import os
import csv
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow

from tensorflow import keras 
from keras import models
from keras import applications
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import *
from keras.models import *
from keras import backend as K
from keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# **Data loading**

We read the training data, test data and sample submissions.

In [40]:
# Read the three competition CSV files in one pass: the training data, the
# test data and the sample submission.
train, test, sample = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/digit-recognizer/train.csv',
        '../input/digit-recognizer/test.csv',
        '../input/digit-recognizer/sample_submission.csv',
    )
)

# Column 0 of the training frame is used as the target Y; every remaining
# column goes into the feature matrix X.
Y = np.array(train.iloc[:, 0])
X = np.array(train.iloc[:, 1:])

# n = number of rows of X (samples), m = number of columns of X (features).
n, m = X.shape

print("Number of training samples: ", n)
print ("Number of features: ", m)

# **Data display**

Let's see some randomly selected images from the dataset.

In [41]:
# Pick 50 random row positions in [0, n) and draw them on a 5 x 10 grid.
index = np.random.randint(0, n, 50)
plt.figure(figsize = (20, 10))
for cell, row in enumerate(index, start = 1):
    plt.subplot(5, 10, cell)
    # Each selected row of X is laid out as a 28 x 28 x 1 array for display.
    image = np.resize(X[row], (28, 28, 1))
    plt.imshow(image)
print("Random examples".center(150))   
plt.show()

We show the number of examples of each label. As you can see the training data is balanced.

In [42]:
# Count the examples per label in a single pass. np.unique returns the
# distinct labels in sorted order together with their frequencies, so Y is no
# longer converted to a Python list once per label.
x_labels, y_labels = (
    values.tolist() for values in np.unique(Y, return_counts=True)
)

plt.figure(figsize = (15,5))
plt.ylabel('Number of labels',  fontsize=20)
plt.xlabel('Labels',  fontsize=20)
# Put the ticks on the label values themselves rather than on 0..k-1, so the
# axis stays correct even if the labels are not a contiguous range from zero.
plt.xticks(x_labels)
plt.bar(x_labels, y_labels)
plt.show()

# **Data preprocessing**

In order to train a neural network we perform the following normalizations and transformations:

* Resize the images to $28 \times 28 \times 1$, which will be the input of our neural network.
* Transform the output $Y$ class following a one-hot encoding, because we are facing a classification problem with multiple classes.
* Normalize the training data dividing by the maximum value of the features ($255$).

## Resize the images

In [43]:
# Reshape X to (number of samples, 28, 28, 1) and print the array shape on
# either side of the change.
print('data dimensions before resizing data: ', X.shape, '\n')

n_images = X.shape[0]
X = X.reshape((n_images, 28, 28, 1))

print('data dimensions after resiaing data: ', X.shape)

## One-hot encoding

In [44]:
# Print the first target value as it is before the conversion.
print('Example output before one-hot encoding: ', Y[0], '\n')

# Replace Y with its one-hot (categorical) encoding.
# NOTE: this rebinds Y, so running the cell a second time would encode the
# already-encoded array again.
Y = to_categorical(Y)

# Print the same first target again to show the encoded form.
print('Example output after one-hot encoding: ', Y[0])

## Normalize the training data

In [45]:
# Report the feature range, divide every feature by 255, then report the
# range again so the effect of the scaling is visible.
print('Minimum value of the features before normalization: ', np.min(X))
print('Maximum value of features before normalization: ', np.max(X), '\n')

# NOTE: this rebinds X, so running the cell a second time would divide the
# already-scaled values by 255 again.
X = X / 255.0

# These two messages describe the values *after* scaling; they previously
# repeated the "before normalization" wording, which made the output misleading.
print('Minimum value of the features after normalization: ', np.min(X))
print('Maximum value of features after normalization: ', np.max(X))

# Division of data into train and validation

We split the train set into two new sets, $p$ and $v$. We use the train set $p$ to train the model and the validation set $v$ to estimate the hyperparameters (number of iterations, etc.). The train set is divided into $80\%$ for $p$ and $20\%$ for $v$.

In [46]:
# Sequential 80/20 split: the leading 80% of the row positions form the
# training subset (p) and the trailing 20% the validation subset (v).
# The positions are taken in their original order; nothing is shuffled here.
indexs = np.arange(n)
posicion = int(n * 0.8)
idx_p, idx_v = indexs[:posicion], indexs[posicion:]

X_train_p, Y_train_p = X[idx_p], Y[idx_p]
X_train_v, Y_train_v = X[idx_v], Y[idx_v]

print(X_train_v.shape)
print("Number of examples of X_train_p: ", X_train_p.shape[0])
print("Number of examples of X_train_v: ", X_train_v.shape[0])

# **Model training**

In [47]:
inputs = Input(shape = (28, 28, 1))

# Layers in the order they are applied: five convolutions with two max-pooling
# steps in between, dropout, flatten, then a dense head that ends in a
# 10-unit softmax.
layer_stack = [
    Conv2D(32, kernel_size = (3, 3), activation = 'relu'),
    Conv2D(64, kernel_size = (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(128, kernel_size = (3, 3), activation = 'relu'),
    Conv2D(256, kernel_size = (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(512, kernel_size = (3, 3), activation = 'relu'),
    Dropout(0.25),
    Flatten(),
    Dense(512, activation = 'relu'),
    Dense(256, activation = 'relu'),
    Dropout(0.25),
    Dense(10, activation = 'softmax'),
]

# `model` holds the running output tensor while the stack is threaded through;
# after the loop it is the softmax output that the next cell wraps in a Model.
model = inputs
for layer in layer_stack:
    model = layer(model)

To mitigate overfitting we apply early stopping.

In [48]:
# Wrap the input and output tensors in a Keras Model. From this line on,
# `model` refers to the Model object rather than to the output tensor.
model = Model(inputs = inputs, outputs = model)
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
callbacks_list = [
    # Save to model.h5 every time the validation loss improves.
    keras.callbacks.ModelCheckpoint(
        filepath='model.h5',
        monitor='val_loss', save_best_only=True, verbose=1),
    # Stop once the validation loss has not improved for 10 epochs.
    # restore_best_weights=True rolls the in-memory model back to the best
    # epoch when stopping triggers; without it the model used afterwards would
    # carry the weights of the last (already degraded) epoch.
    EarlyStopping(monitor='val_loss', patience=10, verbose=1,
                  restore_best_weights=True)
]

model.summary()

# **Training the model**

We start training the network, specifying the training and validation sets and the maximum number of epochs. We keep the returned history in the variable $h$ so that we can plot the results later.

In [49]:
# Fit on the p subset and evaluate on the v subset; the callbacks defined
# earlier are attached to the run. The returned history object is kept in `h`.
h = model.fit(
    X_train_p,
    Y_train_p,
    batch_size = 128,
    epochs = 100,
    validation_data = (X_train_v, Y_train_v),
    callbacks = callbacks_list,
)

We plot the history of the loss function on the training and validation sets.

In [50]:
plt.figure(figsize = (12,6))

# One point per recorded epoch; epochs are numbered from 1 on the x axis.
# `iterations` is therefore the number of recorded epochs plus one.
iterations = len(h.history['val_loss']) + 1
epoch_axis = range(1, iterations)

plt.plot(epoch_axis, h.history['loss'], 'g', label = 'train')
plt.plot(epoch_axis, h.history['val_loss'], 'r', label = 'validation')
# Label both axes so the figure can be read on its own, matching the bar
# chart of the label distribution earlier in the notebook.
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.legend(['train', 'val'], loc ='upper left')
plt.xticks(list(epoch_axis))
plt.show()

Then we classify the test examples.

In [51]:
# Classify all test rows in one batched call instead of calling
# model.predict once per row, which repeats the per-call overhead for every
# example and assigns a length-1 array into a scalar cell.
n_test = sample.shape[0]

# Same preprocessing as before: reshape each row to 28 x 28 x 1 and divide
# by 255. Only the first n_test rows are used, as in the original loop.
X_test = np.array(test.iloc[:n_test, :]).reshape((n_test, 28, 28, 1)) / 255

# The predicted class is the index of the largest softmax output per row; it
# is written into the second column of the submission frame.
test_scores = model.predict(X_test, batch_size = 128)
sample.iloc[:, 1] = test_scores.argmax(axis = 1)

Finally, we save the classified examples.

In [52]:
# Write the filled-in `sample` frame to ./submission.csv; index=False keeps
# the DataFrame index out of the file.
sample.to_csv('./submission.csv', index = False)