# **Imports**
First of all, we import all the libraries and functions that we will use throughout the notebook.

In [1]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.utils import to_categorical

ModuleNotFoundError: No module named 'pandas'

# **Data loading**

In [None]:
# Read the whole CSV (skipping its header row) into one 2-D float array.
test = np.genfromtxt('data/train.csv', skip_header=1, delimiter=',')
print("Total samples:", test.shape)

# Column 0 becomes the label vector Y; every remaining column goes into X.
Y, X = test[:, 0], test[:, 1:]

# n = number of rows (samples), m = number of columns (features) of X.
n, m = X.shape

print (n)
print (m)

# **Data display**

Let's see some randomly selected images from the dataset.

In [None]:
# Draw 50 row indices at random (with replacement) from [0, n).
index = np.random.randint(low=0, high=n, size=50)

# Lay the selected rows out on a 5 x 10 grid, each reshaped to 28 x 28.
plt.figure(figsize=(20, 10))
for i, idx in enumerate(index):
    image = X[idx].reshape(28, 28)
    plt.subplot(5, 10, i + 1)  # subplot positions are 1-based
    plt.imshow(image)
plt.show()

We show the number of examples of each label. As you can see, the training data is balanced.

In [None]:
# Count the samples per distinct label in a single vectorized pass.
# np.unique returns the labels in ascending order together with their counts,
# which avoids rebuilding and rescanning a Python list once per label.
unique_labels, label_counts = np.unique(Y, return_counts=True)
x_labels = unique_labels.tolist()
y_labels = label_counts.tolist()

plt.figure(figsize = (15,5))
plt.ylabel('Number of labels')
plt.xlabel('Labels')
# Place the ticks on the actual label values instead of assuming the labels
# are exactly 0..k-1.
plt.xticks(x_labels)
plt.bar(x_labels, y_labels)
plt.show()

# **Data preprocessing**

Before training the model, we normalize the input data by dividing every pixel value by 255 (for 8-bit images this rescales them to the range $[0, 1]$).

In [None]:
# Scale the feature columns by 255. The division is applied to the raw array
# `test` (loaded in the data-loading cell) rather than to `X` itself, so
# re-running this cell does not divide the data a second time.
X = test[:, 1:] / 255

# **Division of data into train and test**

Since we are facing a classification problem with multiple classes, we will transform the class labels $Y$ into a one-hot encoding.

In [None]:
# One-hot encode the label column. The labels are read from the raw array
# `test` (column 0) instead of from the current `Y`, so re-running this cell
# does not try to encode a matrix that is already one-hot encoded.
Y = to_categorical(test[:, 0])

Let's show a random example to check that everything is correct.

In [None]:
# Pick one row index uniformly at random, draw that row as a 28 x 28 image
# and print the matching row of Y next to it.
sample = np.random.choice(len(X))
plt.imshow(X[sample].reshape(28, 28))
print("Output associated to the label: ", Y[sample])

We will use $80\%$ of the data for training and the remaining $20\%$ for testing. To choose them randomly we will use a random shuffle.

In [None]:
# Seed Python's RNG so the shuffle below yields the same permutation each run.
random.seed(1)
indexs = np.arange(n)
random.shuffle(indexs)

# The first 80% of the shuffled indices go to training, the rest to testing.
position = int(n * 0.8)
train_idx, test_idx = indexs[:position], indexs[position:]

X_train, Y_train = X[train_idx], Y[train_idx]
X_test, Y_test = X[test_idx], Y[test_idx]

print("Number of examples for training: ", X_train.shape[0])
print("Number of examples for testing: ", X_test.shape[0])

# **Model training**

In [None]:
inputs = Input(shape = (28, 28, 1))
conv1 = Conv2D(32, kernel_size  = (3, 3), activation = 'relu')(inputs)
conv2 = Conv2D(64, kernel_size  = (3, 3), activation = 'relu')(conv1)
pool1 = MaxPooling2D(pool_size  = (2, 2))(conv2)
conv3 = Conv2D(128, kernel_size = (3, 3), activation = 'relu')(pool1)
pool2 = MaxPooling2D(pool_size  = (2, 2))(conv3)
x = Dropout(0.25)(pool2)
flat = Flatten()(x)