In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
import tensorflow as tf

# **1. Data preparation**

## **1-1 Load data**
**We add the MNIST data to enlarge our training set**

In [None]:
# Kaggle competition CSVs. train.csv carries a "label" column next to the
# pixel columns; test.csv is the set we have to predict.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("database/train.csv", "database/test.csv")
)

# MNIST as shipped with Keras: an (images, labels) pair for each of its two splits.
(x_train1, y_train1), (x_test1, y_test1) = mnist.load_data()

**Merge the MNIST train and test splits, and separate the label column from Kaggle's pixel data (the two sources are joined after normalization)**

In [None]:
# Pool both MNIST splits into one extra block of training images; none of it
# is held out here.
train1 = np.concatenate((x_train1, x_test1), axis=0)

# Build the pooled labels under their own name instead of rebinding y_train1.
# Overwriting y_train1 with a concatenation of itself made this cell
# non-idempotent: every re-run appended y_test1 once more, so the labels
# silently went out of step with the images.
Y_train1 = np.concatenate((y_train1, y_test1), axis=0)

# Flatten every 28x28 image into a 784-value row so it can later be stacked
# with the Kaggle pixel table.
X_train1 = train1.reshape(-1, 28 * 28)

# Kaggle data: split the target column off from the pixel columns.
Y_train = train["label"]
X_train = train.drop(columns=["label"])


## **1-2 Normalization**

**We perform a grayscale normalization to reduce the effect of illumination differences. Moreover, the CNN converges faster on [0..1] data than on [0..255].**

In [None]:

# Scale every pixel value by 1/255 so the inputs lie in [0, 1].
# NOTE: this cell rebinds its own inputs, so run it exactly once per kernel
# session; a second run would rescale X_train1 and then fail on the arrays
# that have already been converted from DataFrames.
X_train1 = X_train1 / 255.0
test = (test / 255.0).to_numpy()

# Stack the Kaggle rows on top of the MNIST rows; the labels are joined in
# the same order so they stay aligned with the images.
X_train = np.concatenate(((X_train / 255.0).to_numpy(), X_train1))
Y_train = np.concatenate((Y_train, Y_train1))



**Convert the labels to one-hot encoding**

In [None]:
# Replace each digit label by a 10-element one-hot row (one slot per class),
# the target layout used with the categorical cross-entropy loss.
Y_train = to_categorical(Y_train, num_classes=10)

## **1-3 Reshape**

**Convert each image from a 1D vector (784 values) to a 3D array (28 × 28 × 1)**

In [None]:
# Turn every flat 784-value row into a 28x28 image with a single channel,
# matching the input_shape of the first Conv2D layer. The leading -1 lets
# NumPy infer the number of images.
X_train, test = (
    flat_pixels.reshape(-1, 28, 28, 1)
    for flat_pixels in (X_train, test)
)

# **2. Model**

We choose to set 64 filters for the first two Conv2D layers and 32 filters for the last two. Each filter transforms a part of the image (defined by the kernel size) using the kernel filter. The kernel filter matrix is applied on the whole image. Filters can be seen as a transformation of the image.

The CNN can isolate features that are useful everywhere from these transformed images (feature maps).

The second important layer in a CNN is the pooling (MaxPool2D) layer. This layer simply acts as a downsampling filter. It looks at each 2×2 block of neighboring pixels and picks the maximal value.

Dropout is a regularization method, where a proportion of nodes in the layer are randomly ignored (setting their outputs to zero) for each training sample.

'relu' is the rectifier (activation function max(0, x)). The rectifier activation function is used to add non-linearity to the network.

The Flatten layer is used to convert the final feature maps into a single 1D vector.

In [None]:
# CNN classifier: two convolutional stages followed by a dense head.
# Every Conv2D/Dense hidden layer is followed by batch normalization.
model = Sequential([
    # Stage 1: two 5x5 convolutions with 64 filters each, then 2x2 max pooling.
    Conv2D(filters=64, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)),
    BatchNormalization(),
    Conv2D(filters=64, kernel_size=(5, 5), activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2, 2)),

    # Stage 2: two 3x3 convolutions with 32 filters each, 2x2 max pooling,
    # then dropout with rate 0.25 as regularization.
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Head: flatten the feature maps, one hidden dense layer, and a softmax
    # output with one unit per digit class.
    Flatten(),
    Dense(256, activation="relu"),
    BatchNormalization(),
    Dense(10, activation="softmax"),
])

# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

In [None]:
# Adam with its default settings; categorical cross-entropy pairs with the
# one-hot labels and the softmax output layer. Accuracy is tracked per epoch.
model.compile(
    loss="categorical_crossentropy",
    optimizer='adam',
    metrics=["accuracy"],
)

**Use ReduceLROnPlateau() to dynamically reduce our learning rate**

In [None]:

# Training hyper-parameters.
epochs = 30
batch_size = 128

# Multiply the learning rate by 0.3 once the *training* loss has gone
# 2 epochs without improving, never dropping below 1e-8.
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.3,
    patience=2,
    min_lr=1e-8,
    verbose=1,
)

# Hold out 20% of the samples for validation and log one line per epoch.
# NOTE(review): Keras documents validation_split as taking the *last* samples
# of the arrays, before shuffling. X_train is the Kaggle rows followed by the
# MNIST rows, so the hold-out comes from the MNIST tail rather than from a
# random sample - confirm that this is the intended validation set.
history = model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
    callbacks=[reduce_lr],
    verbose=2,
)

# **3. Show accuracy and loss**

In [None]:
def _plot_training_curve(history, metric):
    """Plot one per-epoch training metric next to its validation counterpart.

    Args:
        history: The object returned by ``model.fit``; its ``history`` dict
            must contain ``metric`` and ``'val_' + metric``.
        metric: Name of the metric to draw, e.g. ``'accuracy'`` or ``'loss'``.
            It is also used for the y-axis label and the figure title.
    """
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label='train')
    # The second curve is the validation_split hold-out, not the Kaggle test
    # set (which has no labels), so it is labelled 'validation' rather than
    # 'test'.
    ax.plot(history.history['val_' + metric], label='validation')
    ax.set_title('model ' + metric)
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    ax.legend(loc='upper left')
    plt.show()


# One figure per metric, in the same order as before: accuracy, then loss.
_plot_training_curve(history, 'accuracy')
_plot_training_curve(history, 'loss')


In [None]:
# Predict class probabilities for every test image and keep the index of the
# most likely digit.
results = np.argmax(model.predict(test), axis=1)
results = pd.Series(results, name="Label")

# ImageId is the 1-based row number of each test image. Derive it from the
# number of predictions instead of hard-coding 28000: with a different test
# size, pd.concat would pad the shorter column with NaN and produce an
# invalid submission.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)

# Write the two-column (ImageId, Label) file without the DataFrame index.
submission.to_csv("submission.csv", index=False)

In [None]:
# Bare expression: shows the name of the CSV file written by the previous
# cell as this cell's output.
"submission.csv"