In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit

# Keras imports
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


### MNIST

### Read and format the data

In [2]:
# Load the training and test CSV files into DataFrames
train_df, test_df = map(pd.read_csv, ('data/train.csv', 'data/test.csv'))

# Preview the first rows of the training frame
train_df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Separate the target column from the pixel columns
labels = train_df['label']

# Training images: drop the label column and view every row as a 28x28x1 image.
# `drop(..., axis=1)` and `.values` replace the positional-axis `drop('label', 1)`
# and `.as_matrix()` calls, neither of which is accepted by newer pandas releases.
train_img = train_df.drop('label', axis=1).values
train_img = train_img.reshape(train_img.shape[0], 28, 28, 1)

# Test images: same layout; the test frame is converted as-is (no column dropped)
test_img = test_df.values
test_img = test_img.reshape(test_img.shape[0], 28, 28, 1)

In [4]:
# Helper functions to normalize (divide by 255) and standardize (zero mean, unit std) the dataset
def normalize(x):
    """Return `x` with every element divided by 255."""
    max_pixel = 255
    return x / max_pixel

def standartize(x):
    """Standardize `x` to zero mean and unit standard deviation.

    The mean and standard deviation are single global statistics computed
    over all elements of `x` (not per sample or per pixel).

    Parameters
    ----------
    x : numpy.ndarray
        Values to standardize.

    Returns
    -------
    numpy.ndarray
        `(x - mean) / std`. If `x` is constant (std == 0) the centered values
        are returned unscaled, instead of dividing by zero and producing NaN.
    """
    mean = np.mean(x)
    std = np.std(x)
    centered = x - mean
    if std == 0:
        # Constant input: scaling is undefined, the centered values are all zero
        return centered
    return centered / std

# Apply the pre-processing.
# Guarded so that re-running this cell does not divide already-scaled images
# by 255 a second time. Assumes raw pixel values lie in 0..255, so a maximum
# above 1 means the array has not been normalized yet -- TODO confirm.
if train_img.max() > 1:
    train_img = normalize(train_img)

### One-hot encoding

In [5]:
# One-hot encode the digit labels. The number of classes is fixed at 10 so the
# encoded width always matches the 10-unit softmax output layer, instead of
# being inferred from the largest label present in the data.
labels = to_categorical(labels, num_classes=10)
labels

array([[ 0.,  1.,  0., ...,  0.,  0.,  0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  1.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  1.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  1.]])

### Time to go deep: Convolutional Neural Networks

### Build the model

In [8]:
# Keras model
# Two convolutional blocks (conv + relu -> batch norm -> max pool) followed by
# a fully connected classifier with dropout between the dense layers.
cnn = Sequential([
    # 1st conv block: input shape is taken from a single training image
    Conv2D(32, kernel_size=(5, 5),
           activation='relu',
           input_shape=train_img.shape[1:]),
    BatchNormalization(),
    MaxPooling2D(pool_size=(3, 3)),

    # 2nd conv block
    Conv2D(64, kernel_size=(5, 5), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # regular mlp ending in a 10-way softmax
    Flatten(),
    Dropout(0.2),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax'),
])

In [9]:
# Constants
batch_size = 64
epochs = 60
seed = 42  # fixes the train/held-out split so repeated runs are comparable

# Split the dataset into a training part and a 5% held-out part.
# Stratification is done on integer class ids recovered from the one-hot
# matrix: passing the 2-D one-hot array as `y` relies on version-specific
# handling of multi-column targets in scikit-learn.
class_ids = np.argmax(labels, axis=1)
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.05, random_state=seed)
# n_splits=1, so the generator yields exactly one (train, test) index pair
train_index, test_index = next(sss.split(train_img, class_ids))
x_train, x_test = train_img[train_index], train_img[test_index]
y_train, y_test = labels[train_index], labels[test_index]

# Image Augmentation: only small rotations and shifts are enabled
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)

# Options for the model
print("Size of the CNN: %s" % cnn.count_params())

sgd = keras.optimizers.SGD(lr=0.05, momentum=0.9, decay=1e-6, nesterov=True)
cnn.compile(loss=keras.losses.categorical_crossentropy,
    optimizer=sgd,
    metrics=['accuracy'])

# Train on augmented batches; the held-out images (not augmented) are used
# as validation data after every epoch.
history = cnn.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
    steps_per_epoch=x_train.shape[0] // batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test))

# score is [loss, accuracy] on the held-out images
score = cnn.evaluate(x_test, y_test, verbose=0)

# Print the confusion matrix of the model (rows: true class, columns: predicted)
pred_values = np.argmax(cnn.predict(x_test), axis=1)
cm = confusion_matrix(np.argmax(y_test, axis=1), pred_values)
print(cm)

# Print metrics
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Size of the CNN: 254986
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
[[200   0   0   0   0   0   0   0   0   0]
 [  0 257   0   0   0   0   0   0   0   0]
 [  0   0 191   0   0   0   0   0   0   0]
 [  0   0   0 216   0   0   0   1   0   1]
 [  0   1   0   0 193   0   0   0   0   0]
 [  0   0   0   1   0 176   0   0   2   1]
 [  0  

In [13]:
# Predict a digit for every test image. The test pixels receive the same /255
# scaling as the training images. Taking the argmax of the softmax output
# matches how the held-out predictions are computed for the confusion matrix
# and avoids `Sequential.predict_classes`, which newer Keras releases no
# longer provide.
predictions = np.argmax(cnn.predict(normalize(test_img)), axis=1)

# Build the submission table: 1-based image ids paired with predicted labels
submissions = pd.DataFrame({"ImageId": np.arange(1, len(predictions) + 1),
                            "Label": predictions})
submissions.to_csv("submission.txt", index=False, header=True)

