In [86]:
import numpy as np
from keras import layers
from keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.models import Model
import pandas as pd
import keras.backend as K


%matplotlib inline

First, let's import the data and split it on a 90-10 basis.

In [10]:
from sklearn.model_selection import train_test_split

# Loading the data (Digits); the first row of the CSV is a header and is skipped.
train = np.loadtxt('train.csv',skiprows = 1,delimiter = ',')

# Hold out 10% of the rows for evaluation.
# random_state pins the shuffle so a Restart & Run All reproduces the same split
# (and therefore comparable accuracy figures); stratifying on column 0, the
# digit label, keeps the class proportions the same in both parts.
train, test = train_test_split(
    train,
    train_size = 0.90,
    random_state = 42,
    stratify = train[:, 0],
)



After extracting the labels and one-hot encoding them, I will convert the data to the necessary shape.

In [11]:
from sklearn.preprocessing import OneHotEncoder

# Column 0 is the digit label; the remaining columns are the pixels of a
# 28x28 image. Pixels are divided by 255 (presumably 8-bit intensities, giving
# values in [0, 1]) and reshaped to the (samples, height, width, channels)
# layout the network expects. The division already yields a fresh array, so no
# extra copies are needed.
X_train = (train[:,1:]/255.).reshape(train.shape[0],28,28,1)
X_test = (test[:,1:]/255.).reshape(test.shape[0],28,28,1)

# NOTE: newer scikit-learn releases rename `sparse` to `sparse_output`.
oh = OneHotEncoder(sparse = False)
# Fit the encoder on the training labels only, then reuse that fitted encoder for
# the test labels. Re-fitting on the test labels (as fit_transform would do)
# derives the columns from whatever classes happen to be in the test split, so
# the two label matrices are not guaranteed to share the same column layout.
Y_train = oh.fit_transform(train[:,[0]])
Y_test = oh.transform(test[:,[0]])
print ("number of training examples = " + str(X_train.shape[0]))
print ("number of test examples = " + str(X_test.shape[0]))
print ("X_train shape: " + str(X_train.shape))
print ("Y_train shape: " + str(Y_train.shape))
print ("X_test shape: " + str(X_test.shape))
print ("Y_test shape: " + str(Y_test.shape))

number of training examples = 37800
number of test examples = 4200
X_train shape: (37800, 28, 28, 1)
Y_train shape: (37800, 10)
X_test shape: (4200, 28, 28, 1)
Y_test shape: (4200, 10)


In [90]:
def DigitRecognizer(input_shape):
    """
    Build the convolutional network used to classify the digit images.

    Architecture, in order: zero-padding, a 5x5 and a 3x3 convolution with 64
    filters each, then a 32-filter and a 128-filter 2x2 convolution stage.
    Every stage ends with batch normalisation, dropout and 2x2 max-pooling.
    The flattened features go through a 1024-unit dense layer into a
    10-way softmax.

    Arguments:
    input_shape -- shape of a single input image (batch dimension excluded)

    Returns:
    model -- an uncompiled Keras Model() instance named 'DigitRecognizer'
    """
    inputs = Input(input_shape)

    # Surround the image with a 3-pixel border of zeros.
    net = ZeroPadding2D((3, 3))(inputs)

    # Stage 1: two 64-filter convolutions (5x5 unpadded, then 3x3 'same').
    net = Conv2D(64, (5, 5), activation = 'relu', name = 'conv0')(net)
    net = Conv2D(64, (3, 3), activation = 'relu', padding = 'same', name = 'conv1')(net)
    net = BatchNormalization(axis = 3, name = 'bn0')(net)
    net = Dropout(rate = 0.4)(net)
    net = MaxPooling2D((2, 2), name = 'max_pool0')(net)

    # Stage 2: 32-filter 2x2 convolution; ReLU is applied again after
    # batch normalisation and dropout.
    net = Conv2D(32, (2, 2), activation = 'relu', padding = 'same', name = 'conv2')(net)
    net = BatchNormalization(axis = 3, name = 'bn1')(net)
    net = Dropout(rate = 0.3)(net)
    net = Activation('relu')(net)
    net = MaxPooling2D((2, 2), name = 'max_pool1')(net)

    # Stage 3: same layout as stage 2 but with 128 filters.
    net = Conv2D(128, (2, 2), activation = 'relu', padding = 'same', name = 'conv3')(net)
    net = BatchNormalization(axis = 3, name = 'bn2')(net)
    net = Dropout(rate = 0.3)(net)
    net = Activation('relu')(net)
    net = MaxPooling2D((2, 2), name = 'max_pool2')(net)

    # Classifier head: flatten -> dense(1024) -> dropout -> softmax over 10 classes.
    net = Flatten()(net)
    net = Dense(1024, activation = 'relu', name = 'fc1')(net)
    net = Dropout(rate = 0.3)(net)
    outputs = Dense(10, activation = 'softmax', name = 'fc2')(net)

    return Model(inputs = inputs, outputs = outputs, name = 'DigitRecognizer')

Let's create and view the structure of the model:

In [77]:
# Instantiate the network for 28x28 single-channel images and print its layers.
model = DigitRecognizer(input_shape = (28, 28, 1))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_14 (InputLayer)        (None, 28, 28, 1)         0         
_________________________________________________________________
zero_padding2d_14 (ZeroPaddi (None, 34, 34, 1)         0         
_________________________________________________________________
conv0 (Conv2D)               (None, 30, 30, 64)        1664      
_________________________________________________________________
conv1 (Conv2D)               (None, 30, 30, 64)        36928     
_________________________________________________________________
bn0 (BatchNormalization)     (None, 30, 30, 64)        256       
_________________________________________________________________
dropout_48 (Dropout)         (None, 30, 30, 64)        0         
_________________________________________________________________
max_pool0 (MaxPooling2D)     (None, 15, 15, 64)        0         
__________

The model has a large number of parameters, so I added dropout to reduce overfitting. Let's train it for 8 epochs, evaluating on the test set after each one to monitor performance on both sets.

In [78]:
# Adam optimiser with categorical cross-entropy for the one-hot labels;
# accuracy is tracked so evaluate() returns [loss, accuracy].
model.compile(
    loss = "categorical_crossentropy",
    optimizer = "adam",
    metrics = ["accuracy"],
)

In [80]:
# Train one epoch per fit() call so that both sets can be scored after every epoch.
for epoch in range(8):
    model.fit(x = X_train, y = Y_train, batch_size = 512, epochs = 1)

    # evaluate() returns [loss, accuracy]; index 1 is the accuracy.
    preds_train = model.evaluate(x = X_train, y = Y_train)
    preds_test = model.evaluate(x = X_test, y = Y_test)

    divider = '--------------------------'
    report_lines = [
        '',
        'Epoch ' + str(epoch + 1),
        divider,
        "Train Accuracy = " + str(round(preds_train[1]*100,2))+'%',
        "Validation Accuracy = " + str(round(preds_test[1]*100,2))+'%',
        divider,
        '',
    ]
    print('\n'.join(report_lines))

Epoch 1/1

Epoch 1
--------------------------
Train Accuracy = 99.25%
Validation Accuracy = 98.76%
--------------------------

Epoch 1/1

Epoch 2
--------------------------
Train Accuracy = 99.59%
Validation Accuracy = 99.19%
--------------------------

Epoch 1/1

Epoch 3
--------------------------
Train Accuracy = 99.46%
Validation Accuracy = 99.05%
--------------------------

Epoch 1/1

Epoch 4
--------------------------
Train Accuracy = 99.67%
Validation Accuracy = 99.31%
--------------------------

Epoch 1/1

Epoch 5
--------------------------
Train Accuracy = 99.74%
Validation Accuracy = 99.12%
--------------------------

Epoch 1/1

Epoch 6
--------------------------
Train Accuracy = 99.75%
Validation Accuracy = 99.19%
--------------------------

Epoch 1/1

Epoch 7
--------------------------
Train Accuracy = 99.76%
Validation Accuracy = 99.26%
--------------------------

Epoch 1/1

Epoch 8
--------------------------
Train Accuracy = 99.7%
Validation Accuracy = 99.1%
--------------

Here I effectively trained the model for 16 epochs, since I ran the cell twice. It's clear that the accuracy has plateaued, so there is no point in training further. But that can also mean that the model is overfitting. Let's test it by submitting to Kaggle.

In [81]:
# Read the Kaggle test set (header row skipped) and give every row the
# (28, 28, 1) image layout.
final = np.loadtxt(fname = 'test.csv', delimiter = ',', skiprows = 1)
final = np.reshape(final, (len(final), 28, 28, 1))

In [82]:
# Scale the pixels the same way as the training data and make sure the array
# has the (samples, 28, 28, 1) layout.
X_final = (final/255.).reshape(final.shape[0],28,28,1)

# Softmax output: one row of class probabilities per image.
probabilities = model.predict(X_final)

# One-hot encode the most probable class of each row. Rounding the probabilities
# instead yields an all-zero row whenever no class exceeds 0.5, and such a row
# is later decoded as digit 0 regardless of what the model actually favoured.
predictions = np.eye(probabilities.shape[1])[np.argmax(probabilities, axis = 1)]
predictions.shape

(28000, 10)

In [83]:
# Multiplying each row of `predictions` by the column vector 0..9 turns a
# one-hot row into the digit it encodes.
digit_values = np.arange(10).reshape(10, 1)
new = predictions @ digit_values
prd = pd.Series(new.reshape(X_final.shape[0],)).astype(int)

In [84]:
# Build the submission table: ImageId is the 1-based row number, Label the
# predicted digit. Write it without the DataFrame index.
output = pd.DataFrame(
    {'ImageId': np.asarray(prd.index) + 1, 'Label': prd.values},
    columns = ['ImageId', 'Label'],
)
output.to_csv('keras.csv', index = False)

"Your submission scored 0.98942, which is an improvement of your previous score of 0.98571. Great job!". Well, not that great if one considers that the best model out there achieves 99.97% accuracy. But it's a start. Let's work on improving it further.