In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Seed NumPy's global random number generator so that NumPy-driven randomness
# is repeatable from one run of the notebook to the next.
np.random.seed(2)

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D,AveragePooling2D,BatchNormalization
from keras.optimizers import RMSprop,Adam
from keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import GridSearchCV

Using TensorFlow backend.


## Data Pre-processing
    - Import data
    - Inspect data
    - Normalize data
    - Reshape data
    - Create validation set

<b>Import data</b>



In [4]:
# Read the labelled training table and the test table from CSV files that are
# expected to sit in the notebook's working directory.
train, test = pd.read_csv("train.csv"), pd.read_csv("test.csv")

# Separate the target column from the pixel columns of the training table.
y_train = train["label"]
x_train = train.drop("label", axis="columns")

<b>Inspect data</b>

In [5]:
# Preview the first rows of the pixel columns (one row per image).
x_train.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Per-pixel summary statistics (count, mean, std, min, quartiles, max).
x_train.describe()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
count,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,...,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0
mean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.219286,0.117095,0.059024,0.02019,0.017238,0.002857,0.0,0.0,0.0,0.0
std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.31289,4.633819,3.274488,1.75987,1.894498,0.414264,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,254.0,254.0,253.0,253.0,254.0,62.0,0.0,0.0,0.0,0.0


In [7]:
# Number of training examples per digit label, to check the class balance.
y_train.value_counts()


1    4684
7    4401
3    4351
9    4188
2    4177
6    4137
0    4132
4    4072
8    4063
5    3795
Name: label, dtype: int64

Similar number of instances for all 10 digits, so no sub-sampling of the classes is required during training.

<b>Normalize data</b>

In [8]:
# Divide every pixel value by 255, using the same constant for the training and
# the test table (presumably maps 8-bit intensities onto [0, 1] - confirm the
# raw value range against the data source).
x_train_s, test_s = x_train / 255, test / 255

In [9]:
# Report the shapes of the scaled training and test tables.
# (A MinMaxScaler-based variant was tried here earlier; the fixed division by
# 255 in the previous cell is what the rest of the notebook uses.)
for scaled_frame in (x_train_s, test_s):
    print(scaled_frame.shape)

(42000, 784)
(28000, 784)


<b>Reshape data</b>
    
    It is necessary to reshape each flattened image to 3 dimensions (28 x 28 x 1), as Keras requires an extra dimension denoting the image channel

In [10]:
# Turn every flat row of pixel values into a 28 x 28 image with a single
# channel; the leading -1 lets NumPy infer the number of images.
x_train = np.reshape(x_train_s.values, (-1, 28, 28, 1))
test = np.reshape(test_s.values, (-1, 28, 28, 1))

# Show the resulting array shapes, one per line.
print(x_train.shape, test.shape, sep="\n")

(42000, 28, 28, 1)
(28000, 28, 28, 1)


<b>Create validation set</b>

In [11]:
# Hold out 10% of the labelled images as a validation set; the fixed
# random_state keeps the split identical between runs.
(x_train, x_val,
 y_train, y_val) = train_test_split(x_train, y_train,
                                    test_size=0.1, random_state=2)

In [12]:
#One-hot-encode labels for CNN structure
print(y_train)
y_train = to_categorical(y_train, num_classes = 10)
print(y_train)
print(y_val)
y_val = to_categorical(y_val, num_classes = 10)
print(y_val)


30613    8
39467    7
4232     9
33862    3
30695    5
23751    1
14617    3
12522    1
2940     0
3951     3
39541    2
26638    5
19937    2
1755     1
41032    1
32143    6
4885     3
33440    3
28368    1
31202    8
36040    1
38255    0
31706    3
36351    5
4454     2
22604    7
9804     1
31221    4
2282     6
5723     7
        ..
12420    3
19694    8
36432    4
16415    8
27859    1
40390    4
25902    0
16639    2
21372    2
20026    6
15905    9
8170     9
29547    1
21418    9
20084    1
41084    2
6548     1
32031    5
11071    9
5167     1
35541    8
33201    9
14696    4
33867    2
18898    0
31019    7
30280    9
6637     2
35343    9
23720    4
Name: label, Length: 37800, dtype: int64
[[0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 ...
 [0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 0.]]
38732    6
3686     9
30090    5
31205    7
11331    9
22883    2
26503    3
27313    3
35521    7
40140    0
20865    6
11783    9


CNN structure requires one-hot-encoded output

## Model Implementation
   It was decided to implement a CNN due to its popularity in the field of visual classification.
   

CNN Model structures

In [29]:
# CNN architecture selection.
#
# `modelle` is the id of the architecture to build; it must be a key of
# _CNN_SPECS. Ids 0-5 are early exploratory models (the original note refers to
# them as "models 1 - 6"). From id 7 onward a specific development process was
# followed:
#   7 - "Model 1" in the report, with dropout
#   8 - addition of batch normalization
#   9 - both batch normalization and dropout
modelle = 8

# Declarative description of every architecture.
#   stages        : list of (filters, kernel_size, number_of_conv_layers); each
#                   stage is followed by a 2x2 max-pooling layer
#   batch_norm    : add BatchNormalization after every conv layer and after the
#                   hidden dense layer
#   stage_dropout : dropout rate applied after each pooling layer (None = none)
#   dense_units   : width of the hidden dense layer
#   dense_dropout : dropout rate applied after the hidden dense layer (None = none)
#   optimizer     : "rmsprop" or "adam" (see _make_optimizer)
_CNN_SPECS = {
    0: {"stages": [(64, (3, 3), 2), (128, (3, 3), 2), (256, (3, 3), 3)],
        "batch_norm": False, "stage_dropout": None,
        "dense_units": 512, "dense_dropout": None, "optimizer": "adam"},
    1: {"stages": [(32, (5, 5), 2), (64, (3, 3), 2)],
        "batch_norm": True, "stage_dropout": 0.25,
        "dense_units": 256, "dense_dropout": 0.5, "optimizer": "rmsprop"},
    2: {"stages": [(64, (3, 3), 2), (128, (3, 3), 2), (256, (3, 3), 1)],
        "batch_norm": False, "stage_dropout": None,
        "dense_units": 512, "dense_dropout": None, "optimizer": "rmsprop"},
    3: {"stages": [(32, (5, 5), 2), (64, (3, 3), 2), (64, (3, 3), 2)],
        "batch_norm": True, "stage_dropout": 0.25,
        "dense_units": 256, "dense_dropout": 0.5, "optimizer": "rmsprop"},
    4: {"stages": [(32, (5, 5), 2), (64, (3, 3), 2), (64, (3, 3), 2)],
        "batch_norm": True, "stage_dropout": 0.25,
        "dense_units": 256, "dense_dropout": 0.5, "optimizer": "adam"},
    5: {"stages": [(32, (5, 5), 2), (64, (3, 3), 2)],
        "batch_norm": False, "stage_dropout": 0.25,
        "dense_units": 256, "dense_dropout": 0.5, "optimizer": "rmsprop"},
    7: {"stages": [(64, (3, 3), 2), (128, (3, 3), 2), (256, (3, 3), 3)],
        "batch_norm": False, "stage_dropout": 0.25,
        "dense_units": 512, "dense_dropout": 0.25, "optimizer": "adam"},
    8: {"stages": [(64, (3, 3), 2), (128, (3, 3), 2), (256, (3, 3), 3)],
        "batch_norm": True, "stage_dropout": None,
        "dense_units": 512, "dense_dropout": None, "optimizer": "adam"},
    9: {"stages": [(64, (3, 3), 2), (128, (3, 3), 2), (256, (3, 3), 3)],
        "batch_norm": True, "stage_dropout": 0.25,
        "dense_units": 512, "dense_dropout": 0.25, "optimizer": "adam"},
}


def _make_optimizer(kind):
    """Return a freshly configured optimizer for `kind` ("rmsprop" or "adam").

    Raises:
        ValueError: if `kind` is not one of the two supported names.
    """
    if kind == "rmsprop":
        return RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0)
    if kind == "adam":
        return Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                    epsilon=1e-08, decay=0.0, amsgrad=False)
    raise ValueError("Unknown optimizer kind %r" % (kind,))


def _build_cnn(spec):
    """Assemble the Sequential CNN described by one entry of _CNN_SPECS.

    Layer order per stage: (Conv2D [, BatchNormalization]) repeated, then
    MaxPool2D [, Dropout]. The head is Flatten, Dense [, BatchNormalization]
    [, Dropout], and a 10-way softmax Dense layer.
    """
    net = Sequential()
    needs_input_shape = True  # only the very first layer declares the input shape
    for filters, kernel_size, n_convs in spec["stages"]:
        for _ in range(n_convs):
            conv_kwargs = {"padding": 'Same', "activation": 'relu'}
            if needs_input_shape:
                conv_kwargs["input_shape"] = (28, 28, 1)
                needs_input_shape = False
            net.add(Conv2D(filters, kernel_size, **conv_kwargs))
            if spec["batch_norm"]:
                net.add(BatchNormalization())
        net.add(MaxPool2D(pool_size=(2, 2)))
        if spec["stage_dropout"] is not None:
            net.add(Dropout(spec["stage_dropout"]))

    net.add(Flatten())

    net.add(Dense(spec["dense_units"], activation="relu"))
    if spec["batch_norm"]:
        net.add(BatchNormalization())
    if spec["dense_dropout"] is not None:
        net.add(Dropout(spec["dense_dropout"]))
    net.add(Dense(10, activation="softmax"))
    return net


# Fail loudly on an unsupported id instead of silently leaving an empty model
# and an undefined (or stale) optimizer for the compile cell.
if modelle not in _CNN_SPECS:
    raise ValueError("Unknown modelle %r; choose one of %s"
                     % (modelle, sorted(_CNN_SPECS)))

model = _build_cnn(_CNN_SPECS[modelle])

# Optimizer
optimizer = _make_optimizer(_CNN_SPECS[modelle]["optimizer"])

Model compilation

In [30]:
# Attach the optimizer chosen in the previous cell, the categorical
# cross-entropy loss and the accuracy metric to the model.
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [31]:
# Print the layer-by-layer table (layer type, output shape, parameter count).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_36 (Conv2D)           (None, 28, 28, 64)        640       
_________________________________________________________________
batch_normalization_41 (Batc (None, 28, 28, 64)        256       
_________________________________________________________________
conv2d_37 (Conv2D)           (None, 28, 28, 64)        36928     
_________________________________________________________________
batch_normalization_42 (Batc (None, 28, 28, 64)        256       
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_38 (Conv2D)           (None, 14, 14, 128)       73856     
_________________________________________________________________
batch_normalization_43 (Batc (None, 14, 14, 128)       512       
__________

Learning Rate Scheduling

In [32]:
# Halve the learning rate whenever the monitored validation accuracy has not
# improved for 2 epochs, but never reduce it below 1e-5.
learning_rate_annealer = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=2,
    min_lr=1e-5,
    verbose=1,
)

In [33]:
# Training length (number of epochs) and mini-batch size used by the fit cell.
epochs, batch_size = 30, 128

Data Augmentation

In [34]:
# Random augmentation applied on the fly to every training batch.
datagen = ImageDataGenerator(
    rotation_range=10,       # degree range for random rotations
    zoom_range=0.1,          # random zoom by up to 10%
    width_shift_range=0.1,   # random horizontal shift (fraction of total width)
    height_shift_range=0.1,  # random vertical shift (fraction of total height)
)
# datagen.fit(x_train) is intentionally not called: fitting is only required
# when featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled. None of them is used here, so the call would only copy the whole
# training array without changing the generator.

Fit data augmentation and model to data

In [35]:
# Train on augmented mini-batches; the held-out validation split is evaluated
# after every epoch and drives the learning-rate annealer.
train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
history = model.fit_generator(
    train_batches,
    steps_per_epoch=x_train.shape[0] // batch_size,
    epochs=epochs,
    validation_data=(x_val, y_val),
    callbacks=[learning_rate_annealer],
    verbose=2,
)

Epoch 1/10
 - 18s - loss: 0.1457 - acc: 0.9540 - val_loss: 0.1322 - val_acc: 0.9605
Epoch 2/10
 - 15s - loss: 0.0565 - acc: 0.9821 - val_loss: 0.0480 - val_acc: 0.9850
Epoch 3/10
 - 15s - loss: 0.0439 - acc: 0.9864 - val_loss: 0.0281 - val_acc: 0.9895
Epoch 4/10
 - 16s - loss: 0.0374 - acc: 0.9883 - val_loss: 0.0521 - val_acc: 0.9848
Epoch 5/10
 - 16s - loss: 0.0322 - acc: 0.9895 - val_loss: 0.0256 - val_acc: 0.9919
Epoch 6/10
 - 16s - loss: 0.0314 - acc: 0.9904 - val_loss: 0.0334 - val_acc: 0.9890
Epoch 7/10
 - 16s - loss: 0.0285 - acc: 0.9910 - val_loss: 0.0568 - val_acc: 0.9857

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 8/10
 - 16s - loss: 0.0169 - acc: 0.9948 - val_loss: 0.0156 - val_acc: 0.9948
Epoch 9/10
 - 16s - loss: 0.0145 - acc: 0.9956 - val_loss: 0.0240 - val_acc: 0.9926
Epoch 10/10
 - 16s - loss: 0.0150 - acc: 0.9952 - val_loss: 0.0177 - val_acc: 0.9933

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.000250000011874362

In [91]:
# Validation accuracy stored in `history`, one value per epoch.
history.history['val_acc']


[0.9780952377546401,
 0.9828571428571429,
 0.9811904761904762,
 0.9847619047619047,
 0.9871428577105205,
 0.984999999659402,
 0.9857142857142858,
 0.9935714285714285,
 0.9928571434248061,
 0.9933333339009967,
 0.9940476187070211,
 0.9959523809523809,
 0.9954761910438538,
 0.9940476190476191,
 0.9947619053295681,
 0.995,
 0.9952380958057585,
 0.9954761910438538,
 0.9947619053295681,
 0.9952380958057585,
 0.9952380958057585,
 0.9952380958057585,
 0.9950000005676632,
 0.9952380952380953,
 0.9947619053295681,
 0.9947619053295681,
 0.9950000005676632,
 0.9950000005676632,
 0.9952380952380953,
 0.9952380952380953]

Final Model

In [52]:
# Training length and mini-batch size for the final model.
epochs, batch_size = 14, 128

# Final architecture: three convolutional stages (64, 128 and 256 filters) with
# batch normalization after every convolution and dropout after every pooling
# layer, followed by a 512-unit dense layer and a 10-way softmax output.
model = Sequential([
    # Stage 1: two 64-filter convolutions
    Conv2D(64, (3, 3), padding='Same', activation='relu',
           input_shape=(28, 28, 1)),
    BatchNormalization(),
    Conv2D(64, (3, 3), padding='Same', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: two 128-filter convolutions
    Conv2D(128, (3, 3), padding='Same', activation='relu'),
    BatchNormalization(),
    Conv2D(128, (3, 3), padding='Same', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 3: three 256-filter convolutions
    Conv2D(256, (3, 3), padding='Same', activation='relu'),
    BatchNormalization(),
    Conv2D(256, (3, 3), padding='Same', activation='relu'),
    BatchNormalization(),
    Conv2D(256, (3, 3), padding='Same', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head
    Flatten(),
    Dense(512, activation="relu"),
    BatchNormalization(),
    Dropout(0.5),
    Dense(10, activation="softmax"),
])

# Adam optimizer with an initial learning rate of 0.001.
optimizer = Adam(
    lr=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    decay=0.0,
    amsgrad=False,
)

# Attach optimizer, loss and metric to the model.
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

# Halve the learning rate when validation accuracy stalls for 2 epochs,
# with a floor of 1e-5.
learning_rate_annealer = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=2,
    min_lr=1e-5,
    verbose=1,
)

In [53]:
# Random augmentation applied on the fly to every training batch of the final
# model.
datagen = ImageDataGenerator(
    rotation_range=10,       # degree range for random rotations
    zoom_range=0.1,          # random zoom by up to 10%
    width_shift_range=0.1,   # random horizontal shift (fraction of total width)
    height_shift_range=0.1,  # random vertical shift (fraction of total height)
)
# datagen.fit(x_train) is intentionally not called: fitting is only required
# when featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled. None of them is used here, so the call would only copy the whole
# training array without changing the generator.

In [54]:
# Fit the final model on augmented batches, validating on the held-out split
# after each epoch.
steps_per_epoch = x_train.shape[0] // batch_size
augmented_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
history = model.fit_generator(
    augmented_batches,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    verbose=2,
    callbacks=[learning_rate_annealer],
    validation_data=(x_val, y_val),
)

Epoch 1/14
 - 19s - loss: 0.2967 - acc: 0.9123 - val_loss: 0.0667 - val_acc: 0.9807
Epoch 2/14
 - 16s - loss: 0.0838 - acc: 0.9735 - val_loss: 0.0395 - val_acc: 0.9881
Epoch 3/14
 - 16s - loss: 0.0651 - acc: 0.9803 - val_loss: 0.0555 - val_acc: 0.9840
Epoch 4/14
 - 16s - loss: 0.0546 - acc: 0.9837 - val_loss: 0.0379 - val_acc: 0.9883
Epoch 5/14
 - 16s - loss: 0.0459 - acc: 0.9860 - val_loss: 0.0303 - val_acc: 0.9912
Epoch 6/14
 - 16s - loss: 0.0408 - acc: 0.9881 - val_loss: 0.0295 - val_acc: 0.9912
Epoch 7/14
 - 16s - loss: 0.0373 - acc: 0.9884 - val_loss: 0.0414 - val_acc: 0.9883

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 8/14
 - 16s - loss: 0.0272 - acc: 0.9914 - val_loss: 0.0187 - val_acc: 0.9936
Epoch 9/14
 - 16s - loss: 0.0212 - acc: 0.9934 - val_loss: 0.0163 - val_acc: 0.9955
Epoch 10/14
 - 16s - loss: 0.0246 - acc: 0.9925 - val_loss: 0.0187 - val_acc: 0.9931
Epoch 11/14
 - 16s - loss: 0.0245 - acc: 0.9926 - val_loss: 0.0202 - val_acc: 

In [55]:
# Validation accuracy stored in `history` for the final model, one value per epoch.
history.history['val_acc']

[0.9807142853736878,
 0.9880952386629014,
 0.9840476190476191,
 0.9883333339009966,
 0.9911904761904762,
 0.9911904767581394,
 0.9883333333333333,
 0.9935714285714285,
 0.9954761904761905,
 0.9930952380952381,
 0.9935714285714285,
 0.9938095238095238,
 0.9947619047619047,
 0.9957142857142857]

In [59]:
# Print the layer-by-layer table of the final model (layer type, output shape, parameter count).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_43 (Conv2D)           (None, 28, 28, 64)        640       
_________________________________________________________________
batch_normalization_49 (Batc (None, 28, 28, 64)        256       
_________________________________________________________________
conv2d_44 (Conv2D)           (None, 28, 28, 64)        36928     
_________________________________________________________________
batch_normalization_50 (Batc (None, 28, 28, 64)        256       
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 14, 14, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_45 (Conv2D)           (None, 14, 14, 128)       73856     
__________

Test Set Predictions

In [56]:
#Predict test set labels
results = model.predict(test)
print(results)

# Convert one-hot vector to number
results = np.argmax(results,axis = 1)
print(results)

results = pd.Series(results,name="Label")

[[8.9622379e-11 2.6301067e-10 1.0000000e+00 ... 1.6981352e-09
  6.0936417e-10 3.5127254e-10]
 [9.9998271e-01 1.2417572e-08 5.6701726e-08 ... 1.2304106e-07
  2.7745241e-07 2.2994125e-06]
 [1.7803558e-09 1.2663149e-09 2.5297552e-08 ... 2.7987314e-08
  1.4508027e-08 9.9999928e-01]
 ...
 [3.2882385e-14 1.1538588e-12 1.4883173e-11 ... 7.6947363e-12
  1.3844252e-11 1.0326526e-12]
 [1.3807085e-07 1.0042033e-07 2.4022825e-07 ... 1.9072457e-06
  1.0376761e-07 9.9999046e-01]
 [8.8311155e-12 1.8658568e-11 1.0000000e+00 ... 6.6427291e-10
  1.1766610e-09 1.0724676e-10]]
[2 0 9 ... 3 9 2]


In [58]:
# Save the final predictions
final_result = pd.concat([pd.Series(range(1,28001),name = "ImageId"),results],axis = 1)

final_result.to_csv("cnn_mnist_submission_10.csv",index=False)
