In [143]:
import numpy as np
import pandas as pd
from keras.utils import to_categorical

# Load the labelled training CSV into a DataFrame.
# NOTE(review): this is a machine-specific absolute path; the notebook will not
# run on another machine without editing it.
train_csv_path = "/Users/sravanik/Downloads/MNIST.csv"
data_train = pd.read_csv(train_csv_path)

# Preview the first five rows.
data_train.head(5)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [144]:
# Load the test CSV into a DataFrame.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
test_csv_path = "/Users/sravanik/Downloads/test.csv"
data_test = pd.read_csv(test_csv_path)

In [145]:
# Preview the first five rows of the test frame.
data_test.head(5)

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


<h5>Setting Image Size</h5>

In [146]:
# Height and width, in pixels, of each image.
img_rows = 28
img_cols = 28

# Per-image shape handed to the first Conv2D layer; the trailing 1 is the
# single channel axis.
input_shape = (img_rows, img_cols, 1)

<h5>Modifying Train and Test Data</h5>

In [147]:
# Column 0 of the training frame is read as the label; every remaining column
# is read as pixel data.
pixel_columns = data_train.iloc[:, 1:]
label_column = data_train.iloc[:, 0]

X = np.array(pixel_columns)
# Convert the integer labels to the categorical (one-hot) form expected by the
# categorical cross-entropy loss used when the model is compiled.
y = to_categorical(np.array(label_column))

In [148]:
# Test data: every column of the test frame is used as pixel input; no label
# column is read from it, so there is no y_test.
X_test = np.array(data_test.iloc[:, :])

<h5>Splitting Dataset</h5>

In [149]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for validation; the fixed random_state
# keeps the split repeatable between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=13,
)

In [150]:
# Display the shape of the test array before it is reshaped into images.
X_test.shape

(28000, 784)

In [151]:
# Reshape the training rows to (n_samples, img_rows, img_cols, 1).
X_train = np.reshape(X_train, (X_train.shape[0], img_rows, img_cols, 1))

In [152]:
# Reshape the validation rows to (n_samples, img_rows, img_cols, 1).
X_val = np.reshape(X_val, (X_val.shape[0], img_rows, img_cols, 1))

In [153]:
# Reshape the test rows to (n_samples, img_rows, img_cols, 1).
X_test = np.reshape(X_test, (X_test.shape[0], img_rows, img_cols, 1))

In [154]:
# Cast all three image arrays to float32 ahead of the division by 255 in the
# next cell.
X_train, X_val, X_test = (
    images.astype('float32') for images in (X_train, X_val, X_test)
)

In [155]:
# Divide every pixel value by 255, writing the result back into the same array.
# NOTE: this is in place, so re-running the cell divides the arrays by 255 again.
for images in (X_train, X_val, X_test):
    np.divide(images, 255, out=images)

In [156]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization

In [157]:
# Training configuration shared by the cells below.
num_classes = 10   # width of the final softmax layer
batch_size = 256   # samples per batch for model.fit / datagen.flow
epochs = 15        # epochs requested from model.fit / model.fit_generator

In [158]:
# Convolutional classifier: three convolution stages with dropout, followed by
# a dense head that ends in a softmax over `num_classes` outputs.
model = Sequential([
    # Stage 1: 32 filters of 3x3 on the input image, then 2x2 max pooling.
    Conv2D(32,
           kernel_size=(3, 3),
           activation='relu',
           kernel_initializer='he_normal',
           input_shape=input_shape),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    # Stage 2: 64 filters of 3x3, then 2x2 max pooling.
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 3: 128 filters of 3x3, no pooling.
    Conv2D(128, (3, 3), activation='relu'),
    Dropout(0.4),

    # Dense head.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax'),
])

# Categorical cross-entropy pairs with the categorical labels in `y`; accuracy
# is tracked as the reported metric.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

<h5>Data Augmenting</h5>

To reduce overfitting, we artificially expand our handwritten-digit dataset, making the existing dataset larger. The idea is to alter the training data with small transformations that reproduce the variations that occur when someone writes a digit.

For example: the digit is not centered; the scale is not the same (some people write big digits, others small ones); the image is rotated.

Approaches that alter the training data in ways that change the array representation while keeping the label the same are known as data augmentation techniques. Some popular augmentations people use are grayscales, horizontal flips, vertical flips, random crops, color jitters, translations, rotations, and much more.

By applying just a couple of these transformations to our training data, we can easily double or triple the number of training examples and create a very robust model.

In [159]:
from keras.preprocessing.image import ImageDataGenerator

# Augmentation generator: small random rotations, zooms and shifts only.
datagen = ImageDataGenerator(
    # Dataset-level and per-sample normalisation and whitening: all switched off.
    featurewise_center=False,             # would set input mean to 0 over the dataset
    samplewise_center=False,              # would set each sample mean to 0
    featurewise_std_normalization=False,  # would divide inputs by std of the dataset
    samplewise_std_normalization=False,   # would divide each input by its std
    zca_whitening=False,                  # would apply ZCA whitening
    # Geometric jitter applied to the training images.
    rotation_range=10,                    # random rotation range, in degrees
    zoom_range=0.1,                       # random zoom
    width_shift_range=0.1,                # random horizontal shift (fraction of total width)
    height_shift_range=0.1,               # random vertical shift (fraction of total height)
    # Flips are disabled.
    horizontal_flip=False,
    vertical_flip=False,
)

# NOTE(review): every featurewise/ZCA option above is off, so this fit() call
# presumably computes no statistics that are later used; confirm before removing.
datagen.fit(X_train)

In [None]:
# Plain (non-augmented) training run on the in-memory arrays.
# NOTE(review): this cell has no execution count. The later fit_generator cell
# trains the same `model` object and rebinds `history`, so running both cells
# trains the model twice and keeps only the second history. Confirm whether
# this cell is still wanted.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

In [160]:
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

In [161]:
# Learning-rate schedule: when the monitored validation accuracy stops
# improving for `patience` epochs, multiply the learning rate by `factor`,
# never going below `min_lr`. 'val_acc' is the metric name that appears in the
# training log of this notebook.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

In [162]:
# Train on augmented batches drawn from `datagen`, validating on the untouched
# validation arrays after each epoch.
augmented_batches = datagen.flow(X_train, y_train, batch_size=batch_size)

# Number of full batches that fit into the training set.
steps_per_epoch = X_train.shape[0] // batch_size

history = model.fit_generator(
    augmented_batches,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=[learning_rate_reduction],
    verbose=2,
)

Epoch 1/15
 - 58s - loss: 1.1370 - acc: 0.6113 - val_loss: 0.1611 - val_acc: 0.9527
Epoch 2/15
 - 64s - loss: 0.3867 - acc: 0.8786 - val_loss: 0.0934 - val_acc: 0.9686
Epoch 3/15
 - 62s - loss: 0.2738 - acc: 0.9145 - val_loss: 0.0663 - val_acc: 0.9782
Epoch 4/15
 - 55s - loss: 0.2050 - acc: 0.9359 - val_loss: 0.0539 - val_acc: 0.9818
Epoch 5/15
 - 64s - loss: 0.1789 - acc: 0.9457 - val_loss: 0.0483 - val_acc: 0.9837
Epoch 6/15
 - 65s - loss: 0.1544 - acc: 0.9514 - val_loss: 0.0469 - val_acc: 0.9842
Epoch 7/15
 - 70s - loss: 0.1433 - acc: 0.9562 - val_loss: 0.0389 - val_acc: 0.9862
Epoch 8/15
 - 58s - loss: 0.1297 - acc: 0.9598 - val_loss: 0.0369 - val_acc: 0.9871
Epoch 9/15
 - 60s - loss: 0.1320 - acc: 0.9601 - val_loss: 0.0400 - val_acc: 0.9862
Epoch 10/15
 - 61s - loss: 0.1141 - acc: 0.9648 - val_loss: 0.0320 - val_acc: 0.9895
Epoch 11/15
 - 64s - loss: 0.1132 - acc: 0.9662 - val_loss: 0.0364 - val_acc: 0.9892
Epoch 12/15
 - 55s - loss: 0.1073 - acc: 0.9681 - val_loss: 0.0321 - val_a

In [163]:
# Score the test images: one row of model outputs per image.
class_scores = model.predict(X_test)

# Keep the index of the highest-scoring class for each image and wrap it in a
# Series named "Label" so it becomes a column of the submission table.
predicted_digits = np.argmax(class_scores, axis=1)
results = pd.Series(predicted_digits, name="Label")

In [164]:
# Build the submission table: a 1-based ImageId for each test image next to its
# predicted label.
# The id range is derived from the number of predictions instead of a
# hard-coded 28000, so the two columns always line up row-for-row even if the
# test set size changes (a mismatched range would make pd.concat pad with NaN).
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)

# index=False keeps the pandas row index out of the CSV file.
submission.to_csv("cnn_mnist_datagen.csv", index=False)