In [2]:
import numpy as np
import pandas as pd
import datetime as dt
# Seed NumPy's global RNG before anything else in the notebook can draw from it.
np.random.seed(1984)

# Image geometry and label count used by the reshape, encoding and model cells.
img_rows = img_cols = 28
num_classes = 10

# Labelled training table; its shape is echoed as a quick sanity check.
train = pd.read_csv("data/train.csv")
print(train.shape)

(42000, 785)


In [3]:
# Wall-clock timestamp captured just before the test table is read.
start = dt.datetime.now()

# Test table (no label column is selected from it anywhere below); echo its shape.
test = pd.read_csv("data/test.csv")
print(test.shape)

(28000, 784)


In [4]:
from sklearn.model_selection import train_test_split

# Features are every column after the first one; the target is the "label" column.
x = train.iloc[:, 1:].values.astype('float32')
y = train["label"].values.astype('int32')
# The test table is used as-is: all of its columns are features.
x_test = test.values.astype('float32')

# Hold out 20% of the labelled rows for validation.
x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.2)

# Report the resulting shapes, one (features, targets) pair per line.
for features, targets in ((x_train, y_train), (x_valid, y_valid)):
    print(features.shape, targets.shape)
print(x_test.shape)

(33600, 784) (33600,)
(8400, 784) (8400,)
(28000, 784)


In [6]:
import keras
from keras import backend as K

# The backend's image layout decides on which side of (rows, cols) the single
# channel axis is placed.
channels_first = K.image_data_format() == 'channels_first'
input_shape = (1, img_rows, img_cols) if channels_first else (img_rows, img_cols, 1)

# Turn every flat pixel row into one image tensor of that per-sample shape.
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_valid = x_valid.reshape((x_valid.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Scale all three splits by the largest value found in the training split, so
# validation and test data go through exactly the same transform as training.
max_p = x_train.max()
x_train, x_valid, x_test = (
    split / max_p for split in (x_train, x_valid, x_test)
)

Using TensorFlow backend.


In [7]:
# Convert the integer class vectors into one-hot (binary class) matrices.
#
# The targets are overwritten in place, so this cell used to be unsafe to run
# twice: a second pass would feed an already one-hot matrix back into
# to_categorical and change the label shape. Encoding only while the array is
# still 1-D makes a re-run a no-op.
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes)
if y_valid.ndim == 1:
    y_valid = keras.utils.to_categorical(y_valid, num_classes)

# Shapes after encoding: targets should now have one column per class.
print(x_train.shape, y_train.shape)
print(x_valid.shape, y_valid.shape)
print(x_test.shape)

(33600, 28, 28, 1) (33600, 10)
(8400, 28, 28, 1) (8400, 10)
(28000, 28, 28, 1)


In [8]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

In [9]:
# Small CNN: two conv/conv/pool/dropout stages followed by a dense classifier head.
# 'same' padding keeps the spatial size inside each stage; each 2x2 pooling halves it.
model = Sequential([
    # Stage 1: 32 filters.
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),
    # Stage 2: 64 filters.
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),
    # Classifier head: flatten, 100-unit hidden layer, softmax over the classes.
    Flatten(),
    Dense(100, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 14, 14, 64)        36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
__________

In [10]:
# Categorical cross-entropy matches the one-hot targets and the softmax output;
# accuracy is tracked as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(lr=0.001),
    metrics=['accuracy'],
)

In [11]:
# Training-time augmentation. Only small rotations and shifts are enabled; all
# normalisation, whitening, zoom and flip options are explicitly switched off.
datagen = ImageDataGenerator(
    # --- enabled geometric jitter ---
    rotation_range=10,        # random rotation range, in degrees
    width_shift_range=0.1,    # random horizontal shift, as a fraction of total width
    height_shift_range=0.1,   # random vertical shift, as a fraction of total height
    # --- disabled geometric options ---
    zoom_range=0.0,           # no random zoom
    horizontal_flip=False,    # no left/right mirroring
    vertical_flip=False,      # no up/down mirroring
    # --- disabled normalisation (the inputs were already scaled earlier) ---
    featurewise_center=False,             # would zero the dataset mean
    samplewise_center=False,              # would zero each sample's mean
    featurewise_std_normalization=False,  # would divide by the dataset std
    samplewise_std_normalization=False,   # would divide each sample by its own std
    zca_whitening=False,                  # would apply ZCA whitening
)

# fit() computes dataset statistics for the generator. The Keras documentation
# lists it as required only for featurewise_center, featurewise_std_normalization
# and zca_whitening, all of which are off here; the call is kept unchanged.
datagen.fit(x_train)

In [14]:
from tensorflow.python.client import device_lib

# Diagnostic only: show the compute devices TensorFlow reports on this machine.
local_devices = device_lib.list_local_devices()
local_devices

[name: "/cpu:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 9027190194798484939, name: "/gpu:0"
 device_type: "GPU"
 memory_limit: 118882304
 locality {
   bus_id: 1
 }
 incarnation: 15540717609627922478
 physical_device_desc: "device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:09:00.0"]

In [23]:
# Mini-batch size and number of passes over the training data.
batch_size = 64
epochs = 50

# Cut the learning rate by 10x when validation accuracy stops improving by more
# than `epsilon` for more than `patience` epochs.
#
# `min_lr` puts a floor under the schedule. With patience=1 and factor=0.1 the
# rate otherwise keeps shrinking tenfold every couple of stalled epochs, with
# nothing to stop it reaching a value too small to change the weights; the
# remaining epochs then make no progress.
#
# NOTE(review): 'val_acc' and `epsilon` are the names used by the Keras version
# this notebook ran on (the training log reports `val_acc`). Later Keras
# releases renamed them; adjust if the environment is upgraded.
lr_reduce = ReduceLROnPlateau(monitor='val_acc', factor=0.1, epsilon=0.0001,
                              patience=1, min_lr=1e-5, verbose=1)

### Load model, if any

In [24]:
import os

# Resume from a previously saved model only when one is actually on disk.
# An unconditional load fails on a fresh checkout (no file yet), which breaks
# "Restart & Run All"; in that case training simply continues with the model
# that was built and compiled in the cells above.
model_path = 'mnistmodel.h5'
if os.path.isfile(model_path):
    model = load_model(model_path)
else:
    print('No saved model at', model_path, '- using the freshly built model.')

In [25]:
# Stream augmented mini-batches from the generator; one epoch consumes as many
# whole batches as fit into the training split.
augmented_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
batches_per_epoch = x_train.shape[0] // batch_size

model.fit_generator(
    augmented_batches,
    steps_per_epoch=batches_per_epoch,
    epochs=epochs,
    validation_data=(x_valid, y_valid),  # hold-out split, passed without augmentation
    callbacks=[lr_reduce],
    verbose=2,
)

Epoch 1/50
6s - loss: 0.0298 - acc: 0.9907 - val_loss: 0.0163 - val_acc: 0.9950
Epoch 2/50
6s - loss: 0.0308 - acc: 0.9909 - val_loss: 0.0162 - val_acc: 0.9951
Epoch 3/50
6s - loss: 0.0324 - acc: 0.9903 - val_loss: 0.0162 - val_acc: 0.9950
Epoch 4/50
6s - loss: 0.0326 - acc: 0.9901 - val_loss: 0.0162 - val_acc: 0.9950
Epoch 5/50
7s - loss: 0.0295 - acc: 0.9911 - val_loss: 0.0162 - val_acc: 0.9951
Epoch 6/50
6s - loss: 0.0309 - acc: 0.9904 - val_loss: 0.0163 - val_acc: 0.9951
Epoch 7/50
6s - loss: 0.0299 - acc: 0.9904 - val_loss: 0.0162 - val_acc: 0.9950
Epoch 8/50
6s - loss: 0.0311 - acc: 0.9902 - val_loss: 0.0163 - val_acc: 0.9951
Epoch 9/50
6s - loss: 0.0297 - acc: 0.9909 - val_loss: 0.0162 - val_acc: 0.9951
Epoch 10/50
7s - loss: 0.0296 - acc: 0.9907 - val_loss: 0.0162 - val_acc: 0.9950
Epoch 11/50
6s - loss: 0.0317 - acc: 0.9904 - val_loss: 0.0162 - val_acc: 0.9951
Epoch 12/50
6s - loss: 0.0289 - acc: 0.9909 - val_loss: 0.0161 - val_acc: 0.9951
Epoch 13/50
6s - loss: 0.0308 - acc: 

<keras.callbacks.History at 0x7f74f419e128>

### Save the model

In [26]:
# Write the trained model to the same HDF5 path that the "Load model" cell
# reads, so a later session can pick up from this state.
model.save('mnistmodel.h5')

In [27]:
# Score the model on the hold-out split. `score` holds the loss first, then the
# metric requested at compile time.
score = model.evaluate(x_valid, y_valid, verbose=0)
for caption, value in zip(('valid loss:', 'valid accuracy:'), score):
    print(caption, value)

valid loss: 0.0162966389345
valid accuracy: 0.995


In [28]:
# Run the model on the test set (x_test), not the validation split; the
# per-class scores returned here are reduced to labels with argmax in the next cell.
y_pred = model.predict(x_test)

In [29]:
# Pick the highest-scoring class for every test image.
y_pred_classes = y_pred.argmax(axis=1)

# Image ids are 1-based and follow the row order of the test set.
ids = range(1, len(y_pred_classes) + 1)

# Two-column table (id, predicted label), written without the frame's index.
submission = pd.DataFrame(
    {"ImageId": ids, "Label": y_pred_classes},
    columns=("ImageId", "Label"),
)
submission.to_csv("submission.csv", index=False)