In [1]:
import numpy as np
import pandas as pd

In [2]:
train_data = pd.read_csv('./data/train.csv')
test_data = pd.read_csv('./data/test.csv')

In [3]:
train_data_labels = np.array(train_data['label']) # labels
train_data_pixels_1d = np.array(train_data.drop(columns=['label'], axis=1))   # pixels

In [4]:
# normalize data
train_data_pixels_1d = train_data_pixels_1d / 255.0
test_data = test_data / 255.0

In [5]:
print(train_data_pixels_1d.shape)
print(test_data.shape)

(42000, 784)
(28000, 784)


In [6]:
train_data_pixels_2d = train_data_pixels_1d.reshape(train_data_pixels_1d.shape[0], 28, 28, 1)
test_data_2d = np.array(test_data).reshape(test_data.shape[0], 28, 28, 1)

In [7]:
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
# make label into onehot array
one_hot_label = to_categorical(train_data_labels)

Using TensorFlow backend.


In [8]:
from sklearn.model_selection import train_test_split

In [9]:
train_X, test_X, train_y, test_y = train_test_split(train_data_pixels_2d, one_hot_label, random_state=0)

In [29]:
from keras.preprocessing.image import ImageDataGenerator
# With data augmentation to prevent overfitting (accuracy 0.99286)

datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images


datagen.fit(train_X)

In [11]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPool2D

In [24]:
# define model 
model = Sequential()

# conv2d 1
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', 
                 activation ='relu', input_shape = (28,28,1)))
# conv2d 2
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', 
                 activation ='relu'))

model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.25))

# conv2d 3
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
# conv2d 5
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))

# conv2d 3
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
# conv2d 5
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))


# conv2d 3
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
# conv2d 5
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))


model.add(Flatten())
model.add(Dense(256, activation = "relu"))
model.add(Dense(256, activation = "relu"))
model.add(Dense(256, activation = "relu"))
model.add(Dense(256, activation = "relu"))

model.add(Dense(256, activation = "relu"))
model.add(Dense(256, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation = "softmax"))

In [33]:
# Define the optimizer
from keras.optimizers import RMSprop
optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
# Compile the model
model.compile(optimizer ='adam' , loss = "categorical_crossentropy", metrics=["accuracy"])

In [27]:
from keras.callbacks import ReduceLROnPlateau
# Set a learning rate annealer
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', 
                                            patience=3, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.00001)

In [34]:
batch_size = 86
epochs = 10
model.fit(train_X, train_y, 
              epochs=epochs, 
              batch_size=batch_size, 
              validation_data=(test_X, test_y))

# Fit the model
history = model.fit_generator(train_X,train_y, batch_size=batch_size,
                                              epochs = epochs, 
                                              validation_data = (train_X,test_y),
                                              verbose = 2, 
                                              callbacks=[learning_rate_reduction])


Train on 31500 samples, validate on 10500 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


TypeError: fit_generator() got an unexpected keyword argument 'batch_size'

In [35]:
model_score = model.evaluate(test_X, test_y)



In [36]:
import time
MODEL_NAME = f'MNIST_CONV2D_{model_score[1]:.2}'
model.save(f'./models/{MODEL_NAME}.model')

# Make Submission

In [None]:
train_data_predictions = model.predict(test_data_2d)
train_data_predictions_numbers = [np.argmax(pred) for pred in train_data_predictions]

In [None]:
submission_df = pd.DataFrame(columns=['ImageId', 'Label'], index=range(len(train_data_predictions)))
submission_df['Label'] = train_data_predictions_numbers
submission_df['ImageId'] = range(1, len(train_data_predictions)+1)

In [None]:
submission_df.to_csv('./data/submission_conv2d.csv', index=False)

In [None]:
submission_df.tail()