In [1]:
import numpy as np
import pandas
import sklearn
from sklearn.grid_search import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Reshape, Flatten, Convolution2D, Convolution1D, AveragePooling2D, MaxPooling2D
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier

Using Theano backend.
Using gpu device 0: GeForce GTX 1070 (CNMeM is disabled, cuDNN not available)


In [2]:
data = pandas.read_csv('train.csv')

In [3]:
labels = data['label'].values
labels = np_utils.to_categorical(labels)

In [4]:
features = data.iloc[:,1:].values

In [5]:
features.shape, labels.shape

((42000, 784), (42000, 10))

In [6]:
def create_flat_keras_model():
    model = Sequential()
    model.add(Dense(784, input_dim=784))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model

In [7]:
label_train, label_test, feat_train, feat_test = sklearn.cross_validation.train_test_split(labels, features, test_size=0.2, random_state=42)

In [8]:
feat_train.shape, label_train.shape

((33600, 784), (33600, 10))

In [9]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=128)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[8.708913433438255, 0.4592857142857143]

## Different batch sizes

In [10]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[6.62019079208374, 0.58857142857142852]

In [11]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=64)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[11.208292908441452, 0.30452380952380953]

In [12]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=2000)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[3.6746569406418574, 0.76964285714285718]

In [13]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=5000)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[8.4212830025809158, 0.47642857142857142]

Batch sizes of 500 and more observations are better, probably lower values cause overfitting. Bigger batches are also faster (obviously).

In [14]:
## Optimizer

In [15]:
def create_flat_keras_model():
    model = Sequential()
    model.add(Dense(784, input_dim=784))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=["accuracy"])
    return model

In [16]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[5.4901970954168409, 0.65571428571428569]

## Deeper networks

In [17]:
def create_flat_keras_model():
    model = Sequential()
    model.add(Dense(784, input_dim=784))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model

In [18]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[7.0871525337582542, 0.55988095238095237]

In [19]:
def add_layers_to_model(model, layer_size):
    model.pop()    # Activation(softmax)
    model.pop()    # Dense(10)
    model.add(Dense(layer_size))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))     # needed to have right output shape
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model

In [20]:
nn_model = add_layers_to_model(nn_model, 128)
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[4.2759696488153365, 0.73428571428571432]

In [21]:
nn_model = add_layers_to_model(nn_model, 64)
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[3.6591158085777646, 0.77142857142857146]

In [22]:
nn_model = add_layers_to_model(nn_model, 128)
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[6.9378743244352794, 0.56916666666666671]

It improves to a certain point, adding more layers after that decreases accuracy

## Convolution

In [23]:
feat_train = feat_train.reshape((feat_train.shape[0], 1, 28, 28))
feat_test = feat_test.reshape((feat_test.shape[0], 1, 28, 28))

In [24]:
def create_convoluted_keras_model():
    model = Sequential()
    model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=["accuracy"])
    return model

In [25]:
nn_model = create_convoluted_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.12351059441099919, 0.96488095238095239]

In [26]:
def create_convoluted_keras_model():
    model = Sequential()
    model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=["accuracy"])
    return model

In [27]:
nn_model = create_convoluted_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[0.16222245575061867, 0.95857142857142852]

In [28]:
def create_convoluted_keras_model():
    model = Sequential()
    model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=["accuracy"])
    return model

In [29]:
nn_model = create_convoluted_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[0.14169917189205686, 0.96107142857142858]

In [30]:
def create_convoluted_keras_model():
    model = Sequential()
    model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=["accuracy"])
    return model

In [31]:
nn_model = create_convoluted_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[0.30620023734512786, 0.93261904761904757]