In [1]:
import numpy as np
import pandas
import sklearn
from sklearn.grid_search import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Reshape, Flatten, Convolution2D, Convolution1D, AveragePooling2D, MaxPooling2D
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
import pickle

Using Theano backend.
Using gpu device 0: GeForce GTX 1070 (CNMeM is disabled, cuDNN not available)


In [2]:
data = pandas.read_csv('train.csv')

In [3]:
labels = data['label'].values
labels = np_utils.to_categorical(labels)

In [4]:
features = data.iloc[:,1:].values

In [5]:
features.shape, labels.shape

((42000, 784), (42000, 10))

In [6]:
def create_flat_keras_model():
    model = Sequential()
    model.add(Dense(784, input_dim=784))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model

In [7]:
label_train, label_test, feat_train, feat_test = sklearn.cross_validation.train_test_split(labels, features, test_size=0.2, random_state=42)

In [8]:
feat_train.shape, label_train.shape

((33600, 784), (33600, 10))

In [9]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=128)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[8.7446558143979018, 0.45714285714285713]

## Different batch sizes

In [10]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[6.7870943351018997, 0.57821428571428568]

In [11]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=64)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[10.5208949025472, 0.34726190476190477]

In [12]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=2000)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[6.9256377683367045, 0.56952380952380954]

In [13]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=5000)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[3.932785792350769, 0.75404761904761908]

Batch sizes of 500 and more observations are better, probably lower values cause overfitting. Bigger batches are also faster (obviously).

In [14]:
## Optimizer

In [15]:
def create_flat_keras_model():
    model = Sequential()
    model.add(Dense(784, input_dim=784))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=["accuracy"])
    return model

In [16]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[5.4410234841846288, 0.66047619047619044]

## Deeper networks

In [17]:
def create_flat_keras_model():
    model = Sequential()
    model.add(Dense(784, input_dim=784))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model

In [18]:
nn_model = create_flat_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[6.9223974663870678, 0.56999999999999995]

In [19]:
def add_layers_to_model(model, layer_size):
    model.pop()    # Activation(softmax)
    model.pop()    # Dense(10)
    model.add(Dense(layer_size))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))     # needed to have right output shape
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model

In [20]:
nn_model = add_layers_to_model(nn_model, 128)
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[3.1842276305244082, 0.80059523809523814]

In [21]:
nn_model = add_layers_to_model(nn_model, 64)
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[4.6063342044467017, 0.71369047619047621]

In [22]:
nn_model = add_layers_to_model(nn_model, 128)
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[5.7054469517299111, 0.64511904761904759]

It improves to a certain point, adding more layers after that decreases accuracy

## Convolution

In [23]:
feat_train = feat_train.reshape((feat_train.shape[0], 1, 28, 28))
feat_test = feat_test.reshape((feat_test.shape[0], 1, 28, 28))

In [24]:
def create_convoluted_keras_model():
    model = Sequential()
    model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=["accuracy"])
    return model

In [25]:
nn_model = create_convoluted_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[0.10246478169358203, 0.96892857142857147]

In [26]:
def create_convoluted_keras_model():
    model = Sequential()
    model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=["accuracy"])
    return model

In [27]:
nn_model = create_convoluted_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.19957109306539808, 0.94904761904761903]

In [28]:
def create_convoluted_keras_model():
    model = Sequential()
    model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=["accuracy"])
    return model

In [29]:
nn_model = create_convoluted_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.12983495716094262, 0.96785714285714286]

In [30]:
def create_convoluted_keras_model():
    model = Sequential()
    model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filter=32, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=["accuracy"])
    return model

In [31]:
nn_model = create_convoluted_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[0.36183496790272851, 0.9302380952380952]

In [34]:
def create_convoluted_keras_model():
    model = Sequential()
    model.add(Convolution2D(nb_filter=64, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filter=64, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=["accuracy"])
    return model

In [35]:
nn_model = create_convoluted_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=10, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

[0.095447766229044648, 0.97380952380952379]

## best result with more epochs

In [36]:
def create_convoluted_keras_model():
    model = Sequential()
    model.add(Convolution2D(nb_filter=64, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filter=64, nb_row=3, nb_col=3, border_mode='valid', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(100))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=["accuracy"])
    return model

In [37]:
nn_model = create_convoluted_keras_model()
nn_model.fit(feat_train, label_train, nb_epoch=200, batch_size=512)
nn_model.evaluate(feat_test, label_test)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

[0.064162280909397393, 0.98785714285714288]

In [43]:
?nn_model.save #save('nn.keras')

Object `nn_model.save` not found.


In [None]:
nn_model.pred