In [27]:
from __future__ import print_function, division
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future


import os
from keras.models import Model
from keras.layers import Input, LSTM, GRU, Bidirectional, GlobalMaxPooling1D, Lambda, Concatenate, Dense
import keras.backend as K
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

if len(K.tensorflow_backend._get_available_gpus()) > 0:
  from keras.layers import CuDNNLSTM as LSTM
  from keras.layers import CuDNNGRU as GRU



In [28]:
def get_mnist(limit=None):
  if not os.path.exists('../large_files'):
    print("You must create a folder called large_files adjacent to the class folder first.")
  if not os.path.exists('train.csv'):
    print("Looks like you haven't downloaded the data or it's not in the right spot.")
    print("Please get train.csv from https://www.kaggle.com/c/digit-recognizer")
    print("and place it in the large_files folder.")

  print("Reading in and transforming data...")
  df = pd.read_csv('train.csv')
  data = df.values
  np.random.shuffle(data)
  X = data[:, 1:].reshape(-1, 28, 28) / 255.0 # data is from 0..255
  Y = data[:, 0]
  if limit is not None:
    X, Y = X[:limit], Y[:limit]
  return X, Y


In [29]:
X, Y = get_mnist()



You must create a folder called large_files adjacent to the class folder first.
Reading in and transforming data...


In [30]:
Y

array([1, 2, 6, ..., 9, 5, 1])

In [34]:
# config
D = 28
M = 128

In [35]:
# input is an image of size 28x28
input_ = Input(shape=(D, D))

In [36]:
# up-down
rnn1 = Bidirectional(LSTM(M, return_sequences=True))
x1 = rnn1(input_) # output is N x D x 2M
print(x1.shape)
x1 = GlobalMaxPooling1D()(x1) # output is N x 2M
print(x1.shape)

(?, 28, 256)
(?, 256)


In [37]:
# left-right
rnn2 = Bidirectional(LSTM(M, return_sequences=True))

In [38]:
# custom layer
permutor = Lambda(lambda t: K.permute_dimensions(t, pattern=(0, 2, 1)))

In [39]:
x2 = permutor(input_)
x2 = rnn2(x2) # output is N x D x 2M
print(x2.shape)
x2 = GlobalMaxPooling1D()(x2) # output is N x 2M
print(x2.shape)

(?, 28, 256)
(?, 256)


In [40]:
# put them together
concatenator = Concatenate(axis=1)
x = concatenator([x1, x2]) # output is N x 4M

# final dense layer
output = Dense(10, activation='softmax')(x)

model = Model(inputs=input_, outputs=output)


In [41]:
# testing
o = model.predict(X)
print("o.shape:", o.shape)

o.shape: (42000, 10)


In [42]:
# compile
model.compile(
  loss='sparse_categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)


In [43]:
print('Training model...')
r = model.fit(X, Y, batch_size=32, epochs=10, validation_split=0.3)

Training model...
Train on 29399 samples, validate on 12601 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [45]:
def get_mnist_test(limit=None):
#   if not os.path.exists('../large_files'):
#     print("You must create a folder called large_files adjacent to the class folder first.")
  if not os.path.exists('../input/train.csv'):
    print("Looks like you haven't downloaded the data or it's not in the right spot.")
    print("Please get train.csv from https://www.kaggle.com/c/digit-recognizer")
    print("and place it in the large_files folder.")

  print("Reading in and transforming data...")
  df1 = pd.read_csv('test.csv')
  data1 = df1.values
  np.random.shuffle(data1)
  X_test = data1[:, 0:].reshape(-1, 28, 28) / 255.0 # data is from 0..255
#   Y_test = data1[:, 0]
  if limit is not None:
    X_test = X_test[:limit]
  return X_test

X_test = get_mnist_test()


Looks like you haven't downloaded the data or it's not in the right spot.
Please get train.csv from https://www.kaggle.com/c/digit-recognizer
and place it in the large_files folder.
Reading in and transforming data...


In [46]:
predictions = model.predict(X_test, verbose=0)


In [47]:
pred_classes_output = predictions.argmax(axis=1)


In [48]:
pred_classes_output

array([9, 2, 0, ..., 4, 4, 5])

In [49]:
submissions=pd.DataFrame({"ImageId": list(range(1,len(pred_classes_output)+1)),
                         "Label": pred_classes_output})
submissions.to_csv("output.csv", index=False, header=True)

In [50]:
p = pd.read_csv("output.csv")
p.head(-5)

Unnamed: 0,ImageId,Label
0,1,9
1,2,2
2,3,0
3,4,6
4,5,0


In [51]:
p.head(-5)

Unnamed: 0,ImageId,Label
0,1,9
1,2,2
2,3,0
3,4,6
4,5,0
...,...,...
27990,27991,4
27991,27992,3
27992,27993,6
27993,27994,0
