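# Digit Recognizer | Kaggle
Competition page: https://www.kaggle.com/c/digit-recognizer

This notebook trains a fully connected Keras network on the competition's training data and writes a submission file with the predicted digit for each test image.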

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from sklearn.model_selection import train_test_split

## Load data

In [2]:
# Read the competition's training CSV and preview its first rows.
df = pd.read_csv(filepath_or_buffer='data/train.csv')
df.head(n=5)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Column 0 is the digit label; every column after it is a pixel feature.
# The label is sliced with `:1` so it stays a one-column DataFrame.
X, y = df.iloc[:, 1:], df.iloc[:, :1]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    train_size=0.7,
)

In [4]:
# Convert both splits to float32 NumPy arrays ready for scaling.
initial_X = X_train.astype('float32').values
initial_X_test = X_test.astype('float32').values

# Sanity check: (rows, columns) of each split, one per line.
print(initial_X.shape, initial_X_test.shape, sep='\n')

(29399, 784)
(12601, 784)


In [5]:
from sklearn.preprocessing import MinMaxScaler
# `keras.utils.np_utils` is an internal module that newer Keras releases no
# longer expose; `to_categorical` is importable from `keras.utils` directly.
from keras.utils import to_categorical

scaler = MinMaxScaler()
num_categories = 10  # one class per digit, 0-9

# Fit the scaler on the training split only, then apply the same fitted
# transform to the hold-out split so both are scaled identically.
X_scaled = scaler.fit_transform(initial_X)
test_X_scaled = scaler.transform(initial_X_test)

# One-hot encode the integer labels.
# NOTE: this rebinds y_train / y_test, so re-running this cell without first
# re-running the train/test split would encode already-encoded labels.
y_train = to_categorical(y_train, num_categories)
y_test = to_categorical(y_test, num_categories)

# Number of features per sample; used as the model's input shape.
input_dimension = X_scaled.shape[1]

Using TensorFlow backend.


## Build model

In [6]:
from keras.models import Sequential
# Import Dense from the public `keras.layers` namespace; the `keras.layers.core`
# submodule is an implementation detail that is not available in every Keras release.
from keras.layers import Dense

# Fully connected classifier: two hidden ReLU layers of 500 units each,
# followed by a softmax output with one unit per class.
model = Sequential()
model.add(Dense(500, activation='relu', input_shape=(input_dimension,)))
model.add(Dense(500, activation='relu'))
# Size the output layer from num_categories so it always matches the
# width of the one-hot encoded labels.
model.add(Dense(num_categories, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer="adam",
              metrics=['accuracy'])


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 500)               392500    
_________________________________________________________________
dense_2 (Dense)              (None, 500)               250500    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5010      
Total params: 648,010
Trainable params: 648,010
Non-trainable params: 0
_________________________________________________________________


## Training

In [7]:
# Train on the scaled training split: 20 epochs, mini-batches of 100 samples.
model.fit(
    x=X_scaled,
    y=y_train,
    epochs=20,
    batch_size=100,
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x9dbaced7b8>

In [8]:
# Loss and accuracy on the same data the model was fitted on.
# `score` is [loss, accuracy] because the model was compiled with one metric.
score = model.evaluate(x=X_scaled, y=y_train)
print(' - loss: {0:.4f} - acc:{1:.4f}'.format(*score))



In [9]:
# Loss and accuracy on the held-out split, which the model did not train on.
# `score` is [loss, accuracy] because the model was compiled with one metric.
score = model.evaluate(x=test_X_scaled, y=y_test)
print(' - loss: {0:.4f} - acc:{1:.4f}'.format(*score))



In [10]:
# Persist the trained network to an HDF5 file in the working directory.
model.save(filepath='model.h5')

## Predictions for the Kaggle submission (score = 0.97271)

In [11]:
# Read the competition's unlabeled test CSV and preview its first rows.
test_data = pd.read_csv(filepath_or_buffer='data/test.csv')
test_data.head(n=5)

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Same float32 array conversion that was applied to the training data.
final_test_X = test_data.astype('float32').values
final_test_X.shape

(28000, 784)

In [13]:
# Reuse the scaler fitted on the training split; it must not be re-fitted here.
final_test_X_scaled = scaler.transform(X=final_test_X)

In [14]:
# One row of softmax outputs per test image.
my_predict = model.predict(x=final_test_X_scaled)

In [15]:
# Expect (number of test images, 10): one softmax output per class.
my_predict.shape

(28000, 10)

In [16]:
# From categorical to integer: reduce each row of class probabilities to the
# index of its largest entry, which is the predicted digit.
# Uses the `np` alias imported at the top of the notebook instead of a
# second, mid-notebook numpy import.
my_predict_value = np.argmax(my_predict, axis=1)
my_predict_value.shape

(28000,)

In [17]:
# Wrap the predicted digits in a single-column DataFrame (default 0-based index).
result = pd.DataFrame(data=my_predict_value)

In [18]:
# Preview the first rows of the result frame.
result.head()

Unnamed: 0,0
0,2
1,0
2,9
3,0
4,3


In [19]:
# Row ids in the output start at 1. Assign the index outright rather than
# using `result.index += 1`, which would shift the ids again every time this
# cell is re-run. Any index name that is already set is carried over.
result.index = pd.RangeIndex(start=1, stop=len(result) + 1, name=result.index.name)
result.columns = ['Label']
result.head()

Unnamed: 0,Label
1,2
2,0
3,9
4,0
5,3


In [20]:
# Name the index so it is written to the CSV as the 'ImageId' column header.
result.index = result.index.rename('ImageId')

In [21]:
# Final check of the frame before it is written to disk.
result.head()

Unnamed: 0_level_0,Label
ImageId,Unnamed: 1_level_1
1,2
2,0
3,9
4,0
5,3


In [22]:
# Write the submission file; the named index becomes the first CSV column.
result.to_csv(path_or_buf='data/results_nn.csv', header=True)