In [1]:
import pandas as pd
import numpy as np

np.random.seed(1212)

import keras
from keras.models import Model
from keras.layers import *
from keras import optimizers

In [2]:
df_train = pd.read_csv("C://Users//arush//Python_Proj_HandwrittenDigitRecog//train.csv//train.csv")
df_test = pd.read_csv("C://Users//arush//Python_Proj_HandwrittenDigitRecog//test.csv//test.csv")

In [3]:
df_features = df_train.iloc[:, 1:785]
df_label = df_train.iloc[:, 0]

X_test = df_test.iloc[:, 0:784]

print(X_test.shape)

(28000, 784)


In [4]:
from sklearn.model_selection import train_test_split
X_train, X_cv, y_train, y_cv = train_test_split(df_features, df_label, 
                                                test_size = 0.2,
                                                random_state = 1212)

X_train = X_train.values.reshape(33600, 784) #(33600, 784)
X_cv = X_cv.values.reshape(8400, 784) #(8400, 784)

X_test = X_test.values.reshape(28000, 784)

In [5]:
print((min(X_train[1]), max(X_train[1])))

(0, 255)


In [6]:
# Feature Normalization 
X_train = X_train.astype('float32'); X_cv= X_cv.astype('float32'); X_test = X_test.astype('float32')
X_train /= 255; X_cv /= 255; X_test /= 255

# Convert labels to One Hot Encoded
num_digits = 10
y_train = keras.utils.to_categorical(y_train, num_digits)
y_cv = keras.utils.to_categorical(y_cv, num_digits)

In [7]:
# Printing 2 examples of labels after conversion
print(y_train[0]) # 2
print(y_train[3]) # 7

[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]


In [8]:
# Input Parameters
n_input = 784 # number of features
n_hidden_1 = 300
n_hidden_2 = 100
n_hidden_3 = 100
n_hidden_4 = 200
num_digits = 10

In [9]:
Inp = Input(shape=(784,))
x = Dense(n_hidden_1, activation='relu', name = "Hidden_Layer_1")(Inp)
x = Dense(n_hidden_2, activation='relu', name = "Hidden_Layer_2")(x)
x = Dense(n_hidden_3, activation='relu', name = "Hidden_Layer_3")(x)
x = Dense(n_hidden_4, activation='relu', name = "Hidden_Layer_4")(x)
output = Dense(num_digits, activation='softmax', name = "Output_Layer")(x)

In [10]:
# Our model would have '6' layers - input layer, 4 hidden layer and 1 output layer
model = Model(Inp, output)
model.summary() # We have 297,910 parameters to estimate

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 784)]             0         
                                                                 
 Hidden_Layer_1 (Dense)      (None, 300)               235500    
                                                                 
 Hidden_Layer_2 (Dense)      (None, 100)               30100     
                                                                 
 Hidden_Layer_3 (Dense)      (None, 100)               10100     
                                                                 
 Hidden_Layer_4 (Dense)      (None, 200)               20200     
                                                                 
 Output_Layer (Dense)        (None, 10)                2010      
                                                                 
Total params: 297,910
Trainable params: 297,910
Non-trainable

In [11]:
# Insert Hyperparameters
learning_rate = 0.1
training_epochs = 20
batch_size = 100
sgd = optimizers.SGD(learning_rate=learning_rate)

In [12]:
# We rely on the plain vanilla Stochastic Gradient Descent as our optimizing methodology
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

In [13]:
history1 = model.fit(X_train, y_train,
                     batch_size = batch_size,
                     epochs = training_epochs,
                     verbose = 2,
                     validation_data=(X_cv, y_cv))

Epoch 1/20
336/336 - 4s - loss: 1.7433 - accuracy: 0.5654 - val_loss: 0.8442 - val_accuracy: 0.7993 - 4s/epoch - 12ms/step
Epoch 2/20
336/336 - 2s - loss: 0.5668 - accuracy: 0.8454 - val_loss: 0.4332 - val_accuracy: 0.8743 - 2s/epoch - 6ms/step
Epoch 3/20
336/336 - 2s - loss: 0.3876 - accuracy: 0.8878 - val_loss: 0.3462 - val_accuracy: 0.8986 - 2s/epoch - 7ms/step
Epoch 4/20
336/336 - 2s - loss: 0.3272 - accuracy: 0.9043 - val_loss: 0.3109 - val_accuracy: 0.9087 - 2s/epoch - 7ms/step
Epoch 5/20
336/336 - 2s - loss: 0.2903 - accuracy: 0.9137 - val_loss: 0.2809 - val_accuracy: 0.9179 - 2s/epoch - 6ms/step
Epoch 6/20
336/336 - 2s - loss: 0.2633 - accuracy: 0.9219 - val_loss: 0.2594 - val_accuracy: 0.9242 - 2s/epoch - 6ms/step
Epoch 7/20
336/336 - 2s - loss: 0.2421 - accuracy: 0.9284 - val_loss: 0.2420 - val_accuracy: 0.9276 - 2s/epoch - 6ms/step
Epoch 8/20
336/336 - 2s - loss: 0.2233 - accuracy: 0.9346 - val_loss: 0.2254 - val_accuracy: 0.9336 - 2s/epoch - 6ms/step
Epoch 9/20
336/336 - 2s

In [14]:
Inp = Input(shape=(784,))
x = Dense(n_hidden_1, activation='relu', name = "Hidden_Layer_1")(Inp)
x = Dense(n_hidden_2, activation='relu', name = "Hidden_Layer_2")(x)
x = Dense(n_hidden_3, activation='relu', name = "Hidden_Layer_3")(x)
x = Dense(n_hidden_4, activation='relu', name = "Hidden_Layer_4")(x)
output = Dense(num_digits, activation='softmax', name = "Output_Layer")(x)

# We rely on ADAM as our optimizing methodology
adam = keras.optimizers.Adam(learning_rate=learning_rate)
model2 = Model(Inp, output)

model2.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [15]:
history2 = model2.fit(X_train, y_train,
                      batch_size = batch_size,
                      epochs = training_epochs,
                      verbose = 2,
                      validation_data=(X_cv, y_cv))

Epoch 1/20
336/336 - 4s - loss: 0.3341 - accuracy: 0.9016 - val_loss: 0.1463 - val_accuracy: 0.9562 - 4s/epoch - 12ms/step
Epoch 2/20
336/336 - 2s - loss: 0.1258 - accuracy: 0.9619 - val_loss: 0.1119 - val_accuracy: 0.9642 - 2s/epoch - 7ms/step
Epoch 3/20
336/336 - 2s - loss: 0.0839 - accuracy: 0.9737 - val_loss: 0.1029 - val_accuracy: 0.9692 - 2s/epoch - 7ms/step
Epoch 4/20
336/336 - 2s - loss: 0.0614 - accuracy: 0.9802 - val_loss: 0.1146 - val_accuracy: 0.9649 - 2s/epoch - 7ms/step
Epoch 5/20
336/336 - 2s - loss: 0.0463 - accuracy: 0.9856 - val_loss: 0.1031 - val_accuracy: 0.9717 - 2s/epoch - 7ms/step
Epoch 6/20
336/336 - 2s - loss: 0.0385 - accuracy: 0.9869 - val_loss: 0.1005 - val_accuracy: 0.9723 - 2s/epoch - 7ms/step
Epoch 7/20
336/336 - 2s - loss: 0.0305 - accuracy: 0.9899 - val_loss: 0.0897 - val_accuracy: 0.9761 - 2s/epoch - 7ms/step
Epoch 8/20
336/336 - 2s - loss: 0.0257 - accuracy: 0.9915 - val_loss: 0.0921 - val_accuracy: 0.9757 - 2s/epoch - 7ms/step
Epoch 9/20
336/336 - 2s

In [16]:
Inp = Input(shape=(784,))
x = Dense(n_hidden_1, activation='relu', name = "Hidden_Layer_1")(Inp)
x = Dense(n_hidden_2, activation='relu', name = "Hidden_Layer_2")(x)
x = Dense(n_hidden_3, activation='relu', name = "Hidden_Layer_3")(x)
x = Dense(n_hidden_4, activation='relu', name = "Hidden_Layer_4")(x)
output = Dense(num_digits, activation='softmax', name = "Output_Layer")(x)

learning_rate = 0.01
adam = keras.optimizers.Adam(learning_rate=learning_rate)
model2a = Model(Inp, output)

model2a.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [17]:
history2a = model2a.fit(X_train, y_train,
                        batch_size = batch_size,
                        epochs = training_epochs,
                        verbose = 2,
                        validation_data=(X_cv, y_cv))

Epoch 1/20
336/336 - 4s - loss: 0.3411 - accuracy: 0.8982 - val_loss: 0.1556 - val_accuracy: 0.9529 - 4s/epoch - 13ms/step
Epoch 2/20
336/336 - 2s - loss: 0.1189 - accuracy: 0.9637 - val_loss: 0.1106 - val_accuracy: 0.9663 - 2s/epoch - 6ms/step
Epoch 3/20
336/336 - 2s - loss: 0.0816 - accuracy: 0.9737 - val_loss: 0.1209 - val_accuracy: 0.9630 - 2s/epoch - 7ms/step
Epoch 4/20
336/336 - 2s - loss: 0.0571 - accuracy: 0.9810 - val_loss: 0.1016 - val_accuracy: 0.9698 - 2s/epoch - 7ms/step
Epoch 5/20
336/336 - 3s - loss: 0.0434 - accuracy: 0.9860 - val_loss: 0.1124 - val_accuracy: 0.9701 - 3s/epoch - 8ms/step
Epoch 6/20
336/336 - 2s - loss: 0.0375 - accuracy: 0.9885 - val_loss: 0.0984 - val_accuracy: 0.9748 - 2s/epoch - 7ms/step
Epoch 7/20
336/336 - 2s - loss: 0.0311 - accuracy: 0.9901 - val_loss: 0.1346 - val_accuracy: 0.9649 - 2s/epoch - 7ms/step
Epoch 8/20
336/336 - 2s - loss: 0.0254 - accuracy: 0.9918 - val_loss: 0.0919 - val_accuracy: 0.9770 - 2s/epoch - 7ms/step
Epoch 9/20
336/336 - 2s

In [18]:
Inp = Input(shape=(784,))
x = Dense(n_hidden_1, activation='relu', name = "Hidden_Layer_1")(Inp)
x = Dense(n_hidden_2, activation='relu', name = "Hidden_Layer_2")(x)
x = Dense(n_hidden_3, activation='relu', name = "Hidden_Layer_3")(x)
x = Dense(n_hidden_4, activation='relu', name = "Hidden_Layer_4")(x)
output = Dense(num_digits, activation='softmax', name = "Output_Layer")(x)

learning_rate = 0.5
adam = keras.optimizers.Adam(learning_rate=learning_rate)
model2b = Model(Inp, output)

model2b.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [19]:
history2b = model2b.fit(X_train, y_train,
                        batch_size = batch_size,
                        epochs = training_epochs,
                            validation_data=(X_cv, y_cv))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [20]:
# Input Parameters
n_input = 784 # number of features
n_hidden_1 = 300
n_hidden_2 = 100
n_hidden_3 = 100
n_hidden_4 = 100
n_hidden_5 = 200
num_digits = 10

In [21]:
Inp = Input(shape=(784,))
x = Dense(n_hidden_1, activation='relu', name = "Hidden_Layer_1")(Inp)
x = Dense(n_hidden_2, activation='relu', name = "Hidden_Layer_2")(x)
x = Dense(n_hidden_3, activation='relu', name = "Hidden_Layer_3")(x)
x = Dense(n_hidden_4, activation='relu', name = "Hidden_Layer_4")(x)
x = Dense(n_hidden_5, activation='relu', name = "Hidden_Layer_5")(x)
output = Dense(num_digits, activation='softmax', name = "Output_Layer")(x)

In [22]:
# Our model would have '7' layers - input layer, 5 hidden layer and 1 output layer
model3 = Model(Inp, output)
model3.summary() # We have 308,010 parameters to estimate

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 784)]             0         
                                                                 
 Hidden_Layer_1 (Dense)      (None, 300)               235500    
                                                                 
 Hidden_Layer_2 (Dense)      (None, 100)               30100     
                                                                 
 Hidden_Layer_3 (Dense)      (None, 100)               10100     
                                                                 
 Hidden_Layer_4 (Dense)      (None, 100)               10100     
                                                                 
 Hidden_Layer_5 (Dense)      (None, 200)               20200     
                                                                 
 Output_Layer (Dense)        (None, 10)                2010

In [23]:
# We rely on 'Adam' as our optimizing methodology
adam = keras.optimizers.Adam(learning_rate=0.01)

model3.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [24]:
history3 = model3.fit(X_train, y_train,
                      batch_size = batch_size,
                      epochs = training_epochs,
                      validation_data=(X_cv, y_cv))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [25]:
# Input Parameters
n_input = 784 # number of features
n_hidden_1 = 300
n_hidden_2 = 100
n_hidden_3 = 100
n_hidden_4 = 200
num_digits = 10

In [26]:
Inp = Input(shape=(784,))
x = Dense(n_hidden_1, activation='relu', name = "Hidden_Layer_1")(Inp)
x = Dropout(0.3)(x)
x = Dense(n_hidden_2, activation='relu', name = "Hidden_Layer_2")(x)
x = Dropout(0.3)(x)
x = Dense(n_hidden_3, activation='relu', name = "Hidden_Layer_3")(x)
x = Dropout(0.3)(x)
x = Dense(n_hidden_4, activation='relu', name = "Hidden_Layer_4")(x)
output = Dense(num_digits, activation='softmax', name = "Output_Layer")(x)

In [27]:
# Our model would have '6' layers - input layer, 4 hidden layer and 1 output layer
model4 = Model(Inp, output)
model4.summary() # We have 297,910 parameters to estimate

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 784)]             0         
                                                                 
 Hidden_Layer_1 (Dense)      (None, 300)               235500    
                                                                 
 dropout (Dropout)           (None, 300)               0         
                                                                 
 Hidden_Layer_2 (Dense)      (None, 100)               30100     
                                                                 
 dropout_1 (Dropout)         (None, 100)               0         
                                                                 
 Hidden_Layer_3 (Dense)      (None, 100)               10100     
                                                                 
 dropout_2 (Dropout)         (None, 100)               0   

In [28]:
model4.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [29]:
history = model4.fit(X_train, y_train,
                    batch_size = batch_size,
                    epochs = training_epochs,
                    validation_data=(X_cv, y_cv))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [33]:
test_pred = pd.DataFrame(model4.predict(X_test, batch_size=200))
test_pred = pd.DataFrame(test_pred.idxmax(axis = 1))
test_pred.index.name = 'ImageId'
test_pred = test_pred.rename(columns = {0: 'Label'}).reset_index()
test_pred['ImageId'] = test_pred['ImageId'] + 1

test_pred.head()



Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3


In [31]:
test_pred.to_csv('model_result.csv', index = False)