In [1]:
import numpy as np
import pandas as pd

import keras

# Seed every random number generator Keras relies on in one call: Python's
# `random`, NumPy and the backend framework. `np.random.seed` on its own only
# covers NumPy, so weight initialisation and Dropout masks would still differ
# between runs.
keras.utils.set_random_seed(1212)
from keras.utils import to_categorical
from keras.models import Model
from keras.layers import *
from keras import optimizers

# Load Dataset

In [2]:
# Read the labelled training file and the unlabelled test file, in that order.
df_train, df_test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

In [3]:
# First five rows: one label column followed by 784 pixel columns.
df_train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# (rows, columns) of both frames; the test frame has one column fewer
# because it carries no label.
df_train.shape, df_test.shape

((42000, 785), (28000, 784))

# Split the data

In [5]:
# The test file has no label column, so it is used as features unchanged.
x_test = df_test

# Column 0 of the training frame is the target; all remaining columns are
# the pixel features.
y, x = df_train.iloc[:, 0], df_train.iloc[:, 1:]

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows as a validation ("cv") split.
# random_state pins the shuffle so the same rows land in each split on every run.
x_train, x_cv, y_train, y_cv = train_test_split(x, y, test_size=0.2, random_state=1212)

# Convert the data from DataFrames to NumPy arrays

In [7]:
# np.asarray accepts DataFrames and ndarrays alike, so this cell can be
# re-run safely; calling .to_numpy() a second time would raise
# AttributeError because the names would already hold ndarrays.
x_train = np.asarray(x_train)
x_cv = np.asarray(x_cv)

x_test = np.asarray(x_test)

# find the pixel range

In [8]:
# Report the value range over the whole training matrix rather than a single
# image, so the scaling divisor chosen next is checked against every sample.
print(x_train.min(), x_train.max())

0 255


# Feature Normalization

In [9]:
# Cast each split to float32 and rescale by the maximum pixel value (255),
# giving features in the [0, 1] range.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
x_cv = x_cv.astype('float32') / 255

# Convert labels to one-hot encoding

In [10]:
# Digits 0-9 give ten classes, so every label becomes a one-hot row of width 10.
y_train = to_categorical(y_train, num_classes=10)
y_cv = to_categorical(y_cv, num_classes=10)

In [11]:
# Spot-check two encoded labels: each row should contain a single 1 at the
# position of its digit.
y_train[0], y_train[3]

(array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]))

# create a NN model

In [12]:
# Fully connected classifier: 784 input pixels -> four ReLU hidden layers
# (300, 100, 100, 200 units) -> 10-way softmax output.
# The running tensor is named `hidden` rather than `x` so that the feature
# DataFrame `x` built when splitting the data is not overwritten; otherwise
# re-running the train/validation split cell would receive a Keras tensor.
inp = Input(shape=(784,))
hidden = Dense(300, activation='relu', name='Hidden_Layer_1')(inp)
hidden = Dense(100, activation='relu', name='Hidden_Layer_2')(hidden)
hidden = Dense(100, activation='relu', name='Hidden_Layer_3')(hidden)
hidden = Dense(200, activation='relu', name='Hidden_Layer_4')(hidden)
outp = Dense(10, activation='softmax', name='Output_Layer')(hidden)

# Model trained with the SGD optimizer

In [13]:
# Wrap the layer graph into a trainable model and print its layer table.
model1 = Model(inputs=inp, outputs=outp)
model1.summary()

In [14]:
# Baseline optimiser: plain stochastic gradient descent with a learning rate
# of 0.1, minimising categorical cross-entropy and tracking accuracy.
model1.compile(
    optimizer=optimizers.SGD(learning_rate=0.1),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train in mini-batches of 100 for up to 20 epochs, scoring the held-out
# split after every epoch. The returned History object is kept in `history1`.
history1 = model1.fit(
    x_train,
    y_train,
    validation_data=(x_cv, y_cv),
    epochs=20,
    batch_size=100,
    verbose=2,
)

Epoch 1/20
336/336 - 4s - 13ms/step - accuracy: 0.8265 - loss: 0.5780 - val_accuracy: 0.9339 - val_loss: 0.2200
Epoch 2/20
336/336 - 2s - 7ms/step - accuracy: 0.9403 - loss: 0.1942 - val_accuracy: 0.9561 - val_loss: 0.1481
Epoch 3/20
336/336 - 2s - 7ms/step - accuracy: 0.9585 - loss: 0.1332 - val_accuracy: 0.9610 - val_loss: 0.1260
Epoch 4/20
336/336 - 2s - 7ms/step - accuracy: 0.9691 - loss: 0.1004 - val_accuracy: 0.9608 - val_loss: 0.1253
Epoch 5/20
336/336 - 2s - 7ms/step - accuracy: 0.9764 - loss: 0.0758 - val_accuracy: 0.9658 - val_loss: 0.1082
Epoch 6/20
336/336 - 2s - 7ms/step - accuracy: 0.9822 - loss: 0.0591 - val_accuracy: 0.9725 - val_loss: 0.0947
Epoch 7/20
336/336 - 2s - 7ms/step - accuracy: 0.9853 - loss: 0.0470 - val_accuracy: 0.9714 - val_loss: 0.0994
Epoch 8/20
336/336 - 2s - 7ms/step - accuracy: 0.9903 - loss: 0.0347 - val_accuracy: 0.9696 - val_loss: 0.1035
Epoch 9/20
336/336 - 2s - 7ms/step - accuracy: 0.9915 - loss: 0.0291 - val_accuracy: 0.9701 - val_loss: 0.1054


# Models trained with the Adam optimizer

In [21]:
# Fresh copy of the 300-100-100-200 ReLU network with a 10-way softmax
# output, to be trained with Adam instead of SGD.
# The running tensor is named `hidden` rather than `x` so that the feature
# DataFrame `x` built when splitting the data is not overwritten.
inp = Input(shape=(784,))
hidden = Dense(300, activation='relu', name='Hidden_Layer_1')(inp)
hidden = Dense(100, activation='relu', name='Hidden_Layer_2')(hidden)
hidden = Dense(100, activation='relu', name='Hidden_Layer_3')(hidden)
hidden = Dense(200, activation='relu', name='Hidden_Layer_4')(hidden)
outp = Dense(10, activation='softmax', name='Output_Layer')(hidden)

model2 = Model(inp, outp)
model2.summary()

In [22]:
# Adam with learning_rate=0.1. This is far above Adam's documented default
# of 0.001, and the training log under this cell stays at roughly 10%
# accuracy, i.e. the network does not learn at this step size.
model2.compile(
    optimizer=optimizers.Adam(learning_rate=0.1),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Same training schedule as the SGD run: batches of 100, up to 20 epochs,
# validation on the held-out split after each epoch.
history2 = model2.fit(
    x_train,
    y_train,
    validation_data=(x_cv, y_cv),
    epochs=20,
    batch_size=100,
    verbose=2,
)

Epoch 1/20
336/336 - 6s - 17ms/step - accuracy: 0.1108 - loss: 5.5198 - val_accuracy: 0.1086 - val_loss: 2.3055
Epoch 2/20
336/336 - 3s - 8ms/step - accuracy: 0.1061 - loss: 2.9841 - val_accuracy: 0.1086 - val_loss: 2.3056
Epoch 3/20
336/336 - 3s - 8ms/step - accuracy: 0.1050 - loss: 2.3067 - val_accuracy: 0.0995 - val_loss: 2.3100
Epoch 4/20
336/336 - 3s - 8ms/step - accuracy: 0.1036 - loss: 2.3083 - val_accuracy: 0.1086 - val_loss: 2.3092
Epoch 5/20
336/336 - 3s - 8ms/step - accuracy: 0.1018 - loss: 2.3077 - val_accuracy: 0.1086 - val_loss: 2.3060
Epoch 6/20
336/336 - 3s - 8ms/step - accuracy: 0.1041 - loss: 2.3088 - val_accuracy: 0.1086 - val_loss: 2.3054
Epoch 7/20
336/336 - 3s - 8ms/step - accuracy: 0.1064 - loss: 2.3068 - val_accuracy: 0.1086 - val_loss: 2.3068
Epoch 8/20
336/336 - 3s - 8ms/step - accuracy: 0.1057 - loss: 2.3074 - val_accuracy: 0.0963 - val_loss: 2.3078
Epoch 9/20
336/336 - 3s - 8ms/step - accuracy: 0.1064 - loss: 2.3086 - val_accuracy: 0.1086 - val_loss: 2.3075


In [17]:
# Fresh copy of the 300-100-100-200 ReLU network with a 10-way softmax
# output, used to retry Adam with a smaller learning rate.
# The running tensor is named `hidden` rather than `x` so that the feature
# DataFrame `x` built when splitting the data is not overwritten.
inp = Input(shape=(784,))
hidden = Dense(300, activation='relu', name='Hidden_Layer_1')(inp)
hidden = Dense(100, activation='relu', name='Hidden_Layer_2')(hidden)
hidden = Dense(100, activation='relu', name='Hidden_Layer_3')(hidden)
hidden = Dense(200, activation='relu', name='Hidden_Layer_4')(hidden)
outp = Dense(10, activation='softmax', name='Output_Layer')(hidden)

model3 = Model(inp, outp)
model3.summary()

In [18]:
# Adam with learning_rate=0.05. The log under this cell shows training is
# still unstable at this step size: accuracy swings between epochs and then
# drops to about 20%.
model3.compile(
    optimizer=optimizers.Adam(learning_rate=0.05),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Batches of 100, up to 20 epochs, validation after each epoch.
history3 = model3.fit(
    x_train,
    y_train,
    validation_data=(x_cv, y_cv),
    epochs=20,
    batch_size=100,
    verbose=2,
)

Epoch 1/20
336/336 - 6s - 17ms/step - accuracy: 0.6437 - loss: 1.3871 - val_accuracy: 0.8193 - val_loss: 0.6049
Epoch 2/20
336/336 - 3s - 8ms/step - accuracy: 0.7894 - loss: 0.8234 - val_accuracy: 0.4237 - val_loss: 1.5957
Epoch 3/20
336/336 - 3s - 8ms/step - accuracy: 0.7431 - loss: 0.8536 - val_accuracy: 0.6158 - val_loss: 1.1414
Epoch 4/20
336/336 - 3s - 8ms/step - accuracy: 0.6431 - loss: 1.0629 - val_accuracy: 0.6012 - val_loss: 1.1604
Epoch 5/20
336/336 - 3s - 8ms/step - accuracy: 0.7033 - loss: 0.9210 - val_accuracy: 0.7531 - val_loss: 0.8050
Epoch 6/20
336/336 - 3s - 8ms/step - accuracy: 0.7085 - loss: 0.9606 - val_accuracy: 0.5571 - val_loss: 1.6272
Epoch 7/20
336/336 - 3s - 8ms/step - accuracy: 0.2501 - loss: 1.9953 - val_accuracy: 0.1874 - val_loss: 1.8998
Epoch 8/20
336/336 - 3s - 8ms/step - accuracy: 0.1981 - loss: 1.8847 - val_accuracy: 0.1929 - val_loss: 1.8720
Epoch 9/20
336/336 - 3s - 8ms/step - accuracy: 0.2069 - loss: 1.8638 - val_accuracy: 0.2036 - val_loss: 2.3129


In [19]:
# Deeper variant: five ReLU hidden layers (300, 100, 100, 100, 200 units)
# before the 10-way softmax, i.e. one more 100-unit layer than the
# four-hidden-layer networks defined earlier in this notebook.
# The running tensor is named `hidden` rather than `x` so that the feature
# DataFrame `x` built when splitting the data is not overwritten.
inp = Input(shape=(784,))
hidden = Dense(300, activation='relu', name='Hidden_Layer_1')(inp)
hidden = Dense(100, activation='relu', name='Hidden_Layer_2')(hidden)
hidden = Dense(100, activation='relu', name='Hidden_Layer_3')(hidden)
hidden = Dense(100, activation='relu', name='Hidden_Layer_4')(hidden)
hidden = Dense(200, activation='relu', name='Hidden_Layer_5')(hidden)
outp = Dense(10, activation='softmax', name='Output_Layer')(hidden)

model4 = Model(inp, outp)
model4.summary()

In [20]:
# Adam with learning_rate=0.01. At this step size the log under this cell
# shows stable training, with validation accuracy around 96%.
model4.compile(
    optimizer=optimizers.Adam(learning_rate=0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Batches of 100, up to 20 epochs, validation after each epoch.
history4 = model4.fit(
    x_train,
    y_train,
    validation_data=(x_cv, y_cv),
    epochs=20,
    batch_size=100,
    verbose=2,
)

Epoch 1/20
336/336 - 6s - 18ms/step - accuracy: 0.8755 - loss: 0.4082 - val_accuracy: 0.9261 - val_loss: 0.2536
Epoch 2/20
336/336 - 3s - 8ms/step - accuracy: 0.9503 - loss: 0.1849 - val_accuracy: 0.9513 - val_loss: 0.1936
Epoch 3/20
336/336 - 3s - 8ms/step - accuracy: 0.9573 - loss: 0.1686 - val_accuracy: 0.9587 - val_loss: 0.1692
Epoch 4/20
336/336 - 3s - 8ms/step - accuracy: 0.9644 - loss: 0.1373 - val_accuracy: 0.9614 - val_loss: 0.1656
Epoch 5/20
336/336 - 3s - 8ms/step - accuracy: 0.9673 - loss: 0.1253 - val_accuracy: 0.9598 - val_loss: 0.1637
Epoch 6/20
336/336 - 3s - 8ms/step - accuracy: 0.9737 - loss: 0.1047 - val_accuracy: 0.9649 - val_loss: 0.1588
Epoch 7/20
336/336 - 3s - 8ms/step - accuracy: 0.9756 - loss: 0.0988 - val_accuracy: 0.9635 - val_loss: 0.1652
Epoch 8/20
336/336 - 3s - 9ms/step - accuracy: 0.9738 - loss: 0.1117 - val_accuracy: 0.9619 - val_loss: 0.1611
Epoch 9/20
336/336 - 3s - 8ms/step - accuracy: 0.9768 - loss: 0.0939 - val_accuracy: 0.9645 - val_loss: 0.1417


# Using Dropout Regularization

In [24]:
# 300-100-100-200 ReLU network with Dropout(0.3) after each of the first
# three hidden layers; the last hidden layer feeds the 10-way softmax
# directly, without dropout.
# The running tensor is named `hidden` rather than `x` so that the feature
# DataFrame `x` built when splitting the data is not overwritten.
inp = Input(shape=(784,))
hidden = Dense(300, activation='relu', name='Hidden_Layer_1')(inp)
hidden = Dropout(0.3)(hidden)
hidden = Dense(100, activation='relu', name='Hidden_Layer_2')(hidden)
hidden = Dropout(0.3)(hidden)
hidden = Dense(100, activation='relu', name='Hidden_Layer_3')(hidden)
hidden = Dropout(0.3)(hidden)
hidden = Dense(200, activation='relu', name='Hidden_Layer_4')(hidden)
outp = Dense(10, activation='softmax', name='Output_Layer')(hidden)

model5 = Model(inp, outp)
model5.summary()

In [25]:
# The string 'adam' selects Adam with Keras' default hyper-parameters,
# instead of an explicit optimizers.Adam(learning_rate=0.01) instance.
model5.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [26]:
# Train the dropout model: batches of 100, up to 20 epochs, scoring the
# held-out split after every epoch. The History object is kept in `history5`.
history5 = model5.fit(
    x_train,
    y_train,
    validation_data=(x_cv, y_cv),
    epochs=20,
    batch_size=100,
    verbose=2,
)

Epoch 1/20
336/336 - 7s - 20ms/step - accuracy: 0.8133 - loss: 0.5851 - val_accuracy: 0.9439 - val_loss: 0.1863
Epoch 2/20
336/336 - 3s - 9ms/step - accuracy: 0.9339 - loss: 0.2281 - val_accuracy: 0.9610 - val_loss: 0.1350
Epoch 3/20
336/336 - 3s - 8ms/step - accuracy: 0.9508 - loss: 0.1667 - val_accuracy: 0.9669 - val_loss: 0.1115
Epoch 4/20
336/336 - 3s - 8ms/step - accuracy: 0.9575 - loss: 0.1433 - val_accuracy: 0.9720 - val_loss: 0.1024
Epoch 5/20
336/336 - 3s - 8ms/step - accuracy: 0.9635 - loss: 0.1254 - val_accuracy: 0.9724 - val_loss: 0.0976
Epoch 6/20
336/336 - 3s - 9ms/step - accuracy: 0.9675 - loss: 0.1091 - val_accuracy: 0.9740 - val_loss: 0.0952
Epoch 7/20
336/336 - 3s - 8ms/step - accuracy: 0.9717 - loss: 0.0967 - val_accuracy: 0.9750 - val_loss: 0.0918
Epoch 8/20
336/336 - 3s - 9ms/step - accuracy: 0.9747 - loss: 0.0894 - val_accuracy: 0.9758 - val_loss: 0.0882
Epoch 9/20
336/336 - 3s - 9ms/step - accuracy: 0.9754 - loss: 0.0831 - val_accuracy: 0.9742 - val_loss: 0.0915


In [31]:
# Class probabilities for every test image, one row per image and one
# column per digit.
y_pred = model5.predict(x_test, batch_size=200)

# The predicted digit is the column holding the highest probability.
# ImageId is 1-based, following the row order of the test file.
test_pred = pd.DataFrame({
    'ImageId': np.arange(1, len(y_pred) + 1),
    'Label': y_pred.argmax(axis=1),
})

test_pred.head()

[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3


In [32]:
# Write the submission file. index=False keeps the DataFrame's row index out
# of the CSV, leaving only the ImageId and Label columns.
test_pred.to_csv('mnist_submission.csv', index=False)