In [1]:
#import required libraries
import numpy as np 
import pandas as pd
import tensorflow as tf

from tensorflow import keras
from keras import Sequential
from keras.layers import Dense, Conv2D, Flatten, BatchNormalization, MaxPooling2D
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split

In [2]:
import os

# Ask TensorFlow's native logging to hide INFO and WARNING messages.
# NOTE(review): tensorflow is already imported in the first cell, before this
# variable is set; the usual guidance is to set it ahead of the import --
# confirm it still takes effect here or move it above the imports.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})

In [3]:
#read both train and test csv files (paths are relative to the notebook's working directory)
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

#show the first rows of each frame, train first and then test
display(train.head(), test.head())

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
#from train -> X_train, y_train and get numpy array.
# The pixel columns are selected by dropping 'label' by name rather than by
# position, so the label can never end up among the features if the column
# order of the csv changes.
X_train = train.drop(columns='label').to_numpy()
y_train = train['label'].to_numpy()

#get numpy array for test data (test.csv carries pixel columns only, no label).
X_test = test.to_numpy()

# Train and test must expose the same number of pixel features, otherwise the
# later reshape / prediction steps would fail or be silently misaligned.
assert X_train.shape[1] == X_test.shape[1], "train/test feature count mismatch"

#shape of the data
X_train.shape, y_train.shape, X_test.shape

((42000, 784), (42000,), (28000, 784))

In [5]:
#pixel values: report the smallest value, the largest value and the array dtype
print(f'min: {X_train.min()}')
print(f'max: {X_train.max()}')
print(f'data type: {X_train.dtype}')

#relative frequency of every label, ordered by label value
(
    pd.Series(y_train)
    .value_counts(normalize=True)
    .sort_index()
)

min: 0
max: 255
data type: int64


0    0.098381
1    0.111524
2    0.099452
3    0.103595
4    0.096952
5    0.090357
6    0.098500
7    0.104786
8    0.096738
9    0.099714
dtype: float64

In [6]:
#normalize train and test images
# Cast to float32 first, then map the raw 0..255 pixel values onto [-1, 1].
# NOTE: this cell overwrites X_train / X_test / y_train, so it must run exactly
# once per kernel session -- re-running it would rescale already-scaled data
# and re-encode already-encoded labels.
X_train = (X_train.astype(np.float32) - 127.5)/127.5
X_test = (X_test.astype(np.float32) - 127.5)/127.5

#encode labels
# num_classes is pinned to 10 so the one-hot width always matches the 10-unit
# softmax output layer; without it the width is inferred from the largest
# label that happens to be present.
# NOTE(review): to_categorical is imported from keras.utils.np_utils, a path
# that newer Keras releases reportedly no longer provide -- verify and switch
# to keras.utils.to_categorical when upgrading.
y_train = to_categorical(y_train, num_classes=10)

In [7]:
#turn every flat 784-value row into a 28 x 28 image with a single channel axis
#(-1 lets numpy work out the number of images)
X_train = np.reshape(X_train, (-1, 28, 28, 1))
X_test = np.reshape(X_test, (-1, 28, 28, 1))

#shape of data
X_train.shape, X_test.shape

((42000, 28, 28, 1), (28000, 28, 28, 1))

In [8]:
#create training set and validation set
# No test_size is given, so scikit-learn's default split fraction applies.
# y_train is already one-hot encoded at this point and is also used as the
# stratification target; random_state fixes the shuffle.
# NOTE: X_train / y_train are overwritten with the training part only, so
# re-running this cell would split the already reduced training set again.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    stratify=y_train,
    random_state=42,
)

In [9]:
#define a model
# Seed TensorFlow's global random generator before any layer is created so
# that repeated "Restart & Run All" runs start from the same initial weights
# (the train/validation split is already seeded with 42). GPU kernels can
# still introduce small run-to-run differences.
tf.random.set_seed(42)

model = Sequential([
    #Convolution layer 1 (declares the 28 x 28 x 1 input)
    Conv2D(filters=32, kernel_size=(3,3), activation='relu', input_shape=(28, 28, 1)),
    BatchNormalization(),

    #Convolution layer 2, followed by 2 x 2 down-sampling
    Conv2D(filters=32, kernel_size=(3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),

    #Convolution layer 3, followed by 2 x 2 down-sampling
    Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),

    #flatten the feature maps for the fully connected part
    Flatten(),

    #dense layer 1
    Dense(units=200, activation='relu'),
    BatchNormalization(),

    #dense layer 2
    Dense(units=100, activation='relu'),
    BatchNormalization(),

    #output layer: one softmax probability per digit class
    Dense(units=10, activation='softmax'),
])

In [10]:
#compile the model
# categorical cross-entropy pairs with the one-hot encoded labels and the
# softmax output layer; accuracy is tracked as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [11]:
#print the layer-by-layer overview: layer type, output shape and parameter count
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 batch_normalization (BatchN  (None, 26, 26, 32)       128       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 32)        9248      
                                                                 
 batch_normalization_1 (Batc  (None, 24, 24, 32)       128       
 hNormalization)                                                 
                                                                 
 max_pooling2d (MaxPooling2D  (None, 12, 12, 32)       0         
 )                                                               
                                                        

In [12]:
#fit the model
# The held-out split is passed as validation_data so Keras reports val_loss /
# val_accuracy after every epoch; that data is only evaluated, never trained
# on, so over-fitting becomes visible while training runs.
# Keeping the returned History in a variable preserves the per-epoch metrics
# for later inspection and stops the cell from echoing the object's repr.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=128,
    validation_data=(X_val, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f9412f282e0>

In [13]:
#score the trained model on the validation split
#(with the compile settings used here the result is [loss, accuracy])
model.evaluate(x=X_val, y=y_val)



[0.035284463316202164, 0.9900952577590942]

In [14]:
#predict class probabilities for the test images and keep, per image,
#the index of the most probable class
pred = model.predict(X_test).argmax(axis=1)

#peek at the first 10 predicted labels
pred[:10]

array([2, 0, 9, 0, 3, 7, 0, 3, 0, 3])

In [15]:
#load the submission template, overwrite its Label column with the
#predictions and save the result without the dataframe index
sub = pd.read_csv('sample_submission.csv').assign(Label=pred)
sub.to_csv('submission.csv', index=False)