# Digit Recognizer
## Kaggle competition
### Learn computer vision fundamentals with the famous MNIST data
This version uses the TensorFlow toolbox in order to build a deep CNN
(convolutional neural network).

### Import Data
The dataset is downloaded from https://www.kaggle.com/c/digit-recognizer/data?select=test.csv

In [93]:
import pandas as pd
import numpy as np
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Side length of the square images fed to the network.
imagesize = 28

# Read the training table; column 0 is used as the target and every
# remaining column as pixel input.
train = pd.read_csv('train.csv')
raw_matrix = np.array(train)

train_tar = raw_matrix[:, 0]
imagenumb = len(train_tar)
train_target = train_tar

# Rescale the pixel values and reshape them to (samples, height, width, channels).
# NOTE(review): with a 0..255 input the `+ 0.1` offset yields values in
# [0.1, 1.09]; the usual recipe adds 0.01 — confirm the intent. Whatever is
# chosen must stay identical to the preprocessing applied to the test data.
pixel_matrix = raw_matrix[:, 1:]
train_data = (0.99 * pixel_matrix / 255 + 0.1).reshape(-1, imagesize, imagesize, 1)

### Implement Convolutional Neural Network with Tensorflow


In [94]:
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt

In [95]:
# Network layout:
#   two 6x6 convolutions (32, then 64 filters) -> 2x2 max-pool
#   one 3x3 convolution (64 filters)           -> 2x2 max-pool
#   flatten -> Dense(64) -> Dense(32) -> Dense(10, softmax)
# Every convolution is followed by batch normalisation.
CNN = models.Sequential([
    layers.Conv2D(32, (6, 6), padding='Same', activation='relu', input_shape=(28, 28, 1)),
    layers.BatchNormalization(),
    layers.Conv2D(64, (6, 6), padding='Same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPool2D((2, 2)),
    layers.Conv2D(64, (3, 3), padding='Same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPool2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

CNN.summary()

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_52 (Conv2D)           (None, 28, 28, 32)        1184      
_________________________________________________________________
batch_normalization_47 (Batc (None, 28, 28, 32)        128       
_________________________________________________________________
conv2d_53 (Conv2D)           (None, 28, 28, 64)        73792     
_________________________________________________________________
batch_normalization_48 (Batc (None, 28, 28, 64)        256       
_________________________________________________________________
max_pooling2d_31 (MaxPooling (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_54 (Conv2D)           (None, 14, 14, 64)        36928     
_________________________________________________________________
batch_normalization_49 (Batc (None, 14, 14, 64)      

### Compile and Train the CNN

In [96]:
# Quick sanity check of the tensors handed to the optimiser.
print(train_data.shape, train_target.shape)
print(train_target)

# The network's last layer uses a softmax activation, so the model emits class
# probabilities rather than raw logits. The loss therefore has to be built with
# from_logits=False; from_logits=True would treat those probabilities as logits.
# The sparse variant is used because the targets are integer class ids, not
# one-hot vectors.
CNN.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

# `progress` keeps the History object returned by fit().
progress = CNN.fit(train_data, train_target, epochs=2)

(42000, 28, 28, 1) (42000,)
[1 0 1 ... 7 6 9]
Epoch 1/2
Epoch 2/2


### Import competition test data and predict

In [None]:
# Read the competition test table; every column is used as pixel input.
test = pd.read_csv('test.csv')
test_pixels = np.array(test)

# Apply exactly the same scaling and reshaping that was used for the
# training images, then run the trained network on the result.
test_data = (0.99 * test_pixels / 255 + 0.1).reshape(-1, imagesize, imagesize, 1)

prediction = CNN.predict(test_data)

In [113]:
# Pick the highest-scoring class for every row of `prediction` in a single
# vectorized call instead of a per-row Python loop. The result is an integer
# array of class indices with one entry per row.
pred = np.argmax(prediction, axis=1)

In [114]:
# Build two integer column vectors of shape (n, 1): consecutive image ids
# starting at 1, and the predicted labels.
# reshape(-1, 1) is idempotent, so running this cell a second time leaves the
# shapes untouched; transposing an `ndmin=2` copy would flip an already
# column-shaped `pred` to (1, n) on a re-run and break the later stacking.
ID = np.arange(1, test_data.shape[0] + 1, dtype=int).reshape(-1, 1)
pred = np.asarray(pred, dtype=int).reshape(-1, 1)

In [115]:
# Place the id column and the label column side by side, wrap them in a
# DataFrame with the two submission column names, and write the CSV without
# the index column.
submission_matrix = np.concatenate((ID, pred), axis=1)
sub = pd.DataFrame(data=submission_matrix, columns=['ImageId', 'Label'])
sub.to_csv('sub.csv', index=False)