### Digit Recognizer (MNIST) using LeNet-5

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
import os
# Relative directory ("datasets/mnist" on POSIX) that holds the MNIST CSV files.
MNIST_PATH = os.path.join("datasets", "mnist")

In [3]:
import pandas as pd

def load_mnist_data(filename, mnist_path = MNIST_PATH):
    """Read one MNIST CSV file into a pandas DataFrame.

    Args:
        filename: Name of the CSV file located inside ``mnist_path``.
        mnist_path: Directory containing the CSV files; defaults to
            ``MNIST_PATH``.

    Returns:
        The DataFrame returned by ``pd.read_csv`` for the joined path.
    """
    return pd.read_csv(os.path.join(mnist_path, filename))
    

In [4]:
# Load the training and test CSV files from the default MNIST_PATH directory.
train_data = load_mnist_data('train.csv')
test_data = load_mnist_data('test.csv')

In [5]:
# All rows of the test frame are used as features; it has 784 pixel columns
# and no label column to drop.
X_test = test_data[:]
X_test.shape

(28000, 784)

In [6]:
# Full training frame, still including the "label" column (hence 785 columns).
X_train_full = train_data[:]
X_train_full.shape

(42000, 785)

In [7]:
# Target vector: the "label" column of the training frame.
y_train_full = train_data["label"]

In [8]:
y_train_full.shape

(42000,)

In [9]:
# Feature matrix: every training column except "label" (784 pixel columns).
X_train_full = train_data.drop("label", axis = 1)
X_train_full.shape

(42000, 784)

In [10]:
# Hold out a validation set using the default 75/25 split. The fixed seed makes
# the split (and therefore the reported validation accuracy) reproducible
# across notebook runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state = 42
)

In [12]:
X_valid.shape

(10500, 784)

In [13]:
X_test.shape

(28000, 784)

In [14]:
y_train.shape

(31500,)

In [15]:
y_valid.shape

(10500,)

In [30]:
# Turn each flat 784-pixel row into a 28x28 single-channel image
# (samples, height, width, channels). All three splits must be reshaped:
# the padding step that follows uses a 4-axis pad specification.
X_train = X_train.values.reshape(-1, 28, 28, 1)
X_valid = X_valid.values.reshape(-1, 28, 28, 1)
X_test = X_test.values.reshape(-1, 28, 28, 1)

In [31]:
X_train.shape

(31500, 28, 28, 1)

In [32]:
# Zero-pad height and width by 2 pixels on each side (28x28 -> 32x32) while
# leaving the sample and channel axes untouched, so the images match the
# 32x32 input expected by the network.
X_train = np.pad(X_train, pad_width=((0, 0), (2, 2), (2, 2), (0, 0)), mode="constant")
X_valid = np.pad(X_valid, pad_width=((0, 0), (2, 2), (2, 2), (0, 0)), mode="constant")
X_test = np.pad(X_test, pad_width=((0, 0), (2, 2), (2, 2), (0, 0)), mode="constant")

In [33]:
X_train.shape

(31500, 32, 32, 1)

In [34]:
X_valid.shape

(10500, 32, 32, 1)

In [35]:
X_test.shape

(28000, 32, 32, 1)

In [37]:
# Standardize every pixel position using statistics computed on the training
# split only, then apply the same shift and scale to all three splits.
# The 1e-7 term keeps the division finite where a pixel's std is 0.
X_mean = X_train.mean(axis = 0, keepdims = True)
X_std = 1e-7 + X_train.std(axis = 0, keepdims = True)
X_test = (X_test - X_mean) / X_std
X_valid = (X_valid - X_mean) / X_std
X_train = (X_train - X_mean) / X_std

In [38]:
# One-hot encode the digit labels. num_classes is pinned to 10 so both splits
# always produce 10 columns (matching the 10-unit softmax output), instead of
# inferring the width from the largest label present in each split.
y_train = keras.utils.to_categorical(y_train, num_classes = 10)
y_valid = keras.utils.to_categorical(y_valid, num_classes = 10)

### Build the model (tanh and ReLU activations do not make much difference)

In [None]:
# LeNet-5 style network for 32x32x1 inputs: three 5x5 convolutions
# (6, 16 and 120 filters) with 2x2 average pooling after the first two,
# followed by an 84-unit dense layer and a 10-way softmax output.
# With the default "valid" padding the spatial size goes
# 32 -> 28 -> 14 -> 10 -> 5 -> 1.
model = keras.models.Sequential([
    #Conv Layer1
    keras.layers.Conv2D(filters = 6, kernel_size = 5, strides = 1, activation = "tanh", input_shape = [32, 32, 1]),
    #Average pooling
    keras.layers.AveragePooling2D(pool_size = 2, strides = 2),
    #Conv Layer2 (keyword is `strides`; `stride` raises TypeError)
    keras.layers.Conv2D(filters = 16, kernel_size = 5, strides = 1, activation = "tanh"),
    keras.layers.AveragePooling2D(pool_size = 2, strides = 2),
    #Conv layer 3
    keras.layers.Conv2D(filters = 120, kernel_size = 5, strides = 1, activation = "tanh"),
    #Flatten the input into 1D array
    keras.layers.Flatten(),
    #Fully connected layer (keyword is `units`)
    keras.layers.Dense(units = 84, activation = "tanh"),
    #Output layer
    keras.layers.Dense(units = 10, activation = "softmax")
])

### Compile the model (optimizer options: SGD / Adam / RMSprop)

In [None]:
# One-hot targets pair with categorical cross-entropy; the optimizer is
# Keras' default-configured SGD, and accuracy is tracked during training.
model.compile(
    optimizer = "sgd",
    loss = "categorical_crossentropy",
    metrics = ["accuracy"],
)

### Fit the model

In [None]:
# Train for 30 epochs, evaluating on the held-out split after each epoch.
# validation_data is passed as an (inputs, targets) tuple, the form documented
# by Keras; a list may be interpreted as inputs only in some versions.
history = model.fit(
    X_train, y_train,
    epochs = 30,
    validation_data = (X_valid, y_valid),
)

### Predict using test set

In [None]:
# Softmax outputs for the test images: one row per image, one column per
# output unit (10).
y_pred = model.predict(X_test)

In [None]:
import numpy as np
# Reduce each row of class scores to the index of its largest entry (the
# predicted digit) and lay the result out as two columns: a 1-based ImageId
# and the predicted Label.
y_pred = pd.DataFrame({'Label': y_pred.argmax(axis = 1)})
y_pred.index.name = 'ImageId'
y_pred = y_pred.reset_index()
y_pred['ImageId'] += 1

y_pred.head()

In [None]:
# Write the predictions to disk; the DataFrame index is omitted because
# ImageId is already a regular column.
y_pred.to_csv('mnist_submission.csv', index = False)

### Result: achieved an accuracy of 98%