## Imports

In [5]:
from numpy.random import seed
seed(888)  # seed NumPy's global RNG so NumPy-side randomness is repeatable

# The graph below is written against the TensorFlow 1.x API (placeholders,
# graph-mode variables), so the v1 compatibility module is used as `tf`
# instead of a plain `import tensorflow as tf`.
import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import set_random_seed

# On TensorFlow 2.x eager execution is enabled by default and tf.placeholder()
# raises a RuntimeError. Switch to graph mode here, before any op or tensor
# exists, so the notebook survives Restart Kernel -> Run All. Calling this on
# an installation that is already in graph mode is harmless.
tf.disable_eager_execution()

# Set the TensorFlow seed after graph mode is active so that it is applied to
# the graph the rest of the notebook builds.
set_random_seed(404)

In [6]:
import os
import numpy as np

## Constants

In [27]:
# --- Dataset locations (CSV files, relative to the notebook) ---
X_TRAIN_PATH = './MNIST/digit_xtrain.csv'
Y_TRAIN_PATH = './MNIST/digit_ytrain.csv'
X_TEST_PATH = './MNIST/digit_xtest.csv'
Y_TEST_PATH = './MNIST/digit_ytest.csv'

# --- Image geometry ---
IMAGE_HEIGHT = 28
IMAGE_WIDTH = 28
CHANNELS = 1
# Length of one flattened image vector (height * width * channels).
TOTAL_INPUT = IMAGE_WIDTH * IMAGE_HEIGHT * CHANNELS

# --- Labels and data split ---
NUMBER_OF_CLASSES = 10
VALIDATION_SIZE = 10_000

## Getting the data


In [8]:
%%time

# Read the training labels from the comma-separated file as integers.
y_train_all = np.loadtxt(fname=Y_TRAIN_PATH, dtype=int, delimiter=',')

CPU times: total: 141 ms
Wall time: 143 ms


In [9]:
# Dimensions of the freshly loaded training labels.
np.shape(y_train_all)

(60000,)

In [10]:
# Read the test labels from the comma-separated file as integers.
y_test = np.loadtxt(fname=Y_TEST_PATH, dtype=int, delimiter=',')

In [11]:
%%time

# Read the training pixel values (integers) from the comma-separated file.
# This is the slowest load in the notebook, hence the %%time on the cell.
x_train_all = np.loadtxt(fname=X_TRAIN_PATH, dtype=int, delimiter=',')

CPU times: total: 16.8 s
Wall time: 16.8 s


In [12]:
%%time

# Read the test pixel values (integers) from the comma-separated file.
x_test = np.loadtxt(fname=X_TEST_PATH, dtype=int, delimiter=',')

CPU times: total: 2.81 s
Wall time: 2.84 s


## Data Exploration

In [13]:
# Rows are images, columns are the flattened pixel values.
np.shape(x_train_all)

(60000, 784)

In [14]:
# Peek at the first training image as a flat vector of raw pixel values:
# 0 is a completely white pixel, 255 an extremely dark one.
x_train_all[0, :]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [15]:
# Label array dimensions, for comparison with the image array.
np.shape(y_train_all)

(60000,)

In [16]:
# Dimensions of the test images (rows = images, columns = pixels).
np.shape(x_test)

(10000, 784)

## Data Preprocessing

In [17]:
# Re-scaling: map raw pixel intensities from [0, 255] to [0.0, 1.0].
#
# The names are rebound in place, so an unguarded division would shrink the
# data by another factor of 255 every time this cell is re-run. The arrays are
# loaded with an integer dtype and turn into floats once divided, so the dtype
# tells us whether scaling is still pending; re-running the cell is then a no-op.
if np.issubdtype(x_train_all.dtype, np.integer):
    x_train_all = x_train_all / 255.0
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test / 255.0

### Convert target values to one-hot encoding

In [18]:
# One-hot encode: each integer label selects the matching row of the
# NUMBER_OF_CLASSES x NUMBER_OF_CLASSES identity matrix.
y_train_all = np.identity(NUMBER_OF_CLASSES)[y_train_all]

In [19]:
# After encoding there is one column per class.
np.shape(y_train_all)

(60000, 10)

In [20]:
# Same one-hot encoding for the test labels: index rows of the identity matrix.
y_test = np.identity(NUMBER_OF_CLASSES)[y_test]

In [21]:
# Encoded test labels: one column per class.
np.shape(y_test)

(10000, 10)

## Create validation dataset from training dataset

In [22]:
# The first VALIDATION_SIZE rows of images and labels form the validation set.
x_validation, y_validation = (
    x_train_all[0:VALIDATION_SIZE],
    y_train_all[0:VALIDATION_SIZE],
)

In [23]:
# Everything from row VALIDATION_SIZE onwards is kept for training.
x_train, y_train = (
    x_train_all[VALIDATION_SIZE:len(x_train_all)],
    y_train_all[VALIDATION_SIZE:len(y_train_all)],
)

In [24]:
# Size of the training portion after the split.
np.shape(x_train)

(50000, 784)

In [25]:
# Size of the validation portion after the split.
np.shape(x_validation)

(10000, 784)

## Setting up TensorFlow graph

In [29]:
# Graph inputs. The leading None leaves the batch dimension unspecified.
# X is sized for flattened images, y for one label column per class.
X = tf.placeholder(dtype=tf.float32, shape=(None, TOTAL_INPUT))
y = tf.placeholder(dtype=tf.float32, shape=(None, NUMBER_OF_CLASSES))

### Setting up Neural Network

#### Hyperparameters

In [32]:
# Training schedule
number_of_epochs = 5
learning_rate = 0.0001

# Width (number of units) of each hidden layer
hidden_layer_1 = 512
hidden_layer_2 = 64

In [34]:
# First hidden layer weights: a TOTAL_INPUT x hidden_layer_1 matrix whose
# starting values come from a truncated normal (stddev 0.1, fixed op seed).
initial_weight_1 = tf.truncated_normal(
    shape=(TOTAL_INPUT, hidden_layer_1),
    stddev=0.1,
    seed=42,
)
weight_1 = tf.Variable(initial_value=initial_weight_1)

In [35]:
# First hidden layer biases: one per hidden unit, all starting at zero.
initial_bias_1 = tf.constant(value=0.0, shape=(hidden_layer_1,))
bias_1 = tf.Variable(initial_value=initial_bias_1)

In [37]:
# Affine step of the first hidden layer: X @ weight_1, then add the bias
# (broadcast across the batch).
layer_1_input = tf.matmul(a=X, b=weight_1) + bias_1

In [38]:
# ReLU activation on the first hidden layer's pre-activation values.
layer_1_output = tf.nn.relu(features=layer_1_input)

In [39]:
# Second hidden layer: hidden_layer_1 inputs -> hidden_layer_2 units.

# Weights start from a truncated normal (stddev 0.1, fixed op seed).
initial_weight_2 = tf.truncated_normal(
    shape=(hidden_layer_1, hidden_layer_2),
    stddev=0.1,
    seed=42,
)
weight_2 = tf.Variable(initial_value=initial_weight_2)

# Biases start at zero, one per unit.
initial_bias_2 = tf.constant(value=0.0, shape=(hidden_layer_2,))
bias_2 = tf.Variable(initial_value=initial_bias_2)

# Affine transform of the previous layer's activations, followed by ReLU.
layer_2_input = tf.matmul(a=layer_1_output, b=weight_2) + bias_2
layer_2_output = tf.nn.relu(features=layer_2_input)

In [40]:
# Output layer: hidden_layer_2 inputs -> one unit per class.

# Weights start from a truncated normal (stddev 0.1, fixed op seed).
initial_weight_3 = tf.truncated_normal(
    shape=(hidden_layer_2, NUMBER_OF_CLASSES),
    stddev=0.1,
    seed=42,
)
weight_3 = tf.Variable(initial_value=initial_weight_3)

# Biases start at zero, one per class.
initial_bias_3 = tf.constant(value=0.0, shape=(NUMBER_OF_CLASSES,))
bias_3 = tf.Variable(initial_value=initial_bias_3)

# layer_3_input holds the raw class scores; softmax turns each row into
# values that sum to 1.
# NOTE(review): if the loss defined later uses a *_with_logits function, it
# should receive layer_3_input rather than this softmax output -- confirm.
layer_3_input = tf.matmul(a=layer_2_output, b=weight_3) + bias_3
output = tf.nn.softmax(logits=layer_3_input)

## Loss, Optimisation and Metrics