## Imports

In [5]:
# Seed NumPy's global random generator so NumPy-side randomness is repeatable.
from numpy.random import seed
seed(888)
#import tensorflow as tf
import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import set_random_seed

# Everything below uses the TF1 graph API (tf.placeholder, tf.Session,
# Variable.eval), which cannot be used while eager execution is enabled.
# Switch it off here, before any tensor is created and before the graph seed
# is set, so the notebook works from a fresh kernel (Restart & Run All).
tf.disable_eager_execution()

# Graph-level seed for TensorFlow's random ops.
set_random_seed(404)

In [6]:
import os
import numpy as np

## Constants

In [27]:
# CSV files holding the pixel data (x) and the digit labels (y).
X_TRAIN_PATH = './MNIST/digit_xtrain.csv'
Y_TRAIN_PATH = './MNIST/digit_ytrain.csv'
X_TEST_PATH = './MNIST/digit_xtest.csv'
Y_TEST_PATH = './MNIST/digit_ytest.csv'

# Image geometry; TOTAL_INPUT is the length of one flattened image.
IMAGE_WIDTH, IMAGE_HEIGHT = 28, 28
CHANNELS = 1
TOTAL_INPUT = IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS

# Number of target classes and number of training rows held out for validation.
NUMBER_OF_CLASSES = 10
VALIDATION_SIZE = 10_000

## Getting the data


In [8]:
%%time

# Training labels, parsed as integers from the comma-separated file.
y_train_all = np.loadtxt(fname=Y_TRAIN_PATH, dtype=int, delimiter=',')

CPU times: total: 141 ms
Wall time: 143 ms


In [9]:
y_train_all.shape

(60000,)

In [10]:
# Test labels, parsed as integers from the comma-separated file.
y_test = np.loadtxt(fname=Y_TEST_PATH, dtype=int, delimiter=',')

In [11]:
%%time

# Training images: each CSV row is one flattened image of integer pixel values.
x_train_all = np.loadtxt(fname=X_TRAIN_PATH, dtype=int, delimiter=',')

CPU times: total: 16.8 s
Wall time: 16.8 s


In [12]:
%%time

# Test images: each CSV row is one flattened image of integer pixel values.
x_test = np.loadtxt(fname=X_TEST_PATH, dtype=int, delimiter=',')

CPU times: total: 2.81 s
Wall time: 2.84 s


## Data Exploration

In [13]:
x_train_all.shape

(60000, 784)

In [14]:
# Show the first training image as a flat vector of pixel intensities.
# Values run from 0 to 255: 0 is a completely white (blank) pixel and
# 255 is the darkest possible pixel.
x_train_all[0]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [15]:
y_train_all.shape

(60000,)

In [16]:
x_test.shape

(10000, 784)

## Data Preprocessing

In [17]:
# Re-scale the pixel values by dividing by 255.0 (the maximum intensity),
# which also turns the integer arrays into floating-point arrays.
# NOTE: the arrays are overwritten with their scaled versions, so running
# this cell a second time would divide by 255 again.
x_train_all = x_train_all / 255.0
x_test = x_test / 255.0

### Convert target values to one-hot encoding

In [18]:
# Row k of the identity matrix is the one-hot vector for class k, so indexing
# the matrix with the integer labels yields one one-hot row per sample.
identity_matrix = np.eye(NUMBER_OF_CLASSES)
y_train_all = identity_matrix[y_train_all]

In [19]:
y_train_all.shape

(60000, 10)

In [20]:
# Same one-hot conversion for the test labels: pick row `label` of the
# identity matrix for every integer label.
identity_matrix = np.eye(NUMBER_OF_CLASSES)
y_test = identity_matrix[y_test]

In [21]:
y_test.shape

(10000, 10)

## Create validation dataset from training dataset

In [22]:
# The first VALIDATION_SIZE rows of the full training data (images and their
# labels, sliced identically so they stay aligned) form the validation set.
validation_rows = slice(None, VALIDATION_SIZE)
x_validation = x_train_all[validation_rows]
y_validation = y_train_all[validation_rows]

In [23]:
# Everything after the first VALIDATION_SIZE rows is used for training;
# images and labels are sliced identically so they stay aligned.
training_rows = slice(VALIDATION_SIZE, None)
x_train = x_train_all[training_rows]
y_train = y_train_all[training_rows]

In [24]:
x_train.shape

(50000, 784)

In [25]:
x_validation.shape

(10000, 784)

## Setting up TensorFlow graph

In [53]:
# Graph inputs, fed at run time through feed_dict.
# X receives flattened images (TOTAL_INPUT values per row) and Y receives the
# matching one-hot labels (NUMBER_OF_CLASSES values per row). The leading
# dimension is None so the number of rows per feed is not fixed.
X = tf.placeholder(tf.float32, shape = [None, TOTAL_INPUT])
Y = tf.placeholder(tf.float32, shape = [None, NUMBER_OF_CLASSES])

### Setting up Neural Network

#### Hyperparameters

In [54]:
# Optimisation settings: passes over the training data and optimizer step size.
learning_rate = 1e-4
number_of_epochs = 5

# Number of units in the first and second hidden layers.
hidden_layer_1, hidden_layer_2 = 512, 64

In [55]:
# First hidden layer weights: a [TOTAL_INPUT, hidden_layer_1] matrix whose
# starting values are drawn from a truncated normal distribution
# (stddev 0.1) with a fixed op seed.
initial_weight_1 = tf.truncated_normal(shape = [TOTAL_INPUT, hidden_layer_1], stddev=0.1, seed=42)
weight_1 = tf.Variable(initial_value = initial_weight_1)

In [56]:
# First hidden layer bias: one value per hidden unit, starting at zero.
initial_bias_1 = tf.constant(value = 0.0, shape = [hidden_layer_1])
bias_1 = tf.Variable(initial_value = initial_bias_1)

In [57]:
# Pre-activation of the first hidden layer: X @ weight_1 + bias_1.
layer_1_input = tf.matmul(X, weight_1) + bias_1

In [58]:
# ReLU activation of the first hidden layer; this feeds the second layer.
layer_1_output = tf.nn.relu(layer_1_input)

In [59]:
# Creating the second layer
# Weights map the hidden_layer_1 activations to hidden_layer_2 units and start
# from a truncated normal distribution (stddev 0.1) with a fixed op seed.
initial_weight_2 = tf.truncated_normal(shape = [hidden_layer_1, hidden_layer_2], stddev=0.1, seed=42)
weight_2 = tf.Variable(initial_value = initial_weight_2)

# One bias per unit of the second hidden layer, starting at zero.
initial_bias_2 = tf.constant(value = 0.0, shape = [hidden_layer_2])
bias_2 = tf.Variable(initial_value = initial_bias_2)

# Affine transform of the first layer's output followed by ReLU.
layer_2_input = tf.matmul(layer_1_output, weight_2) + bias_2
layer_2_output = tf.nn.relu( layer_2_input)

In [60]:
# Creating the OUTPUT layer
# Weights map the hidden_layer_2 activations to one score per class and start
# from a truncated normal distribution (stddev 0.1) with a fixed op seed.
initial_weight_3 = tf.truncated_normal(shape =[hidden_layer_2,NUMBER_OF_CLASSES], stddev=0.1, seed=42)
weight_3 = tf.Variable(initial_value = initial_weight_3)

# One bias per class, starting at zero.
initial_bias_3 = tf.constant(value = 0.0, shape = [NUMBER_OF_CLASSES])
bias_3 = tf.Variable(initial_value = initial_bias_3)

# layer_3_input holds the raw (unnormalised) class scores, i.e. the logits;
# output is the softmax of those scores.
layer_3_input = tf.matmul(layer_2_output, weight_3) + bias_3
output = tf.nn.softmax( layer_3_input)

## Loss, Optimisation and Metrics
### Defining Loss Function


In [61]:
# Mean cross-entropy between the one-hot labels Y and the network's prediction.
# softmax_cross_entropy_with_logits_v2 applies softmax to `logits` internally,
# so it must be given the raw class scores (layer_3_input). Passing `output`,
# which has already been through tf.nn.softmax, would apply softmax twice and
# produce an incorrect loss.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=layer_3_input)
)

### Defining Optimizer

In [62]:
# Adam optimizer using the learning_rate hyperparameter; running train_step in
# a session performs one update that minimises `loss` for the fed batch.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_step = optimizer.minimize(loss)

### Accuracy metric

In [65]:
# A row is correct when the index of its largest predicted value equals the
# index of the largest entry in its one-hot label.
correct_prediction = tf.equal(tf.argmax(output, axis=1) , tf.argmax(Y, axis=1))
# Casting the booleans to 0.0/1.0 and averaging gives the fraction of correct rows.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

## Run Session

In [66]:
# Session used to execute the graph defined above. It is kept open for the
# rest of the notebook and is not closed in the cells shown here.
session = tf.Session()

In [67]:
# Initialise every tf.Variable (weights and biases) with its initial value.
# Re-running this cell resets the variables to their initial values.
init = tf.global_variables_initializer()
session.run(init)

In [69]:
# Sanity check: read the output-layer bias back from the session; right after
# initialisation it should be all zeros.
bias_3.eval(session)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

## Making Batches

In [71]:
# Mini-batch configuration.
size_of_batch = 1000
num_examples = y_train.shape[0]

# Number of whole batches that fit into one pass over the training set.
number_iterations = num_examples // size_of_batch

# Read cursor into the training set; advanced by next_batch().
index_in_epoch = 0

In [87]:
def next_batch(batch_size, dataset, labels):
    """Return the next consecutive slice of `dataset` and `labels`.

    The position is tracked in the module-level cursor `index_in_epoch`, which
    this function advances by `batch_size` on every call. When the advanced
    cursor would exceed the module-level `num_examples`, the function wraps
    around: it returns the first `batch_size` rows and leaves the cursor at
    `batch_size`.

    Args:
        batch_size: number of rows to return.
        dataset: sliceable collection of samples.
        labels: sliceable collection of labels, aligned with `dataset`.

    Returns:
        A tuple `(dataset[start:end], labels[start:end])`.
    """
    global index_in_epoch

    lower, upper = index_in_epoch, index_in_epoch + batch_size
    index_in_epoch = upper

    # Past the end of the data: start over from the first row.
    if upper > num_examples:
        lower, upper = 0, batch_size
        index_in_epoch = upper

    window = slice(lower, upper)
    return dataset[window], labels[window]

## Training loop

In [None]:
# Train for number_of_epochs passes; each pass runs number_iterations
# mini-batch updates and then reports accuracy once.
for epoch in range(number_of_epochs):
    for _ in range(number_iterations):
        batch_x, batch_y = next_batch(batch_size=size_of_batch, dataset=x_train, labels=y_train)

        feed_dictionary = {X: batch_x, Y: batch_y}

        # One optimizer update on the current mini-batch.
        session.run(train_step, feed_dict=feed_dictionary)

    # Accuracy is only reported once per epoch, so evaluate it once, on the
    # last mini-batch of the epoch, instead of after every update. Fetching
    # the tensor itself (not a one-element list) yields a scalar to print.
    batch_accuracy = session.run(fetches=accuracy, feed_dict=feed_dictionary)

    # Accuracy on the held-out validation rows, which are never trained on.
    validation_accuracy = session.run(
        fetches=accuracy,
        feed_dict={X: x_validation, Y: y_validation},
    )

    print(
        f'Epoch {epoch} \t | Training Accuracy = {batch_accuracy:.4f}'
        f' \t | Validation Accuracy = {validation_accuracy:.4f}'
    )

print("Training complete")