# Deep Neural Network for MNIST Classification

The dataset provides 28x28 images of handwritten digits (one digit per image), so each image is described by 784 pixel values. MNIST contains 70,000 such images in total, and the goal is to write an algorithm that detects which digit is written. Since there are only 10 digits, this is a classification problem with 10 classes.

This code uses a Python 3 environment and TensorFlow version 1.14.

## 1. Import the relevant packages

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST data from the MNIST_data/ directory. one_hot=True asks for one-hot encoded labels.
# The resulting object exposes the train, validation and test splits used in the cells below.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# From this point on, only show TensorFlow log messages at ERROR level.
# Because this runs after the data is loaded, messages logged during loading are not filtered.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


## 2. Outline the model

In [25]:
# Layer dimensions: 784 inputs (one per pixel of a 28x28 image), 10 outputs (one per digit class),
# and 100 units in each hidden layer.
input_size = 784
output_size = 10
hidden_layer_size = 100

# Start from an empty default graph so variables left over from previous runs do not linger.
tf.reset_default_graph()

# Placeholders that receive the data at run time.
# The first dimension is None so that any number of samples can be fed at once.
inputs = tf.placeholder(dtype=tf.float32, shape=[None, input_size])
targets = tf.placeholder(dtype=tf.float32, shape=[None, output_size])

In [26]:
# FIRST HIDDEN LAYER

# Trainable parameters connecting the inputs to the first hidden layer.
# tf.get_variable("name", shape) declares a variable; with no initializer given,
# the default is Xavier (Glorot) initialization.
weights_1 = tf.get_variable("weights_1", shape=[input_size, hidden_layer_size])
biases_1 = tf.get_variable("biases_1", shape=[hidden_layer_size])

# Linear combination of the inputs and the weights, plus the bias...
pre_activation_1 = tf.matmul(inputs, weights_1) + biases_1

# ...followed by the ReLU activation.
# tf.nn is the neural network support module; it contains the most commonly used activation functions.
outputs_1 = tf.nn.relu(pre_activation_1)

In [27]:
# SECOND HIDDEN LAYER

# Trainable parameters connecting the first hidden layer to the second hidden layer.
weights_2 = tf.get_variable("weights_2", shape=[hidden_layer_size, hidden_layer_size])
biases_2 = tf.get_variable("biases_2", shape=[hidden_layer_size])

# Linear combination of the first layer's outputs and the weights, plus the bias,
# passed through the ReLU activation.
pre_activation_2 = tf.matmul(outputs_1, weights_2) + biases_2
outputs_2 = tf.nn.relu(pre_activation_2)

In [28]:
# OUTPUT LAYER

# Trainable parameters connecting the second hidden layer to the output layer.
weights_3 = tf.get_variable("weights_3", shape=[hidden_layer_size, output_size])
biases_3 = tf.get_variable("biases_3", shape=[output_size])

# Final linear combination. No activation is applied here: these raw scores (logits)
# are passed to the softmax cross-entropy loss, which applies the softmax itself.
outputs = tf.matmul(outputs_2, weights_3) + biases_3

In [29]:
# LOSS FUNCTION

# Softmax cross-entropy between the logits (outputs) and the one-hot targets, one value per sample.
# The _v2 op replaces the deprecated tf.nn.softmax_cross_entropy_with_logits. Unlike the old op it
# can also backpropagate into labels, but targets is a placeholder, so training is unaffected.
loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=targets, logits=outputs)

# Get the average loss over the samples that were fed in.
mean_loss = tf.reduce_mean(loss)

In [30]:
# OPTIMIZATION

# Use the Adam optimizer with a learning rate of 0.002
# (AdamOptimizer's own default would be 0.001).
adam_optimizer = tf.train.AdamOptimizer(learning_rate=0.002)

# Running this op performs one update step that reduces the mean loss.
optimize = adam_optimizer.minimize(mean_loss)

In [31]:
# OUTPUT

# tf.argmax(tensor, axis) returns the index of the largest value along the given axis.
# Along axis 1 that is the predicted digit for the outputs and the true digit for the one-hot targets.
predicted_digits = tf.argmax(outputs, axis=1)
true_digits = tf.argmax(targets, axis=1)

# tf.equal() compares element-wise and yields a boolean per sample (True where the prediction is correct).
out_equals_target = tf.equal(predicted_digits, true_digits)

# Accuracy is the fraction of correct predictions.
# The booleans are cast to floats first so that their mean can be computed.
accuracy = tf.reduce_mean(tf.cast(out_equals_target, tf.float32))

In [32]:
# PREPARE SESSION

# Declare the session variable. Every later sess.run(...) call executes graph ops through it.
# NOTE(review): the session is never closed in the visible cells; re-running this cell
# creates a new session each time.
sess = tf.InteractiveSession()

# Initialize the variables. Default initializer is Xavier.
# Build the op that initializes all variables declared above, then run it once
# so the weights and biases have values before training starts.
initializer = tf.global_variables_initializer()
sess.run(initializer)

In [33]:
# BATCHING

# batch size = 1: SGD - stochastic gradient descent
# batch size = number of samples: GD - gradient descent
# Anything in between is mini-batch gradient descent.
batch_size = 100

# Number of full batches per epoch for the training set.
# Uses the public num_examples property rather than the private _num_examples attribute.
batches_number = mnist.train.num_examples // batch_size

# Maximum number of epochs; training may stop earlier through early stopping.
max_epochs = 15

# Start the previous validation loss at infinity so that early stopping
# can never be triggered at the first epoch.
prev_validation_loss = float('inf')

In [34]:
# Training loop. For every epoch it:
# 1. Loads batch_size inputs and targets from the training set
# 2. Runs one optimization step and gets the mean loss of that batch
# 3. Adds the batch loss to the running sum for the epoch
# 4. Repeats with the next batch until batches_number batches have been processed
# 5. Evaluates the model on the whole validation set and stops early
#    if the validation loss is higher than in the previous epoch

for epoch_counter in range(max_epochs):

    # Sum of the batch losses in this epoch.
    curr_epoch_loss = 0.

    # Iterate over the batches in this epoch.
    for batch_counter in range(batches_number):

        # mnist.train.next_batch(batch_size) loads the training batches one after another.
        input_batch, target_batch = mnist.train.next_batch(batch_size)

        # Run the optimization step and get the mean loss for this batch,
        # feeding the inputs and targets just taken from the training set.
        _, batch_loss = sess.run(
            [optimize, mean_loss],
            feed_dict={inputs: input_batch, targets: target_batch})

        # Increment the sum of batch losses.
        curr_epoch_loss += batch_loss

    # Turn the sum of batch losses into the average batch loss for the epoch.
    curr_epoch_loss /= batches_number

    # At the end of each epoch, get the validation loss and accuracy.
    # The whole validation set is fed in one go through the public images/labels arrays;
    # the optimize op is not run, so this is forward propagation only.
    validation_loss, validation_accuracy = sess.run(
        [mean_loss, accuracy],
        feed_dict={inputs: mnist.validation.images, targets: mnist.validation.labels})

    # Print statistics for the current epoch.
    # epoch_counter starts from 0, so 1 is added for display.
    print('Epoch {0}. Mean loss: {1:.3f}. Validation loss: {2:.3f}. Validation accuracy: {3:.2f}%'.format(
        epoch_counter + 1, curr_epoch_loss, validation_loss, validation_accuracy * 100.))

    # Trigger early stopping if the validation loss begins increasing.
    # Note: the model keeps the weights of this last epoch; no earlier state is restored.
    if validation_loss > prev_validation_loss:
        break

    # Store this epoch's validation loss to be used as previous validation loss in the next iteration.
    prev_validation_loss = validation_loss

print('End of training.')

Epoch 1. Mean loss: 0.275. Validation loss: 0.121. Validation accuracy: 96.40%
Epoch 2. Mean loss: 0.112. Validation loss: 0.099. Validation accuracy: 97.16%
Epoch 3. Mean loss: 0.077. Validation loss: 0.091. Validation accuracy: 97.36%
Epoch 4. Mean loss: 0.062. Validation loss: 0.089. Validation accuracy: 97.24%
Epoch 5. Mean loss: 0.049. Validation loss: 0.087. Validation accuracy: 97.52%
Epoch 6. Mean loss: 0.042. Validation loss: 0.086. Validation accuracy: 97.48%
Epoch 7. Mean loss: 0.034. Validation loss: 0.088. Validation accuracy: 97.54%
End of training.


## 3. Test the model

In [35]:
# Evaluate on the whole test set in a single forward pass.
# The public images/labels arrays are used instead of next_batch(mnist.test._num_examples),
# which relied on a private attribute.
input_batch, target_batch = mnist.test.images, mnist.test.labels

# Only the accuracy op is run; the optimize op is not, so the model is not changed.
# sess.run is given a list, so test_accuracy is a one-element list.
test_accuracy = sess.run(
    [accuracy],
    feed_dict={inputs: input_batch, targets: target_batch})

# Convert the fraction of correct predictions into a percentage.
test_accuracy_percent = test_accuracy[0] * 100.

# Print the test accuracy formatted as a percentage.
print('Test accuracy: {0:.2f}%'.format(test_accuracy_percent))

Test accuracy: 97.61%
