### Deep Neural Network for MNIST Classification

In [3]:
import numpy as np
import tensorflow as tf
import time
from tensorflow.examples.tutorials.mnist import input_data
# TensorFlow includes a data provider for MNIST that we'll use.

# Read the MNIST data sets from the local "MNIST_data/" directory.
# one_hot=True requests one-hot encoded labels, which is what the 10-wide
# `targets` placeholder used later in this notebook is fed with.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Outline the model

In [25]:
input_size = 784
output_size = 10
# Use the same width for both hidden layers. Not a necessity.
hidden_layer_size = 100

# Re-running this cell would otherwise leave the InteractiveSession of the previous
# run open (and still installed as default session) while the graph it belongs to
# is reset underneath it. Close such a leftover session first; on a fresh kernel
# the name does not exist yet and there is nothing to close.
if "session" in globals():
    session.close()

# Resets any variables left in memory from previous runs, so the get_variable
# calls below start from an empty default graph.
tf.reset_default_graph()

# Placeholders through which the batches are fed in. The leading None leaves
# the batch dimension open, so any number of samples can be fed at once.
inputs = tf.placeholder(tf.float32, [None, input_size])
targets = tf.placeholder(tf.float32, [None, output_size])

# Weights and biases for the first linear combination (inputs -> first hidden layer).
# No initializer is passed, so get_variable falls back to its default one,
# which is Xavier (Glorot uniform) initialization.
weights_1 = tf.get_variable("weights_1", [input_size, hidden_layer_size])
biases_1 = tf.get_variable("biases_1", [hidden_layer_size])
# First hidden layer: linear combination followed by a ReLU activation.
outputs_1 = tf.nn.relu(tf.matmul(inputs,weights_1) + biases_1)

# Weights and biases for the second linear combination
# (first hidden layer -> second hidden layer).
weights_2 = tf.get_variable("weights_2", [hidden_layer_size, hidden_layer_size])
biases_2 = tf.get_variable("biases_2", [hidden_layer_size])
# Second hidden layer: linear combination followed by a ReLU activation again.
outputs_2 = tf.nn.relu(tf.matmul(outputs_1, weights_2) + biases_2)

# Weights and biases for the final linear combination (second hidden layer -> outputs).
weights_3 = tf.get_variable("weights_3", [hidden_layer_size, output_size])
biases_3 = tf.get_variable("biases_3", [output_size])
# No activation is applied here: `outputs` holds the raw logits. The softmax is
# not computed explicitly because the loss op below applies it internally.
outputs = tf.matmul(outputs_2,weights_3) + biases_3

# Softmax cross-entropy between logits and one-hot targets, one value per sample.
loss = tf.nn.softmax_cross_entropy_with_logits(logits = outputs, labels = targets)

# Average the per-sample losses over the batch.
mean_loss = tf.reduce_mean(loss)

# Optimization step: one Adam update that minimizes the mean batch loss.
optimize = tf.train.AdamOptimizer(learning_rate=0.001).minimize(mean_loss)

# Boolean per sample: does the highest-scoring class match the target class?
out_equals_target = tf.equal(tf.argmax(outputs,1), tf.argmax(targets,1))

# Cast the booleans to 0./1. and average them to get the accuracy of the batch.
accuracy = tf.reduce_mean(tf.cast(out_equals_target, tf.float32))

# Declare session variable. It is deliberately left open at the end of this cell
# because the training and test code that follows runs ops through it.
session = tf.InteractiveSession()

# Op that assigns every variable its initial value.
initializer = tf.global_variables_initializer()

# Actually initialize the variables.
session.run(initializer)

# Batch size
batch_size = 100
# Number of full batches per epoch. Uses the data set's public num_examples
# property rather than reaching into the private _num_examples attribute.
batches_number = mnist.train.num_examples // batch_size

# Upper bound on the number of epochs; early stopping may end training sooner.
max_epochs = 15

# Keep track of the validation loss of the previous epoch.
# Starting at +infinity guarantees the early-stopping check can never
# trigger in the first epoch, whatever the scale of the loss.
previous_validation_loss = float("inf")

# The validation set is identical in every epoch, so its feed is built once here
# instead of being re-fetched (and re-shuffled) through next_batch each epoch.
validation_feed = {inputs: mnist.validation.images, targets: mnist.validation.labels}

# Measure how long training takes.
start_time = time.time()

# One iteration per epoch; epoch_counter starts from 0.
for epoch_counter in range(max_epochs):
    # Sum of the mean batch losses seen in this epoch.
    curr_epoch_loss = 0.

    # Iterate over the batches in this epoch.
    for batch_counter in range(batches_number):

        # Next batch_size training samples and their one-hot targets.
        input_batch, target_batch = mnist.train.next_batch(batch_size)

        # Run one optimization step and fetch the mean loss of this batch.
        _, batch_loss = session.run([optimize, mean_loss],
                                    feed_dict={inputs: input_batch, targets: target_batch})

        # Increment the sum of batch losses.
        curr_epoch_loss += batch_loss

    # Calculate the average batch loss of the epoch.
    curr_epoch_loss /= batches_number

    # Forward propagate the whole validation set. `optimize` is not fetched,
    # so the weights are not updated by this run.
    validation_loss, validation_accuracy = session.run([mean_loss, accuracy],
                                                       feed_dict=validation_feed)

    # Print statistics for the current epoch.
    print('Epoch ' + str(epoch_counter + 1) +
          '. Training loss: ' + '{0:.3f}'.format(curr_epoch_loss) +
          '. Validation loss: ' + '{0:.3f}'.format(validation_loss) +
          '. Validation accuracy: ' + '{0:.2f}'.format(validation_accuracy * 100) + '%')

    # Trigger early stopping as soon as the validation loss increases.
    # NOTE: the weights kept are those of the epoch in which the increase was
    # observed, not those of the preceding (better) epoch.
    if validation_loss > previous_validation_loss:
        break

    # Remember this epoch's validation loss for the comparison in the next epoch.
    previous_validation_loss = validation_loss

# Shows when the training has ended. Not compulsory, but convenient.
print('End of training')
print("Training time: %s seconds" % (time.time() - start_time))



Epoch 1. Training loss: 0.330. Validation loss: 0.153. Validation accuracy: 95.54%
Epoch 2. Training loss: 0.138. Validation loss: 0.111. Validation accuracy: 96.78%
Epoch 3. Training loss: 0.097. Validation loss: 0.092. Validation accuracy: 97.56%
Epoch 4. Training loss: 0.075. Validation loss: 0.086. Validation accuracy: 97.38%
Epoch 5. Training loss: 0.059. Validation loss: 0.083. Validation accuracy: 97.60%
Epoch 6. Training loss: 0.049. Validation loss: 0.080. Validation accuracy: 97.60%
Epoch 7. Training loss: 0.040. Validation loss: 0.079. Validation accuracy: 97.70%
Epoch 8. Training loss: 0.032. Validation loss: 0.081. Validation accuracy: 97.84%
End of training
Training time: 8.464206457138062 seconds


### Test the model

In [11]:
# Evaluate on the complete test set. The data set exposes it directly through
# its public images/labels properties, so there is no need to request one
# "batch" of private _num_examples size from next_batch.
input_batch, target_batch = mnist.test.images, mnist.test.labels

# Only `accuracy` is fetched (no optimization step), so this is a pure forward pass.
# The fetch is passed as a list, hence the result is a one-element list.
test_accuracy = session.run([accuracy],
                            feed_dict={inputs: input_batch, targets: target_batch})

# Convert the fraction of correct predictions into a percentage.
test_accuracy_percent = test_accuracy[0] * 100.

print('Test accuracy: ' + '{0:.2f}'.format(test_accuracy_percent) + '%')

Test accuracy: 97.49%
