# MNIST take two

An attempt to improve the previous model's accuracy by using:

- The Adam optimiser https://arxiv.org/abs/1412.6980  
- Early stopping (regularisation)  
- He initialisation https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf  
- Extra hidden layers

In [1]:
%matplotlib inline

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import StandardScaler

In [22]:
from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST data sets stored under /tmp/data/. The loader's other options
# are left at their defaults; the cells below index `labels` with `< 5`, so
# they presumably expect plain integer (not one-hot) labels.
mnist = input_data.read_data_sets(train_dir="/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [24]:
# Keep only the examples whose label is below 5. Each boolean mask is built
# once and applied to both images and labels so the two arrays stay aligned.
train_mask = mnist.train.labels < 5
valid_mask = mnist.validation.labels < 5
test_mask = mnist.test.labels < 5

X_train1, y_train1 = mnist.train.images[train_mask], mnist.train.labels[train_mask]
X_valid1, y_valid1 = mnist.validation.images[valid_mask], mnist.validation.labels[valid_mask]
X_test1, y_test1 = mnist.test.images[test_mask], mnist.test.labels[test_mask]

In [13]:
# He initialisation, shared by every dense layer in the network.
he_init = tf.contrib.layers.variance_scaling_initializer()

# Settings common to all five hidden layers: ELU activation + He initialisation.
hidden_kwargs = dict(activation=tf.nn.elu, kernel_initializer=he_init)

with tf.name_scope("neural_net"):
    # Architecture: five ELU hidden layers chained from the input X, followed
    # by an output layer with no activation that produces the raw logits.
    hidden1 = tf.layers.dense(X, hidden1_n, name="hidden1", **hidden_kwargs)
    hidden2 = tf.layers.dense(hidden1, hidden2_n, name="hidden2", **hidden_kwargs)
    hidden3 = tf.layers.dense(hidden2, hidden3_n, name="hidden3", **hidden_kwargs)
    hidden4 = tf.layers.dense(hidden3, hidden4_n, name="hidden4", **hidden_kwargs)
    hidden5 = tf.layers.dense(hidden4, hidden5_n, name="hidden5", **hidden_kwargs)
    logits = tf.layers.dense(
        hidden5,
        outputs_n,
        name="outputs",
        kernel_initializer=he_init,
    )

In [14]:
with tf.name_scope("loss"):
    # Cost function: cross-entropy computed from the raw logits and the
    # labels y, then averaged into a single scalar.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y,
        logits=logits,
    )
    loss = tf.reduce_mean(cross_entropy, name="loss")

In [15]:
with tf.name_scope("train"):
    # Optimiser: Adam with a learning rate of 0.001; `training_op` performs
    # one minimisation step on `loss` each time it is run.
    optimiser = tf.train.AdamOptimizer(learning_rate=1e-3)
    training_op = optimiser.minimize(loss)

In [16]:
with tf.name_scope("eval"):
    # Evaluation metric (accuracy): an example counts as correct when its
    # label y is the top-1 entry of the logits.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

In [18]:
# Op that initialises the graph's global variables; the training session runs
# it once before the first epoch.
init = tf.global_variables_initializer()
# Saver used by the training cell to checkpoint the best model and to restore
# it before measuring test accuracy.
saver = tf.train.Saver()

In [10]:
n_epochs = 500  # upper bound on training epochs; the training loop may break out earlier
batch_size = 40  # divisor used to decide how many mini-batches each epoch is split into

In [25]:
# Early-stopping state: training halts once the validation loss has failed to
# improve on `best_loss` for more than `max_checks_without_progress` epochs.
max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.inf  # `np.infty` was removed in NumPy 2.0; `np.inf` is the canonical name

# One definition of the checkpoint location so save and restore cannot drift apart.
checkpoint_path = "./savepoints/MNIST_take_two.ckpt"

# np.array_split raises on zero sections, so request at least one batch even
# when the training set is smaller than `batch_size`.
n_batches = max(1, len(X_train1) // batch_size)

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        # Reshuffle every epoch and cut the shuffled indices into mini-batches.
        rnd_idx = np.random.permutation(len(X_train1))
        for rnd_indices in np.array_split(rnd_idx, n_batches):
            X_batch, y_batch = X_train1[rnd_indices], y_train1[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # Score the whole validation subset once per epoch.
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid1, y: y_valid1})
        if loss_val < best_loss:
            # New best model: checkpoint it and reset the patience counter.
            save_path = saver.save(sess, checkpoint_path)
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

# Evaluate the best checkpoint (lowest validation loss), not the weights from
# the final epoch, on the held-out test subset.
with tf.Session() as sess:
    saver.restore(sess, checkpoint_path)
    acc_test = accuracy.eval(feed_dict={X: X_test1, y: y_test1})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

0	Validation loss: 0.049299	Best loss: 0.049299	Accuracy: 98.59%
1	Validation loss: 0.044320	Best loss: 0.044320	Accuracy: 98.55%
2	Validation loss: 0.047358	Best loss: 0.044320	Accuracy: 98.59%
3	Validation loss: 0.034234	Best loss: 0.034234	Accuracy: 99.26%
4	Validation loss: 0.034486	Best loss: 0.034234	Accuracy: 98.98%
5	Validation loss: 0.045551	Best loss: 0.034234	Accuracy: 98.67%
6	Validation loss: 0.050927	Best loss: 0.034234	Accuracy: 98.59%
7	Validation loss: 0.055186	Best loss: 0.034234	Accuracy: 98.55%
8	Validation loss: 0.042415	Best loss: 0.034234	Accuracy: 98.98%
9	Validation loss: 0.051686	Best loss: 0.034234	Accuracy: 98.48%
10	Validation loss: 0.035941	Best loss: 0.034234	Accuracy: 99.26%
11	Validation loss: 0.051602	Best loss: 0.034234	Accuracy: 98.83%
12	Validation loss: 0.038714	Best loss: 0.034234	Accuracy: 99.02%
13	Validation loss: 0.028449	Best loss: 0.028449	Accuracy: 99.61%
14	Validation loss: 0.052004	Best loss: 0.028449	Accuracy: 98.87%
15	Validation loss: 