MNIST via TF's low-level Python API, 3 hidden layers (Deep Neural Net)

In [1]:
import tensorflow as tf

In [2]:
# Discard whatever ops/variables are already in TensorFlow's default graph, so
# the graph-building cells below start from an empty graph when re-run.
tf.reset_default_graph()

In [3]:
# Network dimensions: flattened 28x28 input, three hidden layers, 10 outputs.
n_inputs = 28 * 28  # 784 features per image
n_hidden1, n_hidden2, n_hidden3 = 320, 120, 50  # hidden-layer widths, input side first
n_outputs = 10  # width of the final (logit) layer

In [4]:
# Graph inputs, fed with one mini-batch at a time through feed_dict.
# X: float32 matrix with n_inputs columns; the leading None leaves the batch
# size open.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# y: int64 vector of integer class labels, one per row of X. The shape has to
# be the 1-tuple (None,): a bare (None) is just None, which leaves the rank of
# the placeholder unspecified instead of declaring a 1-D tensor.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

In [5]:
# Hand-written fully connected layer, kept for reference only: the network is
# actually built with tf.layers.dense in a later cell.
# The definition is disabled by wrapping it in a bare string literal, so none
# of it executes; the notebook just echoes the string as this cell's output.
# NOTE(review): before re-enabling, be aware that the body uses `np`, which no
# visible cell imports, and that the disabled calls in the next cell refer to
# `neuron_layer` while this function is named `layer`.
'''
def layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        lout = tf.matmul(X, W) + b
        if activation is not None:
            return activation(lout)
        else:
            return lout
'''

'\ndef layer(X, n_neurons, name, activation=None):\n    with tf.name_scope(name):\n        n_inputs = int(X.get_shape()[1])\n        stddev = 2 / np.sqrt(n_inputs)\n        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)\n        W = tf.Variable(init, name="kernel")\n        b = tf.Variable(tf.zeros([n_neurons]), name="bias")\n        lout = tf.matmul(X, W) + b\n        if activation is not None:\n            return activation(lout)\n        else:\n            return lout\n'

In [6]:
# Disabled usage of the hand-written layer function (bare string literal, so
# nothing here executes; the notebook only echoes the string as output).
# NOTE(review): these calls name `neuron_layer`, whereas the disabled
# definition in the previous cell is called `layer`.
'''
hidden1 = neuron_layer(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
hidden3 = neuron_layer(hidden2, n_hidden3, name="hidden3", activation=tf.nn.relu)   
logits = neuron_layer(hidden3, n_outputs, name="outputs")
'''

'\nhidden1 = neuron_layer(X, n_hidden1, name="hidden1", activation=tf.nn.relu)\nhidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)\nhidden3 = neuron_layer(hidden2, n_hidden3, name="hidden3", activation=tf.nn.relu)   \nlogits = neuron_layer(hidden3, n_outputs, name="outputs")\n'

In [7]:
# Fully connected ("dense") stack: three ReLU hidden layers followed by a
# linear output layer that produces the raw class scores (logits).
with tf.name_scope("layers"):
    hidden1 = tf.layers.dense(
        inputs=X, units=n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(
        inputs=hidden1, units=n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(
        inputs=hidden2, units=n_hidden3, activation=tf.nn.relu, name="hidden3")
    # No activation on the last layer: the loss op consumes the raw logits.
    logits = tf.layers.dense(inputs=hidden3, units=n_outputs, name="outputs")

In [8]:
with tf.name_scope("loss"):
    # Per-example cross-entropy, computed from the raw logits and the integer
    # class labels fed through y.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=y,
    )
    # Scalar training objective: the mean over the examples that were fed.
    loss = tf.reduce_mean(xentropy, name="loss")

In [9]:
# Step size for plain gradient descent; deliberately large (the author's
# rationale: big enough to jump from one local minimum to another).
# The optimizer below receives it as an ordinary Python float at construction.
learning_rate = 0.1

with tf.name_scope("minimize"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    # Running train_op performs one gradient-descent update on `loss`.
    train_op = optimizer.minimize(loss=loss)

In [10]:
with tf.name_scope("accuracy"):
    # Boolean per example: is the label among the top-1 scoring logits?
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    # Fraction of hits among the examples fed (booleans cast to 0.0 / 1.0).
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

In [11]:
# Op that initializes all global variables; the first training session runs it.
init = tf.global_variables_initializer()
# Saver used by the training cells to write checkpoints under "models/" and to
# restore them later, so a long training run can be resumed instead of redone.
saver = tf.train.Saver()

In [12]:
# Load the MNIST dataset into the local "data" directory; the log below shows
# the four archives being downloaded and extracted there.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("data")

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting data\train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting data\train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting data\t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting data\t10k-labels-idx1-ubyte.gz


In [13]:
# Training schedule: number of full passes over the training set, and the
# number of examples fed per update step.
n_epochs, batch_size = 20, 50
# Each epoch runs mnist.train.num_examples // batch_size update steps, so the
# total step count is n_epochs times that (e.g. 22,000 for a 55,000-image split).

In [14]:
# Train from freshly initialized variables, report accuracy after every epoch,
# then checkpoint the resulting weights.
with tf.Session() as sess:
    sess.run(init)
    n_batches = mnist.train.num_examples // batch_size  # full mini-batches per epoch
    for epoch in range(n_epochs):
        for step in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict={X: X_batch, y: y_batch})
        # Train accuracy is measured on the last mini-batch of the epoch only
        # (batch_size examples), so it is a very coarse indicator.
        acc_train = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_test = sess.run(
            accuracy, feed_dict={X: mnist.test.images, y: mnist.test.labels})
        report = (
            "Epoch {:>2d}".format(epoch),
            "\tTrain accuracy {:.4f}".format(acc_train),
            "\tTest accuracy {:.4f}".format(acc_test),
        )
        print(*report)
    # Baseline for the follow-up training cell: test accuracy after the last epoch.
    final_accuracy = acc_test
    # Saves the final weights; the checkpoint goes into the "models" directory.
    save_model = saver.save(sess, "models/my_model_first.ckpt")

Epoch  0 	Train accuracy 1.0000 	Test accuracy 0.9555
Epoch  1 	Train accuracy 1.0000 	Test accuracy 0.9701
Epoch  2 	Train accuracy 1.0000 	Test accuracy 0.9726
Epoch  3 	Train accuracy 1.0000 	Test accuracy 0.9694
Epoch  4 	Train accuracy 1.0000 	Test accuracy 0.9760
Epoch  5 	Train accuracy 1.0000 	Test accuracy 0.9798
Epoch  6 	Train accuracy 1.0000 	Test accuracy 0.9797
Epoch  7 	Train accuracy 1.0000 	Test accuracy 0.9800
Epoch  8 	Train accuracy 1.0000 	Test accuracy 0.9804
Epoch  9 	Train accuracy 1.0000 	Test accuracy 0.9808
Epoch 10 	Train accuracy 1.0000 	Test accuracy 0.9817
Epoch 11 	Train accuracy 1.0000 	Test accuracy 0.9824
Epoch 12 	Train accuracy 1.0000 	Test accuracy 0.9824
Epoch 13 	Train accuracy 1.0000 	Test accuracy 0.9833
Epoch 14 	Train accuracy 1.0000 	Test accuracy 0.9830
Epoch 15 	Train accuracy 1.0000 	Test accuracy 0.9830
Epoch 16 	Train accuracy 1.0000 	Test accuracy 0.9829
Epoch 17 	Train accuracy 1.0000 	Test accuracy 0.9827
Epoch 18 	Train accuracy 1.0

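With a learning rate this large, test accuracy plateaus at roughly 98.3% after about a dozen epochs (see the log above), so the next cells restore the saved checkpoint and try a smaller learning rate.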

In [15]:
learning_rate = 0.0001 #New lower learning rate

# Rebinding the Python name alone changes nothing: the existing train_op was
# built from an optimizer that received the old float value at construction.
# Build a new optimizer and train_op so the next training cell really uses the
# lower rate. Plain gradient descent keeps no extra variables of its own, so
# the existing Saver/checkpoint still covers every variable in the graph.
with tf.name_scope("minimize_fine_tune"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.minimize(loss)

In [16]:
# Resume from the first checkpoint and keep training with whichever train_op
# is currently bound. Whenever an epoch beats the best test accuracy seen so
# far (starting from the `final_accuracy` left by the first training cell),
# the "final" checkpoint is overwritten.
# NOTE(review): the checkpoint is selected on test accuracy, so the reported
# final accuracy is not an unbiased estimate; "my_model_final.ckpt" is written
# only if at least one epoch improves on the starting value.
with tf.Session() as sess:
    # Restoring loads the saved variable values, so the init op is not run here.
    saver.restore(sess, "models/my_model_first.ckpt")
    n_batches = mnist.train.num_examples // batch_size  # full mini-batches per epoch
    for epoch in range(n_epochs):
        for step in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict={X: X_batch, y: y_batch})
        # Train accuracy covers only the last mini-batch of the epoch.
        acc_train = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_test = sess.run(
            accuracy, feed_dict={X: mnist.test.images, y: mnist.test.labels})
        report = (
            "Epoch {:>2d}".format(epoch),
            "\tTrain accuracy {:.4f}".format(acc_train),
            "\tTest accuracy {:.4f}".format(acc_test),
        )
        print(*report)
        improved = acc_test > final_accuracy
        if improved:
            final_accuracy = acc_test
            # Keep the weights of the best-scoring epoch so far.
            save_model = saver.save(sess, "models/my_model_final.ckpt")
    print("\nFinal Accuracy {}".format(final_accuracy))

INFO:tensorflow:Restoring parameters from models/my_model_first.ckpt
Epoch  0 	Train accuracy 1.0000 	Test accuracy 0.9832
Epoch  1 	Train accuracy 1.0000 	Test accuracy 0.9828
Epoch  2 	Train accuracy 1.0000 	Test accuracy 0.9826
Epoch  3 	Train accuracy 1.0000 	Test accuracy 0.9827
Epoch  4 	Train accuracy 1.0000 	Test accuracy 0.9827
Epoch  5 	Train accuracy 1.0000 	Test accuracy 0.9828
Epoch  6 	Train accuracy 1.0000 	Test accuracy 0.9829
Epoch  7 	Train accuracy 1.0000 	Test accuracy 0.9829
Epoch  8 	Train accuracy 1.0000 	Test accuracy 0.9831
Epoch  9 	Train accuracy 1.0000 	Test accuracy 0.9830
Epoch 10 	Train accuracy 1.0000 	Test accuracy 0.9830
Epoch 11 	Train accuracy 1.0000 	Test accuracy 0.9829
Epoch 12 	Train accuracy 1.0000 	Test accuracy 0.9830
Epoch 13 	Train accuracy 1.0000 	Test accuracy 0.9830
Epoch 14 	Train accuracy 1.0000 	Test accuracy 0.9829
Epoch 15 	Train accuracy 1.0000 	Test accuracy 0.9831
Epoch 16 	Train accuracy 1.0000 	Test accuracy 0.9828
Epoch 17 	Tra