MNIST via TF's low-level Python API, 3 hidden layers

In [1]:
import tensorflow as tf

In [2]:
tf.reset_default_graph()

In [3]:
n_inputs = 28*28
n_hidden1 = 200
n_hidden2 = 200
n_hidden3 = 100
n_hidden4 = 100
n_outputs = 10

In [4]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

In [5]:
training = tf.placeholder_with_default(False, shape=(), name='training') #Boolean switch which says if we're training or not

dropout_rate = 0.25 #Hyperparameter: We drop 25% of the neurons at each step
X_drop = tf.layers.dropout(X, dropout_rate, training=training)

In [6]:
he_init = tf.contrib.layers.variance_scaling_initializer(mode = "FAN_AVG")
#We don't need this if we have SELU

Here is what the scaled exponential linear units look like

In [7]:
#Already available as a command in tf 1.4
def selu(z,
         scale=1.0507009873554804934193349852946,
         alpha=1.6732632423543772848170429916717):
    return scale * tf.where(z >= 0.0, z, alpha * tf.nn.elu(z)) 

In [8]:
#Dropouts after every layer
with tf.name_scope("layers"):
    hidden1 = tf.layers.dense(X_drop, n_hidden1, name="hidden1", activation=selu, kernel_initializer=he_init)
    hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=training) 
    #Dropout is implemented if training is true (training is false when we're classifying)
    hidden2 = tf.layers.dense(hidden1_drop, n_hidden2, name="hidden2", activation=selu, kernel_initializer=he_init)
    hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=training)
    hidden3 = tf.layers.dense(hidden2_drop, n_hidden3, name="hidden3", activation=selu,kernel_initializer=he_init)
    hidden3_drop = tf.layers.dropout(hidden3, dropout_rate, training=training)
    hidden4 = tf.layers.dense(hidden3_drop, n_hidden3, name="hidden4", activation=selu,kernel_initializer=he_init)
    hidden4_drop = tf.layers.dropout(hidden4, dropout_rate, training=training)
    logits = tf.layers.dense(hidden4_drop, n_outputs, name="outputs") #We don't drop neurons from the outputs

In [9]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

In [10]:
learning_rate = tf.placeholder(tf.float32, shape=(), name="lr")
#Learning rate is a placeholder, because the optimizer will select its own

with tf.name_scope("minimize"):
    optimizer = tf.train.AdamOptimizer() #Forget Gradient Descent, we're using this thing
    train_op = optimizer.minimize(loss)

In [11]:
with tf.name_scope("accuracy"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [12]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [13]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("data")

Extracting data\train-images-idx3-ubyte.gz
Extracting data\train-labels-idx1-ubyte.gz
Extracting data\t10k-images-idx3-ubyte.gz
Extracting data\t10k-labels-idx1-ubyte.gz


In [14]:
n_epochs = 500
batch_size = 80
final_accuracy = 0

In [15]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for step in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict={X: X_batch, y: y_batch, learning_rate: 0.001})
        acc_train = accuracy.eval(feed_dict={training: True, X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        print("Epoch {:>2d}".format(epoch), "\tTrain accuracy {:.4f}".format(acc_train), 
                                                  "\tTest accuracy {:.4f}".format(acc_test))
        if acc_test > final_accuracy:
            final_accuracy = acc_test
    save_model = saver.save(sess, "models/my_model_final.ckpt")
    print("\nFinal Accuracy {}".format(final_accuracy))

Epoch  0 	Train accuracy 0.6625 	Test accuracy 0.9499
Epoch  1 	Train accuracy 0.6000 	Test accuracy 0.9642
Epoch  2 	Train accuracy 0.7125 	Test accuracy 0.9682
Epoch  3 	Train accuracy 0.7375 	Test accuracy 0.9651
Epoch  4 	Train accuracy 0.8250 	Test accuracy 0.9704
Epoch  5 	Train accuracy 0.8375 	Test accuracy 0.9700
Epoch  6 	Train accuracy 0.8375 	Test accuracy 0.9653
Epoch  7 	Train accuracy 0.7000 	Test accuracy 0.9712
Epoch  8 	Train accuracy 0.7875 	Test accuracy 0.9741
Epoch  9 	Train accuracy 0.8625 	Test accuracy 0.9743
Epoch 10 	Train accuracy 0.7750 	Test accuracy 0.9746
Epoch 11 	Train accuracy 0.7750 	Test accuracy 0.9725
Epoch 12 	Train accuracy 0.7875 	Test accuracy 0.9697
Epoch 13 	Train accuracy 0.8625 	Test accuracy 0.9730
Epoch 14 	Train accuracy 0.6875 	Test accuracy 0.9758
Epoch 15 	Train accuracy 0.7625 	Test accuracy 0.9696
Epoch 16 	Train accuracy 0.7875 	Test accuracy 0.9702
Epoch 17 	Train accuracy 0.7875 	Test accuracy 0.9709
Epoch 18 	Train accuracy 0.8

Epoch 151 	Train accuracy 0.7250 	Test accuracy 0.9791
Epoch 152 	Train accuracy 0.8625 	Test accuracy 0.9808
Epoch 153 	Train accuracy 0.8500 	Test accuracy 0.9783
Epoch 154 	Train accuracy 0.8500 	Test accuracy 0.9818
Epoch 155 	Train accuracy 0.8000 	Test accuracy 0.9805
Epoch 156 	Train accuracy 0.8125 	Test accuracy 0.9756
Epoch 157 	Train accuracy 0.8500 	Test accuracy 0.9792
Epoch 158 	Train accuracy 0.8875 	Test accuracy 0.9798
Epoch 159 	Train accuracy 0.8125 	Test accuracy 0.9800
Epoch 160 	Train accuracy 0.8500 	Test accuracy 0.9808
Epoch 161 	Train accuracy 0.7875 	Test accuracy 0.9833
Epoch 162 	Train accuracy 0.9000 	Test accuracy 0.9792
Epoch 163 	Train accuracy 0.8250 	Test accuracy 0.9787
Epoch 164 	Train accuracy 0.8250 	Test accuracy 0.9809
Epoch 165 	Train accuracy 0.8750 	Test accuracy 0.9810
Epoch 166 	Train accuracy 0.8500 	Test accuracy 0.9797
Epoch 167 	Train accuracy 0.8250 	Test accuracy 0.9799
Epoch 168 	Train accuracy 0.8250 	Test accuracy 0.9782
Epoch 169 

Epoch 300 	Train accuracy 0.8000 	Test accuracy 0.9800
Epoch 301 	Train accuracy 0.7375 	Test accuracy 0.9797
Epoch 302 	Train accuracy 0.7625 	Test accuracy 0.9805
Epoch 303 	Train accuracy 0.8125 	Test accuracy 0.9810
Epoch 304 	Train accuracy 0.6375 	Test accuracy 0.9782
Epoch 305 	Train accuracy 0.7750 	Test accuracy 0.9820
Epoch 306 	Train accuracy 0.8875 	Test accuracy 0.9824
Epoch 307 	Train accuracy 0.7875 	Test accuracy 0.9789
Epoch 308 	Train accuracy 0.8625 	Test accuracy 0.9807
Epoch 309 	Train accuracy 0.8000 	Test accuracy 0.9814
Epoch 310 	Train accuracy 0.7000 	Test accuracy 0.9806
Epoch 311 	Train accuracy 0.7000 	Test accuracy 0.9791
Epoch 312 	Train accuracy 0.7500 	Test accuracy 0.9796
Epoch 313 	Train accuracy 0.8625 	Test accuracy 0.9801
Epoch 314 	Train accuracy 0.8125 	Test accuracy 0.9804
Epoch 315 	Train accuracy 0.7250 	Test accuracy 0.9812
Epoch 316 	Train accuracy 0.8375 	Test accuracy 0.9818
Epoch 317 	Train accuracy 0.8000 	Test accuracy 0.9825
Epoch 318 

Epoch 449 	Train accuracy 0.7625 	Test accuracy 0.9799
Epoch 450 	Train accuracy 0.6750 	Test accuracy 0.9797
Epoch 451 	Train accuracy 0.8125 	Test accuracy 0.9795
Epoch 452 	Train accuracy 0.7875 	Test accuracy 0.9800
Epoch 453 	Train accuracy 0.7875 	Test accuracy 0.9782
Epoch 454 	Train accuracy 0.8375 	Test accuracy 0.9779
Epoch 455 	Train accuracy 0.8125 	Test accuracy 0.9795
Epoch 456 	Train accuracy 0.7250 	Test accuracy 0.9784
Epoch 457 	Train accuracy 0.7750 	Test accuracy 0.9803
Epoch 458 	Train accuracy 0.7625 	Test accuracy 0.9794
Epoch 459 	Train accuracy 0.7125 	Test accuracy 0.9793
Epoch 460 	Train accuracy 0.8000 	Test accuracy 0.9789
Epoch 461 	Train accuracy 0.7250 	Test accuracy 0.9824
Epoch 462 	Train accuracy 0.8125 	Test accuracy 0.9825
Epoch 463 	Train accuracy 0.7500 	Test accuracy 0.9804
Epoch 464 	Train accuracy 0.8625 	Test accuracy 0.9794
Epoch 465 	Train accuracy 0.8000 	Test accuracy 0.9793
Epoch 466 	Train accuracy 0.7250 	Test accuracy 0.9821
Epoch 467 

Not much of an improvment, because we have a shallow network. But, this is faster and more efficient.