In [9]:
import tensorflow as tf
from functools import partial

# Clear the default graph before (re)building the model in this cell.
tf.reset_default_graph()

n_inputs = 28 * 28   # MNIST
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

learning_rate = 0.01
batch_norm_momentum = 0.9

# Input features: one row of n_inputs float32 values per example.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# Integer class labels, one per example. The shape must be the 1-tuple
# `(None,)`: `(None)` without the trailing comma is just `None`, which leaves
# the placeholder's rank completely unconstrained.
y = tf.placeholder(tf.int32, shape=(None,), name="y")
# Switch consumed by the batch-normalization layers below. It defaults to
# False, so it only has to be fed (as True) while training.
training = tf.placeholder_with_default(False, shape=(), name='training')

with tf.name_scope("dnn"):
    # NOTE(review): the initializer is created with the library defaults;
    # confirm those defaults match the He initialization the name implies.
    he_init = tf.variance_scaling_initializer()

    # Batch-norm layer with the `training` switch and momentum pre-bound.
    my_batch_norm_layer = partial(
            tf.layers.batch_normalization,
            training=training,
            momentum=batch_norm_momentum)

    # Dense layer with the kernel initializer pre-bound.
    my_dense_layer = partial(
            tf.layers.dense,
            kernel_initializer=he_init)

    # Each hidden layer is dense -> batch norm -> ELU (the dense layers are
    # created without an activation of their own).
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name="hidden2")
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    # The output layer is batch-normalized too; no activation is applied,
    # so `logits` are raw scores.
    logits_before_bn = my_dense_layer(bn2, n_outputs, name="outputs")
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope("loss"):
    # Per-example cross-entropy against integer labels, averaged over the batch.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    # Plain gradient descent on the mean cross-entropy.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # An example counts as correct when its label is the top-1 logit;
    # accuracy is the mean of those booleans cast to float32.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [10]:
# Write the current default graph to the log directory, then close the
# writer right away: this cell only exports the graph definition.
file_writer = tf.summary.FileWriter(
    logdir="logs/dnn/dnn",
    graph=tf.get_default_graph(),
)
file_writer.close()

In [11]:
# Op that initializes the graph's global variables; it is run once at the
# start of the training session.
init = tf.global_variables_initializer()
# Saver object for the graph's variables (not used by the cells visible here).
saver = tf.train.Saver()

In [12]:
import numpy as np

# Load the MNIST train/test arrays through Keras.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Features: cast to float32, flatten each image to 28*28 values, divide by 255.
X_train, X_test = (
    images.astype(np.float32).reshape(-1, 28*28) / 255.0
    for images in (X_train, X_test)
)
# Labels: cast to int32.
y_train, y_test = (labels.astype(np.int32) for labels in (y_train, y_test))

# Hold out the first 5000 training examples as the validation set and keep
# the remainder for training.
X_valid, y_valid = X_train[:5000], y_train[:5000]
X_train, y_train = X_train[5000:], y_train[5000:]

In [13]:
def shuffle_batch(X, y, batch_size):
    """Yield shuffled mini-batches of (X, y) covering every example once.

    A fresh random permutation of the row indices is drawn on each call and
    split into ``len(X) // batch_size`` nearly equal chunks, so when
    ``len(X)`` is not a multiple of ``batch_size`` some batches hold slightly
    more than ``batch_size`` rows. If there are fewer rows than
    ``batch_size``, all rows are returned as a single batch. Empty input
    yields nothing.

    Args:
        X: array of examples, indexable by an integer index array.
        y: array of targets aligned with ``X`` along the first axis.
        batch_size: target number of rows per batch; must be >= 1.

    Yields:
        ``(X_batch, y_batch)`` tuples whose rows stay paired.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    n_samples = len(X)
    if n_samples == 0:
        return
    rnd_idx = np.random.permutation(n_samples)
    # Clamp to one batch: np.array_split rejects a section count of 0, which
    # is what the integer division gives when n_samples < batch_size.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [14]:
# Number of full passes the training loop makes over the training set.
n_epochs = 20
# Mini-batch size handed to shuffle_batch for each training step.
batch_size = 200

In [18]:
# Ops registered in the graph's UPDATE_OPS collection; they are executed
# together with every training step below.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            # Feed training=True for the optimization step.
            train_feed = {training: True, X: X_batch, y: y_batch}
            sess.run([training_op, extra_update_ops], feed_dict=train_feed)
        # `training` is not fed here, so it keeps its default of False while
        # accuracy is measured on the validation set.
        valid_feed = {X: X_valid, y: y_valid}
        accuracy_val = sess.run(accuracy, feed_dict=valid_feed)
        print(epoch, "Validation accuracy:", accuracy_val)

0 Validation accuracy: 0.8948
1 Validation accuracy: 0.917
2 Validation accuracy: 0.9324
3 Validation accuracy: 0.939
4 Validation accuracy: 0.9442
5 Validation accuracy: 0.948
6 Validation accuracy: 0.9532
7 Validation accuracy: 0.9578
8 Validation accuracy: 0.96
9 Validation accuracy: 0.9626
10 Validation accuracy: 0.9646
11 Validation accuracy: 0.9662
12 Validation accuracy: 0.968
13 Validation accuracy: 0.9686
14 Validation accuracy: 0.9688
15 Validation accuracy: 0.9698
16 Validation accuracy: 0.9702
17 Validation accuracy: 0.9704
18 Validation accuracy: 0.9706
19 Validation accuracy: 0.9708
