In [1]:
from functools import partial

import tensorflow as tf
import numpy as np

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


### Hyperparameters

In [7]:
n_inputs = 28*28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

n_epochs = 40
batch_size = 50

learning_rate = 0.01
momentum = 0.9
threshold = 1.0 # For gradient clipping

scale = 0.001 # L1 regularization hyperparameter

dropout_rate = 0.5

logdir = "./tb-log"

### Load MNIST Data

In [3]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use urllib or similar directly.
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


### Construction Phase

* He initialization(by tf.layers.dense)
* ELU
* Batch Normalization
* Gradient Clipping (if you need)
* Frozen layers (if you need)
* Optimizers
 * Momentum (with Nesterov accelerated gradient)
 * RMSProp
 * Adam
* Regularization
 * L1 regularization
 * Dropout

In [10]:
tf.reset_default_graph()

# Training flag
training = tf.placeholder_with_default(False, shape=(), name='training')

# Convinent partial functions
my_dense_layer = partial(tf.layers.dense,
                         kernel_regularizer=tf.contrib.layers.l1_regularizer(scale))
my_batch_norm_layer = partial(tf.layers.batch_normalization,
                              training=training, momentum=momentum)
my_dropout = partial(tf.layers.dropout,
                     rate=dropout_rate,
                     training=training)

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
X_drop = my_dropout(X)
y = tf.placeholder(tf.int64, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = my_dense_layer(X_drop, n_hidden1, name="hidden1")
    bn1 = my_batch_norm_layer(hidden1)
    bn1_act = tf.nn.elu(bn1, name="hidden1_bn")
    bn1_drop = my_dropout(bn1_act)
    
    hidden2 = my_dense_layer(bn1_drop, n_hidden2, name="hidden2")
    bn2 = my_batch_norm_layer(hidden2)
    bn2_act = tf.nn.elu(bn2, name="hidden2_bn")
    bn2_drop = my_dropout(bn2_act)
    
    logits_before_bn = my_dense_layer(bn2_drop, n_outputs, name="outputs")
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    base_loss = tf.reduce_mean(xentropy)
    
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)

    loss = tf.add_n([base_loss] + reg_losses, name="loss")

with tf.name_scope("train"):
    '''
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    
    # decay=0.9 usually works well
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                          momentum=momentum,
                                          decay=0.9,
                                          epsilon=1e-10)
    
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    '''
    # momentum=0.9 usually works well
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                           momentum=momentum,
                                           use_nesterov=True)
    
    ### Gradient clipping (not necessary when you use batch normalization)
    '''
    grads_and_vars = optimizer.compute_gradients(loss)
    capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)
                  for grad, var in grad_and_vars]
    training_op = optimizer.apply_gradients(capped_gvs)
    '''
    
    ### Freezing layers
    '''
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope="hidden[34]|outputs")
    # By specifying var_list, you freeze layers other than variables you provide.
    training_op = optimizer.minimize(loss, var_list=train_vars)
    '''
    
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
    
init = tf.global_variables_initializer()

# To save batch normalization variables, you should specify tf.global_variables()
saver = tf.train.Saver(tf.global_variables())

### Execution Phase

In [11]:
# To calculate moving averages for batch normalization layers
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.Session() as sess:
    init.run()
    
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run([training_op, extra_update_ops], feed_dict={training: True, X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                            y: mnist.test.labels})
        
        print(epoch, "Train Accuracy: ", acc_train, "Test Accuracy: ", acc_test)
    
    save_path = saver.save(sess, "./ch10_model_final.ckpt")

0 Train Accuracy:  0.82 Test Accuracy:  0.8801
1 Train Accuracy:  0.92 Test Accuracy:  0.8939
2 Train Accuracy:  0.94 Test Accuracy:  0.8831
3 Train Accuracy:  0.86 Test Accuracy:  0.8948
4 Train Accuracy:  0.88 Test Accuracy:  0.8941
5 Train Accuracy:  0.92 Test Accuracy:  0.8952
6 Train Accuracy:  0.96 Test Accuracy:  0.8939
7 Train Accuracy:  0.94 Test Accuracy:  0.9028
8 Train Accuracy:  0.92 Test Accuracy:  0.8811
9 Train Accuracy:  0.92 Test Accuracy:  0.8917
10 Train Accuracy:  0.88 Test Accuracy:  0.9004
11 Train Accuracy:  0.98 Test Accuracy:  0.9032
12 Train Accuracy:  0.94 Test Accuracy:  0.8943
13 Train Accuracy:  0.9 Test Accuracy:  0.8992
14 Train Accuracy:  0.92 Test Accuracy:  0.8923
15 Train Accuracy:  0.98 Test Accuracy:  0.9001
16 Train Accuracy:  0.9 Test Accuracy:  0.9058
17 Train Accuracy:  0.88 Test Accuracy:  0.8897
18 Train Accuracy:  0.92 Test Accuracy:  0.9093
19 Train Accuracy:  0.96 Test Accuracy:  0.9045
20 Train Accuracy:  0.92 Test Accuracy:  0.8974
21 T

In [10]:
with tf.Session() as sess:
    saver.restore(sess, "./ch10_model_final.ckpt")
    X_new_scaled = [mnist.test.images[5]]
    Z = logits.eval(feed_dict={X: X_new_scaled})
    y_pred = np.argmax(Z, axis=1)
    
    print(y_pred)

INFO:tensorflow:Restoring parameters from ./ch10_model_final.ckpt
[1]


### Reusing a Tensorflow Model

In [27]:
tf.reset_default_graph()

model_path = "./ch10_model_final.ckpt"
saver = tf.train.import_meta_graph(model_path + ".meta")

# If you need to know the names
'''
for op in tf.get_default_graph().get_operations():
    print(op.name)
'''

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
accuracy = tf.get_default_graph().get_tensor_by_name("eval/accuracy:0")
training_op = tf.get_default_graph().get_operation_by_name("train/GradientDescent")

with tf.Session() as sess:
    saver.restore(sess, model_path)
    # Train on your own

INFO:tensorflow:Restoring parameters from ./ch10_model_final.ckpt


### Caching frozen layers

In [28]:
tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
training = tf.placeholder_with_default(False, shape=(), name='training')

y = tf.placeholder(tf.int64, shape=(None), name="y")

my_batch_norm_layer = partial(tf.layers.batch_normalization,
                              training=training, momentum=momentum)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1")
    bn1 = my_batch_norm_layer(hidden1)
    bn1_act = tf.nn.elu(bn1, name="hidden1_bn")
    
    # To cache a frozen layer, you should stop gradients backpropagating from that layer.
    bn1_stop = tf.stop_gradient(bn1_act)
    
    hidden2 = tf.layers.dense(bn1_stop, n_hidden2, name="hidden2")
    bn2 = my_batch_norm_layer(hidden2)
    bn2_act = tf.nn.elu(bn2, name="hidden2_bn")
    
    logits_before_bn = tf.layers.dense(bn2_act, n_outputs, name="outputs")
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [30]:
n_batches = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    saver.restore(sess, "./ch10_model_final.ckpt")
    
    bn1_act_cache = sess.run(bn1_act, feed_dict={X: mnist.train.images})
    
    for epoch in range(n_epochs):
        shuffeld_idx = np.random.permutation(mnist.train.num_examples)
        
        bn1_act_batches = np.array_split(bn1_act_cache[shuffeld_idx], n_batches)
        y_batches = np.array_split(mnist.train.labels[shuffeld_idx], n_batches)
        
        for bn1_act_batch, y_batch in zip(bn1_act_batches, y_batches):
            # You can feed any tensors even if it's not a placeholder
            sess.run(training_op, feed_dict={bn1_act: bn1_act_batch, y:y_batch})

INFO:tensorflow:Restoring parameters from ./ch10_model_final.ckpt
