# Neural Network with TensorFlow

In [1]:
import tensorflow as tf
import numpy as np

  from ._conv import register_converters as _register_converters


In [2]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [57]:
reset_graph()

In [3]:
from datetime import datetime

def log_dir(prefix="", model=False, timestamp=False, subdirname=""):
    """
    - model - set True if model is saved (e.g. saver.save())
    - timestamp - set True to create unique directory
    """
    time = ""
    root_dir = "../models" if model else "../tf_logs"
    if prefix:
        prefix += "-"
    if timestamp:
        time = "-" + datetime.utcnow().strftime("%Y%m%d%H%M%S")
    name = prefix + "model" + time
    logdir = "{}/{}".format(root_dir, name) if not subdirname else "{}/{}/{}".format(root_dir, subdirname, name)
    return logdir

In [4]:
n_inputs = 28 * 28  # MNIST image
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

In [5]:
X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(dtype=tf.int32, shape=(None), name="y")

In [61]:
# simple function for a neuron layer - actually we don't need it since TF has its own implementation
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs + n_neurons)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z

In [62]:
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
    logits = neuron_layer(hidden2, n_outputs, name="outputs")
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

In [63]:
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

In [64]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [65]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [66]:
file_writer = tf.summary.FileWriter(log_dir(prefix="mnist_NN"), tf.get_default_graph())
file_writer.close()

Our graph is ready. Let's start a learning phase:

Prepare the data:

In [22]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
print(X_train.shape, X_train.dtype)
print(y_train.shape, y_train.dtype)
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0   # reshape and scale from 0.0 to 1.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_test = y_test.astype(np.int32)
y_train = y_train.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

(60000, 28, 28) uint8
(60000,) uint8


Start learning:

In [42]:
n_epochs = 40
batch_size = 50

In [23]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, batch_size):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [74]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print("Epoch {:2}\tBatch accuracy: {:.5f}\tVal accuracy: {:.5f}".format(epoch, acc_train, acc_val))
    save_path = saver.save(sess, log_dir(prefix="mnist_NN", model=True, subdirname="mnist_NN_model"))

Epoch  0	Batch accuracy: 0.58273	Val accuracy: 0.56600
Epoch  1	Batch accuracy: 0.71636	Val accuracy: 0.73340
Epoch  2	Batch accuracy: 0.79818	Val accuracy: 0.79740
Epoch  3	Batch accuracy: 0.82091	Val accuracy: 0.82940
Epoch  4	Batch accuracy: 0.82727	Val accuracy: 0.84940
Epoch  5	Batch accuracy: 0.83909	Val accuracy: 0.86220
Epoch  6	Batch accuracy: 0.86182	Val accuracy: 0.87000
Epoch  7	Batch accuracy: 0.84727	Val accuracy: 0.87740
Epoch  8	Batch accuracy: 0.87909	Val accuracy: 0.88400
Epoch  9	Batch accuracy: 0.87818	Val accuracy: 0.88680
Epoch 10	Batch accuracy: 0.88727	Val accuracy: 0.89040
Epoch 11	Batch accuracy: 0.88727	Val accuracy: 0.89480
Epoch 12	Batch accuracy: 0.89273	Val accuracy: 0.89780
Epoch 13	Batch accuracy: 0.90000	Val accuracy: 0.90120
Epoch 14	Batch accuracy: 0.90182	Val accuracy: 0.90440
Epoch 15	Batch accuracy: 0.90182	Val accuracy: 0.90500
Epoch 16	Batch accuracy: 0.91000	Val accuracy: 0.90900
Epoch 17	Batch accuracy: 0.89909	Val accuracy: 0.90980
Epoch 18	B

Use model:

In [75]:
with tf.Session() as sess:
    saver.restore(sess, save_path)
    X_new = X_test[:20]
    Z = logits.eval(feed_dict={X: X_new})
    y_pred = np.argmax(Z, axis=1)

INFO:tensorflow:Restoring parameters from ../models/mnist_NN_model/mnist_NN-model


In [86]:
import pandas as pd

results = pd.DataFrame({"Actual": y_test[:20], "Prediction": y_pred})
results["Correct"] = results["Actual"] == results["Prediction"]
results

Unnamed: 0,Actual,Prediction,Correct
0,7,7,True
1,2,2,True
2,1,1,True
3,0,0,True
4,4,4,True
5,1,1,True
6,4,4,True
7,9,9,True
8,5,6,False
9,9,9,True


### Using ``tf.layer.dense`` in place of ``neuron_layer``

In [27]:
reset_graph()

In [28]:
n_inputs = 28 * 28  # MNIST image
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

learning_rate = 0.1

In [29]:
X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(dtype=tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

file_writer = tf.summary.FileWriter(log_dir(prefix="TF_dense-mnistNN"), tf.get_default_graph())

In [30]:
n_epochs = 40
batch_size = 50

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print("Epoch {:2}\tBatch accuracy: {:.5f}\tVal accuracy: {:.5f}".format(epoch, accuracy_batch, accuracy_val))
    save_path = saver.save(sess, log_dir("TF_dense-mnistNN", model=True, subdirname="TF_dense_mnistNN"))

Epoch  0	Batch accuracy: 0.85818	Val accuracy: 0.85580
Epoch  1	Batch accuracy: 0.90545	Val accuracy: 0.89960
Epoch  2	Batch accuracy: 0.90091	Val accuracy: 0.91140
Epoch  3	Batch accuracy: 0.92000	Val accuracy: 0.92160
Epoch  4	Batch accuracy: 0.92000	Val accuracy: 0.92960
Epoch  5	Batch accuracy: 0.92182	Val accuracy: 0.92520
Epoch  6	Batch accuracy: 0.94091	Val accuracy: 0.93860
Epoch  7	Batch accuracy: 0.95182	Val accuracy: 0.93940
Epoch  8	Batch accuracy: 0.95545	Val accuracy: 0.94300
Epoch  9	Batch accuracy: 0.95273	Val accuracy: 0.94620
Epoch 10	Batch accuracy: 0.94818	Val accuracy: 0.94880
Epoch 11	Batch accuracy: 0.95545	Val accuracy: 0.95000
Epoch 12	Batch accuracy: 0.94182	Val accuracy: 0.95100
Epoch 13	Batch accuracy: 0.95091	Val accuracy: 0.95440
Epoch 14	Batch accuracy: 0.95636	Val accuracy: 0.95540
Epoch 15	Batch accuracy: 0.95455	Val accuracy: 0.95680
Epoch 16	Batch accuracy: 0.95364	Val accuracy: 0.95820
Epoch 17	Batch accuracy: 0.96000	Val accuracy: 0.95980
Epoch 18	B

## Batch Normalization

1. $\mu_B = \dfrac{1}{m_B}\sum\limits_{i=1}^{m_B}\mathbf{x}^{(i)}$
2. $\sigma_B^2 = \dfrac{1}{m_B}\sum\limits_{i=1}^{m_B}(\mathbf{x}^{(i)}-\mu_B)^2$
3. $\hat{\mathbf{x}}^{(i)}=\dfrac{\mathbf{x}^{(i)}-\mu_B}{\sqrt{\mu_B^2+\varepsilon}}$
4. $\mathbf{z}^{(i)} = \gamma\hat{\mathbf{x}}^{(i)} + \beta$


- $\mu_B$ - empirical average value (counted on mini-batch B)
- $\sigma_B$ - empirical standart deviation (counted on mini-batch B)
- $m_B$ - number of samples in mini-batch B
- $\hat{\mathbf{x}}^{(i)}$ - centered around 0 and normalized input
- $\gamma$ - scalability parameter for a layer
- $\beta$ - shift (bias) value for a layer
- $\varepsilon$ - smoothing term (usually $10^{-5}$)
- $\mathbf{z}^{(i)}$ - BN operation output (scaled and shifted version of inputs)

In [31]:
reset_graph()

Creating a graph:

In [32]:
from functools import partial

In [33]:
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
outputs = 10
learning_rate = 0.1

In [34]:
X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(dtype=tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name="training")

with tf.name_scope("dnn"):
    my_batch_norm_layer = partial(tf.layers.batch_normalization, training=training, momentum=0.9)
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1")
    bn1 = my_batch_norm_layer(hidden1)
    bn1_act = tf.nn.elu(bn1)
    hidden2 = tf.layers.dense(bn1_act, n_hidden2, name="hidden2")
    bn2 = my_batch_norm_layer(hidden2)
    bn2_act = tf.nn.elu(bn2)
    logits_before_bn = tf.layers.dense(bn2_act, outputs, name="logits")
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

file_writer = tf.summary.FileWriter(log_dir("BatchNormalized_model"), tf.get_default_graph())

Start learning:

In [36]:
n_epochs = 20
batch_size = 200

To update $\gamma$ and $\beta$ we have to evaluate some extra operations to calculate them (in addition to training_op node evaluation)

In [40]:
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run([training_op, extra_update_ops], feed_dict={training: True, X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print("Epoch {:2}\tAccuracy: {:.5f}".format(epoch, accuracy_val))
    saver.save(sess, log_dir("BatchNorm", model=True, subdirname="BatchNorm_model"))

Epoch  0	Accuracy: 0.93520
Epoch  1	Accuracy: 0.95460
Epoch  2	Accuracy: 0.96260
Epoch  3	Accuracy: 0.96900
Epoch  4	Accuracy: 0.97100
Epoch  5	Accuracy: 0.97100
Epoch  6	Accuracy: 0.97380
Epoch  7	Accuracy: 0.97320
Epoch  8	Accuracy: 0.97420
Epoch  9	Accuracy: 0.97200
Epoch 10	Accuracy: 0.97620
Epoch 11	Accuracy: 0.97640
Epoch 12	Accuracy: 0.97760
Epoch 13	Accuracy: 0.97860
Epoch 14	Accuracy: 0.97760
Epoch 15	Accuracy: 0.97800
Epoch 16	Accuracy: 0.97780
Epoch 17	Accuracy: 0.97940
Epoch 18	Accuracy: 0.97800
Epoch 19	Accuracy: 0.98060


## Gradient Clipping

In [44]:
reset_graph()

In [42]:
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 50
n_hidden5 = 50
n_outputs = 10

learning_rate = 0.1

In [45]:
X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(dtype=tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")
    hidden5 = tf.layers.dense(hidden4, n_hidden5, activation=tf.nn.relu, name="hidden5")
    logits = tf.layers.dense(hidden5, n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("train"):
    threshold = 1.0
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    grads_and_vars = optimizer.compute_gradients(loss)
    capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var) for grad, var in grads_and_vars]
    training_op = optimizer.apply_gradients(capped_gvs)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

file_writer = tf.summary.FileWriter(log_dir("GradientClipping_model"), tf.get_default_graph())

In [46]:
n_epochs = 20
batch_size = 200

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print("Epoch {:2}\tValid accuracy: {:.5f}".format(epoch, accuracy_val))
    saver.save(sess, log_dir("GradientClipping", model=True, subdirname="GradClipp_model"))

Epoch  0	Valid accuracy: 0.91540
Epoch  1	Valid accuracy: 0.94820
Epoch  2	Valid accuracy: 0.95920
Epoch  3	Valid accuracy: 0.95180
Epoch  4	Valid accuracy: 0.96900
Epoch  5	Valid accuracy: 0.95860
Epoch  6	Valid accuracy: 0.97240
Epoch  7	Valid accuracy: 0.97440
Epoch  8	Valid accuracy: 0.97500
Epoch  9	Valid accuracy: 0.97680
Epoch 10	Valid accuracy: 0.97640
Epoch 11	Valid accuracy: 0.97600
Epoch 12	Valid accuracy: 0.96900
Epoch 13	Valid accuracy: 0.97720
Epoch 14	Valid accuracy: 0.97940
Epoch 15	Valid accuracy: 0.97360
Epoch 16	Valid accuracy: 0.97560
Epoch 17	Valid accuracy: 0.97980
Epoch 18	Valid accuracy: 0.98000
Epoch 19	Valid accuracy: 0.97980
