### Setup

### Q8 Deep Learning

In [2]:
import tensorflow as tf
import numpy as np
from functools import partial

n_inputs = 28 * 28
n_hidden_0_to_4 = 100
learning_rate_0_4 = 0.01
learning_rate_5_9 = 0.01

(X_train_raw, y_train_raw), (X_test_raw, y_test_raw) = tf.keras.datasets.mnist.load_data()
X_train_raw = X_train_raw.astype(np.float32).reshape(-1, n_inputs) / 255
X_test_raw = X_test_raw.astype(np.float32).reshape(-1, n_inputs) / 255
y_train_raw = y_train_raw.astype(np.int32)
y_test_raw = y_test_raw.astype(np.int32)

random_indexes_train = np.random.permutation(len(X_train_raw))
X_train_raw = X_train_raw[random_indexes_train]
y_train_raw = y_train_raw[random_indexes_train]
random_indexes_test = np.random.permutation(len(X_test_raw))
X_test = X_test_raw[random_indexes_test]
y_test = y_test_raw[random_indexes_test]

X_train = X_train_raw[5000:]
y_train = y_train_raw[5000:]
X_val = X_train_raw[0:5000]
y_val = y_train_raw[0:5000]

X_train_0_to_4 = X_train[y_train <= 4]
y_train_0_to_4 = y_train[y_train <= 4]
X_val_0_to_4 = X_val[y_val <= 4]
y_val_0_to_4 = y_val[y_val <= 4]
X_test_0_to_4 = X_test[y_test <= 4]
y_test_0_to_4 = y_test[y_test <= 4]

X_train_5_to_9 = X_train[y_train >= 5][:100]
y_train_5_to_9 = y_train[y_train >= 5][:100] - 5
X_val_5_to_9 = X_val[y_val >= 5][:30]
y_val_5_to_9 = y_val[y_val >= 5][:30] - 5
X_test_5_to_9 = X_test[y_test >= 5]
y_test_5_to_9 = y_test[y_test >= 5] - 5

  from ._conv import register_converters as _register_converters


In [3]:
he_init = tf.contrib.layers.variance_scaling_initializer()
def dnn(inputs, n_layers, training, n_neurons=100, activation=tf.nn.elu, initializer=he_init, batch_norm_momentum=None, name="", dropout_rate=None):
    with tf.name_scope("%s_dnn" % (name)):
        for layer in range(n_layers):
            if (dropout_rate is not None):
                inputs = tf.layers.dropout(inputs, dropout_rate, training=training)
            inputs = tf.layers.dense(inputs, n_neurons, kernel_initializer=initializer, name="%s_hidden%d" % (name, layer + 1))
            if (batch_norm_momentum is not None):
                inputs = tf.layers.batch_normalization(inputs, training=training, momentum=batch_norm_momentum)
            inputs = activation(inputs, name="hidden%d_out" % (layer + 1))
        return inputs

In [None]:
tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name="training")

with tf.name_scope("dnn_0_to_4"):
    dnn_outputs = dnn(X, n_layers=5, n_neurons=140, training=training, name="dnn_0_to_4")
    logits_0_to_4 = tf.layers.dense(dnn_outputs, 5, name="logits_0_to_4", kernel_initializer=he_init)
    
with tf.name_scope("loss_0_to_4"):
    xentropy_0_to_4 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_0_to_4, labels=y, name="xentropy_0_to_4") 
    loss_0_to_4 = tf.reduce_mean(xentropy_0_to_4, name="loss_0_to_4")
    loss_0_to_4_summary = tf.summary.scalar("loss_0_to_4_summary", loss_0_to_4)
    
with tf.name_scope("train_0_to_4"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_0_4)
    training_op_0_to_4 = optimizer.minimize(loss_0_to_4, name="training_op_0_to_4")
    
with tf.name_scope("eval_0_to_4"):
    y_pred_0_to_4 = tf.nn.in_top_k(logits_0_to_4, y, 1)
    accuracy_0_to_4 = tf.reduce_mean(tf.cast(y_pred_0_to_4, tf.float32))
    accuracy_0_to_4_summary = tf.summary.scalar("accuracy_0_to_4_summary", accuracy_0_to_4)

init = tf.global_variables_initializer()
saver = tf.train.Saver()


In [None]:
n_epochs = 1000
batch_size = 500
early_stoping_threshold = 20
best_loss = np.infty
steps_from_best_loss = 0

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for batch_index in range(len(X_train_0_to_4) // batch_size):
            rdm_idx = np.random.permutation(len(X_train_0_to_4))
            X_batch, y_batch = X_train_0_to_4[rdm_idx[:batch_size]], y_train_0_to_4[rdm_idx[:batch_size]]
            sess.run(training_op_0_to_4, feed_dict={X: X_batch, y: y_batch})
        
        loss_0_to_4_val, accuracy_0_to_4_val = sess.run([loss_0_to_4, accuracy_0_to_4], feed_dict={X: X_val_0_to_4, y: y_val_0_to_4})
        
        if (loss_0_to_4_val < best_loss):
            saver.save(sess, "./my_mnist_model_0_to_4.ckpt")
            best_loss = loss_0_to_4_val
            steps_from_best_loss = 0
        else:
            steps_from_best_loss += 1
            
        if (steps_from_best_loss >= early_stoping_threshold):
            print("Early Stopping!")
            break;

        print("{}\tAccuracy: {:.2f}%\tLoss: {:.6f}\tBest Loss: {:.6f}".format(epoch, accuracy_0_to_4_val * 100, loss_0_to_4_val, best_loss))

0	Accuracy: 96.59%	Loss: 0.112043	Best Loss: 0.112043
1	Accuracy: 98.02%	Loss: 0.076041	Best Loss: 0.076041
2	Accuracy: 98.17%	Loss: 0.070869	Best Loss: 0.070869
3	Accuracy: 98.02%	Loss: 0.080411	Best Loss: 0.070869
4	Accuracy: 98.25%	Loss: 0.059391	Best Loss: 0.059391
5	Accuracy: 98.69%	Loss: 0.051953	Best Loss: 0.051953


In [None]:
with tf.Session() as sess:
    saver.restore(sess, "./my_mnist_model_0_to_4.ckpt")
    accuracy_0_to_4_test = sess.run(accuracy_0_to_4, feed_dict={X: X_test_0_to_4, y: y_test_0_to_4})
    print("Accuracy for test: {:.2f}%".format(accuracy_0_to_4_test*100))

#### Add Batch Normalization

In [6]:
tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name="training")

with tf.name_scope("dnn_0_to_4_bn"):
    dnn_outputs_bn = dnn(X, n_layers=5, n_neurons=140, training=training, batch_norm_momentum=0.99, name="dnn_0_to_4_bn")
    logits_0_to_4_bn = tf.layers.dense(dnn_outputs_bn, 5, name="logits_0_to_4_bn", kernel_initializer=he_init)
    
with tf.name_scope("loss_0_to_4_bn"):
    xentropy_0_to_4_bn = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_0_to_4_bn, labels=y, name="xentropy_0_to_4_bn") 
    loss_0_to_4_bn = tf.reduce_mean(xentropy_0_to_4_bn, name="loss_0_to_4_bn")
    loss_0_to_4_summary_bn = tf.summary.scalar("loss_0_to_4_summary_bn", loss_0_to_4_bn)
    
with tf.name_scope("train_0_to_4_bn"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_0_4)
    training_op_0_to_4_bn = optimizer.minimize(loss_0_to_4_bn, name="training_op_0_to_4_bn")
    
with tf.name_scope("eval_0_to_4_bn"):
    y_pred_0_to_4_bn = tf.nn.in_top_k(logits_0_to_4_bn, y, 1)
    accuracy_0_to_4_bn = tf.reduce_mean(tf.cast(y_pred_0_to_4_bn, tf.float32))
    accuracy_0_to_4_summary_bn = tf.summary.scalar("accuracy_0_to_4_summary_bn", accuracy_0_to_4_bn)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [7]:
n_epochs = 1000
batch_size = 500
early_stoping_threshold = 20
best_loss = np.infty
steps_from_best_loss = 0
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for batch_index in range(len(X_train_0_to_4) // batch_size):
            rdm_idx = np.random.permutation(len(X_train_0_to_4))
            X_batch, y_batch = X_train_0_to_4[rdm_idx[:batch_size]], y_train_0_to_4[rdm_idx[:batch_size]]
            sess.run([training_op_0_to_4_bn, extra_update_ops], feed_dict={X: X_batch, y: y_batch, training: True})
        
        loss_0_to_4_val, accuracy_0_to_4_val = sess.run([loss_0_to_4_bn, accuracy_0_to_4_bn], feed_dict={X: X_val_0_to_4, y: y_val_0_to_4})
        
        if (loss_0_to_4_val < best_loss):
            saver.save(sess, "./my_mnist_model_0_to_4_bn.ckpt")
            best_loss = loss_0_to_4_val
            steps_from_best_loss = 0
        else:
            steps_from_best_loss += 1
            
        if (steps_from_best_loss >= early_stoping_threshold):
            print("Early Stopping!")
            break;

        print("{}\tAccuracy: {:.2f}%\tLoss: {:.6f}\tBest Loss: {:.6f}".format(epoch, accuracy_0_to_4_val * 100, loss_0_to_4_val, best_loss))

0	Accuracy: 97.08%	Loss: 0.212742	Best Loss: 0.212742
1	Accuracy: 97.47%	Loss: 0.101429	Best Loss: 0.101429
2	Accuracy: 97.51%	Loss: 0.116535	Best Loss: 0.101429
3	Accuracy: 98.33%	Loss: 0.062070	Best Loss: 0.062070
4	Accuracy: 98.33%	Loss: 0.072230	Best Loss: 0.062070
5	Accuracy: 98.17%	Loss: 0.076383	Best Loss: 0.062070
6	Accuracy: 98.44%	Loss: 0.050841	Best Loss: 0.050841
7	Accuracy: 98.37%	Loss: 0.073162	Best Loss: 0.050841
8	Accuracy: 98.91%	Loss: 0.050356	Best Loss: 0.050356
9	Accuracy: 97.98%	Loss: 0.103679	Best Loss: 0.050356
10	Accuracy: 98.48%	Loss: 0.067878	Best Loss: 0.050356
11	Accuracy: 98.64%	Loss: 0.050112	Best Loss: 0.050112
12	Accuracy: 99.03%	Loss: 0.044172	Best Loss: 0.044172
13	Accuracy: 98.29%	Loss: 0.081067	Best Loss: 0.044172
14	Accuracy: 98.25%	Loss: 0.078278	Best Loss: 0.044172
15	Accuracy: 98.76%	Loss: 0.071035	Best Loss: 0.044172
16	Accuracy: 98.33%	Loss: 0.074129	Best Loss: 0.044172
17	Accuracy: 98.68%	Loss: 0.056202	Best Loss: 0.044172
18	Accuracy: 98.21%	

In [8]:
with tf.Session() as sess:
    saver.restore(sess, "./my_mnist_model_0_to_4_bn.ckpt")
    accuracy_0_to_4_bn_test = sess.run(accuracy_0_to_4_bn, feed_dict={X: X_test_0_to_4, y: y_test_0_to_4})
    print("Accuracy for test: {:.2f}%".format(accuracy_0_to_4_bn_test*100))
    accuracy_0_to_4_bn_train = sess.run(accuracy_0_to_4_bn, feed_dict={X: X_train_0_to_4, y: y_train_0_to_4})
    print("Accuracy for training: {:.2f}%".format(accuracy_0_to_4_bn_train*100))

INFO:tensorflow:Restoring parameters from ./my_mnist_model_0_to_4_bn.ckpt
Accuracy for test: 99.16%
Accuracy for training: 99.97%


Seems we are overfitting as training set accuracy is higher than test set accuracy

#### Adding Dropout

In [9]:
tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name="training")

with tf.name_scope("dnn_0_to_4_dropout"):
    dnn_outputs_dropout = dnn(X, n_layers=5, n_neurons=140, training=training, batch_norm_momentum=0.99, name="dnn_0_to_4_dropout", dropout_rate=0.5)
    logits_0_to_4_dropout = tf.layers.dense(dnn_outputs_dropout, 5, name="logits_0_to_4_dropout", kernel_initializer=he_init)
    
with tf.name_scope("loss_0_to_4_dropout"):
    xentropy_0_to_4_dropout = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_0_to_4_dropout, labels=y, name="xentropy_0_to_4_dropout") 
    loss_0_to_4_dropout = tf.reduce_mean(xentropy_0_to_4_dropout, name="loss_0_to_4_dropout")
    
with tf.name_scope("train_0_to_4_dropout"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_0_4)
    training_op_0_to_4_dropout = optimizer.minimize(loss_0_to_4_dropout, name="training_op_0_to_4_dropout")
    
with tf.name_scope("eval_0_to_4_dropout"):
    y_pred_0_to_4_dropout = tf.nn.in_top_k(logits_0_to_4_dropout, y, 1)
    accuracy_0_to_4_dropout = tf.reduce_mean(tf.cast(y_pred_0_to_4_dropout, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [10]:
n_epochs = 1000
batch_size = 500
early_stoping_threshold = 20
best_loss = np.infty
steps_from_best_loss = 0
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for batch_index in range(len(X_train_0_to_4) // batch_size):
            rdm_idx = np.random.permutation(len(X_train_0_to_4))
            X_batch, y_batch = X_train_0_to_4[rdm_idx[:batch_size]], y_train_0_to_4[rdm_idx[:batch_size]]
            sess.run([training_op_0_to_4_dropout, extra_update_ops], feed_dict={X: X_batch, y: y_batch, training: True})
        
        loss_0_to_4_val, accuracy_0_to_4_val = sess.run([loss_0_to_4_dropout, accuracy_0_to_4_dropout], feed_dict={X: X_val_0_to_4, y: y_val_0_to_4})
        
        if (loss_0_to_4_val < best_loss):
            saver.save(sess, "./my_mnist_model_0_to_4_dropout.ckpt")
            best_loss = loss_0_to_4_val
            steps_from_best_loss = 0
        else:
            steps_from_best_loss += 1
            
        if (steps_from_best_loss >= early_stoping_threshold):
            print("Early Stopping!")
            break;

        print("{}\tAccuracy: {:.2f}%\tLoss: {:.6f}\tBest Loss: {:.6f}".format(epoch, accuracy_0_to_4_val * 100, loss_0_to_4_val, best_loss))

0	Accuracy: 96.42%	Loss: 0.341176	Best Loss: 0.341176
1	Accuracy: 97.12%	Loss: 0.171205	Best Loss: 0.171205
2	Accuracy: 97.20%	Loss: 0.141679	Best Loss: 0.141679
3	Accuracy: 97.40%	Loss: 0.126951	Best Loss: 0.126951
4	Accuracy: 97.59%	Loss: 0.099626	Best Loss: 0.099626
5	Accuracy: 97.82%	Loss: 0.086024	Best Loss: 0.086024
6	Accuracy: 98.09%	Loss: 0.077669	Best Loss: 0.077669
7	Accuracy: 97.98%	Loss: 0.072782	Best Loss: 0.072782
8	Accuracy: 98.06%	Loss: 0.069851	Best Loss: 0.069851
9	Accuracy: 97.90%	Loss: 0.077855	Best Loss: 0.069851
10	Accuracy: 97.90%	Loss: 0.066210	Best Loss: 0.066210
11	Accuracy: 97.90%	Loss: 0.072184	Best Loss: 0.066210
12	Accuracy: 98.48%	Loss: 0.059408	Best Loss: 0.059408
13	Accuracy: 98.33%	Loss: 0.061974	Best Loss: 0.059408
14	Accuracy: 98.52%	Loss: 0.055014	Best Loss: 0.055014
15	Accuracy: 98.52%	Loss: 0.059049	Best Loss: 0.055014
16	Accuracy: 98.60%	Loss: 0.054451	Best Loss: 0.054451
17	Accuracy: 98.64%	Loss: 0.053936	Best Loss: 0.053936
18	Accuracy: 98.60%	

In [11]:
with tf.Session() as sess:
    saver.restore(sess, "./my_mnist_model_0_to_4_dropout.ckpt")
    accuracy_0_to_4_dropout_test = sess.run(accuracy_0_to_4_dropout, feed_dict={X: X_test_0_to_4, y: y_test_0_to_4})
    print("Accuracy for test: {:.2f}%".format(accuracy_0_to_4_dropout_test*100))
    accuracy_0_to_4_dropout_train = sess.run(accuracy_0_to_4_dropout, feed_dict={X: X_train_0_to_4, y: y_train_0_to_4})
    print("Accuracy for training: {:.2f}%".format(accuracy_0_to_4_dropout_train*100))

INFO:tensorflow:Restoring parameters from ./my_mnist_model_0_to_4_dropout.ckpt
Accuracy for test: 99.42%
Accuracy for training: 99.64%


### Q9 Transfer Learning

In [12]:
tf.reset_default_graph()

saver = tf.train.import_meta_graph("./my_mnist_model_0_to_4_bn.ckpt.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

# DNN
hidden_5_to_9 = tf.get_default_graph().get_tensor_by_name("dnn_0_to_4_bn/dnn_0_to_4_bn_dnn/hidden5_out:0")
logits_5_to_9 = tf.layers.dense(hidden_5_to_9, 5, name="logits_5_to_9", kernel_initializer=he_init)

# Loss
xentropy_5_to_9 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_5_to_9, labels=y)
loss_5_to_9 = tf.reduce_mean(xentropy_5_to_9, name="loss_5_to_9")

# Train
trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits_5_to_9")
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_5_9)
training_op_5_to_9 = optimizer.minimize(loss_5_to_9, var_list=trainable_variables, name="training_op_5_to_9")

# Eval
correct = tf.nn.in_top_k(logits_5_to_9, y, 1)
accuracy_5_to_9 = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy_5_to_9")

init = tf.global_variables_initializer()
saver_5_to_9 = tf.train.Saver()

In [13]:
import time
n_epochs = 1000
batch_size = 20
early_stoping_threshold = 20
best_loss = np.infty
steps_from_best_loss = 0

with tf.Session() as sess:
    init.run()
    saver.restore(sess, "./my_mnist_model_0_to_4_bn.ckpt")
        
    t0 = time.time()
    for epoch in range(n_epochs):
        for batch_index in range(len(X_train_5_to_9) // batch_size):
            rdm_idx = np.random.permutation(len(X_train_5_to_9))
            X_batch, y_batch = X_train_5_to_9[rdm_idx[:batch_size]], y_train_5_to_9[rdm_idx[:batch_size]]
            sess.run(training_op_5_to_9, feed_dict={X: X_batch, y: y_batch})
        
        loss_5_to_9_val, accuracy_5_to_9_val = sess.run([loss_5_to_9, accuracy_5_to_9], feed_dict={X: X_val_5_to_9, y: y_val_5_to_9})
        
        if (loss_5_to_9_val < best_loss):
            saver_5_to_9.save(sess, "./my_mnist_model_5_to_9.ckpt")
            best_loss = loss_5_to_9_val
            steps_from_best_loss = 0
        else:
            steps_from_best_loss += 1
            
        if (steps_from_best_loss >= early_stoping_threshold):
            print("Early Stopping!")
            break;

        print("{}\tAccuracy: {:.2f}%\tLoss: {:.6f}\tBest Loss: {:.6f}".format(epoch, accuracy_5_to_9_val * 100, loss_5_to_9_val, best_loss))
    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    saver_5_to_9.restore(sess, "./my_mnist_model_5_to_9.ckpt")
    accuracy_5_to_9_test = sess.run(accuracy_5_to_9, feed_dict={X: X_test_5_to_9, y: y_test_5_to_9})
    print("Accuracy for test: {:.2f}%".format(accuracy_5_to_9_test*100))

INFO:tensorflow:Restoring parameters from ./my_mnist_model_0_to_4_bn.ckpt
0	Accuracy: 43.33%	Loss: 1.503489	Best Loss: 1.503489
1	Accuracy: 50.00%	Loss: 1.350548	Best Loss: 1.350548
2	Accuracy: 56.67%	Loss: 1.161270	Best Loss: 1.161270
3	Accuracy: 53.33%	Loss: 1.181116	Best Loss: 1.161270
4	Accuracy: 50.00%	Loss: 1.272534	Best Loss: 1.161270
5	Accuracy: 56.67%	Loss: 1.246152	Best Loss: 1.161270
6	Accuracy: 43.33%	Loss: 1.233158	Best Loss: 1.161270
7	Accuracy: 53.33%	Loss: 1.093383	Best Loss: 1.093383
8	Accuracy: 50.00%	Loss: 1.023241	Best Loss: 1.023241
9	Accuracy: 43.33%	Loss: 1.049706	Best Loss: 1.023241
10	Accuracy: 60.00%	Loss: 1.087927	Best Loss: 1.023241
11	Accuracy: 60.00%	Loss: 1.093096	Best Loss: 1.023241
12	Accuracy: 60.00%	Loss: 1.005928	Best Loss: 1.005928
13	Accuracy: 60.00%	Loss: 0.977106	Best Loss: 0.977106
14	Accuracy: 63.33%	Loss: 0.929329	Best Loss: 0.929329
15	Accuracy: 66.67%	Loss: 0.922735	Best Loss: 0.922735
16	Accuracy: 63.33%	Loss: 0.922297	Best Loss: 0.922297
1

#### Caching lower layers

In [14]:
n_epochs = 1000
batch_size = 20
early_stoping_threshold = 20
best_loss = np.infty
steps_from_best_loss = 0

with tf.Session() as sess:
    init.run()
    saver.restore(sess, "./my_mnist_model_0_to_4_bn.ckpt")
    t0 = time.time()
    
    hidden5_train_cache = sess.run(hidden_5_to_9, feed_dict={X: X_train_5_to_9, y: y_train_5_to_9})
    hidden5_val_cache = sess.run(hidden_5_to_9, feed_dict={X: X_val_5_to_9, y: y_val_5_to_9})
    
    for epoch in range(n_epochs):
        for batch_index in range(len(hidden5_train_cache) // batch_size):
            rdm_idx = np.random.permutation(len(hidden5_train_cache))
            X_batch, y_batch = hidden5_train_cache[rdm_idx[:batch_size]], y_train_5_to_9[rdm_idx[:batch_size]]
            sess.run(training_op_5_to_9, feed_dict={hidden_5_to_9: X_batch, y: y_batch})
        
        loss_5_to_9_val, accuracy_5_to_9_val = sess.run([loss_5_to_9, accuracy_5_to_9], feed_dict={hidden_5_to_9: hidden5_val_cache, y: y_val_5_to_9})
        
        if (loss_5_to_9_val < best_loss):
            saver_5_to_9.save(sess, "./my_mnist_model_5_to_9_cache.ckpt")
            best_loss = loss_5_to_9_val
            steps_from_best_loss = 0
        else:
            steps_from_best_loss += 1
            
        if (steps_from_best_loss >= early_stoping_threshold):
            print("Early Stopping!")
            break;

        print("{}\tAccuracy: {:.2f}%\tLoss: {:.6f}\tBest Loss: {:.6f}".format(epoch, accuracy_5_to_9_val * 100, loss_5_to_9_val, best_loss))
    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))
    
    
with tf.Session() as sess:
    saver_5_to_9.restore(sess, "./my_mnist_model_5_to_9_cache.ckpt")
    accuracy_5_to_9_test = sess.run(accuracy_5_to_9, feed_dict={X: X_test_5_to_9, y: y_test_5_to_9})
    print("Accuracy for test: {:.2f}%".format(accuracy_5_to_9_test*100))

INFO:tensorflow:Restoring parameters from ./my_mnist_model_0_to_4_bn.ckpt
0	Accuracy: 40.00%	Loss: 1.379673	Best Loss: 1.379673
1	Accuracy: 53.33%	Loss: 1.243368	Best Loss: 1.243368
2	Accuracy: 43.33%	Loss: 1.176059	Best Loss: 1.176059
3	Accuracy: 60.00%	Loss: 1.071881	Best Loss: 1.071881
4	Accuracy: 53.33%	Loss: 1.295100	Best Loss: 1.071881
5	Accuracy: 46.67%	Loss: 1.189524	Best Loss: 1.071881
6	Accuracy: 50.00%	Loss: 1.107791	Best Loss: 1.071881
7	Accuracy: 56.67%	Loss: 1.065170	Best Loss: 1.065170
8	Accuracy: 46.67%	Loss: 1.100391	Best Loss: 1.065170
9	Accuracy: 53.33%	Loss: 1.135660	Best Loss: 1.065170
10	Accuracy: 46.67%	Loss: 1.116057	Best Loss: 1.065170
11	Accuracy: 50.00%	Loss: 1.075023	Best Loss: 1.065170
12	Accuracy: 56.67%	Loss: 1.064219	Best Loss: 1.064219
13	Accuracy: 53.33%	Loss: 1.077805	Best Loss: 1.064219
14	Accuracy: 53.33%	Loss: 1.069044	Best Loss: 1.064219
15	Accuracy: 46.67%	Loss: 1.079646	Best Loss: 1.064219
16	Accuracy: 56.67%	Loss: 1.056279	Best Loss: 1.056279
1

#### Using only 4 layers

In [15]:
tf.reset_default_graph()

saver = tf.train.import_meta_graph("./my_mnist_model_0_to_4_bn.ckpt.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

# DNN
hidden_5_to_9 = tf.get_default_graph().get_tensor_by_name("dnn_0_to_4_bn/dnn_0_to_4_bn_dnn/hidden4_out:0")
logits_5_to_9 = tf.layers.dense(hidden_5_to_9, 5, name="logits_5_to_9", kernel_initializer=he_init)

# Loss
xentropy_5_to_9 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_5_to_9, labels=y)
loss_5_to_9 = tf.reduce_mean(xentropy_5_to_9, name="loss_5_to_9")

# Train
trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden[34]|logits_5_to_9")
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_5_9)
training_op_5_to_9 = optimizer.minimize(loss_5_to_9, var_list=trainable_variables, name="training_op_5_to_9")

# Eval
correct = tf.nn.in_top_k(logits_5_to_9, y, 1)
accuracy_5_to_9 = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy_5_to_9")

init = tf.global_variables_initializer()
saver_5_to_9 = tf.train.Saver()

In [16]:
n_epochs = 1000
batch_size = 20
early_stoping_threshold = 20
best_loss = np.infty
steps_from_best_loss = 0

with tf.Session() as sess:
    init.run()
    saver.restore(sess, "./my_mnist_model_0_to_4_bn.ckpt")
        
    t0 = time.time()
    for epoch in range(n_epochs):
        for batch_index in range(len(X_train_5_to_9) // batch_size):
            rdm_idx = np.random.permutation(len(X_train_5_to_9))
            X_batch, y_batch = X_train_5_to_9[rdm_idx[:batch_size]], y_train_5_to_9[rdm_idx[:batch_size]]
            sess.run(training_op_5_to_9, feed_dict={X: X_batch, y: y_batch})
        
        loss_5_to_9_val, accuracy_5_to_9_val = sess.run([loss_5_to_9, accuracy_5_to_9], feed_dict={X: X_val_5_to_9, y: y_val_5_to_9})
        
        if (loss_5_to_9_val < best_loss):
            saver_5_to_9.save(sess, "./my_mnist_model_5_to_9_four_layers.ckpt")
            best_loss = loss_5_to_9_val
            steps_from_best_loss = 0
        else:
            steps_from_best_loss += 1
            
        if (steps_from_best_loss >= early_stoping_threshold):
            print("Early Stopping!")
            break;

        print("{}\tAccuracy: {:.2f}%\tLoss: {:.6f}\tBest Loss: {:.6f}".format(epoch, accuracy_5_to_9_val * 100, loss_5_to_9_val, best_loss))
    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))
with tf.Session() as sess:
    saver_5_to_9.restore(sess, "./my_mnist_model_5_to_9_four_layers.ckpt")
    accuracy_5_to_9_test = sess.run(accuracy_5_to_9, feed_dict={X: X_test_5_to_9, y: y_test_5_to_9})
    print("Accuracy for test: {:.2f}%".format(accuracy_5_to_9_test*100))

INFO:tensorflow:Restoring parameters from ./my_mnist_model_0_to_4_bn.ckpt
0	Accuracy: 43.33%	Loss: 1.362749	Best Loss: 1.362749
1	Accuracy: 46.67%	Loss: 1.084748	Best Loss: 1.084748
2	Accuracy: 56.67%	Loss: 1.011841	Best Loss: 1.011841
3	Accuracy: 53.33%	Loss: 0.985958	Best Loss: 0.985958
4	Accuracy: 60.00%	Loss: 0.992126	Best Loss: 0.985958
5	Accuracy: 53.33%	Loss: 1.067675	Best Loss: 0.985958
6	Accuracy: 50.00%	Loss: 1.064475	Best Loss: 0.985958
7	Accuracy: 56.67%	Loss: 1.043837	Best Loss: 0.985958
8	Accuracy: 60.00%	Loss: 1.054271	Best Loss: 0.985958
9	Accuracy: 60.00%	Loss: 1.026646	Best Loss: 0.985958
10	Accuracy: 63.33%	Loss: 0.998456	Best Loss: 0.985958
11	Accuracy: 56.67%	Loss: 0.965752	Best Loss: 0.965752
12	Accuracy: 63.33%	Loss: 0.862083	Best Loss: 0.862083
13	Accuracy: 70.00%	Loss: 0.822880	Best Loss: 0.822880
14	Accuracy: 66.67%	Loss: 0.886024	Best Loss: 0.822880
15	Accuracy: 60.00%	Loss: 0.926282	Best Loss: 0.822880
16	Accuracy: 60.00%	Loss: 0.964440	Best Loss: 0.822880
1

In [17]:
tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name="training")

# DNN
dnn_outputs = dnn(X, n_layers=5, n_neurons=140, training=training, name="dnn_5_to_9")
logits_5_to_9 = tf.layers.dense(dnn_outputs, 5, name="logits_5_to_9", kernel_initializer=he_init)

# Loss
xentropy_5_to_9 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_5_to_9, labels=y)
loss_5_to_9 = tf.reduce_mean(xentropy_5_to_9, name="loss_5_to_9")

# Train
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_5_9)
training_op_5_to_9 = optimizer.minimize(loss_5_to_9, name="training_op_5_to_9")

# Eval
correct = tf.nn.in_top_k(logits_5_to_9, y, 1)
accuracy_5_to_9 = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy_5_to_9")

init = tf.global_variables_initializer()
saver_5_to_9 = tf.train.Saver()

In [18]:
n_epochs = 1000
batch_size = 20
early_stoping_threshold = 20
best_loss = np.infty
steps_from_best_loss = 0

with tf.Session() as sess:
    init.run()
    t0 = time.time()
    for epoch in range(n_epochs):
        for batch_index in range(len(X_train_5_to_9) // batch_size):
            rdm_idx = np.random.permutation(len(X_train_5_to_9))
            X_batch, y_batch = X_train_5_to_9[rdm_idx[:batch_size]], y_train_5_to_9[rdm_idx[:batch_size]]
            sess.run(training_op_5_to_9, feed_dict={X: X_batch, y: y_batch})
        
        loss_5_to_9_val, accuracy_5_to_9_val = sess.run([loss_5_to_9, accuracy_5_to_9], feed_dict={X: X_val_5_to_9, y: y_val_5_to_9})
        
        if (loss_5_to_9_val < best_loss):
            saver_5_to_9.save(sess, "./my_mnist_model_5_to_9_scratch.ckpt")
            best_loss = loss_5_to_9_val
            steps_from_best_loss = 0
        else:
            steps_from_best_loss += 1
            
        if (steps_from_best_loss >= early_stoping_threshold):
            print("Early Stopping!")
            break;

        print("{}\tAccuracy: {:.2f}%\tLoss: {:.6f}\tBest Loss: {:.6f}".format(epoch, accuracy_5_to_9_val * 100, loss_5_to_9_val, best_loss))
    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))
with tf.Session() as sess:
    saver_5_to_9.restore(sess, "./my_mnist_model_5_to_9_scratch.ckpt")
    accuracy_5_to_9_test = sess.run(accuracy_5_to_9, feed_dict={X: X_test_5_to_9, y: y_test_5_to_9})
    print("Accuracy for test: {:.2f}%".format(accuracy_5_to_9_test*100))

0	Accuracy: 50.00%	Loss: 8.320246	Best Loss: 8.320246
1	Accuracy: 83.33%	Loss: 0.344862	Best Loss: 0.344862
2	Accuracy: 70.00%	Loss: 1.861371	Best Loss: 0.344862
3	Accuracy: 63.33%	Loss: 2.058497	Best Loss: 0.344862
4	Accuracy: 90.00%	Loss: 0.294264	Best Loss: 0.294264
5	Accuracy: 86.67%	Loss: 0.379326	Best Loss: 0.294264
6	Accuracy: 90.00%	Loss: 0.316537	Best Loss: 0.294264
7	Accuracy: 90.00%	Loss: 0.244286	Best Loss: 0.244286
8	Accuracy: 86.67%	Loss: 0.592166	Best Loss: 0.244286
9	Accuracy: 80.00%	Loss: 0.861799	Best Loss: 0.244286
10	Accuracy: 83.33%	Loss: 0.791724	Best Loss: 0.244286
11	Accuracy: 90.00%	Loss: 0.517244	Best Loss: 0.244286
12	Accuracy: 83.33%	Loss: 0.960161	Best Loss: 0.244286
13	Accuracy: 86.67%	Loss: 0.639712	Best Loss: 0.244286
14	Accuracy: 96.67%	Loss: 0.280998	Best Loss: 0.244286
15	Accuracy: 90.00%	Loss: 0.342864	Best Loss: 0.244286
16	Accuracy: 86.67%	Loss: 0.704786	Best Loss: 0.244286
17	Accuracy: 86.67%	Loss: 0.888516	Best Loss: 0.244286
18	Accuracy: 86.67%	