In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

In [2]:
# load data: digits 5 to 9, but still label with 0 to 4, 
# because TensorFlow expects label's integers from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# we want to keep only 100 instances per class in the training set 
# and let's keep only 30 instances per class in the validation set
# tesing set is already loaded above
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

In [17]:
#restore HW2 model's tensor by name

tf.reset_default_graph()
restore_saver = tf.train.import_meta_graph( "./model/Team11_HW2.ckpt.meta")
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
loss = tf.get_default_graph().get_tensor_by_name("loss:0")
Y_proba = tf.get_default_graph().get_tensor_by_name("Y_proba:0")
logits = Y_proba.op.inputs[0]
accuracy = tf.get_default_graph().get_tensor_by_name("accuracy:0")

#get the softmax layer
output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits")
learning_rate = 0.01
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")

#exclude other trainable variable except softmax layer
training_op = optimizer.minimize(loss, var_list=output_layer_vars)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
five_frozen_saver = tf.train.Saver()


HW3.1 train softmax only

In [18]:
import time

n_epochs = 1000
#early stop if no progress in 20 epochs
early_stop_trigger_step = 20
early_stop_cnt = 0
batch_size = 30


with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./model/Team11_HW2.ckpt")
    
    #initialize softmax layer
    for var in output_layer_vars:
        var.initializer.run()
    
    best_loss = np.infty
    
    #create timer for benchmark
    t0 = time.time()
    for epoch in range(n_epochs):
        
        #random select training data
        rnd_idx = np.random.permutation(len(X_train2)) #create 1~500 rnd
        for rnd_idxs in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_idxs], y_train2[rnd_idxs]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        
        if loss_val < best_loss:
            early_stop_cnt = 0
            best_loss = loss_val
        else:
            early_stop_cnt += 1
            if early_stop_cnt > early_stop_trigger_step:
                save_path = five_frozen_saver.save(sess, "./Team11_HW3_1.ckpt")
                print("Early stop triggered")
                break
        print("{}\tValid loss: {:.4f}\tBest loss: {:.4f}\tACC: {:.2f}".format(
            epoch, loss_val, best_loss, acc_val ))

    t1 = time.time()
    print("Training time: {:.1f}sec".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./Team11_HW3_1.ckpt")
    acc_test = sess.run(accuracy,feed_dict={X: X_test2, y: y_test2})
    print("test acc: {:.4f}".format(acc_test ))

INFO:tensorflow:Restoring parameters from ./model/Team11_HW2.ckpt
0	Valid loss: 1.2603	Best loss: 1.2603	ACC: 0.50
1	Valid loss: 1.2864	Best loss: 1.2603	ACC: 0.49
2	Valid loss: 1.2394	Best loss: 1.2394	ACC: 0.51
3	Valid loss: 1.2301	Best loss: 1.2301	ACC: 0.51
4	Valid loss: 1.1631	Best loss: 1.1631	ACC: 0.55
5	Valid loss: 1.2169	Best loss: 1.1631	ACC: 0.46
6	Valid loss: 1.2673	Best loss: 1.1631	ACC: 0.52
7	Valid loss: 1.0506	Best loss: 1.0506	ACC: 0.55
8	Valid loss: 1.1567	Best loss: 1.0506	ACC: 0.48
9	Valid loss: 1.1438	Best loss: 1.0506	ACC: 0.53
10	Valid loss: 1.0230	Best loss: 1.0230	ACC: 0.61
11	Valid loss: 1.0987	Best loss: 1.0230	ACC: 0.55
12	Valid loss: 1.1874	Best loss: 1.0230	ACC: 0.49
13	Valid loss: 1.2647	Best loss: 1.0230	ACC: 0.45
14	Valid loss: 1.2307	Best loss: 1.0230	ACC: 0.54
15	Valid loss: 1.1329	Best loss: 1.0230	ACC: 0.59
16	Valid loss: 1.1388	Best loss: 1.0230	ACC: 0.56
17	Valid loss: 1.1173	Best loss: 1.0230	ACC: 0.56
18	Valid loss: 1.2515	Best loss: 1.0230	ACC:

HW3.2 cache 5th layer's output
We found that training time speed up almost 2x if cache method is applied

In [19]:
   
n_epochs = 1000
#early stop if no progress in 20 epochs
early_stop_trigger_step = 20
early_stop_cnt = 0
batch_size = 30

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./model/Team11_HW2.ckpt")
    for var in output_layer_vars:
        var.initializer.run()

    #cache 5th layer before training
    hidden5_out = tf.get_default_graph().get_tensor_by_name("hidden5_out:0")
    h5_train_cache = sess.run(hidden5_out,feed_dict={X: X_train2, y: y_train2})
    h5_valid_cache = sess.run(hidden5_out,feed_dict={X: X_valid2, y: y_valid2})
    
    best_loss = np.infty
    
    #create timer for benchmark
    t0 = time.time()
    #start training and print each epoch
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2)) 
        #get 5th layer from HW2 and feed as input of softmax layer
        for rnd_idxs in np.array_split(rnd_idx, len(X_train2) // batch_size):
            h5_batch, y_batch = h5_train_cache[rnd_idxs], y_train2[rnd_idxs]
            sess.run(training_op, feed_dict={hidden5_out: h5_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={hidden5_out: h5_valid_cache, y: y_valid2})
        if loss_val < best_loss:
            best_loss = loss_val
            early_stop_cnt = 0
        else:
            early_stop_cnt += 1
            if early_stop_cnt > early_stop_trigger_step:
                save_path = five_frozen_saver.save(sess, "./Team11_HW3_2.ckpt")
                print("Early stop triggered")
                break
        print("{}\tValid loss: {:.4f}\tBest loss: {:.4f}\tACC: {:.2f}".format(
            epoch, loss_val, best_loss, acc_val))

    t1 = time.time()
    print("Training time: {:.1f}sec".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./Team11_HW3_2.ckpt")
    acc_test = sess.run(accuracy,feed_dict={X: X_test2, y: y_test2})
    print("test acc: {:.4f}".format(acc_test))

INFO:tensorflow:Restoring parameters from ./model/Team11_HW2.ckpt
0	Valid loss: 1.2686	Best loss: 1.2686	ACC: 0.51
1	Valid loss: 1.1892	Best loss: 1.1892	ACC: 0.51
2	Valid loss: 1.1515	Best loss: 1.1515	ACC: 0.55
3	Valid loss: 1.2110	Best loss: 1.1515	ACC: 0.51
4	Valid loss: 1.1369	Best loss: 1.1369	ACC: 0.52
5	Valid loss: 1.1092	Best loss: 1.1092	ACC: 0.53
6	Valid loss: 1.1373	Best loss: 1.1092	ACC: 0.56
7	Valid loss: 1.1340	Best loss: 1.1092	ACC: 0.54
8	Valid loss: 1.1098	Best loss: 1.1092	ACC: 0.57
9	Valid loss: 1.1918	Best loss: 1.1092	ACC: 0.49
10	Valid loss: 1.1162	Best loss: 1.1092	ACC: 0.54
11	Valid loss: 1.1755	Best loss: 1.1092	ACC: 0.51
12	Valid loss: 1.2031	Best loss: 1.1092	ACC: 0.51
13	Valid loss: 1.1190	Best loss: 1.1092	ACC: 0.53
14	Valid loss: 1.1454	Best loss: 1.1092	ACC: 0.51
15	Valid loss: 1.1927	Best loss: 1.1092	ACC: 0.53
16	Valid loss: 1.1284	Best loss: 1.1092	ACC: 0.54
17	Valid loss: 1.2051	Best loss: 1.1092	ACC: 0.50
18	Valid loss: 1.1251	Best loss: 1.1092	ACC:

HW3.3 Load HW2 model again, now use 4th hidden layer's output and feed them into new softmax layer:
Result better than 3_1 and 3_2

In [30]:
reset_graph()

n_outputs = 5
#get tensor from HW2
restore_saver = tf.train.import_meta_graph("./model/Team11_HW2.ckpt.meta")
he_init = tf.contrib.layers.variance_scaling_initializer()
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

#get 4th hidden layer before training
#here we connect 4th hidden layer output to new softmax layer
hidden4_out = tf.get_default_graph().get_tensor_by_name("hidden4_out:0")
logits = tf.layers.dense(hidden4_out, n_outputs, kernel_initializer=he_init, name="new_logits")
Y_proba = tf.nn.softmax(logits)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)

#exclude other trainable value and keep only softmax layer trainable
#define optimizer and loss function
loss = tf.reduce_mean(xentropy, name="loss")
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="new_logits")
learning_rate = 0.01
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

init = tf.global_variables_initializer()
four_frozen_saver = tf.train.Saver()

In [31]:
n_epochs = 1000
batch_size = 30
#early stop if no progress in 20 epochs
early_stop_trigger_step = 20
early_stop_cnt = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./model/Team11_HW2.ckpt")
    
#     for var in output_layer_vars:
#         var.initializer.run()
    
    t0 = time.time()
    #start training and print each epoch
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_idxs in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_idxs], y_train2[rnd_idxs]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            best_loss = loss_val
            early_stop_cnt = 0
        else:
            early_stop_cnt += 1
            if early_stop_cnt > early_stop_trigger_step:
                save_path = four_frozen_saver.save(sess, "./Team11_HW3_3.ckpt")
                print("Early stop triggered")
                break
        print("{}\tValid loss: {:.4f}\tBest loss: {:.4f}\tACC: {:.2f}".format(
            epoch, loss_val, best_loss, acc_val))
    t1 = time.time()
    print("Training time: {:.1f}sec".format(t1 - t0))
with tf.Session() as sess:
    four_frozen_saver.restore(sess, "./Team11_HW3_3.ckpt")
    #use full instance per class for testing set
    acc_test = sess.run(accuracy,feed_dict={X: X_test2, y: y_test2})
    print("test acc:  {:.4f}".format(acc_test))

INFO:tensorflow:Restoring parameters from ./model/Team11_HW2.ckpt
0	Valid loss: 2.9443	Best loss: 2.9443	ACC: 0.55
1	Valid loss: 2.0043	Best loss: 2.0043	ACC: 0.46
2	Valid loss: 2.2452	Best loss: 2.0043	ACC: 0.47
3	Valid loss: 1.9935	Best loss: 1.9935	ACC: 0.53
4	Valid loss: 1.7248	Best loss: 1.7248	ACC: 0.53
5	Valid loss: 2.1877	Best loss: 1.7248	ACC: 0.47
6	Valid loss: 2.6916	Best loss: 1.7248	ACC: 0.50
7	Valid loss: 2.5146	Best loss: 1.7248	ACC: 0.47
8	Valid loss: 1.6377	Best loss: 1.6377	ACC: 0.53
9	Valid loss: 1.9110	Best loss: 1.6377	ACC: 0.52
10	Valid loss: 1.7411	Best loss: 1.6377	ACC: 0.49
11	Valid loss: 2.0451	Best loss: 1.6377	ACC: 0.46
12	Valid loss: 2.0910	Best loss: 1.6377	ACC: 0.38
13	Valid loss: 2.0179	Best loss: 1.6377	ACC: 0.45
14	Valid loss: 1.7109	Best loss: 1.6377	ACC: 0.47
15	Valid loss: 2.0010	Best loss: 1.6377	ACC: 0.55
16	Valid loss: 1.7463	Best loss: 1.6377	ACC: 0.47
17	Valid loss: 1.5427	Best loss: 1.5427	ACC: 0.47
18	Valid loss: 2.0850	Best loss: 1.5427	ACC:

HW3.4  Unfreeze top 2 layers.
Result testing set accuracy is better than all method above

In [46]:
learning_rate = 0.01
unfrozen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden[34]|new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam3")
training_op = optimizer.minimize(loss, var_list=unfrozen_vars)

init = tf.global_variables_initializer()
two_frozen_saver = tf.train.Saver()

In [47]:
n_epochs = 1000
batch_size = 30
#early stop if no progress in 20 epochs
early_stop_trigger_step = 20
early_stop_cnt = 0


with tf.Session() as sess:
    init.run()
    four_frozen_saver.restore(sess, "./Team11_HW3_3.ckpt")
    best_loss = np.infty
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_idxs in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_idxs], y_train2[rnd_idxs]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            best_loss = loss_val
            early_stop_cnt = 0
        else:
            early_stop_cnt += 1
            if early_stop_cnt > early_stop_trigger_step:
                save_path = two_frozen_saver.save(sess, "./Team11_HW3_4.ckpt")
                print("Early stop triggered")
                break
        print("{}\tValid loss: {:.4f}\tBest loss: {:.4f}\tACC: {:.2f}".format(
            epoch, loss_val, best_loss, acc_val))

with tf.Session() as sess:
    two_frozen_saver.restore(sess, "./Team11_HW3_4.ckpt")
    acc_test = sess.run(accuracy,feed_dict={X: X_test2, y: y_test2})
    print("test acc:  {:.4f}".format(acc_test))

INFO:tensorflow:Restoring parameters from ./Team11_HW3_3.ckpt
0	Valid loss: 2.2020	Best loss: 2.2020	ACC: 0.49
1	Valid loss: 1.7333	Best loss: 1.7333	ACC: 0.53
2	Valid loss: 1.8713	Best loss: 1.7333	ACC: 0.49
3	Valid loss: 1.6853	Best loss: 1.6853	ACC: 0.43
4	Valid loss: 1.1794	Best loss: 1.1794	ACC: 0.57
5	Valid loss: 1.4784	Best loss: 1.1794	ACC: 0.48
6	Valid loss: 1.8281	Best loss: 1.1794	ACC: 0.45
7	Valid loss: 1.1877	Best loss: 1.1794	ACC: 0.55
8	Valid loss: 1.3865	Best loss: 1.1794	ACC: 0.47
9	Valid loss: 1.0211	Best loss: 1.0211	ACC: 0.54
10	Valid loss: 1.3133	Best loss: 1.0211	ACC: 0.55
11	Valid loss: 1.2680	Best loss: 1.0211	ACC: 0.50
12	Valid loss: 1.2298	Best loss: 1.0211	ACC: 0.52
13	Valid loss: 1.1892	Best loss: 1.0211	ACC: 0.58
14	Valid loss: 1.1658	Best loss: 1.0211	ACC: 0.54
15	Valid loss: 1.1435	Best loss: 1.0211	ACC: 0.48
16	Valid loss: 1.0880	Best loss: 1.0211	ACC: 0.57
17	Valid loss: 1.1188	Best loss: 1.0211	ACC: 0.59
18	Valid loss: 1.0563	Best loss: 1.0211	ACC: 0.5