In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import time
import tensorflow as tf

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

In [2]:
# load data: digits 5 to 9, but still label with 0 to 4, 
# because TensorFlow expects label's integers from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# we want to keep only 100 instances per class in the training set 
# and let's keep only 30 instances per class in the validation set
# tesing set is already loaded above
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

In [4]:
n_epochs   = 1000         # Spec 要求 1000 epochs
early_stop_limit = 20     # Spec 要求，利用 early stop 節省時間
batch_size = 20           # 設定 batch，節省 GPU memory 使用率

In [5]:
"""
用 function 取代重複的部分，也就是訓練的過程，傳入 training dataset 和 validation dataset 以及 cache 參數來決定是否快取
快取的部分是針對 feed_dict 做更動，如果啟用快取功能，則 feed_dict 會直接輸入第五層結果，若無則是 X, y。
"""
def run(sess, train_data, train_label, X_valid, y_valid, cache=False):
    best_loss = np.infty # 預設無限大，後面訓練過程若有更小值則取代
    early_stop = 0       # 紀錄 early_stop 的次數
    # summary_writer = tf.summary.FileWriter('tensorboard/', graph=tf.get_default_graph())

    # 利用 cache 判斷是否要做快取，因為 sess.run 的部分都相同，因此針對 feed_dict 來做更動
    if cache:
        hidden5_train = sess.run(hidden5_out, feed_dict={X: train_data})
        hidden5_valid = sess.run(hidden5_out, feed_dict={X: X_valid})
        val_feed = {hidden5_out: hidden5_valid, y: y_valid}
    else:
        val_feed = {X: X_valid, y: y_valid}
        
    # 1000 epochs
    for epoch in range(n_epochs):
        
        # 使用 batch 來讓執行的過程使用較小的 memory
        rnd_idx = np.random.permutation(len(train_data))
        for rnd_indices in np.array_split(rnd_idx, len(train_data) // batch_size):
            
            # 利用 cache 判斷是否要做快取，因為 sess.run 的部分都相同，因此針對 feed_dict 來做更動
            if cache:
                train_feed = {hidden5_out: hidden5_train[rnd_indices], y: train_label[rnd_indices]}
            else:
                train_feed = {X: train_data[rnd_indices], y: train_label[rnd_indices]}
            sess.run(training_op, feed_dict=train_feed)

             
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict=val_feed)
        
        # 設定 early stop 機制讓已經收斂的 model 提早結束，節省時間
        if loss_val < best_loss:
            saver.save(sess, checkpoint_dir)
            best_loss = loss_val
            early_stop = 0
        else:
            early_stop += 1
            if early_stop > early_stop_limit:
                print("Early stopping!")
                break
                
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

In [6]:
#####
#
#     Homework 3.1 Softmax only
#
#
#     基本上結果會是：Final test accuracy: 84.80% 上下，時間約 13 秒左右。
#
#####

In [7]:
reset_graph()

restore_saver = tf.train.import_meta_graph("save/Team07_HW2.ckpt.meta")            # 回復 HW2 完成的 graph 及其 model

X         = tf.get_default_graph().get_tensor_by_name("X:0")                       # 利用 name 抓出 tf 變數
y         = tf.get_default_graph().get_tensor_by_name("y:0")
loss      = tf.get_default_graph().get_tensor_by_name("calc_loss/loss:0")          # 因為在 HW2 中有用到 with scope，因此有前綴
Y_proba   = tf.get_default_graph().get_tensor_by_name("DNN/Y_proba:0")
logits    = Y_proba.op.inputs[0]

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits") # 可從所有變數中抓出可以更新（訓練）的變數，利用 scope 抓出該層的變數
optimizer         = tf.train.AdamOptimizer(learning_rate=0.01, name="Adam2")
training_op       = optimizer.minimize(loss, var_list=output_layer_vars)                # 再從 optimizer 這裡限制要被更新的變數有哪些

correct  = tf.equal(tf.argmax(logits, 1), y)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

saver  = tf.train.Saver()
config = tf.ConfigProto(device_count = {'GPU': 1}) # 指定 GPU id，限制使用它

In [8]:
checkpoint_dir = 'save/Team07_HW3_1.ckpt' # 新的 model 儲存位置

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    restore_saver.restore(sess, "save/Team07_HW2.ckpt") # 從 HW2 存的 ckpt 中回復訓練好的變數
    for var in output_layer_vars:                       # 針對會被更新的變數做 initial
        var.initializer.run()

    t0 = time.time()
    run(sess, X_train2, y_train2, X_valid2, y_valid2, False) # 開始訓練，不使用 cache
    
    print("Total training time: {:.1f}s".format(time.time() - t0))

with tf.Session(config=config) as sess:
    saver.restore(sess, checkpoint_dir) # 從訓練過程中儲存的 ckpt 取回 model
    acc_test = sess.run(accuracy, feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from save/Team07_HW2.ckpt
0	Validation loss: 1.305937	Best loss: 1.305937	Accuracy: 59.33%
1	Validation loss: 1.257764	Best loss: 1.257764	Accuracy: 63.33%
2	Validation loss: 1.150286	Best loss: 1.150286	Accuracy: 75.33%
3	Validation loss: 1.128988	Best loss: 1.128988	Accuracy: 77.33%
4	Validation loss: 1.103042	Best loss: 1.103042	Accuracy: 80.67%
5	Validation loss: 1.108267	Best loss: 1.103042	Accuracy: 80.00%
6	Validation loss: 1.070573	Best loss: 1.070573	Accuracy: 82.00%
7	Validation loss: 1.059471	Best loss: 1.059471	Accuracy: 85.33%
8	Validation loss: 1.066965	Best loss: 1.059471	Accuracy: 85.33%
9	Validation loss: 1.091169	Best loss: 1.059471	Accuracy: 83.33%
10	Validation loss: 1.076296	Best loss: 1.059471	Accuracy: 83.33%
11	Validation loss: 1.057558	Best loss: 1.057558	Accuracy: 85.33%
12	Validation loss: 1.058213	Best loss: 1.057558	Accuracy: 85.33%
13	Validation loss: 1.066885	Best loss: 1.057558	Accuracy: 84.00%
14	Validation loss: 1.0

In [9]:
##### 
#
#     Homework 3.2 Cache
#
#
#     基本上結果一樣會是：Final test accuracy: 84.80% 上下，而時間約快 0.2 - 0.5 秒，
#     因為 early stop 的原因，epoch 次數不會太多，因此時間差距不會被凸顯出來，
#     若是到 1000 epochs ，則可以快至少 6 秒。
#
#####

In [10]:
reset_graph()

restore_saver = tf.train.import_meta_graph("save/Team07_HW2.ckpt.meta")            # 回復 HW2 完成的 graph 及其 model

X         = tf.get_default_graph().get_tensor_by_name("X:0")                       # 利用 name 抓出 tf 變數
y         = tf.get_default_graph().get_tensor_by_name("y:0")
loss      = tf.get_default_graph().get_tensor_by_name("calc_loss/loss:0")          # 因為在 HW2 中有用到 with scope，因此有前綴
Y_proba   = tf.get_default_graph().get_tensor_by_name("DNN/Y_proba:0")
logits    = Y_proba.op.inputs[0]

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits") # 可從所有變數中抓出可以更新（訓練）的變數，利用 scope 抓出該層的變數
optimizer         = tf.train.AdamOptimizer(learning_rate=0.01, name="Adam2")
training_op       = optimizer.minimize(loss, var_list=output_layer_vars)                # 再從 optimizer 這裡限制要被更新的變數有哪些

correct  = tf.equal(tf.argmax(logits, 1), y)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

saver  = tf.train.Saver()
config = tf.ConfigProto(device_count = {'GPU': 1}) # 指定 GPU id，限制使用它

In [11]:
hidden5_out = tf.get_default_graph().get_tensor_by_name("DNN/hidden5/Elu:0")       # 第1-5層因為都不會更動，所以可直接拿第五層的結果做 softmax 層的計算

In [12]:
checkpoint_dir="save/Team07_HW3_2.ckpt"

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    restore_saver.restore(sess, "save/Team07_HW2.ckpt") # 從 HW2 存的 ckpt 中回復訓練好的變數
    for var in output_layer_vars:                       # 針對會被更新的變數做 initial
        var.initializer.run()

    t0 = time.time()
    run(sess, X_train2, y_train2, X_valid2, y_valid2, True) # 開始訓練，不使用 cache
    
    print("Total training time: {:.1f}s".format(time.time() - t0))

with tf.Session(config=config) as sess:
    saver.restore(sess, checkpoint_dir) # 從訓練過程中儲存的 ckpt 取回 model
    acc_test = sess.run(accuracy, feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from save/Team07_HW2.ckpt
0	Validation loss: 1.305937	Best loss: 1.305937	Accuracy: 59.33%
1	Validation loss: 1.257764	Best loss: 1.257764	Accuracy: 63.33%
2	Validation loss: 1.150286	Best loss: 1.150286	Accuracy: 75.33%
3	Validation loss: 1.128988	Best loss: 1.128988	Accuracy: 77.33%
4	Validation loss: 1.103041	Best loss: 1.103041	Accuracy: 80.67%
5	Validation loss: 1.108267	Best loss: 1.103041	Accuracy: 80.00%
6	Validation loss: 1.070573	Best loss: 1.070573	Accuracy: 82.00%
7	Validation loss: 1.059471	Best loss: 1.059471	Accuracy: 85.33%
8	Validation loss: 1.066965	Best loss: 1.059471	Accuracy: 85.33%
9	Validation loss: 1.091168	Best loss: 1.059471	Accuracy: 83.33%
10	Validation loss: 1.076296	Best loss: 1.059471	Accuracy: 83.33%
11	Validation loss: 1.057558	Best loss: 1.057558	Accuracy: 85.33%
12	Validation loss: 1.058213	Best loss: 1.057558	Accuracy: 85.33%
13	Validation loss: 1.066885	Best loss: 1.057558	Accuracy: 84.00%
14	Validation loss: 1.0

In [13]:
#####
#
#     Homework 3.3 4 layer instead
#
#
#     基本上結果會是：Final test accuracy: 87.22% 上下，進步 3%。
#
#####

In [14]:
reset_graph()

restore_saver = tf.train.import_meta_graph("save/Team07_HW2.ckpt.meta")            # 回復 HW2 完成的 graph 及其 model

X         = tf.get_default_graph().get_tensor_by_name("X:0")                       # 利用 name 抓出 tf 變數
y         = tf.get_default_graph().get_tensor_by_name("y:0")
hidden4   = tf.get_default_graph().get_tensor_by_name("DNN/hidden4/Elu:0")
logits    = tf.layers.dense(hidden4, 5, name="new_logits")
Y_proba   = tf.nn.softmax(logits)

entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(entropy)

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="new_logits") # 可從所有變數中抓出可以更新（訓練）的變數，利用 scope 抓出該層的變數
optimizer         = tf.train.AdamOptimizer(learning_rate=0.01, name="Adam2")
training_op       = optimizer.minimize(loss, var_list=output_layer_vars)                # 再從 optimizer 這裡限制要被更新的變數有哪些

correct  = tf.equal(tf.argmax(logits, 1), y)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

saver  = tf.train.Saver()
config = tf.ConfigProto(device_count = {'GPU': 1}) # 指定 GPU id，限制使用它

In [15]:
checkpoint_dir="save/Team07_HW3_3.ckpt"

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    restore_saver.restore(sess, "save/Team07_HW2.ckpt") # 從 HW2 存的 ckpt 中回復訓練好的變數
    for var in output_layer_vars:                       # 針對會被更新的變數做 initial
        var.initializer.run()

    t0 = time.time()
    run(sess, X_train2, y_train2, X_valid2, y_valid2, False) # 開始訓練，不使用 cache
    
    print("Total training time: {:.1f}s".format(time.time() - t0))

with tf.Session(config=config) as sess:
    saver.restore(sess, checkpoint_dir) # 從訓練過程中儲存的 ckpt 取回 model
    acc_test = sess.run(accuracy, feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from save/Team07_HW2.ckpt
0	Validation loss: 0.680522	Best loss: 0.680522	Accuracy: 75.33%
1	Validation loss: 0.516078	Best loss: 0.516078	Accuracy: 82.67%
2	Validation loss: 0.476066	Best loss: 0.476066	Accuracy: 84.00%
3	Validation loss: 0.417379	Best loss: 0.417379	Accuracy: 86.00%
4	Validation loss: 0.405972	Best loss: 0.405972	Accuracy: 86.00%
5	Validation loss: 0.425633	Best loss: 0.405972	Accuracy: 86.00%
6	Validation loss: 0.381811	Best loss: 0.381811	Accuracy: 89.33%
7	Validation loss: 0.401535	Best loss: 0.381811	Accuracy: 86.67%
8	Validation loss: 0.404661	Best loss: 0.381811	Accuracy: 86.67%
9	Validation loss: 0.395030	Best loss: 0.381811	Accuracy: 86.00%
10	Validation loss: 0.383258	Best loss: 0.381811	Accuracy: 86.67%
11	Validation loss: 0.377392	Best loss: 0.377392	Accuracy: 89.33%
12	Validation loss: 0.384404	Best loss: 0.377392	Accuracy: 88.67%
13	Validation loss: 0.386250	Best loss: 0.377392	Accuracy: 88.67%
14	Validation loss: 0.4

In [16]:
#####
#
#     Homework 3.4 Bonus
#
#
#     基本上結果會是：Final test accuracy: 88.75% 上下，比 3.3 進步 1%。
#
#####

In [17]:
reset_graph()

restore_saver = tf.train.import_meta_graph("save/Team07_HW2.ckpt.meta")            # 回復 HW2 完成的 graph 及其 model

X         = tf.get_default_graph().get_tensor_by_name("X:0")                       # 利用 name 抓出 tf 變數
y         = tf.get_default_graph().get_tensor_by_name("y:0")
loss      = tf.get_default_graph().get_tensor_by_name("calc_loss/loss:0")          # 因為在 HW2 中有用到 with scope，因此有前綴
accuracy  = tf.get_default_graph().get_tensor_by_name("calc_accuracy/accuracy:0")

hidden4   = tf.get_default_graph().get_tensor_by_name("DNN/hidden4/Elu:0")
logits    = tf.layers.dense(hidden4, 5, name="new_logits")
Y_proba   = tf.nn.softmax(logits)

entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(entropy)

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden[12]|new_logits") # 可從所有變數中抓出可以更新（訓練）的變數，利用 scope 抓出該層的變數
optimizer         = tf.train.AdamOptimizer(learning_rate=0.01, name="Adam2")
training_op       = optimizer.minimize(loss, var_list=output_layer_vars)                # 再從 optimizer 這裡限制要被更新的變數有哪些

correct  = tf.equal(tf.argmax(logits, 1), y)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

saver  = tf.train.Saver()
config = tf.ConfigProto(device_count = {'GPU': 1}) # 指定 GPU id，限制使用它

In [18]:
checkpoint_dir="save/Team07_HW3_4.ckpt"

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    restore_saver.restore(sess, "save/Team07_HW2.ckpt") # 從 HW2 存的 ckpt 中回復訓練好的變數
    for var in output_layer_vars:                       # 針對會被更新的變數做 initial
        var.initializer.run()

    t0 = time.time()
    run(sess, X_train2, y_train2, X_valid2, y_valid2, False) # 開始訓練，不使用 cache
    
    print("Total training time: {:.1f}s".format(time.time() - t0))

with tf.Session(config=config) as sess:
    saver.restore(sess, checkpoint_dir) # 從訓練過程中儲存的 ckpt 取回 model
    acc_test = sess.run(accuracy, feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from save/Team07_HW2.ckpt
0	Validation loss: 0.554901	Best loss: 0.554901	Accuracy: 86.00%
1	Validation loss: 0.457852	Best loss: 0.457852	Accuracy: 88.67%
2	Validation loss: 0.655102	Best loss: 0.457852	Accuracy: 85.33%
3	Validation loss: 0.489796	Best loss: 0.457852	Accuracy: 86.00%
4	Validation loss: 0.509009	Best loss: 0.457852	Accuracy: 90.00%
5	Validation loss: 0.489723	Best loss: 0.457852	Accuracy: 92.67%
6	Validation loss: 0.463676	Best loss: 0.457852	Accuracy: 93.33%
7	Validation loss: 0.474216	Best loss: 0.457852	Accuracy: 92.67%
8	Validation loss: 0.479849	Best loss: 0.457852	Accuracy: 93.33%
9	Validation loss: 0.591258	Best loss: 0.457852	Accuracy: 90.00%
10	Validation loss: 0.538405	Best loss: 0.457852	Accuracy: 92.00%
11	Validation loss: 0.643287	Best loss: 0.457852	Accuracy: 90.00%
12	Validation loss: 0.634465	Best loss: 0.457852	Accuracy: 90.67%
13	Validation loss: 0.682020	Best loss: 0.457852	Accuracy: 84.67%
14	Validation loss: 0.9