## Notebook settings

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import KFold  # import sklearn Kfold to implement cross-validation


# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

## Prepare data

In [2]:
# load data: digits 5 to 9, but still label with 0 to 4, 
# because TensorFlow expects label's integers from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5

X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5

X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# we want to keep only 100 instances per class in the training set 
# and let's keep only 30 instances per class in the validation set
# tesing set is already loaded above
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

## Hyper parameters

In [4]:
lr = 5e-3
batch_size = 16
epochs = 1000
saturate_limit = 20  # for applying early stopping

## HW 3.1 - Softmax-only transfer-learning

### Model

In [5]:
# reset the graph
tf.reset_default_graph()

# load HW2 pre-trained model
pretrained_weights_path = './pretrained_hw2_weights/Team59_HW2.ckpt.meta'
saver = tf.train.import_meta_graph(pretrained_weights_path)
graph = tf.get_default_graph()

inputs_num = 784
outputs_num = 5

X = graph.get_tensor_by_name('X:0')
y = graph.get_tensor_by_name('y:0')
loss_function = graph.get_tensor_by_name('loss:0')
Y_prob = graph.get_tensor_by_name('Y_probability:0')
logits = Y_prob.op.inputs[0]
accuracy = graph.get_tensor_by_name('accuracy:0')

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='logits')
optimizer = tf.train.AdamOptimizer(learning_rate=lr, name='op_HW3-1')
training_op = optimizer.minimize(loss_function, var_list=output_layer_vars)

# create hw3-1 saver
five_frozen_saver = tf.train.Saver()


### Training

In [6]:
import time

t0 = time.time()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
        
    # parameters
    saturate_count = 0
    best_acc = 0.
    best_loss = 1000.
    best_epoch = -1
    iterations = int(X_train2.shape[0] / batch_size)
        
    # training for number of epochs times
    for e in range(1, epochs + 1):
        for i in range(iterations):
            if(i==0):
                batch_x = X_train2[:batch_size]
                batch_y = y_train2[:batch_size]
            else:
                batch_x = X_train2[i * batch_size : (i + 1) * batch_size]
                batch_y = y_train2[i * batch_size : (i + 1) * batch_size]
                    
            sess.run(training_op, feed_dict={X: batch_x, y: batch_y})
        
        # validate
        loss, acc = sess.run([loss_function, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        
        if best_loss > loss:
            save_path = five_frozen_saver.save(sess, "./pretrained_hw3_1_weights/Team59_HW3_1.ckpt")
            best_acc = acc
            best_loss = loss
            best_epoch = e
            saturate_count = 0
        else:
            saturate_count += 1

            if saturate_count >= saturate_limit:  # stop if saturate
                break
                
        print(e,'\t', 'Validation loss:', loss, '\t', 'Best loss:', best_loss, '\t','Accuracy:', acc)
        
    t1 = time.time()
    
# print the test data accurancy
with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./pretrained_hw3_1_weights/Team59_HW3_1.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print('=' * 60)
    print('Test data accurancy', acc_test)
    print('Time:', (t1-t0))
    print('=' * 60, '\n')
        

1 	 Validation loss: 1.32267 	 Best loss: 1.32267 	 Accuracy: 0.42
2 	 Validation loss: 0.872994 	 Best loss: 0.872994 	 Accuracy: 0.7
3 	 Validation loss: 0.733053 	 Best loss: 0.733053 	 Accuracy: 0.766667
4 	 Validation loss: 0.673144 	 Best loss: 0.673144 	 Accuracy: 0.78
5 	 Validation loss: 0.639253 	 Best loss: 0.639253 	 Accuracy: 0.8
6 	 Validation loss: 0.615424 	 Best loss: 0.615424 	 Accuracy: 0.8
7 	 Validation loss: 0.598731 	 Best loss: 0.598731 	 Accuracy: 0.793333
8 	 Validation loss: 0.58612 	 Best loss: 0.58612 	 Accuracy: 0.8
9 	 Validation loss: 0.576396 	 Best loss: 0.576396 	 Accuracy: 0.806667
10 	 Validation loss: 0.568662 	 Best loss: 0.568662 	 Accuracy: 0.8
11 	 Validation loss: 0.562381 	 Best loss: 0.562381 	 Accuracy: 0.793333
12 	 Validation loss: 0.557183 	 Best loss: 0.557183 	 Accuracy: 0.806667
13 	 Validation loss: 0.552812 	 Best loss: 0.552812 	 Accuracy: 0.813333
14 	 Validation loss: 0.549086 	 Best loss: 0.549086 	 Accuracy: 0.82
15 	 Validatio

## HW 3.2 - Caching the 5th layer

### Model

In [7]:
# reset the graph
tf.reset_default_graph()

# load HW2 pre-trained model
pretrained_weights_path = './pretrained_hw2_weights/Team59_HW2.ckpt.meta'
saver = tf.train.import_meta_graph(pretrained_weights_path)
graph = tf.get_default_graph()

inputs_num = 784
outputs_num = 5

he_init = tf.contrib.layers.variance_scaling_initializer()

X = graph.get_tensor_by_name('X:0')
y = graph.get_tensor_by_name('y:0')
loss_function = graph.get_tensor_by_name('loss:0')
Y_prob = graph.get_tensor_by_name('Y_probability:0')
logits = Y_prob.op.inputs[0]
accuracy = graph.get_tensor_by_name('accuracy:0')

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='logits')
optimizer = tf.train.AdamOptimizer(learning_rate=lr, name='op_HW3-2')
training_op = optimizer.minimize(loss_function, var_list=output_layer_vars)

# print(tf.get_default_graph().get_operations())

# get 5th layer output
dense5_output = graph.get_tensor_by_name('logits/MatMul:0')  # logits layer
dense5_output.op.inputs[0] # 5th layer output shape(?, 128)

# create hw3-2 saver
five_cache_saver = tf.train.Saver()


### Trainging

In [8]:
import time

t0 = time.time()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
        
    # parameters
    saturate_count = 0
    best_acc = 0.
    best_loss = 1000.
    best_epoch = -1
    iterations = int(X_train2.shape[0] / batch_size)
    
    # feed the X_train2 to get the 5th output
    logits_train_input = sess.run(dense5_output.op.inputs[0], feed_dict={X: X_train2})
        
    # training for number of epochs times
    for e in range(1, epochs + 1):
        for i in range(iterations):
            if(i==0):
                batch_x = logits_train_input[:batch_size]
                batch_y = y_train2[:batch_size]
            else:
                batch_x = logits_train_input[i * batch_size : (i + 1) * batch_size]
                batch_y = y_train2[i * batch_size : (i + 1) * batch_size]
            
            sess.run(training_op, feed_dict={dense5_output.op.inputs[0]: batch_x, y: batch_y})
        
        # validate
        logits_valid_input = sess.run(dense5_output.op.inputs[0], feed_dict={X: X_valid2})        
        loss, acc = sess.run([loss_function, accuracy], feed_dict={dense5_output.op.inputs[0]: logits_valid_input,
                                                                   y: y_valid2})
        
        if best_loss > loss:
            save_path = five_cache_saver.save(sess, "./pretrained_hw3_2_weights/Team59_HW3_2.ckpt")
            best_acc = acc
            best_loss = loss
            best_epoch = e
            saturate_count = 0
        else:
            saturate_count += 1

            if saturate_count >= saturate_limit:  # stop if saturate
                break
                
        print(e,'\t', 'Validation loss:', loss, '\t', 'Best loss:', best_loss, '\t','Accuracy:', acc)
        
    t1 = time.time()
    
# print the test data accurancy
with tf.Session() as sess:
    five_cache_saver.restore(sess, "./pretrained_hw3_2_weights/Team59_HW3_2.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print('=' * 60)
    print('Test data accurancy', acc_test)
    print('Time:', (t1-t0))
    print('=' * 60, '\n')
        

1 	 Validation loss: 1.32267 	 Best loss: 1.32267 	 Accuracy: 0.42
2 	 Validation loss: 0.872994 	 Best loss: 0.872994 	 Accuracy: 0.7
3 	 Validation loss: 0.733053 	 Best loss: 0.733053 	 Accuracy: 0.766667
4 	 Validation loss: 0.673144 	 Best loss: 0.673144 	 Accuracy: 0.78
5 	 Validation loss: 0.639253 	 Best loss: 0.639253 	 Accuracy: 0.8
6 	 Validation loss: 0.615424 	 Best loss: 0.615424 	 Accuracy: 0.8
7 	 Validation loss: 0.598731 	 Best loss: 0.598731 	 Accuracy: 0.793333
8 	 Validation loss: 0.58612 	 Best loss: 0.58612 	 Accuracy: 0.8
9 	 Validation loss: 0.576396 	 Best loss: 0.576396 	 Accuracy: 0.806667
10 	 Validation loss: 0.568662 	 Best loss: 0.568662 	 Accuracy: 0.8
11 	 Validation loss: 0.562381 	 Best loss: 0.562381 	 Accuracy: 0.793333
12 	 Validation loss: 0.557183 	 Best loss: 0.557183 	 Accuracy: 0.806667
13 	 Validation loss: 0.552812 	 Best loss: 0.552812 	 Accuracy: 0.813333
14 	 Validation loss: 0.549086 	 Best loss: 0.549086 	 Accuracy: 0.82
15 	 Validatio

### Result
Using cache 5th layer method is faster than Hw3-1.

The accuracy is the same value, because the hidden layers aren't trainable.

## HW 3.3 - 4-layers-only transfer-learning

### Model

In [9]:
# reset the graph 
tf.reset_default_graph()

# load HW2 pre-trained model
pretrained_weights_path = './pretrained_hw2_weights/Team59_HW2.ckpt.meta'
saver = tf.train.import_meta_graph(pretrained_weights_path)
graph = tf.get_default_graph()

inputs_num = 784
outputs_num = 5

he_init = tf.contrib.layers.variance_scaling_initializer()

X = graph.get_tensor_by_name('X:0')
y = graph.get_tensor_by_name('y:0')

# get 4th layer output
dense4_output = graph.get_tensor_by_name('dense4/Elu:0')  # 4th output

logits = tf.layers.dense(dense4_output, outputs_num, kernel_initializer=he_init, name="HW3-3_logits")
Y_proba = tf.nn.softmax(logits, name='Y_probability')
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss_function = tf.reduce_mean(xentropy, name='loss')
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="HW3-3_logits")
optimizer = tf.train.AdamOptimizer(learning_rate=lr, name="op_HW3-3")
training_op = optimizer.minimize(loss_function, var_list=output_layer_vars)

# create hw3_3 saver
four_frozen_saver = tf.train.Saver()


### Training

In [10]:
import time

t0 = time.time()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
        
    # parameters
    saturate_count = 0
    best_acc = 0.
    best_loss = 1000.
    best_epoch = -1
    iterations = int(X_train2.shape[0] / batch_size)
        
    # training for number of epochs times
    for e in range(1, epochs + 1):
        for i in range(iterations):
            if(i==0):
                batch_x = X_train2[:batch_size]
                batch_y = y_train2[:batch_size]
            else:
                batch_x = X_train2[i * batch_size : (i + 1) * batch_size]
                batch_y = y_train2[i * batch_size : (i + 1) * batch_size]
            
            sess.run(training_op, feed_dict={X: batch_x, y: batch_y})
        
        # validate       
        loss, acc = sess.run([loss_function, accuracy], feed_dict={X: X_valid2,
                                                          y: y_valid2})
        
        if best_loss > loss:
            save_path = four_frozen_saver.save(sess, "./pretrained_hw3_3_weights/Team59_HW3_3.ckpt")
            best_acc = acc
            best_loss = loss
            best_epoch = e
            saturate_count = 0
        else:
            saturate_count += 1

            if saturate_count >= saturate_limit:  # stop if saturate
                break
                
        print(e,'\t', 'Validation loss:', loss, '\t', 'Best loss:', best_loss, '\t','Accuracy:', acc)
        
    t1 = time.time()
    
with tf.Session() as sess:
    four_frozen_saver.restore(sess, "./pretrained_hw3_3_weights/Team59_HW3_3.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print('=' * 60)
    print('Test data accurancy', acc_test)
    print('Time:', (t1-t0))
    print('=' * 60, '\n')

1 	 Validation loss: 1.39984 	 Best loss: 1.39984 	 Accuracy: 0.54
2 	 Validation loss: 0.918001 	 Best loss: 0.918001 	 Accuracy: 0.646667
3 	 Validation loss: 0.764278 	 Best loss: 0.764278 	 Accuracy: 0.76
4 	 Validation loss: 0.704341 	 Best loss: 0.704341 	 Accuracy: 0.766667
5 	 Validation loss: 0.663606 	 Best loss: 0.663606 	 Accuracy: 0.786667
6 	 Validation loss: 0.63692 	 Best loss: 0.63692 	 Accuracy: 0.8
7 	 Validation loss: 0.617356 	 Best loss: 0.617356 	 Accuracy: 0.82
8 	 Validation loss: 0.602766 	 Best loss: 0.602766 	 Accuracy: 0.826667
9 	 Validation loss: 0.591605 	 Best loss: 0.591605 	 Accuracy: 0.82
10 	 Validation loss: 0.582927 	 Best loss: 0.582927 	 Accuracy: 0.82
11 	 Validation loss: 0.576087 	 Best loss: 0.576087 	 Accuracy: 0.82
12 	 Validation loss: 0.570631 	 Best loss: 0.570631 	 Accuracy: 0.82
13 	 Validation loss: 0.566233 	 Best loss: 0.566233 	 Accuracy: 0.82
14 	 Validation loss: 0.562657 	 Best loss: 0.562657 	 Accuracy: 0.82
15 	 Validation lo

### Result

Using the frozen 4 layers method is better than cache 5 layers.

The accurancy value is higher than HW3-1 and HW3-2.

## HW 3.4 - Bonus

### Model

In [11]:
# reset the graph
tf.reset_default_graph()

# Using HW3-3 pre-train model
pretrained_weights_path = './pretrained_hw3_3_weights/Team59_HW3_3.ckpt.meta'
saver = tf.train.import_meta_graph(pretrained_weights_path)
graph = tf.get_default_graph()

inputs_num = 784
outputs_num = 5

he_init = tf.contrib.layers.variance_scaling_initializer()

X = graph.get_tensor_by_name('X:0')
y = graph.get_tensor_by_name('y:0')

# get 4th layer output
dense4_output = graph.get_tensor_by_name('dense4/Elu:0')  # 4th output

loss_function = graph.get_tensor_by_name('loss:0')
Y_prob = graph.get_tensor_by_name('Y_probability:0')
logits = Y_prob.op.inputs[0]
accuracy = graph.get_tensor_by_name('accuracy:0')

# set the 1th 2th hidden layer and logits trainable
output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="dense[12]|HW3-3_logits")
optimizer = tf.train.AdamOptimizer(learning_rate=lr, name="op_HW3-4")
training_op = optimizer.minimize(loss_function, var_list=output_layer_vars)

# create hw3_4 saver
two_frozen_saver = tf.train.Saver()

# print(tf.get_default_graph().get_operations())

### Training

In [12]:
import time

t0 = time.time()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
        
    # parameters
    saturate_count = 0
    best_acc = 0.
    best_loss = 1000.
    best_epoch = -1
    iterations = int(X_train2.shape[0] / batch_size)
        
    # training for number of epochs times
    for e in range(1, epochs + 1):
        for i in range(iterations):
            if(i==0):
                batch_x = X_train2[:batch_size]
                batch_y = y_train2[:batch_size]
            else:
                batch_x = X_train2[i * batch_size : (i + 1) * batch_size]
                batch_y = y_train2[i * batch_size : (i + 1) * batch_size]
            
            sess.run(training_op, feed_dict={X: batch_x, y: batch_y})
        
        # validate       
        loss, acc = sess.run([loss_function, accuracy], feed_dict={X: X_valid2,
                                                          y: y_valid2})
        
        if best_loss > loss:
            save_path = two_frozen_saver.save(sess, "./pretrained_hw3_4_weights/Team59_HW3_4.ckpt")
            best_acc = acc
            best_loss = loss
            best_epoch = e
            saturate_count = 0
        else:
            saturate_count += 1

            if saturate_count >= saturate_limit:  # stop if saturate
                break
                
        print(e,'\t', 'Validation loss:', loss, '\t', 'Best loss:', best_loss, '\t','Accuracy:', acc)
        
    t1 = time.time()
    
with tf.Session() as sess:
    two_frozen_saver.restore(sess, "./pretrained_hw3_4_weights/Team59_HW3_4.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print('=' * 60)
    print('Test data accurancy', acc_test)
    print('Time:', (t1-t0))
    print('=' * 60, '\n')

1 	 Validation loss: 2.89846 	 Best loss: 2.89846 	 Accuracy: 0.2
2 	 Validation loss: 4.11718 	 Best loss: 2.89846 	 Accuracy: 0.533333
3 	 Validation loss: 3.42302 	 Best loss: 2.89846 	 Accuracy: 0.393333
4 	 Validation loss: 2.92182 	 Best loss: 2.89846 	 Accuracy: 0.573333
5 	 Validation loss: 0.829717 	 Best loss: 0.829717 	 Accuracy: 0.746667
6 	 Validation loss: 3.11796 	 Best loss: 0.829717 	 Accuracy: 0.486667
7 	 Validation loss: 1.30313 	 Best loss: 0.829717 	 Accuracy: 0.686667
8 	 Validation loss: 1.12569 	 Best loss: 0.829717 	 Accuracy: 0.766667
9 	 Validation loss: 0.720165 	 Best loss: 0.720165 	 Accuracy: 0.826667
10 	 Validation loss: 0.505747 	 Best loss: 0.505747 	 Accuracy: 0.82
11 	 Validation loss: 0.431741 	 Best loss: 0.431741 	 Accuracy: 0.88
12 	 Validation loss: 0.426699 	 Best loss: 0.426699 	 Accuracy: 0.873333
13 	 Validation loss: 0.406874 	 Best loss: 0.406874 	 Accuracy: 0.886667
14 	 Validation loss: 0.404326 	 Best loss: 0.404326 	 Accuracy: 0.88
1