## Notebook settings

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import KFold  # import sklearn Kfold to implement cross-validation


# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

## Prepare data

In [2]:
# load data: digits 5 to 9, but still label with 0 to 4, 
# because TensorFlow expects label's integers from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5

X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5

X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# we want to keep only 100 instances per class in the training set 
# and let's keep only 30 instances per class in the validation set
# tesing set is already loaded above
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

## Hyper parameters

In [4]:
lr = 5e-3
batch_size = 16
epochs = 1000
saturate_limit = 20  # for applying early stopping

## HW 3.1 - Softmax-only transfer-learning

### Model

In [5]:
pretrained_weights_path = './pretrained_hw2_weights/Team59_HW2.ckpt.meta'

inputs_num = 784
outputs_num = 5

tf.reset_default_graph()
saver = tf.train.import_meta_graph(pretrained_weights_path)
graph = tf.get_default_graph()

X = graph.get_tensor_by_name('X:0')
y = graph.get_tensor_by_name('y:0')
loss_function = graph.get_tensor_by_name('loss:0')
Y_prob = graph.get_tensor_by_name('Y_probability:0')
logits = Y_prob.op.inputs[0]
accuracy = graph.get_tensor_by_name('accuracy:0')

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='logits')
optimizer = tf.train.AdamOptimizer(learning_rate=lr, name='op_HW3-1')
training_op = optimizer.minimize(loss_function, var_list=output_layer_vars)


### Training

In [6]:
import time

t0 = time.time()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
        
    # parameters
    saturate_count = 0
    best_acc = 0.
    best_loss = 1000.
    best_epoch = -1
    iterations = int(X_train2.shape[0] / batch_size)
        
    # training for number of epochs times
    for e in range(1, epochs + 1):
        for i in range(iterations):
            if(i==0):
                batch_x = X_train2[:batch_size]
                batch_y = y_train2[:batch_size]
            else:
                batch_x = X_train2[i * batch_size : (i + 1) * batch_size]
                batch_y = y_train2[i * batch_size : (i + 1) * batch_size]
                    
            sess.run(training_op, feed_dict={X: batch_x, y: batch_y})
        
        # validate
        loss, acc = sess.run([loss_function, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        
        if best_loss > loss:
            best_acc = acc
            best_loss = loss
            best_epoch = e
            saturate_count = 0
        else:
            saturate_count += 1

            if saturate_count >= saturate_limit:  # stop if saturate
                t1 = time.time()
                break
                
        print(e,'\t', 'Validation loss:', loss, '\t', 'Best loss:', best_loss, '\t','Accuracy:', acc)
    
    # print the test data accurancy
    print('=' * 60)
    print('Test data accurancy', sess.run(accuracy,feed_dict={X: X_test2,
                                                              y: y_test2}))
    print('Time:', (t1-t0))
    print('=' * 60, '\n')
        

1 	 Validation loss: 1.32267 	 Best loss: 1.32267 	 Accuracy: 0.42
2 	 Validation loss: 0.872994 	 Best loss: 0.872994 	 Accuracy: 0.7
3 	 Validation loss: 0.733053 	 Best loss: 0.733053 	 Accuracy: 0.766667
4 	 Validation loss: 0.673144 	 Best loss: 0.673144 	 Accuracy: 0.78
5 	 Validation loss: 0.639253 	 Best loss: 0.639253 	 Accuracy: 0.8
6 	 Validation loss: 0.615424 	 Best loss: 0.615424 	 Accuracy: 0.8
7 	 Validation loss: 0.598731 	 Best loss: 0.598731 	 Accuracy: 0.793333
8 	 Validation loss: 0.58612 	 Best loss: 0.58612 	 Accuracy: 0.8
9 	 Validation loss: 0.576396 	 Best loss: 0.576396 	 Accuracy: 0.806667
10 	 Validation loss: 0.568662 	 Best loss: 0.568662 	 Accuracy: 0.8
11 	 Validation loss: 0.562381 	 Best loss: 0.562381 	 Accuracy: 0.793333
12 	 Validation loss: 0.557183 	 Best loss: 0.557183 	 Accuracy: 0.806667
13 	 Validation loss: 0.552812 	 Best loss: 0.552812 	 Accuracy: 0.813333
14 	 Validation loss: 0.549086 	 Best loss: 0.549086 	 Accuracy: 0.82
15 	 Validatio

## HW 3.2 - Caching the 5th layer

### Model

In [7]:
pretrained_weights_path = './pretrained_hw2_weights/Team59_HW2.ckpt.meta'

inputs_num = 784
outputs_num = 5

he_init = tf.contrib.layers.variance_scaling_initializer()

tf.reset_default_graph()
saver = tf.train.import_meta_graph(pretrained_weights_path)
graph = tf.get_default_graph()

X = graph.get_tensor_by_name('X:0')
y = graph.get_tensor_by_name('y:0')
loss_function = graph.get_tensor_by_name('loss:0')
Y_prob = graph.get_tensor_by_name('Y_probability:0')
logits = Y_prob.op.inputs[0]
accuracy = graph.get_tensor_by_name('accuracy:0')

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='logits')
optimizer = tf.train.AdamOptimizer(learning_rate=lr, name='op_HW3-2')
training_op = optimizer.minimize(loss_function, var_list=output_layer_vars)

# print(tf.get_default_graph().get_operations())

# get 5th layer output
dense5_output = graph.get_tensor_by_name('logits/MatMul:0')  # logits layer
dense5_output.op.inputs[0] # 5th layer output shape(?, 128)


<tf.Tensor 'dense5/Elu:0' shape=(?, 128) dtype=float32>

### Trainging

In [8]:
import time

t0 = time.time()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
        
    # parameters
    saturate_count = 0
    best_acc = 0.
    best_loss = 1000.
    best_epoch = -1
    iterations = int(X_train2.shape[0] / batch_size)
    
    # feed the X_train2 to get the 5th output
    logits_train_input = sess.run(dense5_output.op.inputs[0], feed_dict={X: X_train2})
        
    # training for number of epochs times
    for e in range(1, epochs + 1):
        for i in range(iterations):
            if(i==0):
                batch_x = logits_train_input[:batch_size]
                batch_y = y_train2[:batch_size]
            else:
                batch_x = logits_train_input[i * batch_size : (i + 1) * batch_size]
                batch_y = y_train2[i * batch_size : (i + 1) * batch_size]
            
            sess.run(training_op, feed_dict={dense5_output.op.inputs[0]: batch_x, y: batch_y})
        
        # validate
        logits_valid_input = sess.run(dense5_output.op.inputs[0], feed_dict={X: X_valid2})        
        loss, acc = sess.run([loss_function, accuracy], feed_dict={dense5_output.op.inputs[0]: logits_valid_input,
                                                                   y: y_valid2})
        
        if best_loss > loss:
            best_acc = acc
            best_loss = loss
            best_epoch = e
            saturate_count = 0
        else:
            saturate_count += 1

            if saturate_count >= saturate_limit:  # stop if saturate
                t1 = time.time()
                break
                
        print(e,'\t', 'Validation loss:', loss, '\t', 'Best loss:', best_loss, '\t','Accuracy:', acc)
    
    # print the test data accurancy
    print('=' * 60)
    print('Test data accurancy', sess.run(accuracy,feed_dict={X: X_test2,
                                                              y: y_test2}))
    print('Time:', (t1-t0))
    print('=' * 60, '\n')
        

1 	 Validation loss: 1.32267 	 Best loss: 1.32267 	 Accuracy: 0.42
2 	 Validation loss: 0.872994 	 Best loss: 0.872994 	 Accuracy: 0.7
3 	 Validation loss: 0.733053 	 Best loss: 0.733053 	 Accuracy: 0.766667
4 	 Validation loss: 0.673144 	 Best loss: 0.673144 	 Accuracy: 0.78
5 	 Validation loss: 0.639253 	 Best loss: 0.639253 	 Accuracy: 0.8
6 	 Validation loss: 0.615424 	 Best loss: 0.615424 	 Accuracy: 0.8
7 	 Validation loss: 0.598731 	 Best loss: 0.598731 	 Accuracy: 0.793333
8 	 Validation loss: 0.58612 	 Best loss: 0.58612 	 Accuracy: 0.8
9 	 Validation loss: 0.576396 	 Best loss: 0.576396 	 Accuracy: 0.806667
10 	 Validation loss: 0.568662 	 Best loss: 0.568662 	 Accuracy: 0.8
11 	 Validation loss: 0.562381 	 Best loss: 0.562381 	 Accuracy: 0.793333
12 	 Validation loss: 0.557183 	 Best loss: 0.557183 	 Accuracy: 0.806667
13 	 Validation loss: 0.552812 	 Best loss: 0.552812 	 Accuracy: 0.813333
14 	 Validation loss: 0.549086 	 Best loss: 0.549086 	 Accuracy: 0.82
15 	 Validatio

## HW 3.3 - 4-layers-only transfer-learning

### Model

In [9]:
pretrained_weights_path = './pretrained_hw2_weights/Team59_HW2.ckpt.meta'

inputs_num = 784
outputs_num = 5

he_init = tf.contrib.layers.variance_scaling_initializer()

tf.reset_default_graph()
saver = tf.train.import_meta_graph(pretrained_weights_path)
graph = tf.get_default_graph()

X = graph.get_tensor_by_name('X:0')
y = graph.get_tensor_by_name('y:0')
loss_function = graph.get_tensor_by_name('loss:0')
Y_prob = graph.get_tensor_by_name('Y_probability:0')
logits = Y_prob.op.inputs[0]
accuracy = graph.get_tensor_by_name('accuracy:0')

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='logits')
optimizer = tf.train.AdamOptimizer(learning_rate=lr, name='op_HW3-3')
training_op = optimizer.minimize(loss_function, var_list=output_layer_vars)

# print(tf.get_default_graph().get_operations())

# get 4th layer output
dense4_output = graph.get_tensor_by_name('dense4/Elu:0')  # 4th output
dense4_output


<tf.Tensor 'dense4/Elu:0' shape=(?, 128) dtype=float32>

### Training

In [10]:
import time

t0 = time.time()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
        
    # parameters
    saturate_count = 0
    best_acc = 0.
    best_loss = 1000.
    best_epoch = -1
    iterations = int(X_train2.shape[0] / batch_size)
    
    # feed the X_train2 to get the 4th output
    logits_train_input = sess.run(dense4_output, feed_dict={X: X_train2})
        
    # training for number of epochs times
    for e in range(1, epochs + 1):
        for i in range(iterations):
            if(i==0):
                batch_x = logits_train_input[:batch_size]
                batch_y = y_train2[:batch_size]
            else:
                batch_x = logits_train_input[i * batch_size : (i + 1) * batch_size]
                batch_y = y_train2[i * batch_size : (i + 1) * batch_size]
            
            sess.run(training_op, feed_dict={dense4_output: batch_x, y: batch_y})
        
        # validate
        logits_valid_input = sess.run(dense4_output, feed_dict={X: X_valid2})        
        loss, acc = sess.run([loss_function, accuracy], feed_dict={dense4_output: logits_valid_input,
                                                                   y: y_valid2})
        
        if best_loss > loss:
            best_acc = acc
            best_loss = loss
            best_epoch = e
            saturate_count = 0
        else:
            saturate_count += 1

            if saturate_count >= saturate_limit:  # stop if saturate
                t1 = time.time()
                break
                
        print(e,'\t', 'Validation loss:', loss, '\t', 'Best loss:', best_loss, '\t','Accuracy:', acc)
    
    # print the test data accurancy
    print('=' * 60)
    print('Test data accurancy', sess.run(accuracy,feed_dict={X: X_test2,
                                                              y: y_test2}))
    print('Time:', (t1-t0))
    print('=' * 60, '\n')

1 	 Validation loss: 1.32267 	 Best loss: 1.32267 	 Accuracy: 0.42
2 	 Validation loss: 0.872994 	 Best loss: 0.872994 	 Accuracy: 0.7
3 	 Validation loss: 0.733053 	 Best loss: 0.733053 	 Accuracy: 0.766667
4 	 Validation loss: 0.673144 	 Best loss: 0.673144 	 Accuracy: 0.78
5 	 Validation loss: 0.639253 	 Best loss: 0.639253 	 Accuracy: 0.8
6 	 Validation loss: 0.615424 	 Best loss: 0.615424 	 Accuracy: 0.8
7 	 Validation loss: 0.598731 	 Best loss: 0.598731 	 Accuracy: 0.793333
8 	 Validation loss: 0.58612 	 Best loss: 0.58612 	 Accuracy: 0.8
9 	 Validation loss: 0.576396 	 Best loss: 0.576396 	 Accuracy: 0.806667
10 	 Validation loss: 0.568662 	 Best loss: 0.568662 	 Accuracy: 0.8
11 	 Validation loss: 0.562381 	 Best loss: 0.562381 	 Accuracy: 0.793333
12 	 Validation loss: 0.557183 	 Best loss: 0.557183 	 Accuracy: 0.806667
13 	 Validation loss: 0.552812 	 Best loss: 0.552812 	 Accuracy: 0.813333
14 	 Validation loss: 0.549086 	 Best loss: 0.549086 	 Accuracy: 0.82
15 	 Validatio

## HW 3.4 - Bonus

### Model

### Training