## Notebook settings

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import KFold  # import sklearn Kfold to implement cross-validation


# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

## Prepare data

In [2]:
# load data: digits 5 to 9, but still label with 0 to 4, 
# because TensorFlow expects label's integers from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5

X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5

X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# we want to keep only 100 instances per class in the training set 
# and let's keep only 30 instances per class in the validation set
# tesing set is already loaded above
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

## Hyper parameters

In [4]:
lr = 5e-3
batch_size = 16
epochs = 1000
saturate_limit = 20  # for applying early stopping
fold_num = 5

## HW 3.1 - Softmax-only transfer-learning

### Model

In [5]:
pretrained_weights_path = './pretrained_hw2_weights/Team59_HW2.ckpt.meta'

inputs_num = 784
outputs_num = 5

tf.reset_default_graph()
saver = tf.train.import_meta_graph(pretrained_weights_path)
graph = tf.get_default_graph()

X = graph.get_tensor_by_name('X:0')
y = graph.get_tensor_by_name('y:0')
loss_function = graph.get_tensor_by_name('loss:0')
Y_prob = graph.get_tensor_by_name('Y_probability:0')
logits = Y_prob.op.inputs[0]
accuracy = graph.get_tensor_by_name('accuracy:0')

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='logits')
optimizer = tf.train.AdamOptimizer(learning_rate=lr, name='op_HW3-1')
training_op = optimizer.minimize(loss_function, var_list=output_layer_vars)


In [6]:
# import sklearn Kfold to implement cross-validation
from sklearn.model_selection import KFold

# Merge training and validation set for K-fold cross validation
X_data = np.concatenate((X_train2, X_valid2), axis=0)
y_data = np.concatenate((y_train2, y_valid2), axis=0)

# 10-folder
kf = KFold(n_splits=fold_num, shuffle=True)

for fold_index, fold_indices in enumerate(kf.split(X_data), 1):
    # get data of this fold
    train_fold_indices, test_fold_indices = fold_indices
    X_train_fold, X_valid_fold = X_data[train_fold_indices], X_data[test_fold_indices]
    y_train_fold, y_valid_fold = y_data[train_fold_indices], y_data[test_fold_indices]
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        
        # parameters
        saturate_count = 0
        best_acc = 0.
        best_loss = 1000.
        best_epoch = -1
        iterations = int(X_train_fold.shape[0] / batch_size)
        
        # training for number of epochs times
        for e in range(1, epochs + 1):
            for i in range(iterations):
                if(i==0):
                    batch_x = X_train_fold[:batch_size]
                    batch_y = y_train_fold[:batch_size]
                else:
                    batch_x = X_train_fold[i * batch_size : (i + 1) * batch_size]
                    batch_y = y_train_fold[i * batch_size : (i + 1) * batch_size]
                    
                sess.run(training_op, feed_dict={X: batch_x, y: batch_y})
        
            # validate
            loss, acc = sess.run([loss_function, accuracy], feed_dict={X: X_valid_fold, y: y_valid_fold})
        
            if best_loss > loss:
                best_acc = acc
                best_loss = loss
                best_epoch = e
                saturate_count = 0
            else:
                saturate_count += 1

                if saturate_count >= saturate_limit:  # stop if saturate
                    break
    
        # print the best result of this fold cross all epochs
        print('*' * 60)
        print('Fold:', fold_index)
        print('Best epoch:', best_epoch)
        print('Best accurancy:', best_acc)
        print('Best loss:', best_loss)
        print('*' * 60, '\n')
    
        # print the test data accurancy
        print('=' * 60)
        test_acc = sess.run(accuracy,feed_dict={X: X_test2,
                                                y: y_test2})
        print("Test data accurancy", test_acc)
        print('=' * 60, '\n')
        

************************************************************
Fold: 1
Best epoch: 37
Best accurancy: 0.861538
Best loss: 0.36461
************************************************************ 

Test data accurancy 0.819173

************************************************************
Fold: 2
Best epoch: 73
Best accurancy: 0.876923
Best loss: 0.362955
************************************************************ 

Test data accurancy 0.827402

************************************************************
Fold: 3
Best epoch: 68
Best accurancy: 0.823077
Best loss: 0.474092
************************************************************ 

Test data accurancy 0.828225

************************************************************
Fold: 4
Best epoch: 101
Best accurancy: 0.884615
Best loss: 0.322719
************************************************************ 

Test data accurancy 0.832545

************************************************************
Fold: 5
Best epoch: 56
Best accurancy: 0.869231
Bes

## HW 3.2 - Caching the 5th layer

## HW 3.3 - 4-layers-only transfer-learning

## HW 3.4 - Bonus