## Notebook settings

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import KFold  # import sklearn Kfold to implement cross-validation


# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

## Prepare data

In [2]:
# load data: digits 5 to 9, but still label with 0 to 4, 
# because TensorFlow expects label's integers from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# we want to keep only 100 instances per class in the training set 
# and let's keep only 30 instances per class in the validation set
# tesing set is already loaded above
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

In [4]:
y_train_one_hot = np.zeros((len(y_train2), 5))
y_train_one_hot[np.arange(len(y_train2)), y_train2] = 1

y_valid_one_hot = np.zeros((len(y_valid2), 5))
y_valid_one_hot[np.arange(len(y_valid2)), y_valid2] = 1

y_test_one_hot = np.zeros((len(y_test2), 5))
y_test_one_hot[np.arange(len(y_test2)), y_test2] = 1

print ("Train Label: ", y_train_one_hot.shape)
print ("Validation Label: ", y_valid_one_hot.shape)
print ("Test Label: ", y_test_one_hot.shape)

Train Label:  (500, 5)
Validation Label:  (150, 5)
Test Label:  (4861, 5)


## Model

In [5]:
def layer(output_dim, input_dim, inputs, layer, dropout_rate, activation=None):
    """
    layer construction function
    """
    W = tf.get_variable("W" + layer, shape=[input_dim, output_dim], initializer=tf.contrib.layers.variance_scaling_initializer())
    b = tf.get_variable("b" + layer, shape=[1, output_dim], initializer=tf.contrib.layers.variance_scaling_initializer())

    XWb = tf.matmul(inputs, W) + b
    XWb = tf.nn.dropout(XWb, (1-dropout_rate))
    if activation is None:
        outputs = XWb
    else:
        outputs = activation(XWb)
    return outputs

class MyModel():
    def __init__(self):
        self.define_layers()
        
    def define_layers(self):
        self.x = tf.placeholder("float", [None, 784])  # input layer
        
        # hidden layers
        self.h1 = layer(output_dim=128, input_dim=784, inputs=self.x, layer="h1", dropout_rate=0, activation=tf.nn.elu)
        self.h2 = layer(output_dim=128, input_dim=128, inputs=self.h1, layer="h2", dropout_rate=0, activation=tf.nn.elu)
        self.h3 = layer(output_dim=128, input_dim=128, inputs=self.h2, layer="h3", dropout_rate=0, activation=tf.nn.elu)
        self.h4 = layer(output_dim=128, input_dim=128, inputs=self.h3, layer="h4", dropout_rate=0, activation=tf.nn.elu)
        self.h5 = layer(output_dim=128, input_dim=128, inputs=self.h4, layer="h5", dropout_rate=0, activation=tf.nn.elu)
        
        # output layers
        self.y_predict = layer(output_dim=5, input_dim=128, inputs=self.h5, layer="output", dropout_rate=0, activation=None)
        self.y_label = tf.placeholder("float", [None, 5])
        
model = MyModel()


## Play on "MyModel"

In [8]:
# hyper parameters
lr = 0.005
batch_size = 256
epochs = 350
saturate_limit = 20  # for applying early stopping
fold_num = 10

# parameters
iterations = int(X_train2.shape[0] / batch_size)
saturate_count = 0
best_acc = 0.
best_epoch = -1

# compare prediction and label
correct_prediction = tf.equal(tf.argmax(model.y_label, 1), tf.argmax(model.y_predict, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# merge the train and validation list
X_data = np.concatenate((X_train2, X_valid2), axis=0)
y_data = np.concatenate((y_train_one_hot, y_valid_one_hot), axis=0)

# set the loss function; tf.nn.sparse_softmax_cross_entropy_with_logits required by the homework spec
loss_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=model.y_predict, labels=model.y_label))

# set the optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=lr) \
                    .minimize(loss_function)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    for e in range(epochs):
        for i in range(iterations):
            if(i==0):
                batch_x = X_train2[:batch_size]
                batch_y = y_train_one_hot[:batch_size]
            else:
                batch_x = X_train2[i * batch_size : (i + 1) * batch_size]
                batch_y = y_train_one_hot[i * batch_size : (i + 1) * batch_size]

            sess.run(optimizer, feed_dict={model.x: batch_x, model.y_label: batch_y})
        
        # validate
        loss, acc = sess.run([loss_function, accuracy], feed_dict={model.x: X_valid2, model.y_label: y_valid_one_hot})
        
        if best_acc < acc:
            best_acc = acc
            best_epoch = e
            saturate_count = 0
        else:
            saturate_count += 1
            
            if saturate_count >= saturate_limit:  # stop if saturate
                break        
        
    print('*' * 60)
    print('Best epoch:', best_epoch)
    print('Best accurancy:', best_acc)
    print('*' * 60, '\n')
    
    print('=' * 60)
    print("Accurancy", sess.run(accuracy,
                               feed_dict={model.x: X_test2,
                                          model.y_label: y_test_one_hot}))
    print('=' * 60, '\n')


************************************************************
Best epoch: 26
Best accurancy: 0.586667
************************************************************ 

Accurancy 0.556264



## HW 3.1 - Softmax-only transfer-learning

## HW 3.2 - Caching the 5th layer

## HW 3.3 - 4-layers-only transfer-learning

## HW 3.4 - Bonus