## Download the data

In [1]:
import numpy as np
import os
import tensorflow as tf
import pandas as pd

if 'session' in locals() and session is not None:
    print('Close interactive session')
    session.close()

###### Do not modify here ###### 

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

# training on MNIST but only on digits 0 to 4
X_train1 = mnist.train.images[mnist.train.labels < 5]
y_train1 = mnist.train.labels[mnist.train.labels < 5]
X_valid1 = mnist.validation.images[mnist.validation.labels < 5]
y_valid1 = mnist.validation.labels[mnist.validation.labels < 5]
X_test1 = mnist.test.images[mnist.test.labels < 5]
y_test1 = mnist.test.labels[mnist.test.labels < 5]
###### Do not modify here ###### 


Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


# Get the data shape

In [2]:
print('Train', mnist.train.num_examples, 
      'Validation', mnist.validation.num_examples,
      'Test', mnist.test.num_examples)

print('Train images :', X_train1.shape,
      'Labels :', y_train1.shape)

print('Validation images :', X_valid1.shape,
      'Labels :', y_valid1.shape)

print('Test images :', X_test1.shape,
      'Labels :', y_test1.shape)

Train 55000 Validation 5000 Test 10000
Train images : (28038, 784) Labels : (28038,)
Validation images : (2558, 784) Labels : (2558,)
Test images : (5139, 784) Labels : (5139,)


## Hyper parameters

In [3]:
# hyper parameters
lr = 0.005
batch_size = 256
epochs = 350
saturate_limit = 20  # for applying early stopping
fold_num = 10


## Model

In [4]:
inputs_num = 784
outputs_num = 5

he_init = tf.contrib.layers.variance_scaling_initializer()

def dnn(X):
    # hidden layers
    dense1 = tf.layers.dense(inputs=X, units=128, activation=tf.nn.elu, kernel_initializer=he_init, name='dense1')
    dense2 = tf.layers.dense(inputs=dense1, units=128, activation=tf.nn.elu, kernel_initializer=he_init, name='dense2')
    dense3 = tf.layers.dense(inputs=dense2, units=128, activation=tf.nn.elu, kernel_initializer=he_init, name='dense3')
    dense4 = tf.layers.dense(inputs=dense3, units=128, activation=tf.nn.elu, kernel_initializer=he_init, name='dense4')
    dense5 = tf.layers.dense(inputs=dense4, units=128, activation=tf.nn.elu, kernel_initializer=he_init, name='dense5')
    return dense5
        
X = tf.placeholder(tf.float32, shape=(None, inputs_num), name='X')  # input layer
y = tf.placeholder(tf.int64, shape=(None), name='y')  # label

dnn_outputs = dnn(X)

#import pdb; pdb.set_trace()
logits = tf.layers.dense(inputs=dnn_outputs, units=outputs_num, activation=None, kernel_initializer=he_init, name='logits')
Y_probability = tf.nn.softmax(logits, name='Y_probability')

cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss_function = tf.reduce_mean(cross_entropy, name='loss')

optimizer = tf.train.AdamOptimizer(lr)
training_op = optimizer.minimize(loss_function, name='training_op')

correct = tf.nn.in_top_k(logits, y, k=1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')


## Training

In [5]:
# import sklearn Kfold to implement cross-validation
from sklearn.model_selection import KFold

# Merge training and validation set for K-fold cross validation
X_data = np.concatenate((X_train1, X_valid1), axis=0)
y_data = np.concatenate((y_train1, y_valid1), axis=0)


# 10-folder
kf = KFold(n_splits=fold_num, shuffle=True)

# To store the model
save_acc = 0.
saver = tf.train.Saver()

for fold_index, fold_indices in enumerate(kf.split(X_data), 1):
    # get data of this fold
    train_fold_indices, test_fold_indices = fold_indices
    X_train_fold, X_valid_fold = X_data[train_fold_indices], X_data[test_fold_indices]
    y_train_fold, y_valid_fold = y_data[train_fold_indices], y_data[test_fold_indices]
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        
        # parameters
        saturate_count = 0
        best_acc = 0.
        best_loss = 1000.
        best_epoch = -1
        iterations = int(X_train_fold.shape[0] / batch_size)
        
        # training for number of epochs times
        for e in range(1, epochs + 1):
            for i in range(iterations):
                if(i==0):
                    batch_x = X_train_fold[:batch_size]
                    batch_y = y_train_fold[:batch_size]
                else:
                    batch_x = X_train_fold[i * batch_size : (i + 1) * batch_size]
                    batch_y = y_train_fold[i * batch_size : (i + 1) * batch_size]
                    
                sess.run(training_op, feed_dict={X: batch_x, y: batch_y})
        
            # validate
            loss, acc = sess.run([loss_function, accuracy], feed_dict={X: X_valid_fold, y: y_valid_fold})
        
            if best_loss > loss:
                best_acc = acc
                best_loss = loss
                best_epoch = e
                saturate_count = 0
            else:
                saturate_count += 1

                if saturate_count >= saturate_limit:  # stop if saturate
                    break
    
        # print the best result of this fold cross all epochs
        print('*' * 60)
        print('Fold:', fold_index)
        print('Best epoch:', best_epoch)
        print('Best accurancy:', best_acc)
        print('Best loss:', best_loss)
        print('*' * 60, '\n')
    
        # print the test data accurancy
        print('=' * 60)
        test_acc = sess.run(accuracy,feed_dict={X: X_test1,
                                                y: y_test1})
        print("Test data accurancy", test_acc)
        print('=' * 60, '\n')
        
        if test_acc > save_acc:
            print('@' * 60)
            print("Test data accurancy", test_acc)
            print('@' * 60, '\n')
            save_acc = test_acc
            save_path = saver.save(sess, "./pretrained_hw2_weights/Team59_HW2.ckpt")
    

************************************************************
Fold: 1
Best epoch: 6
Best accurancy: 0.983007
Best loss: 0.0572412
************************************************************ 

Test data accurancy 0.987546

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Test data accurancy 0.987546
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 

************************************************************
Fold: 2
Best epoch: 18
Best accurancy: 0.99085
Best loss: 0.0389803
************************************************************ 

Test data accurancy 0.993578

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Test data accurancy 0.993578
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 

************************************************************
Fold: 3
Best epoch: 22
Best accurancy: 0.992484
Best loss: 0.0289593
************************************************************ 

Test data accurancy 0.992216

***************************