In [14]:
######################
#Brief:
# 5 layer DNN ,128 neurons per layer
# implement:
#  batch input(for GPU optimize)
#  N-fold cross validation
#  print accuacy,precision,recall
#  Dropout
#  Optimize:
#  batch_norm,specify momentum parameter of Adamoptimizer
#input:
# mnist hand writing dataset 28*28 image(0~5 only)
# output: 0~5 predicted number
# Result:
# We found that
####################
import numpy as np
import os
import tensorflow as tf
from tensorflow.contrib.framework import arg_scope
from tensorflow.contrib.layers import fully_connected, batch_norm

###### Do not modify here ###### 

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

# training on MNIST but only on digits 0 to 4
X_train1 = mnist.train.images[mnist.train.labels < 5] #(28038,784)
y_train1 = mnist.train.labels[mnist.train.labels < 5] #(28038,)

X_valid1 = mnist.validation.images[mnist.validation.labels < 5] #(2558,784)
y_valid1 = mnist.validation.labels[mnist.validation.labels < 5] #(2558,) 

X_test1 = mnist.test.images[mnist.test.labels < 5] #(5139,784)
y_test1 = mnist.test.labels[mnist.test.labels < 5] #(5139,)

###### Do not modify here ###### 

#get next batch in order
def next_batch(batch_size,iteration, data, labels):
    start = batch_size*iteration
    end = batch_size*(iteration+1)
    return data[start:end], labels[start:end]

#const parameters
n_inputs = 784  # MNIST
n_outputs = 5

#adjustable parameters
N_neurons = 128
learning_rate = 0.01
momentum = 0.25
epochs = 2
batch_size = 128  #for GPU optimize
N_fold = 3
dropout = 0.5

# Create the model
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")
is_training = tf.placeholder(tf.bool, shape=(), name='is_training')


he_init = tf.contrib.layers.variance_scaling_initializer()
#improving traning speed
batch_norm_params = {
    'is_training': is_training,
    'decay': 0.9,
    'updates_collections': None,
    'scale': True,
}


#5 fully connected layer ,128 neurons per layer with dropout
with arg_scope(
        [fully_connected],
        activation_fn=tf.nn.elu,
        weights_initializer=he_init,
        normalizer_fn=batch_norm,
        normalizer_params=batch_norm_params):
    W1 = fully_connected(X,N_neurons)
    W1_D = tf.nn.dropout(W1,dropout)
    W2 = fully_connected(W1_D,N_neurons)
    W2_D = tf.nn.dropout(W2,dropout)
    W3 = fully_connected(W2_D,N_neurons)
    W3_D = tf.nn.dropout(W3,dropout)
    W4 = fully_connected(W3,N_neurons)
    W4_D = tf.nn.dropout(W4,dropout)
    W5 = fully_connected(W4_D,N_neurons)
    W5_D = tf.nn.dropout(W5,dropout)
    y_hat = fully_connected(W5_D, n_outputs, activation_fn=None)


#Add softmax to output
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_hat)
loss = tf.reduce_mean(cross_entropy, name="loss")

#use AdamOptimizer
optimizer = tf.train.AdamOptimizer(learning_rate, momentum)
train_op = optimizer.minimize(loss)

#use for calculate TP TN FP FN
y_hat_argmax = tf.argmax(y_hat,1) 

init = tf.global_variables_initializer()
saver = tf.train.Saver()

#START N-fold version
from sklearn.model_selection import KFold


k_fold = KFold(n_splits=N_fold)
X_total = np.concatenate((X_train1 ,X_valid1),axis = 0)
y_total = np.concatenate((y_train1 ,y_valid1),axis = 0)
fold_idx = 0


#parameters for caculating accuracy,precision and recall
ACC = np.zeros([N_fold,5])
PRE = np.zeros([N_fold,5])
REC = np.zeros([N_fold,5])
TP = np.zeros(5)
TN = np.zeros(5)
FP = np.zeros(5)
FN = np.zeros(5)

with tf.Session() as sess:
    for train_index, valid_index in k_fold.split(X_total):
        init.run()
        print("fold:",fold_idx)
        for epoch in range(epochs):
            for iteration in range(len(train_index)//batch_size):
                X_batch, y_batch = next_batch(batch_size,iteration,X_total[train_index], y_total[train_index])
                sess.run(train_op, feed_dict={is_training: True, X: X_batch, y: y_batch})
            
            print("epoch:",epoch)
            y_predict = y_hat_argmax.eval(feed_dict={is_training: False, X: X_total[valid_index], y: y_total[valid_index]})
            TP.fill(0)
            TN.fill(0)
            FP.fill(0)
            FN.fill(0)
            for i in range(len(y_predict)):
                #the if else can be simplified , but I prefer this since it's more readable
                #if predict right 
                #increment TP[predict_y] with 1
                #increment TN[other] with 1
                #if wrong
                #increment FP[predict_y] with 1
                #increment FN[y] with 1
                #increment FN[y] with 1
                #increment TN[other] with 1
                if y_predict[i] ==  y_total[valid_index[i]]:
                    TP[y_predict[i]]+=1
                    TN+=1
                    TN[y_predict[i]]-=1
                else:
                    FP[y_predict[i]]+=1
                    FN[y_total[i]]+=1
                    TN+=1
                    TN[y_predict[i]]-=1
                    TN[y_total[i]]-=1
            print("label","\t","accuracy","\t","precision","\t","recall")
            for i in range(5):
                ACC[fold_idx,i] = round((TP[i]+TN[i])/(TP[i]+TN[i]+FP[i]+FN[i]),3)
                PRE[fold_idx,i] = round((TP[i])/(TP[i]+FP[i]),3)
                REC[fold_idx,i] = round((TP[i])/(TP[i]+FN[i]),3)
                print(i,"\t",ACC[fold_idx,i],"\t\t",PRE[fold_idx,i],"\t\t",REC[fold_idx,i])
        fold_idx+=1
         
    print("average result:")
    print("label","\t","accuracy","\t","precision","\t","recall")
    for i in range(5):
        print(i,"\t",np.mean(ACC,0)[i],"\t\t",np.mean(PRE,0)[i],"\t\t",np.mean(REC,0)[i])
    save_path = saver.save(sess, "./model/Team11_HW2.ckpt")

#END N-fold version

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
fold: 0
epoch: 0
label 	 accuracy 	 precision 	 recall
0 	 0.99 		 0.964 		 0.983
1 	 0.983 		 0.944 		 0.983
2 	 0.968 		 0.948 		 0.882
3 	 0.973 		 0.927 		 0.941
4 	 0.988 		 0.975 		 0.961


KeyboardInterrupt: 