In [6]:
######################
#Brief:
    # 5 layer DNN ,128 neurons per layer
    # implement:
    #  batch input(for GPU optimize)
    #  N-fold cross validation
    #  print accuacy,precision,recall
    #  Dropout
    #  Optimize:
    #  batch_norm,specify momentum parameter of Adamoptimizer
#input:
    # mnist hand writing dataset 28*28 image(0~5 only)
    # output: 0~5 predicted number
#Result:
    # We found that 5 layers is unnecessary,the network can 
    # perform well (faster) with only 2 layers
#Training process:
    #input 0~5 hand writting image and it's labels from MNIST
    #image will be flatten to 1x784 array 
    #we preprocess the input with batch norm and feed them into 5 layers DNN 
    #Since ELU activation func is applied ,we use He initialize can prevent gradient from vanish
    #The softmax function will highlight the maximun output and also keep some information of other
    #which will make backward propogation faster and also ensure no weight become starvation
    #The dropout technic will prevent lazy neurons
    #We analyze accuracy,precision,recall by calculating the TP FP TN FN of each label
    #The N-fold cross validation provide us a more reliable way to analyze data
####################
import numpy as np
import os
import tensorflow as tf
from tensorflow.contrib.framework import arg_scope
from tensorflow.contrib.layers import fully_connected, batch_norm

###### Do not modify here ###### 

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

# training on MNIST but only on digits 0 to 4
X_train1 = mnist.train.images[mnist.train.labels < 5] #(28038,784)
y_train1 = mnist.train.labels[mnist.train.labels < 5] #(28038,)

X_valid1 = mnist.validation.images[mnist.validation.labels < 5] #(2558,784)
y_valid1 = mnist.validation.labels[mnist.validation.labels < 5] #(2558,) 

X_test1 = mnist.test.images[mnist.test.labels < 5] #(5139,784)
y_test1 = mnist.test.labels[mnist.test.labels < 5] #(5139,)

###### Do not modify here ###### 

#get next batch in order
def next_batch(batch_size,iteration, data, labels):
    start = batch_size*iteration
    end = batch_size*(iteration+1)
    return data[start:end], labels[start:end]

#const parameters
n_inputs = 784  # MNIST
n_outputs = 5

#adjustable parameters
N_neurons = 128
learning_rate = 0.01
momentum = 0.25
epochs = 10
batch_size = 128  #for GPU optimize
dropout = 0.5

# Create the model
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")


he_init = tf.contrib.layers.variance_scaling_initializer()
#improving traning speed
batch_norm_params = {
    'decay': 0.9,
    'updates_collections': None,
    'scale': True,
}


#5 fully connected layer ,128 neurons per layer with dropout
# with arg_scope(
#         [fully_connected],
#         activation_fn=tf.nn.elu,
#         weights_initializer=he_init,
#         normalizer_fn=batch_norm,
#         normalizer_params=batch_norm_params):
#     W1 = fully_connected(X,N_neurons)
#     W1_D = tf.nn.dropout(W1,dropout)
#     W2 = fully_connected(W1_D,N_neurons)
#     W2_D = tf.nn.dropout(W2,dropout)
#     W3 = fully_connected(W2_D,N_neurons)
#     W3_D = tf.nn.dropout(W3,dropout)
#     W4 = fully_connected(W3_D,N_neurons)
#     W4_D = tf.nn.dropout(W4,dropout)
#     W5 = fully_connected(W4_D,N_neurons)
#     W5_D = tf.nn.dropout(W5,dropout)
#     y_hat = fully_connected(W5_D, n_outputs)

# W1 = tf.layers.dense(X,N_neurons,activation=tf.nn.elu,kernel_initializer=he_init,name = 'hidden1_in')
# Wtest = tf.Variable(tf.random_normal(shape=[N_neurons]), name='test',initial_value=he_init)
W1 = tf.get_variable("dnn_in", shape=[784, N_neurons],initializer=he_init)
W1 = tf.nn.elu(tf.matmul(X,W1),name = "hidden1_in")
W1_B = tf.contrib.layers.batch_norm(W1)
W1_D = tf.nn.dropout(W1_B,dropout)

# W2 = tf.layers.dense(W1_D,N_neurons,activation=tf.nn.elu,kernel_initializer=he_init,name = 'hidden1_out')
W2 = tf.get_variable("hidden1", shape=[N_neurons, N_neurons],initializer=he_init)
W2 =  tf.nn.elu(tf.matmul(W1_D,W2),name="hidden1_out")
W2_B = tf.contrib.layers.batch_norm(W2)
W2_D = tf.nn.dropout(W2_B,dropout)
# W3 = tf.layers.dense(W2_D,N_neurons
#                      ,activation=tf.nn.elu,kernel_initializer=he_init,name = 'hidden2_out')

W3 = tf.get_variable("hidden2", shape=[N_neurons, N_neurons],initializer=he_init)
W3 =  tf.nn.elu(tf.matmul(W2_D,W3),name="hidden2_out")
W3_B = tf.contrib.layers.batch_norm(W3)
W3_D = tf.nn.dropout(W3_B,dropout)

# W4 = tf.layers.dense(W3_D,N_neurons,activation=tf.nn.elu,kernel_initializer=he_init,name = 'hidden3_out')
W4 = tf.get_variable("hidden3", shape=[N_neurons, N_neurons],initializer=he_init)
W4 = tf.nn.elu( tf.matmul(W3_D,W4),name="hidden3_out")
W4_B = tf.contrib.layers.batch_norm(W4)
W4_D = tf.nn.dropout(W4_B,dropout)

# W5 = tf.layers.dense(W4_D,N_neurons,activation=tf.nn.elu,kernel_initializer=he_init,name = 'hidden4_out')
W5 = tf.get_variable("hidden4", shape=[N_neurons, N_neurons],initializer=he_init)
W5 =  tf.nn.elu(tf.matmul(W4_D,W5),name="hidden4_out")
W5_B = tf.contrib.layers.batch_norm(W5)
W5_D = tf.nn.dropout(W5_B,dropout)

y_h = tf.get_variable( "hidden5",shape=[N_neurons,n_outputs],initializer=he_init)
y_hat = tf.matmul(W5_D,y_h, name="hidden5_out")
# y_hat = tf.layers.dense(W5_D,n_outputs,activation=None,kernel_initializer=he_init,name = 'hidden5_out')

#Add softmax to output

my_logits = tf.layers.dense(y_hat,n_outputs,kernel_initializer = he_init,name='logits')
y_proba = tf.nn.softmax(my_logits ,name  = 'Y_proba')
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=my_logits)
loss = tf.reduce_mean(cross_entropy, name="loss")

#use AdamOptimizer
optimizer = tf.train.AdamOptimizer(learning_rate, momentum)
training_op = optimizer.minimize(loss,name = "training_op")

correct = tf.nn.in_top_k(my_logits,y,1)
accuracy = tf.reduce_mean(tf.cast(correct,tf.float32), name="accuracy")

init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    init.run()
    for epoch in range(epochs):
        for iteration in range(len(X_train1)//batch_size):
            X_batch, y_batch = next_batch(batch_size,iteration,X_train1, y_train1)
            sess.run(training_op, feed_dict={ X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={ X: X_batch, y: y_batch})
        acc_valid = accuracy.eval(feed_dict={ X:  X_valid1, y:  y_valid1})
        print(epoch, "Train accuracy:", acc_train, "Valid accuracy:", acc_valid)
    save_path = saver.save(sess, "./model/Team11_HW2.ckpt")
    acc_test = accuracy.eval(feed_dict={ X:  X_test1, y:  y_test1})
    print( "Test accuracy:", acc_test)


Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
0 Train accuracy: 0.953125 Valid accuracy: 0.960907
1 Train accuracy: 0.976563 Valid accuracy: 0.970289
2 Train accuracy: 0.992188 Valid accuracy: 0.972244
3 Train accuracy: 0.976563 Valid accuracy: 0.979672
4 Train accuracy: 0.992188 Valid accuracy: 0.976153
5 Train accuracy: 0.984375 Valid accuracy: 0.980844
6 Train accuracy: 0.992188 Valid accuracy: 0.980844
7 Train accuracy: 0.976563 Valid accuracy: 0.981235
8 Train accuracy: 0.984375 Valid accuracy: 0.979281
9 Train accuracy: 0.976563 Valid accuracy: 0.984363
Test accuracy: 0.985017
