In [17]:
import numpy as np
import os
import tensorflow as tf
import math

###### Do not modify here ###### 

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

from tensorflow.examples.tutorials.mnist import input_data

print("Loading Data...")
mnist = input_data.read_data_sets("/tmp/data/")
print("Loading finish!")

# training on MNIST but only on digits 0 to 4
X_train1 = mnist.train.images[mnist.train.labels < 5]
y_train1 = mnist.train.labels[mnist.train.labels < 5]
X_valid1 = mnist.validation.images[mnist.validation.labels < 5]
y_valid1 = mnist.validation.labels[mnist.validation.labels < 5]
X_test1 = mnist.test.images[mnist.test.labels < 5]
y_test1 = mnist.test.labels[mnist.test.labels < 5]

# Put training & validation data together 
X_tot = np.concatenate([X_train1, X_valid1], 0)
y_tot = np.concatenate([y_train1, y_valid1], 0)

# add one more layer and return the output of this layer
def add_layer(name, inputs, in_size, out_size, dropout_rate, activation_function=None):
    Weights = tf.get_variable(name, shape=[in_size, out_size], initializer=tf.contrib.layers.variance_scaling_initializer())
    Wx = tf.matmul(inputs, Weights)
    if activation_function is None:
        outputs = Wx
    else:
        outputs = activation_function(Wx)
    outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=True)  # Dropout layer
    return outputs

# Split training and validation set for cross-validation
# X is training+validation dataset, y is training+validation label, val_ratio is 1/number of fold
def train_val_split(X, y, val_ratio, index):
    size,_ = X.shape
    set_size = math.floor(size*val_ratio) # get the set size for validation
    indices = np.arange(set_size*index,set_size*(index+1)) # index for validation
    X_val = X[indices, :] # pick validation set from total set by index
    X_train = np.delete(X, indices, axis=0) # others are training set
    y_val = y[indices] # pick validation label from total set by index
    y_train = np.delete(y, indices, axis=0) # others are training label
    return X_train, X_val, y_train, y_val

# Print out total accuracy rate & precision & recall
def print_accuracy_precision_recall(X_input, y_label, y_predict, accuracy):
    print("Total accuracy:", accuracy)

    y_value = tf.argmax(y_predict, 1)
    false_array = -1*tf.ones(tf.shape(ys), tf.int64)  # Create an array with -1
    print("label", " Precision      ", "Recall")

    for i in range(0, 5):
        condition = tf.equal(y_value, i) # Detect which predicted value is the number we want (0~4)
        prec_total_num = tf.reduce_sum(tf.cast(condition, tf.int32)) # Count the number we want (0~4)
        indices = tf.where(condition, x = ys, y = false_array) # Check whether the corresponding index is the same, same: keep it, not same: -1
        prec_count = tf.reduce_sum(tf.cast(tf.equal(indices, i), tf.int32)) # Count the number which is not -1

        condition = tf.equal(ys, i)
        recall_total_num = tf.reduce_sum(tf.cast(condition, tf.int32))
        indices = tf.where(condition, x = y_value, y = false_array)
        recall_count = tf.reduce_sum(tf.cast(tf.equal(indices, i), tf.int32))
        print(i, "   :", sess.run(prec_count/prec_total_num, feed_dict = {xs: X_test1,  ys: y_test1}), 
              sess.run(recall_count/recall_total_num, feed_dict = {xs: X_test1,  ys: y_test1}))


depth = 1
_, dim = X_train1.shape # image resolution
learning_rate = 0.001
batch_size = 256
validation_fold = 5 
dropout_rate = 0.1
accuracy_threshold = 0.95
training_threshold = 200

# define placeholder for inputs to network
xs = tf.placeholder(tf.float32, [None, dim])
ys = tf.placeholder(tf.int64, [None, ])

# add hidden layer, there are five layers
l1 = add_layer("1",xs, dim, 128, dropout_rate, activation_function=tf.nn.elu)
l2 = add_layer("2", l1, 128, 128, dropout_rate, activation_function=tf.nn.elu)
l3 = add_layer("3", l2, 128, 128, dropout_rate, activation_function=tf.nn.elu)
l4 = add_layer("4", l3, 128, 128, dropout_rate, activation_function=tf.nn.elu)
l5 = add_layer("5", l4, 128, 128, dropout_rate, activation_function=tf.nn.elu)

# output layer
y = tf.nn.softmax(l5)

accuracy_rate = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), ys), tf.float32)) # Calculate accuracy rate

cross_entropy = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels = ys, logits = y)) # Cost
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy) # Optimizer

rate_sum = 0 # Used to calculate average accuracy rate for cross-validation
sess = tf.InteractiveSession()
saver = tf.train.Saver()

# Start training
for j in range(validation_fold):
    print("Fold:", j+1)
    tf.global_variables_initializer().run() # Initialize variables because different validation set need new weights
    X_train, X_val, y_train, y_val = train_val_split(X_tot, y_tot, 1/validation_fold, j) # Split train and validation set
    train_size = X_train.shape[0]

    epoch = 0
    while True:
        for i in range(int(train_size/batch_size)):
            # get batch data each time
            batch_x = X_train[i*batch_size:(i+1)*batch_size,:]
            batch_y = y_train[i*batch_size:(i+1)*batch_size]
            # training
            sess.run(train_step, feed_dict={xs: batch_x, ys: batch_y})
        accuracy = sess.run(accuracy_rate, feed_dict={xs: X_val, ys: y_val})
        epoch = epoch + 1
        
        # Early stop condition
        if(accuracy > accuracy_threshold):
            print("Accuracy rate is larger than", accuracy_threshold, "in Epoch", epoch, ". Early stop!!!")
            save_path = saver.save(sess, "/tmp/Team48_HW2.ckpt") # save variables to checkpoint
            break
        if(epoch > training_threshold):
            print("Training over", training_threshold, "times. Early stop!!!")
            save_path = saver.save(sess, "/tmp/Team48_HW2.ckpt") # save variables to checkpoint
            break
    
    print_accuracy_precision_recall(X_val, y_val, y, accuracy)
    rate_sum += accuracy  # accumulate accuracy rate
    
print("Average accuracy rate:", rate_sum/validation_fold) # Show average accuracy rate for each cross-validation fold

Loading Data...
Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Loading finish!
Fold: 1
Accuracy rate is larger than 0.95 in Epoch 21 . Early stop!!!
Total accuracy: 0.958327
label  Precision       Recall
0    : 0.944444444444 0.989795918367
1    : 0.972878390201 0.976211453744
2    : 0.959325396825 0.932170542636
3    : 0.947731755424 0.957425742574
4    : 0.981934112646 0.956211812627
Fold: 2
Accuracy rate is larger than 0.95 in Epoch 20 . Early stop!!!
Total accuracy: 0.952934
label  Precision       Recall
0    : 0.953187250996 0.967346938776
1    : 0.965397923875 0.979735682819
2    : 0.935770750988 0.911821705426
3    : 0.943137254902 0.955445544554
4    : 0.964984552008 0.957230142566
Fold: 3
Accuracy rate is larger than 0.95 in Epoch 18 . Early stop!!!
Total accuracy: 0.955549
label  Precision       Recall
0    : 0.951533135509 0.986734693

In [18]:
print("For testing set:")
# Print out Total accuracy rate & precision & recall for testing data set
accuracy = sess.run(accuracy_rate, feed_dict={xs: X_test1, ys: y_test1})
print_accuracy_precision_recall(X_test1, y_test1, y, accuracy)

For testing set:
Total accuracy: 0.953687
label  Precision       Recall
0    : 0.931533269045 0.982653061224
1    : 0.963318777293 0.974449339207
2    : 0.946946946947 0.914728682171
3    : 0.938953488372 0.954455445545
4    : 0.989350372737 0.938900203666


# Explanation
I constructed the Neural Network model based on the spec.

I created a function "add_layer" to build hidden layer. For each layer, there are Weight but no bias. I found the result without bias is better.

Next, I use "sparse_softmax_cross_entropy_with_logits" as my loss function, and use "AdamOptimizer" to update my Weight. Here, my learning rate for Optimizer is 0.001.

For training process, I divided dataset to several minibatch to train, and I set Early stop standard. If the accuracy rate is over accuracy rate threshold, stop training. Besides, I applied 5 fold cross-validation. I combined training & validation dataset and split them to 5 parts. After training, I will get an average accuracy rate.

At last, I will apply the model to testing datsets and get the final accuracy rate & precision & recall as shown at the last.