# Chapter 10: Introduction to Artificial Neural Networks

This chapter introduces how to build an MLP using TensorFlow, both with higher-level APIs and with lower-level operators. For advanced usage of TensorFlow, check out the official documentation or other tutorials.

> This Jupyter notebook contains my own solutions to the coding exercises of the book. For answers to the questions, please refer to the markdown file in the same folder.

## MLP on MNIST
Requirement: Train a deep MLP on the MNIST dataset, and try to reach an accuracy higher than 98%. Also, try to add some additional features, such as saving checkpoints, resuming training, adding summaries and plotting learning curves.

The complete code is shown below. It is similar to the code for the preceding moons dataset, but the structure of the MLP is modified to fit the MNIST dataset.

In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected
import os
from datetime import datetime

# Prepare the dataset: load the four MNIST arrays from the local Keras cache.
# The archive is opened in a `with` block so its file handle is released as
# soon as the arrays have been read into memory.
with np.load("/Users/Antinomy/.keras/datasets/mnist.npz") as dataset:
    X_train, y_train, X_test, y_test = dataset["x_train"], dataset["y_train"], dataset["x_test"], dataset["y_test"]

# Flatten every 28x28 image into a 784-element row and rescale the pixel
# intensities to [0, 1] as float32. The Keras MNIST archive stores pixels as
# integers in 0..255; feeding them unscaled makes the initial logits and
# gradients very large, which destabilises plain gradient descent.
X_train = np.reshape(X_train, (-1, 28*28)).astype(np.float32) / 255.0
X_test = np.reshape(X_test, (-1, 28*28)).astype(np.float32) / 255.0

# Hyperparameters: optimisation settings first, then the two hidden layer widths.
batch_size, num_epochs, learning_rate = 64, 3000, 0.001
num_hidden_1, num_hidden_2 = 500, 300

# Training-set dimensions and the number of mini-batches needed to cover it
# once (the last batch may be smaller than batch_size, hence the ceil).
num_train_samples, num_features = X_train.shape[:2]
num_batches = int(np.ceil(num_train_samples / batch_size))
# Epoch bookkeeping read by the training loop; -1 means "no epoch started yet".
current_epoch = -1
# A shuffled ordering of all training sample indexes.
random_indexes = np.arange(num_train_samples)
np.random.shuffle(random_indexes)

# Each run logs into its own directory, named after the UTC start time.
root_logdir = "/Users/Antinomy/Desktop/tf_logs"
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
logdir = "%s/run-%s" % (root_logdir, now)

# define computation graph
# encapsulation of tensorflow API
def fully_connected_layer(X, num_neurons, activation="sigmoid"):
    """Build one fully connected layer on top of ``X``.

    Args:
        X: Input tensor passed to ``fully_connected``.
        num_neurons: Number of output units of the layer.
        activation: Activation to apply. Either one of the names
            ``"sigmoid"``, ``"tanh"``, ``"relu"``, ``"elu"``; a callable
            used directly as the activation function; or ``None`` for a
            linear layer.

    Returns:
        The output tensor of the layer.

    Raises:
        ValueError: If ``activation`` is a name that is not recognised.
    """
    named_activations = {
        "sigmoid": tf.nn.sigmoid,
        "tanh": tf.nn.tanh,
        "relu": tf.nn.relu,
        "elu": tf.nn.elu,
    }
    if activation is None or callable(activation):
        activation_fn = activation
    elif activation in named_activations:
        activation_fn = named_activations[activation]
    else:
        raise ValueError(
            "Unknown activation %r; expected one of %s, a callable or None"
            % (activation, sorted(named_activations))
        )

    with tf.name_scope("fully_connected_layer"):
        # No explicit scope is passed: an empty-string scope would create the
        # layer's variables directly in the enclosing variable scope, so a
        # second call would clash on the variable names. Leaving the scope
        # unset lets fully_connected pick a unique default name per call.
        layer_output = fully_connected(X, num_neurons, activation_fn=activation_fn)
        return layer_output

# Placeholders fed on every run: a batch of flattened inputs with num_features
# columns, and one integer class label per row. The leading None leaves the
# batch size unspecified.
input_tensor = tf.placeholder(tf.float32, shape=(None, num_features), name="input")
label = tf.placeholder(tf.int32, shape=(None,), name="label")
# The network: two hidden layers (num_hidden_1, then num_hidden_2 units)
# followed by a 10-unit output layer. The output layer passes
# activation_fn=None so it produces raw logits; the hidden layers use
# fully_connected's default activation (ReLU per the tf.contrib docs - confirm).
with tf.name_scope("MLP") as scope:
    hidden_1 = fully_connected(input_tensor, num_hidden_1, scope="hidden_1")
    hidden_2 = fully_connected(hidden_1, num_hidden_2, scope="hidden_2")
    logits = fully_connected(hidden_2, 10, scope="outputs", activation_fn=None)

# Loss: per-sample cross entropy between the integer labels and the logits,
# averaged into a single scalar.
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

# Training step: plain gradient descent on the mean loss.
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

# Created only after the model is fully defined, so the initializer and the
# saver see the variables built above.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

# TensorBoard logging: a scalar summary of the loss, plus a writer that stores
# events (and the default graph) under this run's log directory.
loss_summary = tf.summary.scalar("crossentropy", loss)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

# Train the MLP (resuming from the latest per-epoch checkpoint if there is
# one), log the loss for TensorBoard, then report test-set performance.
with tf.Session() as sess:
    sess.run(init)

    # Resume training: the "checkpoint" index file is written by saver.save,
    # so its presence means a previous run left weights to restore.
    if os.path.exists("/Users/Antinomy/Desktop/tmp/checkpoint"):
        saver.restore(sess, "/Users/Antinomy/Desktop/tmp/my_model.ckpt")

    try:
        for i in range(num_epochs):
            # Draw a fresh sample order once per epoch.
            current_epoch = i
            random_indexes = np.arange(num_train_samples)
            np.random.shuffle(random_indexes)

            for j in range(num_batches):
                selected_index = random_indexes[j*batch_size:(j+1)*batch_size]
                X_train_batch, y_train_batch = X_train[selected_index], y_train[selected_index]
                batch_feed = {input_tensor: X_train_batch, label: y_train_batch}

                # Record the loss every 10th batch, before the update is applied.
                if j % 10 == 0:
                    summary_str = loss_summary.eval(feed_dict=batch_feed)
                    step = i*num_batches + j
                    file_writer.add_summary(summary_str, step)

                sess.run(training_op, feed_dict=batch_feed)

            # End-of-epoch report: the loss shown is that of the last batch only.
            print("---------- Epoch %d ----------" % (i))
            print("Loss:", loss.eval(feed_dict={input_tensor: X_train_batch, label: y_train_batch}))
            save_path = saver.save(sess, "/Users/Antinomy/Desktop/tmp/my_model.ckpt")

        saver.save(sess, "/Users/Antinomy/Desktop/tmp/my_final_model.ckpt")
    finally:
        # Flush and close the event file even if training is interrupted.
        file_writer.close()

    # check the performance of the trained model on the test set
    test_logits = logits.eval(feed_dict={input_tensor: X_test})
    predictions = np.argmax(test_logits, axis=1)

    # roc_auc_score cannot score hard class predictions against multiclass
    # labels. Give it per-class scores and one-hot labels instead, which yields
    # the macro-averaged one-vs-rest AUC. The softmax is shifted by the row
    # maximum for numerical stability.
    shifted_logits = test_logits - np.max(test_logits, axis=1, keepdims=True)
    probabilities = np.exp(shifted_logits)
    probabilities /= np.sum(probabilities, axis=1, keepdims=True)
    one_hot_labels = np.eye(probabilities.shape[1])[y_test]

    print("========== Model Performance ==========")
    print("Accuracy:", accuracy_score(y_test, predictions))
    print("AUC:", roc_auc_score(one_hot_labels, probabilities))
