# Neural Network Example

Build a fully connected neural network (a.k.a. multilayer perceptron) with four batch-normalized hidden layers using TensorFlow.

- Author: Aymeric Damien
- Project: https://github.com/aymericdamien/TensorFlow-Examples/

## Neural Network Overview

<img src="http://cs231n.github.io/assets/nn1/neural_net2.jpeg" alt="nn" style="width: 400px;"/>

## MNIST Dataset Overview

This example uses the MNIST handwritten digits. The dataset contains 60,000 examples for training and 10,000 examples for testing. The digits have been size-normalized and centered in a fixed-size image (28x28 pixels) with values from 0 to 1. For simplicity, each image has been flattened and converted to a 1-D numpy array of 784 features (28*28).

![MNIST Dataset](http://neuralnetworksanddeeplearning.com/images/mnist_100_digits.png)

More info: http://yann.lecun.com/exdb/mnist/

In [1]:
from __future__ import print_function

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

import tensorflow as tf

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [53]:
# ---- Training hyper-parameters ----
batch_size = 128       # examples drawn per optimisation step
num_steps = 5000       # total number of optimisation steps
display_step = 100     # report minibatch loss/accuracy every this many steps
learning_rate = 0.001  # step size for the optimizer

# ---- Network dimensions ----
# NOTE: build_graph() declares its own local layer sizes; within the visible
# cells these module-level values are only referenced by the commented-out
# original model.
num_input = 784    # MNIST data input (img shape: 28*28)
num_classes = 10   # MNIST total classes (0-9 digits)
n_hidden_1 = 256   # 1st layer number of neurons
n_hidden_2 = 256   # 2nd layer number of neurons

# The graph inputs (the X and Y placeholders) are created inside build_graph().

In [59]:
# define layer
def batch_norm_wrapper(inputs, is_training, scale, beta, pop_mean, pop_var, decay = 0.999, epsilon = 1e-3):
    """Batch-normalise ``inputs`` using batch or stored population statistics.

    Args:
        inputs: Tensor to normalise; moments are taken along axis 0.
        is_training: Boolean tensor used as the ``tf.cond`` predicate. When
            true, the statistics of the current batch are used and the
            population estimates are updated; otherwise the stored population
            estimates are used unchanged.
        scale: Multiplicative factor passed to ``tf.nn.batch_normalization``.
        beta: Additive offset passed to ``tf.nn.batch_normalization``.
        pop_mean: Variable holding the running estimate of the mean.
        pop_var: Variable holding the running estimate of the variance.
        decay: Weight kept from the previous running estimate on each update.
        epsilon: Small constant added to the variance by the normalisation op.

    Returns:
        The normalised tensor.
    """
    def _batch_statistics():
        # Moments of the current minibatch, reduced over axis 0.
        mu, sigma2 = tf.nn.moments(inputs, [0])
        # Exponential moving averages of the batch statistics.
        update_ops = [
            tf.assign(pop_mean, pop_mean * decay + mu * (1 - decay)),
            tf.assign(pop_var, pop_var * decay + sigma2 * (1 - decay)),
        ]
        # Tie the updates to the returned tensors so that fetching the batch
        # statistics also refreshes the running estimates.
        with tf.control_dependencies(update_ops):
            return tf.identity(mu), tf.identity(sigma2)

    def _population_statistics():
        return pop_mean, pop_var

    mean_, var_ = tf.cond(is_training, _batch_statistics, _population_statistics)

    return tf.nn.batch_normalization(
        inputs,
        mean=mean_,
        variance=var_,
        offset=beta,
        scale=scale,
        variance_epsilon=epsilon)

def fc_batch_norm_relu(inputs, is_training, weight, biase, scale, beta, pop_mean, pop_var):
    """Apply one fully connected layer, then batch normalisation, then ReLU.

    Args:
        inputs: Input tensor, matrix-multiplied by ``weight``.
        is_training: Forwarded to ``batch_norm_wrapper``.
        weight: Weight matrix of the layer.
        biase: Bias added to the matrix product.
        scale, beta, pop_mean, pop_var: Batch-normalisation parameters,
            forwarded to ``batch_norm_wrapper``.

    Returns:
        The rectified, batch-normalised activations.
    """
    affine = tf.add(tf.matmul(inputs, weight), biase)
    normalised = batch_norm_wrapper(
        affine,
        is_training=is_training,
        scale=scale,
        beta=beta,
        pop_mean=pop_mean,
        pop_var=pop_var)
    return tf.nn.relu(normalised)

def neural_net(x, weights, biases, scales, betas, pop_means, pop_vars, is_training):
    """Stack four FC + batch-norm + ReLU layers followed by a linear output.

    Each parameter dictionary is indexed by layer number: ``weights['hN']``,
    ``biases['bN']``, ``scales['scale_N']``, ``betas['bata_N']``,
    ``pop_means['pop_mean_N']`` and ``pop_vars['pop_var_N']`` for N in 1..4,
    plus ``weights['out']`` and ``biases['out']`` for the output layer.

    Args:
        x: Input tensor fed to the first hidden layer.
        weights, biases, scales, betas, pop_means, pop_vars: Parameter
            dictionaries keyed as described above.
        is_training: Forwarded to every hidden layer.

    Returns:
        The un-normalised class scores (logits) of the output layer.
    """
    hidden = x
    # Hidden layers 1..4 share the same structure and differ only in the
    # parameters they look up.
    for idx in ("1", "2", "3", "4"):
        hidden = fc_batch_norm_relu(
            hidden,
            is_training,
            weights['h' + idx],
            biases['b' + idx],
            scales['scale_' + idx],
            betas['bata_' + idx],
            pop_means['pop_mean_' + idx],
            pop_vars['pop_var_' + idx])

    # Output fully connected layer with a neuron for each class
    return tf.matmul(hidden, weights['out']) + biases['out']

# Build the graph
def build_graph():
    """Assemble the batch-normalised MLP graph and return its endpoints.

    Creates, in the current default graph, the variables for four hidden
    layers (256, 128, 64 and 32 units) and a 10-way output layer, the input
    placeholders, the softmax cross-entropy loss, an Adam training op and an
    accuracy metric.

    Returns:
        A tuple ``(X, Y, logits, loss, train_op, correct_pred, accuracy,
        learning_rate, is_training)``. ``X`` and ``Y`` are the float32 feature
        and label placeholders, ``learning_rate`` is a float32 placeholder
        consumed by the optimizer, and ``is_training`` is a bool placeholder
        forwarded to the batch-normalisation layers.
    """
    feature_num = 784  # MNIST data input (img shape: 28*28)
    num_class = 10     # MNIST total classes (0-9 digits)
    hidden_sizes = [256, 128, 64, 32]

    # (layer number, fan-in, fan-out) for every hidden layer.
    fan_ins = [feature_num] + hidden_sizes[:-1]
    layer_dims = [(idx, fan_in, fan_out)
                  for idx, (fan_in, fan_out)
                  in enumerate(zip(fan_ins, hidden_sizes), 1)]
    # (layer number, width) for every hidden layer.
    layer_widths = list(enumerate(hidden_sizes, 1))

    # Weights: normal initialisation with stddev 0.1. (A Xavier-initialised
    # tf.get_variable was a previously tried alternative.)
    weights = {}
    for idx, fan_in, fan_out in layer_dims:
        weights['h%d' % idx] = tf.Variable(
            tf.random_normal([fan_in, fan_out], mean=0.0, stddev=0.1),
            name="w_h%d" % idx)
    weights['out'] = tf.Variable(
        tf.random_normal([hidden_sizes[-1], num_class], mean=0.0, stddev=0.1),
        name="w_out")

    # Biases: default random_normal initialisation.
    biases = {}
    for idx, width in layer_widths:
        biases['b%d' % idx] = tf.Variable(
            tf.random_normal([width]), name="b_%d" % idx)
    biases['out'] = tf.Variable(tf.random_normal([num_class]), name="b_out")

    # Trainable batch-norm parameters: scale starts at one, offset at zero.
    scales = {}
    for idx, width in layer_widths:
        scales['scale_%d' % idx] = tf.Variable(
            tf.ones([width]), name="scale_%d" % idx)

    betas = {}
    for idx, width in layer_widths:
        betas['bata_%d' % idx] = tf.Variable(
            tf.zeros([width]), name="beta_%d" % idx)

    # Running population statistics; excluded from training and updated by
    # explicit assignments instead of by the optimizer.
    pop_means = {}
    for idx, width in layer_widths:
        pop_means['pop_mean_%d' % idx] = tf.Variable(
            tf.zeros([width]), name="pop_mean_%d" % idx, trainable=False)

    pop_vars = {}
    for idx, width in layer_widths:
        pop_vars['pop_var_%d' % idx] = tf.Variable(
            tf.ones([width]), name="pop_var_%d" % idx, trainable=False)

    learning_rate = tf.placeholder(tf.float32)
    is_training = tf.placeholder(tf.bool)

    with tf.name_scope("input"):
        X = tf.placeholder(tf.float32, [None, feature_num], name="X")
        Y = tf.placeholder(tf.float32, [None, num_class], name="Y")

    with tf.name_scope("logits"):
        logits = neural_net(X, weights, biases, scales, betas,
                            pop_means, pop_vars, is_training)

    with tf.name_scope("loss"):
        # Mean softmax cross-entropy over the batch.
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=Y)
        loss = tf.reduce_mean(per_example_loss)

    with tf.name_scope("train_op"):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        # A prediction is correct when the arg-max of the logits matches the
        # arg-max of the label row.
        predicted_class = tf.argmax(logits, 1)
        expected_class = tf.argmax(Y, 1)
        correct_pred = tf.equal(predicted_class, expected_class)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    return (X, Y, logits, loss, train_op, correct_pred, accuracy,
            learning_rate, is_training)

In [60]:
# Store layers weight & bias
# weights = {
#     'h1': tf.Variable(tf.random_normal([num_input, n_hidden_1])),
#     'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
#     'out': tf.Variable(tf.random_normal([n_hidden_2, num_classes]))
# }
# biases = {
#     'b1': tf.Variable(tf.random_normal([n_hidden_1])),
#     'b2': tf.Variable(tf.random_normal([n_hidden_2])),
#     'out': tf.Variable(tf.random_normal([num_classes]))
# }

In [61]:
# # Create model
# def neural_net(x):
#     # Hidden fully connected layer with 256 neurons
#     layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
#     # Hidden fully connected layer with 256 neurons
#     layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
#     # Output fully connected layer with a neuron for each class
#     out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
#     return out_layer

In [62]:
# # Construct model
# logits = neural_net(X)

# # Define loss and optimizer
# loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
#     logits=logits, labels=Y))
# optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
# train_op = optimizer.minimize(loss_op)

# # Evaluate model (with test logits, for dropout to be disabled)
# correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
# accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# # Initialize the variables (i.e. assign their default value)
# init = tf.global_variables_initializer()

In [63]:
# Start training
# Build the model in its own graph and train it inside a session bound to
# that graph.
graph_bn = tf.Graph()
with graph_bn.as_default():
    with tf.Session(graph = graph_bn) as sess:
        # The learning-rate placeholder is unpacked under its own name so the
        # `learning_rate` hyper-parameter from the parameters cell is not
        # overwritten and can be fed to the optimizer below.
        (X, Y, logits, loss, train_op, correct_pred, accuracy,
         learning_rate_ph, is_training) = build_graph()

        # Run the initializer
        sess.run(tf.global_variables_initializer())

        for step in range(1, num_steps+1):
            batch_x, batch_y = mnist.train.next_batch(batch_size)

            # Run optimization op (backprop); batch norm uses batch statistics
            # and updates its running estimates.
            sess.run(train_op, feed_dict={X: batch_x,
                                          Y: batch_y,
                                          learning_rate_ph: learning_rate,
                                          is_training: True})

            if step % display_step == 0 or step == 1:
                # Calculate batch loss and accuracy on the same minibatch,
                # this time with the stored population statistics.
                loss_, acc = sess.run([loss, accuracy],
                                      feed_dict={X: batch_x,
                                                 Y: batch_y,
                                                 is_training: False})
                # The double space after "Loss=" keeps the output identical
                # to previously recorded logs.
                print("Step {}, Minibatch Loss=  {}, Training Accuracy= {}".format(
                    step, str(loss_), str(acc)))

        print("Optimization Finished!")

        # Calculate accuracy for MNIST test images
        test_accuracy = sess.run(accuracy, feed_dict={X: mnist.test.images,
                                                      Y: mnist.test.labels,
                                                      is_training: False})
        print("Testing Accuracy:", test_accuracy)

Step 1, Minibatch Loss=  2.83873, Training Accuracy= 0.0703125
Step 100, Minibatch Loss=  2.01634, Training Accuracy= 0.304688
Step 200, Minibatch Loss=  1.98795, Training Accuracy= 0.25
Step 300, Minibatch Loss=  1.93425, Training Accuracy= 0.320312
Step 400, Minibatch Loss=  1.65757, Training Accuracy= 0.367188
Step 500, Minibatch Loss=  1.55434, Training Accuracy= 0.390625
Step 600, Minibatch Loss=  1.25784, Training Accuracy= 0.546875
Step 700, Minibatch Loss=  0.972251, Training Accuracy= 0.625
Step 800, Minibatch Loss=  1.00943, Training Accuracy= 0.65625
Step 900, Minibatch Loss=  0.741371, Training Accuracy= 0.757812
Step 1000, Minibatch Loss=  0.516295, Training Accuracy= 0.851562
Step 1100, Minibatch Loss=  0.502909, Training Accuracy= 0.835938
Step 1200, Minibatch Loss=  0.247361, Training Accuracy= 0.953125
Step 1300, Minibatch Loss=  0.173175, Training Accuracy= 0.960938
Step 1400, Minibatch Loss=  0.101278, Training Accuracy= 0.96875
Step 1500, Minibatch Loss=  0.125064, 