# TensorFlow Assignment: Multi-Layer Perceptron (MLP)

**[Duke Community Standard](http://integrity.duke.edu/standard.html): By typing your name below, you are certifying that you have adhered to the Duke Community Standard in completing this assignment.**

Name: Pratyush Sharma

### Multi-layer Perceptron

Build a 2-layer MLP for MNIST digit classification. Feel free to play around with the model architecture and see how the training time/performance changes, but to begin, try the following:

Image (784 dimensions) -> fully connected layer (500 hidden units) -> nonlinearity (ReLU) -> fully connected layer (100 hidden units) -> nonlinearity (ReLU) -> fully connected layer (10 output units) -> softmax

In [7]:
### YOUR CODE HERE
from __future__ import print_function
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm import trange
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Only let TensorFlow log messages of ERROR severity through.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Read MNIST from the local dataset directory, with one-hot encoded labels.
mnist = input_data.read_data_sets("datasets/MNIST_data/", one_hot=True)

# Optimisation settings
initial_learning_rate = 0.005  # step size handed to the optimizer
batch_size = 100               # examples fetched per training step
training_epochs = 15           # number of passes through the training loop
display_step = 1               # report the average cost every `display_step` epochs

# Regularization strength. Regularization was introduced because, while
# varying the learning rate, the cost kept decreasing without the accuracy
# improving, which hinted at overfitting.
regularizer_rate = 0.1

# Layer widths: 28*28 input pixels -> 500 -> 100 -> 10 digit classes (0-9).
n_input, n_hidden_1, n_hidden_2, n_classes = 784, 500, 100, 10

# Graph inputs: a batch of flattened images and the matching one-hot labels.
X = tf.placeholder(tf.float32, shape=[None, n_input])
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

# Weight matrices. The two hidden layers draw from a normal distribution
# whose standard deviation is 1/sqrt(number of layer inputs); the output
# layer uses random_normal's default standard deviation.
weights = {}
weights['h1'] = tf.Variable(
    tf.random_normal([n_input, n_hidden_1],
                     stddev=1 / tf.sqrt(float(n_input)))
)
weights['h2'] = tf.Variable(
    tf.random_normal([n_hidden_1, n_hidden_2],
                     stddev=1 / tf.sqrt(float(n_hidden_1)))
)
weights['out'] = tf.Variable(tf.random_normal([n_hidden_2, n_classes]))

# Bias vectors, one per layer, drawn with random_normal's default parameters.
biases = {
    key: tf.Variable(tf.random_normal([width]))
    for key, width in (('b1', n_hidden_1),
                       ('b2', n_hidden_2),
                       ('out', n_classes))
}


# Create model
def multilayer_perceptron(x):
    """Run ``x`` through two ReLU hidden layers and a linear output layer.

    Reads the module-level ``weights`` and ``biases`` dictionaries.

    Args:
        x: Input tensor multiplied against ``weights['h1']`` (presumably
            shaped ``[batch, n_input]`` -- confirm against the caller).

    Returns:
        The output layer's pre-activation values (logits); no softmax is
        applied here.
    """
    hidden_1 = tf.nn.relu(tf.matmul(x, weights['h1']) + biases['b1'])
    hidden_2 = tf.nn.relu(tf.matmul(hidden_1, weights['h2']) + biases['b2'])
    return tf.add(tf.matmul(hidden_2, weights['out']), biases['out'])

# Construct model
logits = multilayer_perceptron(X)

# Loss: mean softmax cross-entropy over the batch plus a squared-magnitude
# (L2) penalty scaled by `regularizer_rate`.
# NOTE(review): the penalty covers only the hidden-layer biases (b1, b2); the
# weight matrices are not regularized. Confirm this is intended -- moving the
# penalty to the weights would also require re-tuning `regularizer_rate`.
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits)) \
        + regularizer_rate*(tf.reduce_sum(tf.square(biases['b1'])) + tf.reduce_sum(tf.square(biases['b2'])))

# Plain gradient descent with a fixed learning rate.
train_op = tf.train.GradientDescentOptimizer(initial_learning_rate).minimize(loss_op)

# Evaluation ops. They are built here, before the session starts, so the
# whole graph is defined in one place rather than being extended after
# training has already run.
pred = tf.nn.softmax(logits)
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

# Number of full batches per epoch. This does not change between epochs, so
# it is computed once; floor division keeps it an integer without a
# float round-trip.
total_batch = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(training_epochs):
        avg_cost = 0.
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run one optimization step and fetch the loss for this batch
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x, Y: batch_y})
            # Accumulate the running mean of the per-batch loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Fraction of test images whose highest-scoring class matches the label
    print('Test accuracy: {0}'.format(sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels})))
    


Extracting datasets/MNIST_data/train-images-idx3-ubyte.gz
Extracting datasets/MNIST_data/train-labels-idx1-ubyte.gz
Extracting datasets/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting datasets/MNIST_data/t10k-labels-idx1-ubyte.gz
Epoch: 0001 cost=36.926825510
Epoch: 0002 cost=12.347427425
Epoch: 0003 cost=4.301328437
Epoch: 0004 cost=1.599599931
Epoch: 0005 cost=0.677947677
Epoch: 0006 cost=0.355084258
Epoch: 0007 cost=0.234796006
Epoch: 0008 cost=0.185010598
Epoch: 0009 cost=0.160352414
Epoch: 0010 cost=0.145003398
Epoch: 0011 cost=0.134433167
Epoch: 0012 cost=0.125563737
Epoch: 0013 cost=0.118221244
Epoch: 0014 cost=0.111891830
Epoch: 0015 cost=0.105996255
Optimization Finished!
Test accuracy: 0.9660999774932861
