## Importing the data from TensorFlow

In [9]:
## Credits @https://leonardoaraujosantos.gitbooks.io/artificial-inteligence/content/multi_layer_perceptron_mnist.html

In [18]:
## Importing the data from tensorflow itself.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
data = input_data.read_data_sets("MNIST_data/", one_hot=True) 

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Defining the Network Parameters

In [19]:
## Defining some network parameters
hidden_1 = 512
hidden_2 = 128
input_size = 784
classes = 10

## Defining the placeholders\
x = tf.placeholder(tf.float32, [None, 784])    ## Placeholders for x's.
y_act = tf.placeholder(tf.float32, [None, 10]) ## Placeholders for y's.

### Weights Initialization Using two different Techniques

In [21]:
## Weights initialization

## Initialization of weights for sigmoid units using Xavier/Glorot initialization technique.
weights_sig = {
    'h1':tf.Variable(tf.random_normal([input_size, hidden_1],stddev=0.039, mean=0)), ## 84x512 # sqrt(2/(784+512)) = 0.039
    'h2':tf.Variable(tf.random_normal([hidden_1, hidden_2],stddev=0.055, mean=0)),   ##512x128 # sqrt(2/(512+128)) = 0.055
    'out':tf.Variable(tf.random_normal([hidden_2, classes],stddev=0.120, mean=0))   ##128x10
}

## Initialization of weights for relu units using HE initialization technique.
weights_relu = {
    'h1':tf.Variable(tf.random_normal([input_size, hidden_1],stddev=0.062, mean=0)), ##784*512
    'h2':tf.Variable(tf.random_normal([hidden_1, hidden_2],stddev=0.125, mean=0)),   ##512*128
    'out':tf.Variable(tf.random_normal([hidden_2, classes],stddev=0.120, mean=0))   ##128*10
}

## Addition of biases.
biases = {
    'b1':tf.Variable(tf.random_normal([hidden_1])),    ##512*1
    'b2':tf.Variable(tf.random_normal([hidden_2])),    ##128*1
    'out':tf.Variable(tf.random_normal([classes]))     ##10*1
}

In [22]:
## Trainng Parameters
training_epochs = 15
learning_rate = 0.001
batch_size = 100
display_step = 1

### Creating Model 1 with Sigmoidal Activations

In [37]:
## Create model 1 with Sigmoidal activation functions.
def multilayer_perceptron(x, weights, biases):
    
    ## printing some of the shapes, weights of the data
    print('x',x.get_shape(), 'W[h1]:', weights['h1'].get_shape(), 'b1:',biases['b1'].get_shape())
    
    ## Defining the 1st layer with sigmoid activation
    layer1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer1 = tf.nn.sigmoid(layer1)
    print('layer_1:',layer1.get_shape(), 'W[h2]:', weights['h2'].get_shape(), 'b2:',biases['b2'].get_shape())
    
    ## defining the 2nd layer with sigmoid activation
    layer2 = tf.add(tf.matmul(layer1, weights['h2']), biases['b2'])
    layer2 = tf.nn.sigmoid(layer2)
    print('layer_1:',layer2.get_shape(), 'W[h2]:', weights['out'].get_shape(), 'b3:',biases['out'].get_shape())
    
    ## Output layer with sigmoid activation
    out_layer = tf.matmul(layer2, weights['out']) + biases['out']     
    out_layer = tf.nn.sigmoid(out_layer)
    print('out_layer:',out_layer.get_shape())

    return out_layer

### Running the model

In [33]:
## Deifing the optimizers and cost function and actually running the model
pred = multilayer_perceptron(x, weights_sig, biases)

## defining the loss function
cost_fn = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = pred, labels = y_act))

## Optimizers
adam = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost_fn)

## Running the model
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    
    xs, ytrs, ytes = [], [], []
    for epoch in range(training_epochs):
        train_avg_cost = 0.
        test_avg_cost = 0.
        total_batch = int(data.train.num_examples/batch_size)
        
        for i in range(total_batch):
            batch_xs, batch_ys = data.train.next_batch(batch_size)
            
            _, c, w = sess.run([adam, cost_fn, weights_sig], feed_dict={x:batch_xs, y_act: batch_ys})
            train_avg_cost = train_avg_cost + c / total_batch
            c = sess.run(cost_fn, feed_dict={x: data.test.images, y_act:data.test.labels})
            test_avg_cost = test_avg_cost + c / total_batch
            
        xs.append(epoch)
        ytrs.append(train_avg_cost)
        ytes.append(test_avg_cost)
        
        if epoch%display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "train cost={:.9f}".format(train_avg_cost), "test cost={:.9f}".format(test_avg_cost))
        
        ## Prediction on the test data
        predictions = tf.equal(tf.argmax(pred,1), tf.argmax(y_act,1))
        accuracy = tf.reduce_mean(tf.cast(predictions, tf.float32))
        print("Accuracy:", accuracy.eval({x: data.test.images, y_act: data.test.labels}))

x (?, 784) W[h1]: (784, 512) b1: (512,)
layer_1: (?, 512) W[h2]: (512, 128) b2: (128,)
layer_1: (?, 128) W[h2]: (128, 10) b3: (10,)
out_layer: (?, 10)
Epoch: 0001 train cost=1.686316505 test cost=1.679869107
Accuracy: 0.9226
Epoch: 0002 train cost=1.536799784 test cost=1.534815071
Accuracy: 0.9394
Epoch: 0003 train cost=1.514831131 test cost=1.515243659
Accuracy: 0.9497
Epoch: 0004 train cost=1.503162249 test cost=1.504582683
Accuracy: 0.9575
Epoch: 0005 train cost=1.494824588 test cost=1.497273717
Accuracy: 0.9631
Epoch: 0006 train cost=1.488420636 test cost=1.492361705
Accuracy: 0.9674
Epoch: 0007 train cost=1.484014179 test cost=1.489046976
Accuracy: 0.9711
Epoch: 0008 train cost=1.480320408 test cost=1.486407732
Accuracy: 0.9727
Epoch: 0009 train cost=1.477567188 test cost=1.484454814
Accuracy: 0.9754
Epoch: 0010 train cost=1.475278144 test cost=1.483601665
Accuracy: 0.9757
Epoch: 0011 train cost=1.473371278 test cost=1.482388913
Accuracy: 0.9763
Epoch: 0012 train cost=1.471868505 

### Creating Model 2 with Relu Activation Units

In [38]:
## Creating the Relu model
def multilayer_perceptron_relu(x, weights, biases):

    print( 'x:', x.get_shape(), 'W[h1]:', weights['h1'].get_shape(), 'b1:', biases['b1'].get_shape())        
    layer1 = tf.add(tf.matmul(x, weights['h1']), biases['b1']) 
    layer1 = tf.nn.relu(layer1)
    print( 'layer_1:', layer1.get_shape(), 'W[h2]:', weights['h2'].get_shape(), 'b2', biases['b2'].get_shape())        
    
    # Hidden layer with ReLu activation
    layer2 = tf.add(tf.matmul(layer1, weights['h2']), biases['b2']) 
    layer2 = tf.nn.relu(layer2)
    print( 'layer_2:', layer2.get_shape(), 'W[out]:', weights['out'].get_shape(), 'b3:', biases['out'].get_shape())        
    
    # Output layer with Sigmoid activation
    out_layer = tf.matmul(layer2, weights['out']) + biases['out']   
    out_layer = tf.nn.sigmoid(out_layer)
    print('out_layer:',out_layer.get_shape())

    return out_layer

### Running the Model

In [41]:
## Deifing the optimizers and cost function and actually running the model
pred_relu = multilayer_perceptron_relu(x, weights_relu, biases)

## Defining the loss function
cost_relu = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = pred_relu, labels = y_act))

## Defining the adam optimizer
adam = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost_relu)

## Running the model
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    
    xs, ytrs, ytes = [], [], []
    for epoch in range(training_epochs):
        train_avg_cost = 0.
        test_avg_cost = 0.
        total_batch = int(data.train.num_examples/batch_size)

        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = data.train.next_batch(batch_size)

            # We are usinng adam optimizer
            _, c, w = sess.run([adam, cost_fn, weights_sig], feed_dict={x:batch_xs, y_act: batch_ys})
            train_avg_cost = train_avg_cost + c / total_batch
            c = sess.run(cost_fn, feed_dict={x: data.test.images, y_act:data.test.labels})
            test_avg_cost = test_avg_cost + c / total_batch

        xs.append(epoch)
        ytrs.append(train_avg_cost)
        ytes.append(test_avg_cost)
       
        if epoch%display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "train cost={:.9f}".format(train_avg_cost), "test cost={:.9f}".format(test_avg_cost))


    # we are calculating the final accuracy on the test data
    predictions = tf.equal(tf.argmax(pred_relu,1), tf.argmax(y_act,1))
    accuracy = tf.reduce_mean(tf.cast(predictions, tf.float32))
    print("Accuracy:", accuracy.eval({x: data.test.images, y_act: data.test.labels}))

x: (?, 784) W[h1]: (784, 512) b1: (512,)
layer_1: (?, 512) W[h2]: (512, 128) b2 (128,)
layer_2: (?, 128) W[out]: (128, 10) b3: (10,)
out_layer: (?, 10)
Epoch: 0001 train cost=1.557785965 test cost=1.551697966
Epoch: 0002 train cost=1.499787403 test cost=1.499863870
Epoch: 0003 train cost=1.488317022 test cost=1.491868765
Epoch: 0004 train cost=1.482088075 test cost=1.487862669
Epoch: 0005 train cost=1.478141230 test cost=1.484900213
Epoch: 0006 train cost=1.475489794 test cost=1.484098081
Epoch: 0007 train cost=1.473881273 test cost=1.482593612
Epoch: 0008 train cost=1.471833188 test cost=1.481582415
Epoch: 0009 train cost=1.470682928 test cost=1.481288508
Epoch: 0010 train cost=1.470501536 test cost=1.481520288
Epoch: 0011 train cost=1.469389922 test cost=1.481111101
Epoch: 0012 train cost=1.468386085 test cost=1.480540403
Epoch: 0013 train cost=1.467927486 test cost=1.480505963
Epoch: 0014 train cost=1.467917865 test cost=1.480059120
Epoch: 0015 train cost=1.467357543 test cost=1.479