# Week 4: Applying a DNN to the MNIST dataset with various settings


In [1]:
# Import packages 

import tensorflow as tf
import random
import matplotlib.pyplot as plt

In [2]:
## Download MNIST data
# Read the MNIST splits from the "MNIST_data/" directory.
# one_hot=True is requested so that the labels can be fed straight into the
# [None, 10] `Y` placeholder defined further down.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# Sanity check: display the path of the Python interpreter this kernel is
# running, to confirm the notebook is using the intended environment.
import sys
sys.executable

'/Users/wahn/anaconda3/envs/tf/bin/python'

# Set DNN parameters here

In [4]:
# Hyperparameters used by the models in this notebook.
learning_rate = 0.001      # learning rate passed to the Adam optimizer
training_epochs = 15       # number of epochs run by each training loop
batch_size = 100           # examples per mini-batch
num_nodes = 64             # number of nodes per hidden layer (5 layers in total)
my_keep_prob = 0.7         # dropout keep probability fed during training (the Model 2 cell resets it to 0.2)

## Model 1

* Number of layers = 5 (4 hidden layers + 1 output layer)
* Number of nodes per hidden layer = 64 (`num_nodes` = 64)
* Activation function = ReLU
* Initializer = Xavier initializer
* Optimizer = Adam optimizer
* Use Dropout = TRUE
* Keep probability = 0.7

In [5]:
# Input placeholders; the leading None leaves the batch dimension unspecified.
X = tf.placeholder(tf.float32, [None, 784])   # images, 784 values per example
Y = tf.placeholder(tf.float32, [None, 10])    # one-hot labels, 10 classes

# Dropout keep probability, fed at run time: `my_keep_prob` (0.7) while training,
# but it should always be 1 for testing so that no units are dropped.
keep_prob = tf.placeholder(tf.float32)


## Some links for various settings
* Xavier initialization: http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization
    
    Goal: "We want the variance of the input and output to be the same"
    
    
* Some tests with various dropout rates: https://medium.com/@amarbudhiraja/https-medium-com-amarbudhiraja-learning-less-to-learn-better-dropout-in-deep-machine-learning-74334da4bfc5


# Weights & biases for the NN layers (5 layers)

In [6]:
# How to do Xavier initialization in TensorFlow:
# http://stackoverflow.com/questions/33640581/how-to-do-xavier-initialization-on-tensorflow
# Layer 1 (hidden): 784 inputs -> num_nodes units
W1 = tf.get_variable("W1", shape=[784, num_nodes],
                     initializer=tf.contrib.layers.xavier_initializer())    # Xavier-initialized weights
b1 = tf.Variable(tf.random_normal([num_nodes]))                             # bias initialized from a random normal
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)                                      # affine transform followed by ReLU
L1 = tf.nn.dropout(L1, keep_prob=keep_prob)                                 # use Dropout; active when the fed `keep_prob` < 1


In [7]:
# Layer 2 (hidden): num_nodes -> num_nodes, fed by the dropped-out output of layer 1
W2 = tf.get_variable("W2", shape=[num_nodes, num_nodes],
                     initializer=tf.contrib.layers.xavier_initializer())    # Xavier-initialized weights
b2 = tf.Variable(tf.random_normal([num_nodes]))                             # bias initialized from a random normal
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)                                     # affine transform followed by ReLU
L2 = tf.nn.dropout(L2, keep_prob=keep_prob)                                 # dropout driven by the shared `keep_prob` placeholder

In [8]:
# Layer 3 (hidden): num_nodes -> num_nodes, fed by the dropped-out output of layer 2
W3 = tf.get_variable("W3", shape=[num_nodes, num_nodes],
                     initializer=tf.contrib.layers.xavier_initializer())    # Xavier-initialized weights
b3 = tf.Variable(tf.random_normal([num_nodes]))                             # bias initialized from a random normal
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)                                     # affine transform followed by ReLU
L3 = tf.nn.dropout(L3, keep_prob=keep_prob)                                 # dropout driven by the shared `keep_prob` placeholder

In [9]:
# Layer 4 (hidden): num_nodes -> num_nodes, fed by the dropped-out output of layer 3
W4 = tf.get_variable("W4", shape=[num_nodes, num_nodes],
                     initializer=tf.contrib.layers.xavier_initializer())    # Xavier-initialized weights
b4 = tf.Variable(tf.random_normal([num_nodes]))                             # bias initialized from a random normal
L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)                                     # affine transform followed by ReLU
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)                                 # dropout driven by the shared `keep_prob` placeholder

In [10]:
# Layer 5 (output): num_nodes -> 10 values, one per class.
# No ReLU or dropout is applied here; `hypothesis` is passed as `logits`
# to the loss op defined in the next cell.
W5 = tf.get_variable("W5", shape=[num_nodes, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L4, W5) + b5

In [11]:
# Define the cost/loss and the optimizer.
# tf.nn.softmax_cross_entropy_with_logits is deprecated --> use `tf.nn.softmax_cross_entropy_with_logits_v2`
# The loss op receives `hypothesis` as logits and `Y` as labels; tf.reduce_mean
# then averages its output into a single scalar cost.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=hypothesis, labels=Y))
# Training op: Adam minimizing `cost` with the configured `learning_rate`.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [12]:
# Open a session and run the global-variables initializer in it,
# ahead of the training loop in the next cell.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [13]:
# Train Model 1: one optimizer step per mini-batch; after every epoch,
# report the mean of the per-batch costs.
for epoch in range(training_epochs):
    total_batch = int(mnist.train.num_examples / batch_size)
    avg_cost = 0

    for step in range(total_batch):
        images, labels = mnist.train.next_batch(batch_size)
        # Training step, so feed `my_keep_prob` to keep dropout active.
        results = sess.run(
            [cost, optimizer],
            feed_dict={X: images, Y: labels, keep_prob: my_keep_prob})
        batch_cost = results[0]
        avg_cost = avg_cost + batch_cost / total_batch

    epoch_label = '%04d' % (epoch + 1)
    cost_label = '{:.9f}'.format(avg_cost)
    print('Epoch:', epoch_label, 'cost =', cost_label)

print('Learning Finished!')

Epoch: 0001 cost = 1.071918408
Epoch: 0002 cost = 0.475803408
Epoch: 0003 cost = 0.382823207
Epoch: 0004 cost = 0.334470262
Epoch: 0005 cost = 0.299208660
Epoch: 0006 cost = 0.279621112
Epoch: 0007 cost = 0.268361372
Epoch: 0008 cost = 0.248870114
Epoch: 0009 cost = 0.245629756
Epoch: 0010 cost = 0.227052749
Epoch: 0011 cost = 0.221657728
Epoch: 0012 cost = 0.216935219
Epoch: 0013 cost = 0.207610966
Epoch: 0014 cost = 0.200019358
Epoch: 0015 cost = 0.200395236
Learning Finished!


# Test model and check accuracy

In [14]:
# Compare the highest-scoring class of each prediction with the class
# marked in the one-hot label, then average the matches into an accuracy.
predicted_class = tf.argmax(hypothesis, 1)
true_class = tf.argmax(Y, 1)
correct_prediction = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Always set `keep_prob` to 1 here so that dropout is disabled during testing.
test_feed = {X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1}
print('Accuracy:', sess.run(accuracy, feed_dict=test_feed))


Accuracy: 0.9671


# Get one sample and check the prediction on the sample

In [15]:
# Pick one random test example and compare its true label with the prediction.
r = random.randint(0, mnist.test.num_examples - 1)
sample_image = mnist.test.images[r:r + 1]
sample_label = mnist.test.labels[r:r + 1]

# The label slice is a NumPy array from the MNIST loader, so take its argmax
# directly instead of building a throwaway TensorFlow op and running it.
print("Label: ", sample_label.argmax(axis=1))

# Evaluate the existing `hypothesis` op (dropout disabled with keep_prob = 1)
# and take the argmax in NumPy, so that re-running this cell does not keep
# adding new argmax ops to the graph.
sample_logits = sess.run(hypothesis, feed_dict={X: sample_image, keep_prob: 1})
print("Prediction: ", sample_logits.argmax(axis=1))

# plt.imshow(mnist.test.images[r:r + 1].
#           reshape(28, 28), cmap='Greys', interpolation='nearest')
# plt.show()

Label:  [7]
Prediction:  [7]


## Model 2
### Difference from Model 1: Dropout rate (keep probability = 0.2 instead of 0.7)
* Number of layers = 5 (4 hidden layers + 1 output layer)
* Number of nodes per hidden layer = 64 (`num_nodes` = 64)
* Activation function = ReLU
* Initializer = Xavier initializer
* Optimizer = Adam optimizer
* Use Dropout = TRUE
* Keep probability = 0.2

In [16]:
# Model 2: same architecture as Model 1, built on the same X / Y / keep_prob
# placeholders; only the keep probability fed during training changes.
my_keep_prob = 0.2

# Layer 1 (hidden): 784 inputs -> num_nodes units
m2_W1 = tf.get_variable("m2_W1", shape=[784, num_nodes],
                        initializer=tf.contrib.layers.xavier_initializer())
m2_b1 = tf.Variable(tf.random_normal([num_nodes]))
m2_L1 = tf.nn.relu(tf.matmul(X, m2_W1) + m2_b1)
m2_L1 = tf.nn.dropout(m2_L1, keep_prob=keep_prob)
# Layer 2 (hidden): num_nodes -> num_nodes
m2_W2 = tf.get_variable("m2_W2", shape=[num_nodes, num_nodes],
                        initializer=tf.contrib.layers.xavier_initializer())
m2_b2 = tf.Variable(tf.random_normal([num_nodes]))
m2_L2 = tf.nn.relu(tf.matmul(m2_L1, m2_W2) + m2_b2)
m2_L2 = tf.nn.dropout(m2_L2, keep_prob=keep_prob)
# Layer 3 (hidden): num_nodes -> num_nodes
m2_W3 = tf.get_variable("m2_W3", shape=[num_nodes, num_nodes],
                        initializer=tf.contrib.layers.xavier_initializer())
m2_b3 = tf.Variable(tf.random_normal([num_nodes]))
m2_L3 = tf.nn.relu(tf.matmul(m2_L2, m2_W3) + m2_b3)
m2_L3 = tf.nn.dropout(m2_L3, keep_prob=keep_prob)
# Layer 4 (hidden): num_nodes -> num_nodes
m2_W4 = tf.get_variable("m2_W4", shape=[num_nodes, num_nodes],
                        initializer=tf.contrib.layers.xavier_initializer())
m2_b4 = tf.Variable(tf.random_normal([num_nodes]))
m2_L4 = tf.nn.relu(tf.matmul(m2_L3, m2_W4) + m2_b4)
m2_L4 = tf.nn.dropout(m2_L4, keep_prob=keep_prob)
# Layer 5 (output): num_nodes -> 10 values, no ReLU or dropout.
# NOTE: from here on `hypothesis`, `cost` and `optimizer` refer to Model 2.
m2_W5 = tf.get_variable("m2_W5", shape=[num_nodes, 10],
                        initializer=tf.contrib.layers.xavier_initializer())
m2_b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(m2_L4, m2_W5) + m2_b5

# define cost/loss & optimizer
# use `tf.nn.softmax_cross_entropy_with_logits_v2` (the non-v2 op is deprecated)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# initialize
# Close Model 1's session before rebinding `sess`: once the name is reused,
# nothing else refers to the old session, so it could never be closed and
# would hold on to its resources for the lifetime of the kernel.
sess.close()
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# train model #2
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Training step: feed the (now 0.2) keep probability so dropout is active.
        feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: my_keep_prob}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        # Accumulate the mean of the per-batch costs for this epoch.
        avg_cost += c / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning Finished!')

Epoch: 0001 cost = 3.642640914
Epoch: 0002 cost = 2.507637128
Epoch: 0003 cost = 2.400959353
Epoch: 0004 cost = 2.326699578
Epoch: 0005 cost = 2.222945050
Epoch: 0006 cost = 2.133800762
Epoch: 0007 cost = 2.044822040
Epoch: 0008 cost = 1.977253184
Epoch: 0009 cost = 1.922394416
Epoch: 0010 cost = 1.895181823
Epoch: 0011 cost = 1.877573472
Epoch: 0012 cost = 1.849309535
Epoch: 0013 cost = 1.840729435
Epoch: 0014 cost = 1.823032065
Epoch: 0015 cost = 1.806731970
Learning Finished!


In [17]:
# Accuracy of Model 2 (`hypothesis` now refers to Model 2's output layer):
# compare the highest-scoring class with the class marked in the one-hot label.
predicted_class = tf.argmax(hypothesis, 1)
true_class = tf.argmax(Y, 1)
correct_prediction = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Feed keep_prob = 1 so that dropout is disabled during testing.
test_feed = {X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1}
print('Accuracy:', sess.run(accuracy, feed_dict=test_feed))


Accuracy: 0.2813
