## Softmax classifier for MNIST

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
# Hyperparameters shared by every model trained in this notebook.
learning_rate = 0.001  # step size passed to both the gradient-descent and the Adam optimizers
training_epochs = 101  # number of passes of the training loop; with display_step = 20 the final epoch (printed as 0101) is logged
batch_size = 100  # number of examples requested from mnist.train.next_batch per step
display_step = 20  # the average cost is printed whenever epoch % display_step == 0

In [3]:
# tf Graph Input
x = tf.placeholder("float", [None, 784]) # flattened MNIST image: 28*28 = 784 pixels
y = tf.placeholder("float", [None, 10]) # label vector over the 10 digit classes (0-9)

# Model parameters, both starting at zero
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# Linear class scores; the softmax output is kept because the test cell
# takes its arg-max to obtain the predicted digit.
logits = tf.matmul(x, W) + b
activation = tf.nn.softmax(logits) # Softmax

# Minimize error using cross entropy.
# The loss is computed from the raw logits by the fused op rather than as
# -sum(y * log(softmax)): the value is the same, but taking tf.log of a
# probability that has underflowed to 0 would turn the cost into NaN.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) # Gradient descent

In [4]:
# Read the MNIST data sets from the local data directory, with one-hot labels
mnist = input_data.read_data_sets("train_data/MNIST_data/", one_hot=True)

# Build the variable-initialisation op, open a session and run the op once
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

Extracting train_data/MNIST_data/train-images-idx3-ubyte.gz
Extracting train_data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting train_data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting train_data/MNIST_data/t10k-labels-idx1-ubyte.gz


In [5]:
# Training cycle
# Number of whole mini-batches per epoch; it does not change between epochs.
total_batch = mnist.train.num_examples // batch_size
for epoch in range(training_epochs):
    avg_cost = 0.
    # Loop over all batches
    for step in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Apply the update and fetch the batch loss in ONE graph execution.
        # Running `optimizer` and `cost` in two separate sess.run calls would
        # push every batch through the network twice.
        _train, batch_cost = sess.run([optimizer, cost],
                                      feed_dict={x: batch_xs, y: batch_ys})
        # Accumulate the mean loss over the epoch
        avg_cost += batch_cost / total_batch
    # Display logs per epoch step
    if epoch % display_step == 0:
        print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

print("Optimization Finished!")

Epoch: 0001 cost= 2.045250847
Epoch: 0021 cost= 0.587260515
Epoch: 0041 cost= 0.477556967
Epoch: 0061 cost= 0.431832330
Epoch: 0081 cost= 0.405301124
Epoch: 0101 cost= 0.387421326
Optimization Finished!


In [6]:
# Test model: a sample counts as correct when the most probable class
# equals the class marked in its label vector.
predicted_digit = tf.argmax(activation, 1)
true_digit = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_digit, true_digit)
# Calculate accuracy as the mean of the 0/1 correctness indicators
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
test_feed = {x: mnist.test.images, y: mnist.test.labels}
print("Accuracy: ", sess.run(accuracy, feed_dict=test_feed))

Accuracy:  0.9034


## Neural Nets (NN) for MNIST

In [7]:
# The placeholders x and y created for the softmax classifier are reused as
# the inputs of this network.

# Store layers weight & bias (initial values drawn with tf.random_normal)
W1 = tf.Variable(tf.random_normal([784, 256]))
W2 = tf.Variable(tf.random_normal([256, 256]))
W3 = tf.Variable(tf.random_normal([256, 10]))

B1 = tf.Variable(tf.random_normal([256]))
B2 = tf.Variable(tf.random_normal([256]))
B3 = tf.Variable(tf.random_normal([10]))

# Construct model: two ReLU hidden layers of 256 units and a linear output layer
L1 = tf.nn.relu(tf.add(tf.matmul(x, W1), B1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), B2)) # Hidden layer with ReLU activation
hypothesis = tf.add(tf.matmul(L2, W3), B3) # No need to use softmax here: the loss op applies it

# Define loss and optimizer.
# labels/logits are passed by keyword: TensorFlow >= 1.0 refuses positional
# arguments for this op, and the keyword form is also valid on older releases
# whose positional order was (logits, labels).
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=hypothesis)) # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer

In [8]:
# The session used for the softmax classifier is not needed any more;
# close it so the resources it holds are released before a new one is opened.
sess.close()

# The initializer has to be rebuilt so that it covers the variables that
# were added to the graph for this network.
init = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init)

In [9]:
# Training cycle
# Number of whole mini-batches per epoch; it does not change between epochs.
total_batch = mnist.train.num_examples // batch_size
for epoch in range(training_epochs):
    avg_cost = 0.
    # Loop over all batches
    for step in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Apply the update and fetch the batch loss in ONE graph execution.
        # Running `optimizer` and `cost` in two separate sess.run calls would
        # push every batch through the network twice.
        _train, batch_cost = sess.run([optimizer, cost],
                                      feed_dict={x: batch_xs, y: batch_ys})
        # Accumulate the mean loss over the epoch
        avg_cost += batch_cost / total_batch
    # Display logs per epoch step
    if epoch % display_step == 0:
        print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

print("Optimization Finished!")

Epoch: 0001 cost= 166.966616943
Epoch: 0021 cost= 0.183849186
Epoch: 0041 cost= 0.054312433
Epoch: 0061 cost= 0.028181199
Epoch: 0081 cost= 0.022873562
Epoch: 0101 cost= 0.004413743
Optimization Finished!


In [10]:
# Test model: compare the arg-max of the network output with the arg-max
# of the label vector for every test sample.
predicted_digit = tf.argmax(hypothesis, 1)
true_digit = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_digit, true_digit)
# Calculate accuracy as the mean of the 0/1 correctness indicators
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
test_feed = {x: mnist.test.images, y: mnist.test.labels}
print("Accuracy: ", sess.run(accuracy, feed_dict=test_feed))

Accuracy:  0.9693


## Xavier initialization

In [11]:
import math

In [12]:
# http://stackoverflow.com/questions/33640581/how-to-do-xavier-initialization-on-tensorflow
def xavier_init(n_inputs, n_outputs, uniform=True):
  """Create a Xavier (Glorot) initializer for a layer's weight matrix.

  The scale is derived from the layer's fan-in plus fan-out so that the
  magnitude of the gradients stays roughly the same in all layers, following

    Xavier Glorot and Yoshua Bengio (2010):
    Understanding the difficulty of training deep feedforward neural
    networks. International conference on artificial intelligence and
    statistics.

  Args:
    n_inputs: The number of input nodes into each output.
    n_outputs: The number of output nodes for each input.
    uniform: If true the values are drawn from a uniform distribution,
      otherwise from a truncated normal distribution.

  Returns:
    The initializer object built by TensorFlow for the chosen distribution.
  """
  fan_total = n_inputs + n_outputs

  if not uniform:
    # A numerator of 3 yields approximately the same limits as the uniform
    # case, because the truncated normal repicks values that lie more than
    # 2 standard deviations away from the mean.
    return tf.truncated_normal_initializer(stddev=math.sqrt(3.0 / fan_total))

  # The constant 6 is the one used in the paper.
  limit = math.sqrt(6.0 / fan_total)
  return tf.random_uniform_initializer(-limit, limit)

In [13]:
# Store layers weight & bias
# Weight matrices of the 784 -> 256 -> 256 -> 10 network, created through
# tf.get_variable so that the Xavier initializer can be supplied; each
# initializer is built from the same fan-in / fan-out as the variable's shape.
# NOTE(review): tf.get_variable registers variables by name, so executing this
# cell a second time in the same graph presumably fails because "W1" already
# exists - confirm, and restart the kernel before re-running if so.
W1 = tf.get_variable("W1", shape=[784, 256], initializer=xavier_init(784, 256))
W2 = tf.get_variable("W2", shape=[256, 256], initializer=xavier_init(256, 256))
W3 = tf.get_variable("W3", shape=[256, 10], initializer=xavier_init(256, 10))

In [14]:
# Biases for the Xavier-initialised network (initial values drawn with tf.random_normal)
B1 = tf.Variable(tf.random_normal([256]))
B2 = tf.Variable(tf.random_normal([256]))
B3 = tf.Variable(tf.random_normal([10]))

# Construct model: two ReLU hidden layers of 256 units and a linear output layer
L1 = tf.nn.relu(tf.add(tf.matmul(x, W1), B1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), B2)) # Hidden layer with ReLU activation
hypothesis = tf.add(tf.matmul(L2, W3), B3) # No need to use softmax here: the loss op applies it

# Define loss and optimizer.
# labels/logits are passed by keyword: TensorFlow >= 1.0 refuses positional
# arguments for this op, and the keyword form is also valid on older releases
# whose positional order was (logits, labels).
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=hypothesis)) # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer

In [15]:
# The session of the previous model is not needed any more; close it so the
# resources it holds are released before a new one is opened.
sess.close()

# The initializer has to be rebuilt so that it covers the variables that
# were added to the graph for this network.
init = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init)

In [16]:
# Training cycle
# Number of whole mini-batches per epoch; it does not change between epochs.
total_batch = mnist.train.num_examples // batch_size
for epoch in range(training_epochs):
    avg_cost = 0.
    # Loop over all batches
    for step in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Apply the update and fetch the batch loss in ONE graph execution.
        # Running `optimizer` and `cost` in two separate sess.run calls would
        # push every batch through the network twice.
        _train, batch_cost = sess.run([optimizer, cost],
                                      feed_dict={x: batch_xs, y: batch_ys})
        # Accumulate the mean loss over the epoch
        avg_cost += batch_cost / total_batch
    # Display logs per epoch step
    if epoch % display_step == 0:
        print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

print("Optimization Finished!")

Epoch: 0001 cost= 0.308614404
Epoch: 0021 cost= 0.001740475
Epoch: 0041 cost= 0.001272665
Epoch: 0061 cost= 0.000479724
Epoch: 0081 cost= 0.000000357
Epoch: 0101 cost= 0.000000002
Optimization Finished!


In [17]:
# Test model: compare the arg-max of the network output with the arg-max
# of the label vector for every test sample.
predicted_digit = tf.argmax(hypothesis, 1)
true_digit = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_digit, true_digit)
# Calculate accuracy as the mean of the 0/1 correctness indicators
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
test_feed = {x: mnist.test.images, y: mnist.test.labels}
print("Accuracy: ", sess.run(accuracy, feed_dict=test_feed))

Accuracy:  0.9844


## Deeper network & dropout

In [18]:
# Store layers weight & bias for the five-layer network
# (784 -> 512 -> 256 -> 128 -> 64 -> 10).

def _xavier_weight(name, fan_in, fan_out):
    """Create a [fan_in, fan_out] weight variable with a matching Xavier initializer."""
    return tf.get_variable(name, shape=[fan_in, fan_out],
                           initializer=xavier_init(fan_in, fan_out))


def _normal_bias(size):
    """Create a bias vector of the given length from tf.random_normal values."""
    return tf.Variable(tf.random_normal([size]))


# The graph names continue at "W9" instead of restarting at "W1": the names
# "W1".."W3" are already used by the get_variable calls of the previous section.
W1 = _xavier_weight("W9", 784, 512)
W2 = _xavier_weight("W10", 512, 256)
W3 = _xavier_weight("W11", 256, 128)
W4 = _xavier_weight("W12", 128, 64)
W5 = _xavier_weight("W13", 64, 10)

B1 = _normal_bias(512)
B2 = _normal_bias(256)
B3 = _normal_bias(128)
B4 = _normal_bias(64)
B5 = _normal_bias(10)

In [19]:
# construct model
# NOTE: despite its name, dropout_rate is handed to tf.nn.dropout as the
# second argument, which in the TF 1.x API is the probability of KEEPING a
# unit. That matches how it is fed: 0.7 while training, 1.0 when testing.
dropout_rate = tf.placeholder("float")

# Four hidden layers, each a ReLU affine layer followed by dropout
_L1 = tf.nn.relu(tf.matmul(x, W1) + B1)
L1 = tf.nn.dropout(_L1, dropout_rate)

_L2 = tf.nn.relu(tf.matmul(L1, W2) + B2)
L2 = tf.nn.dropout(_L2, dropout_rate)

_L3 = tf.nn.relu(tf.matmul(L2, W3) + B3)
L3 = tf.nn.dropout(_L3, dropout_rate)

_L4 = tf.nn.relu(tf.matmul(L3, W4) + B4)
L4 = tf.nn.dropout(_L4, dropout_rate)

# Linear output layer; softmax is left to the loss op
hypothesis = tf.matmul(L4, W5) + B5

In [20]:
# Define loss and optimizer.
# labels/logits are passed by keyword: TensorFlow >= 1.0 refuses positional
# arguments for this op, and the keyword form is also valid on older releases
# whose positional order was (logits, labels).
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=hypothesis)) # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer

# The session of the previous model is not needed any more; close it so the
# resources it holds are released before a new one is opened.
sess.close()

# Rebuilt so that it covers the variables added for this network
init = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init)

In [21]:
# Training cycle
# Number of whole mini-batches per epoch; it does not change between epochs.
total_batch = mnist.train.num_examples // batch_size
for epoch in range(training_epochs):
    avg_cost = 0.
    # Loop over all batches
    for step in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Apply the update and fetch the batch loss in ONE graph execution.
        # A separate sess.run(cost, ...) would run the network a second time
        # and, with dropout active, under a freshly sampled mask - so the
        # logged value would not be the loss that was actually optimised.
        _train, batch_cost = sess.run(
            [optimizer, cost],
            feed_dict={x: batch_xs, y: batch_ys, dropout_rate: 0.7})
        # Accumulate the mean loss over the epoch
        avg_cost += batch_cost / total_batch
    # Display logs per epoch step
    if epoch % display_step == 0:
        print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

print("Optimization Finished!")

Epoch: 0001 cost= 0.630215467
Epoch: 0021 cost= 0.035704602
Epoch: 0041 cost= 0.022299135
Epoch: 0061 cost= 0.017929360
Epoch: 0081 cost= 0.014924662
Epoch: 0101 cost= 0.013403191
Optimization Finished!


In [22]:
# Test model: compare the arg-max of the network output with the arg-max
# of the label vector for every test sample.
predicted_digit = tf.argmax(hypothesis, 1)
true_digit = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_digit, true_digit)
# Calculate accuracy as the mean of the 0/1 correctness indicators
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# dropout_rate is fed 1.0 here, unlike the 0.7 used in the training loop
test_feed = {x: mnist.test.images, y: mnist.test.labels, dropout_rate: 1.0}
print("Accuracy: ", sess.run(accuracy, feed_dict=test_feed))

Accuracy:  0.9848
