# Series 8: Neural Networks

## Problem 2 (Convolutional Neural Networks)

This version employs the more involved solution described in the advanced TensorFlow tutorial here: https://www.tensorflow.org/versions/r0.8/tutorials/mnist/pros/index.html.

In [4]:
import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST dataset from 'data/MNIST_data'. one_hot=True asks for the
# labels as one-hot vectors rather than integer class ids.
mnist = input_data.read_data_sets('data/MNIST_data', one_hot=True)

Extracting data/MNIST_data/train-images-idx3-ubyte.gz
Extracting data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting data/MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# An InteractiveSession installs itself as the default session, which is
# what lets later cells call `.eval()` / `.run()` on tensors and ops
# without passing a session explicitly.
sess = tf.InteractiveSession()

In [10]:
# Network input: one flattened 28x28 image per row. The leading `None`
# leaves the batch size open so any number of images can be fed at once.
x = tf.placeholder(tf.float32, shape=[None, 28 * 28])
# Target labels: one row per image, one column per class (10 classes).
y_ = tf.placeholder(tf.float32, shape=[None, 10])

### Utility functions

In [5]:
# Weights start out as small random noise (to break symmetry), while
# biases start out slightly positive to avoid dead neurons (which can
# happen when using ReLU neurons).

def weight_variable(shape):
    """Create a weight variable of the given shape.

    The initial values are sampled from a truncated normal distribution
    with a standard deviation of 0.1: any sample lying more than two
    standard deviations away from the mean is discarded and drawn again.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """Create a bias variable of the given shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

In [7]:
def conv2d(x, W):
    """Convolve the 4-D input `x` with the filter bank `W`.

    The stride has one entry per input dimension, i.e. batch, height,
    width and channel; all are 1 here, so the filter is moved one
    position at a time. The size of the convolution window is not given
    separately: it is determined by the shape of `W`. 'SAME' padding is
    used.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')


def max_pool_2x2(x):
    """Apply 2x2 max pooling to the 4-D input `x`.

    Pooling happens over the two spatial dimensions only, never across
    batches or channels. The stride equals the window size (2) in both
    spatial dimensions, so the pooled regions do not overlap.
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

### First Convolutional Layer

In [8]:
# First layer filters: 5x5 windows over the single input channel,
# producing 32 output features per position. Each of these 32 features
# may end up learning a particular simplistic pattern, evolving
# automagically into e.g. a horizontal line or corner detector.
CONV1_FEATURES = 32
W_conv1 = weight_variable([5, 5, 1, CONV1_FEATURES])
b_conv1 = bias_variable([CONV1_FEATURES])

In [11]:
# The images in the prebuilt dataset are already flattened.
# We have to un-flatten them before we can apply 2d convolution on them.
IMG_WIDTH = 28
IMG_HEIGHT = 28
IMG_CHANNELS = 1
# The convolution and pooling ops read their input as
# [batch, height, width, channels], so height must come before width.
# For the square 28x28 MNIST images the order makes no numerical
# difference, but keeping it right avoids transposed images if the
# constants above are ever changed to a non-square size.
# -1 lets TensorFlow infer the batch dimension.
x_image = tf.reshape(x, [-1, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS])

In [12]:
# First layer forward pass: convolution, plus bias, then ReLU, and
# finally 2x2 max pooling applied to the activated output.
# TODO(andrei) What happens if we remove the bias?
conv1_preact = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_preact)
h_pool1 = max_pool_2x2(h_conv1)

### Second Convolutional Layer

In [13]:
# Second layer: another 5x5 convolution, stacked on top of the 32
# features per position produced by the first layer, and producing 64.
CONV2_IN_FEATURES = 32
CONV2_OUT_FEATURES = 64
W_conv2 = weight_variable([5, 5, CONV2_IN_FEATURES, CONV2_OUT_FEATURES])
b_conv2 = bias_variable([CONV2_OUT_FEATURES])

# Same pipeline as the first layer: convolve, add bias, ReLU, max-pool.
conv2_preact = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_preact)
h_pool2 = max_pool_2x2(h_conv2)

# Recap (per-image shapes, height x width x channels):
#
# 28 x 28 x 1 ---- conv1 -----> 28 x 28 x 32
#             - max-pool-2x2 -> 14 x 14 x 32
#             ---- conv2 -----> 14 x 14 x 64
#             - max-pool-2x2 ->  7 x  7 x 64

# This reduces our "image size" to 7x7 currently.

### Densely Connected Layer

In [14]:
# Fully connected layer: 1024 ReLU neurons that process the whole output
# of the second convolutional layer (7x7 positions, 64 features each).
FLAT_FEATURES = 7 * 7 * 64
FC1_UNITS = 1024
W_fc1 = weight_variable([FLAT_FEATURES, FC1_UNITS])
b_fc1 = bias_variable([FC1_UNITS])

# Flatten 'h_pool2' into one row per image so that it can be fed through
# an ordinary matrix multiplication, then add the bias and apply ReLU.
h_pool2_flat = tf.reshape(h_pool2, [-1, FLAT_FEATURES])
fc1_preact = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_preact)

In [15]:
# Dropout on the fully connected layer's activations, controlled at run
# time through a placeholder: keep_prob is the probability of keeping
# each unit, so feeding 1.0 switches dropout off (as done when
# evaluating), while training feeds a smaller value.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

### Final Softmax

In [16]:
# Output layer: map the 1024 (dropout-regularised) features to one score
# per class, then turn the scores into probabilities with a softmax.
NUM_CLASSES = 10
W_fc2 = weight_variable([1024, NUM_CLASSES])
b_fc2 = bias_variable([NUM_CLASSES])

logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits)

In [None]:
BATCH_SIZE = 50
PROGRESS_EVERY = 100
# NOTE: despite its name, this is the number of training *steps* (one
# mini-batch each), not the number of full passes over the training set.
EPOCHS = 20000
# Number of test images evaluated per run when computing test accuracy.
TEST_BATCH_SIZE = 1000
# Smallest probability allowed inside the log of the loss.
LOG_EPSILON = 1e-10

# Cross-entropy between the one-hot labels and the predicted
# probabilities, summed over classes and averaged over the batch.
# The probabilities are clipped away from zero first: a saturated softmax
# can output exactly 0, and log(0) = -inf (times a 0 label = NaN) would
# poison the loss and, through the gradients, every weight.
cross_entropy = tf.reduce_mean(-tf.reduce_sum(
    y_ * tf.log(tf.clip_by_value(y_conv, LOG_EPSILON, 1.0)),
    reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy)

# A prediction is correct when the most probable class matches the label;
# accuracy is the fraction of correct predictions in the fed batch.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Must run after the optimizer is built, so that the variables it creates
# are initialized as well.
sess.run(tf.initialize_all_variables())
for i in range(EPOCHS):
    batch = mnist.train.next_batch(BATCH_SIZE)
    if i % PROGRESS_EVERY == 0:
        # Remember: no dropout when evaluating.
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))

    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# Evaluate the test set in chunks rather than in a single run: holding
# the activations for every test image at once can exhaust memory.
# Each chunk's accuracy is weighted by its size, so the result is the
# accuracy over the whole test set even if the last chunk is smaller.
num_test = len(mnist.test.images)
num_correct = 0.0
for start in range(0, num_test, TEST_BATCH_SIZE):
    test_images = mnist.test.images[start:start + TEST_BATCH_SIZE]
    test_labels = mnist.test.labels[start:start + TEST_BATCH_SIZE]
    # Remember: no dropout when evaluating.
    chunk_accuracy = accuracy.eval(feed_dict={
        x: test_images, y_: test_labels, keep_prob: 1.0})
    num_correct += chunk_accuracy * len(test_images)

print("test accuracy %g" % (num_correct / num_test))

step 0, training accuracy 0.12
step 100, training accuracy 0.8
step 200, training accuracy 0.92
step 300, training accuracy 0.84
step 400, training accuracy 0.98
step 500, training accuracy 0.92
step 600, training accuracy 0.96
step 700, training accuracy 0.96
step 800, training accuracy 0.88
