In [1]:
#Link: https://www.tensorflow.org/get_started/mnist/pros

In [3]:
import tensorflow as tf
import numpy as np

In [8]:
#Load MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
#Starting the interactive session
sess = tf.InteractiveSession()

Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz


In [15]:
#Placeholders for the input images and output classes
#None indicates that the first dimension (the batch size) can be of any size
x = tf.placeholder(tf.float32,shape=[None,784])
y_= tf.placeholder(tf.float32,shape=[None,10])
#Variables
W = tf.Variable(tf.zeros([784,10])) #we have 784 input features and 10 outputs
b = tf.Variable(tf.zeros([10]))
sess.run(tf.global_variables_initializer())

In [22]:
#Specifying the model
y = x @ W + b
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_,logits=y))

In [23]:
#Training the model
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
#running 1000 steps
for _ in range(1000):
    batch = mnist.train.next_batch(100)
    train_step.run(feed_dict={x:batch[0],y_:batch[1]})

In [26]:
#Evaluating the model
correct_prediction = tf.equal(tf.argmax(y,1),tf.argmax(y_,1))
accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels})

0.91900003

In [27]:
#Helper functions for weight initialization
def weight_variable(shape):
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial)

def bias_variable(shape):
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial)

In [28]:
#Convolution and Pooling
def conv2d(x,W):
    return tf.nn.conv2d(x,W,strides=[1,1,1,1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')

**Stride** controls how much the filter moves in each step. 

**Padding** is about how to handle the decrease in spatial dimensions.

**Pooling** is reducing the number of features with some form of aggregation, usually max or mean.

Sources:

https://adeshpande3.github.io/A-Beginner's-Guide-To-Understanding-Convolutional-Neural-Networks-Part-2/

http://ufldl.stanford.edu/tutorial/supervised/Pooling/

In [30]:
#First convolutional layer
W_conv1 = weight_variable([5,5,1,32]) #dimensions: (patch size x,y), #input channels, #output channels 
b_conv1 = bias_variable([32])

x_image = tf.reshape(x,[-1,28,28,1])
h_conv1 = tf.nn.relu(conv2d(x_image,W_conv1)+b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

#Second layer
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

#Densely connected layer
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

In [31]:
#Dropout to reduce overfitting
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

Dropout does not make a big difference here, but it effectively reduces overfitting when training large neural networks.

In [32]:
#Readout layer
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

In [40]:
#Training and evaluating the model
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(10000):
        batch = mnist.train.next_batch(50)
        if i % 500 == 0:
            train_accuracy = accuracy.eval(feed_dict={
              x: batch[0], y_: batch[1], keep_prob: 1.0})
            print('step %d, training accuracy %g' % (i, train_accuracy))
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
        
    print('test accuracy %g' % accuracy.eval(feed_dict={
      x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

step 0, training accuracy 0.14
step 500, training accuracy 0.8
step 1000, training accuracy 0.88
step 1500, training accuracy 0.88
step 2000, training accuracy 0.8
step 2500, training accuracy 0.82
step 3000, training accuracy 0.92
step 3500, training accuracy 0.94
step 4000, training accuracy 0.96
step 4500, training accuracy 0.92
step 5000, training accuracy 0.88
step 5500, training accuracy 0.88
step 6000, training accuracy 0.88
step 6500, training accuracy 0.9
step 7000, training accuracy 0.96
step 7500, training accuracy 0.92
step 8000, training accuracy 0.9
step 8500, training accuracy 0.86
step 9000, training accuracy 0.88
step 9500, training accuracy 0.92
test accuracy 0.915
