# TensorFlow tutorial
MNIST data set

In [1]:
# Load the MNIST data set from the 'MNIST_data' directory; labels are one-hot encoded (one_hot=True).
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
# Start an interactive session. Later cells call .run() / .eval() on graph
# objects without passing a session explicitly, so this session must exist first.
import tensorflow as tf
sess = tf.InteractiveSession()


In [7]:
# Graph inputs: they carry no value of their own and are fed through feed_dict.
# The leading None leaves the batch dimension unspecified.
flat_image_shape = [None, 784]    # one flattened image (784 values) per row
one_hot_label_shape = [None, 10]  # one 10-way label vector per row
x = tf.placeholder(tf.float32, shape=flat_image_shape)
y_ = tf.placeholder(tf.float32, shape=one_hot_label_shape)


In [5]:
# Trainable parameters of the linear model, both starting at zero.
zero_weights = tf.zeros([784, 10])
W = tf.Variable(zero_weights)
zero_biases = tf.zeros([10])
b = tf.Variable(zero_biases)
# Variables only receive their initial values once the initializer op is run.
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [6]:
# regression model: a single linear layer producing unnormalised class scores (logits)
y = tf.matmul(x, W) + b

# loss function: softmax cross-entropy between logits and labels, averaged over the batch.
# The arguments are passed by keyword because TensorFlow >= 1.0 raises a
# ValueError for positional arguments here; the keyword form is also accepted
# by earlier releases, so it works on both.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))

# or, the same loss written out by hand (kept for reference only; applying
# softmax and log separately is numerically less stable than the fused op above)
#y = tf.nn.softmax(tf.matmul(x, W) + b)
#y_ = tf.placeholder(tf.float32, [None, 10])
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))


In [7]:
# train model: one gradient-descent update of the cross-entropy loss per run
learning_rate = 0.5
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(cross_entropy)


In [8]:
# 1000 optimisation steps, each on the next mini-batch of 100 training examples
for step in range(1000):
    batch_images, batch_labels = mnist.train.next_batch(100)
    train_step.run(feed_dict={x: batch_images, y_: batch_labels})


In [10]:
# evaluate model: fraction of test images whose highest-scoring class
# equals the class marked in the one-hot label
predicted_class = tf.argmax(y, 1)
labelled_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, labelled_class)
# cast the booleans to 0.0 / 1.0 so that their mean is the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
test_feed = {x: mnist.test.images, y_: mnist.test.labels}
print(accuracy.eval(feed_dict=test_feed))


0.9134


## Convolutional neural network

Using ReLU (Rectified Linear Unit): $f(x) = \max(0, x)$

https://en.wikipedia.org/wiki/Rectifier_(neural_networks)

In [3]:
# helper functions that create the weight and bias variables used by the ReLU layers

def weight_variable(shape):
    """Create a weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution
    with a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a bias variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))



Layer 1

In [4]:
# stride of 1
# 0 padding, output size = input size
def conv2d(x, W):
    """Convolve `x` with the filter `W` using a stride of 1 and 'SAME' padding.

    With a stride of 1, 'SAME' padding keeps the output the same
    spatial size as the input.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

# pooling= max pooling block of 2x2
def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows that move in steps of 2.

    'SAME' padding pads the borders where needed; because the stride is 2,
    the spatial size of the output is half that of the input.
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')


In [5]:
# Layer 1, convolution + max pooling
# filter shape: 5x5 patch, 1 input channel, 32 output channels (features)
conv1_filter_shape = [5, 5, 1, 32]
W_conv1 = weight_variable(conv1_filter_shape)
# one bias value per output feature
conv1_bias_shape = [32]
b_conv1 = bias_variable(conv1_bias_shape)


In [8]:
# Reshape the flat input into a 4-D tensor for the convolution:
#   -1 : inferred from the number of elements (the batch dimension)
#   28, 28 : the two spatial dimensions of an image (28 * 28 = 784)
#   1  : a trailing dimension of size 1 (the single input channel of the first filter)
image_shape = [-1, 28, 28, 1]
x_image = tf.reshape(x, image_shape)


In [9]:
# convolution, bias and ReLU, followed by 2x2 max pooling
conv1_preactivation = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_preactivation)
h_pool1 = max_pool_2x2(h_conv1)
# output is 14x14 with 32 channels

Layer 2

In [12]:
# second convolution: 5x5 patches, 32 input channels (the layer 1 features), 64 output channels
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

# same pipeline as layer 1: convolution + bias, ReLU, 2x2 max pooling
conv2_preactivation = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_preactivation)
h_pool2 = max_pool_2x2(h_conv2)
# output is now 7x7 with 64 channels

Densely connected

In [13]:
# fully connected layer with 1024 units on top of the flattened 7x7x64 feature maps
flat_size = 7 * 7 * 64
W_fc1 = weight_variable([flat_size, 1024])
b_fc1 = bias_variable([1024])

# turn the pooled feature maps into one vector per example
h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])
# multiply by the weights, add the bias, then apply ReLU
fc1_preactivation = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_preactivation)


dropout

https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf

In [14]:
# Dropout, to reduce overfitting. The keep probability is a placeholder so
# that a different value can be fed for training and for evaluation.
keep_prob = tf.placeholder(dtype=tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)


readout

In [15]:
# readout layer: a linear map from the 1024 dense features to the 10 class
# scores, the same form as the regression model above
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

fc2_product = tf.matmul(h_fc1_drop, W_fc2)
y_conv = fc2_product + b_fc2

Train and Test

In [16]:
# train
# Labels and logits are passed by keyword: TensorFlow >= 1.0 raises a
# ValueError for positional arguments to this op, and the keyword form is
# also accepted by earlier releases.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Run the initializer only after minimize(), so that any variables the
# optimizer adds to the graph are initialised too. This re-initialises every
# variable in the graph, including W and b of the regression model.
sess.run(tf.global_variables_initializer())
for i in range(20000):
    batch = mnist.train.next_batch(50)
    if i%100 == 0:
        # every 100 steps, report the accuracy on the current batch with
        # dropout switched off (keep_prob 1.0), before training on it
        train_accuracy = accuracy.eval(feed_dict={
            x:batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g"%(i, train_accuracy))
    # training step with dropout: each unit is kept with probability 0.5
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

    

step 0, training accuracy 0.08
step 100, training accuracy 0.88
step 200, training accuracy 0.88
step 300, training accuracy 0.82
step 400, training accuracy 0.96
step 500, training accuracy 0.9
step 600, training accuracy 0.98
step 700, training accuracy 0.96
step 800, training accuracy 0.9
step 900, training accuracy 1
step 1000, training accuracy 0.98
step 1100, training accuracy 0.94
step 1200, training accuracy 0.98
step 1300, training accuracy 0.94
step 1400, training accuracy 0.96
step 1500, training accuracy 1
step 1600, training accuracy 0.96
step 1700, training accuracy 0.96
step 1800, training accuracy 0.94
step 1900, training accuracy 0.96
step 2000, training accuracy 0.96
step 2100, training accuracy 0.98
step 2200, training accuracy 0.94
step 2300, training accuracy 1
step 2400, training accuracy 0.96
step 2500, training accuracy 1
step 2600, training accuracy 0.98
step 2700, training accuracy 1
step 2800, training accuracy 0.96
step 2900, training accuracy 1
step 3000, t

In [31]:
# test
# The whole test set could be evaluated with a single accuracy.eval() call on
# mnist.test.images / mnist.test.labels, but that needs a lot of memory.
# For low memory, evaluate it in batches instead.
#
# The arrays are sliced directly so that every test example is scored exactly
# once, in a fixed order. Drawing 100 batches of 200 with next_batch() would
# go through the 10,000-image test set twice and would depend on the
# iterator's internal position left over from earlier runs.
test_images = mnist.test.images
test_labels = mnist.test.labels
n_test = len(test_images)
test_batch_size = 200

acc = list()      # per-batch accuracies, kept for inspection
n_correct = 0.0   # running count of correctly classified examples
for start in range(0, n_test, test_batch_size):
    stop = start + test_batch_size
    batch_images = test_images[start:stop]
    batch_labels = test_labels[start:stop]
    batch_acc = accuracy.eval(feed_dict={
        x: batch_images, y_: batch_labels, keep_prob: 1.0})
    acc.append(batch_acc)
    # weight by the batch length so a shorter final batch is counted correctly
    n_correct += batch_acc * len(batch_images)

mean_acc = n_correct / n_test
print("test accuracy %g"%mean_acc)

test accuracy 0.9923
