# Import packages and data

In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST data sets from the local data directory, with the labels
# returned as one-hot vectors (one_hot=True).
data_dir = "MNIST_data/"
mnist = input_data.read_data_sets(data_dir, one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


# Build computation graph - CNN

In [2]:
# Graph inputs. The leading None dimension leaves the batch size open.
#   x       - flattened images, 784 values per example
#   y_truth - target label vectors, 10 entries per example
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
y_truth = tf.placeholder(dtype=tf.float32, shape=[None, 10])

# Model components
def weight_variable(shape):
    """Return a tf.Variable of the given shape for use as a weight tensor.

    The initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a tf.Variable of the given shape for use as a bias tensor.

    Every element starts at the constant value 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Convolve `x` with the filter bank `W` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
    pool_window = [1, 2, 2, 1]
    pool_strides = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=pool_window, strides=pool_strides, padding='SAME')

# 1st convolution layer: 5x5 kernels, 1 input channel -> 32 feature maps.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

# Turn each flat 784-value row back into a 28x28 single-channel image.
x_image = tf.reshape(x, [-1, 28, 28, 1])

conv1_preact = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_preact)

# Log only the first feature map of the layer as an image summary.
h_conv1_CH1 = h_conv1[:, :, :, :1]
tf.image_summary('convol_layer1_CH1', h_conv1_CH1, 3)

h_pool1 = max_pool_2x2(h_conv1)

# 2nd convolution layer: 5x5 kernels mapping the 32 pooled feature maps to 64.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

# Log only the first feature map of the layer as an image summary.
h_conv2_CH1 = h_conv2[:,:,:,:1]
tf.image_summary('convol_layer2_CH1', h_conv2_CH1, 3)

# h_pool1 is intentionally not rebuilt here: it already fed h_conv2 above, and
# pooling h_conv1 a second time would only add an unused op to the graph.
h_pool2 = max_pool_2x2(h_conv2)

# Fully-connected layer: the pooled output is flattened to 7*7*64 values per
# example (28x28 halved twice by the 2x2 pools, 64 channels) and mapped to
# 1024 hidden units.
flat_size = 7 * 7 * 64
W_fc1 = weight_variable([flat_size, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])
fc1_preact = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_preact)

# Dropout: the keep probability is supplied through feed_dict at run time.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout: linear layer producing one unnormalized score per class.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

y_predict = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

# Train the model - CNN

In [3]:
# Hyper-parameters
learning_rate = 1e-4
mini_batch = 50     # examples per training step
iterations = 501    # number of training steps
WITH_2NORM = 1      # truthy: L2 penalty on the readout layer; falsy: dropout
KEEP_PRO = 0.5      # dropout keep probability used when WITH_2NORM is falsy

# Training method: mean softmax cross-entropy of the readout vs. the labels.
batch_losses = tf.nn.softmax_cross_entropy_with_logits(y_predict, y_truth)
cross_entropy = tf.reduce_mean(batch_losses)

# Exactly one regularizer is active. KEEP_PRO_ is the keep probability that
# the training steps feed to the dropout layer.
if WITH_2NORM:
    # NOTE(review): the L2 term is added unscaled and also covers the bias;
    # confirm that this weighting is intended.
    regularization = tf.nn.l2_loss(W_fc2) + tf.nn.l2_loss(b_fc2)
    loss = cross_entropy + regularization
    tf.scalar_summary('loss', loss)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    KEEP_PRO_ = 1.0
else:
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
    tf.scalar_summary('loss', cross_entropy)
    KEEP_PRO_ = KEEP_PRO

# Prediction and Testing method
prediction_result = tf.equal(tf.argmax(y_predict,1), tf.argmax(y_truth,1))
accuracy = tf.reduce_mean(tf.cast(prediction_result, tf.float32))
tf.scalar_summary('accuracy', accuracy)

# Add summary writer
merged = tf.merge_all_summaries()
#train_writer = tf.train.SummaryWriter('MNIST_data/', sess.graph)

# Initialize variables
init_op = tf.initialize_all_variables()

sess = tf.Session()
sess.run(init_op)
train_writer = tf.train.SummaryWriter('MNIST_data/', sess.graph)
if not WITH_2NORM:
    print "Using Dropout regularization, dropout %f".format(KEEP_PRO_)
else:
    print "Using L2-norm regularization."

# Run training
print "Traning ..."
for i in range(iterations):
    batch_x, batch_y = mnist.train.next_batch(mini_batch)
    if i >= 100 and i%100 == 0:
        train_accuracy = sess.run(accuracy, feed_dict={x: batch_x, y_truth: batch_y, keep_prob: 1.0})
        print "step {0}, training accuracy {1}".format(i,train_accuracy)
    if i >= 10 and i%10 == 0:
        summary, _ = sess.run([merged, train_step], feed_dict={x: batch_x, y_truth: batch_y, keep_prob: KEEP_PRO_})
        train_writer.add_summary(summary, i)
    else:
        sess.run(train_step, feed_dict={x: batch_x, y_truth: batch_y, keep_prob: KEEP_PRO_})
train_writer.flush()
train_writer.close()
print "Training done!"

Using L2-norm regularization.
Traning ...
step 100, training accuracy 0.839999973774
step 200, training accuracy 0.879999995232
step 300, training accuracy 0.860000014305
step 400, training accuracy 0.980000019073
step 500, training accuracy 0.939999997616
Training done!


# Test

In [4]:
# Run testing
print "Testing ..."
print 'Test Accuracy: ', sess.run(accuracy, feed_dict={x: mnist.test.images, y_truth: mnist.test.labels, keep_prob: 1.0})

sess.close()

Testing ...
Test Accuracy:  0.9446
