# Network with convolutional layers

In [7]:
import tensorflow as tf
import numpy as np
from utils import *

%matplotlib inline

In [8]:
# Hyper-parameters for the training run
num_iterations = 20000   # number of optimizer steps (one mini-batch each)
batch_size = 50          # examples drawn per mini-batch
learning_rate = 0.001    # step size handed to the optimizer

In [10]:
# Load MNIST from the 'MNIST_data' directory with one-hot encoded labels
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Interactive session: later cells call .eval() / .run() on graph nodes
# without passing a session explicitly
sess = tf.InteractiveSession()

# Flattened input images (784 values each) and 10-way one-hot targets;
# the leading None leaves the batch dimension open
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10])

# Scalar placeholder reserved for a dropout rate
# NOTE(review): not consumed by any cell visible here -- confirm it is still needed
dropout_rate = tf.placeholder(dtype=tf.float32)

# View every 784-vector as a 28x28 single-channel image for the conv layers
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


Network:

In [11]:
# First convolutional hidden layer: 3x3 kernels, 1 input channel -> 32 feature maps
W_conv1 = tf.Variable(tf.truncated_normal([3, 3, 1, 32], stddev=0.1))
b_conv1 = bias_variable([32])

h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

# Pooling layer
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional hidden layer: 3x3 kernels, 32 -> 64 feature maps
W_conv2 = tf.Variable(tf.truncated_normal([3, 3, 32, 64], stddev=0.1))
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

# Pooling layer
h_pool2 = max_pool_2x2(h_conv2)

# Output layer: a linear map from the flattened feature maps to 10 class scores.
# W_fc1 is created exactly once; creating it and then rebinding the name would
# leave an unused trainable variable in the graph and inflate any count taken
# over tf.GraphKeys.TRAINABLE_VARIABLES.
W_fc1 = tf.Variable(tf.truncated_normal([7 * 7 * 64, 10], stddev=0.1))
b_fc1 = bias_variable([10])

# Flatten to 7*7*64 values per example.
# NOTE(review): assumes conv2d keeps the spatial size and each max_pool_2x2
# halves it (28 -> 14 -> 7); both helpers come from elsewhere -- confirm.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])

# Unnormalized class scores (logits); no softmax is applied in this cell
y_conv = tf.matmul(h_pool2_flat, W_fc1) + b_fc1

In [None]:
# Training and test nodes

# Mean cross-entropy over the batch. y_conv holds unnormalized logits; the
# softmax is applied inside the loss op. The _v2 op replaces the deprecated
# tf.nn.softmax_cross_entropy_with_logits. The labels are fed through a
# placeholder, so allowing gradients to flow into them does not affect training.
J = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_conv))

train_step = tf.train.AdamOptimizer(learning_rate).minimize(J)

# Fraction of examples whose highest-scoring class matches the one-hot label
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess.run(tf.global_variables_initializer())

# How often (in iterations) accuracy is measured and logged
train_log_every = 100
test_log_every = 500

# Accuracy histories and the iteration at which each value was recorded
train_accuracy_list = []
train_accuracy_index = []
test_accuracy_list = []
test_accuracy_index = []

# The full test set is evaluated in a single feed
test_feed = {x: mnist.test.images, y_: mnist.test.labels}

for i in range(num_iterations):
    batch = mnist.train.next_batch(batch_size)
    batch_feed = {x: batch[0], y_: batch[1]}

    # Training accuracy is measured on the current mini-batch *before* the
    # optimizer step, so it is a noisy estimate based on batch_size examples.
    if i % train_log_every == 0:
        train_accuracy = accuracy.eval(feed_dict=batch_feed)
        print("step %d, training accuracy %g" % (i, train_accuracy))
        train_accuracy_list.append(train_accuracy)
        train_accuracy_index.append(i)

    if i % test_log_every == 0:
        test_accuracy = accuracy.eval(feed_dict=test_feed)
        print("------------------------------->>test accuracy %g" % test_accuracy)
        test_accuracy_list.append(test_accuracy)
        test_accuracy_index.append(i)

    train_step.run(feed_dict=batch_feed)

# Final accuracy on the test set after the last training step
print("test accuracy %g" % accuracy.eval(feed_dict=test_feed))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

step 0, training accuracy 0.22
------------------------------->>test accuracy 0.1026
step 100, training accuracy 0.88
step 200, training accuracy 0.88
step 300, training accuracy 0.94
step 400, training accuracy 0.94
step 500, training accuracy 1
------------------------------->>test accuracy 0.966
step 600, training accuracy 0.94
step 700, training accuracy 1
step 800, training accuracy 1
step 900, training accuracy 0.92
step 1000, training accuracy 0.98
------------------------------->>test accuracy 0.9774
step 1100, training accuracy 0.94
step 1200, training accuracy 0.96
step 1300, training accuracy 0.96
step 1400, training accuracy 0.98
step 1500, training accuracy 0.98
------------------------------->>test accuracy 0.9811
step 1600, training accuracy 1
step 1700, training accuracy 0.98
step 180

In [None]:
import matplotlib.pyplot as plt

# Accuracy curves recorded during training. The second curve is measured on
# mnist.test, so it is labelled as the test set rather than a validation set.
fig, ax = plt.subplots()
ax.plot(train_accuracy_index, train_accuracy_list)
ax.plot(test_accuracy_index, test_accuracy_list)
ax.set_title('Accuracy during training')
ax.set_xlabel('Iteration')
ax.set_ylabel('Accuracy')
ax.legend(['Train set', 'Test set'], loc='lower right')
plt.show()

In [9]:
# Every variable registered as trainable in the default graph
all_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

# Add up the number of scalar entries of each variable (product of its dims)
num_param = 0
for var in all_vars:
    dims = var.op.outputs[0].shape.as_list()
    num_param += np.prod(np.array(dims))

# The printed figure is the total count of scalar parameters
print('Number of variables in the network: ', num_param)

Number of variables in the network:  50186
