# CNN to predict MNIST images using TensorFlow

In [1]:
# imports 
%matplotlib inline
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
# Read the MNIST data sets from ../MNIST_data/; one_hot=True asks for the labels
# as one-hot vectors rather than integer class ids
data = input_data.read_data_sets("../MNIST_data/", one_hot=True)

Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz


## Building TF graph

### Define placeholders

In [38]:
# input node: each image is a flat vector of 28*28 pixels; the first dimension is
# None so that any batch size can be fed
x = tf.placeholder(tf.float32, shape=[None, 28*28], name='x')

# the conv layers need a 4d tensor [batch, height, width, channels]:
# -1 lets TF infer the batch size, 28x28 is the image size, 1 is the single channel
x_train = tf.reshape(x, [-1, 28, 28, 1])

# the expected output: one row of 10 values per example (one per digit class)
y_ = tf.placeholder(tf.float32, shape=[None, 10], name='y_')

# true class index of each example
# (tf.argmax with axis= replaces the deprecated tf.arg_max / dimension= spelling)
y_cls = tf.argmax(y_, axis=1)

### Conv layer 1

The first conv layer uses 16 filters of shape 5x5.

In [12]:
# Parameters of conv layer 1.
# The filter bank is laid out as [height, width, in_channels, out_channels]:
# 5x5 filters, 1 input channel (MNIST is grey-scale) and 16 output channels.
conv1_filter_shape = [5, 5, 1, 16]

# weights start as truncated-normal noise with a std deviation of 0.1
conv1_weights_init = tf.truncated_normal(conv1_filter_shape, stddev=0.1)
W_conv1 = tf.Variable(conv1_weights_init)

# one bias per output channel, every one starting at 0.1
conv1_bias_init = tf.constant(0.1, shape=[16])
b_conv1 = tf.Variable(conv1_bias_init)

In [14]:
# Convolution: x_train is the layer input and W_conv1 holds the filters.
# strides is given per input dimension; the first (image number) and last (input
# channel) entries are always 1, so [1, 1, 1, 1] moves the filter one pixel at a
# time (a 2x2 stride would be [1, 2, 2, 1]).
# padding='SAME' pads the input image with zeros so the output has the same size.
conv1_strides = [1, 1, 1, 1]
conv1 = tf.nn.conv2d(
    input=x_train,
    filter=W_conv1,
    strides=conv1_strides,
    padding='SAME'
)

# add the per-channel bias
conv1_plus_b = conv1 + b_conv1

# Max pooling: `value` is the layer to pool over, `ksize` is the size of the pooling
# window for each input dimension (here 2x2 over height and width). With the
# matching 2x2 stride the 28x28 image comes out as 14x14.
pool1_window = [1, 2, 2, 1]
pool1 = tf.nn.max_pool(
    value=conv1_plus_b,
    ksize=pool1_window,
    strides=pool1_window,
    padding='SAME'
)

# relu: computes max(v, 0) for every value v
relu1 = tf.nn.relu(pool1)

In [15]:
# display the tensor to check the static output shape of conv layer 1
relu1

<tf.Tensor 'Relu_2:0' shape=(?, 14, 14, 16) dtype=float32>

### Conv layer 2

Same as above: build the second conv layer, this time with 32 filters of shape 5x5 applied to the 16 channels produced by conv layer 1.

In [17]:
# Parameters of conv layer 2: 5x5 filters taking the 16 channels that come out of
# conv layer 1 to 32 output channels, plus one bias per output channel.
conv2_filter_shape = [5, 5, 16, 32]
W_conv2 = tf.Variable(tf.truncated_normal(conv2_filter_shape, stddev=0.1))
b_conv2 = tf.Variable(tf.constant(0.1, shape=[32]))

# Same pipeline as conv layer 1: convolution -> bias -> 2x2 max pool -> relu.
conv2_strides = [1, 1, 1, 1]
pool2_window = [1, 2, 2, 1]
conv2 = tf.nn.conv2d(
    input=relu1,
    filter=W_conv2,
    strides=conv2_strides,
    padding='SAME'
)
conv2_plus_b = conv2 + b_conv2
pool2 = tf.nn.max_pool(
    value=conv2_plus_b,
    ksize=pool2_window,
    strides=pool2_window,
    padding='SAME'
)
relu2 = tf.nn.relu(pool2)

In [18]:
# display the tensor to check the static output shape of conv layer 2
relu2

<tf.Tensor 'Relu_3:0' shape=(?, 7, 7, 32) dtype=float32>

### Flatten layer

Conv layer 2 outputs a 4d tensor, but the fully connected layer needs a 2d tensor, so this layer flattens the 7x7x32 output of each image into a single vector of 1568 values.

In [23]:
# collapse the 7x7x32 output of conv layer 2 into one vector of 7*7*32 = 1568 values
# per image; -1 leaves the batch dimension to be inferred
flat_layer = tf.reshape(relu2, [-1, 7 * 7 * 32])

In [24]:
# display the tensor to check the flattened shape
flat_layer

<tf.Tensor 'Reshape_3:0' shape=(?, 1568) dtype=float32>

### Fully connected layer 1

Our first fully connected layer, with 128 neurons.

In [27]:
# Parameters of the first fully connected layer: a [inputs, neurons] weight matrix
# and one bias per neuron.
fc1_inputs = 7 * 7 * 32
fc1_neurons = 128
W_fc1 = tf.Variable(tf.truncated_normal([fc1_inputs, fc1_neurons], stddev=0.1))
b_fc1 = tf.Variable(tf.constant(0.1, shape=[fc1_neurons]))

# the layer itself: matrix product plus bias, then relu
math_fc1 = tf.matmul(flat_layer, W_fc1) + b_fc1
fc1 = tf.nn.relu(math_fc1)

In [28]:
# display the tensor to check the output shape of fully connected layer 1
fc1

<tf.Tensor 'Relu_4:0' shape=(?, 128) dtype=float32>

### Dropout layer

This layer randomly drops a few neurons, to reduce overfitting

In [29]:
# probability of keeping each neuron; it is a placeholder, so a value has to be fed
# every time something that depends on drop_out is evaluated
keep_prob = tf.placeholder(tf.float32)
drop_out = tf.nn.dropout(fc1, keep_prob)

In [30]:
# display the tensor to check that dropout leaves the shape unchanged
drop_out

<tf.Tensor 'dropout/mul:0' shape=(?, 128) dtype=float32>

### Fully connected layer 2

The second FC layer has 10 neurons, one for each of the digits.

In [32]:
# Parameters of the output layer: 128 inputs (the fc1 neurons) to 10 outputs,
# with one bias per output.
fc2_weights_shape = [128, 10]
W_fc2 = tf.Variable(tf.truncated_normal(fc2_weights_shape, stddev=0.1))
b_fc2 = tf.Variable(tf.constant(0.1, shape=[10]))

# the layer itself, applied to the dropout output; no activation here,
# fc2 holds the raw scores
fc2 = tf.matmul(drop_out, W_fc2) + b_fc2

In [33]:
# display the tensor to check the output shape of fully connected layer 2
fc2

<tf.Tensor 'add_7:0' shape=(?, 10) dtype=float32>

### Softmax layer

Add a softmax layer to normalize the output, and use argmax to get digit

In [35]:
# softmax normalizes the 10 raw scores of fc2
y_pred = tf.nn.softmax(fc2)

# predicted digit = index of the largest normalized score
# (tf.argmax with axis= replaces the deprecated tf.arg_max / dimension= spelling)
y_pred_cls = tf.argmax(y_pred, axis=1)

In [36]:
# display the tensor to check the shape and dtype of the predicted classes
y_pred_cls

<tf.Tensor 'ArgMax:0' shape=(?,) dtype=int64>

We have finished building our CNN.

### Defining cost function and optimizer

In [39]:
# Cost to minimize: softmax cross-entropy between the raw scores (fc2) and the
# expected labels, reduced to a single number with reduce_mean.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=fc2, labels=y_)
cost = tf.reduce_mean(cross_entropy)

# Training step: Adam with a learning rate of 1e-4, minimizing the cost.
adam = tf.train.AdamOptimizer(learning_rate=1e-4)
optimizer = adam.minimize(cost)

# Performance measure: compare predicted and true classes, cast the booleans
# to floats and take their mean to get the accuracy.
correct_pred = tf.equal(y_pred_cls, y_cls)
correct_as_float = tf.cast(correct_pred, tf.float32)
acc = tf.reduce_mean(correct_as_float)

## Training

In [41]:
# open the session used by the rest of the notebook and run the op that
# initializes all variables
session = tf.Session()
init_op = tf.global_variables_initializer()
session.run(init_op)

In [42]:
# number of training examples requested from data.train.next_batch per optimization step
batch_size = 1024

In [44]:
# Number of optimization steps run so far; kept across calls to optimize() so the
# iteration numbers in the log keep counting up.
tot_iter = 0

def optimize(num_iter, train_keep_prob=0.5):
    """Run `num_iter` optimization steps on mini-batches of the training data.

    Each step fetches `batch_size` examples with `data.train.next_batch` and runs
    the `optimizer` op on them. Whenever the global iteration index is a multiple
    of 100, the accuracy on the current training batch is printed.

    Args:
        num_iter: number of optimization steps to run.
        train_keep_prob: value fed to the `keep_prob` placeholder of the dropout
            layer during the training step. Defaults to 0.5.

    Side effects:
        Updates the variables of the graph through `session` and increments the
        module-level counter `tot_iter` once per step.
    """
    global tot_iter

    # range() is evaluated once, so incrementing tot_iter inside the loop
    # does not change the number of steps
    for i in range(tot_iter, tot_iter + num_iter):

        x_batch, y_batch = data.train.next_batch(batch_size)

        # keep_prob is a placeholder without a default: it has to be fed on every
        # run of an op that depends on the dropout layer, otherwise session.run fails
        train_feed = {x: x_batch, y_: y_batch, keep_prob: train_keep_prob}
        session.run(optimizer, feed_dict=train_feed)

        if i % 100 == 0:
            # keep every neuron (keep_prob = 1.0) when measuring accuracy, so the
            # printed number is not affected by random dropout
            eval_feed = {x: x_batch, y_: y_batch, keep_prob: 1.0}
            a = session.run(acc, feed_dict=eval_feed)
            msg = "Iteration: {0:>6}, Acc: {1:6.1%}"
            print(msg.format(i + 1, a))

        # counted per step so the total stays correct if the loop is interrupted
        tot_iter += 1

In [None]:
def acc_test():
    """Return the accuracy of the network on the MNIST test set.

    Runs the `acc` op on all of `data.test` in a single batch, with dropout
    disabled (keep_prob = 1.0) so that the result is deterministic for the
    current weights.

    Returns:
        The fraction of test images classified correctly, as returned by
        `session.run(acc, ...)`.
    """
    return session.run(acc, feed_dict={
        x: data.test.images,
        y_: data.test.labels,
        keep_prob: 1.0
    })