# MNIST: CNN

> **Credit**: [TensorFlow and deep learning, without a PhD](https://cloud.google.com/blog/products/gcp/learn-tensorflow-and-deep-learning-without-a-phd) (Part 11~13) <br />
>
> Part one: [MNIST with Feed-forward Neural Network]() <br />
> If you have any questions, please read my notes [**here**](). Otherwise, enjoy :)

##### MNIST

In [1]:
import tensorflow as tf
import mnistdata 
mnist = mnistdata.read_data_sets("data", one_hot=True, reshape=False)

# Suppress warnings
import warnings
warnings.filterwarnings('ignore') 

INFO:tensorflow:Tensorflow version 1.11.0


In [2]:
# neural network structure for this sample:
#
# · · · · · · · · · ·      (input data, 1-deep)                 X [batch, 28, 28, 1]
# @ @ @ @ @ @ @ @ @ @   -- conv. layer 5x5x1=>4 stride 1        W1 [5, 5, 1, 4]        B1 [4]
# ∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶                                           Y1 [batch, 28, 28, 4]
#   @ @ @ @ @ @ @ @     -- conv. layer 5x5x4=>8 stride 2        W2 [5, 5, 4, 8]        B2 [8]
#   ∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶                                             Y2 [batch, 14, 14, 8]
#     @ @ @ @ @ @       -- conv. layer 4x4x8=>12 stride 2       W3 [4, 4, 8, 12]       B3 [12]
#     ∶∶∶∶∶∶∶∶∶∶∶                                               Y3 [batch, 7, 7, 12] => reshaped to YY [batch, 7*7*12]
#      \x/x\x\x/        -- fully connected layer (relu)         W4 [7*7*12, 200]       B4 [200]
#       · · · ·                                                 Y4 [batch, 200]
#       \x/x\x/         -- fully connected layer (softmax)      W5 [200, 10]           B5 [10]
#        · · ·                                                  Y [batch, 10]

##### Define TensorFlow Variables and Placeholders

In [3]:
# Placeholder for training data
X = tf.placeholder(tf.float32, [None, 28, 28, 1])

# Placeholder for correct labels
Y_ = tf.placeholder(tf.float32, [None, 10])

# Placeholder for learning rate as it will decay over time
lr = tf.placeholder(tf.float32)

# Placeholder for steps as it contributes to variable learning rate
step = tf.placeholder(tf.int32)

# Probability of keeping a node during dropout, 0.75 at training time and 1.0 at test time
pkeep = tf.placeholder(tf.float32)

##### Size/Depth of Each Layer

In [4]:
# Three conv layers with their channel counts, plus a fully connected layer
C1 = 6 # first conv layer output depth
C2 = 12 # second conv layer output depth
C3 = 24 # third conv layer output depth
N = 200 # size of fully connected layer

##### Weights and Biases

In [5]:
# These are the variables what we want to learn 
W1 = tf.Variable(tf.truncated_normal([6, 6, 1, C1], stddev=0.1))  # 6x6 patch, 1 input channel, K output channels
B1 = tf.Variable(tf.constant(0.1, tf.float32, [C1]))
W2 = tf.Variable(tf.truncated_normal([5, 5, C1, C2], stddev=0.1))
B2 = tf.Variable(tf.constant(0.1, tf.float32, [C2]))
W3 = tf.Variable(tf.truncated_normal([4, 4, C2, C3], stddev=0.1))
B3 = tf.Variable(tf.constant(0.1, tf.float32, [C3]))

W4 = tf.Variable(tf.truncated_normal([7 * 7 * C3, N], stddev=0.1))
B4 = tf.Variable(tf.constant(0.1, tf.float32, [N]))
W5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1))
B5 = tf.Variable(tf.constant(0.1, tf.float32, [10]))

##### Construct the Model

In [6]:
stride = 1  # output is 28x28
Y1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1, stride, stride, 1], padding='SAME') + B1)
stride = 2  # output is 14x14
Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[1, stride, stride, 1], padding='SAME') + B2)
stride = 2  # output is 7x7
Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[1, stride, stride, 1], padding='SAME') + B3)

# Reshape the output from the third conv layer before feeding into the fully connected layer
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * C3])

Y4 = tf.nn.relu(tf.matmul(YY, W4) + B4)
YY4 = tf.nn.dropout(Y4, pkeep) # Adding dropout for better performance
Ylogits = tf.matmul(YY4, W5) + B5
Y = tf.nn.softmax(Ylogits)

##### Loss Function, Accuracy, Optimizer, and Learning Rate Decay

In [7]:
import math

cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy)*100

correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

lr = 0.0001 +  tf.train.exponential_decay(0.003, step, 2000, 1/math.e)
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

##### Let's Rock

In [8]:
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [9]:
# What we care about:
# - a: accuracy
# - c: cross-entropy

def training_step(i, update_test_data, update_train_data):
    batch_X, batch_Y = mnist.train.next_batch(100)
    if update_train_data:
        a, c = sess.run([accuracy, cross_entropy],
                        feed_dict={X: batch_X, Y_: batch_Y, pkeep: 1.0, step: i})
    if update_test_data:
        a, c = sess.run([accuracy, cross_entropy],
                        feed_dict={X: mnist.test.images, Y_: mnist.test.labels, pkeep: 1.0})
        
        print(str(i) 
              + ": ********* epoch " + str(i*100//mnist.train.images.shape[0]+1) 
              + " ********* test accuracy:" + str(a) 
              + " test loss: " + str(c))
        
    sess.run(train_step, {X: batch_X, Y_: batch_Y, pkeep: 0.75, step: i})

for i in range(10000+1):
    training_step(i, i % 100 == 0, i % 20 == 0)

0: ********* epoch 1 ********* test accuracy:0.102 test loss: 234.20418
100: ********* epoch 1 ********* test accuracy:0.9428 test loss: 18.609976
200: ********* epoch 1 ********* test accuracy:0.9631 test loss: 11.312636
300: ********* epoch 1 ********* test accuracy:0.9774 test loss: 7.8123035
400: ********* epoch 1 ********* test accuracy:0.9772 test loss: 7.245
500: ********* epoch 1 ********* test accuracy:0.9783 test loss: 6.7397647
600: ********* epoch 2 ********* test accuracy:0.9831 test loss: 5.443881
700: ********* epoch 2 ********* test accuracy:0.981 test loss: 5.5480223
800: ********* epoch 2 ********* test accuracy:0.983 test loss: 5.283968
900: ********* epoch 2 ********* test accuracy:0.9849 test loss: 4.8893423
1000: ********* epoch 2 ********* test accuracy:0.9868 test loss: 4.399448
1100: ********* epoch 2 ********* test accuracy:0.9862 test loss: 4.1706843
1200: ********* epoch 3 ********* test accuracy:0.9877 test loss: 3.5469286
1300: ********* epoch 3 ********* 

Result: 99% Accuracy!