# MNIST: ANN

> **Credit**: [TensorFlow and deep learning, without a PhD](https://cloud.google.com/blog/products/gcp/learn-tensorflow-and-deep-learning-without-a-phd) (Part 1~10) <br />
>
> If you have any questions, please read my notes [**here**](). Otherwise, enjoy :)

##### MNIST

In [22]:
import tensorflow as tf
import mnistdata 
mnist = mnistdata.read_data_sets("data", one_hot=True, reshape=False)

# Suppress warnings
import warnings
warnings.filterwarnings('ignore') 

In [3]:
# neural network with 5 layers
#
# · · · · · · · · · ·          (input data, flattened pixels)       X [batch, 784]   # 784 = 28*28
# \x/x\x/x\x/x\x/x\x/ ✞     -- fully connected layer (relu+dropout) W1 [784, 200]      B1[200]
#  · · · · · · · · ·                                                Y1 [batch, 200]
#   \x/x\x/x\x/x\x/ ✞       -- fully connected layer (relu+dropout) W2 [200, 100]      B2[100]
#    · · · · · · ·                                                  Y2 [batch, 100]
#     \x/x\x/x\x/ ✞         -- fully connected layer (relu+dropout) W3 [100, 60]       B3[60]
#      · · · · ·                                                    Y3 [batch, 60]
#       \x/x\x/ ✞           -- fully connected layer (relu+dropout) W4 [60, 30]        B4[30]
#        · · ·                                                      Y4 [batch, 30]
#         \x/               -- fully connected layer (softmax)      W5 [30, 10]        B5[10]
#          ·                                                        Y5 [batch, 10]

##### Define TensorFlow Variables and Placeholders

In [4]:
# Placeholder for training data
X = tf.placeholder(tf.float32, [None, 28, 28, 1])

# Placeholder for correct labels
Y_ = tf.placeholder(tf.float32, [None, 10])

# Placeholder for learning rate as it will decay over time
lr = tf.placeholder(tf.float32)

# Placeholder for steps as it contributes to variable learning rate
step = tf.placeholder(tf.int32)

# Probability of keeping a node during dropout, 0.75 at training time and 1.0 at test time
pkeep = tf.placeholder(tf.float32)

##### Size of Each Layer

In [5]:
INPUT = 784
H1 = 200
H2 = 100
H3 = 60
H4 = 30
OUTPUT = 10

##### Weights and Biases

In [6]:
W1 = tf.Variable(tf.truncated_normal([INPUT, H1], stddev=0.1))
B1 = tf.Variable(tf.ones([H1])/10)
W2 = tf.Variable(tf.truncated_normal([H1, H2], stddev=0.1))
B2 = tf.Variable(tf.ones([H2])/10)
W3 = tf.Variable(tf.truncated_normal([H2, H3], stddev=0.1))
B3 = tf.Variable(tf.ones([H3])/10)
W4 = tf.Variable(tf.truncated_normal([H3, H4], stddev=0.1))
B4 = tf.Variable(tf.ones([H4])/10)
W5 = tf.Variable(tf.truncated_normal([H4, OUTPUT], stddev=0.1))
B5 = tf.Variable(tf.zeros([OUTPUT]))

##### Layers with Dropout

In [7]:
XX = tf.reshape(X, [-1, 784])

Y1 = tf.nn.relu(tf.matmul(XX, W1) + B1)
Y1d = tf.nn.dropout(Y1, pkeep)

Y2 = tf.nn.relu(tf.matmul(Y1d, W2) + B2)
Y2d = tf.nn.dropout(Y2, pkeep)

Y3 = tf.nn.relu(tf.matmul(Y2d, W3) + B3)
Y3d = tf.nn.dropout(Y3, pkeep)

Y4 = tf.nn.relu(tf.matmul(Y3d, W4) + B4)
Y4d = tf.nn.dropout(Y4, pkeep)

Ylogits = tf.matmul(Y4d, W5) + B5
Y = tf.nn.softmax(Ylogits)

##### Loss Function, Accuracy, Optimizer, and Learning Rate Decay

In [11]:
import math

cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy)*100

correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

lr = 0.0001 +  tf.train.exponential_decay(0.003, step, 2000, 1/math.e)
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

##### Let's Rock

In [12]:
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [23]:
# What we care about:
# - a: accuracy
# - c: cross-entropy

def training_step(i, update_test_data, update_train_data):
    batch_X, batch_Y = mnist.train.next_batch(100)
    if update_train_data:
        a, c = sess.run([accuracy, cross_entropy],
                        feed_dict={X: batch_X, Y_: batch_Y, pkeep: 1.0, step: i})
    if update_test_data:
        a, c = sess.run([accuracy, cross_entropy],
                        feed_dict={X: mnist.test.images, Y_: mnist.test.labels, pkeep: 1.0})
        
        print(str(i) 
              + ": ********* epoch " + str(i*100//mnist.train.images.shape[0]+1) 
              + " ********* test accuracy:" + str(a) 
              + " test loss: " + str(c))
        
    sess.run(train_step, {X: batch_X, Y_: batch_Y, pkeep: 0.75, step: i})

for i in range(10000+1):
    training_step(i, i % 100 == 0, i % 20 == 0)

0: ********* epoch 1 ********* test accuracy:0.9813 test loss: 8.780099
100: ********* epoch 1 ********* test accuracy:0.9691 test loss: 13.901305
200: ********* epoch 1 ********* test accuracy:0.9699 test loss: 12.695408
300: ********* epoch 1 ********* test accuracy:0.9691 test loss: 12.054609
400: ********* epoch 1 ********* test accuracy:0.9692 test loss: 11.150914
500: ********* epoch 1 ********* test accuracy:0.9712 test loss: 11.232132
600: ********* epoch 2 ********* test accuracy:0.9743 test loss: 10.223649
700: ********* epoch 2 ********* test accuracy:0.9768 test loss: 10.650112
800: ********* epoch 2 ********* test accuracy:0.9758 test loss: 9.320734
900: ********* epoch 2 ********* test accuracy:0.9743 test loss: 10.853048
1000: ********* epoch 2 ********* test accuracy:0.9768 test loss: 10.330263
1100: ********* epoch 2 ********* test accuracy:0.975 test loss: 10.038742
1200: ********* epoch 3 ********* test accuracy:0.9754 test loss: 9.735613
1300: ********* epoch 3 ****

Result: 98% Accuracy