## Reference
- [TensorFlow Examples - convolutional network](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3%20-%20Neural%20Networks/convolutional_network.ipynb)

In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import os

# Directory the MNIST files are read from (relative to the notebook's working directory).
data_path = "./../data/MNIST/"
# exist_ok=True creates the directory only when it is missing, without a
# separate existence check that another process could race with.
os.makedirs(data_path, exist_ok=True)

# Read the MNIST data sets from data_path with one-hot encoded labels.
mnist = input_data.read_data_sets(data_path, one_hot=True)

Extracting ./../data/MNIST/train-images-idx3-ubyte.gz
Extracting ./../data/MNIST/train-labels-idx1-ubyte.gz
Extracting ./../data/MNIST/t10k-images-idx3-ubyte.gz
Extracting ./../data/MNIST/t10k-labels-idx1-ubyte.gz


In [2]:
# Parameters
learning_rate = 0.01  # learning rate handed to the Adam optimizer
num_iters = 3000      # number of mini-batch training iterations
batch_size = 64       # examples drawn per training iteration
# NOTE(review): display_step is not read anywhere in this notebook; the
# training loop reports the loss on a fixed interval of 100 iterations.
display_step = 20

geometry = [28, 28]   # image dimensions in pixels; the input is fed flattened
# MNIST digit labels run 0-9 (this list previously held 1-10). Only the
# number of classes is used when building the graph.
classes = list(range(10))
num_classes = len(classes)
# Value fed to the `dropout` placeholder while training. The placeholder is
# passed to tf.nn.dropout as its second argument, which in the TF 1.x API is
# the probability of KEEPING a unit (so 0.75 drops 25% of activations).
dropout_prob = 0.75

In [3]:
# TensorFlow graph inputs: flattened images, one-hot labels, and the value
# forwarded to tf.nn.dropout (fed at run time).
num_pixels = geometry[0] * geometry[1]
X = tf.placeholder(tf.float32, [None, num_pixels])
y = tf.placeholder(tf.float32, [None, num_classes])
dropout = tf.placeholder(tf.float32)


def _random_normal_variable(shape):
    """Return a tf.Variable of `shape` initialised from tf.random_normal."""
    return tf.Variable(tf.random_normal(shape))


# AlexNet-style weights and biases.
# Convolution kernels are laid out as [height, width, in_channels, out_channels].

# 3x3 conv, 1 input channel, 64 output channels
Wc1 = _random_normal_variable([3, 3, 1, 64])
bc1 = _random_normal_variable([64])

# 3x3 conv, 64 input channels, 128 output channels
Wc2 = _random_normal_variable([3, 3, 64, 128])
bc2 = _random_normal_variable([128])

# 3x3 conv, 128 input channels, 256 output channels
Wc3 = _random_normal_variable([3, 3, 128, 256])
bc3 = _random_normal_variable([256])

# Fully connected part (standard 3-layer MLP): 4*4*256 inputs -> 1024
Wf1 = _random_normal_variable([4 * 4 * 256, 1024])
bf1 = _random_normal_variable([1024])

# Second hidden layer: 1024 -> 1024
Wf2 = _random_normal_variable([1024, 1024])
bf2 = _random_normal_variable([1024])

# Output layer: 1024 -> one score per class
Wout = _random_normal_variable([1024, num_classes])
bout = _random_normal_variable([num_classes])

In [4]:
# Convolution Network


def _conv_stage(inputs, weights, biases, keep_prob, norm_name):
    """Build one convolutional stage of the network.

    The stage applies, in order: a stride-1 SAME-padded convolution, bias add
    and ReLU, 2x2 max pooling with stride 2 (SAME padding), local response
    normalization, and dropout.

    Args:
        inputs: 4-D input tensor for tf.nn.conv2d.
        weights: convolution kernel variable.
        biases: bias variable added to the convolution output.
        keep_prob: second argument of tf.nn.dropout (keep probability in TF 1.x).
        norm_name: name given to the local response normalization op.

    Returns:
        The output tensor of the dropout op.
    """
    stage = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
    stage = tf.nn.relu(tf.nn.bias_add(stage, biases))
    stage = tf.nn.max_pool(stage, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    stage = tf.nn.lrn(stage, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name=norm_name)
    return tf.nn.dropout(stage, keep_prob)


# Reshape the flat input vectors into [batch, 28, 28, 1] image tensors
input_X = tf.reshape(X, shape=[-1, 28, 28, 1])

# Stages 1-3 : Convolution -> ReLU -> Max Pooling -> Local Response Normalization -> Dropout
# Each SAME-padded stride-2 pooling rounds the spatial size up: 28 -> 14 -> 7 -> 4.
conv1 = _conv_stage(input_X, Wc1, bc1, dropout, 'norm1')
conv2 = _conv_stage(conv1, Wc2, bc2, dropout, 'norm2')
conv3 = _conv_stage(conv2, Wc3, bc3, dropout, 'norm3')

# Stage 4 : Fully connected : Linear -> ReLU -> Linear -> ReLU
# Flatten the last feature map so its width matches Wf1's input dimension.
fc1 = tf.reshape(conv3, [-1, Wf1.get_shape().as_list()[0]])
fc1 = tf.nn.relu(tf.add(tf.matmul(fc1, Wf1), bf1))
fc2 = tf.nn.relu(tf.add(tf.matmul(fc1, Wf2), bf2))

# Output layer: linear, no activation (these are the logits fed to the loss)
out = tf.add(tf.matmul(fc2, Wout), bout)

In [5]:
# Loss: softmax cross-entropy between the logits and the one-hot labels,
# averaged over the batch
per_example_xent = tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=y)
cost = tf.reduce_mean(per_example_xent)

# `optimizer` is the op returned by minimize(); running it performs one Adam update
adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
optimizer = adam.minimize(cost)

# Evaluate model: fraction of examples whose highest-scoring class matches the label
predicted_class = tf.argmax(out, 1)
true_class = tf.argmax(y, 1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [6]:
init = tf.global_variables_initializer()

# Launch the Graph
with tf.Session() as sess:
    sess.run(init)

    # Train: each pass of this loop consumes one mini-batch. The printed label
    # says "Epoch", but the counter is the iteration number.
    for step in range(1, num_iters + 1):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)

        # Fit training data: one optimizer update with dropout active
        train_feed = {X: batch_xs, y: batch_ys, dropout: dropout_prob}
        sess.run(optimizer, feed_dict=train_feed)

        # Only report every 100th iteration
        if step % 100 != 0:
            continue

        # Loss on the batch just trained on, evaluated with dropout disabled (1.0)
        report_feed = {X: batch_xs, y: batch_ys, dropout: 1.}
        batch_loss = sess.run(cost, feed_dict=report_feed)
        print("Epoch : ", step, " loss=" , batch_loss)

    print("Optimization Finishied")

    # Test: accuracy over the whole test set in a single run, dropout disabled
    test_feed = {X: mnist.test.images, y: mnist.test.labels, dropout: 1.}
    test_accuracy = sess.run(accuracy, feed_dict=test_feed)
    print("Testing Accuracy:", test_accuracy)

Epoch :  100  loss= 6211.72
Epoch :  200  loss= 3085.72
Epoch :  300  loss= 1240.81
Epoch :  400  loss= 1410.34
Epoch :  500  loss= 1100.17
Epoch :  600  loss= 295.073
Epoch :  700  loss= 1361.19
Epoch :  800  loss= 604.806
Epoch :  900  loss= 1138.17
Epoch :  1000  loss= 585.26
Epoch :  1100  loss= 285.21
Epoch :  1200  loss= 138.941
Epoch :  1300  loss= 183.752
Epoch :  1400  loss= 246.955
Epoch :  1500  loss= 240.526
Epoch :  1600  loss= 111.65
Epoch :  1700  loss= 134.647
Epoch :  1800  loss= 129.82
Epoch :  1900  loss= 167.846
Epoch :  2000  loss= 331.514
Epoch :  2100  loss= 154.635
Epoch :  2200  loss= 297.247
Epoch :  2300  loss= 396.379
Epoch :  2400  loss= 19.6962
Epoch :  2500  loss= 243.327
Epoch :  2600  loss= 124.271
Epoch :  2700  loss= 121.217
Epoch :  2800  loss= 15.1604
Epoch :  2900  loss= 108.478
Epoch :  3000  loss= 247.452
Optimization Finishied
Testing Accuracy: 0.9412
