In [None]:
# Simple Convolutional network for MNIST by aluo

%matplotlib inline
# Inline plotting

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# These __future__ imports give Python 3 semantics (absolute imports, true division, print function) on Python 2; the code doesn't work without them
from IPython import display

import argparse
import sys
import matplotlib.pyplot as plt

In [None]:
from tensorflow.examples.tutorials.mnist import input_data
# Use the tensorflow MNIST downloader

import tensorflow as tf
# Parsed command-line arguments. Stays None until the `__main__` cell assigns
# the argparse result to it; `main` reads FLAGS.data_dir to locate the MNIST data.
FLAGS = None

In [None]:
def main(_):
    """Build a small convolutional network for MNIST, train it and plot progress live.

    Network: two 5x5 conv layers -> batch norm -> 2x2 max-pool -> two 3x3 conv
    layers -> 2x2 max-pool -> 512-unit dense layer -> dropout -> 10-unit linear
    output layer (logits). The MNIST data is read from FLAGS.data_dir.

    Every 15 training steps the accuracy on the current training batch is
    computed and plotted next to the first image of that batch.

    Args:
        _: argument list handed over by tf.app.run; not used.
    """
    train_phase = tf.placeholder(tf.bool)
    # The phase can be either training (network is learning) or inference (network is being used)
    # So the train_phase is either true or false
    # This affects the batch-norm layer and the dropout layer below

    x = tf.placeholder(tf.float32, [None, 784])
    # Set a placeholder to take in input
    # Shape of input is a flattened 28 x 28 image, so 784 values

    digit_img = tf.reshape(x[0], [28, 28])
    # First image of the batch as a 28 x 28 array, only used for plotting

    x_img = tf.reshape(x, [-1, 28, 28, 1])
    # Reshape it into batchsize x 28 x 28 x 1 (because black & white)

    l_conv1 = tf.layers.conv2d(x_img, filters=32, kernel_size=(5, 5), padding='SAME', activation=tf.nn.relu)
    # First layer outputs 32 feature maps and applies relu

    l_conv2 = tf.layers.conv2d(l_conv1, filters=64, kernel_size=(5, 5), padding='SAME', activation=tf.nn.relu)
    # Second layer outputs 64 feature maps and applies relu

    l_norm1 = tf.layers.batch_normalization(l_conv2, training=train_phase)
    # Apply batch norm

    l_pool1 = tf.layers.max_pooling2d(l_norm1, pool_size=[2, 2], strides=2)
    # Apply pooling, reduces image size by factor 2 on each side, now is 14 x 14 x 64

    l_conv3 = tf.layers.conv2d(l_pool1, filters=32, kernel_size=(3, 3), padding='SAME', activation=tf.nn.relu)
    # Third layer outputs 32 feature maps and applies relu

    l_conv4 = tf.layers.conv2d(l_conv3, filters=16, kernel_size=(3, 3), padding='SAME', activation=tf.nn.relu)
    # Fourth layer outputs 16 feature maps and applies relu

    l_pool2 = tf.layers.max_pooling2d(l_conv4, pool_size=[2, 2], strides=2)
    # Pool again, now is 7 x 7 x 16

    l_flat1 = tf.reshape(l_pool2, [-1, 7 * 7 * 16])
    # Flatten the output of the previous layer

    l_dense1 = tf.layers.dense(l_flat1, units=512, activation=tf.nn.relu)
    # Dense layer with 512 neurons

    l_dropout1 = tf.layers.dropout(inputs=l_dense1, rate=0.25, training=train_phase)
    # Dropout 25% (only active while train_phase is True)

    y_output = tf.layers.dense(l_dropout1, units=10, activation=None)
    # Output layer with 10 neurons (corresponding to the digits 0...9)
    # It is fed from the dropout layer, otherwise dropout would never be applied
    # No activation here: softmax_cross_entropy_with_logits expects raw (unscaled) logits
    # and applies the softmax itself

    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    # Import data, set the labels to be a one-hot encoding
    # So an image that represents 2
    # Will have a label that is [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
    # Remember 0 is the first digit!

    y_ = tf.placeholder(tf.float32, [None, 10])
    # We define a place to input the label into the graph

    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_output))
    # Define a loss using cross-entropy
    # Basically measures how different the predicted label is from the actual label
    # The per-image losses are averaged over the batch, could potentially also use a sum

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Ported over from conv example
    # Update Ops are required for batch_norm to work (moving average for batch mean & std)

    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
        # Choose how we want to optimize (Adam is usually a good choice)
        # And choose the loss that we want to minimize

    y_output_digit = tf.argmax(y_output, 1)
    y_digit = tf.argmax(y_, 1)
    correct_prediction = tf.equal(y_output_digit, y_digit)
    # Take the index of the largest output score as the predicted digit
    # For example, if the largest of the 10 scores is at index 1
    # Then we say that the network predicts the image represents a 1
    # We then compare this to y_ (the actual digit)
    # If they are the same, we count it as a correct prediction

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # The accuracy is simply the correct predictions as a ratio of total predictions (batch size)
    step_holder = []
    accuracy_holder = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Initialize the weights, offsets etc.

        for i in range(20000):
            batch = mnist.train.next_batch(32)
            # Get 32 images

            sess.run(train_step, feed_dict={x: batch[0], y_: batch[1], train_phase: True})
            # Run a round of optimizations

            if i % 15 == 0:
                # Every 15 batches, we check on the accuracy
                step_holder.append(i)
                # Keep track of the steps

                train_accuracy, first_digit, predicted_digits = sess.run(
                    [accuracy, digit_img, y_output_digit],
                    feed_dict={x: batch[0], y_: batch[1], train_phase: False})
                # One forward pass in inference mode gives everything needed for the report:
                # the accuracy on the current batch, the first image and the predicted digits

                accuracy_holder.append(train_accuracy)
                # Keep track of the current accuracy

                display.clear_output(wait=True)
                print('step {}, training accuracy {}'.format(i, train_accuracy))
                plt.subplot(1, 2, 1)
                plt.imshow(first_digit)
                plt.title("Predicted digit: {}".format(predicted_digits[0]))
                plt.subplot(1, 2, 2)
                plt.plot(step_holder, accuracy_holder)
                plt.title("Accuracy vs training batches")
                # Plotting stuff

                display.display(plt.gcf())
                plt.gcf().clear()
                # Show the figure, then clear it so the next report starts from an empty figure

In [None]:
if __name__ == '__main__':
    # Command-line interface: the only option is the directory holding the MNIST files.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_dir',
        type=str,
        default='/tmp/tensorflow/mnist/input_data',
        help='Directory for storing input data',
    )

    # parse_known_args() returns the recognised options together with the
    # arguments it did not recognise, instead of failing on the latter.
    FLAGS, unparsed = parser.parse_known_args()

    # Hand control to TensorFlow's app runner, forwarding the program name
    # followed by the arguments argparse did not consume.
    tf.app.run(
        main=main,
        argv=[sys.argv[0]] + unparsed,
    )