In [1]:
import tensorflow as tf
import numpy as np
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from six.moves import xrange
import os.path
import time

from tensorflow.examples.tutorials.mnist import input_data

In [2]:
# Number of target categories: one per digit, 0 through 9.
NUM_CLASSES = 10

# Every MNIST image is a square of IMAGE_SIZE x IMAGE_SIZE pixels;
# IMAGE_PIXELS is the length of one image once flattened into a vector.
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE ** 2

In [3]:
def _inference(images, hidden1_units, hidden2_units):
    """Construct the feed-forward MNIST classifier graph.

    The network is two ReLU hidden layers followed by a linear output layer.
    Histogram summaries are attached to the weights, biases and output of
    every layer.

    Args:
    images: Images placeholder, from placeholder_inputs(); it is multiplied
      by an [IMAGE_PIXELS, hidden1_units] weight matrix.
    hidden1_units: Size of the first hidden layer.
    hidden2_units: Size of the second hidden layer.

    Returns:
    logits: Output tensor of the final linear layer (no softmax applied).
    """

    def make_weights(dims):
        # Truncated-normal initialisation scaled by 1/sqrt(dims[0]).
        fan_in = float(dims[0])
        return tf.Variable(
            tf.truncated_normal(dims, stddev=1.0 / math.sqrt(fan_in)),
            name='weights')

    def make_biases(dims):
        # Small positive constant rather than zeros, so that ReLU units are
        # not dead at initialisation.
        return tf.Variable(tf.constant(0.1, shape=dims), name='biases')

    def affine(inputs, fan_in, fan_out):
        # Creates the layer variables and returns them with inputs * W + b.
        layer_weights = make_weights([fan_in, fan_out])
        layer_biases = make_biases([fan_out])
        return (layer_weights,
                layer_biases,
                tf.matmul(inputs, layer_weights) + layer_biases)

    def record_histograms(tagged_tensors):
        # Adds one histogram summary op per (tag, tensor) pair, in order.
        for tag, tensor in tagged_tensors:
            tf.histogram_summary(tag, tensor)

    # First hidden layer.
    with tf.name_scope('hidden1'):
        w1, b1, pre_activation1 = affine(images, IMAGE_PIXELS, hidden1_units)
        hidden1 = tf.nn.relu(pre_activation1)
        record_histograms([('weights1', w1),
                           ('biases1', b1),
                           ('hidden1', hidden1)])

    # Second hidden layer.
    with tf.name_scope('hidden2'):
        w2, b2, pre_activation2 = affine(hidden1, hidden1_units, hidden2_units)
        hidden2 = tf.nn.relu(pre_activation2)
        record_histograms([('weights2', w2),
                           ('biases2', b2),
                           ('hidden2', hidden2)])

    # Linear read-out producing the per-class scores.
    with tf.name_scope('softmax_linear'):
        w3, b3, logits = affine(hidden2, hidden2_units, NUM_CLASSES)
        record_histograms([('weights3', w3),
                           ('biases3', b3),
                           ('logits', logits)])

    return logits

In [4]:
def _loss(logits, labels):
    """Compute the mean softmax cross-entropy of logits against labels.

    Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, int32 - [batch_size].

    Returns:
    loss: Loss tensor of type float (cross-entropy averaged over the batch).
    """
    # Build 1-hot dense float targets from the sparse integer labels by
    # scattering 1.0 at every (row index, label) coordinate.
    num_examples = tf.size(labels)
    label_column = tf.expand_dims(labels, 1)
    row_column = tf.expand_dims(tf.range(0, num_examples), 1)
    coordinates = tf.concat(1, [row_column, label_column])
    dense_shape = tf.pack([num_examples, NUM_CLASSES])
    onehot_labels = tf.sparse_to_dense(coordinates, dense_shape, 1.0, 0.0)

    per_example_xent = tf.nn.softmax_cross_entropy_with_logits(
        logits, onehot_labels, name='xentropy')

    # Only the averaging op lives inside the 'xent' name scope.
    with tf.name_scope('xent'):
        return tf.reduce_mean(per_example_xent, name='xentropy_mean')

In [5]:
def _training(loss, learning_rate):
    """Create the op that performs one gradient-descent update.

    Also registers a scalar summary for the loss, tagged with the name of the
    op that produces it.

    Args:
    loss: Loss tensor.
    learning_rate: The learning rate to use for gradient descent.

    Returns:
    train_op: The Op for training; running it also increments the
      non-trainable 'global_step' variable created here.
    """
    # Expose the loss to TensorBoard.
    tf.scalar_summary(loss.op.name, loss)

    sgd = tf.train.GradientDescentOptimizer(learning_rate)

    # Step counter, excluded from the trainable variables.
    step_counter = tf.Variable(0, name='global_step', trainable=False)

    with tf.name_scope('train'):
        return sgd.minimize(loss, global_step=step_counter)

In [6]:
def _evaluation(logits, labels):
    """Build the ops that score predictions against the true labels.

    A scalar 'accuracy' summary (fraction of correct predictions) is
    registered as a side effect.

    Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, int32 - [batch_size], with values in the
      range [0, NUM_CLASSES).

    Returns:
    A scalar int32 tensor with the number of examples
    that were predicted correctly.
    """
    # Bool tensor of shape [batch_size]: True where the label is the top-1
    # prediction.
    hits = tf.nn.in_top_k(logits, labels, 1)

    # Fraction of hits, recorded for TensorBoard only.
    with tf.name_scope('test'):
        hit_rate = tf.reduce_mean(tf.cast(hits, tf.float32))
        tf.scalar_summary('accuracy', hit_rate)

    # Number of True entries.
    hit_count = tf.reduce_sum(tf.cast(hits, tf.int32))
    return hit_count

In [7]:
def placeholder_inputs(batch_size):
    """Generate placeholder variables to represent the input tensors.

    Args:
    batch_size: The batch size; used as the leading dimension of both
      placeholders.

    Returns:
    images_placeholder: float32 placeholder of shape
      (batch_size, IMAGE_PIXELS).
    labels_placeholder: int32 placeholder of shape (batch_size,).
    """
    images_placeholder = tf.placeholder(tf.float32,
                                        shape=(batch_size, IMAGE_PIXELS))
    # The trailing comma matters: `(batch_size)` is just a parenthesised
    # scalar, so a `None` batch size would yield a placeholder of completely
    # unknown shape instead of a rank-1 tensor of unknown length.
    labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size,))
    return images_placeholder, labels_placeholder

In [8]:
def fill_feed_dict(data_set, images_pl, labels_pl):
    """Build the feed_dict for one step from the next batch of a data set.

    The batch size and the fake-data switch are read from the global FLAGS.

    Args:
        data_set: The set of images and labels, from input_data.read_data_sets()
        images_pl: The images placeholder, from placeholder_inputs().
        labels_pl: The labels placeholder, from placeholder_inputs().

    Returns:
        feed_dict: The feed dictionary mapping from placeholders to values.
    """
    batch = data_set.next_batch(FLAGS.batch_size, FLAGS.fake_data)
    images_feed, labels_feed = batch
    return {images_pl: images_feed, labels_pl: labels_feed}

In [9]:
def do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_set):
    """Runs one evaluation against the full epoch of data.

    Only whole batches of FLAGS.batch_size are evaluated, so any remainder
    of the data set is left out of the reported counts. If the data set does
    not contain a single whole batch, a notice is printed and nothing is
    evaluated.

    Args:
    sess: The session in which the model has been trained.
    eval_correct: The Tensor that returns the number of correct predictions.
    images_placeholder: The images placeholder.
    labels_placeholder: The labels placeholder.
    data_set: The set of images and labels to evaluate, from
      input_data.read_data_sets().
    """
    steps_per_epoch = data_set.num_examples // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size

    # Without this guard the precision computation below would divide by zero
    # whenever the data set is smaller than one batch.
    if num_examples == 0:
        print('  Num examples: 0  (data set holds fewer than one batch of %d; '
              'skipping evaluation)' % FLAGS.batch_size)
        return

    true_count = 0  # Counts the number of correct predictions.
    for _ in xrange(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set,
                                   images_placeholder,
                                   labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)

    # Explicit float conversion keeps this a true division on its own.
    precision = float(true_count) / num_examples
    print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
          (num_examples, true_count, precision))

In [10]:
def run_training():
    """Train MNIST for a number of steps.

    Builds the model graph, then runs FLAGS.max_steps training steps. Every
    100 steps the loss is printed and the merged summaries are written to
    'logs/'. Every 1000 steps, and on the final step, a checkpoint is saved
    inside FLAGS.train_dir and the model is evaluated on the training,
    validation and test sets. The session and the summary writer are closed
    when training ends, including when it ends with an exception.
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)

    # Saver.save() expects a file-name prefix, not a directory: passing the
    # bare directory name would write files such as `data-999` next to the
    # directory instead of inside it.
    if FLAGS.train_dir and not os.path.isdir(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)
    checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = _inference(images_placeholder,
                            FLAGS.hidden1,
                            FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = _loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = _training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = _evaluation(logits, labels_placeholder)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Op that initializes the variables.
        init = tf.initialize_all_variables()

        # Data sets evaluated at every checkpoint, with their headings.
        eval_splits = [
            ('Training Data Eval:', data_sets.train),
            ('Validation Data Eval:', data_sets.validation),
            ('Test Data Eval:', data_sets.test),
        ]

        # The context manager closes the session when training ends.
        with tf.Session() as sess:
            sess.run(init)

            # Instantiate a SummaryWriter to output summaries and the Graph.
            summary_writer = tf.train.SummaryWriter('logs/',
                                                    graph_def=sess.graph_def)
            try:
                # And then after everything is built, start the training loop.
                for step in xrange(FLAGS.max_steps):
                    start_time = time.time()

                    # Fill a feed dictionary with the actual set of images and
                    # labels for this particular training step.
                    feed_dict = fill_feed_dict(data_sets.train,
                                               images_placeholder,
                                               labels_placeholder)

                    # Run one step of the model. The value returned for
                    # `train_op` is discarded; only the loss is kept.
                    _, loss_value = sess.run([train_op, loss],
                                             feed_dict=feed_dict)

                    duration = time.time() - start_time

                    # Write the summaries and print an overview fairly often.
                    if step % 100 == 0:
                        # Print status to stdout.
                        print('Step %d: loss = %.2f (%.3f sec)' %
                              (step, loss_value, duration))
                        # Update the events file.
                        summary_str = sess.run(summary_op, feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str, step)

                    # Save a checkpoint and evaluate the model periodically.
                    if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                        saver.save(sess, checkpoint_path, global_step=step)
                        for heading, split in eval_splits:
                            print(heading)
                            do_eval(sess,
                                    eval_correct,
                                    images_placeholder,
                                    labels_placeholder,
                                    split)
            finally:
                # Flush pending events to disk and release the events file.
                summary_writer.close()

In [11]:
# Model hyper-parameters and paths, registered as external flags and read
# through FLAGS by the functions in this notebook.
flags = tf.app.flags
FLAGS = flags.FLAGS

# One row per flag: (definer, name, default, help text). The flags are
# registered in the order listed.
for _define, _name, _default, _help in (
        (flags.DEFINE_float, 'learning_rate', 0.01,
         'Initial learning rate.'),
        (flags.DEFINE_integer, 'max_steps', 2000,
         'Number of steps to run trainer.'),
        (flags.DEFINE_integer, 'hidden1', 128,
         'Number of units in hidden layer 1.'),
        (flags.DEFINE_integer, 'hidden2', 32,
         'Number of units in hidden layer 2.'),
        (flags.DEFINE_integer, 'batch_size', 100,
         'Batch size.  Must divide evenly into the dataset sizes.'),
        (flags.DEFINE_string, 'train_dir', 'data',
         'Directory to put the training data.'),
        (flags.DEFINE_boolean, 'fake_data', False,
         'If true, uses fake data for unit testing.'),
):
    _define(_name, _default, _help)
# Drop the loop temporaries so they do not linger in the notebook namespace.
del _define, _name, _default, _help

In [12]:
# Launch training with the flag values defined in this notebook. The loss is
# printed every 100 steps, and the model is checkpointed and evaluated every
# 1000 steps and on the final step.
run_training()

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz
Step 0: loss = 2.31 (0.012 sec)
Step 100: loss = 2.00 (0.006 sec)
Step 200: loss = 1.58 (0.006 sec)
Step 300: loss = 1.27 (0.005 sec)
Step 400: loss = 1.15 (0.005 sec)
Step 500: loss = 0.73 (0.005 sec)
Step 600: loss = 0.82 (0.006 sec)
Step 700: loss = 0.69 (0.005 sec)
Step 800: loss = 0.51 (0.008 sec)
Step 900: loss = 0.43 (0.005 sec)
Training Data Eval:
  Num examples: 55000  Num correct: 47953  Precision @ 1: 0.8719
Validation Data Eval:
  Num examples: 5000  Num correct: 4386  Precision @ 1: 0.8772
Test Data Eval:
  Num examples: 10000  Num correct: 8759  Precision @ 1: 0.8759
Step 1000: loss = 0.46 (0.021 sec)
Step 1100: loss = 0.48 (0.117 sec)
Step 1200: loss = 0.38 (0.006 sec)
Step 1300: loss = 0.43 (0.006 sec)
Step 1400: loss = 0.31 (0.005 sec)
Step 1500: loss = 0.30 (0.005 sec)
Step 1600: loss = 0.31 (0.006 se