In [None]:
# In the last tutorial we built the entire graph first and then launched the session.
# There is also an interactive session, which I think is useful while you are putting
# together a new model.  You can run your graph in parts as you add to it, which
# makes it useful for debugging and experimenting.

# In this tutorial, we'll use the interactive graph to 'simulate' how we would
# experiment with and assemble a model a piece at a time.  Once we have finished
# putting the model together, we will re-assemble it "for real" and run it in a single session.

In [None]:
# First we will load the MNIST data.
# one_hot=True requests one-hot encoded labels, which is what the 10-wide label
# placeholder `y_` defined later expects.  'MNIST_data' is the directory handed to
# the loader (presumably where the dataset files are downloaded/cached -- confirm
# against the TensorFlow docs).
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

In [None]:
# Start the interactive session.
# Later cells call `.run(...)` / `.eval(...)` directly on ops and tensors without
# passing a session; that works because tf.InteractiveSession installs itself as
# the default session when it is constructed.
import tensorflow as tf
sess = tf.InteractiveSession()

In [None]:
# Remember that placeholders are promises to provide data later (through feed_dict).
# First we'll make placeholders for the input data and the output.
# Each input image is a flat vector of 784 pixel intensities, and each label is a
# 10-dimensional vector with one entry per digit 0-9.

x = tf.placeholder(tf.float32, shape=[None, 784])  # Pixel intensities
y_ = tf.placeholder(tf.float32, shape=[None, 10])  # Training labels

# None leaves that dimension unspecified, so x and y_ accept any number of rows.
# Each image has 784 pixels and occupies a single row in the tensor.  Since we
# did not fix the number of rows, we are free to feed as many images at a time
# as we want.

In [None]:
# Some helper functions for building the neural network.  These just create variables
# of a certain shape and randomly initialize them.

def weight_variable(shape):
  """Create a trainable weight tensor.

  Args:
    shape: Shape of the variable to create (e.g. a list of ints).

  Returns:
    A tf.Variable of the given shape whose initial value is drawn from a
    truncated normal distribution with standard deviation 0.1.
  """
  return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
  """Create a trainable bias tensor.

  Args:
    shape: Shape of the variable to create (e.g. a list of ints).

  Returns:
    A tf.Variable of the given shape with every entry initialized to 0.1.
  """
  return tf.Variable(tf.constant(0.1, shape=shape))

In [None]:
# Now we will create the weights.  These are variables, not placeholders,
# since they will be updated by tensorflow.  Parameters to be learned are
# typically variables in tensorflow.

hidden_nodes = 25

W = weight_variable([784,hidden_nodes])  # A 784 x hidden_nodes matrix (784 x 25 here)
b = bias_variable([hidden_nodes])

h = tf.nn.relu(tf.matmul(x, W) + b)

W_output = weight_variable([hidden_nodes, 10])
b_output = bias_variable([10])

# The model is one ReLU hidden layer (h) followed by a 10-way softmax output layer.
# Think of softmax like logistic regression on more than 2 classes.  With w_j the
# j-th column of W_output and b_j the j-th entry of b_output:
#
#     P(y = j | x) = exp(h*w_j + b_j) / sum_k exp(h*w_k + b_k)
#
# The softmax itself is applied later, inside the loss function.
#
# Run the initializers for every variable created so far.
sess.run(tf.global_variables_initializer())

In [None]:
# Calculate the unscaled logits: the raw output-layer scores, *before* softmax is
# applied.  This is what tf.nn.softmax_cross_entropy_with_logits expects -- it
# performs the softmax (exponentiate, then normalize) internally, so we must not
# apply softmax ourselves.

y = tf.matmul(h,W_output) + b_output

# cross_entropy is not a good name for this value.  We should call it loss, but
# I am going to leave it like this to match the tutorial.

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))   

# tf.nn.softmax_cross_entropy_with_logits calculates the cross entropy of each sample
# separately.  We take the mean over the batch because that is convention.
# See https://en.wikipedia.org/wiki/Cross_entropy#Cross-entropy_error_function_and_logistic_regression

In [None]:
# Now we define a train_step for the model.  Just one step: each time train_step
# is run, it applies a single gradient-descent update (learning rate 0.5) that
# moves the variables in the direction that reduces cross_entropy.

train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

In [None]:
# Run the train step 1000 times, feeding a fresh mini-batch each time, and see where we are at.

for _ in range(1000):
  batch = mnist.train.next_batch(100)  # Grab 100 training instances as (images, labels).
  batch_images, batch_labels = batch
  train_step.run(feed_dict={x: batch_images, y_: batch_labels})  # Feed the batch to x and y_

In [None]:
# Now that the model has been trained, we evaluate the model's performance.
#
# The correct way to do this is to add ops to the graph and then execute all at once.
# One complaint I have about this is that it makes it difficult to build your code in parts...
# Even with the interactive session, it is unwieldy to see how tensors move through the
# graph.

# First, we get the predicted digit for each example (the index of the largest logit)
# and compare it to the correct digit (the index of the 1 in the one-hot label).
# tf.argmax is used here because tf.arg_max is a deprecated alias for it.

correct_label = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))  # Boolean tensor, one entry per example

accuracy = tf.reduce_mean(tf.cast(correct_label, tf.float32))  # Cast bools to 0.0/1.0 so the mean is the fraction correct

# Two equivalent ways to evaluate a tensor: Tensor.eval() uses the default session
# (our InteractiveSession), while sess.run() names the session explicitly.
test_feed = {x: mnist.test.images, y_: mnist.test.labels}
print(accuracy.eval(feed_dict=test_feed))
print(sess.run(accuracy, feed_dict=test_feed))