In [16]:
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST data sets from the local 'MNIST_data' directory.
# one_hot=True requests each label as a one-hot vector instead of a digit index,
# which matches the [None, 10] label placeholder defined further down.
mnist = input_data.read_data_sets(train_dir='MNIST_data', one_hot=True)


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [17]:
import tensorflow as tf
# Create the session used by every later cell.
# An InteractiveSession registers itself as the default session, which is what
# lets later cells call train_step.run(...) and accuracy.eval(...) without
# passing a session explicitly.
sess = tf.InteractiveSession()

In [18]:
# Input images: a 2-D float tensor of shape [None, 784].
# 784 is the length of one flattened 28x28 MNIST image; None leaves the first
# (batch) dimension open, so any number of images can be fed at once.
x = tf.placeholder(dtype=tf.float32, shape=(None, 784))

# Target classes: a 2-D float tensor of shape [None, 10].
# Each row is the one-hot encoding of the digit (0 through 9) that the
# corresponding image in x belongs to.
y_ = tf.placeholder(dtype=tf.float32, shape=(None, 10))

In [19]:
# Model parameters: weight matrix W and bias vector b.
# tf.Variable takes the parameter's initial value; here both start as all zeros.

# W is 784x10: one row per input feature, one column per output class.
W = tf.Variable(initial_value=tf.zeros(shape=[784, 10]))

# b has one entry per class, i.e. 10 entries.
b = tf.Variable(initial_value=tf.zeros(shape=[10]))

In [20]:
# Variables have to be initialized inside the session before they can be used.
# The initializer op assigns every Variable the initial value it was declared
# with (all zeros for W and b), covering all Variables in one run.
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [21]:
# Predicted class scores

# The regression model is a single expression: multiply the batch of flattened
# images x by the weight matrix W, then add the bias b to every row.
# The result y holds unnormalized scores (logits); softmax is applied later,
# inside the loss.
y = tf.add(tf.matmul(x, W), b)

In [24]:
# Loss function
# The loss measures how bad the model's prediction is on one example; training
# tries to minimize it across all examples.
# We use the cross-entropy between the one-hot target y_ and the softmax of the
# model output y, in its numerically stable formulation:
# tf.nn.softmax_cross_entropy_with_logits applies the softmax to the
# unnormalized predictions itself and sums over the classes, giving one loss
# value per example.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)

# tf.reduce_mean then averages those per-example values over the batch.
cross_entropy = tf.reduce_mean(per_example_loss)


In [25]:
# Train the model
# With the model and the training loss defined, TensorFlow can use the
# computation graph to derive, by automatic differentiation, the gradient of
# the loss with respect to each variable.
# Of the built-in optimizers we use plain (steepest) gradient descent with a
# step length (learning rate) of 0.5 to descend the cross-entropy.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)
train_step = optimizer.minimize(cross_entropy)


In [27]:
# Running train_step once applies one gradient-descent update to W and b,
# so training is just running it repeatedly.
# Each of the 1000 iterations draws a fresh mini-batch of 100 training examples
# and feeds its images and labels into the x and y_ placeholders.
for step in range(1000):
    batch_images, batch_labels = mnist.train.next_batch(100)
    train_step.run(feed_dict={x: batch_images, y_: batch_labels})

In [30]:
# Evaluate the model

# tf.argmax returns the index of the largest entry along the given axis.
# Along axis 1 of y that is the class the model considers most likely for each
# image; along axis 1 of the one-hot y_ it is the true class.
predicted_label = tf.argmax(y, 1)
true_label = tf.argmax(y_, 1)

# Element-wise comparison yields a boolean tensor: True where the prediction
# matches the truth.
correct_prediction = tf.equal(predicted_label, true_label)

# Casting the booleans to floats and averaging gives the fraction correct,
# e.g. [True, False, True, True] -> [1, 0, 1, 1] -> 0.75.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [31]:
# Finally, evaluate the accuracy on the held-out test set by feeding all test
# images and labels at once. The result should be about 92% correct.
test_feed = {x: mnist.test.images, y_: mnist.test.labels}
print(accuracy.eval(feed_dict=test_feed))

0.9214
