# MNIST with Estimator and Experiment

Demonstrates how to implement different image models on MNIST using Estimator/Experiment.

## Loading data

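Let's download the MNIST data and examine its shape. We will need these numbers (image height, image width, and the number of classes) when defining the model and the serving input function.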

In [None]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST (downloading into mnist/data if needed) with one-hot labels and
# images kept in their un-flattened shape, then show the array shapes.
mnist = input_data.read_data_sets('mnist/data', one_hot=True, reshape=False)
# Parenthesised single-argument print behaves the same as a Python 2 print
# statement and is also valid as a Python 3 function call.
print(mnist.train.images.shape)
print(mnist.train.labels.shape)

In [None]:
# Image dimensions and class count used by the model and the serving input.
HEIGHT, WIDTH = 28, 28  # the model flattens each image to HEIGHT*WIDTH values
NCLASSES = 10           # number of columns in the logits the model produces

## Define the model.
Let's start with a very simple linear classifier. All our models will have this basic interface -- they will take an image and return the logits together with the number of classes.

In [None]:
def linear_model(img):
  """Build a single-layer linear classifier on top of the input images.

  Args:
    img: image tensor; it is reshaped to rows of HEIGHT*WIDTH values.

  Returns:
    A (logits, NCLASSES) tuple; logits has NCLASSES columns.
  """
  n_pixels = HEIGHT * WIDTH
  flat_images = tf.reshape(img, [-1, n_pixels])
  # Weights and bias both start from small truncated-normal noise; the weight
  # variable is created first, then the bias.
  weights = tf.Variable(tf.truncated_normal([n_pixels, NCLASSES], stddev=0.1))
  bias = tf.Variable(tf.truncated_normal([NCLASSES], stddev=0.1))
  return tf.matmul(flat_images, weights) + bias, NCLASSES

## Write the harness

As usual, we need to specify input functions for training, evaluation, and prediction.

In [None]:
def make_train_input_fn(mnist):
  """Return an input_fn that yields shuffled training batches.

  Args:
    mnist: dataset object exposing train.images and train.labels.

  Returns:
    A zero-argument function producing ({'image': images}, labels), where
    both tensors come from a shuffling queue with batch size 100.
  """
  def input_fn():
    # The whole training set is placed in the graph as constants;
    # enqueue_many=True makes the queue treat the first dimension as examples.
    all_images = tf.constant(mnist.train.images)
    all_labels = tf.constant(mnist.train.labels)
    image_batch, label_batch = tf.train.shuffle_batch(
        [all_images, all_labels],
        batch_size=100,
        capacity=5000,
        min_after_dequeue=2000,
        enqueue_many=True)
    return {'image': image_batch}, label_batch
  return input_fn

def make_eval_input_fn(mnist):
  """Return an input_fn that feeds the entire test set at once.

  Args:
    mnist: dataset object exposing test.images and test.labels.

  Returns:
    A zero-argument function producing ({'image': images}, labels) built
    from constants holding all test images and labels.
  """
  def input_fn():
    test_images = tf.constant(mnist.test.images)
    test_labels = tf.constant(mnist.test.labels)
    return {'image': test_images}, test_labels
  return input_fn

def serving_input_fn():
  """Describe the inputs accepted by the exported model.

  Returns:
    A ServingInputReceiver whose single input, 'image', is a float32
    placeholder of shape [None, HEIGHT, WIDTH].
  """
  image_placeholder = tf.placeholder(tf.float32, [None, HEIGHT, WIDTH])
  receiver_tensors = {'image': image_placeholder}
  # No preprocessing: the received tensors are handed to the model as-is, so
  # the same dict serves as both the features and the receiver tensors.
  return tf.estimator.export.ServingInputReceiver(receiver_tensors,
                                                  receiver_tensors)

I could have simply used a LinearClassifier, but later on, I will want to use different models, and so let's write a custom estimator.

In [None]:
def image_classifier(features, labels, mode, params):
  """Model function for the custom Estimator.

  Args:
    features: dict whose 'image' entry holds the input images.
    labels: class indices; they are one-hot encoded here for the loss. Only
      read in TRAIN and EVAL modes.
    mode: one of the tf.estimator.ModeKeys values.
    params: hyperparameter dict; 'learning_rate' is read in TRAIN mode.

  Returns:
    A tf.estimator.EstimatorSpec. Predictions and export outputs are always
    set; loss and the accuracy metric are set in TRAIN and EVAL modes; the
    train op is set in TRAIN mode only.
  """
  ylogits, nclasses = linear_model(features['image'])
  probabilities = tf.nn.softmax(ylogits)
  classes = tf.cast(tf.argmax(probabilities, 1), tf.uint8)
  predictions = {"probabilities": probabilities, "classes": classes}

  # Defaults apply to prediction mode, where no labels are available.
  loss = None
  train_op = None
  evalmetrics = None

  if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=ylogits, labels=tf.one_hot(labels, nclasses)))
    # Keyword arguments keep labels and predictions in their documented
    # roles; passed positionally they were reversed.
    evalmetrics = {
        'accuracy': tf.metrics.accuracy(labels=labels, predictions=classes)}
    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = tf.contrib.layers.optimize_loss(
          loss, tf.train.get_global_step(),
          learning_rate=params['learning_rate'], optimizer="Adam")

  return tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=predictions,
      loss=loss,
      train_op=train_op,
      eval_metric_ops=evalmetrics,
      export_outputs={
          'classes': tf.estimator.export.PredictOutput(
              {"probabilities": probabilities, "classes": classes})}
  )

def create_custom_estimator(output_dir, hparams):
  """Create the Estimator that wraps image_classifier.

  Args:
    output_dir: model directory passed to the Estimator.
    hparams: hyperparameter dict; 'train_steps' sets the checkpoint interval
      and the whole dict is forwarded to the model function as params.

  Returns:
    A tf.estimator.Estimator configured to checkpoint by step count.
  """
  # Checkpoint roughly five times per run. Floor division keeps the interval
  # an integer under Python 3 as well, and max() avoids a zero-step interval
  # when train_steps is smaller than 5.
  checkpoint_every = max(1, hparams['train_steps'] // 5)
  training_config = tf.contrib.learn.RunConfig(
      save_checkpoints_secs=None,
      save_checkpoints_steps=checkpoint_every)
  return tf.estimator.Estimator(model_fn=image_classifier, model_dir=output_dir,
                                config=training_config, params=hparams)

Experiment is the class that does distributed training.

In [None]:
def make_experiment_fn(output_dir, data_dir, hparams):
  """Return the experiment_fn expected by learn_runner.run.

  Args:
    output_dir: kept for interface compatibility; the returned function
      takes its own output_dir argument, which is the one actually used.
    data_dir: directory the MNIST data is read from.
    hparams: hyperparameter dict; 'train_steps' is read here and the dict is
      forwarded to the estimator.

  Returns:
    A function mapping an output directory to a tf.contrib.learn.Experiment.
  """
  def experiment_fn(output_dir):
    # Labels are loaded as class indices (no one_hot) because the model
    # function one-hot encodes them itself.
    mnist = input_data.read_data_sets(data_dir, reshape=False)
    train_steps = hparams['train_steps']
    return tf.contrib.learn.Experiment(
      estimator=create_custom_estimator(output_dir, hparams),
      train_input_fn=make_train_input_fn(mnist),
      eval_input_fn=make_eval_input_fn(mnist),
      train_steps=train_steps,
      # The eval input_fn supplies the whole test set in one batch.
      eval_steps=1,
      # Floor division keeps the frequency an integer under Python 3 as well.
      min_eval_frequency=min(100, train_steps // 10),
      export_strategies=tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(serving_input_fn=serving_input_fn)
    )
  return experiment_fn

This is the main() function

In [None]:
# Train for 100 steps, reading data from mnist/data and using output_dir as
# the run's output directory.
output_dir = 'mnist/learned'
hparams = {
    'train_steps': 100,
    'learning_rate': 0.01,
}
tf.contrib.learn.learn_runner.run(
    make_experiment_fn(output_dir, 'mnist/data', hparams),
    output_dir)

## Run as a Python module

Let's run it as a Python module.

In [None]:
%bash
# Clear any archive and output directory left over from an earlier run.
rm -rf mnistmodel.tar.gz mnist_trained
# Add the mnistmodel directory to the module search path so that
# "python -m trainer.task" can be resolved.
export PYTHONPATH=${PYTHONPATH}:${PWD}/mnistmodel
# Run the trainer locally with the same hyperparameters as the notebook run,
# writing output under ./mnist_trained.
python -m trainer.task \
   --output_dir=${PWD}/mnist_trained \
   --train_steps=100 --learning_rate=0.01 --job-dir=./tmp

Now, let's do it on ML Engine

In [None]:
%bash
# Job settings. NOTE(review): BUCKET presumably has to be changed to a bucket
# you can write to -- confirm before running.
BUCKET=cloud-training-demos-ml
REGION=us-central1
OUTDIR=gs://${BUCKET}/mnist/trained
# Job name gets a UTC timestamp suffix.
JOBNAME=mnist_$(date -u +%y%m%d_%H%M%S)
echo $OUTDIR $REGION $JOBNAME
# Delete whatever is already stored at the output location.
gsutil -m rm -rf $OUTDIR
# Submit the training job; the flags after the bare "--" are the same
# hyperparameters used for the local run.
gcloud ml-engine jobs submit training $JOBNAME \
   --region=$REGION \
   --module-name=trainer.task \
   --package-path=${PWD}/mnistmodel/trainer \
   --job-dir=$OUTDIR \
   --staging-bucket=gs://$BUCKET \
   --scale-tier=BASIC_GPU \
   -- \
   --train_steps=100 --learning_rate=0.01

<pre>
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
</pre>