## a. Download CIFAR-10 dataset

In [None]:
import cPickle
import os
import tarfile
import tensorflow as tf

In [None]:
# Name of the CIFAR-10 archive file.
CIFAR_FILENAME = 'cifar-10-python.tar.gz'
# URL the archive is downloaded from.
CIFAR_DOWNLOAD_URL = 'http://www.cs.toronto.edu/~kriz/' + CIFAR_FILENAME
# Sub-folder of the data directory from which the batch files are read.
CIFAR_LOCAL_FOLDER = 'cifar-10-batches-py'

In [None]:
def _download_and_extract(data_dir):
  """Fetches the CIFAR-10 archive into `data_dir` and unpacks it there.

  Args:
    data_dir: directory that receives both the archive file and its
      extracted contents.
  """
  tf.contrib.learn.datasets.base.maybe_download(CIFAR_FILENAME, data_dir, CIFAR_DOWNLOAD_URL)
  archive_path = os.path.join(data_dir, CIFAR_FILENAME)
  # Use a context manager so the archive handle is closed even when
  # extraction raises.
  with tarfile.open(archive_path, 'r:gz') as archive:
    archive.extractall(data_dir)

In [None]:
def _get_file_names():
  """Returns the file names expected to exist in the input_dir."""
  file_names = {}
  file_names['train'] = ['data_batch_%d' % i for i in xrange(1, 5)]
  file_names['validation'] = ['data_batch_5']
  file_names['eval'] = ['test_batch']
  return file_names

In [None]:
def _read_pickle_from_file(filename):
  """Loads and returns the object pickled in `filename`.

  Args:
    filename: path of the pickle file to read.

  Returns:
    The unpickled object.
  """
  # NOTE: unpickling can execute arbitrary code; only call this on files from
  # a trusted source.
  # Pickle data is binary, so the file is opened in binary mode.
  with tf.gfile.Open(filename, 'rb') as f:
    return cPickle.load(f)

In [None]:
def _convert_to_tfrecord(input_files, output_file):
  """Writes every example found in `input_files` into one TFRecords file.

  Args:
    input_files: paths of pickled batch files; each is read as a dict with
      'data' and 'labels' entries.
    output_file: path of the TFRecords file to create.
  """
  print('Generating %s' % output_file)
  with tf.python_io.TFRecordWriter(output_file) as writer:
    for path in input_files:
      batch = _read_pickle_from_file(path)
      pixels = batch['data']
      targets = batch['labels']
      # One tf.train.Example per label, paired with the data row at the same
      # index.
      for idx, target in enumerate(targets):
        feature_map = {
          'image': _bytes_feature(pixels[idx].tobytes()),
          'label': _int64_feature(target),
        }
        example = tf.train.Example(
          features=tf.train.Features(feature=feature_map))
        writer.write(example.SerializeToString())

In [None]:
def _int64_feature(value):
  """Wraps a single integer in a tf.train.Feature backed by an Int64List."""
  int_list = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=int_list)

In [None]:
def _bytes_feature(value):
  """Wraps a single value in a tf.train.Feature backed by a BytesList.

  Args:
    value: the payload. Byte strings are stored unchanged; anything else is
      converted with `str()` and encoded as UTF-8.

  Returns:
    A tf.train.Feature holding exactly one bytes entry.
  """
  # Calling str() on a bytes object yields its repr ("b'...'") on Python 3,
  # which would corrupt the payload, so bytes are passed through untouched.
  # On Python 2 `bytes` is `str`, so that case behaves as before.
  if not isinstance(value, bytes):
    value = str(value).encode('utf-8')
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

In [None]:
def create_tfrecord_files(data_dir='cifar-10'):
  """Downloads CIFAR-10 and writes one TFRecords file per data split.

  Args:
    data_dir: directory used for the download, the extracted batch files and
      the generated `<split>.tfrecords` files.
  """
  _download_and_extract(data_dir)
  batches_dir = os.path.join(data_dir, CIFAR_LOCAL_FOLDER)

  for split, batch_names in _get_file_names().items():
    sources = [os.path.join(batches_dir, name) for name in batch_names]
    target = os.path.join(data_dir, split + '.tfrecords')
    # Drop any output left over from a previous run; a missing file is fine.
    try:
      os.remove(target)
    except OSError:
      pass
    # Serialize the split as tf.train.Example records.
    _convert_to_tfrecord(sources, target)

In [None]:
# Download the data and generate the TFRecords files in the default
# 'cifar-10' directory.
create_tfrecord_files()

## Steps to use the TF Estimator APIs
1. Define dataset **metadata** and **global constants**
2. Define **data input function** to read the data from the source + **apply pre-processing**
3. Create TF **feature columns** based on metadata + **extended feature columns**
4. Instantiate a **model function** with the required **feature columns, EstimatorSpecs, & parameters**
5. Define a **serving function**
6. Run **Experiment** by supplying training and validation data, as well as required parameters
7. **Evaluate** the model using test data
8. Perform **predictions**

In [None]:
import math
import shutil
import tensorflow as tf

from datetime import datetime
import re
from tensorflow.python.feature_column import feature_column

from tensorflow.contrib.learn import learn_runner
from tensorflow.contrib.learn import make_export_strategy

# Show which TensorFlow version this notebook is running against.
print(tf.__version__)

In [None]:
# TFRecords files for each split; the names match the `<split>.tfrecords`
# files written by create_tfrecord_files() into the 'cifar-10' directory.
train_data_files = [os.path.join('cifar-10', 'train.tfrecords')]
valid_data_files = [os.path.join('cifar-10', 'validation.tfrecords')]
test_data_files = [os.path.join('cifar-10', 'eval.tfrecords')]

## 1. Define dataset metadata and global constants

In [None]:
# Process images of this size, which is the original CIFAR image size of
# 32 x 32 with 3 channels. If one alters these numbers, then the entire model
# architecture will change and any model would need to be retrained.
IMAGE_HEIGHT = 32
IMAGE_WIDTH = 32
IMAGE_DEPTH = 3

# Global constants describing the CIFAR-10 data set.
NUM_CLASSES = 10
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000

# If a model is trained with multiple GPUs, prefix all Op names with tower_name
# to differentiate the operations. Note that this prefix is removed from the
# names of the summaries when visualizing a model.
TOWER_NAME = 'tower'

# We use a weight decay of 0.0002, which performs better than the 0.0001 that
# was originally suggested.
WEIGHT_DECAY = 2e-4
# Momentum passed to the MomentumOptimizer in get_train_op.
MOMENTUM = 0.9

# Global constants describing model behaviors
# MODEL_NAME names the sub-directory of 'trained_models' used as model dir.
MODEL_NAME = 'cnn-model-01'
# When False, the model directory is removed before the experiment is run.
USE_CHECKPOINT = False

## 2. Define Data Input Function

### a. parsing CIFAR-10 dataset

In [None]:
def parse_record(serialized_example):
  """Decodes one serialized tf.train.Example into an (image, label) pair.

  Args:
    serialized_example: scalar string tensor holding a serialized Example
      with an 'image' bytes feature and a 'label' int64 feature.

  Returns:
    image: float32 tensor laid out as [height, width, depth].
    label: one-hot tensor of length NUM_CLASSES.
  """
  feature_spec = {
    'image': tf.FixedLenFeature([], tf.string),
    'label': tf.FixedLenFeature([], tf.int64),
  }
  parsed = tf.parse_single_example(serialized_example, features=feature_spec)

  # The raw bytes are interpreted as [depth, height, width] and then
  # transposed to [height, width, depth].
  flat_pixels = tf.decode_raw(parsed['image'], tf.uint8)
  flat_pixels.set_shape([IMAGE_DEPTH * IMAGE_HEIGHT * IMAGE_WIDTH])
  depth_major = tf.reshape(flat_pixels, [IMAGE_DEPTH, IMAGE_HEIGHT, IMAGE_WIDTH])
  depth_last = tf.transpose(depth_major, [1, 2, 0])
  image = tf.cast(depth_last, tf.float32)

  class_index = tf.cast(parsed['label'], tf.int32)
  label = tf.one_hot(class_index, NUM_CLASSES)

  return image, label

### b. preprocessing CIFAR-10 dataset

In [None]:
def preprocess_image(image, is_training=False):
  """Standardizes one [height, width, depth] image, augmenting it in training.

  Args:
    image: a single image tensor of layout [height, width, depth].
    is_training: when True, random crop and flip augmentation is applied
      before standardization.

  Returns:
    The result of tf.image.per_image_standardization on the (possibly
    augmented) image.
  """
  if is_training:
    # Grow the image to 8 extra pixels per dimension (4 on each side), then
    # cut a random window of the original size back out of it.
    padded = tf.image.resize_image_with_crop_or_pad(
        image, IMAGE_HEIGHT + 8, IMAGE_WIDTH + 8)
    cropped = tf.random_crop(padded, [IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH])

    # Mirror the image horizontally at random.
    image = tf.image.random_flip_left_right(cropped)

  # Per-image standardization is applied in every mode.
  return tf.image.per_image_standardization(image)

### c. data pipeline input function

In [None]:
def generate_input_fn(file_names,
                      mode=tf.estimator.ModeKeys.EVAL,
                      num_epochs=None,
                      batch_size=1):
  """Builds an input function that reads TFRecords files.

  Args:
    file_names: TFRecords file names to read.
    mode: a ModeKeys value; records are shuffled and images augmented only
      when it equals ModeKeys.TRAIN.
    num_epochs: value passed to Dataset.repeat.
    batch_size: number of examples per batch.

  Returns:
    A no-argument function returning ({'images': images}, labels).
  """

  def _input_fn():
    training = (mode == tf.estimator.ModeKeys.TRAIN)

    records = tf.data.TFRecordDataset(filenames=file_names)
    if training:
      # Shuffle the serialized records before they are parsed.
      records = records.shuffle(buffer_size=batch_size * 2 + 1)

    batches = (
        records
        .map(parse_record)
        .map(lambda image, label: (preprocess_image(image, training), label))
        .prefetch(2 * batch_size)
        # Repeating after (not before) the shuffle keeps separate epochs from
        # blending together.
        .repeat(num_epochs)
        # Group results into batches of up to batch_size examples.
        .batch(batch_size))

    images, labels = batches.make_one_shot_iterator().get_next()
    return {'images': images}, labels

  return _input_fn

## 3. Define Feature Columns

In [None]:
def get_feature_columns():
  """Returns a dict mapping each feature name to its feature column."""
  image_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH)
  images_column = tf.feature_column.numeric_column('images', image_shape)
  return {'images': images_column}

In [None]:
# Build the feature columns once and print them for inspection.
feature_columns = get_feature_columns()
print("Feature Columns: {}".format(feature_columns))

## 4. Instantiate an Estimator

In [None]:
def _activation_summary(x):
  """Records TensorBoard summaries for an activation tensor.

  Emits a histogram of the activations and a scalar with the fraction of
  zeros (sparsity).

  Args:
    x: Tensor
  Returns:
    nothing
  """
  # Strip a leading 'tower_[0-9]/' so multi-GPU runs share summary names,
  # which keeps the TensorBoard presentation readable.
  tower_prefix = '%s_[0-9]*/' % TOWER_NAME
  summary_name = re.sub(tower_prefix, '', x.op.name)
  tf.summary.histogram(summary_name + '/activations', x)
  sparsity = tf.nn.zero_fraction(x)
  tf.summary.scalar(summary_name + '/sparsity', sparsity)
  
def _variable_on_cpu(name, shape, initializer):
  """Creates (or fetches) a float32 variable placed on the CPU.

  Args:
    name: name of the variable
    shape: list of ints
    initializer: initializer for Variable
  Returns:
    Variable Tensor
  """
  with tf.device('/cpu:0'):
    return tf.get_variable(
        name, shape, initializer=initializer, dtype=tf.float32)

def _variable_with_weight_decay(name, shape, stddev, wd):
  """Creates a truncated-normal initialized variable with optional decay.

  When `wd` is given, an L2 penalty scaled by `wd` is added to the 'losses'
  collection.

  Args:
    name: name of the variable
    shape: list of ints
    stddev: standard deviation of a truncated Gaussian
    wd: multiplier for the L2 loss of the variable. If None, no weight decay
        term is added for this Variable.
  Returns:
    Variable Tensor
  """
  initializer = tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32)
  var = _variable_on_cpu(name, shape, initializer)
  if wd is None:
    return var

  penalty = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
  tf.add_to_collection('losses', penalty)
  return var

In [None]:
def inference(images):
  """Builds the CNN and returns its un-normalized class scores.

  The network is two convolution stages followed by two fully connected
  layers and a final linear layer.

  Args:
    images: batch of images; the first kernel expects 3 input channels.

  Returns:
    Logits tensor with NUM_CLASSES columns; no softmax is applied here.
  """
  # First convolution: 5x5 kernels, 3 -> 64 channels, stride 1.
  with tf.variable_scope('conv1') as scope:
    kernel = _variable_with_weight_decay('weights', shape=[5, 5, 3, 64], stddev=5e-2, wd=0.0)
    conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    pre_activation = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(conv1)
    
  # 3x3 max pooling with stride 2, then local response normalization.
  pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')
  norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
  
  # Second convolution: 5x5 kernels, 64 -> 64 channels, stride 1.
  with tf.variable_scope('conv2') as scope:
    kernel = _variable_with_weight_decay('weights', shape=[5, 5, 64, 64], stddev=5e-2, wd=0.0)
    conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
    pre_activation = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(conv2)

  # Unlike the first stage, normalization comes before pooling here.
  norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
  pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')
    
  # First fully connected layer (384 units) on the flattened feature maps.
  with tf.variable_scope('local3') as scope:
    pool2_shape = pool2.get_shape()
    # Number of values per example: product of all non-batch dimensions.
    dim = pool2_shape[1] * pool2_shape[2] * pool2_shape[3]
    reshape = tf.reshape(pool2, [-1, dim])
    weights = _variable_with_weight_decay('weights', shape=[dim, 384], stddev=0.04, wd=0.004)
    biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
    local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
    _activation_summary(local3)

  # Second fully connected layer (192 units).
  with tf.variable_scope('local4') as scope:
    weights = _variable_with_weight_decay('weights', shape=[384, 192], stddev=0.04, wd=0.004)
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
    local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
    _activation_summary(local4)

  # Final linear layer producing one score per class.
  with tf.variable_scope('softmax_linear') as scope:
    weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES], stddev=1/192.0, wd=0.0)
    biases = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0))
    logits = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
    _activation_summary(logits)

  return logits

In [None]:
def get_loss(logits, labels):
  """Returns softmax cross entropy plus an L2 penalty on trainable variables.

  Args:
    logits: un-normalized class scores.
    labels: one-hot labels.

  Returns:
    Scalar loss tensor.
  """
  xent = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)

  # Expose the cross entropy under a fixed name for logging, and as a summary.
  tf.identity(xent, name='cross_entropy')
  tf.summary.scalar('cross_entropy', xent)

  # The penalty sums the L2 loss of every trainable variable, scaled by
  # WEIGHT_DECAY.
  l2_terms = [tf.nn.l2_loss(v) for v in tf.trainable_variables()]
  return xent + WEIGHT_DECAY * tf.add_n(l2_terms)

In [None]:
def get_train_op(loss, params, mode):
  """Builds the training op; returns None outside of TRAIN mode.

  Args:
    loss: scalar loss tensor to minimize.
    params: hyper-parameters; `params.batch_size` is read.
    mode: a ModeKeys value.

  Returns:
    The op that applies one momentum-SGD step, or None when `mode` is not
    ModeKeys.TRAIN.
  """
  if mode != tf.estimator.ModeKeys.TRAIN:
    return None

  # The learning rate scales linearly with the batch size: a batch size of
  # 128 gives 0.1.
  base_rate = 0.1 * params.batch_size / 128
  steps_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / params.batch_size
  step = tf.train.get_or_create_global_step()

  # The rate drops by a factor of 10 at epochs 100, 150 and 200.
  boundaries = [int(steps_per_epoch * epoch) for epoch in [100, 150, 200]]
  values = [base_rate * decay for decay in [1, 0.1, 0.01, 0.001]]
  learning_rate = tf.train.piecewise_constant(
      tf.cast(step, tf.int32), boundaries, values)

  # Expose the rate under a fixed name for logging, and as a summary.
  tf.identity(learning_rate, name='learning_rate')
  tf.summary.scalar('learning_rate', learning_rate)

  optimizer = tf.train.MomentumOptimizer(
      learning_rate=learning_rate,
      momentum=MOMENTUM)

  # Anything registered in UPDATE_OPS (e.g. by batch norm) must run before
  # the train op.
  pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(pending_updates):
    return optimizer.minimize(loss, step)

In [None]:
def get_metrics(predictions, labels):
  """Builds the evaluation metrics for the model.

  Args:
    predictions: dict whose 'classes' entry holds the predicted class ids.
    labels: one-hot labels; the true class id is recovered with argmax.

  Returns:
    A dict {'accuracy': (value_op, update_op)} as returned by
    tf.metrics.accuracy.
  """
  # tf.metrics.accuracy takes (labels, predictions); pass both by keyword so
  # they cannot be swapped.
  accuracy = tf.metrics.accuracy(
      labels=tf.argmax(labels, axis=1),
      predictions=predictions['classes'])

  # Create a tensor named train_accuracy for logging purposes
  tf.identity(accuracy[1], name='train_accuracy')
  tf.summary.scalar('train_accuracy', accuracy[1])

  return {'accuracy': accuracy}

In [None]:
def model_fn(features, labels, mode, params):
  """Model function for the Estimator.

  Args:
    features: dict of input tensors consumed through the feature columns.
    labels: one-hot labels; not used in PREDICT mode.
    mode: a ModeKeys value.
    params: hyper-parameters forwarded to get_train_op.

  Returns:
    A tf.estimator.EstimatorSpec for the given mode.
  """
  # Turn the features into a dense tensor and restore the image layout.
  columns = list(get_feature_columns().values())
  dense_input = tf.feature_column.input_layer(
    features=features, feature_columns=columns)
  images = tf.reshape(
    dense_input, shape=(-1, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH))

  # Run the CNN.
  logits = inference(images)

  predictions = {
    'classes': tf.argmax(logits, axis=1),
    'probabilities': tf.nn.softmax(logits, name='softmax_tensor'),
  }

  # PREDICT mode needs only predictions and export outputs.
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=predictions,
      export_outputs={
        'predictions': tf.estimator.export.PredictOutput(predictions)
      })

  # TRAIN and EVAL share loss, train op and metrics.
  loss = get_loss(logits=logits, labels=labels)
  return tf.estimator.EstimatorSpec(
    mode=mode,
    predictions=predictions,
    loss=loss,
    train_op=get_train_op(loss=loss, mode=mode, params=params),
    eval_metric_ops=get_metrics(predictions=predictions, labels=labels))

In [None]:
def create_estimator(run_config, hparams):
  """Returns an Estimator built from model_fn, `hparams` and `run_config`."""
  estimator = tf.estimator.Estimator(
      model_fn=model_fn, config=run_config, params=hparams)
  return estimator

## 5. Run Experiment

### a. Define Experiment Function

In [None]:
def generate_experiment_fn(**experiment_args):
  """Returns a function that builds the Experiment.

  Args:
    **experiment_args: extra keyword arguments forwarded unchanged to
      tf.contrib.learn.Experiment.

  Returns:
    A function (run_config, hparams) -> tf.contrib.learn.Experiment.
  """
  def _experiment_fn(run_config, hparams):
    estimator = create_estimator(run_config, hparams)
    # Training reads the train files, evaluation the validation files; both
    # take the epoch count and batch size from hparams.
    train_fn = generate_input_fn(file_names=train_data_files,
                                 mode=tf.contrib.learn.ModeKeys.TRAIN,
                                 num_epochs=hparams.num_epochs,
                                 batch_size=hparams.batch_size)
    eval_fn = generate_input_fn(file_names=valid_data_files,
                                mode=tf.contrib.learn.ModeKeys.EVAL,
                                num_epochs=hparams.num_epochs,
                                batch_size=hparams.batch_size)
    return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_input_fn=train_fn,
      eval_input_fn=eval_fn,
      **experiment_args
    )

  return _experiment_fn

### b. Set HParam and RunConfig

In [None]:
# Training schedule.
NUM_EPOCHS = 1
BATCH_SIZE = 200
# NOTE(review): _get_file_names() puts only data_batch_1..4 into the train
# split, so the train file may hold fewer than 50000 examples -- confirm.
TRAIN_SIZE = 50000
NUM_EVAL = 1
# Number of steps between checkpoints: steps per epoch times the number of
# epochs per evaluation.
CHECKPOINT_STEPS = int((TRAIN_SIZE/BATCH_SIZE) * (NUM_EPOCHS/NUM_EVAL))

# Hyper-parameters handed to the model function and the input functions.
hparams = tf.contrib.training.HParams(
  num_epochs=NUM_EPOCHS,
  batch_size=BATCH_SIZE,
)

# Directory that holds the checkpoints and exports of this model.
model_dir = 'trained_models/{}'.format(MODEL_NAME)

run_config = tf.contrib.learn.RunConfig(
  save_checkpoints_steps=CHECKPOINT_STEPS,
  tf_random_seed=19851211,
  model_dir=model_dir
)


# Print a summary of the configuration.
print(hparams)
print("Model Directory:", run_config.model_dir)
print("")
print("Dataset Size:", TRAIN_SIZE)
print("Batch Size:", BATCH_SIZE)
print("Steps per Epoch:",TRAIN_SIZE/BATCH_SIZE)
print("Total Steps:", (TRAIN_SIZE/BATCH_SIZE)*NUM_EPOCHS)
print("Required Evaluation Steps:", NUM_EVAL) 
print("That is 1 evaluation step after each",NUM_EPOCHS/NUM_EVAL," epochs")
print("Save Checkpoint After",CHECKPOINT_STEPS,"steps")

### c. Define Serving Function

In [None]:
def serving_input_fn():
  """Builds the serving-time input receiver.

  The receiver accepts a float32 batch of images under the key 'images' and
  applies preprocess_image (with its default is_training=False) to each
  image before it reaches the model.

  Returns:
    A tf.estimator.export.ServingInputReceiver.
  """
  # Take the image shape from the shared constants instead of hard-coding
  # 32x32x3, so serving stays in sync with the rest of the pipeline.
  receiver_tensor = {
    'images': tf.placeholder(
      shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH], dtype=tf.float32)
  }
  features = {'images': tf.map_fn(preprocess_image, receiver_tensor['images'])}

  return tf.estimator.export.ServingInputReceiver(features, receiver_tensor)

### d. Run Experiment via learn_runner

In [None]:
# Start from a clean model directory unless resuming from a checkpoint.
if not USE_CHECKPOINT:
  print("Removing previous artifacts...")
  shutil.rmtree(model_dir, ignore_errors=True)
else:
  print("Resuming training...")
  
tf.logging.set_verbosity(tf.logging.INFO)

time_start = datetime.utcnow()
print("Experiment started at {}".format(time_start.strftime("%H:%M:%S")))
print(".......................................")

# Train and evaluate; the export strategy uses serving_input_fn and keeps
# one export.
learn_runner.run(
  experiment_fn=generate_experiment_fn(
    export_strategies=[make_export_strategy(
        serving_input_fn,
        exports_to_keep=1
      )]
  ),
  schedule='train_and_evaluate',
  run_config=run_config,
  hparams=hparams
)

time_end = datetime.utcnow()
print(".......................................")
print("Experiment finished at {}".format(time_end.strftime("%H:%M:%S")))
print("")
# Report the wall-clock duration of the run.
time_elapsed = time_end - time_start
print("Experiment elapsed time: {} seconds".format(time_elapsed.total_seconds()))    

## 6. Evaluate the Model

In [None]:
# Batch sizes used for evaluation; each evaluate() call below runs a single
# step, so each split is scored on one batch of this size.
train_size = 1000
valid_size = 1000
test_size = 1000

# NOTE: this input function is built in TRAIN mode, so generate_input_fn
# shuffles the records and applies the training-time augmentation.
train_input_fn = generate_input_fn(file_names=train_data_files,
                                   mode=tf.contrib.learn.ModeKeys.TRAIN,
                                   num_epochs=None,
                                   batch_size=train_size)

valid_input_fn = generate_input_fn(file_names=valid_data_files,
                                   mode=tf.contrib.learn.ModeKeys.EVAL,
                                   num_epochs=None,
                                   batch_size=valid_size)

test_input_fn = generate_input_fn(file_names=test_data_files,
                                  mode=tf.contrib.learn.ModeKeys.EVAL,
                                  num_epochs=None,
                                  batch_size=test_size)


# Build an estimator with the same run config (and so the same model
# directory) as the experiment.
estimator = create_estimator(run_config, hparams)

# Evaluate on the train split.
train_results = estimator.evaluate(input_fn=train_input_fn, steps=1)
print()
print("######################################################################################")
print("# {}".format(train_results))
print("######################################################################################")

# Evaluate on the validation split.
valid_results = estimator.evaluate(input_fn=valid_input_fn, steps=1)
print()
print("######################################################################################")
print("# {}".format(valid_results))
print("######################################################################################")

# Evaluate on the test split.
test_results = estimator.evaluate(input_fn=test_input_fn, steps=1)
print()
print("######################################################################################")
print("# {}".format(test_results))
print("######################################################################################")

## 7. Prediction

In [None]:
import os
import numpy as np

export_dir = model_dir + '/export/Servo/'
# os.listdir returns entries in arbitrary order, so sort before taking the
# last one. Presumably the export sub-directories are timestamp-named, which
# would make the last sorted entry the most recent export -- confirm.
saved_model_dir = os.path.join(export_dir, sorted(os.listdir(export_dir))[-1])

print(saved_model_dir)
print('')

# Load the exported model as a Python callable.
predictor_fn = tf.contrib.predictor.from_saved_model(
  export_dir = saved_model_dir,
  signature_def_key='predictions')

N = 1000
images = []
labels = []

# NOTE: unpickling can execute arbitrary code; only load trusted files.
# Pickle data is binary, so the file is opened in binary mode.
with tf.gfile.Open('cifar-10/cifar-10-batches-py/test_batch', 'rb') as f:
  eval_data = cPickle.load(f)

# Sample within the actual number of test examples rather than a hard-coded
# 10000.
num_test_examples = len(eval_data['labels'])

# Draw N test images at random (with replacement) and convert each from
# [depth, height, width] to [height, width, depth].
for i in range(N):
  x = np.random.randint(num_test_examples)
  image = eval_data['data'][x]
  image = np.reshape(image, [IMAGE_DEPTH, IMAGE_HEIGHT, IMAGE_WIDTH])
  image = np.transpose(image, [1, 2, 0])
  images.append(image)
  labels.append(eval_data['labels'][x])

output = predictor_fn(
  {
    'images': images,
  }
)

In [None]:
# Fraction of the N sampled images whose predicted class equals the label.
np.sum([a==r for a, r in zip(labels, output['classes'])]) / float(N)

## 8. TensorBoard

In [None]:
from google.datalab.ml import TensorBoard
# Start TensorBoard on the model directory.
TensorBoard().start(model_dir)

In [None]:
# Stop every TensorBoard instance reported by TensorBoard.list().
for pid in TensorBoard.list()['pid']:
  TensorBoard().stop(pid)
  # Call form of print: same output on Python 2 for a single argument, and
  # consistent with the rest of the notebook.
  print('Stopped TensorBoard with pid {}'.format(pid))

## (Optional) Using tf.keras for the Inference Part

In [None]:
def inference(images, mode):
  """Builds the CNN with tf.keras layers and returns its logits.

  Args:
    images: input image tensor, used as the Keras input tensor.
    mode: a ModeKeys value; the Keras learning phase is switched on only
      for ModeKeys.TRAIN.

  Returns:
    The output tensor of the final Dense(NUM_CLASSES) layer.
  """
  tf.keras.backend.set_learning_phase(mode == tf.estimator.ModeKeys.TRAIN)

  layers = tf.keras.layers
  model = tf.keras.models.Sequential([
      # Bring the input tensor into the Keras world.
      layers.InputLayer(input_tensor=images),

      # First convolution + pooling.
      layers.Conv2D(
          filters=32, kernel_size=(3, 3), padding='same', activation='relu'),
      layers.MaxPool2D(pool_size=(2, 2), padding='same'),

      # Second convolution + pooling, followed by dropout.
      layers.Conv2D(
          filters=32, kernel_size=(3, 3), padding='same', activation='relu'),
      layers.MaxPool2D(pool_size=(2, 2), padding='same'),
      layers.Dropout(0.25),

      # Third and fourth convolutions, followed by dropout.
      layers.Conv2D(
          filters=64, kernel_size=(3, 3), padding='same', activation='relu'),
      layers.Conv2D(
          filters=64, kernel_size=(3, 3), padding='same', activation='relu'),
      layers.Dropout(0.25),

      # Classifier head.
      layers.Flatten(),
      layers.Dense(512, activation='relu'),
      layers.Dropout(0.5),
      layers.Dense(NUM_CLASSES),
  ])

  return model.output