In [1]:
import os
# Limit CUDA to device index 1 for this kernel. This cell runs before the
# cell that imports TensorFlow.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
# Enable IPython's autoreload extension so edits to imported modules
# (for example the local `utils` module) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import tensorflow as tf

import utils

  return f(*args, **kwds)


In [4]:
# --- Disabled settings -------------------------------------------------------
# MNIST-specific values (see SOURCE_URL). They are commented out, so none of
# these names is defined by this cell.
#SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
#WORK_DIRECTORY = 'data'
#IMAGE_SIZE = 28
#NUM_CHANNELS = 1
#PIXEL_DEPTH = 255
#NUM_LABELS = 10
#VALIDATION_SIZE = 5000  # Size of the validation set.
#SEED = 66478  # Set to None for random seed.

# --- Active configuration ----------------------------------------------------
DATASET = 'cifar10'       # Name passed to utils.get_dataset.

BATCH_SIZE = 64           # Training minibatch size.
EVAL_BATCH_SIZE = 64      # Minibatch size used for evaluation.
NUM_EPOCHS = 10           # Passed to the training loop as `num_epochs`.
EVAL_FREQUENCY = 100      # Number of steps between evaluations.

NUM_UNROLL_STEPS = 5      # How many times apply() runs model_step per example.

In [5]:
def model_step(input_images, prior, batch_size, training, num_labels, use_priors):
    """Run one pass of the conv classifier, optionally gated by `prior`.

    Two (5x5 conv + ReLU, 2x2 max-pool) stages are applied and flattened.
    When `use_priors` is true, the flattened features are multiplied
    element-wise by sigmoid gates computed from `prior` through a 100-unit
    ReLU layer. A 1024-unit ReLU layer, dropout (rate 0.4) and a linear layer
    of `num_labels` units follow.

    Args:
        input_images: image batch fed to the first convolution.
        prior: tensor the gates are computed from; only read when
            `use_priors` is true.
        batch_size: accepted for interface compatibility; not used here.
        training: forwarded to `tf.layers.dropout`.
        num_labels: number of units in the output layer.
        use_priors: Python bool selecting the gated or the plain path.

    Returns:
        A `(logits, posteriors)` pair, where `posteriors` is
        `tf.nn.softmax(logits)`.
    """
    def conv_relu_pool(features, num_filters):
        # One feature-extraction stage: 5x5 "same" conv + ReLU, then 2x2/2 max-pool.
        activated = tf.layers.conv2d(
            inputs=features,
            filters=num_filters,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)
        return tf.layers.max_pooling2d(inputs=activated, pool_size=[2, 2], strides=2)

    stage1 = conv_relu_pool(input_images, 32)
    stage2 = conv_relu_pool(stage1, 64)

    # Flatten everything except the leading (batch) axis.
    static_shape = stage2.get_shape()
    flat_width = static_shape[1] * static_shape[2] * static_shape[3]
    flat_features = tf.reshape(stage2, [-1, flat_width])

    if not use_priors:
        head_input = flat_features
    else:
        # prior -> 100 ReLU units -> one sigmoid gate per flattened feature.
        hidden = tf.layers.dense(inputs=prior, units=100, activation=tf.nn.relu)
        gate_values = tf.layers.dense(
            inputs=hidden, units=flat_width, activation=tf.nn.sigmoid)
        head_input = tf.multiply(flat_features, gate_values)

    fc = tf.layers.dense(inputs=head_input, units=1024, activation=tf.nn.relu)
    fc_dropped = tf.layers.dropout(inputs=fc, rate=0.4, training=training)

    logits = tf.layers.dense(inputs=fc_dropped, units=num_labels)
    return logits, tf.nn.softmax(logits)

def apply(input_images, training, train_labels_node, num_labels, use_priors):
    """Unroll `model_step` NUM_UNROLL_STEPS times, feeding posteriors back as priors.

    The first step receives a uniform prior (`1 / num_labels` for every
    class). Each later step receives the previous step's posteriors. All
    steps are built inside the variable scope 'one_step', with `reuse`
    enabled from the second step on.

    Args:
        input_images: image batch; its static leading dimension is used as
            the batch size of the initial prior.
        training: forwarded to `model_step`.
        train_labels_node: integer labels for the sparse cross-entropy loss.
        num_labels: number of classes.
        use_priors: forwarded to `model_step`.

    Returns:
        `(stacked_logits, loss)`: the per-step logits combined with
        `tf.stack`, and the sum over steps of the mean sparse softmax
        cross-entropy.
    """
    batch_size = input_images.get_shape()[0]
    current_prior = tf.ones((batch_size, num_labels)) / num_labels

    step_outputs = []
    total_loss = 0.0
    for unroll_idx in range(NUM_UNROLL_STEPS):
        share_weights = unroll_idx > 0
        with tf.variable_scope('one_step', reuse=share_weights):
            step_logits, step_posteriors = model_step(
                input_images, current_prior, batch_size,
                training=training, num_labels=num_labels,
                use_priors=use_priors)
        step_outputs.append((step_logits, step_posteriors))

        step_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=train_labels_node, logits=step_logits)
        total_loss = total_loss + tf.reduce_mean(step_xent)

        # This step's posteriors become the next step's prior.
        current_prior = step_posteriors

    stacked_logits = tf.stack([pair[0] for pair in step_outputs])
    return stacked_logits, total_loss

In [9]:
use_priors = True

tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter handed to the training loop as `batch_var`. It is presumably
# incremented once per batch inside utils.run_train (not visible here).
batch = tf.Variable(0, dtype=tf.float32)

# Exponential schedule starting at 1e-3. `batch * BATCH_SIZE` is the number of
# examples consumed, so with staircase=True the rate is multiplied by 0.95
# once per `dataset.train_size` examples.
learning_rate = tf.train.exponential_decay(
    learning_rate=1e-3,
    global_step=batch * BATCH_SIZE,
    decay_steps=dataset.train_size,
    decay_rate=0.95,
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = {
    'optimizer': optimizer,
    'batch_var': batch,
    'learning_rate_var': learning_rate,
    'train_batch_size': BATCH_SIZE,
    'eval_batch_size': EVAL_BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'eval_frequency': EVAL_FREQUENCY,
}

stdout_lines = utils.run_train(
    apply, train_config, dataset,
    build_func_kwargs={'use_priors': use_priors})

Initialized!
Step 0 (epoch 0.00), 12.6 ms
Minibatch loss: 10.663, learning rate: 0.001000
Minibatch error: [76.5625, 75.0, 76.5625, 76.5625, 75.0]
Validation error: [89.6, 89.6, 89.6, 89.6, 89.6]
Step 100 (epoch 0.14), 61.4 ms
Minibatch loss: 8.181, learning rate: 0.001000
Minibatch error: [59.375, 62.5, 57.8125, 64.0625, 62.5]
Validation error: [56.46, 56.38, 56.38, 56.38, 56.38]
Step 200 (epoch 0.28), 61.3 ms
Minibatch loss: 6.782, learning rate: 0.001000
Minibatch error: [43.75, 46.875, 46.875, 51.5625, 46.875]
Validation error: [50.7, 50.6, 50.6, 50.6, 50.6]
Step 300 (epoch 0.43), 61.3 ms
Minibatch loss: 6.660, learning rate: 0.001000
Minibatch error: [54.6875, 53.125, 56.25, 50.0, 56.25]
Validation error: [46.14, 46.36, 46.36, 46.36, 46.36]
Step 400 (epoch 0.57), 61.3 ms
Minibatch loss: 6.478, learning rate: 0.001000
Minibatch error: [45.3125, 43.75, 45.3125, 42.1875, 39.0625]
Validation error: [43.8, 43.66, 43.66, 43.68, 43.68]
Step 500 (epoch 0.71), 61.3 ms
Minibatch loss: 5.537

Step 3700 (epoch 5.26), 61.3 ms
Minibatch loss: 1.004, learning rate: 0.000774
Minibatch error: [4.6875, 7.8125, 4.6875, 4.6875, 4.6875]
Validation error: [28.060000000000002, 28.120000000000005, 28.08, 28.0, 28.08]
Step 3800 (epoch 5.40), 61.5 ms
Minibatch loss: 0.872, learning rate: 0.000774
Minibatch error: [12.5, 4.6875, 4.6875, 1.5625, 1.5625]
Validation error: [29.14, 29.120000000000005, 29.200000000000003, 29.14, 29.019999999999996]
Step 3900 (epoch 5.55), 61.7 ms
Minibatch loss: 0.764, learning rate: 0.000774
Minibatch error: [7.8125, 6.25, 4.6875, 3.125, 3.125]
Validation error: [27.040000000000006, 27.099999999999994, 27.0, 27.040000000000006, 27.040000000000006]
Step 4000 (epoch 5.69), 61.6 ms
Minibatch loss: 0.764, learning rate: 0.000774
Minibatch error: [9.375, 3.125, 3.125, 3.125, 1.5625]
Validation error: [28.939999999999998, 29.019999999999996, 28.900000000000006, 29.0, 29.019999999999996]
Step 4100 (epoch 5.83), 61.6 ms
Minibatch loss: 0.809, learning rate: 0.000774
M

In [9]:
# Replay the training log returned by utils.run_train as one string.
# NOTE(review): the saved output under this cell looks like it came from a
# different run than the training cell above (different step timings, epoch
# fractions and error rates) - re-run the notebook top to bottom to refresh it.
print("".join(stdout_lines))

Step 0 (epoch 0.00), 2.8 ms
Minibatch loss: 10.917, learning rate: 0.001000
Minibatch error: [79.6875, 84.375, 81.25, 84.375, 82.8125]
Validation error: [90.4, 90.4, 90.4, 90.4, 90.4]
Step 100 (epoch 0.12), 48.8 ms
Minibatch loss: 0.255, learning rate: 0.001000
Minibatch error: [1.5625, 0.0, 0.0, 1.5625, 0.0]
Validation error: [4.659999999999997, 4.719999999999999, 4.700000000000003, 4.700000000000003, 4.700000000000003]
Step 200 (epoch 0.23), 48.8 ms
Minibatch loss: 0.653, learning rate: 0.001000
Minibatch error: [4.6875, 7.8125, 4.6875, 6.25, 4.6875]
Validation error: [2.760000000000005, 2.739999999999995, 2.739999999999995, 2.739999999999995, 2.739999999999995]
Step 300 (epoch 0.35), 48.8 ms
Minibatch loss: 0.530, learning rate: 0.001000
Minibatch error: [3.125, 3.125, 3.125, 3.125, 3.125]
Validation error: [2.519999999999996, 2.5, 2.5, 2.5, 2.5]
Step 400 (epoch 0.47), 48.8 ms
Minibatch loss: 0.670, learning rate: 0.001000
Minibatch error: [4.6875, 6.25, 3.125, 4.6875, 4.6875]
Valid

### Introducing G: an additive bias computed from the prior, alongside the multiplicative gates (F)

In [10]:
def model_step(input_images, prior, batch_size, training, num_labels, use_priors):
    """Run one pass of the conv classifier with prior-driven gates and bias.

    Two (5x5 conv + ReLU, 2x2 max-pool) stages are applied and flattened.
    When `use_priors` is true, `prior` goes through a shared 100-unit ReLU
    layer that feeds two heads: sigmoid gates and a linear bias, each with one
    value per flattened feature. The features become
    `features * gates + bias`. A 1024-unit ReLU layer, dropout (rate 0.4) and
    a linear layer of `num_labels` units follow.

    Args:
        input_images: image batch fed to the first convolution.
        prior: tensor the gates and bias are computed from; only read when
            `use_priors` is true.
        batch_size: accepted for interface compatibility; not used here.
        training: forwarded to `tf.layers.dropout`.
        num_labels: number of units in the output layer.
        use_priors: Python bool selecting the modulated or the plain path.

    Returns:
        A `(logits, posteriors)` pair, where `posteriors` is
        `tf.nn.softmax(logits)`.
    """
    def conv_relu_pool(features, num_filters):
        # One feature-extraction stage: 5x5 "same" conv + ReLU, then 2x2/2 max-pool.
        activated = tf.layers.conv2d(
            inputs=features,
            filters=num_filters,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)
        return tf.layers.max_pooling2d(inputs=activated, pool_size=[2, 2], strides=2)

    stage1 = conv_relu_pool(input_images, 32)
    stage2 = conv_relu_pool(stage1, 64)

    # Flatten everything except the leading (batch) axis.
    static_shape = stage2.get_shape()
    flat_width = static_shape[1] * static_shape[2] * static_shape[3]
    flat_features = tf.reshape(stage2, [-1, flat_width])

    if not use_priors:
        head_input = flat_features
    else:
        # Shared hidden layer, then a multiplicative head and an additive head.
        hidden = tf.layers.dense(inputs=prior, units=100, activation=tf.nn.relu)
        gate_values = tf.layers.dense(
            inputs=hidden, units=flat_width, activation=tf.nn.sigmoid)
        shift = tf.layers.dense(inputs=hidden, units=flat_width, activation=None)
        head_input = tf.multiply(flat_features, gate_values) + shift

    fc = tf.layers.dense(inputs=head_input, units=1024, activation=tf.nn.relu)
    fc_dropped = tf.layers.dropout(inputs=fc, rate=0.4, training=training)

    logits = tf.layers.dense(inputs=fc_dropped, units=num_labels)
    return logits, tf.nn.softmax(logits)

def apply(input_images, training, train_labels_node, num_labels, use_priors):
    """Unroll `model_step` NUM_UNROLL_STEPS times, feeding posteriors back as priors.

    The first step receives a uniform prior (`1 / num_labels` for every
    class). Each later step receives the previous step's posteriors. All
    steps are built inside the variable scope 'one_step', with `reuse`
    enabled from the second step on.

    Args:
        input_images: image batch; its static leading dimension is used as
            the batch size of the initial prior.
        training: forwarded to `model_step`.
        train_labels_node: integer labels for the sparse cross-entropy loss.
        num_labels: number of classes.
        use_priors: forwarded to `model_step`.

    Returns:
        `(stacked_logits, loss)`: the per-step logits combined with
        `tf.stack`, and the sum over steps of the mean sparse softmax
        cross-entropy.
    """
    batch_size = input_images.get_shape()[0]
    current_prior = tf.ones((batch_size, num_labels)) / num_labels

    step_outputs = []
    total_loss = 0.0
    for unroll_idx in range(NUM_UNROLL_STEPS):
        share_weights = unroll_idx > 0
        with tf.variable_scope('one_step', reuse=share_weights):
            step_logits, step_posteriors = model_step(
                input_images, current_prior, batch_size,
                training=training, num_labels=num_labels,
                use_priors=use_priors)
        step_outputs.append((step_logits, step_posteriors))

        step_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=train_labels_node, logits=step_logits)
        total_loss = total_loss + tf.reduce_mean(step_xent)

        # This step's posteriors become the next step's prior.
        current_prior = step_posteriors

    stacked_logits = tf.stack([pair[0] for pair in step_outputs])
    return stacked_logits, total_loss

In [11]:
use_priors = True

tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter handed to the training loop as `batch_var`. It is presumably
# incremented once per batch inside utils.run_train (not visible here).
batch = tf.Variable(0, dtype=tf.float32)

# Exponential schedule starting at 1e-3. `batch * BATCH_SIZE` is the number of
# examples consumed, so with staircase=True the rate is multiplied by 0.95
# once per `dataset.train_size` examples.
learning_rate = tf.train.exponential_decay(
    learning_rate=1e-3,
    global_step=batch * BATCH_SIZE,
    decay_steps=dataset.train_size,
    decay_rate=0.95,
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = {
    'optimizer': optimizer,
    'batch_var': batch,
    'learning_rate_var': learning_rate,
    'train_batch_size': BATCH_SIZE,
    'eval_batch_size': EVAL_BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'eval_frequency': EVAL_FREQUENCY,
}

stdout_lines = utils.run_train(
    apply, train_config, dataset,
    build_func_kwargs={'use_priors': use_priors})

Initialized!
Step 0 (epoch 0.00), 4.4 ms
Minibatch loss: 10.496, learning rate: 0.001000
Minibatch error: [75.0, 76.5625, 76.5625, 76.5625, 71.875]
Validation error: [88.03999999999999, 88.08, 88.08, 88.08, 88.08]
Step 100 (epoch 0.14), 66.1 ms
Minibatch loss: 8.098, learning rate: 0.001000
Minibatch error: [57.8125, 59.375, 59.375, 51.5625, 59.375]
Validation error: [55.48, 56.18, 56.02, 56.14, 56.1]
Step 200 (epoch 0.28), 65.9 ms
Minibatch loss: 6.920, learning rate: 0.001000
Minibatch error: [45.3125, 48.4375, 45.3125, 48.4375, 43.75]
Validation error: [49.82, 50.14, 50.56, 50.66, 50.66]
Step 300 (epoch 0.43), 66.1 ms
Minibatch loss: 6.547, learning rate: 0.001000
Minibatch error: [56.25, 46.875, 45.3125, 48.4375, 45.3125]
Validation error: [46.66, 46.04, 46.12, 46.08, 46.08]
Step 400 (epoch 0.57), 66.2 ms
Minibatch loss: 5.868, learning rate: 0.001000
Minibatch error: [40.625, 40.625, 40.625, 40.625, 45.3125]
Validation error: [41.46, 41.42, 41.48, 41.42, 41.44]
Step 500 (epoch 0.7

Step 3600 (epoch 5.12), 66.0 ms
Minibatch loss: 1.402, learning rate: 0.000774
Minibatch error: [10.9375, 9.375, 9.375, 7.8125, 7.8125]
Validation error: [26.739999999999995, 26.900000000000006, 27.060000000000002, 27.060000000000002, 27.120000000000005]
Step 3700 (epoch 5.26), 66.2 ms
Minibatch loss: 0.481, learning rate: 0.000774
Minibatch error: [6.25, 1.5625, 1.5625, 0.0, 1.5625]
Validation error: [27.099999999999994, 26.939999999999998, 27.120000000000005, 27.200000000000003, 27.22]
Step 3800 (epoch 5.40), 66.2 ms
Minibatch loss: 0.610, learning rate: 0.000774
Minibatch error: [6.25, 6.25, 6.25, 1.5625, 4.6875]
Validation error: [26.959999999999994, 27.060000000000002, 27.040000000000006, 27.099999999999994, 27.040000000000006]
Step 3900 (epoch 5.55), 66.1 ms
Minibatch loss: 0.232, learning rate: 0.000774
Minibatch error: [1.5625, 0.0, 0.0, 0.0, 0.0]
Validation error: [27.08, 27.319999999999993, 27.379999999999995, 27.36, 27.42]
Step 4000 (epoch 5.69), 66.3 ms
Minibatch loss: 0.74

## Separate 2-layer networks for F (gates) and G (bias)

In [12]:
def model_step(input_images, prior, batch_size, training, num_labels, use_priors):
    """Run one pass of the conv classifier with separate gate and bias networks.

    Two (5x5 conv + ReLU, 2x2 max-pool) stages are applied and flattened.
    When `use_priors` is true, `prior` feeds two independent two-layer
    networks, each with its own 100-unit ReLU hidden layer. One ends in
    sigmoid gates and the other in a linear bias, each with one value per
    flattened feature. The features become `features * gates + bias`.
    A 1024-unit ReLU layer, dropout (rate 0.4) and a linear layer of
    `num_labels` units follow.

    Args:
        input_images: image batch fed to the first convolution.
        prior: tensor the gates and bias are computed from; only read when
            `use_priors` is true.
        batch_size: accepted for interface compatibility; not used here.
        training: forwarded to `tf.layers.dropout`.
        num_labels: number of units in the output layer.
        use_priors: Python bool selecting the modulated or the plain path.

    Returns:
        A `(logits, posteriors)` pair, where `posteriors` is
        `tf.nn.softmax(logits)`.
    """
    def conv_relu_pool(features, num_filters):
        # One feature-extraction stage: 5x5 "same" conv + ReLU, then 2x2/2 max-pool.
        activated = tf.layers.conv2d(
            inputs=features,
            filters=num_filters,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)
        return tf.layers.max_pooling2d(inputs=activated, pool_size=[2, 2], strides=2)

    stage1 = conv_relu_pool(input_images, 32)
    stage2 = conv_relu_pool(stage1, 64)

    # Flatten everything except the leading (batch) axis.
    static_shape = stage2.get_shape()
    flat_width = static_shape[1] * static_shape[2] * static_shape[3]
    flat_features = tf.reshape(stage2, [-1, flat_width])

    if not use_priors:
        head_input = flat_features
    else:
        # Gate network: prior -> 100 ReLU -> sigmoid gates.
        gate_hidden = tf.layers.dense(inputs=prior, units=100, activation=tf.nn.relu)
        gate_values = tf.layers.dense(
            inputs=gate_hidden, units=flat_width, activation=tf.nn.sigmoid)
        # Bias network: prior -> 100 ReLU -> linear bias (no shared hidden layer).
        shift_hidden = tf.layers.dense(inputs=prior, units=100, activation=tf.nn.relu)
        shift = tf.layers.dense(inputs=shift_hidden, units=flat_width, activation=None)
        head_input = tf.multiply(flat_features, gate_values) + shift

    fc = tf.layers.dense(inputs=head_input, units=1024, activation=tf.nn.relu)
    fc_dropped = tf.layers.dropout(inputs=fc, rate=0.4, training=training)

    logits = tf.layers.dense(inputs=fc_dropped, units=num_labels)
    return logits, tf.nn.softmax(logits)

def apply(input_images, training, train_labels_node, num_labels, use_priors):
    """Unroll `model_step` NUM_UNROLL_STEPS times, feeding posteriors back as priors.

    The first step receives a uniform prior (`1 / num_labels` for every
    class). Each later step receives the previous step's posteriors. All
    steps are built inside the variable scope 'one_step', with `reuse`
    enabled from the second step on.

    Args:
        input_images: image batch; its static leading dimension is used as
            the batch size of the initial prior.
        training: forwarded to `model_step`.
        train_labels_node: integer labels for the sparse cross-entropy loss.
        num_labels: number of classes.
        use_priors: forwarded to `model_step`.

    Returns:
        `(stacked_logits, loss)`: the per-step logits combined with
        `tf.stack`, and the sum over steps of the mean sparse softmax
        cross-entropy.
    """
    batch_size = input_images.get_shape()[0]
    current_prior = tf.ones((batch_size, num_labels)) / num_labels

    step_outputs = []
    total_loss = 0.0
    for unroll_idx in range(NUM_UNROLL_STEPS):
        share_weights = unroll_idx > 0
        with tf.variable_scope('one_step', reuse=share_weights):
            step_logits, step_posteriors = model_step(
                input_images, current_prior, batch_size,
                training=training, num_labels=num_labels,
                use_priors=use_priors)
        step_outputs.append((step_logits, step_posteriors))

        step_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=train_labels_node, logits=step_logits)
        total_loss = total_loss + tf.reduce_mean(step_xent)

        # This step's posteriors become the next step's prior.
        current_prior = step_posteriors

    stacked_logits = tf.stack([pair[0] for pair in step_outputs])
    return stacked_logits, total_loss

In [13]:
use_priors = True

tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter handed to the training loop as `batch_var`. It is presumably
# incremented once per batch inside utils.run_train (not visible here).
batch = tf.Variable(0, dtype=tf.float32)

# Exponential schedule starting at 1e-3. `batch * BATCH_SIZE` is the number of
# examples consumed, so with staircase=True the rate is multiplied by 0.95
# once per `dataset.train_size` examples.
learning_rate = tf.train.exponential_decay(
    learning_rate=1e-3,
    global_step=batch * BATCH_SIZE,
    decay_steps=dataset.train_size,
    decay_rate=0.95,
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = {
    'optimizer': optimizer,
    'batch_var': batch,
    'learning_rate_var': learning_rate,
    'train_batch_size': BATCH_SIZE,
    'eval_batch_size': EVAL_BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'eval_frequency': EVAL_FREQUENCY,
}

stdout_lines = utils.run_train(
    apply, train_config, dataset,
    build_func_kwargs={'use_priors': use_priors})

Initialized!
Step 0 (epoch 0.00), 3.9 ms
Minibatch loss: 10.651, learning rate: 0.001000
Minibatch error: [70.3125, 68.75, 76.5625, 70.3125, 73.4375]
Validation error: [86.66, 86.74, 86.74, 86.74, 86.74]
Step 100 (epoch 0.14), 67.1 ms
Minibatch loss: 7.537, learning rate: 0.001000
Minibatch error: [57.8125, 56.25, 56.25, 56.25, 60.9375]
Validation error: [53.88, 53.9, 53.92, 54.04, 54.02]
Step 200 (epoch 0.28), 67.1 ms
Minibatch loss: 6.464, learning rate: 0.001000
Minibatch error: [42.1875, 46.875, 43.75, 50.0, 48.4375]
Validation error: [49.96, 50.48, 50.74, 50.76, 50.78]
Step 300 (epoch 0.43), 67.1 ms
Minibatch loss: 6.284, learning rate: 0.001000
Minibatch error: [42.1875, 48.4375, 48.4375, 46.875, 48.4375]
Validation error: [44.76, 44.5, 44.56, 44.54, 44.54]
Step 400 (epoch 0.57), 67.0 ms
Minibatch loss: 6.273, learning rate: 0.001000
Minibatch error: [40.625, 39.0625, 42.1875, 43.75, 45.3125]
Validation error: [42.74, 42.82, 42.62, 42.66, 42.66]
Step 500 (epoch 0.71), 67.1 ms
Min

Validation error: [27.72, 27.980000000000004, 27.879999999999995, 27.959999999999994, 28.040000000000006]
Step 3700 (epoch 5.26), 66.8 ms
Minibatch loss: 0.204, learning rate: 0.000774
Minibatch error: [3.125, 1.5625, 0.0, 0.0, 1.5625]
Validation error: [27.86, 27.739999999999995, 27.620000000000005, 27.739999999999995, 27.72]
Step 3800 (epoch 5.40), 66.9 ms
Minibatch loss: 0.475, learning rate: 0.000774
Minibatch error: [7.8125, 1.5625, 1.5625, 3.125, 0.0]
Validation error: [27.939999999999998, 27.739999999999995, 27.599999999999994, 27.599999999999994, 27.659999999999997]
Step 3900 (epoch 5.55), 66.9 ms
Minibatch loss: 0.503, learning rate: 0.000774
Minibatch error: [6.25, 3.125, 3.125, 3.125, 1.5625]
Validation error: [25.64, 25.58, 25.519999999999996, 25.480000000000004, 25.519999999999996]
Step 4000 (epoch 5.69), 67.0 ms
Minibatch loss: 0.687, learning rate: 0.000774
Minibatch error: [9.375, 3.125, 3.125, 4.6875, 4.6875]
Validation error: [27.459999999999994, 27.36, 27.43999999999

In [14]:
def model_step(input_images, prior, batch_size, training, num_labels, use_priors):
    """Run one pass of the conv classifier with prior-driven gating on every layer.

    When `use_priors` is true, the output of each conv layer (per channel),
    the flattened features and the 1024-unit dense layer (per unit) are
    transformed as `tensor * gates + bias`. Gates and bias come from two
    separate dense stacks applied to `prior`. When `use_priors` is false, the
    gating helpers return their input unchanged.

    Args:
        input_images: image batch fed to the first convolution.
        prior: tensor the gates and biases are computed from; only read when
            `use_priors` is true.
        batch_size: accepted for interface compatibility; not used here.
        training: forwarded to `tf.layers.dropout`.
        num_labels: number of units in the output layer.
        use_priors: Python bool enabling or disabling all gating.

    Returns:
        A `(logits, posteriors)` pair, where `posteriors` is
        `tf.nn.softmax(logits)`.
    """
    def get_gates_and_bias(layer_sizes):
        """Build two parallel dense stacks on `prior`.

        Every size except the last is a ReLU hidden layer in both stacks.
        The last size is the output width: sigmoid for the gates, linear
        for the bias.
        """
        gates = prior
        bias = prior
        # Layers are created in the original order: gates, then bias, per level.
        for size in layer_sizes[:-1]:
            gates = tf.layers.dense(inputs=gates, units=size, activation=tf.nn.relu)
            bias = tf.layers.dense(inputs=bias, units=size, activation=tf.nn.relu)
        gates = tf.layers.dense(inputs=gates, units=layer_sizes[-1], activation=tf.nn.sigmoid)
        bias = tf.layers.dense(inputs=bias, units=layer_sizes[-1], activation=None)
        return gates, bias

    def apply_gating_on_conv(tensor, layer_sizes):
        """Gate a conv feature map with one gate and one bias per channel."""
        if not use_priors:
            return tensor
        num_channels = tensor.get_shape()[-1]
        gates, bias = get_gates_and_bias(layer_sizes + (num_channels, ))
        # Insert two singleton axes after the batch axis so the per-channel
        # values broadcast over the two middle axes of `tensor`.
        gates = tf.expand_dims(tf.expand_dims(gates, axis=1), axis=1)
        bias = tf.expand_dims(tf.expand_dims(bias, axis=1), axis=1)
        return tf.multiply(tensor, gates) + bias

    def apply_gating_on_dense(tensor, layer_sizes):
        """Gate a 2-D activation with one gate and one bias per unit."""
        if not use_priors:
            return tensor
        num_units = tensor.get_shape()[1]
        gates, bias = get_gates_and_bias(layer_sizes + (num_units, ))
        return tf.multiply(tensor, gates) + bias

    inputs = input_images

    conv1 = tf.layers.conv2d(
        inputs=inputs,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    conv1 = apply_gating_on_conv(conv1, (100,))
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    conv2 = apply_gating_on_conv(conv2, (100,))
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Flatten with an explicit feature count and a free batch axis, as the
    # earlier versions of this function do. The previous form,
    # [pool2.get_shape()[0], -1], baked the static batch dimension into the
    # reshape.
    pool2_shape = pool2.get_shape()
    num_units_after_conv = pool2_shape[1] * pool2_shape[2] * pool2_shape[3]
    pool2_flat = tf.reshape(pool2, [-1, num_units_after_conv])
    pool2_flat = apply_gating_on_dense(pool2_flat, (100,))

    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    dense = apply_gating_on_dense(dense, (100,))
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=training)

    logits = tf.layers.dense(inputs=dropout, units=num_labels)
    posteriors = tf.nn.softmax(logits)

    return logits, posteriors

def apply(input_images, training, train_labels_node, num_labels, use_priors):
    """Unroll `model_step` NUM_UNROLL_STEPS times, feeding posteriors back as priors.

    The first step receives a uniform prior (`1 / num_labels` for every
    class). Each later step receives the previous step's posteriors. All
    steps are built inside the variable scope 'one_step', with `reuse`
    enabled from the second step on.

    Args:
        input_images: image batch; its static leading dimension is used as
            the batch size of the initial prior.
        training: forwarded to `model_step`.
        train_labels_node: integer labels for the sparse cross-entropy loss.
        num_labels: number of classes.
        use_priors: forwarded to `model_step`.

    Returns:
        `(stacked_logits, loss)`: the per-step logits combined with
        `tf.stack`, and the sum over steps of the mean sparse softmax
        cross-entropy.
    """
    batch_size = input_images.get_shape()[0]
    current_prior = tf.ones((batch_size, num_labels)) / num_labels

    step_outputs = []
    total_loss = 0.0
    for unroll_idx in range(NUM_UNROLL_STEPS):
        share_weights = unroll_idx > 0
        with tf.variable_scope('one_step', reuse=share_weights):
            step_logits, step_posteriors = model_step(
                input_images, current_prior, batch_size,
                training=training, num_labels=num_labels,
                use_priors=use_priors)
        step_outputs.append((step_logits, step_posteriors))

        step_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=train_labels_node, logits=step_logits)
        total_loss = total_loss + tf.reduce_mean(step_xent)

        # This step's posteriors become the next step's prior.
        current_prior = step_posteriors

    stacked_logits = tf.stack([pair[0] for pair in step_outputs])
    return stacked_logits, total_loss

In [15]:
use_priors = True

tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter handed to the training loop as `batch_var`. It is presumably
# incremented once per batch inside utils.run_train (not visible here).
batch = tf.Variable(0, dtype=tf.float32)

# Exponential schedule starting at 1e-3. `batch * BATCH_SIZE` is the number of
# examples consumed, so with staircase=True the rate is multiplied by 0.95
# once per `dataset.train_size` examples.
learning_rate = tf.train.exponential_decay(
    learning_rate=1e-3,
    global_step=batch * BATCH_SIZE,
    decay_steps=dataset.train_size,
    decay_rate=0.95,
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = {
    'optimizer': optimizer,
    'batch_var': batch,
    'learning_rate_var': learning_rate,
    'train_batch_size': BATCH_SIZE,
    'eval_batch_size': EVAL_BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'eval_frequency': EVAL_FREQUENCY,
}

stdout_lines = utils.run_train(
    apply, train_config, dataset,
    build_func_kwargs={'use_priors': use_priors})

Initialized!
Step 0 (epoch 0.00), 7.3 ms
Minibatch loss: 11.211, learning rate: 0.001000
Minibatch error: [90.625, 82.8125, 85.9375, 82.8125, 82.8125]
Validation error: [90.28, 90.28, 90.28, 90.28, 90.28]
Step 100 (epoch 0.14), 103.8 ms
Minibatch loss: 9.110, learning rate: 0.001000
Minibatch error: [64.0625, 67.1875, 68.75, 68.75, 65.625]
Validation error: [60.54, 60.16, 60.58, 60.44, 60.62]
Step 200 (epoch 0.28), 102.9 ms
Minibatch loss: 6.902, learning rate: 0.001000
Minibatch error: [50.0, 50.0, 51.5625, 50.0, 45.3125]
Validation error: [53.02, 52.9, 52.9, 53.04, 53.04]
Step 300 (epoch 0.43), 102.8 ms
Minibatch loss: 7.094, learning rate: 0.001000
Minibatch error: [57.8125, 54.6875, 54.6875, 56.25, 56.25]
Validation error: [48.82, 50.5, 49.26, 49.82, 49.54]
Step 400 (epoch 0.57), 102.8 ms
Minibatch loss: 6.593, learning rate: 0.001000
Minibatch error: [53.125, 48.4375, 48.4375, 50.0, 43.75]
Validation error: [46.0, 45.94, 45.88, 45.94, 45.96]
Step 500 (epoch 0.71), 102.7 ms
Minibat

Validation error: [26.78, 26.72, 26.680000000000007, 26.86, 26.939999999999998]
Step 3700 (epoch 5.26), 102.8 ms
Minibatch loss: 0.520, learning rate: 0.000774
Minibatch error: [1.5625, 1.5625, 1.5625, 1.5625, 1.5625]
Validation error: [27.64, 27.900000000000006, 27.86, 27.939999999999998, 27.939999999999998]
Step 3800 (epoch 5.40), 102.8 ms
Minibatch loss: 0.762, learning rate: 0.000774
Minibatch error: [9.375, 1.5625, 3.125, 1.5625, 3.125]
Validation error: [27.22, 27.680000000000007, 27.72, 27.72, 27.879999999999995]
Step 3900 (epoch 5.55), 102.7 ms
Minibatch loss: 0.700, learning rate: 0.000774
Minibatch error: [4.6875, 7.8125, 3.125, 3.125, 1.5625]
Validation error: [26.680000000000007, 26.760000000000005, 26.72, 26.78, 26.900000000000006]
Step 4000 (epoch 5.69), 102.8 ms
Minibatch loss: 1.289, learning rate: 0.000774
Minibatch error: [10.9375, 9.375, 7.8125, 7.8125, 7.8125]
Validation error: [27.819999999999993, 27.939999999999998, 27.980000000000004, 28.0, 27.959999999999994]
St