In [1]:
import os

# Expose only GPU 0 to this process. This runs before TensorFlow is imported
# further down, so the restriction is already in place at that point.
os.environ.update(CUDA_VISIBLE_DEVICES='0')

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import tensorflow as tf

import utils

  return f(*args, **kwds)


In [4]:
# --- Training loop (forwarded to `utils.run_train` through `train_config`) ---
NUM_EPOCHS = 10        # Passed as `num_epochs`.
BATCH_SIZE = 64        # Passed as `train_batch_size`; also scales the LR-decay step index.
EVAL_BATCH_SIZE = 64   # Passed as `eval_batch_size`.
EVAL_FREQUENCY = 100   # Number of steps between evaluations.

# --- Data ---
DATASET = 'mnist'      # Name handed to `utils.get_dataset`.

# --- Model ---
NUM_UNROLL_STEPS = 5   # How many times `apply` runs `model_step` on each input.

# NOTE(review): no random seed is set in the cells visible here, so repeated
# runs are not expected to reproduce the logged numbers exactly.

In [5]:
def model_step(input_images, prior, batch_size, training, num_labels, use_priors):
    """Run one pass of the conv net, optionally gating features with `prior`.

    Two (5x5 conv + ReLU, 2x2 max-pool) stages produce a feature map that is
    flattened and fed through a 1024-unit ReLU layer, dropout (rate 0.4) and a
    linear layer with `num_labels` outputs. When `use_priors` is true, `prior`
    goes through a 100-unit ReLU layer and a sigmoid layer that emits one gate
    per flattened feature; the features are multiplied element-wise by those
    gates before the 1024-unit layer.

    Args:
        input_images: Image tensor fed to the first convolution.
        prior: Tensor the gates are computed from; only read when
            `use_priors` is true.
        batch_size: Not used by this function; kept so the signature matches
            the call made in `apply`.
        training: Forwarded to `tf.layers.dropout` as its `training` flag.
        num_labels: Number of units of the final dense layer.
        use_priors: Whether to gate the flattened features with `prior`.

    Returns:
        A `(logits, posteriors)` pair; `posteriors` is the softmax of `logits`.
    """
    def conv_relu_pool(feature_map, num_filters):
        # 5x5 "same" convolution with ReLU, followed by 2x2 max-pooling, stride 2.
        convolved = tf.layers.conv2d(
            inputs=feature_map,
            filters=num_filters,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)
        return tf.layers.max_pooling2d(inputs=convolved, pool_size=[2, 2], strides=2)

    pooled = conv_relu_pool(conv_relu_pool(input_images, 32), 64)

    # Flatten every dimension except the leading one; the flattened size comes
    # from the static shape of the pooled tensor.
    _, height, width, channels = pooled.get_shape()
    flat_size = height * width * channels
    features = tf.reshape(pooled, [-1, flat_size])

    if use_priors:
        hidden = tf.layers.dense(inputs=prior, units=100, activation=tf.nn.relu)
        gates = tf.layers.dense(inputs=hidden, units=flat_size, activation=tf.nn.sigmoid)
        features = tf.multiply(features, gates)

    fully_connected = tf.layers.dense(inputs=features, units=1024, activation=tf.nn.relu)
    regularized = tf.layers.dropout(inputs=fully_connected, rate=0.4, training=training)

    logits = tf.layers.dense(inputs=regularized, units=num_labels)
    return logits, tf.nn.softmax(logits)

def apply(input_images, training, train_labels_node, num_labels, use_priors):
    """Unroll `model_step` NUM_UNROLL_STEPS times, feeding posteriors back as priors.

    The first step receives a uniform prior (every entry equal to
    1 / num_labels); each later step receives the softmax output of the step
    before it. Every step runs under the 'one_step' variable scope, with
    `reuse` switched on from the second step, so the steps share variables.

    Args:
        input_images: Image tensor; its static leading dimension is used as
            the batch size of the initial prior.
        training: Forwarded to `model_step`.
        train_labels_node: Labels passed to
            `tf.nn.sparse_softmax_cross_entropy_with_logits` at every step.
        num_labels: Number of classes (width of the prior and of the logits).
        use_priors: Forwarded to `model_step`.

    Returns:
        A `(stacked_logits, loss)` pair: the logits of all steps stacked along
        a new leading axis, and the sum over steps of the batch-mean
        cross-entropy.
    """
    batch_size = input_images.get_shape()[0]
    current_prior = tf.ones((batch_size, num_labels)) / num_labels

    total_loss = 0.0
    logits_per_step = []
    for step_index in range(NUM_UNROLL_STEPS):
        # Variables are created on the first step and reused on later ones.
        reuse_variables = (step_index > 0)
        with tf.variable_scope('one_step', reuse=reuse_variables):
            step_logits, step_posteriors = model_step(
                input_images, current_prior, batch_size,
                training=training, num_labels=num_labels, use_priors=use_priors)
        step_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=train_labels_node, logits=step_logits)
        total_loss += tf.reduce_mean(step_xent)
        logits_per_step.append(step_logits)
        # This step's posterior becomes the next step's prior.
        current_prior = step_posteriors
    return tf.stack(logits_per_step), total_loss

In [8]:
use_priors = True

# Start from an empty default graph so that re-running this cell does not add
# to the ops and variables left over from a previous run.
tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter handed to `utils.run_train` as `batch_var`; it drives the
# learning-rate decay below. It is bookkeeping state rather than a model
# parameter, so it is created as a non-trainable variable.
batch = tf.Variable(0, dtype=tf.float32, trainable=False)
# Staircase exponential decay starting at 1e-3: the rate is multiplied by 0.95
# each time `batch * BATCH_SIZE` passes another multiple of
# `dataset.train_size` (the training log shows this happening once per epoch).
learning_rate = tf.train.exponential_decay(
    1e-3,                # Base learning rate.
    batch * BATCH_SIZE,  # Current index into the dataset.
    dataset.train_size,  # Decay step.
    0.95,                # Decay rate.
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = dict(
    optimizer=optimizer,
    batch_var=batch,
    learning_rate_var=learning_rate,
    train_batch_size=BATCH_SIZE,
    eval_batch_size=EVAL_BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    eval_frequency=EVAL_FREQUENCY,
)

# `apply` builds the model; `use_priors` reaches it through `build_func_kwargs`.
stdout_lines = utils.run_train(apply, train_config, dataset,
                               build_func_kwargs=dict(use_priors=use_priors))

Initialized!
Step 0 (epoch 0.00), 2.8 ms
Minibatch loss: 10.917, learning rate: 0.001000
Minibatch error: [79.6875, 84.375, 81.25, 84.375, 82.8125]
Validation error: [90.4, 90.4, 90.4, 90.4, 90.4]
Step 100 (epoch 0.12), 48.8 ms
Minibatch loss: 0.255, learning rate: 0.001000
Minibatch error: [1.5625, 0.0, 0.0, 1.5625, 0.0]
Validation error: [4.659999999999997, 4.719999999999999, 4.700000000000003, 4.700000000000003, 4.700000000000003]
Step 200 (epoch 0.23), 48.8 ms
Minibatch loss: 0.653, learning rate: 0.001000
Minibatch error: [4.6875, 7.8125, 4.6875, 6.25, 4.6875]
Validation error: [2.760000000000005, 2.739999999999995, 2.739999999999995, 2.739999999999995, 2.739999999999995]
Step 300 (epoch 0.35), 48.8 ms
Minibatch loss: 0.530, learning rate: 0.001000
Minibatch error: [3.125, 3.125, 3.125, 3.125, 3.125]
Validation error: [2.519999999999996, 2.5, 2.5, 2.5, 2.5]
Step 400 (epoch 0.47), 48.8 ms
Minibatch loss: 0.670, learning rate: 0.001000
Minibatch error: [4.6875, 6.25, 3.125, 4.6875, 

Validation error: [0.9599999999999937, 0.9200000000000017, 0.9200000000000017, 0.9200000000000017, 0.9200000000000017]
Step 3500 (epoch 4.07), 48.9 ms
Minibatch loss: 0.055, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 1.5625, 0.0]
Validation error: [1.1599999999999966, 1.1400000000000006, 1.1400000000000006, 1.1400000000000006, 1.1400000000000006]
Step 3600 (epoch 4.19), 48.8 ms
Minibatch loss: 0.001, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.0600000000000023, 1.0, 1.0400000000000063, 1.0400000000000063, 1.0400000000000063]
Step 3700 (epoch 4.31), 48.7 ms
Minibatch loss: 0.004, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.019999999999996, 1.019999999999996, 1.0, 1.0, 1.0]
Step 3800 (epoch 4.42), 48.8 ms
Minibatch loss: 0.009, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.1200000000000045, 1.1200000000000045, 1.1400000000000006, 1.14000000000000

Validation error: [0.9399999999999977, 0.9200000000000017, 0.9000000000000057, 0.9000000000000057, 0.9000000000000057]
Step 7000 (epoch 8.15), 48.7 ms
Minibatch loss: 0.003, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.0799999999999983, 1.0999999999999943, 1.0799999999999983, 1.0799999999999983, 1.0799999999999983]
Step 7100 (epoch 8.26), 48.8 ms
Minibatch loss: 0.001, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.9000000000000057, 0.9599999999999937, 0.9599999999999937, 0.9599999999999937, 0.9599999999999937]
Step 7200 (epoch 8.38), 48.7 ms
Minibatch loss: 0.000, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.8400000000000034, 0.8599999999999994, 0.8599999999999994, 0.8599999999999994, 0.8599999999999994]
Step 7300 (epoch 8.49), 48.6 ms
Minibatch loss: 0.000, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.799999999999997

In [9]:
# Print the log lines that `utils.run_train` returned in `stdout_lines`,
# concatenated into a single string.
print("".join(stdout_lines))

Step 0 (epoch 0.00), 2.8 ms
Minibatch loss: 10.917, learning rate: 0.001000
Minibatch error: [79.6875, 84.375, 81.25, 84.375, 82.8125]
Validation error: [90.4, 90.4, 90.4, 90.4, 90.4]
Step 100 (epoch 0.12), 48.8 ms
Minibatch loss: 0.255, learning rate: 0.001000
Minibatch error: [1.5625, 0.0, 0.0, 1.5625, 0.0]
Validation error: [4.659999999999997, 4.719999999999999, 4.700000000000003, 4.700000000000003, 4.700000000000003]
Step 200 (epoch 0.23), 48.8 ms
Minibatch loss: 0.653, learning rate: 0.001000
Minibatch error: [4.6875, 7.8125, 4.6875, 6.25, 4.6875]
Validation error: [2.760000000000005, 2.739999999999995, 2.739999999999995, 2.739999999999995, 2.739999999999995]
Step 300 (epoch 0.35), 48.8 ms
Minibatch loss: 0.530, learning rate: 0.001000
Minibatch error: [3.125, 3.125, 3.125, 3.125, 3.125]
Validation error: [2.519999999999996, 2.5, 2.5, 2.5, 2.5]
Step 400 (epoch 0.47), 48.8 ms
Minibatch loss: 0.670, learning rate: 0.001000
Minibatch error: [4.6875, 6.25, 3.125, 4.6875, 4.6875]
Valid

### Introduced G: an additive bias computed from the prior, added after the multiplicative gates (F)

In [10]:
def model_step(input_images, prior, batch_size, training, num_labels, use_priors):
    """Run one pass of the conv net, optionally gating and shifting features.

    Two (5x5 conv + ReLU, 2x2 max-pool) stages produce a feature map that is
    flattened and fed through a 1024-unit ReLU layer, dropout (rate 0.4) and a
    linear layer with `num_labels` outputs. When `use_priors` is true, `prior`
    goes through one shared 100-unit ReLU layer; from that projection a
    sigmoid layer emits one gate per flattened feature and a linear layer
    emits one bias per flattened feature. The features become
    `features * gates + bias` before the 1024-unit layer.

    Args:
        input_images: Image tensor fed to the first convolution.
        prior: Tensor the gates and bias are computed from; only read when
            `use_priors` is true.
        batch_size: Not used by this function; kept so the signature matches
            the call made in `apply`.
        training: Forwarded to `tf.layers.dropout` as its `training` flag.
        num_labels: Number of units of the final dense layer.
        use_priors: Whether to modulate the flattened features with `prior`.

    Returns:
        A `(logits, posteriors)` pair; `posteriors` is the softmax of `logits`.
    """
    def conv_relu_pool(feature_map, num_filters):
        # 5x5 "same" convolution with ReLU, followed by 2x2 max-pooling, stride 2.
        convolved = tf.layers.conv2d(
            inputs=feature_map,
            filters=num_filters,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)
        return tf.layers.max_pooling2d(inputs=convolved, pool_size=[2, 2], strides=2)

    pooled = conv_relu_pool(conv_relu_pool(input_images, 32), 64)

    # Flatten every dimension except the leading one; the flattened size comes
    # from the static shape of the pooled tensor.
    _, height, width, channels = pooled.get_shape()
    flat_size = height * width * channels
    features = tf.reshape(pooled, [-1, flat_size])

    if use_priors:
        # Gates and bias both branch off the same hidden projection of the prior.
        hidden = tf.layers.dense(inputs=prior, units=100, activation=tf.nn.relu)
        gates = tf.layers.dense(inputs=hidden, units=flat_size, activation=tf.nn.sigmoid)
        bias = tf.layers.dense(inputs=hidden, units=flat_size, activation=None)
        features = tf.multiply(features, gates) + bias

    fully_connected = tf.layers.dense(inputs=features, units=1024, activation=tf.nn.relu)
    regularized = tf.layers.dropout(inputs=fully_connected, rate=0.4, training=training)

    logits = tf.layers.dense(inputs=regularized, units=num_labels)
    return logits, tf.nn.softmax(logits)

def apply(input_images, training, train_labels_node, num_labels, use_priors):
    """Unroll `model_step` NUM_UNROLL_STEPS times, feeding posteriors back as priors.

    The first step receives a uniform prior (every entry equal to
    1 / num_labels); each later step receives the softmax output of the step
    before it. Every step runs under the 'one_step' variable scope, with
    `reuse` switched on from the second step, so the steps share variables.

    Args:
        input_images: Image tensor; its static leading dimension is used as
            the batch size of the initial prior.
        training: Forwarded to `model_step`.
        train_labels_node: Labels passed to
            `tf.nn.sparse_softmax_cross_entropy_with_logits` at every step.
        num_labels: Number of classes (width of the prior and of the logits).
        use_priors: Forwarded to `model_step`.

    Returns:
        A `(stacked_logits, loss)` pair: the logits of all steps stacked along
        a new leading axis, and the sum over steps of the batch-mean
        cross-entropy.
    """
    batch_size = input_images.get_shape()[0]
    current_prior = tf.ones((batch_size, num_labels)) / num_labels

    total_loss = 0.0
    logits_per_step = []
    for step_index in range(NUM_UNROLL_STEPS):
        # Variables are created on the first step and reused on later ones.
        reuse_variables = (step_index > 0)
        with tf.variable_scope('one_step', reuse=reuse_variables):
            step_logits, step_posteriors = model_step(
                input_images, current_prior, batch_size,
                training=training, num_labels=num_labels, use_priors=use_priors)
        step_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=train_labels_node, logits=step_logits)
        total_loss += tf.reduce_mean(step_xent)
        logits_per_step.append(step_logits)
        # This step's posterior becomes the next step's prior.
        current_prior = step_posteriors
    return tf.stack(logits_per_step), total_loss

In [11]:
use_priors = True

# Start from an empty default graph so that re-running this cell does not add
# to the ops and variables left over from a previous run.
tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter handed to `utils.run_train` as `batch_var`; it drives the
# learning-rate decay below. It is bookkeeping state rather than a model
# parameter, so it is created as a non-trainable variable.
batch = tf.Variable(0, dtype=tf.float32, trainable=False)
# Staircase exponential decay starting at 1e-3: the rate is multiplied by 0.95
# each time `batch * BATCH_SIZE` passes another multiple of
# `dataset.train_size` (the training log shows this happening once per epoch).
learning_rate = tf.train.exponential_decay(
    1e-3,                # Base learning rate.
    batch * BATCH_SIZE,  # Current index into the dataset.
    dataset.train_size,  # Decay step.
    0.95,                # Decay rate.
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = dict(
    optimizer=optimizer,
    batch_var=batch,
    learning_rate_var=learning_rate,
    train_batch_size=BATCH_SIZE,
    eval_batch_size=EVAL_BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    eval_frequency=EVAL_FREQUENCY,
)

# `apply` builds the model; `use_priors` reaches it through `build_func_kwargs`.
stdout_lines = utils.run_train(apply, train_config, dataset,
                               build_func_kwargs=dict(use_priors=use_priors))

Initialized!
Step 0 (epoch 0.00), 3.2 ms
Minibatch loss: 10.825, learning rate: 0.001000
Minibatch error: [81.25, 84.375, 73.4375, 79.6875, 84.375]
Validation error: [90.72, 90.72, 90.72, 90.72, 90.72]
Step 100 (epoch 0.12), 52.5 ms
Minibatch loss: 0.262, learning rate: 0.001000
Minibatch error: [1.5625, 1.5625, 1.5625, 1.5625, 0.0]
Validation error: [4.959999999999994, 4.799999999999997, 4.900000000000006, 5.019999999999996, 5.019999999999996]
Step 200 (epoch 0.23), 52.1 ms
Minibatch loss: 0.744, learning rate: 0.001000
Minibatch error: [3.125, 3.125, 3.125, 3.125, 1.5625]
Validation error: [2.8799999999999955, 2.8799999999999955, 2.8400000000000034, 2.8599999999999994, 2.8599999999999994]
Step 300 (epoch 0.35), 52.1 ms
Minibatch loss: 0.614, learning rate: 0.001000
Minibatch error: [3.125, 3.125, 3.125, 3.125, 3.125]
Validation error: [2.6400000000000006, 2.739999999999995, 2.680000000000007, 2.680000000000007, 2.700000000000003]
Step 400 (epoch 0.47), 52.3 ms
Minibatch loss: 0.442, 

Validation error: [0.9200000000000017, 0.9399999999999977, 0.9599999999999937, 0.9599999999999937, 0.9599999999999937]
Step 3500 (epoch 4.07), 52.2 ms
Minibatch loss: 0.010, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.9599999999999937, 0.980000000000004, 0.980000000000004, 0.980000000000004, 0.980000000000004]
Step 3600 (epoch 4.19), 52.2 ms
Minibatch loss: 0.000, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.019999999999996, 1.019999999999996, 1.019999999999996, 1.019999999999996, 1.019999999999996]
Step 3700 (epoch 4.31), 52.2 ms
Minibatch loss: 0.027, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.1599999999999966, 1.1200000000000045, 1.1200000000000045, 1.1200000000000045, 1.1200000000000045]
Step 3800 (epoch 4.42), 52.3 ms
Minibatch loss: 0.011, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.9399999999999977, 0.9399

Validation error: [0.8799999999999955, 0.9000000000000057, 0.9000000000000057, 0.9000000000000057, 0.9000000000000057]
Step 7000 (epoch 8.15), 52.4 ms
Minibatch loss: 0.065, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 1.5625, 0.0, 0.0]
Validation error: [1.1200000000000045, 1.1400000000000006, 1.1200000000000045, 1.1200000000000045, 1.1200000000000045]
Step 7100 (epoch 8.26), 52.4 ms
Minibatch loss: 0.005, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.7000000000000028, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028]
Step 7200 (epoch 8.38), 52.2 ms
Minibatch loss: 0.009, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.6599999999999966, 0.6400000000000006, 0.6599999999999966, 0.6599999999999966, 0.6599999999999966]
Step 7300 (epoch 8.49), 52.3 ms
Minibatch loss: 0.005, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.939999999999

## Separate 2-layer networks for F (gates) and G (bias)

In [12]:
def model_step(input_images, prior, batch_size, training, num_labels, use_priors):
    """Run one pass of the conv net, with separate gate and bias networks.

    Two (5x5 conv + ReLU, 2x2 max-pool) stages produce a feature map that is
    flattened and fed through a 1024-unit ReLU layer, dropout (rate 0.4) and a
    linear layer with `num_labels` outputs. When `use_priors` is true, `prior`
    feeds two independent two-layer networks, each with its own 100-unit ReLU
    hidden layer: one ends in a sigmoid and yields a gate per flattened
    feature, the other ends in a linear layer and yields a bias per flattened
    feature. The features become `features * gates + bias` before the
    1024-unit layer.

    Args:
        input_images: Image tensor fed to the first convolution.
        prior: Tensor the gates and bias are computed from; only read when
            `use_priors` is true.
        batch_size: Not used by this function; kept so the signature matches
            the call made in `apply`.
        training: Forwarded to `tf.layers.dropout` as its `training` flag.
        num_labels: Number of units of the final dense layer.
        use_priors: Whether to modulate the flattened features with `prior`.

    Returns:
        A `(logits, posteriors)` pair; `posteriors` is the softmax of `logits`.
    """
    def conv_relu_pool(feature_map, num_filters):
        # 5x5 "same" convolution with ReLU, followed by 2x2 max-pooling, stride 2.
        convolved = tf.layers.conv2d(
            inputs=feature_map,
            filters=num_filters,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)
        return tf.layers.max_pooling2d(inputs=convolved, pool_size=[2, 2], strides=2)

    pooled = conv_relu_pool(conv_relu_pool(input_images, 32), 64)

    # Flatten every dimension except the leading one; the flattened size comes
    # from the static shape of the pooled tensor.
    _, height, width, channels = pooled.get_shape()
    flat_size = height * width * channels
    features = tf.reshape(pooled, [-1, flat_size])

    if use_priors:
        # Gate network: prior -> 100 ReLU units -> sigmoid gate per feature.
        gate_hidden = tf.layers.dense(inputs=prior, units=100, activation=tf.nn.relu)
        gates = tf.layers.dense(inputs=gate_hidden, units=flat_size, activation=tf.nn.sigmoid)
        # Bias network: prior -> 100 ReLU units -> linear bias per feature.
        bias_hidden = tf.layers.dense(inputs=prior, units=100, activation=tf.nn.relu)
        bias = tf.layers.dense(inputs=bias_hidden, units=flat_size, activation=None)
        features = tf.multiply(features, gates) + bias

    fully_connected = tf.layers.dense(inputs=features, units=1024, activation=tf.nn.relu)
    regularized = tf.layers.dropout(inputs=fully_connected, rate=0.4, training=training)

    logits = tf.layers.dense(inputs=regularized, units=num_labels)
    return logits, tf.nn.softmax(logits)

def apply(input_images, training, train_labels_node, num_labels, use_priors):
    """Unroll `model_step` NUM_UNROLL_STEPS times, feeding posteriors back as priors.

    The first step receives a uniform prior (every entry equal to
    1 / num_labels); each later step receives the softmax output of the step
    before it. Every step runs under the 'one_step' variable scope, with
    `reuse` switched on from the second step, so the steps share variables.

    Args:
        input_images: Image tensor; its static leading dimension is used as
            the batch size of the initial prior.
        training: Forwarded to `model_step`.
        train_labels_node: Labels passed to
            `tf.nn.sparse_softmax_cross_entropy_with_logits` at every step.
        num_labels: Number of classes (width of the prior and of the logits).
        use_priors: Forwarded to `model_step`.

    Returns:
        A `(stacked_logits, loss)` pair: the logits of all steps stacked along
        a new leading axis, and the sum over steps of the batch-mean
        cross-entropy.
    """
    batch_size = input_images.get_shape()[0]
    current_prior = tf.ones((batch_size, num_labels)) / num_labels

    total_loss = 0.0
    logits_per_step = []
    for step_index in range(NUM_UNROLL_STEPS):
        # Variables are created on the first step and reused on later ones.
        reuse_variables = (step_index > 0)
        with tf.variable_scope('one_step', reuse=reuse_variables):
            step_logits, step_posteriors = model_step(
                input_images, current_prior, batch_size,
                training=training, num_labels=num_labels, use_priors=use_priors)
        step_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=train_labels_node, logits=step_logits)
        total_loss += tf.reduce_mean(step_xent)
        logits_per_step.append(step_logits)
        # This step's posterior becomes the next step's prior.
        current_prior = step_posteriors
    return tf.stack(logits_per_step), total_loss

In [13]:
use_priors = True

# Start from an empty default graph so that re-running this cell does not add
# to the ops and variables left over from a previous run.
tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter handed to `utils.run_train` as `batch_var`; it drives the
# learning-rate decay below. It is bookkeeping state rather than a model
# parameter, so it is created as a non-trainable variable.
batch = tf.Variable(0, dtype=tf.float32, trainable=False)
# Staircase exponential decay starting at 1e-3: the rate is multiplied by 0.95
# each time `batch * BATCH_SIZE` passes another multiple of
# `dataset.train_size` (the training log shows this happening once per epoch).
learning_rate = tf.train.exponential_decay(
    1e-3,                # Base learning rate.
    batch * BATCH_SIZE,  # Current index into the dataset.
    dataset.train_size,  # Decay step.
    0.95,                # Decay rate.
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = dict(
    optimizer=optimizer,
    batch_var=batch,
    learning_rate_var=learning_rate,
    train_batch_size=BATCH_SIZE,
    eval_batch_size=EVAL_BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    eval_frequency=EVAL_FREQUENCY,
)

# `apply` builds the model; `use_priors` reaches it through `build_func_kwargs`.
stdout_lines = utils.run_train(apply, train_config, dataset,
                               build_func_kwargs=dict(use_priors=use_priors))

Initialized!
Step 0 (epoch 0.00), 3.9 ms
Minibatch loss: 10.927, learning rate: 0.001000
Minibatch error: [84.375, 84.375, 82.8125, 84.375, 82.8125]
Validation error: [90.76, 90.76, 90.76, 90.76, 90.76]
Step 100 (epoch 0.12), 53.5 ms
Minibatch loss: 0.433, learning rate: 0.001000
Minibatch error: [3.125, 3.125, 3.125, 3.125, 3.125]
Validation error: [5.459999999999994, 5.459999999999994, 5.540000000000006, 5.579999999999998, 5.560000000000002]
Step 200 (epoch 0.23), 53.2 ms
Minibatch loss: 0.558, learning rate: 0.001000
Minibatch error: [3.125, 3.125, 4.6875, 3.125, 3.125]
Validation error: [3.5600000000000023, 3.480000000000004, 3.4000000000000057, 3.4000000000000057, 3.3799999999999955]
Step 300 (epoch 0.35), 53.4 ms
Minibatch loss: 0.473, learning rate: 0.001000
Minibatch error: [3.125, 3.125, 3.125, 3.125, 3.125]
Validation error: [2.6599999999999966, 2.680000000000007, 2.6599999999999966, 2.680000000000007, 2.680000000000007]
Step 400 (epoch 0.47), 53.4 ms
Minibatch loss: 0.554, l

Validation error: [1.3199999999999932, 1.3199999999999932, 1.2999999999999972, 1.2999999999999972, 1.2999999999999972]
Step 3500 (epoch 4.07), 53.5 ms
Minibatch loss: 0.026, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.1400000000000006, 1.1400000000000006, 1.1200000000000045, 1.1200000000000045, 1.1200000000000045]
Step 3600 (epoch 4.19), 53.5 ms
Minibatch loss: 0.001, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.8400000000000034, 0.7600000000000051, 0.7800000000000011, 0.7800000000000011, 0.7800000000000011]
Step 3700 (epoch 4.31), 53.4 ms
Minibatch loss: 0.002, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.980000000000004, 0.980000000000004, 0.9599999999999937, 0.9599999999999937, 0.9599999999999937]
Step 3800 (epoch 4.42), 53.3 ms
Minibatch loss: 0.004, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.1200000000000045,

Step 6900 (epoch 8.03), 53.4 ms
Minibatch loss: 0.015, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.0799999999999983, 1.0799999999999983, 1.0600000000000023, 1.0600000000000023, 1.0600000000000023]
Step 7000 (epoch 8.15), 53.3 ms
Minibatch loss: 0.002, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.9200000000000017, 0.9200000000000017, 0.9000000000000057, 0.9000000000000057, 0.9000000000000057]
Step 7100 (epoch 8.26), 53.4 ms
Minibatch loss: 0.000, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.7000000000000028, 0.6800000000000068, 0.6400000000000006, 0.6599999999999966, 0.6400000000000006]
Step 7200 (epoch 8.38), 53.4 ms
Minibatch loss: 0.004, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.7000000000000028, 0.7199999999999989, 0.7199999999999989, 0.7199999999999989, 0.7199999999999989]
Step 7300 (epoch 8.49), 53.4 ms
Mini

In [26]:
def model_step(input_images, prior, batch_size, training, num_labels, use_priors):
    """Run one pass of the conv net with prior-driven gating at every layer.

    The backbone is two (5x5 conv + ReLU, 2x2 max-pool) stages, a flatten, a
    1024-unit ReLU layer, dropout (rate 0.4) and a linear layer with
    `num_labels` outputs. When `use_priors` is true, four activations are
    modulated as `activation * gates + bias`, where `gates` (sigmoid) and
    `bias` (linear) each come from their own small network on `prior` with one
    100-unit ReLU hidden layer:

    * the output of each convolution, before pooling (one gate/bias per
      channel, broadcast over the two spatial dimensions);
    * the flattened output of the second pooling (one gate/bias per unit);
    * the output of the 1024-unit layer (one gate/bias per unit).

    Args:
        input_images: Image tensor fed to the first convolution.
        prior: Tensor every gate/bias network takes as input; only read when
            `use_priors` is true.
        batch_size: Not used by this function; kept so the signature matches
            the call made in `apply`.
        training: Forwarded to `tf.layers.dropout` as its `training` flag.
        num_labels: Number of units of the final dense layer.
        use_priors: Whether to apply the gating described above.

    Returns:
        A `(logits, posteriors)` pair; `posteriors` is the softmax of `logits`.
    """
    def get_gates_and_bias(layer_sizes):
        """Build the gate and bias networks on `prior`.

        `layer_sizes[:-1]` are the ReLU hidden widths and `layer_sizes[-1]` is
        the output width. The two networks have the same layout but separate
        layers.
        """
        gates = prior
        bias = prior
        for size in layer_sizes[:-1]:
            gates = tf.layers.dense(inputs=gates, units=size, activation=tf.nn.relu)
            bias = tf.layers.dense(inputs=bias, units=size, activation=tf.nn.relu)
        gates = tf.layers.dense(inputs=gates, units=layer_sizes[-1], activation=tf.nn.sigmoid)
        bias = tf.layers.dense(inputs=bias, units=layer_sizes[-1], activation=None)
        return gates, bias

    def apply_gating_on_conv(tensor, layer_sizes):
        """Gate a conv feature map per channel (channels on the last axis)."""
        if not use_priors:
            return tensor
        num_channels = tensor.get_shape()[-1]
        gates, bias = get_gates_and_bias(layer_sizes + (num_channels, ))
        # Insert two size-1 axes after the batch axis so the per-channel
        # values broadcast over both spatial dimensions.
        gates = tf.expand_dims(tf.expand_dims(gates, axis=1), axis=1)
        bias = tf.expand_dims(tf.expand_dims(bias, axis=1), axis=1)
        return tf.multiply(tensor, gates) + bias

    def apply_gating_on_dense(tensor, layer_sizes):
        """Gate a 2-D activation with one gate/bias per unit."""
        if not use_priors:
            return tensor
        num_units = tensor.get_shape()[1]
        gates, bias = get_gates_and_bias(layer_sizes + (num_units, ))
        return tf.multiply(tensor, gates) + bias

    inputs = input_images

    conv1 = tf.layers.conv2d(
        inputs=inputs,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    conv1 = apply_gating_on_conv(conv1, (100,))
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    conv2 = apply_gating_on_conv(conv2, (100,))
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Flatten with an inferred (-1) batch dimension and an explicit feature
    # size taken from the static shape, as the earlier `model_step`
    # definitions in this notebook do. This keeps the width of `pool2_flat`
    # statically known (needed by `apply_gating_on_dense`) without pinning
    # the reshape to a static batch size.
    pool2_shape = pool2.get_shape()
    num_units_after_conv = pool2_shape[1] * pool2_shape[2] * pool2_shape[3]
    pool2_flat = tf.reshape(pool2, [-1, num_units_after_conv])
    pool2_flat = apply_gating_on_dense(pool2_flat, (100,))

    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    dense = apply_gating_on_dense(dense, (100,))
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=training)

    logits = tf.layers.dense(inputs=dropout, units=num_labels)
    posteriors = tf.nn.softmax(logits)

    return logits, posteriors

def apply(input_images, training, train_labels_node, num_labels, use_priors):
    """Unroll `model_step` NUM_UNROLL_STEPS times, feeding posteriors back as priors.

    The first step receives a uniform prior (every entry equal to
    1 / num_labels); each later step receives the softmax output of the step
    before it. Every step runs under the 'one_step' variable scope, with
    `reuse` switched on from the second step, so the steps share variables.

    Args:
        input_images: Image tensor; its static leading dimension is used as
            the batch size of the initial prior.
        training: Forwarded to `model_step`.
        train_labels_node: Labels passed to
            `tf.nn.sparse_softmax_cross_entropy_with_logits` at every step.
        num_labels: Number of classes (width of the prior and of the logits).
        use_priors: Forwarded to `model_step`.

    Returns:
        A `(stacked_logits, loss)` pair: the logits of all steps stacked along
        a new leading axis, and the sum over steps of the batch-mean
        cross-entropy.
    """
    batch_size = input_images.get_shape()[0]
    current_prior = tf.ones((batch_size, num_labels)) / num_labels

    total_loss = 0.0
    logits_per_step = []
    for step_index in range(NUM_UNROLL_STEPS):
        # Variables are created on the first step and reused on later ones.
        reuse_variables = (step_index > 0)
        with tf.variable_scope('one_step', reuse=reuse_variables):
            step_logits, step_posteriors = model_step(
                input_images, current_prior, batch_size,
                training=training, num_labels=num_labels, use_priors=use_priors)
        step_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=train_labels_node, logits=step_logits)
        total_loss += tf.reduce_mean(step_xent)
        logits_per_step.append(step_logits)
        # This step's posterior becomes the next step's prior.
        current_prior = step_posteriors
    return tf.stack(logits_per_step), total_loss

In [27]:
use_priors = True

# Start from an empty default graph so that re-running this cell does not add
# to the ops and variables left over from a previous run.
tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter handed to `utils.run_train` as `batch_var`; it drives the
# learning-rate decay below. It is bookkeeping state rather than a model
# parameter, so it is created as a non-trainable variable.
batch = tf.Variable(0, dtype=tf.float32, trainable=False)
# Staircase exponential decay starting at 1e-3: the rate is multiplied by 0.95
# each time `batch * BATCH_SIZE` passes another multiple of
# `dataset.train_size` (the training log shows this happening once per epoch).
learning_rate = tf.train.exponential_decay(
    1e-3,                # Base learning rate.
    batch * BATCH_SIZE,  # Current index into the dataset.
    dataset.train_size,  # Decay step.
    0.95,                # Decay rate.
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = dict(
    optimizer=optimizer,
    batch_var=batch,
    learning_rate_var=learning_rate,
    train_batch_size=BATCH_SIZE,
    eval_batch_size=EVAL_BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    eval_frequency=EVAL_FREQUENCY,
)

# `apply` builds the model; `use_priors` reaches it through `build_func_kwargs`.
stdout_lines = utils.run_train(apply, train_config, dataset,
                               build_func_kwargs=dict(use_priors=use_priors))

Initialized!
Step 0 (epoch 0.00), 6.4 ms
Minibatch loss: 11.330, learning rate: 0.001000
Minibatch error: [85.9375, 89.0625, 84.375, 84.375, 82.8125]
Validation error: [90.76, 90.76, 90.76, 90.76, 90.76]
Step 100 (epoch 0.12), 86.5 ms
Minibatch loss: 0.638, learning rate: 0.001000
Minibatch error: [3.125, 4.6875, 3.125, 4.6875, 3.125]
Validation error: [9.260000000000005, 9.019999999999996, 9.0, 9.019999999999996, 9.060000000000002]
Step 200 (epoch 0.23), 86.0 ms
Minibatch loss: 0.818, learning rate: 0.001000
Minibatch error: [7.8125, 6.25, 7.8125, 4.6875, 6.25]
Validation error: [3.739999999999995, 3.8599999999999994, 3.9200000000000017, 3.9200000000000017, 3.9200000000000017]
Step 300 (epoch 0.35), 86.1 ms
Minibatch loss: 0.637, learning rate: 0.001000
Minibatch error: [7.8125, 3.125, 4.6875, 3.125, 3.125]
Validation error: [3.1599999999999966, 3.4000000000000057, 3.3599999999999994, 3.480000000000004, 3.4599999999999937]
Step 400 (epoch 0.47), 86.1 ms
Minibatch loss: 1.108, learning

Validation error: [1.0999999999999943, 1.0799999999999983, 1.0999999999999943, 1.0799999999999983, 1.0799999999999983]
Step 3500 (epoch 4.07), 86.3 ms
Minibatch loss: 0.046, learning rate: 0.000815
Minibatch error: [1.5625, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.2999999999999972, 1.3799999999999955, 1.3799999999999955, 1.3599999999999994, 1.3599999999999994]
Step 3600 (epoch 4.19), 86.3 ms
Minibatch loss: 0.002, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.0400000000000063, 1.0600000000000023, 1.0400000000000063, 1.0400000000000063, 1.0400000000000063]
Step 3700 (epoch 4.31), 86.3 ms
Minibatch loss: 0.004, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.0, 1.019999999999996, 1.0, 1.0, 1.0]
Step 3800 (epoch 4.42), 86.2 ms
Minibatch loss: 0.018, learning rate: 0.000815
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.0799999999999983, 1.1800000000000068, 1.1400000000000006, 1.1400000000000

Validation error: [1.0400000000000063, 1.0, 1.0, 1.0, 1.0]
Step 7000 (epoch 8.15), 86.3 ms
Minibatch loss: 0.013, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.0, 0.9399999999999977, 0.9000000000000057, 0.9000000000000057, 0.9000000000000057]
Step 7100 (epoch 8.26), 86.3 ms
Minibatch loss: 0.014, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [1.0, 0.9399999999999977, 0.9200000000000017, 0.9200000000000017, 0.9200000000000017]
Step 7200 (epoch 8.38), 86.2 ms
Minibatch loss: 0.001, learning rate: 0.000663
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [0.7399999999999949, 0.7399999999999949, 0.7199999999999989, 0.7199999999999989, 0.7199999999999989]
Step 7300 (epoch 8.49), 86.2 ms
Minibatch loss: 0.050, learning rate: 0.000663
Minibatch error: [0.0, 1.5625, 0.0, 0.0, 0.0]
Validation error: [1.0400000000000063, 1.019999999999996, 1.0, 1.0, 1.0]
Step 7400 (epoch 8.61), 86.3 ms
Minibatch loss: 0.