In [1]:
import os
# Expose only GPU 0 to CUDA. This cell runs before the TensorFlow import in a
# later cell -- presumably so the setting is already in place when TensorFlow
# initializes its devices (TODO confirm the ordering is required).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import tensorflow as tf

import utils

  return f(*args, **kwds)


In [4]:
# --- Inactive settings: commented out and not referenced by any code cell
# --- visible in this notebook.
#SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
#WORK_DIRECTORY = 'data'
#IMAGE_SIZE = 28
#NUM_CHANNELS = 1
#PIXEL_DEPTH = 255
#NUM_LABELS = 10
#VALIDATION_SIZE = 5000  # Size of the validation set.
#SEED = 66478  # Set to None for random seed.

# --- Active settings.
# Passed to utils.run_train as `train_batch_size`; also scales the step
# counter that drives the learning-rate decay.
BATCH_SIZE = 64
# Passed to utils.run_train as `num_epochs`.
NUM_EPOCHS = 10
# Passed to utils.run_train as `eval_batch_size`.
EVAL_BATCH_SIZE = 64
EVAL_FREQUENCY = 100  # Number of steps between evaluations.
# Dataset name handed to utils.get_dataset.
DATASET = 'cifar10'

# Number of times `apply` unrolls `model_step` (one loss term per step).
NUM_UNROLL_STEPS = 5

### Like 1_stage/confnn_gated_cifar10.ipynb, but with dropout removed

In [5]:
def model_step(input_images, prior, batch_size, training, num_labels, use_priors):
    """Run one unrolled step of the CNN and return its predictions.

    Two stages of 5x5 "same" convolution (ReLU) followed by 2x2 max pooling
    produce a feature map that is flattened. When ``use_priors`` is true,
    every flattened feature is multiplied by a sigmoid gate computed from
    ``prior`` through a 100-unit ReLU layer. A 1024-unit ReLU layer and a
    linear layer then produce the class logits.

    Args:
        input_images: image batch fed to the first convolution.
        prior: tensor the gates are computed from; only read when
            ``use_priors`` is true.
        batch_size: not used by this function.
        training: not used by this function.
        num_labels: width of the logits layer.
        use_priors: whether to gate the flattened features with ``prior``.

    Returns:
        Tuple ``(logits, posteriors)``; ``posteriors`` is the softmax of
        ``logits``.
    """
    def conv_relu_pool(features, num_filters):
        # 5x5 "same" convolution with ReLU, then 2x2 / stride-2 max pooling.
        activated = tf.layers.conv2d(
            inputs=features,
            filters=num_filters,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)
        return tf.layers.max_pooling2d(inputs=activated, pool_size=[2, 2], strides=2)

    stage1 = conv_relu_pool(input_images, 32)
    stage2 = conv_relu_pool(stage1, 64)

    # Collapse every non-batch axis of the feature map into a single axis.
    static_shape = stage2.get_shape()
    flat_width = static_shape[1] * static_shape[2] * static_shape[3]
    flat_features = tf.reshape(stage2, [-1, flat_width])

    if not use_priors:
        gated = flat_features
    else:
        # prior -> 100 ReLU units -> one sigmoid gate per flattened feature.
        hidden = tf.layers.dense(inputs=prior, units=100, activation=tf.nn.relu)
        gates = tf.layers.dense(inputs=hidden, units=flat_width, activation=tf.nn.sigmoid)
        gated = tf.multiply(flat_features, gates)

    fully_connected = tf.layers.dense(inputs=gated, units=1024, activation=tf.nn.relu)
    logits = tf.layers.dense(inputs=fully_connected, units=num_labels)

    return logits, tf.nn.softmax(logits)

def apply(input_images, training, train_labels_node, num_labels, use_priors):
    """Unroll ``model_step`` NUM_UNROLL_STEPS times with shared variables.

    The first step receives a uniform distribution over the classes as its
    prior; each later step receives the previous step's posteriors. The loss
    is the sum over steps of the mean sparse softmax cross-entropy against
    ``train_labels_node``.

    Args:
        input_images: image batch; its static leading dimension is used as
            the batch size.
        training: forwarded unchanged to ``model_step``.
        train_labels_node: integer class labels for the batch.
        num_labels: number of classes.
        use_priors: forwarded unchanged to ``model_step``.

    Returns:
        Tuple ``(stacked_logits, loss)`` where ``stacked_logits`` stacks the
        per-step logits along a new leading axis.
    """
    batch_size = input_images.get_shape()[0]
    # Step 0 starts from a uniform prior: 1 / num_labels for every class.
    priors = tf.ones((batch_size, num_labels)) / num_labels

    per_step_logits = []
    loss = 0.0
    for step in range(NUM_UNROLL_STEPS):
        # Variables are created on the first step and reused on later ones.
        with tf.variable_scope('one_step', reuse=(step != 0)):
            step_logits, priors = model_step(input_images, priors, batch_size,
                                             training=training, num_labels=num_labels,
                                             use_priors=use_priors)
        per_step_logits.append(step_logits)
        step_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=train_labels_node, logits=step_logits))
        loss = loss + step_loss

    return tf.stack(per_step_logits), loss

## Use priors = False

In [6]:
use_priors = False

# Drop whatever graph earlier cells built before constructing this run.
tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter that drives the learning-rate schedule. It is handed to
# utils.run_train as `batch_var` -- presumably incremented once per batch
# there (confirm in utils).
batch = tf.Variable(0, dtype=tf.float32)

# Staircase exponential decay: starts at 1e-3 and is multiplied by 0.95 each
# time `batch * BATCH_SIZE` passes another `dataset.train_size` examples.
learning_rate = tf.train.exponential_decay(
    learning_rate=1e-3,
    global_step=batch * BATCH_SIZE,
    decay_steps=dataset.train_size,
    decay_rate=0.95,
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = {
    'optimizer': optimizer,
    'batch_var': batch,
    'learning_rate_var': learning_rate,
    'train_batch_size': BATCH_SIZE,
    'eval_batch_size': EVAL_BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'eval_frequency': EVAL_FREQUENCY,
}

history, stdout_lines = utils.run_train(
    apply, train_config, dataset,
    build_func_kwargs={'use_priors': use_priors})

Initialized!
Step 0 (epoch 0.00), 34.4 ms
Minibatch loss: 10.233, learning rate: 0.001000
Minibatch error: [84.375, 84.375, 84.375, 84.375, 84.375]
Validation error: [90.26, 90.26, 90.26, 90.26, 90.26]
Step 100 (epoch 0.14), 47.0 ms
Minibatch loss: 8.224, learning rate: 0.001000
Minibatch error: [59.375, 59.375, 59.375, 59.375, 59.375]
Validation error: [56.28, 56.28, 56.28, 56.28, 56.28]
Step 200 (epoch 0.28), 46.8 ms
Minibatch loss: 6.515, learning rate: 0.001000
Minibatch error: [40.625, 40.625, 40.625, 40.625, 40.625]
Validation error: [50.3, 50.3, 50.3, 50.3, 50.3]
Step 300 (epoch 0.43), 46.9 ms
Minibatch loss: 6.113, learning rate: 0.001000
Minibatch error: [48.4375, 48.4375, 48.4375, 48.4375, 48.4375]
Validation error: [44.68, 44.68, 44.68, 44.68, 44.68]
Step 400 (epoch 0.57), 46.8 ms
Minibatch loss: 6.510, learning rate: 0.001000
Minibatch error: [43.75, 43.75, 43.75, 43.75, 43.75]
Validation error: [43.14, 43.14, 43.14, 43.14, 43.14]
Step 500 (epoch 0.71), 46.8 ms
Minibatch lo

Step 3500 (epoch 4.98), 46.9 ms
Minibatch loss: 0.816, learning rate: 0.000815
Minibatch error: [6.25, 6.25, 6.25, 6.25, 6.25]
Validation error: [29.659999999999997, 29.659999999999997, 29.659999999999997, 29.659999999999997, 29.659999999999997]
Step 3600 (epoch 5.12), 46.9 ms
Minibatch loss: 1.233, learning rate: 0.000774
Minibatch error: [6.25, 6.25, 6.25, 6.25, 6.25]
Validation error: [29.480000000000004, 29.480000000000004, 29.480000000000004, 29.480000000000004, 29.480000000000004]
Step 3700 (epoch 5.26), 46.9 ms
Minibatch loss: 0.293, learning rate: 0.000774
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [29.14, 29.14, 29.14, 29.14, 29.14]
Step 3800 (epoch 5.40), 46.9 ms
Minibatch loss: 0.659, learning rate: 0.000774
Minibatch error: [6.25, 6.25, 6.25, 6.25, 6.25]
Validation error: [28.28, 28.28, 28.28, 28.28, 28.28]
Step 3900 (epoch 5.55), 46.8 ms
Minibatch loss: 0.228, learning rate: 0.000774
Minibatch error: [1.5625, 1.5625, 1.5625, 1.5625, 1.5625]
Validation err

## Use priors = True

In [7]:
use_priors = True

# Drop whatever graph earlier cells built before constructing this run.
tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter that drives the learning-rate schedule. It is handed to
# utils.run_train as `batch_var` -- presumably incremented once per batch
# there (confirm in utils).
batch = tf.Variable(0, dtype=tf.float32)

# Staircase exponential decay: starts at 1e-3 and is multiplied by 0.95 each
# time `batch * BATCH_SIZE` passes another `dataset.train_size` examples.
learning_rate = tf.train.exponential_decay(
    learning_rate=1e-3,
    global_step=batch * BATCH_SIZE,
    decay_steps=dataset.train_size,
    decay_rate=0.95,
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = {
    'optimizer': optimizer,
    'batch_var': batch,
    'learning_rate_var': learning_rate,
    'train_batch_size': BATCH_SIZE,
    'eval_batch_size': EVAL_BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'eval_frequency': EVAL_FREQUENCY,
}

history, stdout_lines = utils.run_train(
    apply, train_config, dataset,
    build_func_kwargs={'use_priors': use_priors})

Initialized!
Step 0 (epoch 0.00), 10.0 ms
Minibatch loss: 10.501, learning rate: 0.001000
Minibatch error: [67.1875, 67.1875, 67.1875, 67.1875, 67.1875]
Validation error: [86.03999999999999, 86.03999999999999, 86.03999999999999, 86.03999999999999, 86.03999999999999]
Step 100 (epoch 0.14), 59.6 ms
Minibatch loss: 8.475, learning rate: 0.001000
Minibatch error: [60.9375, 60.9375, 60.9375, 60.9375, 60.9375]
Validation error: [57.76, 57.68, 57.68, 57.68, 57.68]
Step 200 (epoch 0.28), 59.5 ms
Minibatch loss: 6.548, learning rate: 0.001000
Minibatch error: [51.5625, 53.125, 53.125, 53.125, 53.125]
Validation error: [52.4, 52.36, 52.36, 52.36, 52.36]
Step 300 (epoch 0.43), 59.6 ms
Minibatch loss: 6.304, learning rate: 0.001000
Minibatch error: [48.4375, 46.875, 46.875, 46.875, 46.875]
Validation error: [44.0, 44.12, 44.1, 44.1, 44.1]
Step 400 (epoch 0.57), 59.5 ms
Minibatch loss: 6.747, learning rate: 0.001000
Minibatch error: [45.3125, 45.3125, 45.3125, 45.3125, 45.3125]
Validation error: [4

Step 3700 (epoch 5.26), 59.6 ms
Minibatch loss: 0.279, learning rate: 0.000774
Minibatch error: [1.5625, 1.5625, 1.5625, 1.5625, 1.5625]
Validation error: [28.36, 28.040000000000006, 28.22, 28.180000000000007, 28.22]
Step 3800 (epoch 5.40), 59.5 ms
Minibatch loss: 0.616, learning rate: 0.000774
Minibatch error: [3.125, 3.125, 3.125, 3.125, 3.125]
Validation error: [28.36, 28.319999999999993, 28.299999999999997, 28.260000000000005, 28.299999999999997]
Step 3900 (epoch 5.55), 59.6 ms
Minibatch loss: 0.627, learning rate: 0.000774
Minibatch error: [4.6875, 4.6875, 4.6875, 4.6875, 4.6875]
Validation error: [27.480000000000004, 27.36, 27.36, 27.319999999999993, 27.36]
Step 4000 (epoch 5.69), 59.6 ms
Minibatch loss: 0.691, learning rate: 0.000774
Minibatch error: [3.125, 3.125, 3.125, 3.125, 3.125]
Validation error: [29.400000000000006, 29.5, 29.379999999999995, 29.379999999999995, 29.379999999999995]
Step 4100 (epoch 5.83), 59.5 ms
Minibatch loss: 0.491, learning rate: 0.000774
Minibatch er

### Introduce G: an additive bias computed from the prior, added after the multiplicative gate

In [8]:
def model_step(input_images, prior, batch_size, training, num_labels, use_priors):
    """Run one unrolled step of the CNN with prior-driven gate and bias.

    Two stages of 5x5 "same" convolution (ReLU) followed by 2x2 max pooling
    produce a feature map that is flattened. When ``use_priors`` is true, a
    single 100-unit ReLU projection of ``prior`` feeds two heads: a sigmoid
    gate that multiplies the flattened features and a linear bias that is
    added afterwards. A 1024-unit ReLU layer and a linear layer then produce
    the class logits.

    Args:
        input_images: image batch fed to the first convolution.
        prior: tensor the gate and bias are computed from; only read when
            ``use_priors`` is true.
        batch_size: not used by this function.
        training: not used by this function.
        num_labels: width of the logits layer.
        use_priors: whether to modulate the flattened features with ``prior``.

    Returns:
        Tuple ``(logits, posteriors)``; ``posteriors`` is the softmax of
        ``logits``.
    """
    def conv_relu_pool(features, num_filters):
        # 5x5 "same" convolution with ReLU, then 2x2 / stride-2 max pooling.
        activated = tf.layers.conv2d(
            inputs=features,
            filters=num_filters,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)
        return tf.layers.max_pooling2d(inputs=activated, pool_size=[2, 2], strides=2)

    stage1 = conv_relu_pool(input_images, 32)
    stage2 = conv_relu_pool(stage1, 64)

    # Collapse every non-batch axis of the feature map into a single axis.
    static_shape = stage2.get_shape()
    flat_width = static_shape[1] * static_shape[2] * static_shape[3]
    flat_features = tf.reshape(stage2, [-1, flat_width])

    if not use_priors:
        modulated = flat_features
    else:
        # One shared projection of the prior feeds both the gate and the bias.
        shared_hidden = tf.layers.dense(inputs=prior, units=100, activation=tf.nn.relu)
        gates = tf.layers.dense(inputs=shared_hidden, units=flat_width, activation=tf.nn.sigmoid)
        bias = tf.layers.dense(inputs=shared_hidden, units=flat_width, activation=None)
        modulated = tf.multiply(flat_features, gates) + bias

    fully_connected = tf.layers.dense(inputs=modulated, units=1024, activation=tf.nn.relu)
    logits = tf.layers.dense(inputs=fully_connected, units=num_labels)

    return logits, tf.nn.softmax(logits)

def apply(input_images, training, train_labels_node, num_labels, use_priors):
    """Unroll ``model_step`` NUM_UNROLL_STEPS times with shared variables.

    The first step receives a uniform distribution over the classes as its
    prior; each later step receives the previous step's posteriors. The loss
    is the sum over steps of the mean sparse softmax cross-entropy against
    ``train_labels_node``.

    Args:
        input_images: image batch; its static leading dimension is used as
            the batch size.
        training: forwarded unchanged to ``model_step``.
        train_labels_node: integer class labels for the batch.
        num_labels: number of classes.
        use_priors: forwarded unchanged to ``model_step``.

    Returns:
        Tuple ``(stacked_logits, loss)`` where ``stacked_logits`` stacks the
        per-step logits along a new leading axis.
    """
    batch_size = input_images.get_shape()[0]
    # Step 0 starts from a uniform prior: 1 / num_labels for every class.
    priors = tf.ones((batch_size, num_labels)) / num_labels

    per_step_logits = []
    loss = 0.0
    for step in range(NUM_UNROLL_STEPS):
        # Variables are created on the first step and reused on later ones.
        with tf.variable_scope('one_step', reuse=(step != 0)):
            step_logits, priors = model_step(input_images, priors, batch_size,
                                             training=training, num_labels=num_labels,
                                             use_priors=use_priors)
        per_step_logits.append(step_logits)
        step_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=train_labels_node, logits=step_logits))
        loss = loss + step_loss

    return tf.stack(per_step_logits), loss

In [9]:
use_priors = True

# Drop whatever graph earlier cells built before constructing this run.
tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter that drives the learning-rate schedule. It is handed to
# utils.run_train as `batch_var` -- presumably incremented once per batch
# there (confirm in utils).
batch = tf.Variable(0, dtype=tf.float32)

# Staircase exponential decay: starts at 1e-3 and is multiplied by 0.95 each
# time `batch * BATCH_SIZE` passes another `dataset.train_size` examples.
learning_rate = tf.train.exponential_decay(
    learning_rate=1e-3,
    global_step=batch * BATCH_SIZE,
    decay_steps=dataset.train_size,
    decay_rate=0.95,
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = {
    'optimizer': optimizer,
    'batch_var': batch,
    'learning_rate_var': learning_rate,
    'train_batch_size': BATCH_SIZE,
    'eval_batch_size': EVAL_BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'eval_frequency': EVAL_FREQUENCY,
}

history, stdout_lines = utils.run_train(
    apply, train_config, dataset,
    build_func_kwargs={'use_priors': use_priors})

Initialized!
Step 0 (epoch 0.00), 11.5 ms
Minibatch loss: 10.593, learning rate: 0.001000
Minibatch error: [65.625, 65.625, 65.625, 65.625, 65.625]
Validation error: [84.24, 84.38, 84.38, 84.38, 84.38]
Step 100 (epoch 0.14), 64.8 ms
Minibatch loss: 7.937, learning rate: 0.001000
Minibatch error: [53.125, 56.25, 56.25, 56.25, 56.25]
Validation error: [54.16, 54.86, 54.84, 55.1, 55.1]
Step 200 (epoch 0.28), 64.7 ms
Minibatch loss: 6.620, learning rate: 0.001000
Minibatch error: [48.4375, 50.0, 50.0, 50.0, 50.0]
Validation error: [48.28, 49.42, 49.7, 49.66, 49.66]
Step 300 (epoch 0.43), 64.8 ms
Minibatch loss: 6.422, learning rate: 0.001000
Minibatch error: [48.4375, 45.3125, 45.3125, 45.3125, 45.3125]
Validation error: [46.1, 45.96, 45.78, 45.82, 45.82]
Step 400 (epoch 0.57), 64.8 ms
Minibatch loss: 5.773, learning rate: 0.001000
Minibatch error: [37.5, 42.1875, 42.1875, 42.1875, 42.1875]
Validation error: [41.74, 41.62, 41.48, 41.5, 41.5]
Step 500 (epoch 0.71), 64.9 ms
Minibatch loss: 5

Step 3700 (epoch 5.26), 64.8 ms
Minibatch loss: 0.090, learning rate: 0.000774
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [27.120000000000005, 27.200000000000003, 27.239999999999995, 27.28, 27.260000000000005]
Step 3800 (epoch 5.40), 64.8 ms
Minibatch loss: 0.196, learning rate: 0.000774
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [29.159999999999997, 29.180000000000007, 29.159999999999997, 29.14, 29.14]
Step 3900 (epoch 5.55), 64.8 ms
Minibatch loss: 0.338, learning rate: 0.000774
Minibatch error: [1.5625, 1.5625, 1.5625, 1.5625, 1.5625]
Validation error: [27.060000000000002, 27.14, 27.28, 27.28, 27.260000000000005]
Step 4000 (epoch 5.69), 64.6 ms
Minibatch loss: 0.428, learning rate: 0.000774
Minibatch error: [3.125, 3.125, 3.125, 3.125, 3.125]
Validation error: [27.939999999999998, 28.0, 27.980000000000004, 27.980000000000004, 27.939999999999998]
Step 4100 (epoch 5.83), 64.7 ms
Minibatch loss: 0.367, learning rate: 0.000774
Minibatch error: [1.5625

## Separate 2-layer networks for F and G

In [10]:
def model_step(input_images, prior, batch_size, training, num_labels, use_priors):
    """Run one unrolled step of the CNN with separate gate and bias networks.

    Two stages of 5x5 "same" convolution (ReLU) followed by 2x2 max pooling
    produce a feature map that is flattened. When ``use_priors`` is true,
    ``prior`` is fed through two independent two-layer networks (each with a
    100-unit ReLU hidden layer): one ends in a sigmoid gate that multiplies
    the flattened features, the other in a linear bias that is added
    afterwards. A 1024-unit ReLU layer and a linear layer then produce the
    class logits.

    Args:
        input_images: image batch fed to the first convolution.
        prior: tensor the gate and bias are computed from; only read when
            ``use_priors`` is true.
        batch_size: not used by this function.
        training: not used by this function.
        num_labels: width of the logits layer.
        use_priors: whether to modulate the flattened features with ``prior``.

    Returns:
        Tuple ``(logits, posteriors)``; ``posteriors`` is the softmax of
        ``logits``.
    """
    def conv_relu_pool(features, num_filters):
        # 5x5 "same" convolution with ReLU, then 2x2 / stride-2 max pooling.
        activated = tf.layers.conv2d(
            inputs=features,
            filters=num_filters,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)
        return tf.layers.max_pooling2d(inputs=activated, pool_size=[2, 2], strides=2)

    stage1 = conv_relu_pool(input_images, 32)
    stage2 = conv_relu_pool(stage1, 64)

    # Collapse every non-batch axis of the feature map into a single axis.
    static_shape = stage2.get_shape()
    flat_width = static_shape[1] * static_shape[2] * static_shape[3]
    flat_features = tf.reshape(stage2, [-1, flat_width])

    if not use_priors:
        modulated = flat_features
    else:
        # Gate network: prior -> 100 ReLU units -> sigmoid gate per feature.
        gate_hidden = tf.layers.dense(inputs=prior, units=100, activation=tf.nn.relu)
        gates = tf.layers.dense(inputs=gate_hidden, units=flat_width, activation=tf.nn.sigmoid)
        # Bias network: its own 100 ReLU units -> linear bias per feature.
        bias_hidden = tf.layers.dense(inputs=prior, units=100, activation=tf.nn.relu)
        bias = tf.layers.dense(inputs=bias_hidden, units=flat_width, activation=None)
        modulated = tf.multiply(flat_features, gates) + bias

    fully_connected = tf.layers.dense(inputs=modulated, units=1024, activation=tf.nn.relu)
    logits = tf.layers.dense(inputs=fully_connected, units=num_labels)

    return logits, tf.nn.softmax(logits)

def apply(input_images, training, train_labels_node, num_labels, use_priors):
    """Unroll ``model_step`` NUM_UNROLL_STEPS times with shared variables.

    The first step receives a uniform distribution over the classes as its
    prior; each later step receives the previous step's posteriors. The loss
    is the sum over steps of the mean sparse softmax cross-entropy against
    ``train_labels_node``.

    Args:
        input_images: image batch; its static leading dimension is used as
            the batch size.
        training: forwarded unchanged to ``model_step``.
        train_labels_node: integer class labels for the batch.
        num_labels: number of classes.
        use_priors: forwarded unchanged to ``model_step``.

    Returns:
        Tuple ``(stacked_logits, loss)`` where ``stacked_logits`` stacks the
        per-step logits along a new leading axis.
    """
    batch_size = input_images.get_shape()[0]
    # Step 0 starts from a uniform prior: 1 / num_labels for every class.
    priors = tf.ones((batch_size, num_labels)) / num_labels

    per_step_logits = []
    loss = 0.0
    for step in range(NUM_UNROLL_STEPS):
        # Variables are created on the first step and reused on later ones.
        with tf.variable_scope('one_step', reuse=(step != 0)):
            step_logits, priors = model_step(input_images, priors, batch_size,
                                             training=training, num_labels=num_labels,
                                             use_priors=use_priors)
        per_step_logits.append(step_logits)
        step_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=train_labels_node, logits=step_logits))
        loss = loss + step_loss

    return tf.stack(per_step_logits), loss

In [11]:
use_priors = True

# Drop whatever graph earlier cells built before constructing this run.
tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter that drives the learning-rate schedule. It is handed to
# utils.run_train as `batch_var` -- presumably incremented once per batch
# there (confirm in utils).
batch = tf.Variable(0, dtype=tf.float32)

# Staircase exponential decay: starts at 1e-3 and is multiplied by 0.95 each
# time `batch * BATCH_SIZE` passes another `dataset.train_size` examples.
learning_rate = tf.train.exponential_decay(
    learning_rate=1e-3,
    global_step=batch * BATCH_SIZE,
    decay_steps=dataset.train_size,
    decay_rate=0.95,
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = {
    'optimizer': optimizer,
    'batch_var': batch,
    'learning_rate_var': learning_rate,
    'train_batch_size': BATCH_SIZE,
    'eval_batch_size': EVAL_BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'eval_frequency': EVAL_FREQUENCY,
}

history, stdout_lines = utils.run_train(
    apply, train_config, dataset,
    build_func_kwargs={'use_priors': use_priors})

Initialized!
Step 0 (epoch 0.00), 10.9 ms
Minibatch loss: 10.441, learning rate: 0.001000
Minibatch error: [76.5625, 76.5625, 76.5625, 76.5625, 76.5625]
Validation error: [88.76, 88.78, 88.78, 88.78, 88.78]
Step 100 (epoch 0.14), 65.9 ms
Minibatch loss: 8.046, learning rate: 0.001000
Minibatch error: [59.375, 59.375, 59.375, 59.375, 59.375]
Validation error: [54.96, 56.16, 56.18, 56.38, 56.34]
Step 200 (epoch 0.28), 65.8 ms
Minibatch loss: 6.769, learning rate: 0.001000
Minibatch error: [48.4375, 53.125, 53.125, 53.125, 53.125]
Validation error: [49.58, 50.52, 50.76, 50.76, 50.76]
Step 300 (epoch 0.43), 65.7 ms
Minibatch loss: 6.135, learning rate: 0.001000
Minibatch error: [48.4375, 45.3125, 45.3125, 45.3125, 45.3125]
Validation error: [44.98, 44.68, 44.38, 44.34, 44.32]
Step 400 (epoch 0.57), 65.8 ms
Minibatch loss: 6.185, learning rate: 0.001000
Minibatch error: [43.75, 42.1875, 40.625, 40.625, 40.625]
Validation error: [43.34, 44.42, 44.14, 44.16, 44.16]
Step 500 (epoch 0.71), 65.7

Step 3700 (epoch 5.26), 65.8 ms
Minibatch loss: 0.139, learning rate: 0.000774
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [28.879999999999995, 28.64, 28.659999999999997, 28.659999999999997, 28.680000000000007]
Step 3800 (epoch 5.40), 65.8 ms
Minibatch loss: 0.293, learning rate: 0.000774
Minibatch error: [1.5625, 1.5625, 1.5625, 1.5625, 1.5625]
Validation error: [29.959999999999994, 29.900000000000006, 30.040000000000006, 30.060000000000002, 30.040000000000006]
Step 3900 (epoch 5.55), 65.9 ms
Minibatch loss: 0.067, learning rate: 0.000774
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [28.340000000000003, 28.379999999999995, 28.340000000000003, 28.340000000000003, 28.299999999999997]
Step 4000 (epoch 5.69), 65.8 ms
Minibatch loss: 0.191, learning rate: 0.000774
Minibatch error: [0.0, 1.5625, 1.5625, 1.5625, 1.5625]
Validation error: [29.040000000000006, 29.060000000000002, 29.0, 29.040000000000006, 29.040000000000006]
Step 4100 (epoch 5.83), 66.0 ms
Mini

In [12]:
def model_step(input_images, prior, batch_size, training, num_labels, use_priors):
    """Run one unrolled step of the CNN with prior-driven gating at every layer.

    The network is conv(32, 5x5, ReLU) -> pool -> conv(64, 5x5, ReLU) -> pool
    -> flatten -> dense(1024, ReLU) -> dense(num_labels). When ``use_priors``
    is true, four activations are modulated as ``x * gate + bias``: each
    convolution output (per channel, before pooling), the flattened pool
    output, and the 1024-unit layer output (per unit). Every gate/bias pair
    comes from two separate networks on ``prior``, each with one 100-unit
    ReLU hidden layer; the gate ends in a sigmoid, the bias is linear.

    Args:
        input_images: image batch; its static leading dimension is used when
            flattening the pooled feature map.
        prior: tensor the gates and biases are computed from; only read when
            ``use_priors`` is true.
        batch_size: not used by this function.
        training: not used by this function.
        num_labels: width of the logits layer.
        use_priors: whether to apply the prior-driven modulation.

    Returns:
        Tuple ``(logits, posteriors)``; ``posteriors`` is the softmax of
        ``logits``.
    """
    def prior_networks(layer_sizes):
        # Build the gate and bias networks layer by layer, gate first at each
        # depth, so layers are created in a fixed interleaved order.
        hidden_sizes, out_size = layer_sizes[:-1], layer_sizes[-1]
        gate_net, bias_net = prior, prior
        for width in hidden_sizes:
            gate_net = tf.layers.dense(inputs=gate_net, units=width, activation=tf.nn.relu)
            bias_net = tf.layers.dense(inputs=bias_net, units=width, activation=tf.nn.relu)
        gate_net = tf.layers.dense(inputs=gate_net, units=out_size, activation=tf.nn.sigmoid)
        bias_net = tf.layers.dense(inputs=bias_net, units=out_size, activation=None)
        return gate_net, bias_net

    def modulate(activations, hidden_sizes, spatial):
        # Returns activations * gate + bias, or the input when priors are off.
        if not use_priors:
            return activations
        # Last axis is the channel axis for conv maps and the unit axis for
        # 2-D activations.
        width = activations.get_shape()[-1]
        gates, bias = prior_networks(hidden_sizes + (width, ))
        if spatial:
            # Insert two singleton axes after the batch axis so the per-channel
            # values broadcast over the two middle axes of the conv output.
            gates = tf.expand_dims(tf.expand_dims(gates, axis=1), axis=1)
            bias = tf.expand_dims(tf.expand_dims(bias, axis=1), axis=1)
        return tf.multiply(activations, gates) + bias

    def conv_relu(features, num_filters):
        # 5x5 "same" convolution with ReLU.
        return tf.layers.conv2d(
            inputs=features,
            filters=num_filters,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)

    def max_pool(features):
        # 2x2 max pooling with stride 2.
        return tf.layers.max_pooling2d(inputs=features, pool_size=[2, 2], strides=2)

    stage1 = max_pool(modulate(conv_relu(input_images, 32), (100,), spatial=True))
    stage2 = max_pool(modulate(conv_relu(stage1, 64), (100,), spatial=True))

    # Flatten using the static batch dimension of the pooled feature map.
    flat_features = tf.reshape(stage2, [stage2.get_shape()[0], -1])
    flat_features = modulate(flat_features, (100,), spatial=False)

    fully_connected = tf.layers.dense(inputs=flat_features, units=1024, activation=tf.nn.relu)
    fully_connected = modulate(fully_connected, (100,), spatial=False)

    logits = tf.layers.dense(inputs=fully_connected, units=num_labels)

    return logits, tf.nn.softmax(logits)

def apply(input_images, training, train_labels_node, num_labels, use_priors):
    """Unroll ``model_step`` NUM_UNROLL_STEPS times with shared variables.

    The first step receives a uniform distribution over the classes as its
    prior; each later step receives the previous step's posteriors. The loss
    is the sum over steps of the mean sparse softmax cross-entropy against
    ``train_labels_node``.

    Args:
        input_images: image batch; its static leading dimension is used as
            the batch size.
        training: forwarded unchanged to ``model_step``.
        train_labels_node: integer class labels for the batch.
        num_labels: number of classes.
        use_priors: forwarded unchanged to ``model_step``.

    Returns:
        Tuple ``(stacked_logits, loss)`` where ``stacked_logits`` stacks the
        per-step logits along a new leading axis.
    """
    batch_size = input_images.get_shape()[0]
    # Step 0 starts from a uniform prior: 1 / num_labels for every class.
    priors = tf.ones((batch_size, num_labels)) / num_labels

    per_step_logits = []
    loss = 0.0
    for step in range(NUM_UNROLL_STEPS):
        # Variables are created on the first step and reused on later ones.
        with tf.variable_scope('one_step', reuse=(step != 0)):
            step_logits, priors = model_step(input_images, priors, batch_size,
                                             training=training, num_labels=num_labels,
                                             use_priors=use_priors)
        per_step_logits.append(step_logits)
        step_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=train_labels_node, logits=step_logits))
        loss = loss + step_loss

    return tf.stack(per_step_logits), loss

In [13]:
use_priors = True

# Drop whatever graph earlier cells built before constructing this run.
tf.reset_default_graph()

dataset = utils.get_dataset(DATASET)

# Step counter that drives the learning-rate schedule. It is handed to
# utils.run_train as `batch_var` -- presumably incremented once per batch
# there (confirm in utils).
batch = tf.Variable(0, dtype=tf.float32)

# Staircase exponential decay: starts at 1e-3 and is multiplied by 0.95 each
# time `batch * BATCH_SIZE` passes another `dataset.train_size` examples.
learning_rate = tf.train.exponential_decay(
    learning_rate=1e-3,
    global_step=batch * BATCH_SIZE,
    decay_steps=dataset.train_size,
    decay_rate=0.95,
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate)

train_config = {
    'optimizer': optimizer,
    'batch_var': batch,
    'learning_rate_var': learning_rate,
    'train_batch_size': BATCH_SIZE,
    'eval_batch_size': EVAL_BATCH_SIZE,
    'num_epochs': NUM_EPOCHS,
    'eval_frequency': EVAL_FREQUENCY,
}

history, stdout_lines = utils.run_train(
    apply, train_config, dataset,
    build_func_kwargs={'use_priors': use_priors})

Initialized!
Step 0 (epoch 0.00), 24.2 ms
Minibatch loss: 11.202, learning rate: 0.001000
Minibatch error: [84.375, 84.375, 84.375, 84.375, 84.375]
Validation error: [90.28, 90.28, 90.28, 90.28, 90.28]
Step 100 (epoch 0.14), 101.4 ms
Minibatch loss: 8.938, learning rate: 0.001000
Minibatch error: [64.0625, 62.5, 62.5, 62.5, 62.5]
Validation error: [59.42, 60.0, 59.94, 60.2, 60.12]
Step 200 (epoch 0.28), 101.4 ms
Minibatch loss: 6.847, learning rate: 0.001000
Minibatch error: [45.3125, 48.4375, 45.3125, 46.875, 46.875]
Validation error: [51.7, 52.84, 52.0, 52.16, 52.12]
Step 300 (epoch 0.43), 101.2 ms
Minibatch loss: 6.590, learning rate: 0.001000
Minibatch error: [50.0, 53.125, 51.5625, 53.125, 51.5625]
Validation error: [48.08, 48.9, 48.4, 48.66, 48.5]
Step 400 (epoch 0.57), 101.4 ms
Minibatch loss: 6.225, learning rate: 0.001000
Minibatch error: [45.3125, 46.875, 43.75, 43.75, 43.75]
Validation error: [46.72, 46.56, 45.96, 46.12, 45.96]
Step 500 (epoch 0.71), 101.3 ms
Minibatch loss:

Step 3800 (epoch 5.40), 101.1 ms
Minibatch loss: 0.342, learning rate: 0.000774
Minibatch error: [1.5625, 1.5625, 1.5625, 1.5625, 1.5625]
Validation error: [29.14, 29.5, 29.64, 29.620000000000005, 29.659999999999997]
Step 3900 (epoch 5.55), 101.3 ms
Minibatch loss: 0.088, learning rate: 0.000774
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [28.700000000000003, 28.760000000000005, 28.64, 28.64, 28.560000000000002]
Step 4000 (epoch 5.69), 101.4 ms
Minibatch loss: 0.215, learning rate: 0.000774
Minibatch error: [0.0, 0.0, 0.0, 0.0, 0.0]
Validation error: [29.5, 29.459999999999994, 29.480000000000004, 29.540000000000006, 29.519999999999996]
Step 4100 (epoch 5.83), 101.3 ms
Minibatch loss: 0.255, learning rate: 0.000774
Minibatch error: [3.125, 1.5625, 1.5625, 1.5625, 1.5625]
Validation error: [28.72, 28.560000000000002, 28.560000000000002, 28.599999999999994, 28.64]
Step 4200 (epoch 5.97), 101.5 ms
Minibatch loss: 0.267, learning rate: 0.000774
Minibatch error: [0.0, 1.5625