In [13]:
from __future__ import division, print_function
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import Image, display, clear_output
import numpy as np
import matplotlib.pyplot as plt
import sklearn.datasets
import tensorflow as tf
import math
from tensorflow.python.framework.ops import reset_default_graph

from sklearn.utils import shuffle

In [36]:

# To speed up training we'll only work on a subset of the data containing only the numbers 0, 1, 4, 9.
# We discretize the data to 0 and 1 in order to use it with a 
# bernoulli observation model p(x|z) = Ber(mu(z))

def bernoulli_sample(x):
    """Binarise `x` by drawing one Bernoulli sample per element.

    Each element of `x` is used as the success probability of a single
    trial, so the result only contains the values 0 and 1.

    Args:
        x: numpy array of probabilities (must expose `.shape`).

    Returns:
        float32 numpy array with the same shape as `x`.
    """
    draws = np.random.binomial(n=1, p=x, size=x.shape)
    return draws.astype('float32')

# Open the compressed MNIST archive; arrays are looked up by key below.
data = np.load('data/mnist.npz')

# Every label that can occur in the dataset.
classes = list(range(10))

# The digits we actually keep for this experiment.
included_classes = [0, 1, 4, 9]

# Collect, class by class, the row indices of the samples to keep in each split.
idxs_train, idxs_valid, idxs_test = [], [], []
num_classes = 0
for c in included_classes:
    if c not in classes:
        # Ignore requested labels that do not exist in the dataset.
        continue
    num_classes += 1
    idxs_train.extend(np.nonzero(data['y_train'] == c)[0].tolist())
    idxs_valid.extend(np.nonzero(data['y_valid'] == c)[0].tolist())
    idxs_test.extend(np.nonzero(data['y_test'] == c)[0].tolist())

print("Number of classes included:", num_classes)

# Training split: binarise the pixels, then shuffle inputs and labels together.
# The model is unsupervised, so the targets are only used for validation.
x_train = bernoulli_sample(data['X_train'][idxs_train]).astype('float32')
targets_train = data['y_train'][idxs_train].astype('int32')
x_train, targets_train = shuffle(x_train, targets_train, random_state=1234)

# Validation split (kept in class order, not shuffled).
x_valid = bernoulli_sample(data['X_valid'][idxs_valid]).astype('float32')
targets_valid = data['y_valid'][idxs_valid].astype('int32')

# Test split (kept in class order, not shuffled).
x_test = bernoulli_sample(data['X_test'][idxs_test]).astype('float32')
targets_test = data['y_test'][idxs_test].astype('int32')

print("training set dim(%i, %i)." % x_train.shape)
print("validation set dim(%i, %i)." % x_valid.shape)
print("test set dim(%i, %i)." % x_test.shape)

Number of classes included: 4
training set dim(20457, 784).
validation set dim(3999, 784).
test set dim(4106, 784).


In [37]:
from tensorflow import layers
from tensorflow.contrib.layers import fully_connected, convolution2d, convolution2d_transpose, batch_norm, max_pool2d, dropout
from tensorflow.python.ops.nn import relu, elu, relu6, sigmoid, tanh, softmax, softplus

# Error functions    
def sum_of_squared_errors(p, t):
    """Per-sample sum of squared differences between predictions and targets.

    Args:
        p: predictions.
        t: targets, broadcastable against `p`.

    Returns:
        Tensor with the squared error summed over axis 1.
    """
    residual = p - t
    return tf.reduce_sum(tf.square(residual), axis=[1])


# Cross entropy for one-of-K targets, computed per sample.
def categorical_cross_entropy(p, t, eps=1e-10):
    """Per-sample categorical cross entropy.

    Args:
        p: predicted class probabilities.
        t: target weights with the same shape as `p`.
        eps: small constant added to `p` before the log, guarding log(0).

    Returns:
        Tensor holding `-sum(t * log(p + eps))` over axis 1.
    """
    log_p = tf.log(p + eps)
    return -tf.reduce_sum(t * log_p, axis=[1])


def binary_cross_entropy(p, t, eps=1e-10):
    """Per-sample binary cross entropy, summed over the last axis.

    Args:
        p: predicted probabilities of the "on" outcome.
        t: binary targets with the same shape as `p`.
        eps: small constant added inside both logs, guarding log(0).

    Returns:
        Tensor holding `-sum(t*log(p+eps) + (1-t)*log(1-p+eps))` over the
        last axis.
    """
    on_term = t * tf.log(p + eps)
    off_term = (1 - t) * tf.log(1 - p + eps)
    return -tf.reduce_sum(on_term + off_term, axis=-1)


def kl_normal2_stdnormal(mean, log_var, eps=0.0):
    """KL divergence from a diagonal Gaussian to the standard normal.

    Evaluates `-0.5 * sum(1 + log_var - mean^2 - exp(log_var))` over axis 1.

    Args:
        mean: means of the diagonal Gaussian.
        log_var: log-variances of the diagonal Gaussian.
        eps: accepted for signature parity with the other helpers; unused.

    Returns:
        Tensor with one KL value per row.
    """
    per_dim = 1 + log_var - tf.square(mean) - tf.exp(log_var)
    return -0.5 * tf.reduce_sum(per_dim, axis=1)


# Kept at module level for backward compatibility with code that reads `c`.
c = - 0.5 * math.log(2*math.pi)
def log_normal2(x, mean, log_var, eps=0.0):
    """Log-density of `x` under a diagonal Gaussian, summed over axis 1.

    Evaluates, per element,
    `-0.5*log(2*pi) - log_var/2 - (x - mean)^2 / (2*exp(log_var) + eps)`
    and sums the result over axis 1.

    Args:
        x: points at which to evaluate the density.
        mean: means of the Gaussian.
        log_var: log-variances of the Gaussian.
        eps: constant added to the denominator `2*exp(log_var)`.

    Returns:
        Tensor with one log-density value per row.
    """
    # Computed locally on purpose: the module-level name `c` is also the loop
    # variable of the class-selection loop in the data-loading cell, so
    # re-running that cell would otherwise silently replace the normalisation
    # constant with a class label.
    log_norm_const = -0.5 * math.log(2 * math.pi)
    squared_dev = tf.square(x - mean)
    return tf.reduce_sum(
        log_norm_const - log_var/2 - squared_dev / (2 * tf.exp(log_var) + eps),
        axis=[1])

In [77]:
# Clear the default graph before (re)building the model in this cell.
reset_default_graph()

# -- THE MODEL --#
num_channels = 1  # Black and white for MNIST
# Note: this rebinds `num_classes`, which the data-loading cell also sets.
num_classes = 2
# Number of output units requested from every layer inside a dense block.
k = 16
# Spatial size of the input images.
height = 28
width = 28

# Layer definitions
def layer(x, units):
    """Single dense-block layer: batch-normalised ReLU projection, 3x3 conv, dropout.

    Args:
        x: input feature-map tensor.
        units: number of outputs for both the projection and the convolution.

    Returns:
        The tensor produced by the final dropout.

    Reads the module-level `is_training_pl` placeholder, which must exist by
    the time this function is called.
    """
    x = fully_connected(
        x, num_outputs=units, activation_fn=relu,
        normalizer_fn=batch_norm,
        # Without these parameters batch_norm falls back to its default
        # is_training=True and keeps using batch statistics at evaluation
        # time, while the dropout below does follow the flag.
        # updates_collections=None makes the moving averages update in place,
        # so the fix does not depend on the training op running UPDATE_OPS.
        normalizer_params={'is_training': is_training_pl,
                           'updates_collections': None})
    x = convolution2d(x, num_outputs=units, kernel_size=(3, 3),
                      stride=1)
    # Dropout uses the library's default keep probability.
    return dropout(x, is_training=is_training_pl)
    
def dense_block(x, num_layers):
    """Stack `num_layers` layers, feeding each one everything produced so far.

    Every layer receives the running concatenation (block input plus all
    earlier layer outputs) and contributes `k` new outputs, which are appended
    along the last axis.

    Args:
        x: input tensor of the block.
        num_layers: how many layers to stack.

    Returns:
        A pair `(stacked, new_features)`: `stacked` is the block input
        concatenated with every layer output, and `new_features` is the list
        of the individual layer outputs in creation order.
    """
    new_features = []
    for _ in range(num_layers):
        fresh = layer(x, k)
        new_features.append(fresh)
        x = tf.concat([x, fresh], axis=-1)
    return x, new_features
    

def transition_up(x, units):
    """Upsample `x` with a 3x3 transposed convolution of stride 2.

    Args:
        x: input feature-map tensor.
        units: number of output channels.

    Returns:
        The output tensor of the transposed convolution.
    """
    upsampled = convolution2d_transpose(
        x,
        num_outputs=units,
        kernel_size=(3, 3),
        stride=2
    )
    return upsampled
    
    
def transition_down(x, units, pooling=True):
    """Transition-down step: batch-normalised ReLU projection, 1x1 conv, dropout, optional pool.

    Args:
        x: input feature-map tensor.
        units: number of outputs for both the projection and the convolution.
        pooling: when true, finish with a 2x2 max-pool.

    Returns:
        The transformed (and, if requested, pooled) tensor.

    Reads the module-level `is_training_pl` placeholder, which must exist by
    the time this function is called.
    """
    # Batch norm is applied through the normalizer of the projection below.
    x = fully_connected(
        x, num_outputs=units, activation_fn=relu,
        normalizer_fn=batch_norm,
        # Without these parameters batch_norm falls back to its default
        # is_training=True and keeps using batch statistics at evaluation
        # time, while the dropout below does follow the flag.
        # updates_collections=None makes the moving averages update in place,
        # so the fix does not depend on the training op running UPDATE_OPS.
        normalizer_params={'is_training': is_training_pl,
                           'updates_collections': None})
    x = convolution2d(x, num_outputs=units, kernel_size=(1, 1),
                      stride=1)
    x = dropout(x, is_training=is_training_pl)
    if pooling:
        x = max_pool2d(x, kernel_size=(2, 2))
    return x

# - Tiramisu Architecture - #
# Graph inputs: an image batch, the targets, and the train/eval switch that
# the layer helpers read.
x_pl = tf.placeholder(dtype=tf.float32,
                      shape=[None, height, width, num_channels],
                      name='x_pl')
y_pl = tf.placeholder(dtype=tf.float32,
                      shape=[None, num_classes],
                      name='y_pl')
is_training_pl = tf.placeholder(dtype=tf.bool, name="is-training_pl")

# Show the static shapes as a quick sanity check.
print('x_pl', x_pl.shape)
print('y_pl', y_pl.shape)

# Assemble the network: two dense-block/transition-down stages, a bottleneck
# dense block, then two transition-up/dense-block stages with skip
# concatenations, followed by the output layers. `x` is rebound at every step
# and always holds the current head of the network.
with tf.name_scope('tiramisu'):
    # DOWN SAMPLING
    # Initial 3x3 convolution producing k feature maps from the input image.
    x = convolution2d(x_pl, num_outputs=k, kernel_size=(3, 3),
                             stride=1, scope="pre-convolution")

    # dense_block returns (stacked tensor, list of layer outputs); only the
    # stacked tensor is used here.
    skip1 = dense_block(x, 4)[0]
    
    # NOTE(review): the stacked tensor already starts with the block input, so
    # this concat includes `x` a second time -- confirm this is intended.
    skip1 = tf.concat([x, skip1], axis=-1)
    # The literal 16 presumably mirrors k -- TODO confirm and use k instead.
    x = transition_down(skip1, 4*16, True)

    skip2 = dense_block(x, 5)[0]
    # NOTE(review): same duplication of the block input as for skip1.
    skip2 = tf.concat([x, skip2], axis=-1)
    x = transition_down(skip2, 5*16+4*16, True)

    # BOTTLENECK
    x = dense_block(x, 15)[0]

    # UPSAMPLING
    # Each stage: transposed convolution, concat with the skip tensor saved at
    # the matching point of the down path, then a dense block.
    x = transition_up(x, 15*16)
    x = tf.concat([x, skip2], axis=-1)
    x = dense_block(x, 5)[0]

    x = transition_up(x, 5*16)
    skipUp = tf.concat([x, skip1], axis=-1)
    x = dense_block(skipUp, 4)[0]
    # NOTE(review): the dense block output already contains skipUp, so skipUp
    # ends up in `x` twice after this concat -- confirm this is intended.
    x = tf.concat([x, skipUp], axis=-1)

    # Output layers
    # 1x1 convolution down to num_classes channels. No activation_fn is
    # passed, so the library default applies -- TODO confirm that is wanted
    # right before the softmax layer.
    x = convolution2d(x, num_outputs=num_classes, kernel_size=(1, 1),
                             stride=1, scope="post-convolution")

    # Final projection with a softmax activation.
    # NOTE(review): `x` presumably still has the 4-D layout of x_pl, whereas
    # y_pl is declared as [None, num_classes] -- verify against the loss.
    x = fully_connected(x, num_outputs=num_classes, activation_fn=softmax, scope="SoftMax")

print("Model built")

x_pl (?, 28, 28, 1)
y_pl (?, 2)
Model built
