# Variational Auto-Encoder

Work in progress.

In [23]:
import conx as cx
import keras.backend as K

We need a function for the sampling layer (the "encode" LambdaLayer below) to apply to its merged inputs:

In [24]:
# Latent size: the width of the "mean" and "stddev" layers and of each
# sample drawn by sampler().
LENGTH = 5 # latent size

In [25]:
def sampler(inputs):
    """Draw a latent sample from the merged mean/stddev bank outputs.

    ``inputs`` is the concatenation of the incoming banks: the first
    ``LENGTH`` columns are read as the mean, the remaining columns as the
    spread parameter, which is exponentiated before scaling the noise.

    NOTE(review): the spread is applied as ``exp(stddev)`` here (a standard
    deviation), while the loss functions in this file use ``exp(stddev)``
    in the position of a variance term -- confirm which parameterization
    is intended.
    """
    mu = inputs[:, :LENGTH]
    log_sigma = inputs[:, LENGTH:]
    # One standard-normal draw per row and latent dimension; the row count
    # is read at run time so any minibatch size works.
    n_rows = K.shape(mu)[0]
    noise = K.random_normal(shape=(n_rows, LENGTH), mean=0, stddev=1)
    # Reparameterization: mu + sigma * X with X ~ N(0, 1) is distributed
    # as N(mu, sigma^2).
    scaled_noise = K.exp(log_sigma) * noise
    return mu + scaled_noise

In [26]:
# Create the conx network, named "vae"; layers are added in the next cell.
net = cx.Network("vae")

In [27]:
# Declare the layers: a 2-unit input, a 5-unit hidden layer, two
# LENGTH-wide banks ("mean" and "stddev"), the sampling layer "encode",
# and a 2-unit output. Connections are made separately below.
# The trailing semicolon suppresses the notebook's echo of the return value.
net.add(cx.Layer("input", 2),
        cx.Layer("hidden", 5),
        cx.Layer("mean", LENGTH),
        cx.Layer("stddev", LENGTH),
        cx.LambdaLayer("encode", sampler), # applies sampler() to the merged output of its incoming layers
        cx.Layer("output", 2));

In [28]:
# Wire the layers: input -> hidden, then hidden fans out to both "mean"
# and "stddev"; both of those feed "encode" (the sampler), which in turn
# feeds "output".
net.connect("input", "hidden")
net.connect("hidden", "mean")
net.connect("hidden", "stddev")
net.connect("mean", "encode")
net.connect("stddev", "encode")
net.connect("encode", "output")

To allow an additional error function, we need to declare "encode" (an internal bank) as an output:

In [29]:
# Expose the internal "encode" bank as an extra output so that it can be
# given its own error function at compile time.
net.additional_output_banks = ["encode"]

And then we can provide a dictionary of error functions by name:

In [30]:
# One error function per output bank, keyed by bank name: mean squared
# error for both "output" and "encode", trained with the Adam optimizer.
net.compile(error={"output": "mse", "encode": "mse"}, optimizer="adam")

In [31]:
# Display the network's picture for the sample input [1, -1].
net.picture([1,-1])

<IPython.core.display.Javascript object>

In [32]:
# Propagate [1, -1] forward and return the activations of the "encode"
# bank. sampler() adds random noise, so the values differ between runs.
net.propagate_to("encode", [1,-1])

[0.7351282238960266,
 5.082088470458984,
 0.5933042168617249,
 -2.287989854812622,
 -1.0812584161758423]

`CAPACITY` is used to break the input down into a set number of bases.

`BETA` (> 1) is the weight applied to the latent regularizer.

In [33]:
# Capacity target: bvae_loss pulls the KL term toward CAPACITY/LENGTH.
CAPACITY = 32
# Multiplier applied to the latent (KL) term in bvae_loss.
BETA = 1.5

def bvae_loss(targets, outputs):
    """Return the beta-weighted, capacity-targeted latent loss per sample.

    Args:
        targets: Unused; kept because the function is passed as an error
            function and is called with (targets, outputs).
        outputs: Bank output whose first ``LENGTH`` columns are read as the
            mean and whose remaining columns are read as the spread
            parameter.
            NOTE(review): this assumes ``outputs`` is (batch, 2 * LENGTH);
            the "encode" bank as wired emits only the LENGTH-wide sample
            from sampler() -- confirm which bank this loss should see.

    Returns:
        ``BETA * |KL - CAPACITY / LENGTH|``, reduced over the last axis.
    """
    # Split along the feature axis; axis 0 is the batch, as in sampler().
    mean, stddev = outputs[:, :LENGTH], outputs[:, LENGTH:]
    # kl divergence:
    latent_loss = -0.5 * K.mean(1 + stddev
                        - K.square(mean)
                        - K.exp(stddev), axis=-1)
    # use beta to force less usage of vector space:
    # also try to use <capacity> dimensions of the space:
    latent_loss = BETA * K.abs(latent_loss - CAPACITY/LENGTH)
    return latent_loss

def vae_loss(targets, outputs):
    """Return the KL-divergence latent loss per sample.

    Args:
        targets: Unused; kept because the function is passed as an error
            function and is called with (targets, outputs).
        outputs: Bank output whose first ``LENGTH`` columns are read as the
            mean and whose remaining columns are read as the spread
            parameter.
            NOTE(review): this assumes ``outputs`` is (batch, 2 * LENGTH);
            the "encode" bank as wired emits only the LENGTH-wide sample
            from sampler() -- confirm which bank this loss should see.

    Returns:
        ``-0.5 * mean(1 + stddev - mean**2 - exp(stddev))`` over the last
        axis.
    """
    # Split along the feature axis; axis 0 is the batch, as in sampler().
    mean, stddev = outputs[:, :LENGTH], outputs[:, LENGTH:]
    # kl divergence:
    latent_loss = -0.5 * K.mean(1 + stddev
                        - K.square(mean)
                        - K.exp(stddev), axis=-1)
    return latent_loss

In [34]:
# Compile again, now using vae_loss instead of "mse" as the error function
# for the "encode" bank; "output" keeps mean squared error.
net.compile(error={"output": "mse", "encode": vae_loss}, optimizer="adam")

In [35]:
# Compile once more with the beta-VAE variant (bvae_loss) on "encode";
# presumably this supersedes the previous compile -- confirm.
net.compile(error={"output": "mse", "encode": bvae_loss}, optimizer="adam")