# VAE - Gaussian Linear Classifier

This notebook illustrates how to combine a Variational AutoEncoder (VAE) and a Gaussian Linear Classifier (GLC) with the [beer framework](https://github.com/beer-asr/beer).

In [None]:
%load_ext autoreload
%autoreload 2

# Add the path of the beer source code to the PYTHONPATH.
import sys
sys.path.insert(0, '../')

import math
import yaml
import numpy as np
import torch
import torch.optim
from torch import nn



# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d

# Beer framework
import beer

# Convenience functions for plotting.
import plotting

# Set up bokeh so that `show` renders the figures inside the notebook.
output_notebook(verbose=False)

## Data 

As a simple example we consider the following synthetic data: 

In [None]:
# Build a synthetic 2-D data set made of two Gaussian clusters and plot it.
#
# A dedicated, seeded generator is used so that the very same data set is
# produced after every "Restart Kernel -> Run All", without altering numpy's
# global random state.
rng = np.random.RandomState(0)

# First cluster.
mean = np.array([-3, 3])
cov = np.array([[1, -1], [-1, 2.]])
data1 = rng.multivariate_normal(mean, cov, size=100)

# Second cluster.
mean = np.array([3, 2.5])
cov = np.array([[2, 1], [1, .75]])
data2 = rng.multivariate_normal(mean, cov, size=100)

# Stack both clusters and shuffle the rows in place so that the points of the
# two clusters are interleaved.
data = np.vstack([data1, data2])
rng.shuffle(data)

# Mean, variance of the data to scale the figure.
mean = data.mean(axis=0)
var = data.var(axis=0)
# Use the largest per-dimension standard deviation so that both axes share
# the same scale.
std_dev = np.sqrt(var.max())
x_range = (mean[0] - 3 * std_dev, mean[0] + 3 * std_dev)
y_range = (mean[1] - 3 * std_dev, mean[1] + 3 * std_dev)
global_range = (min(x_range[0], y_range[0]), max(x_range[1], y_range[1]))

fig = figure(title='Data', width=400, height=400,
             x_range=global_range, y_range=global_range)
fig.circle(data[:, 0], data[:, 1])

show(fig)

## Model Creation

We first create the VAE-GLC.

#### NOTE:
To obtain a Gaussian Quadratic Classifier, use a GMM model with individual (diagonal) covariance matrices.

In [None]:
# YAML description of the model, kept as a string template.
#
# It declares a VAE whose encoder and decoder are both made of two
# Linear(50) + ReLU layers, a 2-dimensional latent space, an
# inverse-autoregressive normalizing flow applied on the encoder output and a
# Normal latent model with isotropic covariance.
#
# `<feadim>` is a placeholder: it has to be replaced by the dimension of the
# features before the string is parsed as YAML.
vae_conf_str = '''
type: VAE
llh_type: normal
normalizing_flow:
  type: InverseAutoRegressive
  depth: 5
  iaf_block:
    activation: Tanh
    context_dim: 10
    data_dim: 2
    depth: 2
    width: 20
encoder:
  nnet_structure:
  - residual: false
    block_structure:
    - Linear:in_features=<feadim>;out_features=50
    - ReLU
    - Linear:in_features=50;out_features=50
    - ReLU
  prob_layer:
    type: NormalizingFlowLayer
    covariance: isotropic
    flow_params_dim: 10
    dim_in: 50
    dim_out: 2
decoder:
  nnet_structure:
  - residual: false
    block_structure:
    - Linear:in_features=2;out_features=50
    - ReLU
    - Linear:in_features=50;out_features=50
    - ReLU
  prob_layer:
    type: NormalLayer
    covariance: isotropic
    dim_in: 50
    dim_out: <feadim>
latent_model:
  type: Normal
  covariance: isotropic
  prior_strength: 1.
  noise_std: 0.
'''

In [None]:
# Seed torch so that the initialization of the neural networks (and the
# sampling done during training) is reproducible across kernel restarts.
torch.manual_seed(0)

# Per-dimension statistics of the data, handed to beer when creating the model.
data_mean = torch.from_numpy(data.mean(axis=0)).float()
data_var = torch.from_numpy(np.var(data, axis=0)).float()

# Substitute the `<feadim>` placeholder of the template with the actual
# feature dimension, then parse the resulting YAML.
conf_data = vae_conf_str.replace('<feadim>', str(len(data_mean)))
# `yaml.load` without an explicit Loader is deprecated since PyYAML 5.1 and
# raises a TypeError with PyYAML >= 6. The configuration only holds plain
# scalars, lists and mappings, so `safe_load` parses it to the same object.
conf = yaml.safe_load(conf_data)
model = beer.create_model(conf, data_mean, data_var).double()

## Variational Bayes Training

In [None]:
# Training settings.
epochs = 5_000
lrate_bayesmodel = 0.
lrate_encoder = 1e-3

# Training data as a double precision tensor (the model was cast to double).
X = torch.from_numpy(data).double()
n_frames = len(X)

# Adam takes care of the encoder/decoder weights; it is handed to the beer
# optimizer through `std_optim`.
nnet_parameters = [*model.encoder.parameters(), *model.decoder.parameters()]
std_optimizer = torch.optim.Adam(
    nnet_parameters,
    lr=lrate_encoder,
    weight_decay=1e-2,
)
optimizer = beer.BayesianModelCoordinateAscentOptimizer(
    model.mean_field_groups,
    lrate=lrate_bayesmodel,
    std_optim=std_optimizer,
)

# ELBO divided by the number of data points, one value per epoch.
elbos = []
for epoch in range(epochs):
    optimizer.zero_grad()
    elbo = beer.evidence_lower_bound(model, X, datasize=n_frames)
    elbo.backward()
    elbo.natural_backward()
    optimizer.step()

    # The value of the very first epoch is left out of the curve.
    if epoch == 0:
        continue
    elbos.append(float(elbo) / n_frames)

# Plot the ELBO.
fig = figure(
    title='ELBO',
    width=400,
    height=400,
    x_axis_label='step',
    y_axis_label='ln p(X)',
)
fig.line(np.arange(len(elbos)), elbos, color='blue')

show(fig)

In [None]:
# Compare the data with their reconstruction (left) and show where the data
# land in the latent space (right).
fig1 = figure(title='Observed space', width=400, height=400)
fig2 = figure(title='Latent space', width=400, height=400, x_range=(-5, 5), y_range=(-5, 5))

# Pure inference: no gradient is needed here, so autograd is switched off to
# avoid building a computation graph for the whole forward pass.
with torch.no_grad():
    mean, variance, flow_params = model.encoder(X)
    # `use_mean=False` is forwarded to the flow as in the training set-up;
    # the second returned value is used as the latent samples.
    _, samples = model.nflow(mean, variance, flow_params, use_mean=False)
    # The first output of the decoder is used as the reconstruction.
    r_class_X = model.decoder(samples)[0]

# Convert to numpy for plotting. `.detach()` replaces the discouraged `.data`
# attribute and is harmless on tensors that do not require gradients.
samples = samples.detach().numpy()
class_X, r_class_X = X.numpy(), r_class_X.detach().numpy()

fig1.circle(class_X[:, 0], class_X[:, 1], alpha=.5)
fig1.cross(r_class_X[:, 0], r_class_X[:, 1], color='salmon')
fig2.circle(samples[:, 0], samples[:, 1])

show(gridplot([[fig1, fig2]]))