# Probabilistic Principal Component Analysis

This notebook illustrates how to use a PPCA model with the [beer framework](https://github.com/beer-asr/beer). 

In [None]:
# Add "beer" to the PYTHONPATH
import sys
sys.path.insert(0, '../')

import beer
import numpy as np
import torch

# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import Arrow, OpenHead, NormalHead, VeeHead
from bokeh.models import ColumnDataSource, LabelSet, Legend
from bokeh.transform import factor_cmap
from bokeh.palettes import Spectral6
import colorcet as cc
output_notebook()

# Convenience functions for plotting.
import plotting

%load_ext autoreload
%autoreload 2

## Data

Generate some normally distributed data with some correlation.

In [None]:
# Fix the seed of NumPy's global generator so that "Restart & Run All"
# reproduces the same samples (and therefore the same plots below).
SEED = 0
np.random.seed(SEED)

# Parameters of the 2-D Gaussian the data is drawn from.
mean = np.array([-10, 10])
cov = np.array([[2, 1.5], [1.5, 1.5]])
# cov = np.eye(2)  # uncomment to generate uncorrelated, unit-variance data
data = np.random.multivariate_normal(mean, cov, size=100)

# Plotting window: +/- 5 units around the true mean on each axis. These
# ranges are reused by the figures in the later cells.
x_range = (mean[0] - 5, mean[0] + 5)
y_range = (mean[1] - 5, mean[1] + 5)

fig = figure(
    title='Data',
    width=400,
    height=400,
    x_range=x_range,
    y_range=y_range
)
# Samples as markers, with the generating Gaussian drawn by the local
# `plotting.plot_normal` helper.
fig.circle(data[:, 0], data[:, 1])
plotting.plot_normal(fig, mean, cov, alpha=.5, color='Salmon')

show(fig)

## Model Creation

In [None]:
# Per-dimension statistics of the data set, handed to the model factory
# as double-precision tensors.
data_mean = torch.from_numpy(np.mean(data, axis=0)).double()
data_var = torch.from_numpy(np.var(data, axis=0)).double()

# Model description consumed by `beer.create_model`.
conf = dict(
    type='PPCA',
    dim_subspace=1,
    prior_strength=1.,
    noise_std=0.1,
)

# Build the model from the configuration and cast it to double precision.
ppca = beer.create_model(conf, data_mean, data_var)
ppca = ppca.double()

## Variational Bayes Training 

In [None]:
epochs = 200
# NOTE: `lrate` is not passed to the optimizer anywhere in this cell; it is
# kept only in case other cells read it.
lrate = 1
X = torch.from_numpy(data).double()
optim = beer.BayesianModelCoordinateAscentOptimizer(ppca.mean_field_groups)

# ELBO divided by the number of data points, recorded once per epoch.
elbos = []
for epoch in range(epochs):
    optim.zero_grad()
    elbo = beer.evidence_lower_bound(ppca, X, datasize=len(X))
    elbo.natural_backward()
    # Record the value computed *before* this epoch's parameter update.
    elbos.append(round(float(elbo) / len(X), 5))
    optim.step()

# Training curve; title and axis labels make the figure self-explanatory.
fig = figure(
    title='Training',
    x_axis_label='epoch',
    y_axis_label='ELBO per data point',
    width=400,
    height=400,
)
fig.line(range(epochs), elbos, color='blue')
show(fig)

## Plotting the model

In [None]:
model = ppca

# Coordinates along the subspace axis used to draw every band below.
x = np.linspace(-20, 20, 1000)

model_mean = model.mean.numpy()

# Two points on the learned line: the model mean, and the mean shifted by
# the first row of the subspace matrix.
A, B = model_mean, model_mean + model.subspace.numpy()[0, :]
slope = (A[1] - B[1]) / (A[0] - B[0])
# Line through A with that slope: y = slope * x + (A_y - slope * A_x).
intercept = A[1] - slope * A[0]
s_line = np.c_[x, slope * x + intercept]

# Standard normal density evaluated on the grid `x`.
p_h = np.sqrt(1 / (2 * np.pi)) * np.exp(-.5 * x ** 2)

# Orientation of the subspace. The sign of the basis vector is arbitrary,
# so flip it to point towards increasing x; arctan2 then yields an angle in
# [-pi/2, pi/2] that keeps the sign of the slope and handles a vertical
# direction without dividing by zero.
direction = B - A
if direction[0] < 0:
    direction = -direction
angle = np.arctan2(direction[1], direction[0])
R = np.array([
    [np.cos(angle), -np.sin(angle)],
    [np.sin(angle), np.cos(angle)]
])


def _band(offsets, origin):
    """Outline of the area between the subspace axis and a curve above it.

    The points ``(x, 0)`` and ``(x, offsets)`` are rotated by ``R`` and
    translated by ``origin``; the second curve is traversed backwards so the
    two curves together form a closed polygon. Returns the polygon's x and y
    coordinates.
    """
    base = np.c_[x, np.zeros_like(x)] @ R.T + origin
    edge = np.c_[x, offsets] @ R.T + origin
    return (np.append(base[:, 0], edge[::-1, 0]),
            np.append(base[:, 1], edge[::-1, 1]))


fig1 = figure(
    x_range=x_range,
    y_range=y_range,
    width=400,
    height=400,
)

plotting.plot_normal(fig1, mean, cov, alpha=.4, color='Salmon')

# Width of one band on each side of the axis: 1 / sqrt(precision).
# Presumably this is the model's noise standard deviation -- confirm
# against the beer PPCA implementation.
spread = np.sqrt(np.ones_like(x) / model.precision.numpy())

# Semi-transparent bands at 1x and 2x `spread`, on both sides of the axis;
# the overlapping patches make the inner band darker.
for i in range(2):
    for sign in (1, -1):
        band_x, band_y = _band(sign * (i + 1) * spread, model_mean)
        fig1.patch(band_x, band_y, line_alpha=0., fill_alpha=0.5,
                   fill_color='LightBlue')

# Standard normal density drawn along the subspace axis. It is anchored at
# the model mean (not the true data mean) so that it sits on the same line
# as the bands above.
band_x, band_y = _band(p_h, model_mean)
fig1.patch(band_x, band_y, line_color='black', fill_color='LightGreen')

# Reference panel: the raw data with the generating Gaussian.
fig2 = figure(
    width=400,
    height=400,
    x_range=x_range,
    y_range=y_range
)
fig2.cross(data[:, 0], data[:, 1])
plotting.plot_normal(fig2, mean, cov, alpha=.3, line_color='black', 
                     fill_color='salmon')

show(gridplot([[fig2, fig1]]))