In [None]:
import pyro
import torch
from pyro.optim import SGD, Adam
import pyro.distributions as dist
from torch.distributions import constraints
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import beta
%matplotlib inline

import os
# Set the KMP_DUPLICATE_LIB_OK environment variable to 'True' for this process.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime" error raised when
# several libraries ship their own OpenMP copy -- confirm that it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

## Introduction

In this notebook we revisit the simple generative model from Slide 18, which you also experimented with in the notebook *student_BBVI.ipynb*:
 * https://www.moodle.aau.dk/mod/resource/view.php?id=1049031

In the previous notebook we derived the required gradients manually. Here we instead rely on the automatic differentiation functionality in Pyro, which, in turn, is based on PyTorch.

## The model in plate notation

<img src="mean_model.png" width="600">

## The model defined in Pyro

Here we define the probabilistic model. Notice the close resemblance with the plate specification above.

In [None]:
def mean_model(data):
    """Generative model: a latent mean ``mu`` and observations distributed around it.

    Args:
        data: Observed values that the sample site ``"X"`` is conditioned on.
    """
    # Prior over the unknown mean: a normal distribution with location 0 and scale 1000.
    prior = dist.Normal(0.0, 1000.0)
    latent_mean = pyro.sample("mu", prior)

    # Likelihood of the observations given the sampled mean; its scale is fixed to 1.
    likelihood = dist.Normal(latent_mean, 1.0)

    # The plate holds the observations. It is opened without an explicit size, so the
    # number of observations is determined by the data set supplied to the function.
    with pyro.plate("x_plate"):
        pyro.sample("X", likelihood, obs=data)

## The variational distribution

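In Pyro the variational distribution is defined as a so-called guide. In this example our variational distribution is a normal distribution over $\mu$ with variational mean $q_{mu}$:

$$
q(\mu)= \mathit{Normal}(\mu | q_{mu}, 1)
$$

Note that the guide implemented below goes one step further and also learns the scale of this distribution through the parameter `q_variance`, instead of keeping it fixed at 1.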

In [None]:
def mean_guide(data):
    """Variational distribution (guide) for ``mean_model``: a normal distribution over ``mu``.

    Args:
        data: Not used in the body; accepted so that the guide can be called with the
            same argument as the model.
    """
    # Location of q(mu), initialized to 0.0.
    loc = pyro.param("q_mu", torch.tensor(0.0))

    # Positive-constrained parameter, initialized to 1.1.
    # NOTE(review): although it is registered as "q_variance", it is passed as the second
    # argument of dist.Normal below, i.e. it acts as the scale (standard deviation) of
    # q(mu) -- confirm that this is the intent.
    scale = pyro.param("q_variance", torch.tensor(1.1), constraint=constraints.positive)

    # The sample site must carry exactly the same name as the corresponding latent
    # variable in the model ("mu").
    variational_dist = dist.Normal(loc, scale)
    pyro.sample("mu", variational_dist)


## Learning

Here we encapsulate the learning steps, relying on standard stochastic gradient descent

In [None]:
def learn(data, num_steps=1000, lr=0.001, log_every=50):
    """Fit the variational parameters of ``mean_guide`` to ``data`` with SVI.

    The parameter store is cleared first, so every call starts again from the initial
    values given in the guide. The function prints progress and returns nothing; the
    fitted values can be read afterwards with ``pyro.param``.

    Args:
        data: Observations, forwarded unchanged to ``mean_model`` and ``mean_guide``.
        num_steps: Number of optimization steps to take (default 1000).
        lr: Learning rate handed to the SGD optimizer (default 0.001).
        log_every: Print the loss every ``log_every`` steps (default 50). ``None`` or a
            value below 1 switches the progress output off.
    """
    pyro.clear_param_store()

    svi = pyro.infer.SVI(model=mean_model,
                         guide=mean_guide,
                         optim=SGD({'lr': lr}),
                         loss=pyro.infer.Trace_ELBO())

    for step in range(num_steps):
        loss = svi.step(data)

        # Guard the modulo so that a disabled / non-positive interval cannot raise.
        if log_every and log_every > 0 and step % log_every == 0:
            print(f"Loss for iteration {step}: {loss}")

In [None]:
# Seed the random number generators so that both the synthetic data set and the stochastic
# optimization give the same result on every "Restart & Run All".
SEED = 42
np.random.seed(SEED)
pyro.set_rng_seed(SEED)

# Synthetic data: 100 draws from a normal distribution with mean 100 and standard deviation 5.
data = torch.tensor(np.random.normal(loc=100.0, scale=5.0, size=100),dtype=torch.float)
learn(data)

Get the learned variational parameter

## The learned parameter

In [None]:
# Look up the fitted variational parameters by name and convert each one to a plain number.
qmu, q_variance = (pyro.param(site_name).item() for site_name in ("q_mu", "q_variance"))

In [None]:
# Report the fitted variational parameters read out in the previous cell.
print(f"Mean of variational distribution: {qmu}")
print(f"q_variance: {q_variance}")

## Exercise
* Adapt the code above to accommodate a slightly richer variational distribution, where we also have a variational parameter for the standard deviation:
$$
q(\mu)= \mathit{Normal}(\mu | q_{mu}, q_{std})
$$
* Experiment with different data sets and parameter values. Try visualizing the variational posterior distribution.