In [2]:
import pandas as pd
import numpy as np
import os
import torch
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam
from pyro.infer.autoguide import AutoDiagonalNormal
import seaborn as sns
import matplotlib.pyplot as plt
import math



smoke_test = ('CI' in os.environ)
assert pyro.__version__.startswith('1.9.0')

We start with a simple model: choose a coin. Ground truth is the probability of heads is drawn from a beta(10,10). Then we do variational inference on this probability f by setting the variational family beta.

In [43]:
pyro.clear_param_store()

def model(data):
    # define the hyperparameters that control the Beta prior

    f1 = pyro.sample("latent_fairness_1", dist.Beta(10, 10))
    # loop over the observed data
    for i in range(len(data)):
        # observe datapoint i using the Bernoulli
        # likelihood Bernoulli(f)
        pyro.sample("obs_{}".format(i), dist.Bernoulli(f1), obs=data[i])

def guide_custom(data):

    # register the two variational parameters with Pyro.
    alpha_q1 = pyro.param("alpha_q1", torch.tensor(15.0),
                         constraint=dist.constraints.positive)
    beta_q1 = pyro.param("beta_q1", torch.tensor(15.0),
                        constraint=dist.constraints.positive)

    # sample latent_fairness from the distribution Beta(alpha_q, beta_q)
    pyro.sample("latent_fairness_1", dist.Beta(alpha_q1, beta_q1))


# setup the optimizer
adam_params = {"lr": 0.0005}
optimizer = Adam(adam_params)

# setup the inference algorithm
svi = SVI(model, guide_custom, optimizer, loss=Trace_ELBO())

data = []
for _ in range(6):
    data.append(torch.tensor(1.0))
for _ in range(4):
    data.append(torch.tensor(0.0))


# do gradient steps
n_steps = 2000
losses = []

for step in range(n_steps):
    loss = svi.step(data)
    losses.append(loss)
    if step % 100 == 0:
        print(f"Step {step}: Loss = {loss}")


alpha_q1 = pyro.param("alpha_q1").item()
beta_q1 = pyro.param("beta_q1").item()

inferred_mean = alpha_q1 / (alpha_q1 + beta_q1)
# compute inferred standard deviation
factor = beta_q1 / (alpha_q1 * (1.0 + alpha_q1 + beta_q1))
inferred_std = inferred_mean * math.sqrt(factor)

print("\nBased on the data and our prior belief, the fairness " +
      "of the coin is %.3f +- %.3f" % (inferred_mean, inferred_std))

Step 0: Loss = 7.079536557197571
Step 100: Loss = 7.046950459480286
Step 200: Loss = 7.051387071609497
Step 300: Loss = 7.092971563339233
Step 400: Loss = 7.081258296966553
Step 500: Loss = 7.070039987564087
Step 600: Loss = 7.065178036689758
Step 700: Loss = 7.057203531265259
Step 800: Loss = 7.071073770523071
Step 900: Loss = 7.048192739486694
Step 1000: Loss = 7.102862358093262
Step 1100: Loss = 7.074877858161926
Step 1200: Loss = 7.089845061302185
Step 1300: Loss = 7.05859249830246
Step 1400: Loss = 7.069109559059143
Step 1500: Loss = 7.071178674697876
Step 1600: Loss = 7.020928859710693
Step 1700: Loss = 7.037477970123291
Step 1800: Loss = 7.041293025016785
Step 1900: Loss = 7.112540245056152

Based on the data and our prior belief, the fairness of the coin is 0.541 +- 0.090


Now we use the autoguide function autoDiagonalNormal to see if ADVI can solve the problem.

In [10]:
auto_guide = AutoDiagonalNormal(model)
svi = SVI(model, auto_guide, optimizer, loss=Trace_ELBO())

n_steps = 2000
losses = []

for step in range(n_steps):
    loss = svi.step(data)
    losses.append(loss)
    if step % 100 == 0:
        print(f"Step {step}: Loss = {loss}")

for name, value in pyro.get_param_store().items():
    print(f"{name} = {value.detach().cpu().numpy()}")


Step 0: Loss = 7.3824931383132935
Step 100: Loss = 7.511573910713196
Step 200: Loss = 7.485952734947205
Step 300: Loss = 6.924910664558411
Step 400: Loss = 7.326046347618103
Step 500: Loss = 7.347728729248047
Step 600: Loss = 6.904543399810791
Step 700: Loss = 7.315403342247009
Step 800: Loss = 6.635766625404358
Step 900: Loss = 6.842504858970642
Step 1000: Loss = 7.215908527374268
Step 1100: Loss = 7.257016181945801
Step 1200: Loss = 6.8420621156692505
Step 1300: Loss = 6.9793747663497925
Step 1400: Loss = 7.227752089500427
Step 1500: Loss = 7.1300565004348755
Step 1600: Loss = 7.210431098937988
Step 1700: Loss = 7.1971904039382935
Step 1800: Loss = 7.191376566886902
Step 1900: Loss = 7.151340126991272
alpha_q1 = 16.096538543701172
beta_q1 = 13.950069427490234
AutoDiagonalNormal.loc = [0.14385241]
AutoDiagonalNormal.scale = [0.33450317]
mean = 9.970494270324707


Try some other guide that we manually make the transformation.

In [45]:
def guide_custom1(data):
    # Define parameters for the variational distribution of x
    loc = pyro.param("loc", torch.tensor(0.0))
    scale = pyro.param("scale", torch.tensor(1.0), constraint=dist.constraints.positive)
    
    # Sample x from the variational distribution
    x = pyro.sample("x", dist.Normal(loc, scale))
    
    # Apply the sigmoid transformation to x to get f
    f = torch.sigmoid(x)
    
    # Register f as a variational parameter
    pyro.sample("latent_fairness_1", dist.Delta(f))
    
    
svi = SVI(model, guide_custom1, optimizer, loss=Trace_ELBO())

n_steps = 2000
losses = []

for step in range(n_steps):
    loss = svi.step(data)
    losses.append(loss)
    if step % 100 == 0:
        print(f"Step {step}: Loss = {loss}")

for name, value in pyro.get_param_store().items():
    print(f"{name} = {value.detach().cpu().numpy()}")

{'x'}


Step 0: Loss = 4.732509672641754
Step 100: Loss = 6.1552417278289795
Step 200: Loss = 9.10539186000824
Step 300: Loss = 6.330778419971466
Step 400: Loss = 6.159422755241394
Step 500: Loss = 8.217960596084595
Step 600: Loss = 4.825847387313843
Step 700: Loss = 5.2056132555007935
Step 800: Loss = 7.444134712219238
Step 900: Loss = 7.612884283065796
Step 1000: Loss = 4.964548230171204
Step 1100: Loss = 6.554319858551025
Step 1200: Loss = 6.134634852409363
Step 1300: Loss = 6.9302650690078735
Step 1400: Loss = 5.252403378486633
Step 1500: Loss = 5.8072633147239685
Step 1600: Loss = 5.84585964679718
Step 1700: Loss = 5.343686878681183
Step 1800: Loss = 7.718535780906677
Step 1900: Loss = 7.622731328010559
alpha_q1 = 16.16783905029297
beta_q1 = 13.730469703674316
AutoDiagonalNormal.loc = [0.14385241]
AutoDiagonalNormal.scale = [0.33450317]
mean = nan
std = nan
loc = 0.1232151985168457
scale = 0.6606632471084595


In [47]:
loc = pyro.param('loc').item()
torch.sigmoid(torch.tensor(loc))

tensor(0.5308)