In [2]:
!pip install pyro-ppl

Collecting pyro-ppl
[?25l  Downloading https://files.pythonhosted.org/packages/c0/77/4db4946f6b5bf0601869c7b7594def42a7197729167484e1779fff5ca0d6/pyro_ppl-1.3.1-py3-none-any.whl (520kB)
[K     |████████████████████████████████| 522kB 3.6MB/s 
[?25hCollecting pyro-api>=0.1.1
  Downloading https://files.pythonhosted.org/packages/fc/81/957ae78e6398460a7230b0eb9b8f1cb954c5e913e868e48d89324c68cec7/pyro_api-0.1.2-py3-none-any.whl
Installing collected packages: pyro-api, pyro-ppl
Successfully installed pyro-api-0.1.2 pyro-ppl-1.3.1


In [3]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns

import torch
from torch import nn
from torch.distributions import constraints
import functools

import pyro
import pyro.distributions as dist
from pyro.infer import SVI, JitTraceEnum_ELBO, TraceEnum_ELBO
from pyro.contrib.autoguide import AutoDiagonalNormal, AutoMultivariateNormal, AutoGuideList, AutoDelta
from pyro.optim import ClippedAdam

import time

# fix MCMCndom generator seed (for reproducibility of results)
np.random.seed(42)

# matplotlib style options
plt.style.use('ggplot')
%matplotlib inline
plt.rcParams['figure.figsize'] = (16, 10)

  import pandas.util.testing as tm


In [0]:
import requests, io
r = requests.get('https://github.com/MikkelGroenning/MBML_project/blob/master/notebook/fakedata.npy?raw=true')

data = np.load(io.BytesIO(r.content)).astype('int32')

In [27]:
data.shape

(25, 100, 50)

In [30]:
data_torch = torch.tensor(data).long()
data_torch.shape

torch.Size([25, 100, 50])

In [55]:

def model(T=100, data=None):
    """ Define priors over beta1, beta2, tau, noises, sigma, z_prev1 and z_prev2 (keep the shapes in mind)
    # Your code here
    """

    psi = torch.tensor(1)
    delta = torch.tensor(1)
    sigma = torch.tensor(1)

    ALPHA = [] 
    BETA = []
    
    for t in range(T):

        if t == 0:
            alpha_t = pyro.sample(name="alpha_%d"%t, fn=dist.LogisticNormal(loc=torch.zeros(25), scale=delta))

        else:
            alpha_t = pyro.sample(name="alpha_%d"%t, fn=dist.LogisticNormal(loc=alpha_prev, scale=delta))
        
        ALPHA.append(alpha_t)

        alpha_prev = alpha_t

        """ Make a plate of size num_topics with name "topics" and define a variable "topic_words".
          This represents the phi above. Use the equivalent of a uniform distribution for it  """
        with pyro.plate("topics_%d"%t, 5):
            if t == 0:
                beta_t = pyro.sample(name="beta_%d"%t, fn=dist.LogisticNormal(loc=torch.zeros(5), scale=sigma))

            else:
                beta_t = pyro.sample(name="beta_%d"%t, fn=dist.LogisticNormal(loc=beta_prev, scale=sigma))

            BETA.append(beta_t)

            beta_prev = beta_t 

        with pyro.plate("documents_%d"%t, 25, dim=-3) as ind:
            if data is not None:
                with pyro.util.ignore_jit_warnings():
                    assert data.shape == (25, 100, 50)
                data_p = data[ind, t, :]
            
            theta_t = pyro.sample("theta_%d"%t, dist.Normal(loc = alpha_t, scale = psi))

            # doc_topics = pyro.sample("doc_topics", dist.Categorical(torch.ones(num_topics)/ num_topics))
            
            with pyro.plate("words_%d"%t, 50, dim=-2):
                # The word_topics variable is marginalized out during inference,
                # achieved by specifying infer={"enumerate": "parallel"} and using
                # TraceEnum_ELBO for inference. Thus we can ignore this variable in
                # the guide.


                z = pyro.sample("z", dist.Categorical(logits = theta_t), infer={"enumerate": "sequential"})
                w = pyro.sample("w", dist.Categorical(logits = beta_t[z]), obs=data_p)
    

def guide(T=100, data=None):
    psi_q = pyro.param("psi_q", torch.tensor(1), constraint = constraints.positive)
    delta_q = pyro.param("delta_q", torch.tensor(1), constraint = constraints.positive)
    sigma_q = pyro.param("sigma_q", torch.tensor(1), constraint = constraints.positive)
    
    alpha_q = pyro.param(
        'alpha_q', 
        lambda: torch.zeros(100, 25)
    )
    print(alpha_q.shape)
    """
    delta_q = pyro.param(
        'delta_q', 
        lambda: torch.ones(100, 25),
        constraint = constraints.positive
    )
    """
    for t in range(T):
        pyro.sample('alpha_%d'%t, dist.LogisticNormal(loc = alpha_q[t, :], scale = delta_q))
        

        beta_q = pyro.param(
            'beta_q', 
            lambda: torch.zeros(100, 5),
        )

        # number of topics = 5
        with pyro.plate("topics_%d"%t, 5):
            pyro.sample('beta_%d'%t, dist.LogisticNormal(loc=beta_q[t], scale=sigma_q))

            theta_q = pyro.param(
                "theta_q",
                lambda: torch.zeros(100, 25),
            )
        # number of documents = 5
        with pyro.plate("documents_%d"%t, 25) as ind:
            pyro.sample('theta_%d'%t, dist.Normal(loc = alpha_q, scale = psi_q))
            

elbo = TraceEnum_ELBO(max_plate_nesting=2)

optim = ClippedAdam({'lr': 0.005})
svi = SVI(model, guide, optim, elbo)

# Define the number of optimization steps
n_steps = 4000

# do gradient steps
for step in range(n_steps):
    elbo = svi.step(T=100, data=data_torch)
    if step % 100 == 0:
        #print('.', end='')
        print("[%d] ELBO: %.1f" % (step, elbo))


torch.Size([100, 25])


IndexError: ignored

In [0]:
theta = pyro.sample("theta", dist.Normal(loc = torch.zeros(25), scale = 1))

for i in range(25):
    theta[i]

In [24]:
theta[]

tensor([-0.8228,  0.1903,  0.0497,  1.5497,  0.8046, -0.7520,  0.6795,  0.2901,
        -1.0446, -0.1805,  2.0987, -1.1899, -0.2427,  0.2198, -2.6388, -0.9863,
        -0.9912, -0.0474,  1.8155,  0.2630,  1.2530, -0.1000,  1.0106, -1.2649,
        -0.2423])

In [0]:
theta = pyro.sample("theta", dist.Normal(loc = torch.zeros(100), scale = 1))
z = pyro.sample("z", dist.Categorical(logits = theta))

In [10]:
z

tensor(50)

In [0]:
# What is the difference between plate and irange?