In [93]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn import linear_model
import seaborn as sns
import torch

import pyro
import pyro.distributions as dist
from pyro.contrib.autoguide import AutoDiagonalNormal, AutoMultivariateNormal
from pyro.infer import MCMC, NUTS, HMC, SVI, Trace_ELBO
from pyro.optim import Adam, ClippedAdam

# fix random generator seed (for reproducibility of results)
np.random.seed(42)

# matplotlib style options
plt.style.use('ggplot')
%matplotlib inline
plt.rcParams['figure.figsize'] = (12, 8)

In [94]:
df = pd.read_csv("Data/cces_all_clean.csv")
df.head()

Unnamed: 0,abortion,state,eth,male,age,educ
0,1,MD,Other,-0.5,50-59,Some college
1,1,TN,White,-0.5,40-49,HS
2,1,OH,White,-0.5,60-69,Some college
3,0,CA,Other,-0.5,70+,Post-grad
4,1,KY,White,-0.5,40-49,HS


In [95]:
# separate between features/inputs (X) and target/output variables (y)
mat = df.values
X = mat[:,1:]
y = mat[:,0].astype("int")
ind = df.index.astype("int")
print(X)
print(X.shape)
print(y)
print(y.shape)
print(ind)
print(ind.shape)

[['MD' 'Other' -0.5 '50-59' 'Some college']
 ['TN' 'White' -0.5 '40-49' 'HS']
 ['OH' 'White' -0.5 '60-69' 'Some college']
 ...
 ['IN' 'White' -0.5 '18-29' 'Some college']
 ['CO' 'White' -0.5 '30-39' 'HS']
 ['GA' 'Other' 0.5 '30-39' 'Some college']]
(59810, 5)
[1 1 1 ... 0 0 0]
(59810,)
RangeIndex(start=0, stop=59810, step=1)
(59810,)


In [100]:
categories = ["state", "eth", "male", "age", "educ"]
for category in categories:
    df[category+"_cat"] = df[category].astype('category').cat.codes

In [101]:
df.dtypes

abortion       int64
state         object
eth           object
male         float64
age           object
educ          object
state_cat       int8
eth_cat         int8
male_cat        int8
age_cat         int8
educ_cat        int8
dtype: object

In [103]:
df

Unnamed: 0,abortion,state,eth,male,age,educ,state_cat,eth_cat,male_cat,age_cat,educ_cat
0,1,MD,Other,-0.5,50-59,Some college,19,2,0,3,4
1,1,TN,White,-0.5,40-49,HS,41,3,0,2,1
2,1,OH,White,-0.5,60-69,Some college,34,3,0,4,4
3,0,CA,Other,-0.5,70+,Post-grad,4,2,0,5,3
4,1,KY,White,-0.5,40-49,HS,16,3,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...
59805,0,ME,White,-0.5,18-29,Some college,20,3,0,0,4
59806,1,MN,White,0.5,30-39,4-Year College,22,3,1,1,0
59807,0,IN,White,-0.5,18-29,Some college,14,3,0,0,4
59808,0,CO,White,-0.5,30-39,HS,5,3,0,1,1


In [113]:
mat = df.iloc[:,6:].values
X = mat
X = torch.tensor(X).float()
y = torch.tensor(y).float()

  after removing the cwd from sys.path.


In [116]:
from torch import sigmoid
def hierarchical_model(X, n_fact = 8, obs=None):
    
    sigmas = pyro.sample("sigmas", dist.HalfCauchy(torch.ones(n_fact)*5))
    gammas = pyro.sample("gammas", dist.Normal(torch.zeros(5), 5.0))
    alpha_educ = pyro.sample("alpha_educ", dist.Normal(torch.zeros(5), sigmas[0]))
    alpha_eth = pyro.sample("alpha_eth", dist.Normal(torch.zeros(4), sigmas[1]))
    alpha_age = pyro.sample("alpha_age", dist.Normal(torch.zeros(6), sigmas[2]))
    alpha_male_eth = pyro.sample("alpha_male_eth", dist.Normal(torch.zeros((2,4)), sigmas[3]))
    alpha_educ_age = pyro.sample("alpha_educ_age", dist.Normal(torch.zeros((5,6)), sigmas[4]))
    alpha_educ_eth = pyro.sample("alpha_educ_eth", dist.Normal(torch.zeros((5,4)), sigmas[5]))
    
    # need the GROUP LEVEL PREDICTORS for states
    # Temporary solution
    ####################
    alpha_state = pyro.sample("alpha_state", dist.Normal(torch.zeros(50), sigmas[6]))
    ####################
    beta_male = pyro.sample("beta_male", dist.Normal(0.,5.))
    with pyro.plate("data"):
        y = pyro.sample("y",sigmoid(alpha_educ[list(X[:,4])] + alpha_eth[list(X[:,1])] + alpha_age[list(X[:,3])] + alpha_male_eth[list(X[:,2]), list(X[:,1])] + alpha_educ_age[list(X[:,4]),list(X[:,3])] + alpha_educ_eth[list(X[:,4]), list(X[:,1])] + beta_male*X[:,2]), obs= obs)
        print(y)
    return y

In [117]:
%%time

# Define guide function
guide = AutoDiagonalNormal(hierarchical_model)

# Reset parameter values
pyro.clear_param_store()

# Define the number of optimization steps
n_steps = 12000

# Setup the optimizer
adam_params = {"lr": 0.01}
optimizer = ClippedAdam(adam_params)

# Setup the inference algorithm
elbo = Trace_ELBO(num_particles=3)
svi = SVI(hierarchical_model, guide, optimizer, loss=elbo)

# Do gradient steps
for step in range(n_steps):
    elbo = svi.step(X,8,y)
    if step % 500 == 0:
        print("[%d] ELBO: %.1f" % (step, elbo))

tensor([1., 1., 1.,  ..., 0., 0., 0.])
tensor([1., 1., 1.,  ..., 0., 0., 0.])


AttributeError: 'Tensor' object has no attribute 'log_prob'