In [1]:
import numpy as np
import pandas as pd

from scipy.special import logsumexp
from scipy.stats import multivariate_normal, norm

import pymc3 as pm

In [2]:
# Load the data set; the cells below read its 'speed' and 'dist' columns.
dat = pd.read_csv(filepath_or_buffer='cars.csv')

In [3]:
# Number of observations (rows) in the data set.
n_dat = len(dat)

In [4]:
# Linear regression of `dist` on `speed`:
#     dist_i ~ Normal(alpha + beta * speed_i, sigma)
with pm.Model() as model_cars:
    # Priors on the intercept, the slope and the residual scale.
    alpha = pm.Normal('alpha', mu=0, sigma=100)
    beta = pm.Normal('beta', mu=0, sigma=10)
    sigma = pm.Exponential('sigma', lam=1)

    # Linear predictor and the likelihood of the observed `dist` values.
    mu = alpha + beta * dat['speed']
    distance = pm.Normal('distance', mu=mu, sigma=sigma, observed=dat['dist'])

# Posterior mode (MAP) of the model, then the Hessian evaluated at that point.
# The model is passed explicitly, so these calls do not need to sit inside
# the `with` block. The following cell inverts `hessian` to obtain the
# covariance of a normal approximation around `par_post`.
par_post = pm.find_MAP(model=model_cars)
hessian = pm.find_hessian(par_post, vars=[alpha, beta, sigma], model=model_cars)

    




In [5]:
# Draw from a normal approximation of the posterior: a multivariate normal
# centred on the MAP estimate whose covariance is the inverse of `hessian`.
# NOTE(review): sigma is approximated as Gaussian here, so individual draws
# can come out <= 0; downstream code has to cope with such draws.
SEED = 42  # fixed seed so that Restart & Run All reproduces the same draws

param_mu_post = [par_post['alpha'], par_post['beta'], par_post['sigma']]
sample_post   = multivariate_normal.rvs(
    size=10**4,
    mean=param_mu_post,
    cov=np.linalg.inv(hessian),
    random_state=SEED,
)
# One row per draw, one named column per parameter.
sample_post   = pd.DataFrame(sample_post, columns=['alpha', 'beta', 'sigma'])

In [8]:
# WAIC of the regression, computed from the posterior draws in `sample_post`:
#     lppd    = sum_i log( mean_s p(y_i | theta_s) )
#     penalty = sum_i var_s( log p(y_i | theta_s) )
#     WAIC    = -2 * (lppd - penalty)
# The likelihoods are handled in log space throughout. Working with raw
# densities lets very unlikely points underflow to 0, after which they either
# have to be discarded (shrinking the penalty) or turn the lppd term into -inf.

# Plain arrays, so observations are addressed by position rather than by the
# DataFrame's index labels.
y_obs = np.asarray(dat['dist'], dtype=float)
x_obs = np.asarray(dat['speed'], dtype=float)

# norm.logpdf yields NaN for scale <= 0, and such draws carry no usable
# likelihood for any observation, so they are removed once up front.
valid_draws = sample_post[sample_post['sigma'] > 0]
if valid_draws.empty:
    raise ValueError('no posterior draws with sigma > 0; cannot compute WAIC')

# Column vectors of shape (n_draws, 1) broadcast against the (n_obs,) data.
alpha_draws = np.asarray(valid_draws['alpha'])[:, None]
beta_draws  = np.asarray(valid_draws['beta'])[:, None]
sigma_draws = np.asarray(valid_draws['sigma'])[:, None]

# log_lik[s, i] = log p(y_i | alpha_s, beta_s, sigma_s)
log_lik = norm.logpdf(
    y_obs,
    loc=alpha_draws + beta_draws*x_obs,
    scale=sigma_draws,
)

# log of the mean over draws, evaluated stably as logsumexp - log(n_draws).
lppd_pointwise    = logsumexp(log_lik, axis=0) - np.log(log_lik.shape[0])
penalty_pointwise = np.var(log_lik, axis=0)

lppd    = lppd_pointwise.sum()
penalty = penalty_pointwise.sum()
# Per-observation WAIC contributions, kept as a list as before.
WAIC_sample = list(-2*(lppd_pointwise - penalty_pointwise))

WAIC = -2*(lppd - penalty)
# NOTE(review): np.var is the population variance (ddof=0); some references
# define this standard error with the n-1 sample variance -- confirm intent.
WAIC_std_error = np.sqrt(n_dat*np.var(WAIC_sample))

In [9]:
# Show the WAIC estimate together with its standard error.
(WAIC, WAIC_std_error)

(422.91999652605125, 17.504025496049184)