In [22]:
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [23]:
# Read the dataset from the working directory, then show the frame
# as the cell's rich output.
df = pd.read_csv(filepath_or_buffer='unemployment.csv')
df

Unnamed: 0,UN,m,p,G,x
0,3.970,3.92,0.168,16.364,5.900
1,5.419,4.08,0.166,16.619,6.300
2,3.045,4.43,0.161,16.930,6.900
3,11.684,4.26,0.163,17.178,6.300
4,18.415,4.28,0.155,17.364,6.400
...,...,...,...,...,...
85,8.455,641.30,1.256,266.700,123.374
86,7.690,704.60,1.321,266.800,129.359
87,7.038,779.70,1.398,272.300,131.091
88,6.022,846.70,1.501,277.800,146.482


In [24]:
# Log-normal model for the UN column
with pm.Model() as model:
    # Priors: a diffuse normal on the log-scale location, and a chi-squared
    # with 89 degrees of freedom on the log-scale variance.
    mu = pm.Normal("mu", mu=0, sigma=1000)
    tau2 = pm.ChiSquared("tau2", nu=89)

    # Likelihood: UN is log-normal with location mu and scale sqrt(tau2).
    y_obs = pm.LogNormal(
        "y_obs",
        mu=mu,
        sigma=pm.math.sqrt(tau2),
        observed=df.UN,
    )

    # Draw 2000 posterior samples per chain; the fixed seed keeps runs repeatable.
    trace = pm.sample(draws=2000, return_inferencedata=True, random_seed=10678)

Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu, tau2]


Output()

Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 57 seconds.


In [25]:
# Posterior summary table with 95% highest-density intervals.
az.summary(data=trace, hdi_prob=0.95)

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
mu,1.679,0.264,1.155,2.197,0.003,0.003,6250.0,4978.0,1.0
tau2,6.245,2.604,2.266,11.569,0.035,0.042,5516.0,4765.0,1.0


Find the probability that the parameter mu is larger than 3.4 [P(mu>3.4)].

In [26]:
# Posterior tail probability P(mu > 3.4): the fraction of posterior draws of
# mu (all chains pooled into one flat array) that exceed the threshold.
mu_draws = trace.posterior["mu"].values.flatten()
prob_mu_gt_3_4 = np.mean(mu_draws > 3.4)

# The original variable name suggested "p <= 0.3", which is not what is
# computed; it is kept only as an alias in case later cells refer to it.
prob_p_leq_0_3 = prob_mu_gt_3_4

# The label states the event actually evaluated (strictly greater than 3.4).
print(f"Probability that mu > 3.4: {prob_mu_gt_3_4:.4f}")

Probability that mu <= 3.4: 0.0000
