In [1]:
!pip install pystan



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import stan # python3 -m pip install pystan
import nest_asyncio
nest_asyncio.apply()
import warnings
import arviz as az

In [3]:
# Define file paths
numbers_path = "dataframes/numbers.csv"
percentages_path = "dataframes/precentages.csv"
death_rates_path = "dataframes/death_rates.csv"

In [4]:
# Read the percentages CSV file
df = pd.read_csv(percentages_path, delimiter=';', header=0)
df = df.head(22)
df = df.drop(columns="[All]")
df.dtypes

Year       object
[0]        object
[1-4]      object
[5-9]      object
[10-14]    object
[15-19]    object
[20-24]    object
[25-29]    object
[30-34]    object
[35-39]    object
[40-44]    object
[45-49]    object
[50-54]    object
[55-59]    object
[60-64]    object
[65-69]    object
[70-74]    object
[75-79]    object
[80-84]    object
[85+]      object
dtype: object

In [5]:
df = df.map(lambda x: float(str(x).replace(',', '.')) if isinstance(x, str) else x)
df = df.map(lambda x: pd.to_numeric(str(x).replace(',', '.'), errors='coerce'))
df["Year"] = df["Year"].astype(int)
df = df.set_index("Year")

In [6]:
df = df.T

In [7]:
data = dict(
    N = 19,
    Y = 22,
    accidentData = df.values,
    xpred = 2022)

In [8]:
file_path = "stan/separate.stan"

# Read the Stan code from the file
with open(file_path, "r") as file:
    stan_code = file.read()

posterior1 = stan.build(stan_code, data=data)

Building...



Building: found in cache, done.

In [9]:
fit_separate = posterior1.sample(num_chains=4, num_samples=1000)
separate_df = fit_separate.to_frame()  # pandas `DataFrame, requires pandas

Sampling:   0%
Sampling:   0% (1/8000)
Sampling:   0% (2/8000)
Sampling:   0% (3/8000)
Sampling:   0% (4/8000)
Sampling:   1% (103/8000)
Sampling:   3% (202/8000)
Sampling:   4% (301/8000)
Sampling:   5% (400/8000)
Sampling:   6% (500/8000)
Sampling:   8% (600/8000)
Sampling:   9% (700/8000)
Sampling:  10% (800/8000)
Sampling:  11% (900/8000)
Sampling:  12% (1000/8000)
Sampling:  14% (1100/8000)
Sampling:  15% (1200/8000)
Sampling:  16% (1300/8000)
Sampling:  18% (1400/8000)
Sampling:  19% (1500/8000)
Sampling:  20% (1600/8000)
Sampling:  21% (1700/8000)
Sampling:  22% (1800/8000)
Sampling:  24% (1900/8000)
Sampling:  25% (2000/8000)
Sampling:  26% (2100/8000)
Sampling:  28% (2200/8000)
Sampling:  29% (2300/8000)
Sampling:  30% (2400/8000)
Sampling:  31% (2500/8000)
Sampling:  32% (2600/8000)
Sampling:  34% (2700/8000)
Sampling:  35% (2800/8000)
Sampling:  36% (2900/8000)
Sampling:  38% (3000/8000)
Sampling:  39% (3100/8000)
Sampling:  40% (3200/8000)
Sampling:  41% (3300/8000)
Samplin

In [11]:
file_path = "stan/pooled.stan"

# Read the Stan code from the file
with open(file_path, "r") as file:
    stan_code = file.read()

warnings.filterwarnings("ignore")

posterior2 = stan.build(stan_code, data=data)

Building...

In file included from /home/mantyke1/.cache/httpstan/4.10.1/models/yeibzhk5/model_yeibzhk5.cpp:2:
In file included from /opt/software/lib/python3.10/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /opt/software/lib/python3.10/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /opt/software/lib/python3.10/site-packages/httpstan/include/stan/math/rev.hpp:10:
In file included from /opt/software/lib/python3.10/site-packages/httpstan/include/stan/math/rev/fun.hpp:26:
                           [a, b, digamma_ab](auto& vi) mutable {
                             ~~^
In file included from /home/mantyke1/.cache/httpstan/4.10.1/models/yeibzhk5/model_yeibzhk5.cpp:2:
In file included from /opt/software/lib/python3.10/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /opt/software/lib/python3.10/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /opt/software/lib/python3.10/site-packages/ht





Building: 33.6s, done.

In [12]:
fit_pooled = posterior2.sample(num_chains=4, num_samples=1000)
pooled_df = fit_pooled.to_frame()  # pandas `DataFrame, requires pandas

Sampling:   0%
Sampling:   1% (100/8000)
Sampling:   4% (300/8000)
Sampling:   8% (600/8000)
Sampling:  12% (1000/8000)
Sampling:  22% (1800/8000)
Sampling:  45% (3600/8000)
Sampling:  66% (5300/8000)
Sampling:  86% (6900/8000)
Sampling: 100% (8000/8000)
Sampling: 100% (8000/8000), done.
Messages received during sampling:
  Gradient evaluation took 0.000168 seconds
  1000 transitions using 10 leapfrog steps per transition would take 1.68 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 0.000173 seconds
  1000 transitions using 10 leapfrog steps per transition would take 1.73 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 0.000168 seconds
  1000 transitions using 10 leapfrog steps per transition would take 1.68 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 0.000174 seconds
  1000 transitions using 10 leapfrog steps per transition would take 1.74 seconds.
  Adjust your expectations accordingly!


In [13]:
file_path = "stan/hierarchical.stan"

# Read the Stan code from the file
with open(file_path, "r") as file:
    stan_code = file.read()

warnings.filterwarnings("ignore")

posterior3 = stan.build(stan_code, data=data)

Building...

In file included from /home/mantyke1/.cache/httpstan/4.10.1/models/e7isi3o6/model_e7isi3o6.cpp:2:
In file included from /opt/software/lib/python3.10/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /opt/software/lib/python3.10/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /opt/software/lib/python3.10/site-packages/httpstan/include/stan/math/rev.hpp:10:
In file included from /opt/software/lib/python3.10/site-packages/httpstan/include/stan/math/rev/fun.hpp:26:
                           [a, b, digamma_ab](auto& vi) mutable {
                             ~~^
In file included from /home/mantyke1/.cache/httpstan/4.10.1/models/e7isi3o6/model_e7isi3o6.cpp:2:
In file included from /opt/software/lib/python3.10/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /opt/software/lib/python3.10/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /opt/software/lib/python3.10/site-packages/ht





Building: 34.4s, done.

In [14]:
fit_hier = posterior3.sample(num_chains=4, num_samples=1000)
hier_df = fit_hier.to_frame()  # pandas `DataFrame, requires pandas

Sampling:   0%
Sampling:   0% (1/8000)
Sampling:   0% (2/8000)
Sampling:   0% (3/8000)
Sampling:   0% (4/8000)
Sampling:   1% (103/8000)
Sampling:   4% (302/8000)
Sampling:   6% (501/8000)
Sampling:   9% (700/8000)
Sampling:  12% (1000/8000)
Sampling:  16% (1300/8000)
Sampling:  19% (1500/8000)
Sampling:  21% (1700/8000)
Sampling:  24% (1900/8000)
Sampling:  28% (2200/8000)
Sampling:  31% (2500/8000)
Sampling:  35% (2800/8000)
Sampling:  52% (4200/8000)
Sampling:  68% (5400/8000)
Sampling:  84% (6700/8000)
Sampling: 100% (8000/8000)
Sampling: 100% (8000/8000), done.
Messages received during sampling:
  Gradient evaluation took 0.000198 seconds
  1000 transitions using 10 leapfrog steps per transition would take 1.98 seconds.
  Adjust your expectations accordingly!
  Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
  Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan_pmdia4n0/model_e7isi3o6

In [15]:
separate_df.describe()

parameters,lp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,alpha.1,alpha.2,alpha.3,...,log_lik.10.22,log_lik.11.22,log_lik.12.22,log_lik.13.22,log_lik.14.22,log_lik.15.22,log_lik.16.22,log_lik.17.22,log_lik.18.22,log_lik.19.22
count,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,...,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0
mean,388.037049,0.940418,0.052958,6.2555,84.44,0.0,-359.579492,0.483549,10.733885,20.953142,...,-0.466067,-0.452709,0.555556,0.813635,1.596839,1.720802,2.190641,2.314294,2.544884,3.328173
std,5.784947,0.082047,0.004764,0.436196,30.312706,0.0,7.928845,0.035928,0.379801,0.664618,...,0.476541,0.54806,0.330649,0.357334,0.236189,0.293865,0.223223,0.243713,0.22986,0.298058
min,361.348339,0.357927,0.045418,6.0,63.0,0.0,-383.485097,0.340234,8.960293,17.941967,...,-3.770005,-3.880226,-1.33394,-1.900096,0.419388,-0.061739,0.87854,0.640828,0.722497,1.478317
25%,384.437643,0.923072,0.050597,6.0,63.0,0.0,-365.151102,0.460738,10.493536,20.537852,...,-0.724018,-0.749764,0.390375,0.643274,1.46827,1.575694,2.071547,2.185722,2.423631,3.184292
50%,388.288737,0.971839,0.054574,6.0,63.0,0.0,-359.965615,0.483602,10.725956,20.963936,...,-0.377466,-0.353583,0.612535,0.88419,1.623944,1.764915,2.2147,2.347043,2.57445,3.37905
75%,392.161567,0.991492,0.056934,7.0,127.0,0.0,-354.319352,0.507198,10.980826,21.384124,...,-0.114682,-0.051052,0.784597,1.064743,1.759451,1.926872,2.344245,2.480225,2.702275,3.534335
max,403.801902,1.0,0.057266,7.0,255.0,0.0,-326.544674,0.637285,12.212003,23.627517,...,0.445165,0.669719,1.294981,1.610757,2.183686,2.375969,2.851837,2.941057,3.120203,4.022232


In [16]:
pooled_df.describe()

parameters,lp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,alpha,beta,sigma,...,log_lik.10.22,log_lik.11.22,log_lik.12.22,log_lik.13.22,log_lik.14.22,log_lik.15.22,log_lik.16.22,log_lik.17.22,log_lik.18.22,log_lik.19.22
count,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,...,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0
mean,-1126.906404,0.920461,0.408353,2.67675,8.1585,0.0,1128.419315,9.970816,-0.186497,8.962861,...,-3.124171,-3.159728,-3.192255,-3.231797,-3.263603,-3.288441,-3.304812,-3.315123,-3.323534,-3.334386
std,1.269551,0.10889,0.03087,0.736814,4.351277,0.0,1.800341,0.854198,0.069757,0.315027,...,0.037287,0.043258,0.04814,0.053587,0.057683,0.060743,0.062703,0.063916,0.064895,0.066144
min,-1134.501049,0.048182,0.369617,1.0,1.0,0.0,1125.40801,6.762271,-0.455511,7.838124,...,-3.265031,-3.321504,-3.376686,-3.437113,-3.482761,-3.519154,-3.542848,-3.557592,-3.569526,-3.584808
25%,-1127.449504,0.891502,0.382911,2.0,7.0,0.0,1127.086977,9.397211,-0.232532,8.753018,...,-3.148724,-3.186941,-3.22221,-3.26523,-3.300474,-3.326969,-3.344191,-3.355241,-3.364157,-3.376255
50%,-1126.557697,0.963955,0.409537,3.0,7.0,0.0,1128.056265,9.968647,-0.185954,8.956545,...,-3.123267,-3.157817,-3.189481,-3.228015,-3.259588,-3.284505,-3.301408,-3.311486,-3.319868,-3.330778
75%,-1125.984712,0.993176,0.434978,3.0,11.0,0.0,1129.397108,10.532391,-0.141079,9.173607,...,-3.099143,-3.130454,-3.160557,-3.195775,-3.224859,-3.247458,-3.262218,-3.271484,-3.279,-3.288805
max,-1125.388813,1.0,0.44472,4.0,19.0,0.0,1139.378139,13.078133,0.074959,10.2552,...,-2.978206,-3.000871,-3.028932,-3.062675,-3.091798,-3.115371,-3.13122,-3.141312,-3.146726,-3.153271


In [17]:
pooled_df.describe()

parameters,lp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,alpha,beta,sigma,...,log_lik.10.22,log_lik.11.22,log_lik.12.22,log_lik.13.22,log_lik.14.22,log_lik.15.22,log_lik.16.22,log_lik.17.22,log_lik.18.22,log_lik.19.22
count,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,...,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0
mean,-1126.906404,0.920461,0.408353,2.67675,8.1585,0.0,1128.419315,9.970816,-0.186497,8.962861,...,-3.124171,-3.159728,-3.192255,-3.231797,-3.263603,-3.288441,-3.304812,-3.315123,-3.323534,-3.334386
std,1.269551,0.10889,0.03087,0.736814,4.351277,0.0,1.800341,0.854198,0.069757,0.315027,...,0.037287,0.043258,0.04814,0.053587,0.057683,0.060743,0.062703,0.063916,0.064895,0.066144
min,-1134.501049,0.048182,0.369617,1.0,1.0,0.0,1125.40801,6.762271,-0.455511,7.838124,...,-3.265031,-3.321504,-3.376686,-3.437113,-3.482761,-3.519154,-3.542848,-3.557592,-3.569526,-3.584808
25%,-1127.449504,0.891502,0.382911,2.0,7.0,0.0,1127.086977,9.397211,-0.232532,8.753018,...,-3.148724,-3.186941,-3.22221,-3.26523,-3.300474,-3.326969,-3.344191,-3.355241,-3.364157,-3.376255
50%,-1126.557697,0.963955,0.409537,3.0,7.0,0.0,1128.056265,9.968647,-0.185954,8.956545,...,-3.123267,-3.157817,-3.189481,-3.228015,-3.259588,-3.284505,-3.301408,-3.311486,-3.319868,-3.330778
75%,-1125.984712,0.993176,0.434978,3.0,11.0,0.0,1129.397108,10.532391,-0.141079,9.173607,...,-3.099143,-3.130454,-3.160557,-3.195775,-3.224859,-3.247458,-3.262218,-3.271484,-3.279,-3.288805
max,-1125.388813,1.0,0.44472,4.0,19.0,0.0,1139.378139,13.078133,0.074959,10.2552,...,-2.978206,-3.000871,-3.028932,-3.062675,-3.091798,-3.115371,-3.13122,-3.141312,-3.146726,-3.153271


In [18]:
hier_df.describe()

parameters,lp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,mu_alpha,mu_beta,sigma_alpha,...,log_lik.10.22,log_lik.11.22,log_lik.12.22,log_lik.13.22,log_lik.14.22,log_lik.15.22,log_lik.16.22,log_lik.17.22,log_lik.18.22,log_lik.19.22
count,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,...,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0
mean,-144.620502,0.857689,0.250828,4.22225,20.206,0.0,166.113498,9.470652,-0.19324,11.944483,...,-0.833122,-0.824591,-0.74451,-0.745502,-0.745831,-0.737447,-0.738595,-0.74241,-0.740928,-0.742199
std,4.871593,0.12992,0.017702,0.418209,7.551412,0.0,6.702864,2.707501,0.068962,2.052805,...,0.215362,0.209304,0.130055,0.128373,0.133858,0.11953,0.120143,0.128785,0.123185,0.127337
min,-173.474069,0.317865,0.227433,3.0,7.0,0.0,144.736063,-2.450962,-0.46678,7.452426,...,-2.266076,-2.389362,-1.96969,-1.870775,-1.689161,-1.804714,-1.844063,-1.740958,-2.126319,-1.920669
25%,-147.602263,0.775394,0.24093,4.0,15.0,0.0,161.525595,7.72194,-0.238635,10.469669,...,-0.909678,-0.902427,-0.78138,-0.781856,-0.779154,-0.773005,-0.771282,-0.776941,-0.776769,-0.775324
50%,-144.234715,0.895308,0.249602,4.0,15.0,0.0,165.663317,9.507282,-0.19345,11.658293,...,-0.760487,-0.756638,-0.705626,-0.708858,-0.70693,-0.704682,-0.705403,-0.706096,-0.705209,-0.703953
75%,-141.178039,0.967398,0.2595,4.0,31.0,0.0,170.409362,11.244621,-0.148692,13.124164,...,-0.687397,-0.683568,-0.665887,-0.666418,-0.665212,-0.663971,-0.665157,-0.665286,-0.664342,-0.664765
max,-130.370427,1.0,0.276674,5.0,63.0,0.0,194.020582,19.105752,0.124377,23.246709,...,-0.551578,-0.532333,-0.548808,-0.541507,-0.542834,-0.531666,-0.536728,-0.528008,-0.553444,-0.538057


In [19]:
alpha_columns = [col for col in separate_df.columns if col.startswith('alpha')]
beta_columns = [col for col in separate_df.columns if col.startswith('beta')]

# Extract alpha and beta as DataFrames
alpha_samples = separate_df[alpha_columns]
beta_samples = separate_df[beta_columns]

In [20]:
xpred = 2022
base_year = 2000
x_offset = xpred - base_year

# Calculate predictions
beta_samples = beta_samples.multiply(x_offset)



In [21]:
predictions = pd.DataFrame()
for i in range(1, 20):
    predictions[f'pred{i}'] = alpha_samples[f'alpha.{i}'] + beta_samples[f'beta.{i}']

# Summarize predictions
pred_mean = predictions.mean()  # Mean predictions
pred_cred = predictions.quantile([0.025, 0.975])  # 95% credible interval


#predictions

In [22]:
pred_mean # separate model

pred1      0.313838
pred2      7.200242
pred3     12.729514
pred4     11.097637
pred5     18.472515
pred6     16.780552
pred7     11.990142
pred8      8.661030
pred9      6.709051
pred10     5.171997
pred11     3.697750
pred12     2.666448
pred13     1.839179
pred14     1.218510
pred15     0.830103
pred16     0.562210
pred17     0.414751
pred18     0.293126
pred19     0.130511
dtype: float64

In [23]:
pred_cred # separate model

Unnamed: 0,pred1,pred2,pred3,pred4,pred5,pred6,pred7,pred8,pred9,pred10,pred11,pred12,pred13,pred14,pred15,pred16,pred17,pred18,pred19
0.025,0.239079,6.394115,11.333222,9.661311,16.860331,15.777953,11.348703,8.021163,6.296325,4.880924,3.449035,2.519932,1.728439,1.156652,0.778988,0.527052,0.386155,0.269233,0.121095
0.975,0.390252,7.990526,14.113281,12.575701,20.053734,17.797894,12.638653,9.296848,7.115099,5.445701,3.947015,2.804154,1.948642,1.277115,0.879776,0.597237,0.444707,0.316508,0.140107


In [24]:
alpha_columns = [col for col in pooled_df.columns if col.startswith('alpha')]
beta_columns = [col for col in pooled_df.columns if col.startswith('beta')]

# Extract alpha and beta as DataFrames
alpha_samples = pooled_df[alpha_columns]
beta_samples = pooled_df[beta_columns]

In [25]:
xpred = 2022
base_year = 2000
x_offset = xpred - base_year

# Calculate predictions
beta_samples = beta_samples.multiply(x_offset)

In [26]:
predictions = pd.DataFrame()

predictions['pred'] = alpha_samples['alpha'] + beta_samples['beta']

# Summarize predictions
pred_mean = predictions.mean()  # Mean predictions
pred_cred = predictions.quantile([0.025, 0.975])  # 95% credible interval

In [27]:
pred_mean # pooled model

pred    5.867881
dtype: float64

In [28]:
pred_cred # pooled model

Unnamed: 0,pred
0.025,4.06545
0.975,7.699394


In [29]:
alpha_columns = [col for col in hier_df.columns if col.startswith('alpha')]
beta_columns = [col for col in hier_df.columns if col.startswith('beta')]

# Extract alpha and beta as DataFrames
alpha_samples = hier_df[alpha_columns]
beta_samples = hier_df[beta_columns]

xpred = 2022
base_year = 2000
x_offset = xpred - base_year

# Calculate predictions
beta_samples = beta_samples.multiply(x_offset)

predictions = pd.DataFrame()
for i in range(1, 20):
    predictions[f'pred{i}'] = alpha_samples[f'alpha.{i}'] + beta_samples[f'beta.{i}']

# Summarize predictions
pred_mean = predictions.mean()  # Mean predictions
pred_cred = predictions.quantile([0.025, 0.975])  # 95% credible interval

In [30]:
pred_mean # hierarchical model

pred1      0.286827
pred2      7.184545
pred3     12.709544
pred4     11.097036
pred5     18.426011
pred6     16.809206
pred7     12.004060
pred8      8.659218
pred9      6.701815
pred10     5.159807
pred11     3.680436
pred12     2.642779
pred13     1.809865
pred14     1.192344
pred15     0.808178
pred16     0.538473
pred17     0.395125
pred18     0.269804
pred19     0.100871
dtype: float64

In [31]:
pred_cred # hierarchical model

Unnamed: 0,pred1,pred2,pred3,pred4,pred5,pred6,pred7,pred8,pred9,pred10,pred11,pred12,pred13,pred14,pred15,pred16,pred17,pred18,pred19
0.025,-0.402559,6.555503,12.028446,10.439001,17.764914,16.144615,11.333318,8.026411,6.058951,4.497527,2.991925,1.994126,1.143206,0.48585,0.165509,-0.103768,-0.297894,-0.384128,-0.588528
0.975,0.95605,7.84153,13.372342,11.726944,19.102833,17.479882,12.703336,9.321144,7.350543,5.832907,4.345959,3.289702,2.477336,1.85909,1.464157,1.197712,1.076874,0.931244,0.767075


In [32]:
hier_summary = az.summary(fit_hier)

In [33]:
hier_summary

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
mu_alpha,9.471,2.708,4.586,14.749,0.037,0.028,5392.0,2293.0,1.0
mu_beta,-0.193,0.069,-0.327,-0.064,0.001,0.001,5853.0,2448.0,1.0
sigma_alpha,11.944,2.053,8.444,15.680,0.029,0.022,6492.0,2840.0,1.0
sigma_beta,0.294,0.055,0.208,0.404,0.001,0.001,6138.0,2860.0,1.0
sigma,0.770,0.028,0.718,0.825,0.000,0.000,5354.0,2964.0,1.0
...,...,...,...,...,...,...,...,...,...
pred[14],0.802,0.831,-0.693,2.401,0.013,0.010,3854.0,3670.0,1.0
pred[15],0.544,0.855,-1.001,2.178,0.014,0.010,3938.0,3814.0,1.0
pred[16],0.370,0.869,-1.279,1.986,0.014,0.010,3950.0,3849.0,1.0
pred[17],0.264,0.834,-1.213,1.867,0.013,0.010,3827.0,3674.0,1.0


In [34]:
values = tmp['r_hat'].values

NameError: name 'tmp' is not defined

In [None]:
ones = True
for i in values:
    if i != 1:
        print(i)
        ones = False
if ones:
    print('All r_hats are equal to 1')

In [None]:
pooled_summary = az.summary(fit_pooled)

In [None]:
pooled_summary

In [None]:
separate_summary = az.summary(fit_separate)
separate_summary

In [None]:
ones = True
for i in values:
    if i != 1:
        print(i)
        ones = False
if ones:
    print('All r_hats are equal to 1')

In [None]:
tmp = az.summary(fit_separate)
vals = tmp['r_hat'].values
ones = True
for i in values:
    if i != 1:
        print(i)
        ones = False
if ones:
    print('All r_hats are equal to 1')

In [None]:
pooled_idata = az.from_pystan(posterior=fit_pooled)
az.plot_ess(pooled_idata, var_names=['alpha', 'beta', 'sigma'], kind='local');

In [None]:
separate_idata = az.from_pystan(posterior=fit_separate)
az.plot_ess(separate_idata, var_names=['alpha', 'beta', 'sigma'], kind='local');

In [None]:
hier_idata = az.from_pystan(posterior=fit_hier)
az.plot_ess(hier_idata, var_names=['alpha', 'beta', 'sigma'], kind='local', min_ess=1000);

In [None]:
diagnostics = hier_summary[["mean", "r_hat", "ess_bulk", "ess_tail"]]
diagnostics[100:120]

In [None]:
az.plot_trace(pooled_idata, var_names=['alpha', 'beta', 'sigma']);

In [None]:
_ = az.plot_trace(hier_idata, var_names=['alpha', 'beta', 'sigma']);

In [None]:
az.plot_trace(separate_idata, var_names=['alpha', 'beta', 'sigma']);

In [36]:
# Loo-cv Elias
sep_idata = az.from_pystan(posterior=fit_separate, log_likelihood="log_lik")
loo_separate = az.loo(sep_idata)

pool_idata = az.from_pystan(posterior=fit_pooled, log_likelihood="log_lik")
loo_pooled = az.loo(pool_idata)

hier_idata = az.from_pystan(posterior=fit_hier, log_likelihood="log_lik")
loo_hier = az.loo(hier_idata)

In [37]:
loo_separate

Computed from 4000 posterior samples and 418 observations log-likelihood matrix.

         Estimate       SE
elpd_loo    28.39    33.76
p_loo       46.49        -

------

Pareto k diagnostic values:
                         Count   Pct.
(-Inf, 0.5]   (good)      410   98.1%
 (0.5, 0.7]   (ok)          7    1.7%
   (0.7, 1]   (bad)         1    0.2%
   (1, Inf)   (very bad)    0    0.0%

In [39]:
loo_pooled

Computed from 4000 posterior samples and 418 observations log-likelihood matrix.

         Estimate       SE
elpd_loo -1511.53    16.47
p_loo        3.39        -
------

Pareto k diagnostic values:
                         Count   Pct.
(-Inf, 0.5]   (good)      418  100.0%
 (0.5, 0.7]   (ok)          0    0.0%
   (0.7, 1]   (bad)         0    0.0%
   (1, Inf)   (very bad)    0    0.0%

In [40]:
loo_hier

Computed from 4000 posterior samples and 418 observations log-likelihood matrix.

         Estimate       SE
elpd_loo  -503.56    25.62
p_loo       36.77        -
------

Pareto k diagnostic values:
                         Count   Pct.
(-Inf, 0.5]   (good)      415   99.3%
 (0.5, 0.7]   (ok)          3    0.7%
   (0.7, 1]   (bad)         0    0.0%
   (1, Inf)   (very bad)    0    0.0%