In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import stan # python3 -m pip install pystan
import nest_asyncio
nest_asyncio.apply()
import warnings

In [2]:
# Locations of the input CSV files, relative to the working directory.
# NOTE(review): "precentages.csv" looks like a misspelling of "percentages";
# it is kept verbatim because it may match the actual file name on disk.
numbers_path, percentages_path, death_rates_path = (
    "dataframes/numbers.csv",
    "dataframes/precentages.csv",
    "dataframes/death_rates.csv",
)

In [3]:
# Load the semicolon-separated percentages table, keep only its first 22 rows
# and discard the "[All]" column, then show the resulting column dtypes.
df = (
    pd.read_csv(percentages_path, sep=';', header=0)
    .head(22)
    .drop(columns="[All]")
)
df.dtypes

Year       object
[0]        object
[1-4]      object
[5-9]      object
[10-14]    object
[15-19]    object
[20-24]    object
[25-29]    object
[30-34]    object
[35-39]    object
[40-44]    object
[45-49]    object
[50-54]    object
[55-59]    object
[60-64]    object
[65-69]    object
[70-74]    object
[75-79]    object
[80-84]    object
[85+]      object
dtype: object

In [4]:
# Convert every cell from a decimal-comma string (e.g. "12,5") to a number.
# One pass is enough: pd.to_numeric with errors='coerce' turns anything that
# cannot be parsed into NaN. The former extra float(...) pass was redundant
# and would raise ValueError on such a cell before the coercion could apply.
df = df.map(lambda x: pd.to_numeric(str(x).replace(',', '.'), errors='coerce'))

# Years are whole numbers; astype(int) raises if a Year cell was coerced to
# NaN, which surfaces a malformed row instead of silently keeping it.
df["Year"] = df["Year"].astype(int)
df = df.set_index("Year")

In [5]:
# Swap rows and columns: the Year index becomes the column labels and the
# former columns become the rows.
# Note: this rebinds df, so running the cell a second time flips it back.
df = df.transpose()

In [6]:
# Input data passed to the Stan models.
# N and Y are taken from the table itself instead of being typed in by hand
# (previously 19 and 22), so they cannot drift out of sync with the rows and
# columns that were actually loaded.
# NOTE(review): assumes the models declare accidentData with N rows and
# Y columns, matching df's (rows, columns) layout - confirm in the .stan files.
n_rows, n_cols = df.shape
data = dict(
    N = n_rows,
    Y = n_cols,
    accidentData = df.values,
    xpred = 2022)

In [7]:
file_path = "stan/separate.stan"

# Read the Stan code from the file. The encoding is pinned so the text read
# does not depend on the platform's default locale.
with open(file_path, "r", encoding="utf-8") as file:
    stan_code = file.read()

# Build the model and bind the data to it. A fixed random_seed makes the
# draws produced by later .sample() calls reproducible from run to run.
posterior1 = stan.build(stan_code, data=data, random_seed=1)

Building...



Building: found in cache, done.

In [8]:
# Sample from the separate model: 4 chains with num_samples=1000 each, then
# collect the draws into a pandas DataFrame.
fit_separate = posterior1.sample(
    num_chains=4,
    num_samples=1000,
)
separate_df = fit_separate.to_frame()  # pandas DataFrame; requires pandas

Sampling:   0%
Sampling:   0% (1/8000)
Sampling:   0% (2/8000)
Sampling:   0% (3/8000)
Sampling:   0% (4/8000)
Sampling:   1% (103/8000)
Sampling:   3% (202/8000)
Sampling:   4% (301/8000)
Sampling:   5% (400/8000)
Sampling:   6% (500/8000)
Sampling:   8% (600/8000)
Sampling:   9% (700/8000)
Sampling:  10% (800/8000)
Sampling:  11% (900/8000)
Sampling:  12% (1000/8000)
Sampling:  14% (1100/8000)
Sampling:  15% (1200/8000)
Sampling:  16% (1300/8000)
Sampling:  18% (1400/8000)
Sampling:  19% (1500/8000)
Sampling:  20% (1600/8000)
Sampling:  21% (1700/8000)
Sampling:  22% (1800/8000)
Sampling:  24% (1900/8000)
Sampling:  25% (2000/8000)
Sampling:  26% (2100/8000)
Sampling:  28% (2200/8000)
Sampling:  29% (2300/8000)
Sampling:  30% (2400/8000)
Sampling:  31% (2500/8000)
Sampling:  32% (2600/8000)
Sampling:  34% (2700/8000)
Sampling:  35% (2800/8000)
Sampling:  36% (2900/8000)
Sampling:  38% (3000/8000)
Sampling:  39% (3100/8000)
Sampling:  40% (3200/8000)
Sampling:  41% (3301/8000)
Samplin

In [9]:
file_path = "stan/pooled.stan"

# Read the Stan code from the file. The encoding is pinned so the text read
# does not depend on the platform's default locale.
with open(file_path, "r", encoding="utf-8") as file:
    stan_code = file.read()

# Silences every Python warning from here on; the filter is process-wide and
# stays active for all later cells.
warnings.filterwarnings("ignore")

# Build the model and bind the data to it. A fixed random_seed makes the
# draws produced by later .sample() calls reproducible from run to run.
posterior2 = stan.build(stan_code, data=data, random_seed=1)

Building...



Building: found in cache, done.

In [10]:
# Sample from the pooled model: 4 chains with num_samples=1000 each, then
# collect the draws into a pandas DataFrame.
fit_pooled = posterior2.sample(
    num_chains=4,
    num_samples=1000,
)
pooled_df = fit_pooled.to_frame()  # pandas DataFrame; requires pandas

Sampling:   0%
Sampling:   2% (200/8000)
Sampling:   5% (400/8000)
Sampling:   8% (600/8000)
Sampling:  10% (800/8000)
Sampling:  32% (2600/8000)
Sampling:  55% (4400/8000)
Sampling:  78% (6200/8000)
Sampling: 100% (8000/8000)
Sampling: 100% (8000/8000), done.
Messages received during sampling:
  Gradient evaluation took 0.000163 seconds
  1000 transitions using 10 leapfrog steps per transition would take 1.63 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 0.000208 seconds
  1000 transitions using 10 leapfrog steps per transition would take 2.08 seconds.
  Adjust your expectations accordingly!
  Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
  Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan_99avklve/model_3yhidhyk.stan', line 30, column 6 to column 51)
  Gradient evaluation took 0.000204 seconds
  1000 transitions using 10 leapfrog steps per transition wou

In [11]:
file_path = "stan/hierarchical.stan"

# Read the Stan code from the file. The encoding is pinned so the text read
# does not depend on the platform's default locale.
with open(file_path, "r", encoding="utf-8") as file:
    stan_code = file.read()

# Silences every Python warning from here on; the filter is process-wide and
# stays active for all later cells.
warnings.filterwarnings("ignore")

# Build the model and bind the data to it. A fixed random_seed makes the
# draws produced by later .sample() calls reproducible from run to run.
posterior3 = stan.build(stan_code, data=data, random_seed=1)

Building...



Building: found in cache, done.

In [12]:
# Sample from the hierarchical model: 4 chains with num_samples=1000 each,
# then collect the draws into a pandas DataFrame.
fit_hier = posterior3.sample(
    num_chains=4,
    num_samples=1000,
)
hier_df = fit_hier.to_frame()  # pandas DataFrame; requires pandas

Sampling:   0%
Sampling:   0% (1/8000)
Sampling:   0% (2/8000)
Sampling:   0% (3/8000)
Sampling:   0% (4/8000)
Sampling:   1% (103/8000)
Sampling:   3% (202/8000)
Sampling:   5% (401/8000)
Sampling:   9% (701/8000)
Sampling:  11% (901/8000)
Sampling:  14% (1100/8000)
Sampling:  18% (1400/8000)
Sampling:  21% (1700/8000)
Sampling:  24% (1900/8000)
Sampling:  28% (2200/8000)
Sampling:  30% (2400/8000)
Sampling:  32% (2600/8000)
Sampling:  51% (4100/8000)
Sampling:  70% (5600/8000)
Sampling:  86% (6900/8000)
Sampling: 100% (8000/8000)
Sampling: 100% (8000/8000), done.
Messages received during sampling:
  Gradient evaluation took 0.000604 seconds
  1000 transitions using 10 leapfrog steps per transition would take 6.04 seconds.
  Adjust your expectations accordingly!
  Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
  Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan_q1kx9v7a/model_v5p5ozjz.

In [13]:
# Per-column summary statistics (count, mean, std, min, quartiles, max) of the
# separate-model draws, covering sampler columns such as lp__ as well.
separate_df.describe()

parameters,lp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,alpha.1,alpha.2,alpha.3,...,pred.10,pred.11,pred.12,pred.13,pred.14,pred.15,pred.16,pred.17,pred.18,pred.19
count,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,...,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0
mean,385.33334,0.912354,0.062153,6.00025,63.952,0.0,-356.884218,0.483853,10.723563,20.932875,...,5.108773,3.661486,2.662479,1.841951,1.218559,0.831691,0.558906,0.408785,0.288106,0.127832
std,5.633471,0.116892,0.00344,0.027388,7.797749,0.0,7.80545,0.035496,0.376868,0.663497,...,0.347575,0.29986,0.187613,0.135742,0.07959,0.063849,0.044733,0.038769,0.031179,0.012244
min,356.364939,0.036795,0.056568,5.0,31.0,0.0,-383.232742,0.303958,9.324209,18.249208,...,3.778624,2.476089,1.960831,1.272662,0.860765,0.547966,0.385758,0.254902,0.185245,0.078711
25%,381.701869,0.888484,0.061247,6.0,63.0,0.0,-362.154001,0.461113,10.481318,20.508106,...,4.877598,3.466498,2.538216,1.754582,1.165926,0.789178,0.52919,0.383513,0.267325,0.119701
50%,385.701181,0.953734,0.063047,6.0,63.0,0.0,-357.115451,0.483774,10.729355,20.944864,...,5.101152,3.66445,2.664592,1.841136,1.218138,0.832241,0.558707,0.409051,0.287908,0.127948
75%,389.262137,0.985629,0.063953,6.0,63.0,0.0,-351.882856,0.506914,10.973067,21.350768,...,5.341003,3.857441,2.785617,1.930562,1.269295,0.873672,0.587886,0.433162,0.308373,0.13583
max,401.339038,1.0,0.065949,7.0,127.0,0.0,-323.788285,0.625856,12.587416,23.710745,...,7.298508,4.920538,3.407589,2.375189,1.551144,1.058269,0.734052,0.554286,0.414033,0.189397


In [14]:
# Per-column summary statistics (count, mean, std, min, quartiles, max) of the
# pooled-model draws, covering sampler columns such as lp__ as well.
pooled_df.describe()

parameters,lp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,alpha,beta,sigma,...,pred.10,pred.11,pred.12,pred.13,pred.14,pred.15,pred.16,pred.17,pred.18,pred.19
count,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,...,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0
mean,-1127.082457,0.922527,0.402446,2.7505,8.6085,0.0,1128.599742,9.982029,-0.187887,8.963332,...,5.377049,5.552824,5.837464,5.66391,5.702964,5.668083,5.702101,5.677942,5.507806,5.763983
std,1.281093,0.108664,0.053751,0.763476,4.506307,0.0,1.770914,0.870634,0.070377,0.319478,...,8.972622,9.22552,8.964224,8.832361,8.988388,9.173054,8.9427,9.196122,8.965583,8.904583
min,-1136.104203,0.241806,0.325868,1.0,1.0,0.0,1125.663391,6.91685,-0.472788,7.948475,...,-27.922483,-26.411598,-26.428749,-25.438749,-29.705978,-33.536955,-26.14563,-23.810846,-25.680557,-34.43381
25%,-1127.630727,0.89743,0.379848,2.0,7.0,0.0,1127.32064,9.403682,-0.23399,8.742105,...,-0.578615,-0.589063,-0.250383,-0.37808,-0.224367,-0.538708,-0.368368,-0.618891,-0.515252,-0.36552
50%,-1126.752091,0.967481,0.403217,3.0,7.0,0.0,1128.258023,9.944442,-0.186674,8.957312,...,5.444396,5.461926,5.776256,5.503665,5.88144,5.549993,5.576799,5.488046,5.425135,5.693679
75%,-1126.162628,0.993728,0.425815,3.0,15.0,0.0,1129.490786,10.549632,-0.14102,9.167469,...,11.505515,11.67848,11.902429,11.776692,11.826918,12.267844,11.842522,12.045364,11.528132,11.524305
max,-1125.524339,1.0,0.477484,4.0,31.0,0.0,1139.401332,13.841967,0.089585,10.160981,...,38.64447,45.610834,36.098054,41.330303,36.302124,40.62965,36.986568,41.399003,36.104478,40.264698


In [15]:
# Per-column summary statistics (count, mean, std, min, quartiles, max) of the
# hierarchical-model draws, covering sampler columns such as lp__ as well.
hier_df.describe()

parameters,lp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,mu_alpha,mu_beta,sigma_alpha,...,pred.10,pred.11,pred.12,pred.13,pred.14,pred.15,pred.16,pred.17,pred.18,pred.19
count,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,...,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0
mean,-144.677503,0.872358,0.237527,4.44275,23.33,0.0,166.114491,9.412454,-0.191781,11.972743,...,5.057942,3.641227,2.666777,1.818519,1.187416,0.773703,0.523464,0.39966,0.276659,0.097033
std,4.937648,0.120146,0.01936,0.499785,8.007192,0.0,6.774278,2.670181,0.067648,2.144911,...,0.83548,0.851477,0.834336,0.83756,0.850133,0.845969,0.855848,0.854819,0.877676,0.863671
min,-164.697857,0.332572,0.218042,3.0,7.0,0.0,146.460649,-1.671831,-0.492077,6.397679,...,2.150528,0.527556,-0.790937,-0.895081,-1.792867,-2.263516,-3.166858,-2.319647,-3.037495,-3.495633
25%,-147.746217,0.800491,0.218335,4.0,15.0,0.0,161.283623,7.696439,-0.234716,10.475249,...,4.494833,3.076652,2.099129,1.255539,0.614758,0.212348,-0.038411,-0.181163,-0.320482,-0.490763
50%,-144.29014,0.905752,0.236484,4.0,31.0,0.0,165.916117,9.396807,-0.191843,11.664811,...,5.053409,3.642403,2.693854,1.810853,1.178906,0.780514,0.524144,0.397572,0.286148,0.09596
75%,-141.253365,0.972958,0.255676,5.0,31.0,0.0,170.580353,11.139008,-0.149004,13.109059,...,5.640963,4.207392,3.219917,2.383091,1.764253,1.353927,1.104347,0.9816,0.870206,0.664424
max,-130.985307,1.0,0.2591,5.0,47.0,0.0,195.288787,18.666658,0.051828,27.037826,...,8.420636,6.949634,6.071378,5.072319,4.003874,3.97349,3.253731,3.694729,3.446323,3.347694


In [51]:
# Names of the separate-model columns whose label starts with 'alpha' / 'beta'.
alpha_columns = list(filter(lambda name: name.startswith('alpha'), separate_df.columns))
beta_columns = list(filter(lambda name: name.startswith('beta'), separate_df.columns))

# Pull the matching draws out as two DataFrames (one column per parameter).
alpha_samples = separate_df.loc[:, alpha_columns]
beta_samples = separate_df.loc[:, beta_columns]

In [52]:
# Year to predict for, expressed as an offset from the base year.
# NOTE(review): base_year is presumably the year the Stan models measure x
# from - confirm against the .stan files.
xpred = 2022
base_year = 2000
x_offset = xpred - base_year

# Slope contribution at xpred: every beta draw times the year offset.
# Recomputed from separate_df on each run so that re-executing this cell does
# not multiply already-scaled values by the offset a second time.
beta_samples = separate_df[beta_columns].multiply(x_offset)



In [55]:
# Per-draw prediction for group i: alpha.i + beta.i, where beta_samples is
# expected to already be scaled by the year offset.
# The frame is built in one step from a dict of columns, and the number of
# groups comes from alpha_samples instead of a hard-coded 19.
n_groups = alpha_samples.shape[1]
predictions = pd.DataFrame({
    f'pred{i}': alpha_samples[f'alpha.{i}'] + beta_samples[f'beta.{i}']
    for i in range(1, n_groups + 1)
})

# Summarize predictions
pred_mean = predictions.mean()  # mean prediction per group
pred_cred = predictions.quantile([0.025, 0.975])  # 2.5% and 97.5% quantiles per group

predictions

Unnamed: 0_level_0,pred1,pred2,pred3,pred4,pred5,pred6,pred7,pred8,pred9,pred10,pred11,pred12,pred13,pred14,pred15,pred16,pred17,pred18,pred19
draws,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,0.231546,7.296286,13.087388,11.627231,18.622458,16.528224,12.099427,8.082912,6.870210,5.258144,3.666718,2.773005,1.778794,1.238076,0.816415,0.537621,0.379469,0.308633,0.137208
1,0.325417,7.420590,12.272147,10.926373,18.315980,17.579872,11.836845,8.855062,6.601441,5.244454,3.733797,2.630393,1.880689,1.184411,0.819624,0.568557,0.429842,0.283974,0.127217
2,0.283571,7.107119,12.647219,13.104084,18.856647,16.626609,12.139733,7.764389,6.731030,5.170951,3.562047,2.683745,1.924098,1.180956,0.821435,0.543731,0.394681,0.280043,0.134967
3,0.252944,6.796580,12.919900,11.612730,18.671641,17.011426,11.482335,8.553752,6.620465,5.384683,3.670355,2.594233,1.826853,1.197248,0.843372,0.566603,0.404038,0.286626,0.127908
4,0.277953,7.556542,13.217664,10.944023,17.261622,16.057008,11.796594,8.686471,6.973683,5.183354,3.839888,2.776789,1.837635,1.229160,0.783175,0.587244,0.402572,0.296841,0.140350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,0.328814,7.274197,13.395506,11.382047,19.676630,16.868742,12.085492,8.677274,6.854269,5.208016,3.960757,2.693546,1.838216,1.184569,0.844507,0.584857,0.425089,0.308798,0.124974
3996,0.311659,7.091041,12.709196,11.675497,19.110162,16.813667,12.358486,8.618941,6.506065,4.973850,3.603177,2.783450,1.905717,1.254203,0.825458,0.539569,0.406929,0.281066,0.132188
3997,0.308141,5.787338,12.768076,9.300928,19.586806,16.697024,11.914800,8.789192,6.677271,5.153023,3.819597,2.661864,1.861583,1.208102,0.860916,0.557050,0.416383,0.296384,0.133195
3998,0.228477,7.097107,14.256861,11.858408,18.823675,16.641201,11.669493,8.565288,6.840585,5.318525,3.525593,2.722405,1.909233,1.187948,0.822826,0.574892,0.398184,0.315392,0.121225


In [56]:
# Show the mean of the predictions for each pred column.
pred_mean

pred1      0.314263
pred2      7.198708
pred3     12.727170
pred4     11.106540
pred5     18.476875
pred6     16.792393
pred7     11.978941
pred8      8.665486
pred9      6.707732
pred10     5.175211
pred11     3.692659
pred12     2.667878
pred13     1.840599
pred14     1.217870
pred15     0.831507
pred16     0.561978
pred17     0.414588
pred18     0.293046
pred19     0.130706
dtype: float64

In [57]:
# Show the 2.5% and 97.5% quantiles of the predictions for each pred column.
pred_cred

Unnamed: 0,pred1,pred2,pred3,pred4,pred5,pred6,pred7,pred8,pred9,pred10,pred11,pred12,pred13,pred14,pred15,pred16,pred17,pred18,pred19
0.025,0.240102,6.376684,11.324851,9.709908,16.881524,15.827214,11.340258,8.022873,6.289776,4.901234,3.462069,2.525472,1.730243,1.156357,0.783057,0.527172,0.385266,0.268404,0.120929
0.975,0.391114,8.019328,14.150922,12.586968,20.169814,17.768211,12.620606,9.309738,7.128139,5.462633,3.929346,2.814856,1.945939,1.28059,0.882781,0.597071,0.445337,0.317667,0.140836
