In [1]:
# Import

import pandas as pd
import numpy as np
import pymc as pm
import arviz as az
import json
import re
import os
import networkx as nx

In [2]:
def load_jsonc(filepath):
    """Load JSONC file (JSON with comments)"""
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()
    
    # Remove single-line comments (// ...)
    content = re.sub(r'//.*?$', '', content, flags=re.MULTILINE)
    
    # Remove multi-line comments (/* ... */)
    content = re.sub(r'/\*.*?\*/', '', content, flags=re.DOTALL)
    
    # Remove trailing commas before closing brackets/braces
    content = re.sub(r',\s*([}\]])', r'\1', content)
    
    return json.loads(content)

def load_json(filepath):
    """Load regular JSON file"""
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)

def load_any(filepath):
    """Load either JSON or JSONC file based on extension"""
    if filepath.endswith('.jsonc'):
        return load_jsonc(filepath)
    else:
        return load_json(filepath)

In [3]:
# TASK 1: relocate age_weights to parameters.jsonc file
input_dir = './input_data'
stage_dirs = ['amd_sim_data', 'amd_sim_data_Early', 'amd_sim_data_Intermediate', 'amd_sim_data_Late-dry', 'amd_sim_data_Late-wet']

for stage_dir in stage_dirs:
    parameters = load_jsonc(f'{input_dir}/{stage_dir}/parameters.jsonc')
    print(f'{stage_dir:<30} parameters keys: {parameters.keys()}')

amd_sim_data                   parameters keys: dict_keys(['rr', 'f', 'i', 'age_weights', 'ages', 'p', 'r', 'pf', 'X'])
amd_sim_data_Early             parameters keys: dict_keys(['rr', 'f', 'i', 'age_weights', 'ages', 'p', 'r', 'pf', 'X'])
amd_sim_data_Intermediate      parameters keys: dict_keys(['rr', 'f', 'i', 'age_weights', 'ages', 'p', 'r', 'pf', 'X'])
amd_sim_data_Late-dry          parameters keys: dict_keys(['rr', 'f', 'i', 'age_weights', 'ages', 'p', 'r', 'pf', 'X'])
amd_sim_data_Late-wet          parameters keys: dict_keys(['rr', 'f', 'i', 'age_weights', 'ages', 'p', 'r', 'pf', 'X'])


In [4]:
# TASK 2: update Load based on region_id_graph
stage_dirs = ['amd_sim_data', 'amd_sim_data_Early', 'amd_sim_data_Intermediate', 'amd_sim_data_Late-dry', 'amd_sim_data_Late-wet']
stage = stage_dirs[2] # Intermediate

#------------------------------------------------------------------------------------------------------------#
# Read input data

input_data = pd.read_csv(f'{input_dir}/{stage}/input_data.csv')
output_template = pd.read_csv(f'{input_dir}/{stage}/output_template.csv')
parameters = load_any(f'{input_dir}/{stage}/parameters.jsonc')    # dict. {'p': {}, 'age_weights': [], 'ages: []}
hierarchy = load_any(f'{input_dir}/{stage}/hierarchy.json')       # dict of lists. {'nodes': [], 'edges': []}
nodes_to_fit = load_any(f'{input_dir}/{stage}/nodes_to_fit.json') # LIST of strings

print(f'number of rows: {len(input_data)}')
print(f'number of unique location_id: {input_data["location_id"].nunique()}')
print('--------------------------------')

#------------------------------------------------------------------------------------------------------------#
# Create region_graph
nodes = hierarchy['nodes']
name_to_id = {} # warning: this does not handle duplicate names
id_to_name = {}

region_id_graph = nx.DiGraph()

for node in nodes:
  name_to_id[node[0]] = node[1]['location_id']
  id_to_name[node[1]['location_id']] = node[0]

  # create region_id_graph
  region_id_graph.add_node(node[1]['location_id'],
                            level = node[1]['level'],
                            parent_id = node[1]['parent_id'],
                            name = node[0]
                          )

  my_id = node[1]['location_id']
  parent_id = node[1]['parent_id']
  if my_id != parent_id: # if my_id is not the root node
    region_id_graph.add_edge(parent_id, my_id)

print(f"number of nodes: {region_id_graph.number_of_nodes()}") 
print(f"number of edges: {region_id_graph.number_of_edges()}")

number of rows: 207
number of unique location_id: 18
--------------------------------
number of nodes: 233
number of edges: 232


In [5]:
print(id_to_name[422]) # IDs are not incremental

print(region_id_graph.nodes[422]['level'])
print(region_id_graph.nodes[422]['parent_id'])
print(region_id_graph.nodes[422]['name'])

United States Virgin Islands
3
104
United States Virgin Islands


In [6]:
# TASK 3: define "coords"

country_list = []
region_list = []
super_region_list = []

for node in hierarchy['nodes']:
  if node[1]['level'] == 3:
    country_list.append(node[1]['location_id'])
  elif node[1]['level'] == 2:
    region_list.append(node[1]['location_id'])
  elif node[1]['level'] == 1:
    super_region_list.append(node[1]['location_id'])
    
coords = {
    "country":      country_list,
    "region":       region_list,
    "super_region": super_region_list,
}

print(len(country_list))
print(len(region_list))
print(len(super_region_list))

204
21
7


In [7]:
# Utilizes id_to_name to print the name of the location
def describe():
        G = region_id_graph
        df = input_data
        for n in nx.dfs_postorder_nodes(G, 1):
            cnt = df['location_id'].eq(n).sum() + sum(G.nodes[c].get('cnt', 0) for c in G.successors(n))
            G.nodes[n]['cnt'] = int(cnt)
            G.nodes[n]['depth'] = nx.shortest_path_length(G, 1, n)
            
        for n in nx.dfs_preorder_nodes(G, 1):
            if G.nodes[n]['cnt'] > 0:
                print('  '*G.nodes[n]['depth'] + id_to_name[n], G.nodes[n]['cnt'])

# describe()

def keep():
    pass
    # Suggestion: filter input_data during "LOAD"

def filter_input_data_by_data_type(input_data: pd.DataFrame, data_type: str) -> pd.DataFrame:
        if not input_data.empty:
            return input_data[input_data['data_type'] == data_type]
        return input_data

# since our input_data only has 'p' data, this will return the same input_data
filter_input_data_by_data_type(input_data, 'p').head() 

Unnamed: 0,area,location_id,stage,stage_id,sex,sex_id,year_id,age_start,age_end,effective_sample_size,value,standard_error,x_sdi,x_tob,data_type,upper_ci,lower_ci,age_weights
0,Netherlands,89,Intermediate,5,Male,1,1990,55,64,1418.0,0.040903,0.00526,0.794612,0.434156,p,,,
1,Netherlands,89,Intermediate,5,Female,2,1990,55,64,1802.0,0.033851,0.00426,0.794612,0.38381,p,,,
2,Netherlands,89,Intermediate,5,Male,1,1990,65,74,1382.0,0.072359,0.006969,0.794612,0.434156,p,,,
3,Netherlands,89,Intermediate,5,Female,2,1990,65,74,1865.0,0.036997,0.004371,0.794612,0.38381,p,,,
4,Netherlands,89,Intermediate,5,Male,1,1990,75,84,796.0,0.103015,0.010774,0.794612,0.434156,p,,,


In [8]:
# process.asr() | parameters
# ------------------------------------------------------------
data_type                = 'p'
reference_area           = 'Global'
reference_sex            = 'Both'
reference_year           = 'all'
mu_age                   = None
mu_age_parent            = None
sigma_age_parent         = None
rate_type                = 'neg_binom'
lower_bound              = None
interpolation_method     = 'linear'
include_covariates       = True
zero_re                  = False
# ------------------------------------------------------------

In [9]:
# process.asr() | local variables
# ------------------------------------------------------------
ages = np.array(parameters['ages'], dtype=np.float64)
age_weights = np.array(parameters['age_weights'], dtype=np.float64)
data = filter_input_data_by_data_type(input_data, data_type)
lb_data = filter_input_data_by_data_type(input_data, lower_bound) if lower_bound else None
params_of_data_type = parameters.get(data_type, {})

# check: mu_age_parent & sigma_age_parent
#  if either mu_age_parent or sigma_age_parent is NaN, set them to None
if (isinstance(mu_age_parent, np.ndarray) and np.isnan(mu_age_parent).any()) or \
    (isinstance(sigma_age_parent, np.ndarray) and np.isnan(sigma_age_parent).any()):

    mu_age_parent = None
    sigma_age_parent = None
# ------------------------------------------------------------
print(f'ages: {ages} | type: {type(ages)} | ages.dtype: {ages.dtype}')

ages: [ 2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18. 19.
 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35. 36. 37.
 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53. 54. 55.
 56. 57. 58. 59. 60. 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. 71. 72. 73.
 74. 75. 76. 77. 78. 79. 80. 81. 82. 83. 84. 85. 86. 87. 88. 89. 90. 91.
 92. 93. 94.] | type: <class 'numpy.ndarray'> | ages.dtype: float64


In [10]:
# process.asr() | [1] Prepare spline.spline (knots, smoothing)
# ------------------------------------------------------------
knots = np.array(params_of_data_type.get('parameter_age_mesh', np.arange(ages[0], ages[-1] + 1, 5)), dtype=np.float64)
# knots = np.ndarry: array([ 2, 30, 45, 60, 80, 94])

smooth_map = {'No Prior': np.inf, 'Slightly': 0.5, 'Moderately': 0.05, 'Very': 0.005}  # type(np.inf) == float
smoothness_param = params_of_data_type.get('smoothness')

if isinstance(smoothness_param, dict): 
    amount = smoothness_param.get('amount')

    if isinstance(amount, (int, float)): # smoothness_param is dict, and amount is int or float
        smoothing = float(amount)
    else:                                # smoothness_param is dict, and amount may be string
        smoothing = smooth_map.get(amount, 0.0)

else:                                    # smoothness_param may be string
    smoothing = smooth_map.get(smoothness_param, 0.0)
# after all, smoothing is float

# ------------------------------------------------------------
print(f'knots: {knots} | type: {type(knots)} | knots.dtype: {knots.dtype}')
print(f'smoothing: {smoothing}')

knots: [ 2. 30. 45. 60. 80. 94.] | type: <class 'numpy.ndarray'> | knots.dtype: float64
smoothing: 0.5


In [11]:
def inspect_model(model, var_name=None, show_shared_data=True):
    """
    Inspect a PyMC model. If var_name is None, print a summary,
    plus any shared_data contents. Otherwise, show details about a specific variable.
    """
    if var_name is None:
        print("📊 Model Summary:")
        print(f"  • Free RVs       : {len(model.free_RVs)} {[rv.name for rv in model.free_RVs]}")
        print(f"  • Observed RVs   : {len(model.observed_RVs)} {[rv.name for rv in model.observed_RVs]}")
        print(f"  • Deterministics : {len(model.deterministics)} {[rv.name for rv in model.deterministics]}")
        print(f"  • Potentials     : {len(model.potentials)} {[pot.name for pot in model.potentials]}")
        print(f"  • Total Named RVs: {len(model.named_vars)}")

        # --- Print shared_data contents if present ---
        if show_shared_data:
            if hasattr(model, "shared_data"):
                sd = model.shared_data
                if isinstance(sd, dict) and sd:
                    print("\n🔖 shared_data:")
                    for key, val in sd.items():
                        if isinstance(val, np.ndarray):
                            print(f"  • {key:15s}: array, shape={val.shape}, dtype={val.dtype}")
                        else:
                            print(f"  • {key:15s}: {val!r}")

    else:
        var_dict = model.named_vars
        if var_name not in var_dict:
            print(f"❌ Variable '{var_name}' not found in model.named_vars.")
            return

        var = var_dict[var_name]
        print(f"🔍 Variable: {var_name}")
        print(f"  • Type     : {type(var)}")
        print(f"  • Shape    : {getattr(var, 'shape', None)}")
        print(f"  • DType    : {getattr(var, 'dtype', None)}")
        print(f"  • Owner OP : {var.owner.op if getattr(var, 'owner', None) else 'None'}")

        if hasattr(var, 'distribution'):
            dist = var.distribution
            print(f"  • Distribution: {dist.__class__.__name__}")
            if hasattr(dist, 'dist'):
                print(f"    - PyMC Dist : {dist.dist.__class__.__name__}")
            if hasattr(dist, 'kwargs'):
                print("    - Parameters:")
                for k, v in dist.kwargs.items():
                    print(f"      {k}: {v}")

        if hasattr(var, 'eval'):
            try:
                val = var.eval()
                print(f"  • Current value (eval): {val}")
            except Exception as e:
                print(f"  • Could not evaluate variable: {e}")


In [12]:
pm_model = pm.Model()

with pm_model: 
    pm_model.shared_data = {    # NOTE: this is what used to be "vars" from class ModelVars
        "data_type": data_type,
        "ages":      ages,
        "age_weights": age_weights,
        "data":      data,
        "lb_data":   lb_data,
        "knots":     knots,
        "smoothing": smoothing,
        "interpolation_method": interpolation_method,
        "params_of_data_type": params_of_data_type,
        "reference_area_id": name_to_id[reference_area],
        "reference_sex": reference_sex,
        "reference_year": reference_year,
        "zero_re": zero_re,
        "region_id_graph": region_id_graph,
        "output_template": output_template,
    }

inspect_model(pm_model)

📊 Model Summary:
  • Free RVs       : 0 []
  • Observed RVs   : 0 []
  • Deterministics : 0 []
  • Potentials     : 0 []
  • Total Named RVs: 0

🔖 shared_data:
  • data_type      : 'p'
  • ages           : array, shape=(93,), dtype=float64
  • age_weights    : array, shape=(101,), dtype=float64
  • data           :                          area  location_id         stage  stage_id     sex   
0                 Netherlands           89  Intermediate         5    Male  \
1                 Netherlands           89  Intermediate         5  Female   
2                 Netherlands           89  Intermediate         5    Male   
3                 Netherlands           89  Intermediate         5  Female   
4                 Netherlands           89  Intermediate         5    Male   
..                        ...          ...           ...       ...     ...   
202                   Ireland           84  Intermediate         5    Both   
203                   Germany           81  Intermediate   

In [13]:
# process.asr() | (1) spline.py
import model.spline as spline
print(spline.__file__)

with pm_model:
    if mu_age is not None:
        unconstrained_mu_age = mu_age
        
    else:
        unconstrained_mu_age = spline.spline()
        
# unconstrained_mu_age is a PyTensor variable deterministic by gamma_p

/Users/Dev/AMD/dismod_mr_migrated/reforged_mr/model/spline.py


In [14]:
inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 1 ['gamma_p']
  • Observed RVs   : 0 []
  • Deterministics : 1 ['mu_age_p']
  • Potentials     : 1 ['smooth_p']
  • Total Named RVs: 3


In [15]:
inspect_model(pm_model, var_name='gamma_p', show_shared_data=False)

🔍 Variable: gamma_p
  • Type     : <class 'pytensor.tensor.var.TensorVariable'>
  • Shape    : TensorConstant{(1,) of 6}
  • DType    : float64
  • Owner OP : normal_rv{0, (0, 0), floatX, False}
  • Current value (eval): [ 14.80667945 -13.44607918   8.98352759  -8.89003884  -2.38664258
  -2.15818276]


In [16]:
inspect_model(pm_model, var_name='mu_age_p', show_shared_data=False)

🔍 Variable: mu_age_p
  • Type     : <class 'pytensor.tensor.var.TensorVariable'>
  • Shape    : TensorConstant{(1,) of 93}
  • DType    : float64
  • Owner OP : Elemwise{identity}
  • Current value (eval): [2.69438207e+06 2.59815414e+06 2.50192621e+06 2.40569828e+06
 2.30947035e+06 2.21324242e+06 2.11701449e+06 2.02078656e+06
 1.92455862e+06 1.82833069e+06 1.73210276e+06 1.63587483e+06
 1.53964690e+06 1.44341897e+06 1.34719104e+06 1.25096311e+06
 1.15473517e+06 1.05850724e+06 9.62279312e+05 8.66051381e+05
 7.69823450e+05 6.73595519e+05 5.77367587e+05 4.81139656e+05
 3.84911725e+05 2.88683794e+05 1.92455862e+05 9.62279312e+04
 1.44691164e-06 5.31379999e+02 1.06276000e+03 1.59413999e+03
 2.12551999e+03 2.65689999e+03 3.18827999e+03 3.71965998e+03
 4.25103998e+03 4.78241998e+03 5.31379998e+03 5.84517997e+03
 6.37655997e+03 6.90793997e+03 7.43931997e+03 7.97069996e+03
 7.43931998e+03 6.90793999e+03 6.37656000e+03 5.84518001e+03
 5.31380002e+03 4.78242003e+03 4.25104005e+03 3.71966006e+03
 

In [17]:
# process.asr() | (2) priors.py - level_constraints()
import model.priors as priors
print(priors.__file__)

with pm_model:
    constrained_mu_age =priors.level_constraints(unconstrained_mu_age)

/Users/Dev/AMD/dismod_mr_migrated/reforged_mr/model/priors.py


In [18]:
inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 1 ['gamma_p']
  • Observed RVs   : 0 []
  • Deterministics : 2 ['mu_age_p', 'constrained_mu_age_p']
  • Potentials     : 2 ['smooth_p', 'parent_similarity_p_level_constraints']
  • Total Named RVs: 5


In [19]:
# process.asr() | (3) priors.py - derivative_constraints()
with pm_model:    
    priors.derivative_constraints(mu_age=constrained_mu_age)

In [20]:
inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 1 ['gamma_p']
  • Observed RVs   : 0 []
  • Deterministics : 2 ['mu_age_p', 'constrained_mu_age_p']
  • Potentials     : 3 ['smooth_p', 'parent_similarity_p_level_constraints', 'mu_age_derivative_potential_p']
  • Total Named RVs: 6


In [21]:
# process.asr() | (4) priors.py - similar()
with pm_model:    
    if mu_age_parent is not None:
        parent_similarity_tv = priors.similar( # TODO: similar() is also used in level_constraints(). 
            mu_child=constrained_mu_age,     #      Thus, it is hard to reduce parameters.
            mu_parent=mu_age_parent,           #      Moreover, concerns on pm.Potential(parent_similarity_tv)
            sigma_parent=sigma_age_parent,     #      What happens if it is called twice with same name?
            sigma_difference=0.0,
            offset=1e-9
        )

inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 1 ['gamma_p']
  • Observed RVs   : 0 []
  • Deterministics : 2 ['mu_age_p', 'constrained_mu_age_p']
  • Potentials     : 3 ['smooth_p', 'parent_similarity_p_level_constraints', 'mu_age_derivative_potential_p']
  • Total Named RVs: 6


In [22]:
# process.asr() | (5) age_groups.py - age_standardize_approx()
#               | (6) covariate.py - mean_covariate_model()
import model.age_groups as age_groups
import model.covariates as covariates
print(age_groups.__file__)
print(covariates.__file__)

with pm_model:    
    if len(data) > 0:
        data = data.copy()
        # 2-1) standard_error, effective_sample_size 채우기
        se = data['standard_error'].mask(
            data['standard_error'] < 0,
            (data['upper_ci'] - data['lower_ci']) / (2 * 1.96)
        )
        ess = data['effective_sample_size'].fillna(
            data['value'] * (1 - data['value']) / se**2
        )
        data['standard_error'] = se
        data['effective_sample_size'] = ess

        mu_interval_tv = age_groups.age_standardize_approx(mu_age=constrained_mu_age)

        # 2-2) covariate & pi
        if include_covariates:
            pi_tv, X_shift_tv, beta_tv, U_tv, alpha_tv = covariates.mean_covariate_model(mu=mu_interval_tv)

        else:
            pi_tv = mu_interval_tv


inspect_model(pm_model, show_shared_data=False)

/Users/Dev/AMD/dismod_mr_migrated/reforged_mr/model/age_groups.py
/Users/Dev/AMD/dismod_mr_migrated/reforged_mr/model/covariates.py
📊 Model Summary:
  • Free RVs       : 41 ['gamma_p', 'sigma_alpha_p_0_z', 'sigma_alpha_p_1_z', 'sigma_alpha_p_2_z', 'sigma_alpha_p_3_z', 'sigma_alpha_p_4_z', 'alpha_p_31', 'alpha_p_56', 'alpha_p_62', 'alpha_p_64', 'alpha_p_70', 'alpha_p_71', 'alpha_p_65', 'alpha_p_67', 'alpha_p_68', 'alpha_p_69', 'alpha_p_100', 'alpha_p_102', 'alpha_p_73', 'alpha_p_81', 'alpha_p_83', 'alpha_p_84', 'alpha_p_86', 'alpha_p_89', 'alpha_p_92', 'alpha_p_137', 'alpha_p_138', 'alpha_p_142', 'alpha_p_158', 'alpha_p_159', 'alpha_p_163', 'alpha_p_164', 'alpha_p_4', 'alpha_p_5', 'alpha_p_6', 'alpha_p_8', 'alpha_p_9', 'alpha_p_18', 'beta_p_x_sdi', 'beta_p_x_tob', 'beta_p_x_sex']
  • Observed RVs   : 0 []
  • Deterministics : 10 ['mu_age_p', 'constrained_mu_age_p', 'cum_sum_mu_p', 'mu_interval_p', 'sigma_alpha_p_0', 'sigma_alpha_p_1', 'sigma_alpha_p_2', 'sigma_alpha_p_3', 'sigma_alpha_p

In [23]:
# process.asr() | (7) covariate.py - mean_covariate_model()

with pm_model:    
    if len(data) <= 0:
        if include_covariates:
            pi_tv, X_shift_tv, beta_tv, U_tv, alpha_tv = covariates.mean_covariate_model(mu=None)


inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 41 ['gamma_p', 'sigma_alpha_p_0_z', 'sigma_alpha_p_1_z', 'sigma_alpha_p_2_z', 'sigma_alpha_p_3_z', 'sigma_alpha_p_4_z', 'alpha_p_31', 'alpha_p_56', 'alpha_p_62', 'alpha_p_64', 'alpha_p_70', 'alpha_p_71', 'alpha_p_65', 'alpha_p_67', 'alpha_p_68', 'alpha_p_69', 'alpha_p_100', 'alpha_p_102', 'alpha_p_73', 'alpha_p_81', 'alpha_p_83', 'alpha_p_84', 'alpha_p_86', 'alpha_p_89', 'alpha_p_92', 'alpha_p_137', 'alpha_p_138', 'alpha_p_142', 'alpha_p_158', 'alpha_p_159', 'alpha_p_163', 'alpha_p_164', 'alpha_p_4', 'alpha_p_5', 'alpha_p_6', 'alpha_p_8', 'alpha_p_9', 'alpha_p_18', 'beta_p_x_sdi', 'beta_p_x_tob', 'beta_p_x_sex']
  • Observed RVs   : 0 []
  • Deterministics : 10 ['mu_age_p', 'constrained_mu_age_p', 'cum_sum_mu_p', 'mu_interval_p', 'sigma_alpha_p_0', 'sigma_alpha_p_1', 'sigma_alpha_p_2', 'sigma_alpha_p_3', 'sigma_alpha_p_4', 'pi_p']
  • Potentials     : 8 ['smooth_p', 'parent_similarity_p_level_constraints', 'mu_age_derivative_potential_p', 'sigma_al

In [24]:
rate_types = ['beta_binom', 'binom', 'neg_binom', 'poisson', 'log_normal', 'normal', 'offset_log_normal']
rate_type = rate_types[2]

In [25]:
# process.asr() | (8) covariate.py - dispersion_covariate_model()
# process.asr() | (9) likelihood.py - neg_binom()
import model.likelihood as likelihood
print(likelihood.__file__)

with pm_model:
    if len(data) > 0:
        if rate_type == 'neg_binom':
            bad_ess = (data['effective_sample_size'] <= 0) | data['effective_sample_size'].isna()
            if bad_ess.any():
                data.loc[bad_ess, 'effective_sample_size'] = 0.0

            big_ess = data['effective_sample_size'] >= 1e10
            if big_ess.any():
                data.loc[big_ess, 'effective_sample_size'] = 1e10

            hetero = parameters.get('heterogeneity', None)
            lower = {'Slightly': 9.0, 'Moderately': 3.0, 'Very': 1.0}.get(hetero, 1.0)
            if data_type == 'pf':
                lower = 1e12

            delta_tv = covariates.dispersion_covariate_model(delta_lb=lower, delta_ub=lower * 9.0)

            likelihood.neg_binom(pi=pi_tv, delta=delta_tv)            


inspect_model(pm_model, show_shared_data=False)

/Users/Dev/AMD/dismod_mr_migrated/reforged_mr/model/likelihood.py
📊 Model Summary:
  • Free RVs       : 42 ['gamma_p', 'sigma_alpha_p_0_z', 'sigma_alpha_p_1_z', 'sigma_alpha_p_2_z', 'sigma_alpha_p_3_z', 'sigma_alpha_p_4_z', 'alpha_p_31', 'alpha_p_56', 'alpha_p_62', 'alpha_p_64', 'alpha_p_70', 'alpha_p_71', 'alpha_p_65', 'alpha_p_67', 'alpha_p_68', 'alpha_p_69', 'alpha_p_100', 'alpha_p_102', 'alpha_p_73', 'alpha_p_81', 'alpha_p_83', 'alpha_p_84', 'alpha_p_86', 'alpha_p_89', 'alpha_p_92', 'alpha_p_137', 'alpha_p_138', 'alpha_p_142', 'alpha_p_158', 'alpha_p_159', 'alpha_p_163', 'alpha_p_164', 'alpha_p_4', 'alpha_p_5', 'alpha_p_6', 'alpha_p_8', 'alpha_p_9', 'alpha_p_18', 'beta_p_x_sdi', 'beta_p_x_tob', 'beta_p_x_sex', 'eta_p']
  • Observed RVs   : 0 []
  • Deterministics : 12 ['mu_age_p', 'constrained_mu_age_p', 'cum_sum_mu_p', 'mu_interval_p', 'sigma_alpha_p_0', 'sigma_alpha_p_1', 'sigma_alpha_p_2', 'sigma_alpha_p_3', 'sigma_alpha_p_4', 'pi_p', 'delta_p', 'p_pred_p']
  • Potentials     : 

In [26]:
# process.asr() | (10) likelihood.py - log_normal()

with pm_model:
    if len(data) > 0:
        if rate_type == 'log_normal':
            missing = data['standard_error'] < 0
            if missing.any():
                data.loc[missing, 'standard_error'] = 1e6

            sigma_tv = pm.Uniform(
                name=f'sigma_{data_type}',
                lower=1e-4,
                upper=1.0,
            )

            likelihood.log_normal(pi=pi_tv, sigma=sigma_tv)


inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 42 ['gamma_p', 'sigma_alpha_p_0_z', 'sigma_alpha_p_1_z', 'sigma_alpha_p_2_z', 'sigma_alpha_p_3_z', 'sigma_alpha_p_4_z', 'alpha_p_31', 'alpha_p_56', 'alpha_p_62', 'alpha_p_64', 'alpha_p_70', 'alpha_p_71', 'alpha_p_65', 'alpha_p_67', 'alpha_p_68', 'alpha_p_69', 'alpha_p_100', 'alpha_p_102', 'alpha_p_73', 'alpha_p_81', 'alpha_p_83', 'alpha_p_84', 'alpha_p_86', 'alpha_p_89', 'alpha_p_92', 'alpha_p_137', 'alpha_p_138', 'alpha_p_142', 'alpha_p_158', 'alpha_p_159', 'alpha_p_163', 'alpha_p_164', 'alpha_p_4', 'alpha_p_5', 'alpha_p_6', 'alpha_p_8', 'alpha_p_9', 'alpha_p_18', 'beta_p_x_sdi', 'beta_p_x_tob', 'beta_p_x_sex', 'eta_p']
  • Observed RVs   : 0 []
  • Deterministics : 12 ['mu_age_p', 'constrained_mu_age_p', 'cum_sum_mu_p', 'mu_interval_p', 'sigma_alpha_p_0', 'sigma_alpha_p_1', 'sigma_alpha_p_2', 'sigma_alpha_p_3', 'sigma_alpha_p_4', 'pi_p', 'delta_p', 'p_pred_p']
  • Potentials     : 9 ['smooth_p', 'parent_similarity_p_level_constraints', 'mu_age_de

[DEBUG] data_type=p: p 배열에서 0 이하인 값 (총 3개):
    index=0, p[0]=[4.09026798e-02 3.38512764e-02 7.23589001e-02 3.69973190e-02
 1.03015075e-01 8.70431894e-02 2.04188482e-01 1.37423313e-01
 2.46435845e-01 2.40051348e-01 3.14121037e-01 3.22265625e-01
 2.66355140e-01 3.56666667e-01 3.88888889e-01 2.16494845e-01
 1.41388175e-02 1.23456790e-02 2.70270270e-02 2.33333333e-02
 4.30622010e-02 4.27251732e-02 1.04395604e-01 1.13065327e-01
 1.68918919e-02 1.83486239e-02 3.16455696e-02 2.49307479e-02
 6.45161290e-02 4.60122699e-02 1.28834356e-01 1.36094675e-01
 5.05050505e-03 3.96825397e-03 1.89573460e-02 2.76679842e-02
 1.30434783e-02 2.77777778e-02 2.38095238e-02 8.73786408e-02
 2.70270270e-02 1.42857143e-02 2.61780105e-02 2.18579235e-02
 3.63636364e-02 5.34759358e-02 1.14285714e-01 1.20481928e-01
 0.00000000e+00 9.80392157e-03 3.12500000e-02 9.70873786e-03
 5.00000000e-02 4.95049505e-02 1.48936170e-01 6.97674419e-02
 1.67803547e-01 1.61971831e-01 1.36134454e-01 1.31944444e-01
 1.66666667e-01 1.26353791e-01 6.95364238e-02 3.88888889e-02
 5.69105691e-02 3.79746835e-02 7.69230769e-02 7.81250000e-02
 1.18811881e-01 1.83908046e-01 2.33333333e-01 2.85714286e-01
 2.85714286e-01 0.00000000e+00 3.42465753e-04 7.11297071e-02
 3.11973019e-02 2.87816490e-02 1.56402737e-01 1.60818713e-01
 1.69481982e-01 1.28455285e-01 6.93641618e-02 8.09716599e-02
 3.79746835e-02 3.30578512e-02 6.97674419e-02 1.34831461e-01
 1.55555556e-01 7.90697674e-02 1.52380952e-01 4.54545455e-02
 5.69105691e-02 1.37254902e-01 1.47540984e-01 2.19178082e-01
...
 2.29729730e-01 6.79839577e-03 2.98547390e-02 7.06447188e-02
 1.10350982e-01 2.30137091e-02 2.93895112e-02 4.69371519e-02
 8.76531574e-02 1.32307692e-01 1.70212766e-01 8.61553785e-02
 1.98497854e-01 1.60000699e-02 1.60001603e-02]
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
Cell In[47], line 16
      8                 data.loc[missing, 'standard_error'] = 1e6
     10             sigma_tv = pm.Uniform(
     11                 name=f'sigma_{data_type}',
     12                 lower=1e-4,
     13                 upper=1.0,
     14             )
---> 16             likelihood.log_normal(pi=pi_tv, sigma=sigma_tv)
     19 inspect_model(pm_model, show_shared_data=False)

File /Users/Dev/AMD/dismod_mr_migrated/reforged_mr/model/likelihood.py:378, in log_normal(pi, sigma)
    375         print(f"    index={i}, s[{i}]={s[i]}")
    377 # 2) 관측값 유효성 검사
--> 378 assert np.all(p > 0), 'observed values must be positive'
    379 assert np.all(s >= 0), 'standard error must be non-negative'
    381 # 3) 관측 로그값은 NumPy로 미리 계산

AssertionError: observed values must be positive

In [27]:
# process.asr() | (11) likelihood.py - log_normal()

with pm_model:
    if len(data) > 0:
        if rate_type == 'normal':
            missing = data['standard_error'] < 0
            if missing.any():
                data.loc[missing, 'standard_error'] = 1e6

            sigma_tv = pm.Uniform(
                name=f'sigma_{data_type}',
                lower=1e-4,
                upper=1e-1,
                initval=1e-2
            )

            likelihood.normal(pi=pi_tv, sigma=sigma_tv)


inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 42 ['gamma_p', 'sigma_alpha_p_0_z', 'sigma_alpha_p_1_z', 'sigma_alpha_p_2_z', 'sigma_alpha_p_3_z', 'sigma_alpha_p_4_z', 'alpha_p_31', 'alpha_p_56', 'alpha_p_62', 'alpha_p_64', 'alpha_p_70', 'alpha_p_71', 'alpha_p_65', 'alpha_p_67', 'alpha_p_68', 'alpha_p_69', 'alpha_p_100', 'alpha_p_102', 'alpha_p_73', 'alpha_p_81', 'alpha_p_83', 'alpha_p_84', 'alpha_p_86', 'alpha_p_89', 'alpha_p_92', 'alpha_p_137', 'alpha_p_138', 'alpha_p_142', 'alpha_p_158', 'alpha_p_159', 'alpha_p_163', 'alpha_p_164', 'alpha_p_4', 'alpha_p_5', 'alpha_p_6', 'alpha_p_8', 'alpha_p_9', 'alpha_p_18', 'beta_p_x_sdi', 'beta_p_x_tob', 'beta_p_x_sex', 'eta_p']
  • Observed RVs   : 0 []
  • Deterministics : 12 ['mu_age_p', 'constrained_mu_age_p', 'cum_sum_mu_p', 'mu_interval_p', 'sigma_alpha_p_0', 'sigma_alpha_p_1', 'sigma_alpha_p_2', 'sigma_alpha_p_3', 'sigma_alpha_p_4', 'pi_p', 'delta_p', 'p_pred_p']
  • Potentials     : 9 ['smooth_p', 'parent_similarity_p_level_constraints', 'mu_age_de

In [28]:
# process.asr() | (12) likelihood.py - binom()

with pm_model:
    if len(data) > 0:
        if rate_type == 'binom':
            bad_ess = data['effective_sample_size'] < 0
            if bad_ess.any():
                data.loc[bad_ess, 'effective_sample_size'] = 0.0

            likelihood.binom(pi=pi_tv)


inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 42 ['gamma_p', 'sigma_alpha_p_0_z', 'sigma_alpha_p_1_z', 'sigma_alpha_p_2_z', 'sigma_alpha_p_3_z', 'sigma_alpha_p_4_z', 'alpha_p_31', 'alpha_p_56', 'alpha_p_62', 'alpha_p_64', 'alpha_p_70', 'alpha_p_71', 'alpha_p_65', 'alpha_p_67', 'alpha_p_68', 'alpha_p_69', 'alpha_p_100', 'alpha_p_102', 'alpha_p_73', 'alpha_p_81', 'alpha_p_83', 'alpha_p_84', 'alpha_p_86', 'alpha_p_89', 'alpha_p_92', 'alpha_p_137', 'alpha_p_138', 'alpha_p_142', 'alpha_p_158', 'alpha_p_159', 'alpha_p_163', 'alpha_p_164', 'alpha_p_4', 'alpha_p_5', 'alpha_p_6', 'alpha_p_8', 'alpha_p_9', 'alpha_p_18', 'beta_p_x_sdi', 'beta_p_x_tob', 'beta_p_x_sex', 'eta_p']
  • Observed RVs   : 0 []
  • Deterministics : 12 ['mu_age_p', 'constrained_mu_age_p', 'cum_sum_mu_p', 'mu_interval_p', 'sigma_alpha_p_0', 'sigma_alpha_p_1', 'sigma_alpha_p_2', 'sigma_alpha_p_3', 'sigma_alpha_p_4', 'pi_p', 'delta_p', 'p_pred_p']
  • Potentials     : 9 ['smooth_p', 'parent_similarity_p_level_constraints', 'mu_age_de

In [29]:
# process.asr() | (13) likelihood.py - beta_binom()

with pm_model:
    if len(data) > 0:
        if rate_type == 'beta_binom':

            ### NEWLY ADDED: Origianl code doesn't have delta_tv calculation
            bad_ess = (data['effective_sample_size'] <= 0) | data['effective_sample_size'].isna()
            if bad_ess.any():
                data.loc[bad_ess, 'effective_sample_size'] = 0.0

            big_ess = data['effective_sample_size'] >= 1e10
            if big_ess.any():
                data.loc[big_ess, 'effective_sample_size'] = 1e10

            hetero = parameters.get('heterogeneity', None)
            lower = {'Slightly': 9.0, 'Moderately': 3.0, 'Very': 1.0}.get(hetero, 1.0)
            if data_type == 'pf':
                lower = 1e12

            delta_tv = covariates.dispersion_covariate_model(delta_lb=lower, delta_ub=lower * 9.0)

            likelihood.beta_binom(pi=pi_tv, delta=delta_tv)


inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 42 ['gamma_p', 'sigma_alpha_p_0_z', 'sigma_alpha_p_1_z', 'sigma_alpha_p_2_z', 'sigma_alpha_p_3_z', 'sigma_alpha_p_4_z', 'alpha_p_31', 'alpha_p_56', 'alpha_p_62', 'alpha_p_64', 'alpha_p_70', 'alpha_p_71', 'alpha_p_65', 'alpha_p_67', 'alpha_p_68', 'alpha_p_69', 'alpha_p_100', 'alpha_p_102', 'alpha_p_73', 'alpha_p_81', 'alpha_p_83', 'alpha_p_84', 'alpha_p_86', 'alpha_p_89', 'alpha_p_92', 'alpha_p_137', 'alpha_p_138', 'alpha_p_142', 'alpha_p_158', 'alpha_p_159', 'alpha_p_163', 'alpha_p_164', 'alpha_p_4', 'alpha_p_5', 'alpha_p_6', 'alpha_p_8', 'alpha_p_9', 'alpha_p_18', 'beta_p_x_sdi', 'beta_p_x_tob', 'beta_p_x_sex', 'eta_p']
  • Observed RVs   : 0 []
  • Deterministics : 12 ['mu_age_p', 'constrained_mu_age_p', 'cum_sum_mu_p', 'mu_interval_p', 'sigma_alpha_p_0', 'sigma_alpha_p_1', 'sigma_alpha_p_2', 'sigma_alpha_p_3', 'sigma_alpha_p_4', 'pi_p', 'delta_p', 'p_pred_p']
  • Potentials     : 9 ['smooth_p', 'parent_similarity_p_level_constraints', 'mu_age_de

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[25], line 23
     19                 lower = 1e12
     21             delta_tv = covariates.dispersion_covariate_model(delta_lb=lower, delta_ub=lower * 9.0)
---> 23             likelihood.beta_binom(pi=pi_tv, delta=delta_tv)
     26 inspect_model(pm_model, show_shared_data=False)

File /Users/Dev/AMD/dismod_mr_migrated/reforged_mr/model/likelihood.py:275, in beta_binom(pi, delta)
    267 alpha_param = pi * delta * 50
    268 beta_param = (1 - pi) * delta * 50
    270 p_obs = pm.BetaBinomial(
    271     name=f'p_obs_{data_type}',
    272     n=n_int[mask],
    273     alpha=alpha_param[mask] if hasattr(alpha_param, 'shape') else alpha_param,
    274     beta=beta_param[mask] if hasattr(beta_param, 'shape') else beta_param,
--> 275     observed=obs_counts[mask]
    276 )
    278 # Posterior predictive counts: replace zero-sample cases
    279 n_pred = n_int.copy()

IndexError: boolean index did not match indexed array along dimension 0; dimension is 1 but corresponding boolean dimension is 207

In [30]:
# process.asr() | (14) likelihood.py - poisson()

with pm_model:
    if len(data) > 0:
        if rate_type == 'poisson':
            bad_ess = data['effective_sample_size'] < 0
            if bad_ess.any():
                data.loc[bad_ess, 'effective_sample_size'] = 0.0

            likelihood.poisson(pi=pi_tv)


inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 42 ['gamma_p', 'sigma_alpha_p_0_z', 'sigma_alpha_p_1_z', 'sigma_alpha_p_2_z', 'sigma_alpha_p_3_z', 'sigma_alpha_p_4_z', 'alpha_p_31', 'alpha_p_56', 'alpha_p_62', 'alpha_p_64', 'alpha_p_70', 'alpha_p_71', 'alpha_p_65', 'alpha_p_67', 'alpha_p_68', 'alpha_p_69', 'alpha_p_100', 'alpha_p_102', 'alpha_p_73', 'alpha_p_81', 'alpha_p_83', 'alpha_p_84', 'alpha_p_86', 'alpha_p_89', 'alpha_p_92', 'alpha_p_137', 'alpha_p_138', 'alpha_p_142', 'alpha_p_158', 'alpha_p_159', 'alpha_p_163', 'alpha_p_164', 'alpha_p_4', 'alpha_p_5', 'alpha_p_6', 'alpha_p_8', 'alpha_p_9', 'alpha_p_18', 'beta_p_x_sdi', 'beta_p_x_tob', 'beta_p_x_sex', 'eta_p']
  • Observed RVs   : 0 []
  • Deterministics : 12 ['mu_age_p', 'constrained_mu_age_p', 'cum_sum_mu_p', 'mu_interval_p', 'sigma_alpha_p_0', 'sigma_alpha_p_1', 'sigma_alpha_p_2', 'sigma_alpha_p_3', 'sigma_alpha_p_4', 'pi_p', 'delta_p', 'p_pred_p']
  • Potentials     : 9 ['smooth_p', 'parent_similarity_p_level_constraints', 'mu_age_de

In [31]:
# process.asr() | (15) likelihood.py - offset_log_normal()
#               | (16) else

with pm_model:
    if len(data) > 0:
        if rate_type == 'offset_log_normal':
            
            sigma_tv = pm.Uniform(
                name=f'sigma_{data_type}',
                lower=1e-4,
                upper=10.0,
                initval=1e-2
            )

            likelihood.offset_log_normal(pi=pi_tv, sigma=sigma_tv)

        # else:
        #     raise ValueError(f'Unsupported rate_type "{rate_type}"')


inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 42 ['gamma_p', 'sigma_alpha_p_0_z', 'sigma_alpha_p_1_z', 'sigma_alpha_p_2_z', 'sigma_alpha_p_3_z', 'sigma_alpha_p_4_z', 'alpha_p_31', 'alpha_p_56', 'alpha_p_62', 'alpha_p_64', 'alpha_p_70', 'alpha_p_71', 'alpha_p_65', 'alpha_p_67', 'alpha_p_68', 'alpha_p_69', 'alpha_p_100', 'alpha_p_102', 'alpha_p_73', 'alpha_p_81', 'alpha_p_83', 'alpha_p_84', 'alpha_p_86', 'alpha_p_89', 'alpha_p_92', 'alpha_p_137', 'alpha_p_138', 'alpha_p_142', 'alpha_p_158', 'alpha_p_159', 'alpha_p_163', 'alpha_p_164', 'alpha_p_4', 'alpha_p_5', 'alpha_p_6', 'alpha_p_8', 'alpha_p_9', 'alpha_p_18', 'beta_p_x_sdi', 'beta_p_x_tob', 'beta_p_x_sex', 'eta_p']
  • Observed RVs   : 0 []
  • Deterministics : 12 ['mu_age_p', 'constrained_mu_age_p', 'cum_sum_mu_p', 'mu_interval_p', 'sigma_alpha_p_0', 'sigma_alpha_p_1', 'sigma_alpha_p_2', 'sigma_alpha_p_3', 'sigma_alpha_p_4', 'pi_p', 'delta_p', 'p_pred_p']
  • Potentials     : 9 ['smooth_p', 'parent_similarity_p_level_constraints', 'mu_age_de

In [32]:
# process.asr() | (17) priors.py - covariate_level_constraints()

with pm_model:
    if include_covariates:
        covariate_constraint_tv = priors.covariate_level_constraints(X_shift_tv, beta_tv, U_tv, alpha_tv, constrained_mu_age)


inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 42 ['gamma_p', 'sigma_alpha_p_0_z', 'sigma_alpha_p_1_z', 'sigma_alpha_p_2_z', 'sigma_alpha_p_3_z', 'sigma_alpha_p_4_z', 'alpha_p_31', 'alpha_p_56', 'alpha_p_62', 'alpha_p_64', 'alpha_p_70', 'alpha_p_71', 'alpha_p_65', 'alpha_p_67', 'alpha_p_68', 'alpha_p_69', 'alpha_p_100', 'alpha_p_102', 'alpha_p_73', 'alpha_p_81', 'alpha_p_83', 'alpha_p_84', 'alpha_p_86', 'alpha_p_89', 'alpha_p_92', 'alpha_p_137', 'alpha_p_138', 'alpha_p_142', 'alpha_p_158', 'alpha_p_159', 'alpha_p_163', 'alpha_p_164', 'alpha_p_4', 'alpha_p_5', 'alpha_p_6', 'alpha_p_8', 'alpha_p_9', 'alpha_p_18', 'beta_p_x_sdi', 'beta_p_x_tob', 'beta_p_x_sex', 'eta_p']
  • Observed RVs   : 0 []
  • Deterministics : 12 ['mu_age_p', 'constrained_mu_age_p', 'cum_sum_mu_p', 'mu_interval_p', 'sigma_alpha_p_0', 'sigma_alpha_p_1', 'sigma_alpha_p_2', 'sigma_alpha_p_3', 'sigma_alpha_p_4', 'pi_p', 'delta_p', 'p_pred_p']
  • Potentials     : 10 ['smooth_p', 'parent_similarity_p_level_constraints', 'mu_age_d

In [33]:
# process.asr() | (18) lower bound handling

with pm_model:
    if lb_data is not None and len(lb_data) > 0:
        lb = {}
        mu_interval_lb_tv = age_groups.age_standardize_approx(mu_age=constrained_mu_age, use_lb_data=True)

        if include_covariates:
            pi_lb_tv, X_shift_lb_tv, beta_lb_tv, U_lb_tv, alpha_lb_tv = covariates.mean_covariate_model(mu=mu_interval_lb_tv, use_lb_data=True)
        else:
            pi_lb_tv = mu_interval_lb_tv

        delta_lb_tv = covariates.dispersion_covariate_model(lower=1e12, upper=1e13, use_lb_data=True)

        se_lb = lb_data['standard_error'].mask(
            lb_data['standard_error'].le(0) | lb_data['standard_error'].isna(),
            (lb_data['upper_ci'] - lb_data['lower_ci']) / (2 * 1.96)
        )
        ess_lb = lb_data['effective_sample_size'].fillna(
            lb_data['value'] * (1 - lb_data['value']) / se_lb**2
        )
        lb_data['standard_error'] = se_lb
        lb_data['effective_sample_size'] = ess_lb

        lb_like = likelihood.neg_binom_lower_bound(pi=pi_lb_tv, delta=delta_lb_tv)


inspect_model(pm_model, show_shared_data=False)

📊 Model Summary:
  • Free RVs       : 42 ['gamma_p', 'sigma_alpha_p_0_z', 'sigma_alpha_p_1_z', 'sigma_alpha_p_2_z', 'sigma_alpha_p_3_z', 'sigma_alpha_p_4_z', 'alpha_p_31', 'alpha_p_56', 'alpha_p_62', 'alpha_p_64', 'alpha_p_70', 'alpha_p_71', 'alpha_p_65', 'alpha_p_67', 'alpha_p_68', 'alpha_p_69', 'alpha_p_100', 'alpha_p_102', 'alpha_p_73', 'alpha_p_81', 'alpha_p_83', 'alpha_p_84', 'alpha_p_86', 'alpha_p_89', 'alpha_p_92', 'alpha_p_137', 'alpha_p_138', 'alpha_p_142', 'alpha_p_158', 'alpha_p_159', 'alpha_p_163', 'alpha_p_164', 'alpha_p_4', 'alpha_p_5', 'alpha_p_6', 'alpha_p_8', 'alpha_p_9', 'alpha_p_18', 'beta_p_x_sdi', 'beta_p_x_tob', 'beta_p_x_sex', 'eta_p']
  • Observed RVs   : 0 []
  • Deterministics : 12 ['mu_age_p', 'constrained_mu_age_p', 'cum_sum_mu_p', 'mu_interval_p', 'sigma_alpha_p_0', 'sigma_alpha_p_1', 'sigma_alpha_p_2', 'sigma_alpha_p_3', 'sigma_alpha_p_4', 'pi_p', 'delta_p', 'p_pred_p']
  • Potentials     : 10 ['smooth_p', 'parent_similarity_p_level_constraints', 'mu_age_d

In [34]:
# fit.asr() | (1) 
import pymc as pm
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
verbose = True

with pm_model:
    if verbose:
        logger.info("  ▶ pm.find_MAP() 수행 중...")
    map_estimate = pm.find_MAP()

INFO:__main__:  ▶ pm.find_MAP() 수행 중...





In [35]:
# 0 divergences with these settings
draws  = 1000
tune   = 500
chains = 2 
cores  = 4
target_accept = 0.95
max_treedepth = 10
use_advi = False
use_metropolis = False
vi_iters = 20000
vi_lr = 1e-3
verbose = True

In [36]:
# fit.asr() | (2) 
import time

draws  = 2000
tune   = 1000
chains = 4
cores  = 4
target_accept = 0.95
max_treedepth = 10
use_advi = False
use_metropolis = False
vi_iters = 20000
vi_lr = 1e-3
verbose = True

t_start = time.time()
with pm_model:
    if use_advi:
        if verbose:
            logger.info("  ▶ ADVI 수행 중...")
        approx = pm.fit(
            n=vi_iters,
            method="advi",
            obj_optimizer=pm.adam(learning_rate=vi_lr),
            callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-4)],
        )
        idata = approx.sample(draws=draws)

    elif use_metropolis:
        if verbose:
            logger.info("  ▶ Metropolis 샘플링 수행 중...")
        step = pm.Metropolis()
        idata = pm.sample(
            draws=draws,
            tune=tune,
            chains=chains,
            cores=cores,
            step=step,
            start=map_estimate,
            return_inferencedata=True,
            progressbar=verbose,
        )

    else:
        if verbose:
            logger.info("  ▶ NUTS 샘플링 수행 중...")
        idata = pm.sample(
            draws=draws,
            tune=tune,
            chains=chains,
            cores=cores,
            start=map_estimate,
            target_accept=target_accept,
            nuts={"max_treedepth": max_treedepth},
            return_inferencedata=True,
            progressbar=verbose,
        )
    
t_end = time.time()
wall_time = t_end - t_start
if verbose:
    logger.info(f"[asr] 전체 소요 시간: {wall_time:.1f}초")

INFO:__main__:  ▶ NUTS 샘플링 수행 중...
  idata = pm.sample(
Auto-assigning NUTS sampler...
INFO:pymc:Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
INFO:pymc:Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
INFO:pymc:Multiprocess sampling (4 chains in 4 jobs)
NUTS: [gamma_p, sigma_alpha_p_0_z, sigma_alpha_p_1_z, sigma_alpha_p_2_z, sigma_alpha_p_3_z, sigma_alpha_p_4_z, alpha_p_31, alpha_p_56, alpha_p_62, alpha_p_64, alpha_p_70, alpha_p_71, alpha_p_65, alpha_p_67, alpha_p_68, alpha_p_69, alpha_p_100, alpha_p_102, alpha_p_73, alpha_p_81, alpha_p_83, alpha_p_84, alpha_p_86, alpha_p_89, alpha_p_92, alpha_p_137, alpha_p_138, alpha_p_142, alpha_p_158, alpha_p_159, alpha_p_163, alpha_p_164, alpha_p_4, alpha_p_5, alpha_p_6, alpha_p_8, alpha_p_9, alpha_p_18, beta_p_x_sdi, beta_p_x_tob, beta_p_x_sex, eta_p]
INFO:pymc:NUTS: [gamma_p, sigma_alpha_p_0_z, sigma_alpha_p_1_z, sigma_alpha_p_2_z, sigma_alpha_p_3_z, sigma_alpha_p_4_z, alpha_p

Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 333 seconds.
INFO:pymc:Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 333 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
INFO:pymc:The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details
ERROR:pymc:The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details
INFO:__main__:[asr] 전체 소요 시간: 356.9초
