In [113]:
import pandas as pd
import numpy as np
import itertools

In [200]:
df = pd.DataFrame(
    columns=["sick_and_old", "healthy_and_old", "sick_and_young", "healthy_and_young"],
    index=["sick_and_old", "healthy_and_old", "sick_and_young", "healthy_and_young"],
    data=[[0.8, 0.2, 0, 0],
         [0.6, 0.4, 0, 0],
         [0, 0, 0.3, 0.7],
         [0, 0, 0.1, 0.9]],
)

In [201]:
df

Unnamed: 0,sick_and_old,healthy_and_old,sick_and_young,healthy_and_young
sick_and_old,0.8,0.2,0.0,0.0
healthy_and_old,0.6,0.4,0.0,0.0
sick_and_young,0.0,0.0,0.3,0.7
healthy_and_young,0.0,0.0,0.1,0.9


In [204]:
def parse_exog_process_matrix(matrix, process_name):
    """
    matrix : pd.DataFrame
    
    """
    states = check_states_size(matrix)
    covariates = create_covariates(states, process_name)
    params = create_params(matrix, states, process_name)
    return covariates, params
    
def create_params(matrix, states, process_name):
    transformed_matrix = matrix.copy()
    transformed_matrix[transformed_matrix == 0] = -1e300
    transformed_matrix[transformed_matrix > 0] = np.log(transformed_matrix[transformed_matrix > 0])
    transformed_matrix.index = transformed_matrix.index.map(str)
    categories = [f"exogenous_process_{process_name}_{state}" for state in states]
    index = pd.MultiIndex.from_tuples(itertools.product(categories, states), names=["category", "name"])
    params = pd.DataFrame(index=index, columns=["value"])
    for category, origin_state in index:
        destination_state = category.split(f"exogenous_process_{process_name}_")[1]
        params.loc[(category, origin_state), "value"] = transformed_matrix.loc[destination_state, origin_state]
    return params
    
    
def create_covariates(states, process_name):
    covariates = {}
    for state in states:
        covariates[str(state)] = f"{process_name} == {state}"
    return covariates
    
def check_states_size(matrix):
    assert matrix.shape[0] == matrix.shape[1]
    n_states = matrix.shape[1]
    assert (matrix.columns == matrix.index).all()
    states = matrix.index
    check_numerics(matrix, n_states)
    return states
    
def check_numerics(matrix_values, n_states):
    assert (matrix_values.sum(axis=1) == np.full(n_states, 1)).all()
    assert( ((matrix_values >= 0) & (matrix_values <= 1)).all()).all()

In [205]:
covariates, params = parse_exog_process_matrix(df, "health_shock")

In [208]:
covariates

{'sick_and_old': 'health_shock == sick_and_old',
 'healthy_and_old': 'health_shock == healthy_and_old',
 'sick_and_young': 'health_shock == sick_and_young',
 'healthy_and_young': 'health_shock == healthy_and_young'}

In [206]:
covariates["sick_and_old"] = "health_shock == sick_and_old & age > 50"

{'sick_and_old': 'health_shock == sick_and_old',
 'healthy_and_old': 'health_shock == healthy_and_old',
 'sick_and_young': 'health_shock == sick_and_young',
 'healthy_and_young': 'health_shock == healthy_and_young'}

In [207]:
params

Unnamed: 0_level_0,Unnamed: 1_level_0,value
category,name,Unnamed: 2_level_1
exogenous_process_health_shock_sick_and_old,sick_and_old,-0.223144
exogenous_process_health_shock_sick_and_old,healthy_and_old,-1.60944
exogenous_process_health_shock_sick_and_old,sick_and_young,-1e+300
exogenous_process_health_shock_sick_and_old,healthy_and_young,-1e+300
exogenous_process_health_shock_healthy_and_old,sick_and_old,-0.510826
exogenous_process_health_shock_healthy_and_old,healthy_and_old,-0.916291
exogenous_process_health_shock_healthy_and_old,sick_and_young,-1e+300
exogenous_process_health_shock_healthy_and_old,healthy_and_young,-1e+300
exogenous_process_health_shock_sick_and_young,sick_and_old,-1e+300
exogenous_process_health_shock_sick_and_young,healthy_and_old,-1e+300
