# Workflow to calculate Measles transition probabilities

In [1]:
import pandas as pd
import json

Data from literature

In [2]:
# row labels of the literature table (one outcome category per row)
index = ['measles','GP','hosp','death']

# table data from literature: one column per age group, values ordered to match `index`
data = {'0-4': [28730,11883,7470,97],
        '5-9': [6492, 1173, 612, 9],
        '10-19': [18580, 2369, 1612, 18],
        '20-29': [9161, 2656, 2075, 26],
        '30-': [4069, 1399, 1107, 27]}

# age-group labels, in column order
columns = list(data.keys())

df = pd.DataFrame(data, index=index)

# Number of entries in one row of the table, i.e. the number of age groups.
# Taken from the columns directly: the previous `len(index) + 1` counted the
# outcome rows and only matched because 4 rows + 1 happens to equal 5 groups.
row_length = len(columns)

df

Unnamed: 0,0-4,5-9,10-19,20-29,30-
measles,28730,6492,18580,9161,4069
GP,11883,1173,2369,2656,1399
hosp,7470,612,1612,2075,1107
death,97,9,18,26,27


Checks to see if proportion table values are correct

In [3]:
# Hand-computed reference values from the raw counts, to compare by eye
# against the matching cells of the proportions table.
spot_checks = (
    (28730 - 11883) / 28730,  # 0-4:   (measles - GP) / measles
    (2369 - 1612) / 18580,    # 10-19: (GP - hosp) / measles
    26 / 9161,                # 20-29: death / measles
)

for value in spot_checks:
    print(value)

0.5863905325443787
0.04074273412271259
0.0028381181093767058


In [4]:
def process_rows(row1, row2, measles, final_row=False):
    """Compute one entry (or one row) of the proportions table.

    Args:
        row1: value(s) at row i
        row2: value(s) at row i+1; not read when final_row is True
        measles: value(s) at row 0, used as the denominator
        final_row (bool, optional): True if row i is the last row of the table. Defaults to False.

    Returns:
        row1 / measles for the final row, otherwise (row1 - row2) / measles
    """
    # The last row has no successor to subtract, so it is used as-is;
    # every other row contributes its difference to the next row.
    numerator = row1 if final_row else row1 - row2

    return numerator / measles

Calculate proportions

In [5]:
prob_index = ['prop_final_mild', 'prop_final_gp','prop_final_hosp','prop_final_death']

# position of the last row of df: it has no following row to subtract
last_position = len(df) - 1

# calculate proportions for df, one result per row of the counts table
prob_data = [
    (
        process_rows(df.iloc[pos], None, df.iloc[0], final_row=True)
        if pos == last_position
        else process_rows(df.iloc[pos], df.iloc[pos + 1], df.iloc[0], final_row=False)
    )
    for pos in range(len(df))
]

# get new proportions table
prob_df = pd.DataFrame(prob_data, index=prob_index, columns=df.columns)
prob_df

Unnamed: 0,0-4,5-9,10-19,20-29,30-
prop_final_mild,0.586391,0.819316,0.872497,0.710075,0.656181
prop_final_gp,0.153603,0.086414,0.040743,0.063421,0.071762
prop_final_hosp,0.256631,0.092884,0.085791,0.223666,0.265421
prop_final_death,0.003376,0.001386,0.000969,0.002838,0.006636


Calculate probabilities from proportions data

In [6]:
# age-group labels covered by the calculation, in column order
age_groups = [columns[pos] for pos in range(row_length)]

# outcome counts divided by measles counts, per age group
prob_hosp_given_measles = [df.loc['hosp', age] / df.loc['measles', age] for age in age_groups]
prob_gp_given_measles = [df.loc['GP', age] / df.loc['measles', age] for age in age_groups]
prob_death_given_measles = [df.loc['death', age] / df.loc['measles', age] for age in age_groups]

# GP -> hospital is the ratio of the two shares above; the remainder recovers
prob_gp_to_hosp = [hosp / gp for hosp, gp in zip(prob_hosp_given_measles, prob_gp_given_measles)]
prob_gp_to_recov = [1 - to_hosp for to_hosp in prob_gp_to_hosp]

prob_hosp_to_death = [death / hosp for death, hosp in zip(prob_death_given_measles, prob_hosp_given_measles)]

# placeholder values, one per age group
prob_hosp_to_icu = [0.1, 0.1, 0.1, 0.1, 0.1] # to be changed
prob_exposed_to_asympt = [0.1, 0.1, 0.1, 0.9, 0.9] # to be looked at further

# unweighted mean of four death/ICU ratios, applied to every age group
icu_death_mean = (4/15 + 18/58 + 2/14 + 1/11) / 4

print("mean of icu_to_death: ", icu_death_mean)

prob_icu_to_death = [icu_death_mean] * row_length # to be changed
prob_icu_to_recov = [1 - to_death for to_death in prob_icu_to_death]

# whatever does not go to death or ICU from hospital is counted as recovery
prob_hosp_to_recov = [1 - to_death - to_icu for to_death, to_icu in zip(prob_hosp_to_death, prob_hosp_to_icu)]

# Split the non-asymptomatic share of exposed cases between "mild" and "GP"
# using the 'prop_final_mild' row: mild gets that proportion, GP the rest.
prop_mild = prob_df.loc['prop_final_mild'].tolist()
prob_exposed_to_mild = [(1 - asympt) * mild for asympt, mild in zip(prob_exposed_to_asympt, prop_mild)]
prob_exposed_to_gp = [(1 - asympt) * (1 - mild) for asympt, mild in zip(prob_exposed_to_asympt, prop_mild)]


mean of icu_to_death:  0.20269443200477685


Save data to .json

In [7]:
# probability data: transition name -> list with one value per age group
save_data = dict(
    prob_gp_to_hosp=prob_gp_to_hosp,
    prob_gp_to_recov=prob_gp_to_recov,
    prob_exposed_to_asympt=prob_exposed_to_asympt,
    prob_exposed_to_gp=prob_exposed_to_gp,
    prob_exposed_to_mild=prob_exposed_to_mild,
    prob_hosp_to_death=prob_hosp_to_death,
    prob_hosp_to_recov=prob_hosp_to_recov,
    prob_hosp_to_icu=prob_hosp_to_icu,
    prob_icu_to_death=prob_icu_to_death,
    prob_icu_to_recov=prob_icu_to_recov,
)

# save probabilities to .json (written to the current working directory)
with open("probabilities.json", 'w') as json_file:
    json.dump(save_data, json_file)

print("Data saved as probabilities.json")

Data saved as probabilities.json


Display probabilities

In [8]:
# Tabulate the probabilities with one row per transition and one column per
# age group. The row labels get their own name so that the `index` list
# defined for the literature table is not rebound to something unrelated.
transition_names = list(save_data.keys())

# built with age groups as rows first, then transposed into the display layout
probabilities_df = pd.DataFrame(save_data, index=columns, columns=transition_names).T
print(probabilities_df)



                             0-4       5-9     10-19     20-29       30-
prob_gp_to_hosp         0.628629  0.521739  0.680456  0.781250  0.791279
prob_gp_to_recov        0.371371  0.478261  0.319544  0.218750  0.208721
prob_exposed_to_asympt  0.100000  0.100000  0.100000  0.900000  0.900000
prob_exposed_to_gp      0.372249  0.162616  0.114752  0.028992  0.034382
prob_exposed_to_mild    0.527751  0.737384  0.785248  0.071008  0.065618
prob_hosp_to_death      0.012985  0.014706  0.011166  0.012530  0.024390
prob_hosp_to_recov      0.887015  0.885294  0.888834  0.887470  0.875610
prob_hosp_to_icu        0.100000  0.100000  0.100000  0.100000  0.100000
prob_icu_to_death       0.202694  0.202694  0.202694  0.202694  0.202694
prob_icu_to_recov       0.797306  0.797306  0.797306  0.797306  0.797306


Extend age ranges to match the 5-year intervals used in Epiabm

In [9]:
# Literature age bands that are expanded into several 5-year bands; every new
# band receives a copy of the values of the band it comes from. Bands that are
# not listed here are carried over unchanged.
band_splits = {
    '10-19': ['10-14', '15-19'],
    '20-29': ['20-24', '25-29'],
    '30-': ['30-34','35-39','40-44','45-49','50-54','55-59','60-64','65-69','70-74','75-79','80-84'],
}

# Walk the existing columns in order and record, for each output column,
# which source column it is copied from and what it is called.
source_bands = []
target_bands = []
for band in probabilities_df.columns:
    for target in band_splits.get(band, [band]):
        source_bands.append(band)
        target_bands.append(target)

# Build a NEW frame instead of aliasing and mutating `probabilities_df`:
# the previous in-place inserts/drops removed the source columns from
# `probabilities_df`, so running this cell a second time raised a KeyError.
extended_df = probabilities_df[source_bands].copy()
extended_df.columns = target_bands

# each output band must appear exactly once
assert extended_df.columns.is_unique, "duplicate age band in extended table"

print(extended_df)


                             0-4       5-9     10-14     15-19     20-24  \
prob_gp_to_hosp         0.628629  0.521739  0.680456  0.680456  0.781250   
prob_gp_to_recov        0.371371  0.478261  0.319544  0.319544  0.218750   
prob_exposed_to_asympt  0.100000  0.100000  0.100000  0.100000  0.900000   
prob_exposed_to_gp      0.372249  0.162616  0.114752  0.114752  0.028992   
prob_exposed_to_mild    0.527751  0.737384  0.785248  0.785248  0.071008   
prob_hosp_to_death      0.012985  0.014706  0.011166  0.011166  0.012530   
prob_hosp_to_recov      0.887015  0.885294  0.888834  0.888834  0.887470   
prob_hosp_to_icu        0.100000  0.100000  0.100000  0.100000  0.100000   
prob_icu_to_death       0.202694  0.202694  0.202694  0.202694  0.202694   
prob_icu_to_recov       0.797306  0.797306  0.797306  0.797306  0.797306   

                           25-29     30-34     35-39     40-44     45-49  \
prob_gp_to_hosp         0.781250  0.791279  0.791279  0.791279  0.791279   
prob_gp_to_

In [10]:
# Convert the table into {transition name: [value per age band, in column order]}.
# Names are read from the frame's own index so each list is paired with the row
# it came from; the previous version matched rows to a separately hard-coded
# list of keys by position, which would mislabel data if the row order changed.
prob_dict = {name: row.tolist() for name, row in extended_df.iterrows()}

# transition names in output order
keys = list(prob_dict)

print(prob_dict)

{'prob_gp_to_hosp': [0.6286291340570562, 0.5217391304347826, 0.6804558885605742, 0.6804558885605742, 0.7812500000000001, 0.7812500000000001, 0.7912794853466761, 0.7912794853466761, 0.7912794853466761, 0.7912794853466761, 0.7912794853466761, 0.7912794853466761, 0.7912794853466761, 0.7912794853466761, 0.7912794853466761, 0.7912794853466761, 0.7912794853466761], 'prob_gp_to_recov': [0.3713708659429438, 0.4782608695652174, 0.31954411143942585, 0.31954411143942585, 0.2187499999999999, 0.2187499999999999, 0.20872051465332386, 0.20872051465332386, 0.20872051465332386, 0.20872051465332386, 0.20872051465332386, 0.20872051465332386, 0.20872051465332386, 0.20872051465332386, 0.20872051465332386, 0.20872051465332386, 0.20872051465332386], 'prob_exposed_to_asympt': [0.1, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9], 'prob_exposed_to_gp': [0.3722485207100592, 0.16261552680221814, 0.11475242195909584, 0.11475242195909584, 0.028992468071171267, 0.028992468071171267, 

In [11]:
# save probabilities to .json, replacing any existing file of the same name
# (compact default formatting; json.dump also accepts sort_keys / indent /
# separators if a human-readable file is wanted)
output_path = "probabilities.json"

with open(output_path, 'w') as json_file:
    json.dump(prob_dict, json_file)

print("Data saved as probabilities.json")

Data saved as probabilities.json
