In [53]:
import pandas as pd
import numpy as np
import plotly.express as px
import gbd_mapping as gm
# Use extract_data because it allows skipping the validation step. Higher-level
#  functions enforce validation, and anomalies in the data mean that validation fails
#  and you don't get any data back.
from vivarium_inputs.extract import extract_data

In [43]:
# Category lookup for the LBWSG risk factor as a plain dict.
# Keys look like 'cat2'; values are description strings that contain bracketed intervals.
lbwsg_risk = gm.risk_factors.low_birth_weight_and_short_gestation
lbw_cats = lbwsg_risk.categories.to_dict()

In [44]:
# Each description holds two '['-opened intervals: the first is used downstream as
# gestational age and the second as birthweight. Keep only the lower bound of each
# (the text before the first comma), still as strings.
cats = [
    (cat_id, pieces[1].partition(',')[0], pieces[2].partition(',')[0])
    for cat_id, pieces in (
        (key, description.split('[')) for key, description in lbw_cats.items()
    )
]
cats[:5]

[('cat2', '0', '0'),
 ('cat8', '0', '500'),
 ('cat10', '24', '500'),
 ('cat11', '26', '500'),
 ('cat14', '30', '500')]

In [45]:
# Turn the (category, ga, bw) tuples into a lookup frame, one row per category.
# 'ga' and 'bw' are kept exactly as parsed (strings); only 'cat' is forced to str.
cc = {
    'cat': [str(cat_id) for cat_id, _, _ in cats],
    'ga': [ga for _, ga, _ in cats],
    'bw': [bw for _, _, bw in cats],
}
df_cat = pd.DataFrame(cc)

In [47]:
# validate=False is the only way to actually examine this data: with validation on,
# anomalies in the data make it fail and nothing is returned.
df_rr = extract_data(
    gm.risk_factors.low_birth_weight_and_short_gestation,
    'relative_risk',
    163,  # presumably a GBD location id -- TODO confirm which location this is
    validate=False,
)

In [48]:
# Column names of the 1000 draws: draw_0 ... draw_999
draws = [f'draw_{i}' for i in range(1000)]

# Mean relative risk per category: average every draw column within each
# 'parameter' group, then average across the draw columns. Selecting the draw
# columns on the groupby and using the built-in mean gives the same
# mean-of-column-means as groupby.apply with a lambda, without the per-group
# Python call and without the pandas warning about apply operating on the
# grouping columns.
rr_by_cat = df_rr.groupby('parameter')[draws].mean().mean(axis=1)

# Flatten to a two-column frame keyed by category name
df_ = pd.DataFrame({'cat': rr_by_cat.index, 'rr': rr_by_cat.values})
# Bracket assignment is the reliable way to overwrite a column; make the key a
# plain string so it matches the 'cat' column it is merged against later.
df_['cat'] = df_['cat'].astype(str)

In [54]:
# Join the per-category mean RR onto the parsed GA/BW values (inner join on 'cat'),
# then order the rows by ascending RR with a fresh 0..n-1 index.
df_with_rr = (
    pd.merge(df_, df_cat, on='cat')
    .sort_values('rr', ignore_index=True)
)
df_with_rr.head()

Unnamed: 0,cat,rr,ga,bw
0,cat56,1.0,40,4000
1,cat55,1.0,40,3500
2,cat54,1.0,38,3500
3,cat53,1.0,38,4000
4,cat51,1.109811,40,3000


In [51]:
# One HTML label per row, in row order: bold category name, a line break,
# then the RR rounded to 4 decimal places.
cat_plus_rr = [
    f"<b>{row.cat}</b><br>{np.round(row.rr, 4)}"
    for row in df_with_rr.itertuples()
]

In [52]:
# Text-only scatter: each category is drawn as its label (name + RR) at its
# (gestational age, birthweight) position.
fig = px.scatter(
    data_frame=df_with_rr,    # data
    x='ga', y='bw',           # use these columns
    text=cat_plus_rr,         # use the concatenated and formatted list of category and RR
    labels={'ga': 'Gestational Age', 'bw': 'Birthweight'},  # readable axis labels
    title='Birthweight and Gestational Age with Relative Risk -- GBD 2019',
    # Get rid of the markers by setting size to zero. The array needs one entry per
    # plotted row, so size it from the plotted frame itself rather than from an
    # unrelated variable that no cell in this notebook defines.
    size=np.zeros(len(df_with_rr)),
)
# Hide the grid and flip both axes
fig.update_xaxes(showgrid=False, autorange="reversed")
fig.update_yaxes(showgrid=False, autorange="reversed")
# Center the title; as the last expression this also displays the figure
fig.update_layout(title_x=0.5)
