In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>")) #makes the display bars longer and spread out 100% across the screen width

In [2]:
from vivarium import Artifact
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from db_queries import get_ids, get_outputs
import scipy.stats

In [3]:
output_dirs = ['/ihme/costeffectiveness/results/vivarium_gates_bep/scenarios_fixed/india/2020_02_06_17_37_48/count_data/',
                '/ihme/costeffectiveness/results/vivarium_gates_bep/scenarios_fixed/pakistan/2020_02_06_17_38_45/count_data/',
              '/share/costeffectiveness/results/vivarium_gates_bep/scenarios_fixed/tanzania/2020_02_06_17_39_16/count_data/',
              '/share/costeffectiveness/results/vivarium_gates_bep/scenarios_fixed/mali/2020_02_06_17_38_33/count_data/']

locations = ['India',
            'Pakistan',
            'Tanzania',
            'Mali']

# 1) LOW BIRTH WEIGHT 

In [4]:
 #exploring the data strcutures for india
    
bw_india = pd.DataFrame()
    
bw_india = pd.read_hdf('/ihme/costeffectiveness/results/vivarium_gates_bep/scenarios_fixed/india/2020_02_06_17_37_48/count_data/' + 'birth_weight.hdf').reset_index().sort_values(by=['input_draw','scenario','treatment_group','mother_status','measure'])
bw_india.head(24)

Unnamed: 0,index,risk,treatment_group,mother_status,measure,input_draw,scenario,value
0,0,birth_weight,bep,malnourished,mean,3,baseline,0.0
200,200,birth_weight,bep,malnourished,proportion_below_2500g,3,baseline,0.0
400,400,birth_weight,bep,malnourished,sd,3,baseline,0.0
600,600,birth_weight,bep,normal,mean,3,baseline,0.0
800,800,birth_weight,bep,normal,proportion_below_2500g,3,baseline,0.0
1000,1000,birth_weight,bep,normal,sd,3,baseline,0.0
1200,1200,birth_weight,ifa,malnourished,mean,3,baseline,2732.796612
1400,1400,birth_weight,ifa,malnourished,proportion_below_2500g,3,baseline,0.308552
1600,1600,birth_weight,ifa,malnourished,sd,3,baseline,687.12654
1800,1800,birth_weight,ifa,normal,mean,3,baseline,3002.006919


In [5]:
pop_india = pd.read_hdf('/ihme/costeffectiveness/results/vivarium_gates_bep/scenarios_fixed/india/2020_02_06_17_37_48/count_data/' + 'population.hdf').reset_index().sort_values(by=['input_draw','scenario','treatment_group','mother_status'])
pop_india.head(13)

Unnamed: 0,index,treatment_group,mother_status,measure,input_draw,scenario,value
0,0,all,all,total_population,3,baseline,100000.0
200,200,all,all,total_population_dead,3,baseline,2649.0
400,400,all,all,total_population_living,3,baseline,97351.0
600,600,all,all,total_population_tracked,3,baseline,100000.0
800,800,all,all,total_population_untracked,3,baseline,0.0
1000,1000,bep,malnourished,total_population,3,baseline,0.0
1200,1200,bep,normal,total_population,3,baseline,0.0
1400,1400,ifa,malnourished,total_population,3,baseline,5881.0
1600,1600,ifa,normal,total_population,3,baseline,29235.0
1800,1800,mmn,malnourished,total_population,3,baseline,0.0


In [6]:
pop_india.measure.unique()

array(['total_population', 'total_population_dead',
       'total_population_living', 'total_population_tracked',
       'total_population_untracked'], dtype=object)

In [7]:
master_bw = pd.DataFrame()

for i in list(range(len(output_dirs))):
    bw = pd.read_hdf(output_dirs[i] + 'birth_weight.hdf').reset_index().sort_values(by=['risk','measure','scenario','treatment_group','mother_status'])
    pop = pd.read_hdf(output_dirs[i] + 'population.hdf').reset_index()
    pop = pop.where(pop['measure'] == 'total_population').dropna().rename(columns={'value':'total_population'}).drop(columns=['index','measure'])
    data = bw.merge(pop, right_on=['scenario','treatment_group','mother_status','input_draw'], left_on=['scenario','treatment_group','mother_status','input_draw'])
    data = data.groupby(['risk','measure','scenario','treatment_group','mother_status']).mean().drop(columns=['input_draw','index']).reset_index()
    data['weighted_value'] = data['value'] * data['total_population']
    data['location'] = locations[i]
    master_bw = master_bw.append(data)
    
master_bw.head()

Unnamed: 0,risk,measure,scenario,treatment_group,mother_status,value,total_population,weighted_value,location
0,birth_weight,mean,baseline,bep,malnourished,0.0,0.0,0.0,India
1,birth_weight,mean,baseline,bep,normal,0.0,0.0,0.0,India
2,birth_weight,mean,baseline,ifa,malnourished,2684.283214,5770.54,15489760.0,India
3,birth_weight,mean,baseline,ifa,normal,3038.537322,28261.14,85872530.0,India
4,birth_weight,mean,baseline,mmn,malnourished,0.0,0.0,0.0,India


In [8]:
india_bw = master_bw[master_bw['location'].isin(['India']) & master_bw['measure'].isin(['mean']) & master_bw['scenario'].isin(['baseline']) & master_bw['treatment_group'].isin(['ifa', 'none'])]
india_bw

Unnamed: 0,risk,measure,scenario,treatment_group,mother_status,value,total_population,weighted_value,location
2,birth_weight,mean,baseline,ifa,malnourished,2684.283214,5770.54,15489760.0,India
3,birth_weight,mean,baseline,ifa,normal,3038.537322,28261.14,85872530.0,India
6,birth_weight,mean,baseline,none,malnourished,2683.328356,11165.96,29961940.0,India
7,birth_weight,mean,baseline,none,normal,3038.579482,54802.36,166521300.0,India


In [9]:
master_bw['scenario'].unique()

array(['baseline', 'bep_scale_up', 'bep_targeted_scale_up',
       'mmn_scale_up'], dtype=object)

In [10]:
master_bw['treatment_group'].unique()

array(['bep', 'ifa', 'mmn', 'none'], dtype=object)

In [11]:
#mean birthweight by country and treatment groups
(master_bw.where(master_bw['treatment_group'] == 'ifa')
         .where(master_bw['scenario'] == 'baseline')
         .where(master_bw['risk'] == 'birth_weight')
         .where(master_bw['measure'] == 'mean').dropna())

Unnamed: 0,risk,measure,scenario,treatment_group,mother_status,value,total_population,weighted_value,location
2,birth_weight,mean,baseline,ifa,malnourished,2684.283214,5770.54,15489760.0,India
3,birth_weight,mean,baseline,ifa,normal,3038.537322,28261.14,85872530.0,India
2,birth_weight,mean,baseline,ifa,malnourished,2593.444746,2671.18,6927558.0,Pakistan
3,birth_weight,mean,baseline,ifa,normal,2943.289694,22047.76,64892940.0,Pakistan
2,birth_weight,mean,baseline,ifa,malnourished,3148.812417,2014.98,6344794.0,Tanzania
3,birth_weight,mean,baseline,ifa,normal,3358.446589,18955.92,63662440.0,Tanzania
2,birth_weight,mean,baseline,ifa,malnourished,2740.943104,2423.88,6643717.0,Mali
3,birth_weight,mean,baseline,ifa,normal,3029.55147,20875.32,63242860.0,Mali


In [12]:
#get birthweight mean by country and mom status in baseline scenario

#mean birthweight by country and treatment groups
baseline_bw = (master_bw.where(master_bw['treatment_group'] == 'ifa' | master_bw['treatment_group'] == 'none')
                        .where(master_bw['scenario'] == 'baseline')
                        .where(master_bw['risk'] == 'birth_weight')
                        .where(master_bw['measure'] == 'mean').dropna())

baseline_bw 

TypeError: cannot compare a dtyped [object] array with a scalar of type [bool]

In [None]:
master_bw.where(master_bw['measure'] == 'proportion_below_2500g').dropna().head()

In [None]:
#PREVALENCE OF LBW

stratified = master_bw.groupby(['location','measure','scenario','mother_status']).sum()
stratified['weighted_average'] = stratified['weighted_value'] / stratified['total_population']
stratified = stratified.reset_index()
stratified = stratified.where(stratified['measure'] == 'proportion_below_2500g').where(stratified['scenario'] == 'baseline').dropna()
overall = master_bw.groupby(['location','measure','scenario']).sum()
overall['weighted_average'] = overall['weighted_value'] / overall['total_population']
overall = overall.reset_index()
overall = overall.where(overall['measure'] == 'proportion_below_2500g').where(overall['scenario'] == 'baseline').dropna()
overall['mother_status'] = 'all'
data = stratified.append(overall)
data = data.sort_values(by='location')

data



In [None]:
# bar width
barWidth = 0.2

# bar heights
overall = data['weighted_average'].where(data['mother_status'] == 'all').dropna()
normal = data['weighted_average'].where(data['mother_status'] == 'normal').dropna()
malnourished = data['weighted_average'].where(data['mother_status'] == 'malnourished').dropna()

# bar positions
a = np.arange(len(overall))
b = [x + barWidth for x in a]
c = [x + barWidth for x in b]

# plot 
plt.bar(a, overall, width=barWidth)
plt.bar(b, normal, width=barWidth)
plt.bar(c, malnourished, width=barWidth)
plt.xticks([r + barWidth for r in range(len(normal))], np.unique(data['location']))
plt.legend(['Overall','BMI > 18.5','BMI < 18.5'])
plt.title('Prevalence of Low Birth Weight (<2500 grams)')
plt.ylabel('Proportion')

In [None]:
# plot baseline mean birth weight stratified by maternal malnourishment

stratified = master_bw.groupby(['location','measure','scenario','mother_status']).sum()
stratified['weighted_average'] = stratified['weighted_value'] / stratified['total_population']
stratified = stratified.reset_index()
stratified = stratified.where(stratified['measure'] == 'mean').where(stratified['scenario'] == 'baseline').dropna()
overall = master_bw.groupby(['location','measure','scenario']).sum()
overall['weighted_average'] = overall['weighted_value'] / overall['total_population']
overall = overall.reset_index()
overall = overall.where(overall['measure'] == 'mean').where(overall['scenario'] == 'baseline').dropna()
overall['mother_status'] = 'all'
data = stratified.append(overall)
data = data.sort_values(by='location')

In [None]:
data

In [None]:

# bar width
barWidth = 0.2

# bar heights
overall = data['weighted_average'].where(data['mother_status'] == 'all').dropna()
normal = data['weighted_average'].where(data['mother_status'] == 'normal').dropna()
malnourished = data['weighted_average'].where(data['mother_status'] == 'malnourished').dropna()

# bar positions
a = np.arange(len(overall))
b = [x + barWidth for x in a]
c = [x + barWidth for x in b]

# plot 
plt.bar(a, overall, width=barWidth)
plt.bar(b, normal, width=barWidth)
plt.bar(c, malnourished, width=barWidth)
plt.xticks([r + barWidth for r in range(len(normal))], np.unique(data['location']))
plt.legend(['Overall','BMI > 18.5','BMI < 18.5'])
plt.title('Baseline Mean Birthweight')
plt.ylabel('Birth Weight (grams)')
plt.ylim(0,4200)

In [None]:
master_bw = pd.DataFrame()

for i in list(range(len(output_dirs))):
    bw = pd.read_hdf(output_dirs[i] + 'birth_weight.hdf').reset_index().sort_values(by=['risk','measure','scenario','treatment_group','mother_status'])
    pop = pd.read_hdf(output_dirs[i] + 'population.hdf').reset_index()
    pop = pop.where(pop['measure'] == 'total_population').dropna().rename(columns={'value':'total_population'}).drop(columns=['index','measure'])
    data = bw.merge(pop, right_on=['scenario','treatment_group','mother_status','input_draw'], left_on=['scenario','treatment_group','mother_status','input_draw'])
    data = data.groupby(['risk','measure','scenario','treatment_group','mother_status']).mean().drop(columns=['input_draw','index']).reset_index()
    data['weighted_value'] = data['value'] * data['total_population']
    data['location'] = locations[i]
    master_bw = master_bw.append(data)
    
master_bw.head()

# 2) CHILD GROWTH FAILURE

In [None]:
cgf = pd.read_hdf('/share/costeffectiveness/results/vivarium_gates_bep/malnutrition/india/2020_02_05_03_02_13/count_space/category_counts.hdf').reset_index()
cgf = cgf.where(cgf['sex'] == 'male').dropna()
cgf = cgf.groupby(['input_draw','cgf_risk','risk_category','malnourishment_category']).sum().reset_index()
pop = pd.read_hdf('/share/costeffectiveness/results/vivarium_gates_bep/malnutrition/india/2020_02_05_03_02_13/count_space/birth_prevalence.hdf').reset_index()
pop = pop.groupby(['input_draw','malnourishment_category']).sum().reset_index().rename(columns={'birth_prevalence':'total_population'})
data = cgf.merge(pop, right_on=['input_draw','malnourishment_category'], left_on=['input_draw','malnourishment_category'])
data['percent'] = data['category_counts'] / data['total_population'] 
data.head()

In [None]:
pop = pd.read_hdf('/share/costeffectiveness/results/vivarium_gates_bep/malnutrition/india/2020_02_05_03_02_13/count_space/birth_prevalence.hdf').reset_index()
#pop = pop.groupby(['input_draw','malnourishment_category']).sum().reset_index().rename(columns={'birth_prevalence':'total_population'})
pop.where(pop['input_draw'] == 29.0).dropna()

In [None]:
np.unique(pop['birth_state'])

In [None]:
pop = pd.read_hdf('/share/costeffectiveness/results/vivarium_gates_bep/malnutrition/india/2020_02_05_03_02_13/count_space/population.hdf').reset_index()
pop.head()