In [1]:
import pandas as pd
import numpy as np
import scipy.stats
from db_queries import get_covariate_estimates

In [2]:
# Fetch covariate 1136 (reported below as mean_birthweight) for location 163
# (reported below as India), GBD round 5, year 2017, and display the rows.
bw = get_covariate_estimates(
    covariate_id=1136,
    location_id=163,
    gbd_round_id=5,
    year_id=2017,
)
bw

Unnamed: 0,model_version_id,covariate_id,covariate_name_short,location_id,location_name,year_id,age_group_id,age_group_name,sex_id,sex,mean_value,lower_value,upper_value
0,21269,1136,mean_birthweight,163,India,2017,22,All Ages,1,Male,3079.753121,3054.992529,3104.522096
1,21269,1136,mean_birthweight,163,India,2017,22,All Ages,2,Female,3044.228645,3016.321826,3072.567596


In [3]:
# Fetch covariate 1252 (reported below as females_underweight_reproageagestd) for
# location 163 (reported below as India), GBD round 5, year 2017, and display the rows.
low_bmi = get_covariate_estimates(
    covariate_id=1252,
    location_id=163,
    gbd_round_id=5,
    year_id=2017,
)
low_bmi

Unnamed: 0,model_version_id,covariate_id,covariate_name_short,location_id,location_name,year_id,age_group_id,age_group_name,sex_id,sex,mean_value,lower_value,upper_value
0,22484,1252,females_underweight_reproageagestd,163,India,2017,27,Age-standardized,1,Male,0.0,0.0,0.0
1,22484,1252,females_underweight_reproageagestd,163,India,2017,27,Age-standardized,2,Female,0.197732,0.188499,0.20788


In [4]:
# Hypothetical population parameters used by the cells that follow.
mean, sd = 3061.5, 550   # mean and standard deviation of the birthweight normal
rr_bw = 2                # target ratio: low BMI p(LBW) / adequate BMI p(LBW)
p_low_bmi = 0.2          # share of the population with low BMI

In [5]:
# Group means chosen by guess-and-check so that both of these hold (approximately):
#   * weighted p(LBW) = overall p(LBW)
#   * low BMI p(LBW) / adequate BMI p(LBW) = 2
mean_low, mean_high = 2862, 3124

In [6]:
# Risk-deleted overall prevalence of LBW: the population p(LBW) (normal mass below
# 2500) scaled by 1 - (rr * p - p) / (rr * p + 1 - p).
(
    scipy.stats.norm.cdf(2500, loc=mean, scale=sd)
    * (1 - (rr_bw * p_low_bmi - p_low_bmi) / (rr_bw * p_low_bmi + 1 - p_low_bmi))
)

0.12804062984822007

In [7]:
# p(LBW) in the adequate-BMI group: mass below 2500 of a normal centred on mean_high
scipy.stats.norm.cdf(2500, loc=mean_high, scale=sd)

0.12828290916989527

In [8]:
# p(LBW) for the whole population: mass below 2500 of the normal centred on mean
scipy.stats.norm.cdf(2500, loc=mean, scale=sd)

0.15364875581786408

In [9]:
# Weighted p(LBW): each group's p(LBW) weighted by that group's population share.
# Both groups use the same sd and differ only in their mean.
(
    p_low_bmi * scipy.stats.norm.cdf(2500, loc=mean_low, scale=sd)
    + (1 - p_low_bmi) * scipy.stats.norm.cdf(2500, loc=mean_high, scale=sd)
)

0.15366845798732537

In [10]:
# Ratio of group-specific p(LBW): low BMI over adequate BMI (to be compared with rr_bw)
(
    scipy.stats.norm.cdf(2500, loc=mean_low, scale=sd)
    / scipy.stats.norm.cdf(2500, loc=mean_high, scale=sd)
)

1.9894361213702287

In [11]:
# Gap between the two group means: low-BMI mean minus adequate-BMI ("high") mean.
# A negative value means the low-BMI group is centred on a lower birthweight.
shift = mean_low - mean_high
shift

-262

In [12]:
# Offset of the adequate-BMI group mean from the population mean.
# The gap is split as -p * shift (adequate BMI) and (1 - p) * shift (low BMI) so that
# the population-weighted average of the two offsets is zero:
#   (1 - p) * (-p * shift) + p * ((1 - p) * shift) = 0
adequate_bmi_shift = -p_low_bmi * shift
adequate_bmi_shift

52.400000000000006

In [13]:
# Offset of the low-BMI group mean from the population mean.
# Together with adequate_bmi_shift = -p * shift, this keeps the two groups exactly
# `shift` apart: (1 - p) * shift - (-p * shift) = shift.
low_bmi_shift = (1 - p_low_bmi) * shift
low_bmi_shift

-209.60000000000002

In [14]:
# Make a "population" of 10,000 simulants based on the population parameters.
# A dedicated seeded generator makes the draws, and every summary derived from
# them, repeatable from run to run.
n_simulants = 10_000
rng = np.random.RandomState(12345)

pop = pd.DataFrame({'simulant': np.arange(n_simulants)})
# Draw low-BMI status from p_low_bmi rather than a literal, so the simulation always
# uses the same prevalence as the analytic cells.
pop['low_bmi'] = rng.binomial(1, p_low_bmi, n_simulants)
# bw_0: baseline birthweight drawn from the overall population distribution.
pop['bw_0'] = rng.normal(mean, sd, n_simulants)
# bw_1: baseline birthweight plus the offset of the simulant's BMI group.
pop['bw_1'] = pop['bw_0'] + np.where(pop['low_bmi'] == 0, adequate_bmi_shift, low_bmi_shift)
# LBW indicators (1 when the birthweight is below 2500) before and after the shift.
pop['lbw_0'] = (pop['bw_0'] < 2500).astype(int)
pop['lbw_1'] = (pop['bw_1'] < 2500).astype(int)

In [15]:
# Summarise the simulated population before (bw_0 / lbw_0) and after (bw_1 / lbw_1)
# the BMI-specific shift. Group masks and group statistics are computed once and
# reused, instead of being rebuilt for every derived quantity.
is_low_bmi = pop['low_bmi'] == 1
is_adequate_bmi = pop['low_bmi'] == 0

shifted_low_bw = float(pop.loc[is_low_bmi, 'bw_1'].mean())
shifted_high_bw = float(pop.loc[is_adequate_bmi, 'bw_1'].mean())
shifted_low_lbw = float(pop.loc[is_low_bmi, 'lbw_1'].mean())
shifted_high_lbw = float(pop.loc[is_adequate_bmi, 'lbw_1'].mean())

summary = {
    'baseline_overall_bw_mean': float(pop['bw_0'].mean()),
    'shifted_overall_bw_mean': float(pop['bw_1'].mean()),
    'baseline_overall_bw_sd': float(pop['bw_0'].std()),
    'shifted_overall_bw_sd': float(pop['bw_1'].std()),
    'baseline_overall_lbw_prob': float(pop['lbw_0'].mean()),
    'shifted_overall_lbw_prob': float(pop['lbw_1'].mean()),
    'shifted_low_bmi_bw': shifted_low_bw,
    'shifted_high_bmi_bw': shifted_high_bw,
    'shifted_low_bmi_lbw_prob': shifted_low_lbw,
    'shifted_high_bmi_lbw_prob': shifted_high_lbw,
    # Simulated counterparts of rr_bw and shift, for comparison with the targets.
    'shifted_rr': shifted_low_lbw / shifted_high_lbw,
    'shifted_shift': shifted_low_bw - shifted_high_bw,
}

# Pass the column order explicitly so it does not depend on the pandas version.
results = pd.DataFrame([summary], columns=list(summary))
results.transpose()

Unnamed: 0,0
baseline_overall_bw_mean,3063.511558
shifted_overall_bw_mean,3061.074958
baseline_overall_bw_sd,546.847188
shifted_overall_bw_sd,555.117787
baseline_overall_lbw_prob,0.1492
shifted_overall_lbw_prob,0.1565
shifted_low_bmi_bw,2874.400948
shifted_high_bmi_bw,3110.487972
shifted_low_bmi_lbw_prob,0.248925
shifted_high_bmi_lbw_prob,0.132035


# Second iteration with lower population mean

In [16]:
# Fetch covariate 1136 (reported below as mean_birthweight) for location 211
# (reported below as Mali), GBD round 5, year 2017, and display the rows.
bw = get_covariate_estimates(
    covariate_id=1136,
    location_id=211,
    gbd_round_id=5,
    year_id=2017,
)
bw

Unnamed: 0,model_version_id,covariate_id,covariate_name_short,location_id,location_name,year_id,age_group_id,age_group_name,sex_id,sex,mean_value,lower_value,upper_value
0,21269,1136,mean_birthweight,211,Mali,2017,22,All Ages,1,Male,3074.124714,3044.21638,3104.097951
1,21269,1136,mean_birthweight,211,Mali,2017,22,All Ages,2,Female,2999.596071,2958.331001,3035.668356


In [17]:
# Fetch covariate 1252 (reported below as females_underweight_reproageagestd) for
# location 211 (reported below as Mali), GBD round 5, year 2017, and display the rows.
low_bmi = get_covariate_estimates(
    covariate_id=1252,
    location_id=211,
    gbd_round_id=5,
    year_id=2017,
)
low_bmi

Unnamed: 0,model_version_id,covariate_id,covariate_name_short,location_id,location_name,year_id,age_group_id,age_group_name,sex_id,sex,mean_value,lower_value,upper_value
0,22484,1252,females_underweight_reproageagestd,211,Mali,2017,27,Age-standardized,1,Male,0.0,0.0,0.0
1,22484,1252,females_underweight_reproageagestd,211,Mali,2017,27,Age-standardized,2,Female,0.110791,0.048098,0.197023


In [18]:
# Hypothetical population parameters for the second iteration.
mean, sd = 3030, 448     # mean and standard deviation of the birthweight normal
rr_bw = 2                # target ratio: low BMI p(LBW) / adequate BMI p(LBW)
p_low_bmi = 0.11         # share of the population with low BMI

# Group means chosen by guess-and-check so that both of these hold (approximately):
#   * weighted p(LBW) = overall p(LBW)
#   * low BMI p(LBW) / adequate BMI p(LBW) = 2
mean_low, mean_high = 2860, 3057

In [19]:
# Risk-deleted probability of LBW: the population p(LBW) (normal mass below 2500)
# scaled by 1 - (rr * p - p) / (rr * p + 1 - p).
(
    scipy.stats.norm.cdf(2500, loc=mean, scale=sd)
    * (1 - (rr_bw * p_low_bmi - p_low_bmi) / (rr_bw * p_low_bmi + 1 - p_low_bmi))
)

0.10666441147062775

In [20]:
# p(LBW) in the adequate-BMI group: mass below 2500 of a normal centred on mean_high
scipy.stats.norm.cdf(2500, loc=mean_high, scale=sd)

0.10687799432175404

In [21]:
# p(LBW) for the whole population: mass below 2500 of the normal centred on mean
scipy.stats.norm.cdf(2500, loc=mean, scale=sd)

0.1183974967323968

In [22]:
# Weighted p(LBW): each group's p(LBW) weighted by that group's population share.
# Both groups use the same sd and differ only in their mean.
(
    p_low_bmi * scipy.stats.norm.cdf(2500, loc=mean_low, scale=sd)
    + (1 - p_low_bmi) * scipy.stats.norm.cdf(2500, loc=mean_high, scale=sd)
)

0.11831186406349063

In [23]:
# Ratio of group-specific p(LBW): low BMI over adequate BMI (to be compared with rr_bw)
(
    scipy.stats.norm.cdf(2500, loc=mean_low, scale=sd)
    / scipy.stats.norm.cdf(2500, loc=mean_high, scale=sd)
)

1.972550720464508

In [24]:
# Gap between the two group means: low-BMI mean minus adequate-BMI ("high") mean.
# A negative value means the low-BMI group is centred on a lower birthweight.
shift = mean_low - mean_high
shift

-197

In [25]:
# Offset of the adequate-BMI group mean from the population mean.
# The gap is split as -p * shift (adequate BMI) and (1 - p) * shift (low BMI) so that
# the population-weighted average of the two offsets is zero:
#   (1 - p) * (-p * shift) + p * ((1 - p) * shift) = 0
adequate_bmi_shift = -p_low_bmi * shift
adequate_bmi_shift

21.67

In [26]:
# Offset of the low-BMI group mean from the population mean.
# Together with adequate_bmi_shift = -p * shift, this keeps the two groups exactly
# `shift` apart: (1 - p) * shift - (-p * shift) = shift.
low_bmi_shift = (1 - p_low_bmi) * shift
low_bmi_shift

-175.33

In [27]:
# Sanity check: the two group offsets must differ by the low-vs-adequate gap `shift`.
# Enforce it (up to floating-point rounding) instead of relying on visual inspection,
# then display the difference.
assert np.isclose(low_bmi_shift - adequate_bmi_shift, shift), (
    low_bmi_shift - adequate_bmi_shift, shift)
low_bmi_shift - adequate_bmi_shift

-197.0

In [30]:
# Make a "population" of 10,000 simulants based on the population parameters.
# A dedicated seeded generator makes the draws, and every summary derived from
# them, repeatable from run to run.
n_simulants = 10_000
rng = np.random.RandomState(12345)

pop = pd.DataFrame({'simulant': np.arange(n_simulants)})
# Low-BMI status: one Bernoulli(p_low_bmi) draw per simulant.
pop['low_bmi'] = rng.binomial(1, p_low_bmi, n_simulants)
# bw_0: baseline birthweight drawn from the overall population distribution.
pop['bw_0'] = rng.normal(mean, sd, n_simulants)
# bw_1: baseline birthweight plus the offset of the simulant's BMI group.
pop['bw_1'] = pop['bw_0'] + np.where(pop['low_bmi'] == 0, adequate_bmi_shift, low_bmi_shift)
# LBW indicators (1 when the birthweight is below 2500) before and after the shift.
pop['lbw_0'] = (pop['bw_0'] < 2500).astype(int)
pop['lbw_1'] = (pop['bw_1'] < 2500).astype(int)

In [31]:
# Summarise the simulated population before (bw_0 / lbw_0) and after (bw_1 / lbw_1)
# the BMI-specific shift. Group masks and group statistics are computed once and
# reused, instead of being rebuilt for every derived quantity.
is_low_bmi = pop['low_bmi'] == 1
is_adequate_bmi = pop['low_bmi'] == 0

shifted_low_bw = float(pop.loc[is_low_bmi, 'bw_1'].mean())
shifted_high_bw = float(pop.loc[is_adequate_bmi, 'bw_1'].mean())
shifted_low_lbw = float(pop.loc[is_low_bmi, 'lbw_1'].mean())
shifted_high_lbw = float(pop.loc[is_adequate_bmi, 'lbw_1'].mean())

summary = {
    'baseline_overall_bw_mean': float(pop['bw_0'].mean()),
    'shifted_overall_bw_mean': float(pop['bw_1'].mean()),
    'baseline_overall_bw_sd': float(pop['bw_0'].std()),
    'shifted_overall_bw_sd': float(pop['bw_1'].std()),
    'baseline_overall_lbw_prob': float(pop['lbw_0'].mean()),
    'shifted_overall_lbw_prob': float(pop['lbw_1'].mean()),
    'shifted_low_bmi_bw': shifted_low_bw,
    'shifted_high_bmi_bw': shifted_high_bw,
    'shifted_low_bmi_lbw_prob': shifted_low_lbw,
    'shifted_high_bmi_lbw_prob': shifted_high_lbw,
    # Simulated counterparts of rr_bw and shift, for comparison with the targets.
    'shifted_rr': shifted_low_lbw / shifted_high_lbw,
    'shifted_shift': shifted_low_bw - shifted_high_bw,
}

# Pass the column order explicitly so it does not depend on the pandas version.
results = pd.DataFrame([summary], columns=list(summary))
results.transpose()

Unnamed: 0,0
baseline_overall_bw_mean,3034.678717
shifted_overall_bw_mean,3034.915117
baseline_overall_bw_sd,448.314076
shifted_overall_bw_sd,452.308085
baseline_overall_lbw_prob,0.1129
shifted_overall_lbw_prob,0.1146
shifted_low_bmi_bw,2863.227816
shifted_high_bmi_bw,3055.875146
shifted_low_bmi_lbw_prob,0.206801
shifted_high_bmi_lbw_prob,0.103344
