Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Empowerment calibration #240

Open
wants to merge 34 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
075cac7
Pull DHS data from empowerment metrics
mzimmermann-IDM Aug 21, 2023
25289a1
update method for young women
mzimmermann-IDM Aug 24, 2023
d2a302e
Merge remote-tracking branch 'origin/Kenya_empowerment_DHS' into empo…
pausz Sep 1, 2023
b06d829
Include new empowerment attributes
pausz Sep 1, 2023
5d23ad5
Implement #189: intialise proportion of people who live in an urban b…
pausz Sep 1, 2023
5e721f0
#189: Pass positional arg 'n' to intialize_urban()
pausz Sep 1, 2023
ad9991f
#189: read urban proportion directly from csv file
pausz Sep 1, 2023
70a52be
Write microsim for testing new empwr attributes.
pausz Sep 1, 2023
7b1e9cb
Load empowerment data and set it as part of the parameter dictionary
pausz Sep 1, 2023
caf070a
Return urban_prop as a float not pandas series
pausz Sep 1, 2023
d60948c
Implement #185: intialisation of paid_employment from Kenya DHS data.
pausz Sep 1, 2023
e916de1
Implement #187 and #188: intialisation of sexual_autonomy and control…
pausz Sep 2, 2023
5bb5fd6
Implement #186: load partenership data
pausz Sep 2, 2023
ec4fd3c
Implement #186: initialised whether a woman is partnered or not
pausz Sep 2, 2023
6b5a573
Pass initial distribution of new attributes to People()
pausz Sep 2, 2023
ad5e0f9
merged main into current branch
emilydriano Oct 18, 2023
0d1175f
added variable to load empowerment data to kenya.py, initial setup of…
emilydriano Oct 31, 2023
5c90296
added paid_employment emp param to experiment class for calibration t…
emilydriano Nov 29, 2023
5051a99
merged changes from main
emilydriano Nov 29, 2023
100efbf
added education as calibration target
emilydriano Nov 29, 2023
4b64a31
merged main into current branch
emilydriano Feb 14, 2024
b051fad
updated calibrate_manual to plot paid employment
emilydriano Feb 27, 2024
1d99f88
finished empowerment param plotting in calibrate_manual script
emilydriano Feb 28, 2024
b9422a3
merged changes in main to local branch
emilydriano Apr 24, 2024
b7751dc
modified Experiment functions extracting paid work and education to c…
emilydriano Apr 25, 2024
d07b78b
merged changes from main
emilydriano Apr 25, 2024
1c4809f
reverted incorrect expected num keys expected value
emilydriano Apr 25, 2024
72ad6c2
added plotting functions to calibrate_empowerment.py for paid work an…
emilydriano Apr 26, 2024
14127bd
fixed employed count in empowerment calibration script
emilydriano May 7, 2024
7220c50
fixed ageparity data reference and changed settings to plot all metri…
emilydriano May 9, 2024
fcd506b
added error bars to empowerment plots
emilydriano May 13, 2024
c6a3991
updated version and changelog
emilydriano May 21, 2024
bc81598
added tests, updated experiment employment extraction functions
emilydriano May 21, 2024
7a106d4
merged main into current branch
emilydriano May 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -8,6 +8,12 @@ All notable changes to the codebase are documented in this file. Changes that ma
:local:
:depth: 1

Version 0.28.4 (2024-05-21)
---------------------------
- Adds empowerment metrics (paid work and education attainment) to calibration targets
- Creates script for empowerment calibration
- *GitHub info*: PR `https://github.com/fpsim/fpsim/pull/240>`_

Version 0.28.3 (2024-04-30)
--------------------------
- Creates subnational tutorial for Ethiopia
Expand Down
17 changes: 8 additions & 9 deletions examples/calibration_scripts/calibrate_auto.py
Expand Up @@ -123,8 +123,8 @@

calibration = fp.Calibration(pars, calib_pars=freepars)
calibration.calibrate()

sim = fp.Sim(pars=calibration.best_pars)
pars.update(calibration.best_pars)
sim = fp.Sim(pars=pars)
sim.run()

# Plot results from sim run
Expand Down Expand Up @@ -155,7 +155,7 @@ def pop_growth_rate(years, population):

return growth_rate

# Start series of options for plotting data to model comaprisons
# Start series of options for plotting data to model comparisons
if do_plot_asfr:
'''
Plot age-specific fertility rate between model and data
Expand Down Expand Up @@ -346,16 +346,15 @@ def pop_growth_rate(years, population):
# Plot ageparity
for key in ['Data', 'Model', 'Diff_data-model']:
fig = pl.figure(figsize=(20, 14))
ax = fig.add_subplot(111)

pl.pcolormesh(sky_arr[key])
im = ax.pcolormesh(sky_arr[key], cmap='parula')
pl.xlabel('Age', fontweight='bold')
pl.ylabel('Parity', fontweight='bold')
pl.title(f'{country.capitalize()}: Age-parity plot for the {key.lower()}\n\n', fontweight='bold')
pl.gca().set_xticks(pl.arange(n_age))
pl.gca().set_yticks(pl.arange(n_parity))
pl.gca().set_xticklabels(age_bins)
pl.gca().set_yticklabels(parity_bins)
pl.gca().view_init(30, 45)
pl.xticks(pl.arange(n_age), age_bins)
pl.yticks(pl.arange(n_parity), parity_bins)
pl.colorbar(im, label='Percentage')
pl.draw()


Expand Down
671 changes: 671 additions & 0 deletions examples/calibration_scripts/calibrate_empowerment.py

Large diffs are not rendered by default.

81 changes: 80 additions & 1 deletion fpsim/experiment.py
@@ -1,7 +1,7 @@
'''
Define classes and functions for the Experiment class (running sims and comparing them to data)
'''

import math

import yaml
import numpy as np
Expand Down Expand Up @@ -37,6 +37,7 @@
cbr = 1, # Crude birth rate (per 1000 inhabitants); 'crude_birth_rate'
tfr = 1, # Total fertility rate
asfr = 1, # Age-specific fertility rate
empowerment = 0, # Empowerment metrics, i.e. paid employment and education params
)


Expand All @@ -54,6 +55,7 @@ class Experiment(sc.prettyobj):

def __init__(self, pars=None, flags=None, label=None, **kwargs):
self.flags = sc.mergedicts(default_flags, flags, _copy=True) # Set flags for what gets run
self.flags['empowerment'] = 1 if pars['use_empowerment'] else 0
self.pars = pars if pars else fpp.pars(**kwargs)
self.location = self.pars['location']
self.model = sc.objdict()
Expand Down Expand Up @@ -464,6 +466,80 @@ def extract_methods(self):

return


def extract_employment(self):
# Extract paid work from data
data_empowerment = self.load_data('empowerment')
data_empowerment = data_empowerment.iloc[1:-1]
data_paid_work = data_empowerment[['age', 'paid_employment']].copy()
age_bins = np.arange(min_age, max_age+1, bin_size)
data_paid_work['age_group'] = pd.cut(data_paid_work['age'], bins=age_bins, right=False)

# Calculate mean and standard error for each age bin
employment_data_grouped = data_paid_work.groupby('age_group', observed=False)['paid_employment']
self.data['paid_employment'] = employment_data_grouped.mean().tolist()

# Extract paid work from model
employed_counts = {age_bin: 0 for age_bin in age_bins}
total_counts = {age_bin: 0 for age_bin in age_bins}

# Count the number of employed and total people in each age bin
ppl = self.people
for i in range(len(ppl)):
if ppl.alive[i] and not ppl.sex[i] and min_age <= ppl.age[i] < max_age:
age_bin = age_bins[sc.findinds(age_bins <= ppl.age[i])[-1]]
total_counts[age_bin] += 1
if ppl.paid_employment[i]:
employed_counts[age_bin] += 1

# Calculate the percentage of employed people in each age bin
percentage_employed = {}
age_bins = np.arange(min_age, max_age, bin_size)
for age_bin in age_bins:
total_ppl = total_counts[age_bin]
if total_ppl != 0:
employed_ratio = employed_counts[age_bin] / total_ppl
percentage_employed[age_bin] = employed_ratio
else:
percentage_employed[age_bin] = 0

self.model['paid_employment'] = list(percentage_employed.values())
return


def extract_education(self):
# Extract education from data
dhs_data_education = self.load_data('education')
data_edu = dhs_data_education[['age', 'edu']].sort_values(by='age')
data_edu = data_edu.query(f"{min_age} <= age < {max_age}").copy()
age_bins = np.arange(min_age, max_age + 1, bin_size)
data_edu['age_group'] = pd.cut(data_edu['age'], bins=age_bins, right=False)

# Calculate mean for each age bin
education_data_grouped = data_edu.groupby('age_group', observed=False)['edu']
self.data['education'] = education_data_grouped.mean().tolist()

# Extract education from model
model_edu_years = {age_bin: [] for age_bin in np.arange(min_age, max_age, bin_size)}
ppl = self.people
for i in range(len(ppl)):
if ppl.alive[i] and not ppl.sex[i] and min_age <= ppl.age[i] < max_age:
age_bin = age_bins[sc.findinds(age_bins <= ppl.age[i])[-1]]
model_edu_years[age_bin].append(ppl.edu_attainment[i])

# Calculate average # of years of educational attainment for each age
model_edu_mean = []
for age_group in model_edu_years:
if len(model_edu_years[age_group]) != 0:
avg_edu = sum(model_edu_years[age_group]) / len(model_edu_years[age_group])
model_edu_mean.append(avg_edu)
else:
model_edu_years[age_group] = 0
self.model['education'] = model_edu_mean

return


def compute_fit(self, *args, **kwargs):
''' Compute how good the fit is '''
data = sc.dcp(self.data)
Expand All @@ -486,6 +562,9 @@ def post_process_results(self, keep_people=False, compute_fit=True, **kwargs):
if self.flags.ageparity: self.extract_ageparity()
if self.flags.birth_space: self.extract_birth_spacing()
if self.flags.methods: self.extract_methods()
if self.flags.empowerment:
self.extract_employment()
self.extract_education()

# Remove people, they're large!
if not keep_people:
Expand Down
@@ -0,0 +1,132 @@
#########################################
# -- Education data from DHS - ##
# -- DHS data
# -- Marita Zimmermann
# -- August 2023
#########################################

rm(list=ls())

# -- Libraries -- #
library(tidyverse)
library(haven)
library(survey)

# -- Data -- #

# Kenya 2022 household data
data.raw.h <- read_dta("C:/Users/maritazi/OneDrive - Bill & Melinda Gates Foundation/DHS/KEHR8ADT/KEHR8AFL.DTA")
# Kenya 2022 individual recode
data.raw <- read_dta("C:/Users/maritazi/OneDrive - Bill & Melinda Gates Foundation/DHS/KEIR8ADT/KEIR8AFL.DTA")

# -- Clean data -- #

data <- data.raw %>%
mutate(age = v012, parity = v220, edu = v133,
paidwork = case_when(v731 %in% c(1,2,3) & v741 %in% c(1,2,3) ~ 1, # done work in the past year and paid in cash or in kind
v731 == 0 | v741 == 0 ~ 0),
decisionwages = case_when(v739 == 1 ~ 1, v739 %in% c(2,3) ~ 0.5, v739 %in% c(4,5) ~ 0), # 1 she decides, 0.5 she decides with someone else, 0 someone else decides
refusesex = case_when(v850a == 1 ~ 1, v850a == 8 ~ 0.5, v850a == 0 ~ 0), # 1 she can refuse sex, 0.5 don't know/it depends, 0 no
urban = ifelse(v025 == 1, 1, 0), # 1 if urban
age_partner = v511) # age at first cohabitation
svydes1 = svydesign(id = data$v001, strata=data$v023, weights = data$v005/1000000, data=data)

# table of ages at first birth
as.data.frame(svytable(~v212, svydes1)) %>% mutate(perc = Freq/sum(Freq), cumsum = cumsum(Freq)/sum(Freq))
# 5.7% of women had age at first birth before 15

# -- Education -- #

# table of education by age and parity for age 15+
table.edu.ind <- as.data.frame(svytable(~age+edu+parity, svydes1))

# For women who have a birth before age 15, create matrix for parity 1+ and age<15 using birth calendar
data.edu <- data %>%
select(caseid, v001, v023, v005, v212, v011, age = v012, edu = v133, starts_with("b3"), starts_with("bord")) %>%
filter(v212 < 15) %>% # age at first birth under 15
gather(var, val, -caseid, -v212, -v011, -age, -edu, -v001, -v023, -v005) %>% separate(var, c("var", "order")) %>% spread(var, val) %>% # one row per pregnancy
mutate(age_at_birth = floor((b3 - v011)/12)) %>%
filter(age_at_birth < 15) %>% # look at birth under age 15
mutate(edu_at_birth = pmin(age_at_birth - 6, edu)) # assume school starts at age 6, women continue education from then until age at birth or until their actual education level, whichever is less
svydes2 = svydesign(id = data.edu$v001, strata=data.edu$v023, weights = data.edu$v005/1000000, data=data.edu)
table.edu.young <- as.data.frame(svytable(~age_at_birth+edu_at_birth+bord, svydes2)) %>%
rename(age = age_at_birth, edu = edu_at_birth, parity = bord)

# Option 1: For women with no birth before age 15, create matrix for each year
data.edu.0 <- data %>%
select(caseid, v001, v023, v005, v212, v011, age = v012, parity = v220, edu = v133) %>%
filter(v212 < 15) %>% # include all women who didn't have a before age 15
slice(rep(1:n(), 9)) %>% group_by(caseid) %>% mutate(age_edu = 1:n() + 5, # repeat rows for school years from age 6-14
edu_yr = pmin(age_edu - 5, edu))
svydes3 = svydesign(id = data.edu.0$v001, strata=data.edu.0$v023, weights = data.edu.0$v005/1000000, data=data.edu.0)
table.edu.young.0 <- as.data.frame(svytable(~age_edu+edu_yr, svydes3)) %>%
rename(age = age_edu, edu = edu_yr) %>% mutate(parity = 0)


# Option 2: For 0 parity under age 15, create data frame for education for girls age 6-15 from household survey
data.h <- data.raw.h %>%
select(starts_with("hv103"), starts_with("hv105"), starts_with("hv104"), starts_with("hv109"),
starts_with("hhid"), starts_with("hv001"), starts_with("hv023"), starts_with("hv005")) %>%
gather(var, val, -c(hhid, hv001, hv023, hv005)) %>% mutate(num = substr(var,7,9), var = substr(var,1,5)) %>% spread(var, val) %>%
filter(hv103 == 1 & hv105 %in% c(6:14) & hv104 == 2) %>% # de facto hh member, age 6-15, female
rename(edu = hv109, age = hv105) %>%
mutate(parity = 0) # assume girls under age 15 are parity 0
svydes4 = svydesign(id = data.h$hv001, strata=data.h$hv023, weights = data.h$hv005/1000000, data=data.h)
table.edu.hh <- as.data.frame(svytable(~age+edu+parity, svydes4))

# Compare options 1 and 2
compare <- table.edu.young.0 %>%
left_join(table.edu.hh %>%
rename(Freq.hh = Freq) %>% mutate( parity = as.numeric(as.character(parity)))) %>%
mutate(edu = as.numeric(as.character(edu)),
age = as.numeric(as.character(age))) %>%
group_by(age, parity) %>% arrange(-edu) %>%
mutate(cum.percent = cumsum(Freq)/sum(Freq), cum.percent.hh = cumsum(ifelse(is.na(Freq.hh), 0, Freq.hh))/sum(Freq.hh, na.rm = T), dif = cum.percent - cum.percent.hh)
# There are some significant differences between these two approaches, for now use hh roster

# combine age group tables
table.edu <- table.edu.ind %>%
bind_rows(table.edu.young) %>%
bind_rows(table.edu.hh) %>%
mutate(edu = as.numeric(as.character(edu)),
age = as.numeric(as.character(age))) %>%
group_by(age, parity) %>% arrange(-edu) %>%
mutate(total = sum(Freq), sum = cumsum(Freq), cum.percent = sum/total) %>% # percentage with each year of education in the age/parity group
select(-total, -sum, -Freq)
# write.csv(table.edu, "fpsim/locations/kenya/education.csv", row.names = F)
# cum.percent is the percentage in the age/parity group with that year or more of education

# Visualize
table.edu %>%
filter(!is.na(cum.percent)) %>%
filter(edu >0) %>%
ggplot() +
#geom_point(aes(y = cum.percent, x = age, color = parity)) +
geom_smooth(aes(y = cum.percent, x = age, color = parity)) +
ylim(0,1) + ylab("prob of x+ yrs of edu") +
theme_bw(base_size = 13) +
theme(strip.background = element_rect(fill = "white")) +
facet_wrap(~edu, labeller = "label_both")

# -- Empowerment -- #

# Table of the three empowerment outcomes by age
table.emp <- as.data.frame(svyby(~paidwork, ~age, svydes1, svymean)) %>% rename(paidwork.se = se) %>%
left_join(as.data.frame(svyby(~decisionwages, ~age, svydes1, svymean, na.rm = T)) %>% rename(decisionwages.se = se)) %>%
left_join(as.data.frame(svyby(~refusesex, ~age, svydes1, svymean, na.rm = T)) %>% rename(refusesex.se = se))
# write.csv(table.emp, "fpsim/locations/kenya/empowerment.csv", row.names = F)

# -- urban/rural -- #

table.urban <- as.data.frame(svymean(~urban, svydes1)) %>% rename(urban.se = urban)
# write.csv(table.urban, "fpsim/locations/kenya/urban.csv", row.names = F)

# -- age at partnership -- #

table.partner <- as.data.frame(svytable(~age_partner, svydes1)) %>%
mutate(percent = Freq/sum(Freq)) %>% select(-Freq)
# write.csv(table.partner, "fpsim/locations/kenya/age_partnership.csv", row.names = F)