In [1]:
from risk import Group, fill_tags
import pandas as pd
import json
import os

In [2]:
def build_population(sel_pop):
    """Turn a per-age population table into a flat list of ``Group`` objects.

    Every row of ``sel_pop`` yields two groups, in this order: one built with
    sex code ``'m'`` and the row's ``PopMale`` value, then one built with sex
    code ``'f'`` and the row's ``PopFemale`` value. Both receive the row's
    ``Age``.

    Parameters
    ----------
    sel_pop : pandas.DataFrame
        Table providing the columns ``Age``, ``PopMale`` and ``PopFemale``.

    Returns
    -------
    list
        The created ``Group`` objects, two per input row, in row order.
    """
    # (sex code, column holding that sex's head count), in output order.
    sex_columns = (('m', 'PopMale'), ('f', 'PopFemale'))
    groups = []

    for _, record in sel_pop.iterrows():
        groups.extend(
            Group(record.Age, sex, record[column]) for sex, column in sex_columns
        )

    return groups

In [3]:
# Load the two input tables: per-country risk indicators (gho_risk) and
# population counts by country, year, age and sex (wpp_pop).
gho_risk, wpp_pop = map(pd.read_csv, ('data/gho_risk.csv', 'data/wpp_pop.csv'))

In [4]:
# Preview the first rows of the risk-indicator table (one row per country).
gho_risk.head()

Unnamed: 0,Country,Und_all_a,HIV_1549_a,Smoking_15_f,Smoking_15_m,Dia_18_f,Dia_18_m,Alc_15_f,Alc_15_m
0,Brazil,0.025,0.005,0.095,0.17,0.087,0.078,0.016,0.069
1,India,0.14,0.002,0.014,0.22,0.083,0.091,0.005,0.091
2,Kenya,0.23,0.045,0.01,0.2,0.062,0.058,0.009,0.071
3,South Africa,0.057,0.19,0.071,0.34,0.13,0.097,0.018,0.12


In [5]:
# Re-key the table by country name: {country: {column name: value, ...}}.
# From here on `gho_risk` is a plain dict, not a DataFrame, so this cell
# cannot be re-run without first re-running the loading cell.
gho_risk = {
    name: dict(indicators)
    for name, indicators in gho_risk.set_index('Country').iterrows()
}

In [6]:
# Preview the first rows of the population table (one row per country/year/age).
wpp_pop.head()

Unnamed: 0,iso3,iso2,Country,Year,Age,PopMale,PopFemale,PopTotal
0,KEN,KE,Kenya,2020,0,707808.0,697235.0,1405043.0
1,KEN,KE,Kenya,2020,1,695585.0,686771.0,1382357.0
2,KEN,KE,Kenya,2020,2,689356.0,681644.0,1371000.0
3,KEN,KE,Kenya,2020,3,687634.0,680136.0,1367771.0
4,KEN,KE,Kenya,2020,4,681460.0,675277.0,1356736.0


### Example run

In [7]:
# Country used for the worked example; it is looked up as a key of `gho_risk`
# and matched against the `Country` column of `wpp_pop` in the cells below.
country = 'India'

In [8]:
# Indicator values of the selected country, keyed by indicator name.
sel_risk = gho_risk[country]

# Expand into rows of (indicator key, tag, target value, eligibility predicate).
# The predicate receives a group and says whether the indicator applies to it
# (age band and, where the indicator is sex-specific, sex).
sel_risk = [
    (key, tag, sel_risk[key], applies_to)
    for key, tag, applies_to in [
        ('Und_all_a', 'Und', lambda gp: True),
        ('HIV_1549_a', 'HIV', lambda gp: (15 <= gp.Age < 50)),
        ('Smoking_15_f', 'Smk', lambda gp: (gp.Age >= 15) & (gp.Sex == 'f')),
        ('Smoking_15_m', 'Smk', lambda gp: (gp.Age >= 15) & (gp.Sex == 'm')),
        ('Dia_18_f', 'Dia', lambda gp: (gp.Age >= 18) & (gp.Sex == 'f')),
        ('Dia_18_m', 'Dia', lambda gp: (gp.Age >= 18) & (gp.Sex == 'm')),
        ('Alc_15_f', 'Alc', lambda gp: (gp.Age >= 15) & (gp.Sex == 'f')),
        ('Alc_15_m', 'Alc', lambda gp: (gp.Age >= 15) & (gp.Sex == 'm')),
    ]
]

sel_risk

[('Und_all_a', 'Und', 0.14, <function __main__.<lambda>(gp)>),
 ('HIV_1549_a', 'HIV', 0.002, <function __main__.<lambda>(gp)>),
 ('Smoking_15_f', 'Smk', 0.0139999999999999, <function __main__.<lambda>(gp)>),
 ('Smoking_15_m', 'Smk', 0.22, <function __main__.<lambda>(gp)>),
 ('Dia_18_f', 'Dia', 0.083, <function __main__.<lambda>(gp)>),
 ('Dia_18_m', 'Dia', 0.091, <function __main__.<lambda>(gp)>),
 ('Alc_15_f', 'Alc', 0.005, <function __main__.<lambda>(gp)>),
 ('Alc_15_m', 'Alc', 0.091, <function __main__.<lambda>(gp)>)]

In [9]:
# Population rows of the selected country for the year 2021.
sel_pop = wpp_pop.loc[(wpp_pop.Country == country) & (wpp_pop.Year == 2021)]
sel_pop

Unnamed: 0,iso3,iso2,Country,Year,Age,PopMale,PopFemale,PopTotal
505,IND,IN,India,2021,0,11722644.0,10877951.0,22600595.0
506,IND,IN,India,2021,1,11785525.0,10901323.0,22686847.0
507,IND,IN,India,2021,2,12010312.0,11083779.0,23094091.0
508,IND,IN,India,2021,3,12157120.0,11191548.0,23348668.0
509,IND,IN,India,2021,4,12291570.0,11285654.0,23577224.0
...,...,...,...,...,...,...,...,...
601,IND,IN,India,2021,96,30832.0,45422.0,76254.0
602,IND,IN,India,2021,97,21230.0,31497.0,52727.0
603,IND,IN,India,2021,98,14306.0,21346.0,35653.0
604,IND,IN,India,2021,99,9464.0,14163.0,23627.0


In [10]:
# Baseline population: two groups (male, female) per age row of `sel_pop`.
pop0 = build_population(sel_pop)

In [11]:
def print_pop(pop, risks=None):
    """Print the population total and, per risk row, target vs. achieved share.

    The first printed line is the sum of ``gp.N`` over all groups. Then, for
    each risk row, the groups accepted by the row's filter are treated as the
    eligible population and the share of their ``N`` that carries the row's
    tag is printed next to the target value.

    Parameters
    ----------
    pop : iterable
        Groups exposing ``N`` (size) and ``Tags`` (container supporting ``in``),
        plus whatever attributes the filters read.
    risks : list of (key, tag, target, filter) tuples, optional
        Risk rows to report. When omitted, the notebook-level ``sel_risk``
        list is used, which keeps existing ``print_pop(pop)`` calls working.

    Returns
    -------
    None
    """
    if risks is None:
        # Fall back to the list built for the currently selected country.
        risks = sel_risk

    print('N:', sum(gp.N for gp in pop))

    for key, tag, p_targets, fil in risks:
        eligible = [gp for gp in pop if fil(gp)]
        n_eligible = sum(gp.N for gp in eligible)
        n_risk = sum(gp.N for gp in eligible if tag in gp.Tags)

        # With nobody eligible (empty population, or a filter that matches no
        # group) the share is undefined; report that rather than divide by zero.
        filled = f'{n_risk / n_eligible:.1%}' if n_eligible else 'n/a'

        print('--', key, tag, f'Targets: {p_targets: .1%}, Filled: {filled}')

In [12]:
# Tag the baseline population using a constant rank (every group ranks 1).
# NOTE(review): how `fill_tags` uses `fn_rank` is defined in the `risk` module;
# presumably a constant rank assigns tags without regard to existing tags — confirm.
# NOTE(review): the recorded total N for this run is slightly lower than the
# totals recorded for the exclusive and cluster runs; check whether `fill_tags`
# preserves population size with a constant rank.
pop_independent = fill_tags(pop0, sel_risk, fn_rank = lambda gp: 1)

# Compare achieved prevalence with the targets.
print_pop(pop_independent)

N: 1407563806.7608922
-- Und_all_a Und Targets:  14.0%, Filled: 14.0%
-- HIV_1549_a HIV Targets:  0.2%, Filled: 0.2%
-- Smoking_15_f Smk Targets:  1.4%, Filled: 1.4%
-- Smoking_15_m Smk Targets:  22.0%, Filled: 22.0%
-- Dia_18_f Dia Targets:  8.3%, Filled: 8.3%
-- Dia_18_m Dia Targets:  9.1%, Filled: 9.1%
-- Alc_15_f Alc Targets:  0.5%, Filled: 0.5%
-- Alc_15_m Alc Targets:  9.1%, Filled: 9.1%


In [13]:
# Tag the baseline population ranking each group by the negated number of tags
# it already carries.
# NOTE(review): presumably this favours groups with few tags so risk factors
# overlap as little as possible — confirm against `fill_tags` in the `risk` module.
pop_exclusive = fill_tags(pop0, sel_risk, fn_rank = lambda gp: - len(gp.Tags))

# Compare achieved prevalence with the targets.
print_pop(pop_exclusive)

N: 1407563845.0
-- Und_all_a Und Targets:  14.0%, Filled: 14.0%
-- HIV_1549_a HIV Targets:  0.2%, Filled: 0.2%
-- Smoking_15_f Smk Targets:  1.4%, Filled: 1.4%
-- Smoking_15_m Smk Targets:  22.0%, Filled: 22.0%
-- Dia_18_f Dia Targets:  8.3%, Filled: 8.3%
-- Dia_18_m Dia Targets:  9.1%, Filled: 9.1%
-- Alc_15_f Alc Targets:  0.5%, Filled: 0.5%
-- Alc_15_m Alc Targets:  9.1%, Filled: 9.1%


In [14]:
# Tag the baseline population ranking each group by the number of tags it
# already carries.
# NOTE(review): presumably this favours groups that already have tags so risk
# factors cluster together — confirm against `fill_tags` in the `risk` module.
pop_cluster = fill_tags(pop0, sel_risk, fn_rank = lambda gp: len(gp.Tags))

# Compare achieved prevalence with the targets.
print_pop(pop_cluster)

N: 1407563845.0000002
-- Und_all_a Und Targets:  14.0%, Filled: 14.0%
-- HIV_1549_a HIV Targets:  0.2%, Filled: 0.2%
-- Smoking_15_f Smk Targets:  1.4%, Filled: 1.4%
-- Smoking_15_m Smk Targets:  22.0%, Filled: 22.0%
-- Dia_18_f Dia Targets:  8.3%, Filled: 8.3%
-- Dia_18_m Dia Targets:  9.1%, Filled: 9.1%
-- Alc_15_f Alc Targets:  0.5%, Filled: 0.5%
-- Alc_15_m Alc Targets:  9.1%, Filled: 9.1%


In [15]:

# 2x2 cross-tabulation of two tags over the clustered population: how many
# people carry both, only the first, only the second, or neither.
tag1, tag2 = 'Dia', 'Smk'

# Keys are (has tag1, has tag2); insertion order is also the print order.
cross = {(True, True): 0, (True, False): 0, (False, True): 0, (False, False): 0}
for gp in pop_cluster:
    cross[tag1 in gp.Tags, tag2 in gp.Tags] += gp.N

n11, n10 = cross[True, True], cross[True, False]
n01, n00 = cross[False, True], cross[False, False]

for (has_tag1, has_tag2), total in cross.items():
    print(has_tag1, has_tag2, total)

True True 51857679.348399654
True False 32582194.927600324
False True 73536916.1256002
False False 1249587054.5983994


#### Run

In [16]:
# Every country present in the risk table, in the table's row order.
countries = list(gho_risk)
countries

['Brazil', 'India', 'Kenya', 'South Africa']

In [17]:
def _save_population(pop, out_dir, name):
    """Write ``pop`` to ``<out_dir>/<name>.csv`` and ``<out_dir>/<name>.json``.

    Each group is serialised with its ``to_dict()`` method. The CSV is the
    record list as a DataFrame with missing values replaced by 0; the JSON is
    the record list as-is.
    """
    records = [gp.to_dict() for gp in pop]
    pd.DataFrame(records).fillna(0).to_csv(f'{out_dir}/{name}.csv')
    # Context manager so the JSON file is flushed and closed right away.
    with open(f'{out_dir}/{name}.json', 'w') as fp:
        json.dump(records, fp)


# For every country: build the 2021 baseline population, tag it under the three
# ranking strategies, and export each population to data/<country>/.
for country in countries:
    out_dir = f'data/{country}'
    os.makedirs(out_dir, exist_ok=True)

    # Rows of (indicator key, tag, target value, eligibility predicate).
    sel_risk = gho_risk[country]
    sel_risk = [
        ('Und_all_a', 'Und', sel_risk['Und_all_a'], lambda gp: True),
        ('HIV_1549_a', 'HIV', sel_risk['HIV_1549_a'], lambda gp: (50 > gp.Age >= 15)),
        ('Smoking_15_f', 'Smk', sel_risk['Smoking_15_f'], lambda gp: (gp.Age >= 15) & (gp.Sex == 'f')),
        ('Smoking_15_m', 'Smk', sel_risk['Smoking_15_m'], lambda gp: (gp.Age >= 15) & (gp.Sex == 'm')),
        ('Dia_18_f', 'Dia', sel_risk['Dia_18_f'], lambda gp: (gp.Age >= 18) & (gp.Sex == 'f')),
        ('Dia_18_m', 'Dia', sel_risk['Dia_18_m'], lambda gp: (gp.Age >= 18) & (gp.Sex == 'm')),
        ('Alc_15_f', 'Alc', sel_risk['Alc_15_f'], lambda gp: (gp.Age >= 15) & (gp.Sex == 'f')),
        ('Alc_15_m', 'Alc', sel_risk['Alc_15_m'], lambda gp: (gp.Age >= 15) & (gp.Sex == 'm'))
    ]

    sel_pop = wpp_pop[wpp_pop.Country == country]
    sel_pop = sel_pop[sel_pop.Year == 2021]
    pop0 = build_population(sel_pop)
    _save_population(pop0, out_dir, 'pop')

    # Each tagged population is computed for this country before it is saved,
    # so no result from a previous country or an earlier cell is ever written.
    pop_independent = fill_tags(pop0, sel_risk, fn_rank = lambda gp: 1)
    _save_population(pop_independent, out_dir, 'pop_independent')

    pop_exclusive = fill_tags(pop0, sel_risk, fn_rank = lambda gp: - len(gp.Tags))
    _save_population(pop_exclusive, out_dir, 'pop_exclusive')

    # Saved inside the country directory like the other outputs, so countries
    # do not overwrite each other's cluster CSV.
    pop_cluster = fill_tags(pop0, sel_risk, fn_rank = lambda gp: len(gp.Tags))
    _save_population(pop_cluster, out_dir, 'pop_cluster')

In [18]:
# Ensures the output directory exists for whatever `country` currently holds
# (after the export loop, that is the last country processed).
# NOTE(review): the export loop already creates this directory for every
# country, so this call appears redundant — confirm and consider removing.
os.makedirs(f'data/{country}', exist_ok=True)