In [1]:
import pandas as pd
from scipy.optimize import least_squares

import json

In [2]:
fil_agp = {
    '0-4' : lambda x: x['Age'] < 5,
    '5-14' : lambda x: 5 <= x['Age'] < 15,
    '0-14' : lambda x: x['Age'] < 15,
    '15-24' : lambda x: 15 <= x['Age'] < 25,
    '25-34' : lambda x: 25 <= x['Age'] < 35,
    '35-44' : lambda x: 35 <= x['Age'] < 45,
    '45-54' : lambda x: 45 <= x['Age'] < 55,
    '55-64' : lambda x: 55 <= x['Age'] < 65,
    '65plus' : lambda x: 65 <= x['Age'],
    '15plus' : lambda x: 15 <= x['Age'],
    '18plus' : lambda x: 18 <= x['Age'],
    'all' : lambda x: True
}

fil_sex = {
    'f': lambda x: x['Sex'] == 'f',
    'm': lambda x: x['Sex'] == 'm',
    'a': lambda x: True,
}

fil_risk = {
    'und': lambda x: 'Und' in x,
    'hiv': lambda x: 'HIV' in x,
    'dia': lambda x: 'Dia' in x,
    'alc': lambda x: 'Alc' in x,
    'smk': lambda x: 'Smk' in x,
    'all': lambda x: True
}

In [3]:
who_inc = pd.read_csv('data/who_inc.csv')
who_inc

Unnamed: 0,Country,iso2,iso3,iso_numeric,year,measure,unit,age_group,sex,risk_factor,M,L,U
0,Brazil,BR,BRA,76,2021,inc,num,0-14,a,all,5100,4300,5900
1,Brazil,BR,BRA,76,2021,inc,num,0-14,f,all,2500,2000,3000
2,Brazil,BR,BRA,76,2021,inc,num,0-14,m,all,2600,2100,3200
3,Brazil,BR,BRA,76,2021,inc,num,0-4,f,all,1100,570,1700
4,Brazil,BR,BRA,76,2021,inc,num,0-4,m,all,1300,680,2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,South Africa,ZA,ZAF,710,2021,inc,num,all,a,all,304000,197000,412000
140,South Africa,ZA,ZAF,710,2021,inc,num,all,a,hiv,145000,69000,251000
141,South Africa,ZA,ZAF,710,2021,inc,num,all,a,und,38000,26000,52000
142,South Africa,ZA,ZAF,710,2021,inc,num,all,f,all,124000,63000,185000


In [4]:
countries = set(who_inc.Country)
countries

{'Brazil', 'India', 'Kenya', 'South Africa'}

### Test

In [5]:
country = 'India'
sc = 'independent'

sel_inc = who_inc[who_inc.Country == country]

rows = [dict(row) for _, row in sel_inc[['age_group', 'sex', 'risk_factor', 'M', 'L', 'U']].iterrows()]

pop = json.load(open(f'data/pop_{country}_{sc}.json', 'r'))
        
binded = list()

for row in rows:
    fa, fs, fr = fil_agp[row['age_group']], fil_sex[row['sex']], fil_risk[row['risk_factor']]
    d = dict(row)
    filtered = [gp for gp in pop if fa(gp) and fs(gp) and fr(gp)]
    d['N'] = n = sum([gp['N'] for gp in filtered])
    d['Dia'] = sum([gp['N'] for gp in filtered if 'Dia' in gp]) / n
    d['Smk'] = sum([gp['N'] for gp in filtered if 'Smk' in gp]) / n
    d['Alc'] = sum([gp['N'] for gp in filtered if 'Alc' in gp]) / n
    d['Und'] = sum([gp['N'] for gp in filtered if 'Und' in gp]) / n
    d['HIV'] = sum([gp['N'] for gp in filtered if 'HIV' in gp]) / n
    
    d['IncR'] = d['M'] / n * 1e5
    binded.append(d)

binded = pd.DataFrame(binded)

binded.head(10)

FileNotFoundError: [Errno 2] No such file or directory: 'data/pop_India_independent.json'

### Run

In [None]:
for country in countries:
    sel_inc = who_inc[who_inc.Country == country]
    rows = [dict(row) for _, row in sel_inc[['age_group', 'sex', 'risk_factor', 'M', 'L', 'U']].iterrows()]

    pop = json.load(open(f'data/{country}/pop_{sc}.json', 'r'))
        
    binded = list()
    
    for row in rows:
        if row['risk_factor'] == 'all':
            fa, fs = fil_agp[row['age_group']], fil_sex[row['sex']]
            d = dict(row)
            filtered = [gp for gp in pop if fa(gp) and fs(gp)]
            d['N'] = n = sum([gp['N'] for gp in filtered])
            
            d['IncR'] = d['M'] / n * 1e5
            binded.append(d)
    
    binded = pd.DataFrame(binded)
    binded.to_csv(f'data/{country}/inc.csv')
    
    for sc in ['independent', 'cluster', 'exclusive']:
    
        pop = json.load(open(f'data/{country}/pop_{sc}.json', 'r'))
        
        binded = list()
        
        for row in rows:
            fa, fs, fr = fil_agp[row['age_group']], fil_sex[row['sex']], fil_risk[row['risk_factor']]
            d = dict(row)
            filtered = [gp for gp in pop if fa(gp) and fs(gp) and fr(gp)]
            d['N'] = n = sum([gp['N'] for gp in filtered])
            d['Dia'] = sum([gp['N'] for gp in filtered if 'Dia' in gp]) / n
            d['Smk'] = sum([gp['N'] for gp in filtered if 'Smk' in gp]) / n
            d['Alc'] = sum([gp['N'] for gp in filtered if 'Alc' in gp]) / n
            d['Und'] = sum([gp['N'] for gp in filtered if 'Und' in gp]) / n
            d['HIV'] = sum([gp['N'] for gp in filtered if 'HIV' in gp]) / n
            
            d['IncR'] = d['M'] / n * 1e5
            binded.append(d)
        
        binded = pd.DataFrame(binded)
        binded.to_csv(f'data/{country}/inc_{sc}.csv')