In [112]:
import pandas as pd
import numpy as np

Read SPEW synthetic population data

In [113]:
# Directory and shared filename prefix of the SPEW extract used in this notebook.
data_dir = '../spew_data'
data_prex = '2010_ver1_45079_synth_'
# The people and household tables differ only in their filename suffix.
pdata_file = f'{data_dir}/{data_prex}people.txt'
hdata_file = f'{data_dir}/{data_prex}households.txt'

In [114]:
# Load both tables, forcing the identifier columns to object dtype instead of
# letting pandas infer a numeric type for them.
pdata = pd.read_csv(pdata_file, dtype=dict(sp_id=object, sp_hh_id=object))
hdata = pd.read_csv(hdata_file, dtype=dict(sp_id=object))

In [115]:
pdata.columns

Index(['sp_id', 'sp_hh_id', 'serialno', 'stcotrbg', 'age', 'sex', 'race',
       'sporder', 'relate', 'sp_school_id', 'sp_work_id'],
      dtype='object')

In [116]:
hdata.columns

Index(['sp_id', 'serialno', 'stcotrbg', 'hh_race', 'hh_income', 'hh_size',
       'hh_age', 'latitude', 'longitude'],
      dtype='object')

In [117]:
# Keep only the person-level columns used downstream.  Selecting them by name
# (instead of dropping 'serialno', 'stcotrbg', 'sp_school_id', 'sp_work_id')
# yields the same frame, but the cell can be re-run without a KeyError and it
# mirrors the way hdata is trimmed.
pdata = pdata[['sp_id', 'sp_hh_id', 'age', 'sex', 'race', 'sporder', 'relate']]

In [118]:
# Only the household id and household income are needed from this table.
hdata = hdata.loc[:, ['sp_id', 'hh_income']]

In [119]:
# Attach household income to every person by joining the person's household
# id onto the household table's id.  Both inputs carry an 'sp_id' column, so
# the result holds 'sp_id_x' (person) and 'sp_id_y' (household).
phdata = pdata.merge(hdata, left_on='sp_hh_id', right_on='sp_id')

In [120]:
phdata

Unnamed: 0,sp_id_x,sp_hh_id,age,sex,race,sporder,relate,sp_id_y,hh_income
0,164099532,48930706,49,2,2,1,0,48930706,115000
1,164099533,48930706,57,1,2,2,1,48930706,115000
2,164099534,48954569,49,2,2,1,0,48954569,115000
3,164099535,48954569,57,1,2,2,1,48954569,115000
4,164099538,48964134,49,2,2,1,0,48964134,115000
...,...,...,...,...,...,...,...,...,...
352053,164066165,48841329,50,1,1,1,0,48841329,90000
352054,164066169,48822193,50,1,1,1,0,48822193,90000
352055,164066170,48815049,50,1,1,1,0,48815049,90000
352056,164066171,48811092,50,1,1,1,0,48811092,90000


In [121]:
# Children only: rows whose age is strictly below 18.
cdn_data = phdata.loc[phdata['age'].lt(18)]

In [122]:
cdn_data

Unnamed: 0,sp_id_x,sp_hh_id,age,sex,race,sporder,relate,sp_id_y,hh_income
102,164426008,48996117,4,1,2,3,2,48996117,97000
103,164426009,48996117,0,1,2,4,2,48996117,97000
106,164426012,49005231,4,1,2,3,2,49005231,97000
107,164426013,49005231,0,1,2,4,2,49005231,97000
110,164426016,48930748,4,1,2,3,2,48930748,97000
...,...,...,...,...,...,...,...,...,...
352036,163958884,48979669,3,1,1,2,2,48979669,42000
352038,163971928,48840456,5,1,2,2,2,48840456,46420
352041,163971943,48804862,5,1,2,2,2,48804862,46420
352044,163971964,48836724,5,1,2,2,2,48836724,46420


In [123]:
cdn_data.keys()

Index(['sp_id_x', 'sp_hh_id', 'age', 'sex', 'race', 'sporder', 'relate',
       'sp_id_y', 'hh_income'],
      dtype='object')

In [124]:
from mesa import Agent, Model
from mesa.time import RandomActivation
from mesa.datacollection import DataCollector

BRFSS distribution of ACEs by race and household income

In [125]:
# NOTE(review): race_list -- like income_list, ace_list and df in the
# following cells -- is displayed but not assigned in any cell visible here;
# confirm they are defined earlier so the notebook survives Restart & Run All.
race_list

{1: 'White', 2: 'Black', 3: 'Hispanic', 4: 'Other', 5: 'Multi'}

In [126]:
income_list

{1: '< 15000',
 2: '15000 - 24999',
 3: '25000 - 34999',
 4: '35000 - 49999',
 5: '50000 +',
 9: "Don't Know"}

In [127]:
ace_list

{1: 'depress',
 2: 'alcoabuse',
 3: 'drugabuse',
 4: 'prison',
 5: 'patdivorce',
 6: 'phyabuse1',
 7: 'phyabuse2',
 8: 'verbalabuse',
 9: 'sexabuse1',
 10: 'sexabuse2',
 11: 'sexabuse3'}

In [128]:
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Proportion,Standard Error,L 95% CI,U 95% CI
alcoabuse,Black,15000 - 24999,0.218512,0.010002,0.198909,0.238115
alcoabuse,Black,25000 - 34999,0.208817,0.013844,0.181683,0.235951
alcoabuse,Black,35000 - 49999,0.209190,0.014143,0.181469,0.236910
alcoabuse,Black,50000 +,0.207373,0.011236,0.185351,0.229395
alcoabuse,Black,< 15000,0.219140,0.010893,0.197789,0.240491
...,...,...,...,...,...,...
verbalabuse,White,25000 - 34999,0.267847,0.010418,0.247429,0.288265
verbalabuse,White,35000 - 49999,0.274701,0.008635,0.257776,0.291625
verbalabuse,White,50000 +,0.276256,0.004956,0.266543,0.285969
verbalabuse,White,< 15000,0.345848,0.012781,0.320799,0.370898


In [129]:
# NOTE(review): presumably sorted so that the multi-level .loc lookups done in
# Children.step operate on an ordered index -- confirm.
df = df.sort_index()
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Proportion,Standard Error,L 95% CI,U 95% CI
alcoabuse,Black,15000 - 24999,0.218512,0.010002,0.198909,0.238115
alcoabuse,Black,25000 - 34999,0.208817,0.013844,0.181683,0.235951
alcoabuse,Black,35000 - 49999,0.209190,0.014143,0.181469,0.236910
alcoabuse,Black,50000 +,0.207373,0.011236,0.185351,0.229395
alcoabuse,Black,< 15000,0.219140,0.010893,0.197789,0.240491
...,...,...,...,...,...,...
verbalabuse,White,25000 - 34999,0.267847,0.010418,0.247429,0.288265
verbalabuse,White,35000 - 49999,0.274701,0.008635,0.257776,0.291625
verbalabuse,White,50000 +,0.276256,0.004956,0.266543,0.285969
verbalabuse,White,< 15000,0.345848,0.012781,0.320799,0.370898


In [130]:
# Household-income cut points (compare the bracket labels in income_list).
income_cat = [0, 15000, 24999, 34999, 49999]

# Race category codes 1 through 5.
race_cat = list(range(1, 6))

# NOTE(review): ace_dist is not referenced by any other visible cell.
# Presumably one row of proportions per race code, one value per income
# category -- confirm its meaning before relying on it.
ace_dist = dict(zip(race_cat, [
    [0.2117052, 0.1637898, 0.1467698, 0.1533333, 0.1418293, 0.1085915],
    [0.0834492, 0.0799530, 0.0733411, 0.0677146, 0.0846154, 0.0606601],
    [0.1746032, 0.0763359, 0.1388889, 0.0909091, 0.1901408, 0.2162162],
    [0.2500000, 0.1785714, 0.2033898, 0.1836735, 0.0958904, 0.1282051],
    [0.2638889, 0.2857143, 0.3529412, 0.1785714, 0.2692308, 0.2266667],
]))

def cat_income(hh_income):
    """Map a household income to its income-category code (a key of ``income_list``).

    Categories follow the bracket labels of ``income_list``:

    * 1 -- below 15000 (negative incomes are placed here as well)
    * 2 -- 15000 up to, but not including, 25000
    * 3 -- 25000 up to, but not including, 35000
    * 4 -- 35000 up to, but not including, 50000
    * 5 -- 50000 and above
    * 9 -- income missing (``None`` / NaN)

    Parameters
    ----------
    hh_income : number or missing value
        Household income of the child's household.

    Returns
    -------
    int
        One of 1, 2, 3, 4, 5 or 9; never ``None``.
    """
    if pd.isnull(hh_income):
        return 9
    # Exclusive upper bound of categories 1..4.  These are kept local rather
    # than read from the module-level ``income_cat`` list: its entries
    # (24999, 34999, 49999) are the *inclusive* ends of the brackets, so
    # comparing against them with ``<`` pushed incomes of exactly 24999,
    # 34999 or 49999 into the next higher category.
    upper_bounds = (15000, 25000, 35000, 50000)
    for code, upper in enumerate(upper_bounds, start=1):
        if hh_income < upper:
            return code
    return 5

def cat_race(chd_race):
    """Collapse a person's race code into a race-category code.

    Codes 1 and 2 map to themselves and code 9 maps to 5.  Every other
    value, including a missing one, maps to 4.
    """
    if not pd.isnull(chd_race):
        # (source code, category code) pairs that do not fall back to 4.
        for source_code, category in ((1, 1), (2, 2), (9, 5)):
            if chd_race == source_code:
                return category
    return 4
    

In [131]:
def output_model(model):
    """Return one row per agent of ``model.schedule`` as a DataFrame.

    Each row is the list produced by the agent's ``output()`` method; the
    columns are 'id', 'age', 'race', 'income' followed by the ACE names taken
    from ``ace_list``.
    """
    header = ['id', 'age', 'race', 'income']
    header.extend(ace_list.values())
    rows = []
    for agent in model.schedule.agents:
        rows.append(agent.output())
    return pd.DataFrame(rows, columns=header)

In [155]:
class Children(Agent):
    """Child agent holding demographic attributes and one 0/1 flag per ACE."""

    def __init__(self, chd_info, model, pos = None):
        """Build the agent from one record of child data.

        chd_info -- row-like object indexable by 'sp_id_x' (used as the
            agent's unique id), 'age', 'sex', 'hh_income' and 'race'.
        model -- the model this agent belongs to.
        pos -- optional position, stored unchanged.
        """
        super().__init__(chd_info['sp_id_x'], model)
        self.pos = pos
        self.age = chd_info['age']
        self.sex = chd_info['sex']
        self.income = chd_info['hh_income']
        self.race = chd_info['race']
        # Every ACE starts out absent (0); keys are the codes of ace_list.
        self.aces = dict.fromkeys(ace_list.keys(), 0)

    def step(self):
        """Redraw every ACE flag from the proportions of this child's group.

        For each ACE a uniform random number from the model's generator is
        compared with the 'Proportion' stored in ``df`` for the child's
        (ACE, race label, income label); the flag becomes 1 when the draw is
        below that proportion and 0 otherwise.
        """
        race_label = race_list[cat_race(self.race)]
        income_label = income_list[cat_income(self.income)]
        # Rows of df for all ACEs within this child's race/income group.
        group_rows = df.loc[list(ace_list.values()), race_label, income_label]
        for ace_code, ace_name in ace_list.items():
            draw = self.model.random.random()
            threshold = group_rows.loc[ace_name, race_label, income_label]['Proportion']
            self.aces[ace_code] = 1 if draw < threshold else 0

    def get_cat(self):
        """Return the (race category, income category) codes of this child."""
        race_code = cat_race(self.race)
        income_code = cat_income(self.income)
        return race_code, income_code

    def output(self):
        """Return id, age, race, income and then the ACE flags as a flat list."""
        record = [self.unique_id, self.age, self.race, self.income]
        record.extend(self.aces.values())
        return record

In [156]:
class AceModel(Model):
    """Model that assigns ACE indicators to a population of ``Children`` agents.

    Every child is registered twice: in ``schedule`` (all agents, read by the
    data collector's reporter) and in the scheduler of its race/income group
    inside ``sch_group`` (the schedulers that are actually stepped).
    """

    def __init__(self, chd_data):
        """Create one ``Children`` agent per row of ``chd_data``.

        chd_data -- DataFrame whose rows provide the fields read by
            ``Children.__init__``.
        """
        # Let the Mesa base class set up its own bookkeeping before any
        # scheduler or agent is created.
        super().__init__()
        self.num_agents = len(chd_data)
        self.schedule = RandomActivation(self)
        # sch_group[race_code][income_code] is the scheduler of one group.
        # The inner list is indexed directly by income code, so it must reach
        # the largest code of income_list (9, the missing-income category);
        # sizing it from len(income_cat) raised IndexError for such children.
        # Slots whose index is not an income code simply stay empty.
        n_income_slots = max(income_list) + 1
        self.sch_group = {race: [RandomActivation(self)
                                 for _ in range(n_income_slots)]
                          for race in race_cat}

        for _, chd_info in chd_data.iterrows():
            child = Children(chd_info, self)
            cat_r, cat_i = child.get_cat()
            self.schedule.add(child)
            self.sch_group[cat_r][cat_i].add(child)

        self.datacollector = DataCollector(
            model_reporters = {'Output': output_model}
        )

    def step(self):
        """Step every race/income group scheduler, then collect the output frame."""
        for sch_list in self.sch_group.values():
            for scheduler in sch_list:
                # NOTE(review): the random generator is reset before every
                # group.  If the reset restores the same seed each time, all
                # groups consume the same random stream -- confirm that this
                # is intended.
                self.reset_randomizer()
                scheduler.step()
        self.datacollector.collect(self)

In [157]:
# Build the model: one Children agent is created per row of cdn_data.
acemodel = AceModel(cdn_data)

In [158]:
# Run a single model step; the step also records an output frame in the
# model's data collector.
acemodel.step()

In [159]:
# All model-level series gathered by the data collector, keyed by reporter name.
res = acemodel.datacollector.model_vars

In [160]:
# Read the 'Output' series straight from the collector rather than
# re-indexing `res`: once `res` has been rebound to this list, running
# `res['Output']` a second time would raise a TypeError.
res = acemodel.datacollector.model_vars['Output']
res

[              id  age  race  income  depress  alcoabuse  drugabuse  prison  \
 0      164426008    4     2   97000        0          1          0       0   
 1      164426009    0     2   97000        0          1          0       0   
 2      164426012    4     2   97000        0          0          0       0   
 3      164426013    0     2   97000        0          1          0       0   
 4      164426016    4     2   97000        0          1          0       0   
 ...          ...  ...   ...     ...      ...        ...        ...     ...   
 90309  163958884    3     1   42000        0          0          0       0   
 90310  163971928    5     2   46420        0          1          0       0   
 90311  163971943    5     2   46420        0          1          1       0   
 90312  163971964    5     2   46420        0          0          0       0   
 90313  163971973    5     2   46420        0          0          0       0   
 
        patdivorce  phyabuse1  phyabuse2  verbalab

In [161]:
# Work on a copy of the most recently collected frame: the following cells
# overwrite its 'race' and 'income' columns, and without the copy those
# assignments would also alter the frame held by the data collector.
test = res[-1].copy()
test

Unnamed: 0,id,age,race,income,depress,alcoabuse,drugabuse,prison,patdivorce,phyabuse1,phyabuse2,verbalabuse,sexabuse1,sexabuse2,sexabuse3
0,164426008,4,2,97000,0,1,0,0,1,0,0,0,0,0,0
1,164426009,0,2,97000,0,1,0,0,0,0,1,0,0,0,0
2,164426012,4,2,97000,0,0,0,0,0,0,0,1,0,0,0
3,164426013,0,2,97000,0,1,0,0,0,1,0,0,0,0,0
4,164426016,4,2,97000,0,1,0,0,1,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90309,163958884,3,1,42000,0,0,0,0,0,0,0,0,0,0,0
90310,163971928,5,2,46420,0,1,0,0,0,0,0,0,0,0,0
90311,163971943,5,2,46420,0,1,1,0,0,1,0,0,0,0,0
90312,163971964,5,2,46420,0,0,0,0,0,0,0,1,0,0,0


In [162]:
# Replace the raw race codes by their category codes.  Run this once per
# fresh `test`: cat_race is not idempotent (category 5 would become 4 on a
# second pass).
test['race'] = test['race'].apply(cat_race)
test['race'].unique()

array([2, 1, 4, 5])

In [163]:
# Replace household incomes by their category codes.  Run this once per fresh
# `test`: a second pass would treat the category codes themselves as incomes
# and put every row into category 1.
test['income'] = test['income'].apply(cat_income)
test['income'].unique()

array([5, 1, 4, 3, 2])

In [164]:
test

Unnamed: 0,id,age,race,income,depress,alcoabuse,drugabuse,prison,patdivorce,phyabuse1,phyabuse2,verbalabuse,sexabuse1,sexabuse2,sexabuse3
0,164426008,4,2,5,0,1,0,0,1,0,0,0,0,0,0
1,164426009,0,2,5,0,1,0,0,0,0,1,0,0,0,0
2,164426012,4,2,5,0,0,0,0,0,0,0,1,0,0,0
3,164426013,0,2,5,0,1,0,0,0,1,0,0,0,0,0
4,164426016,4,2,5,0,1,0,0,1,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90309,163958884,3,1,4,0,0,0,0,0,0,0,0,0,0,0
90310,163971928,5,2,4,0,1,0,0,0,0,0,0,0,0,0
90311,163971943,5,2,4,0,1,1,0,0,1,0,0,0,0,0
90312,163971964,5,2,4,0,0,0,0,0,0,0,1,0,0,0


In [None]:
# Distribution of every ACE within each (race category, income category)
# group of the simulated children.
# test_index holds three parallel label lists: ACE name, race label, income label.
test_index = [[], [], []]
test_dist = []
# NOTE(review): get_dist is not defined in any visible cell -- confirm it is
# available, and that it copes with groups that contain no rows.
for ace in ace_list.values():
    for r in race_list.keys():
        for i in income_list.keys():
            # `test` stores the category codes in its 'race' and 'income'
            # columns; it has no '_RACE_G1' / '_INCOMG' columns, so filtering
            # on those names raised a KeyError.
            gpdata = test[(test[['race', 'income']] == [r, i]).all(axis=1)]
            test_dist.append(get_dist(gpdata, ace))
            test_index[0].append(ace)
            test_index[1].append(race_list[r])
            test_index[2].append(income_list[i])

In [None]:
# Assemble the per-group results into a frame indexed by the three label
# lists collected in test_index (ACE name, race label, income label).
test_df = pd.DataFrame(
    test_dist,
    index=test_index,
    columns=['Proportion', 'Standard Error', 'L 95% CI', 'U 95% CI'],
)
test_df

In [None]:
# 'depress' is a label of the first index level of test_df, not one of its
# columns, so it has to be selected by row label.
test_df.loc['depress']