In [4]:
import numpy as np
import pandas as pd

In [5]:
# Identifier of the NTA processed by this notebook. It is compared against the
# 'GeoID' column of the social table and used as the key into the per-NTA
# dictionaries loaded further down.
NTA_ID = "BK0101"

In [81]:
# Load the NTA-level social workbook, then keep only the rows whose GeoID
# equals the NTA selected in NTA_ID.
social_file = '../nta/soc_2021acs5yr_nta.xlsx'
social_df = pd.read_excel(social_file)

nta_social_df = social_df.loc[social_df['GeoID'].eq(NTA_ID)]

In [79]:
def compute_stats(nta_df, attr, estimation_vars=['E', 'M', 'C', 'P', 'Z'], filter=True):
    '''Collect the value arrays of the columns named ``attr + suffix``.

    Args:
        nta_df: DataFrame holding one column per (attribute, suffix) pair.
        attr: Column-name prefix, e.g. ``'HH1'``.
        estimation_vars: Suffixes to read when ``filter`` is False.
        filter: When True (the default) ``estimation_vars`` is ignored and only
            the ``'E'`` and ``'P'`` suffixes are read, in that order.

    Returns:
        A list with one ``.values`` array per suffix, in suffix order.

    Raises:
        KeyError: if a requested column is missing from ``nta_df``.
    '''
    suffixes = ['E', 'P'] if filter else estimation_vars

    columns = []
    for suffix in suffixes:
        columns.append(nta_df[f'{attr}{suffix}'].values)
    return columns

def merge_nta_stats(nta_df, attr_vals):
    '''Sum the estimate and percentage arrays of several attributes.

    For every prefix in ``attr_vals`` the ``'E'`` and ``'P'`` columns are read
    through ``compute_stats(..., filter=True)`` and added element-wise onto
    running totals that start at 0.

    Returns:
        ``[estimate_total, percentage_total]``; both stay the integer 0 when
        ``attr_vals`` is empty.
    '''
    estimate_total = 0
    percentage_total = 0
    for attr in attr_vals:
        estimate, percentage = compute_stats(nta_df, attr, filter=True)
        estimate_total += estimate
        percentage_total += percentage

    return [estimate_total, percentage_total]

In [6]:
# Each .npy file is loaded with pickle support and unwrapped with .item(),
# which returns the single object stored in the array (indexed like a nested
# dict keyed by NTA id further down).
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load files
# produced by a trusted pipeline.
nta_households_file = '../generate_data/all_nta_households.npy'
nta_households_data = np.load(nta_households_file, allow_pickle=True).item()

nta_agents_file = '../generate_data/all_nta_agents.npy'
nta_agents_data = np.load(nta_agents_file, allow_pickle=True).item()

In [7]:
# The first element of this NTA's metadata entry is spliced after '360' to
# form the directory that holds the household-size table.
nta_metadata = np.load('nta_metadata.npy', allow_pickle=True).item()
nta_county = nta_metadata[NTA_ID][0]

county_level_household_sizes = f'../data/metadata/360{nta_county}/agents_household_sizes.csv'
size_df = pd.read_csv(county_level_household_sizes)

In [20]:
# Turn the household counts per size into probabilities (count / total count),
# kept both as parallel sequences and as a size -> probability dict.
household_sizes_list = size_df.HouseholdSize
household_sizes_prob_list = size_df.Number / sum(size_df.Number)
household_sizes_prob = dict(zip(household_sizes_list, household_sizes_prob_list))

In [10]:
# Household and agent counts recorded for the selected NTA.
num_households_nta, num_agents_nta = (
    nta_households_data[NTA_ID]['num_households'],
    nta_agents_data[NTA_ID]['num_agents'],
)

In [18]:
def nta_agents_to_household(
    households_sizes_list,
    households_sizes_prob_list,
    num_agents, num_households, agents_idx=None):
    '''Partition agents into households by sampling household sizes.

    Household sizes are drawn one at a time from ``households_sizes_list``
    with probabilities ``households_sizes_prob_list`` until every agent has
    been placed. The last household is truncated so the sizes add up to
    exactly ``num_agents``. Agents are placed in id order, so each household
    holds a contiguous range of agent ids.

    Only the size distribution is honoured. Composition-aware assignment
    (see the TODO in the loop) and precinct assignment are not implemented.

    Args:
        households_sizes_list: Candidate household sizes.
        households_sizes_prob_list: Probability of each candidate size.
        num_agents: Number of agents to place. A scalar or a one-element
            array is accepted.
        num_households: Reference household count. It is only printed next
            to the number of generated households and does not constrain
            the sampling.
        agents_idx: Currently unused. Optional so that four-argument calls
            keep working.

    Returns:
        ``(agent_households, household_agents)`` where ``agent_households[i]``
        is the household id of agent ``i`` and ``household_agents[h]`` is the
        list of agent ids in household ``h``.
    '''
    # Counts loaded from the .npy dictionaries can be one-element arrays
    # (num_households prints as "[17487]"), so collapse to a plain int first.
    total_agents_unassigned = int(np.squeeze(num_agents))

    agent_households = []
    household_agents = []
    household_id = 0
    last_agent_id = 0
    while total_agents_unassigned > 0:
        household_size = int(np.random.choice(households_sizes_list,
                                              p=households_sizes_prob_list))
        # Never place more agents than are left.
        household_size = min(household_size, total_agents_unassigned)

        # TODO: choose the household composition from the size, using agents
        # split by age category (children, male adults, female adults,
        # elderly) and a table of composition probabilities (e.g. a
        # 'single_elderly_alone' entry). Neither input is passed in yet.
        #   size 1: elderly with some p11 and adult with (1 - p11)
        #   size 2: married adult with p21; male_adult + child with p22;
        #           female_adult + child with p23; random with
        #           (1 - p21, p22, p23)
        #   size 3: married adult with child assign with p4
        #   size 4: (no rule written yet)
        #   size > 4: family with prob p1; random set with (1 - p1)

        agent_households.extend([household_id] * household_size)
        household_agents.append(
            list(range(last_agent_id, last_agent_id + household_size)))

        household_id += 1
        last_agent_id += household_size
        total_agents_unassigned -= household_size

    print("Assigned households: ", len(household_agents), num_households)

    return agent_households, household_agents

'''
To populate a household:
1. Filter agents by age [children, adult, elderly].
2. Sample num agents, then choose the composition:
    - if size 1: elderly with some p11 and adult with (1-p11)
    - if size 2: married adult with p21; male_adult + child with p22; female_adult + child with p23; adult + elderly with p24
    - if size 3: married adult with child assign
    - if size 4: married adult with child
    - if size 5: married adult with children with p51; married adult with children and elderly with p52
    - if size 6: married adult with children and elderly (probability still to be specified)
Sample household race and num_agents.

'''

In [23]:
# Split this NTA's agents into households using the household-size
# distribution computed above.
agent_households, household_agents = nta_agents_to_household(
    households_sizes_list=household_sizes_list,
    households_sizes_prob_list=household_sizes_prob_list,
    num_agents=num_agents_nta,
    num_households=num_households_nta,
)

Assigned households:  14798 [17487]


In [64]:
def assing_agent_properties(nta_agents_data):
    '''Placeholder: look up the inputs needed to assign agent properties.

    Reads ``'num_agents'``, ``'age_gender_prob'`` and ``'race_prob'`` from
    ``nta_agents_data`` (raising KeyError if one is missing) but does nothing
    with them yet, so the function returns None.
    '''
    num_agents, age_gender_prob_list, race_prob_list = (
        nta_agents_data['num_agents'],
        nta_agents_data['age_gender_prob'],
        nta_agents_data['race_prob'],
    )

def assign_age_gender_to_agents(nta_agents_data):
    '''Draw one age/gender category index per agent.

    The weights under ``'age_gender_prob'`` are divided by their sum to form
    a probability vector; ``'num_agents'`` indices in
    ``range(len(weights))`` are then sampled from it with
    ``np.random.choice``.

    Returns:
        Array of sampled category indices, one per agent.
    '''
    n_agents = nta_agents_data['num_agents']
    weights = nta_agents_data['age_gender_prob']
    probabilities = np.array(weights) / sum(weights)

    return np.random.choice(len(probabilities), size=n_agents, p=probabilities)

In [69]:
# Sample an age/gender category index for every agent of the selected NTA.
agent_age_gender_list = assign_age_gender_to_agents(nta_agents_data[NTA_ID])

# Show the first ten sampled indices as the cell output.
agent_age_gender_list[:10]

array([ 1,  7,  1,  3,  7,  2,  0, 11,  0,  2])

In [75]:
# Distinct sampled category indices together with how often each was drawn.
category_counts = np.unique(agent_age_gender_list, return_counts=True)
print(category_counts)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]), array([2189, 2657, 2931, 3765, 5817, 6035, 2493, 2953, 2355, 2413, 1459,
       2451]))


In [50]:
def fix_inconsistent_array(data_array):
    '''Replace numpy-wrapped single values in ``data_array`` with plain scalars.

    Every element that exposes ``.item()`` and holds exactly one value (a
    numpy scalar or a one-element array) is replaced, in place, by the result
    of ``.item()``. Elements without ``.item()`` (plain Python numbers) and
    arrays holding more than one value are left untouched.

    Only the two expected failures are tolerated: AttributeError (no
    ``.item()``) and ValueError (more than one value). Anything else,
    including KeyboardInterrupt, propagates instead of being silently
    swallowed.

    Args:
        data_array: Mutable sequence to clean; it is modified in place.

    Returns:
        The same ``data_array`` object.
    '''
    for ix, element in enumerate(data_array):
        try:
            scalar = element.item()
        except (AttributeError, ValueError):
            # Already a plain value, or an array that cannot collapse to one.
            continue
        data_array[ix] = scalar

    return data_array

# Clean the selected NTA's age/gender weights so that entries exposing
# .item() are replaced by the value it returns.
# NOTE(review): this cell carries execution count 50 but sits after the
# sampling cell (count 69) that consumes this list; on a top-to-bottom run the
# sampling would see the uncleaned values. Confirm the intended cell order.
_age_gender_prob = nta_agents_data[NTA_ID]['age_gender_prob']
nta_agents_data[NTA_ID]['age_gender_prob'] = fix_inconsistent_array(_age_gender_prob)

In [84]:
# Estimate values only: [0] picks the first array returned by compute_stats.
total_households = compute_stats(nta_social_df, 'HH1')[0]
print(total_households)

# Single-person households, and the subset held by an elderly person.
single_person_households, elderly_living_alone = (
    compute_stats(nta_social_df, attr)[0] for attr in ('NFamA', 'NFamA65pl')
)
non_elderly_living_alone = single_person_households - elderly_living_alone

# Fraction of single-person households whose occupant is elderly.
single_household_elderly_prob = elderly_living_alone / single_person_households

single_household_elderly_prob

[17487]


array([0.26937669])

In [33]:
# Age buckets, each mapped to the list of finer-grained 'Pop...' names it
# groups together.
# NOTE(review): these names look like column prefixes meant for
# compute_stats / merge_nta_stats -- confirm against the social table.
age_mapping = {
    'U19': ['PopU5', 'Pop5t9', 'Pop10t14', 'Pop15t19'],
    '20t29': ['Pop20t24', 'Pop25t29'],
    '30t39': ['Pop30t34', 'Pop35t39'],
    '40t49': ['Pop40t44', 'Pop45t49'],
    '50t64': ['Pop50t54', 'Pop55t59', 'Pop60t64'],
    '65A': ['Pop65t69', 'Pop70t74', 'Pop75t79', 'Pop80t84', 'Pop85pl'],
}

# One key per (age bucket, gender) pair: buckets in age_mapping order, male
# before female within each bucket.
age_gender_mapping_keys = [
    f'{age_key}_{gender}'
    for age_key in age_mapping
    for gender in ('male', 'female')
]

# Two separate statements: writing "print(a), print(b)" builds a tuple of the
# two None return values, which the notebook then echoes as "(None, None)".
print(age_gender_mapping_keys)
print(len(age_gender_mapping_keys))

['U19_male', 'U19_female', '20t29_male', '20t29_female', '30t39_male', '30t39_female', '40t49_male', '40t49_female', '50t64_male', '50t64_female', '65A_male', '65A_female']
12


(None, None)