In [1]:
import numpy as np
import pandas as pd

In [3]:
# Relative path to the Excel workbook read below (the file name suggests
# 2021 ACS 5-year economic data keyed by NTA -- TODO confirm provenance).
econ_file_path = '../nta/econ_2021acs5yr_nta.xlsx'

# Read the workbook into a DataFrame; later cells filter it by the 'GeoID' column.
econ_df = pd.read_excel(econ_file_path)
# Preview the first rows.
econ_df.head()

Unnamed: 0,GeoType,NTAType,GeogName,GeoID,Borough,Pop16plE,Pop16plM,Pop16plC,Pop16plP,Pop16plZ,...,Pv400t499E,Pv400t499M,Pv400t499C,Pv400t499P,Pv400t499Z,Pv500plE,Pv500plM,Pv500plC,Pv500plP,Pv500plZ
0,NTA2020,0,Greenpoint,BK0101,Brooklyn,33164,1421.0,2.6,100.0,0.0,...,4397,592.0,8.2,11.8,1.5,18983,1191.0,3.8,50.8,2.3
1,NTA2020,0,Williamsburg,BK0102,Brooklyn,52924,2087.0,2.4,100.0,0.0,...,4416,660.0,9.1,7.0,1.0,28922,1564.0,3.3,45.8,1.6
2,NTA2020,0,South Williamsburg,BK0103,Brooklyn,24719,1651.0,4.1,100.0,0.0,...,1629,477.0,17.8,3.5,1.0,3716,844.0,13.8,7.9,1.7
3,NTA2020,0,East Williamsburg,BK0104,Brooklyn,45574,1817.0,2.4,100.0,0.0,...,4949,885.0,10.9,9.3,1.6,15435,1058.0,4.2,28.9,1.5
4,NTA2020,0,Brooklyn Heights,BK0201,Brooklyn,20952,1075.0,3.1,100.0,0.0,...,1318,344.0,15.9,5.8,1.5,17619,1276.0,4.4,77.0,3.5


In [5]:
# GeoID of the neighborhood the per-NTA cells in this notebook inspect.
NTA_ID = 'BK0101'

# Keep only the row(s) whose GeoID matches the selected NTA.
is_selected_nta = econ_df['GeoID'] == NTA_ID
nta_econ_df = econ_df.loc[is_selected_nta]
nta_econ_df.head()

Unnamed: 0,GeoType,NTAType,GeogName,GeoID,Borough,Pop16plE,Pop16plM,Pop16plC,Pop16plP,Pop16plZ,...,Pv400t499E,Pv400t499M,Pv400t499C,Pv400t499P,Pv400t499Z,Pv500plE,Pv500plM,Pv500plC,Pv500plP,Pv500plZ
0,NTA2020,0,Greenpoint,BK0101,Brooklyn,33164,1421.0,2.6,100.0,0.0,...,4397,592.0,8.2,11.8,1.5,18983,1191.0,3.8,50.8,2.3


Individual employment, occupation and health-insurance status

Household earnings, income status and social security status

In [6]:
def compute_stats(nta_df, attr, estimation_vars=['E', 'M', 'C', 'P', 'Z'], filter=True):
    """Pull the columns named ``<attr><suffix>`` out of ``nta_df``.

    Args:
        nta_df: DataFrame holding one column per attribute/suffix pair.
        attr: Attribute prefix, e.g. ``'HH2'``.
        estimation_vars: Suffixes to read when ``filter`` is falsy.
        filter: When truthy (the default) ``estimation_vars`` is ignored and
            only the ``'E'`` and ``'P'`` suffixes are read; callers in this
            notebook unpack those as (estimate, percentage).

    Returns:
        A list with one ``.values`` array per suffix, in suffix order.

    Raises:
        KeyError: if any requested ``<attr><suffix>`` column is missing.
    """
    suffixes = ['E', 'P'] if filter else estimation_vars

    stats = []
    for suffix in suffixes:
        column_name = f'{attr}{suffix}'
        stats.append(nta_df[column_name].values)
    return stats

def merge_nta_stats(nta_df, attr_vals):
    """Sum the estimate and percentage stats over several attributes.

    Args:
        nta_df: DataFrame passed through to ``compute_stats``.
        attr_vals: Iterable of attribute prefixes to combine.

    Returns:
        ``[total_estimate, total_percentage]``. Both totals start at the
        integer 0, so an empty ``attr_vals`` yields ``[0, 0]``; otherwise
        they are the element-wise sums of the arrays from ``compute_stats``.
    """
    total_estimate, total_percentage = 0, 0
    for attr in attr_vals:
        attr_estimate, attr_percentage = compute_stats(nta_df, attr, filter=True)
        total_estimate += attr_estimate
        total_percentage += attr_percentage

    return [total_estimate, total_percentage]

In [20]:
# Stats for the 'HH2' attribute of the selected NTA.
total_households = compute_stats(nta_econ_df, 'HH2')
print(total_households)

# Attribute prefixes of the individual household-income bands.
household_income_list = [
    'HHIU10', 'HHI10t14', 'HHI15t24', 'HHI25t34', 'HHI35t49',
    'HHI50t74', 'HHI75t99', 'HI100t149', 'HI150t199', 'HHI200pl',
]

# attribute -> [estimate, percentage, running total of the percentages so far]
household_income_data = {}
cumulative_prob = 0.0
for inc_band_attr in household_income_list:
    estimate, percentage = compute_stats(nta_econ_df, inc_band_attr)
    cumulative_prob += percentage
    household_income_data[inc_band_attr] = [
        estimate[0],
        percentage[0],
        cumulative_prob[0],
    ]

print(household_income_data)

[array([17487]), array([100.])]
{'HHIU10': [713, 4.1, 4.1], 'HHI10t14': [633, 3.6, 7.699999999999999], 'HHI15t24': [671, 3.8, 11.5], 'HHI25t34': [941, 5.4, 16.9], 'HHI35t49': [1051, 6.0, 22.9], 'HHI50t74': [1998, 11.4, 34.3], 'HHI75t99': [1910, 10.9, 45.199999999999996], 'HI100t149': [3580, 20.5, 65.69999999999999], 'HI150t199': [2404, 13.7, 79.39999999999999], 'HHI200pl': [3586, 20.5, 99.89999999999999]}


In [53]:
# Coarse income band -> the household-income attribute prefixes merged into it.
income_mapping = {
    'Band1': ['HHIU10', 'HHI10t14', 'HHI15t24'],
    'Band2': ['HHI25t34', 'HHI35t49'],
    'Band3': ['HHI50t74', 'HHI75t99'],
    'Band4': ['HI100t149', 'HI150t199'],
    'Band5': ['HHI200pl'],
}

def get_nta_household_income(df, nta_id, income_mapping):
    """Aggregate household-income stats of one NTA into coarser bands.

    Args:
        df: DataFrame with a 'GeoID' column and the '<attr>E' / '<attr>P'
            columns for every attribute listed in ``income_mapping``.
        nta_id: Value matched against the 'GeoID' column.
        income_mapping: Dict of band label -> list of attribute prefixes.

    Returns:
        Dict of band label -> ``{'estimate': ..., 'probability': ...}``,
        holding the two totals returned by ``merge_nta_stats``. Note that
        'probability' is the summed percentage column as-is; it is not
        rescaled here.
    """
    nta_rows = df[df['GeoID'] == nta_id]

    nta_household_income = {}
    for band, band_attrs in income_mapping.items():
        band_estimate, band_percentage = merge_nta_stats(nta_rows, band_attrs)
        nta_household_income[band] = {
            'estimate': band_estimate,
            'probability': band_percentage,
        }

    return nta_household_income

In [54]:
# Band-level household income for the selected NTA.
income_vals = get_nta_household_income(econ_df, NTA_ID, income_mapping)

print(income_vals)
# Sanity check: total of the band estimates. Each band is a dict keyed by
# 'estimate' / 'probability', so it must be indexed by key -- the previous
# positional [0] lookup raises KeyError on that dict.
print(sum(band_stats['estimate'] for band_stats in income_vals.values()))

{'Band1': [array([2017]), array([11.5])], 'Band2': [array([1992]), array([11.4])], 'Band3': [array([3908]), array([22.3])], 'Band4': [array([5984]), array([34.2])], 'Band5': [array([3586]), array([20.5])]}
[17487]


In [35]:
# Stats for four household income-source attributes of the selected NTA
# (presumably social security, retirement, cash public assistance and
# supplemental security income -- confirm against the data dictionary).
household_with_ss = compute_stats(nta_econ_df, 'Inc_SoSec')
household_with_ri = compute_stats(nta_econ_df, 'Inc_Rtrmt')
household_with_cpai = compute_stats(nta_econ_df, 'Inc_CPbA')
household_with_supsec = compute_stats(nta_econ_df, 'Inc_SpSec')

for household_stats in (household_with_ss, household_with_ri,
                        household_with_cpai, household_with_supsec):
    print(household_stats)

[array([2864]), array([16.4])]
[array([1698]), array([9.7])]
[array([808]), array([4.6])]
[array([611]), array([3.5])]


Individual Employment and Health Insurance Status

In [45]:
# Modeling assumption: only agents aged 18-64 are employed.
labor_agents = compute_stats(nta_econ_df, 'CvLF18t64')
nolabor_agents = compute_stats(nta_econ_df, 'NLF2')

# Not used for now:
# employed_agents = compute_stats(nta_econ_df, 'CvLFEm')
# print(employed_agents)
# unemployed_agents = compute_stats(nta_econ_df, 'UEm')
# print(unemployed_agents)

print(labor_agents)
print(nolabor_agents)

# Combined estimate of the two groups.
labor_estimate, nolabor_estimate = labor_agents[0], nolabor_agents[0]
print(labor_estimate + nolabor_estimate)

[array([25325]), array([nan])]
[array([3435]), array([100.])]
[28760]


In [42]:
# Stats for the 'CNI1864_2' attribute; get_nta_employ_insure uses this
# attribute's estimate as the denominator of its shares. In the recorded
# output the estimate (28760) equals the labor-force + not-in-labor-force
# sum printed by the labor-force cell, and the percentage value is NaN.
total_eligible_agents = compute_stats(nta_econ_df, 'CNI1864_2')
print(total_eligible_agents)

2532

[array([28760]), array([nan])]


In [52]:
# employment and insurance status of agents
employment_insurance_mapping = {'employed_insured': 'EmHIns', 'employed_uninsured': 'EmNHIns',
                                'unemployed_insured': 'UEmHIns', 'unemployed_uninsured': 'UEmNHIns',
                                'nolabor_insured': 'NLFHIns', 'nolabor_uninsured': 'NLFNHIns'}

def get_nta_employ_insure(df, nta_id, employ_insure_mapping):
    """Estimate and share of each employment/insurance category for one NTA.

    Args:
        df: DataFrame with a 'GeoID' column, the 'CNI1864_2E' / 'CNI1864_2P'
            columns, and '<attr>E' / '<attr>P' columns for every mapped
            attribute.
        nta_id: Value matched against the 'GeoID' column.
        employ_insure_mapping: Dict of category label -> attribute prefix.

    Returns:
        Dict of category label -> ``{'estimate': ..., 'percentage': ...}``.
        'estimate' is the first element of the category's estimate array.
        'percentage' is that estimate divided by the whole 'CNI1864_2'
        estimate array, so it is a fraction (not scaled by 100) and stays
        an array rather than a scalar.

    Raises:
        IndexError: if no row matches ``nta_id`` (the estimate array is empty).
    """
    nta_rows = df[df['GeoID'] == nta_id]

    # Denominator shared by every category; kept as the full estimate array.
    eligible_total = compute_stats(nta_rows, 'CNI1864_2')[0]

    nta_employ_insure = {}
    for category, attr in employ_insure_mapping.items():
        category_estimates, _ = compute_stats(nta_rows, attr)
        category_estimate = category_estimates[0]
        nta_employ_insure[category] = {
            'estimate': category_estimate,
            'percentage': category_estimate / eligible_total,
        }

    return nta_employ_insure

In [51]:
# Employment/insurance breakdown for NTA 'BK0101'.
employ_insure = get_nta_employ_insure(econ_df, 'BK0101', employment_insurance_mapping)
print(employ_insure)

# Total of the per-category estimates.
sum_t = sum(category_stats['estimate'] for category_stats in employ_insure.values())

print(sum_t)

[28760]
{'employed_insured': {'estimate': 21557, 'percentage': array([0.74954798])}, 'employed_uninsured': {'estimate': 1853, 'percentage': array([0.06442976])}, 'unemployed_insured': {'estimate': 1446, 'percentage': array([0.05027816])}, 'unemployed_uninsured': {'estimate': 469, 'percentage': array([0.01630737])}, 'nolabor_insured': {'estimate': 3164, 'percentage': array([0.11001391])}, 'nolabor_uninsured': {'estimate': 271, 'percentage': array([0.00942281])}}
28760
