In [1]:
import numpy as np
import pandas as pd

In [6]:
# Excel workbooks holding the housing and social tables, relative to this notebook.
# NOTE(review): 'acs5yrr' (double "r") differs from the social file's 'acs5yr' —
# confirm it matches the file name on disk.
housing_path = '../nta/hous_2021acs5yrr_nta.xlsx'
social_path = '../nta/soc_2021acs5yr_nta.xlsx'

# Read both workbooks (housing first, then social) into DataFrames.
house_df, social_df = (pd.read_excel(path) for path in (housing_path, social_path))

In [7]:
# Identifier of the area to analyse; rows are selected by comparing it to the 'GeoID' column.
NTA_ID = 'BK0101'

In [4]:
# Mortgage, Rent, Mortgage/Rent Expense, House Value

In [5]:
def compute_stats(nta_df, attr, estimation_vars=['E', 'M', 'C', 'P', 'Z'], filter=True):
    """Collect the value arrays of the columns named ``attr + suffix``.

    Parameters
    ----------
    nta_df : DataFrame whose columns are named ``<attr><suffix>``.
    attr : column-name prefix identifying the attribute (e.g. ``'HH1'``).
    estimation_vars : suffixes to read when ``filter`` is false.
    filter : when true (the default) ``estimation_vars`` is ignored and only
        the ``'E'`` and ``'P'`` suffixes are read, in that order.

    Returns
    -------
    list of numpy arrays, one per suffix, in suffix order.
    """
    suffixes = ['E', 'P'] if filter else estimation_vars

    stats = []
    for suffix in suffixes:
        column_name = '{}{}'.format(attr, suffix)
        stats.append(nta_df[column_name].values)
    return stats

def merge_nta_stats(nta_df, attr_vals):
    '''Sum the estimate and percentage of several attributes.

    For every attribute prefix in ``attr_vals`` the ``E`` and ``P`` value
    arrays are fetched via ``compute_stats(..., filter=True)`` and added
    element-wise.

    Parameters
    ----------
    nta_df : DataFrame passed through to ``compute_stats``.
    attr_vals : iterable of attribute prefixes to combine.

    Returns
    -------
    ``[estimate_sum, percentage_sum]``; ``[0, 0]`` when ``attr_vals`` is empty.
    '''
    ret_e, ret_p = 0, 0
    for attr in attr_vals:
        e, p = compute_stats(nta_df, attr, filter=True)
        # Add out-of-place on purpose: once the accumulator is an ndarray,
        # ``+=`` would add in place and raise a casting error when an integer
        # accumulator meets a float column. ``a + b`` upcasts instead.
        ret_e = ret_e + e
        ret_p = ret_p + p

    return [ret_e, ret_p]

In [8]:
# Keep only the rows of each table whose GeoID equals the selected NTA_ID.
nta_social_df = social_df.loc[social_df['GeoID'].eq(NTA_ID)]
nta_house_df = house_df.loc[house_df['GeoID'].eq(NTA_ID)]

In [28]:
# Each compute_stats result is a two-item list: [estimate array, percentage array].
total_households = compute_stats(nta_social_df, 'HH1')
print(total_households)

owner_occupied_households, renter_occupied_households, mortgage_owner_occupied_households = [
    compute_stats(nta_house_df, attr) for attr in ('OOcHU1', 'ROcHU1', 'HUwMrtg')
]

# Sanity check: owner- plus renter-occupied estimates; in the recorded output
# this equals the total household estimate printed above (17487).
print(owner_occupied_households[0] + renter_occupied_households[0])

# Owner-occupied units, then the owner-occupied units with a mortgage, one per line.
print(owner_occupied_households, mortgage_owner_occupied_households, sep='\n')

[array([17487]), array([100.])]
[17487]
[array([3169]), array([18.1])]
[array([1798]), array([56.7])]


In [29]:
# ownership and living expense stats
def get_nta_household_ownership(df, nta_id, ownership_mapping):
    """Estimate how households in one area split across ownership types.

    Parameters
    ----------
    df : DataFrame with a ``'GeoID'`` column plus the attribute columns
        referenced by ``ownership_mapping``.
    nta_id : value matched against ``df['GeoID']`` to select the area's rows.
    ownership_mapping : dict mapping an ownership-type label to the list of
        attribute prefixes whose estimates are summed for that type.

    Returns
    -------
    dict keyed by ownership type; each value is a dict with
    ``'estimate'`` (summed estimate from ``merge_nta_stats``) and
    ``'probability'`` (that estimate divided by the sum of the estimates of
    all mapped ownership types).
    """
    nta_rows = df[df['GeoID'] == nta_id]

    # Summed estimate per ownership type; the percentage half of the pair
    # returned by merge_nta_stats is not needed here.
    estimates = {}
    for o_type, attrs in ownership_mapping.items():
        estimates[o_type], _ = merge_nta_stats(nta_rows, attrs)

    # Denominator: every mapped ownership type added together.
    all_households = sum(estimates.values())

    return {
        o_type: {'estimate': estimate, 'probability': estimate / all_households}
        for o_type, estimate in estimates.items()
    }

In [30]:
# Ownership-type label -> attribute prefixes whose estimates are summed for it.
# NOTE(review): in the recorded outputs 'OOcHU1' (3169) + 'ROcHU1' (14318) already
# equals the total household estimate (17487), and 'HUwMrtg' (1798) is reported as
# 56.7%, i.e. 1798 / 3169. Mortgaged owners therefore look like a subset of
# 'OOcHU1', so labelling 'OOcHU1' as 'no_mortgage_owner' appears to double-count
# them and the probabilities are taken over 19285 instead of 17487.
# TODO confirm, and map 'no_mortgage_owner' to a without-mortgage column if the
# dataset provides one.
ownership_mapping = {
    'renter': ['ROcHU1'],
    'no_mortgage_owner': ['OOcHU1'],
    'mortgage_owner': ['HUwMrtg'],
}

nta_household_ownership = get_nta_household_ownership(
    df=house_df,
    nta_id=NTA_ID,
    ownership_mapping=ownership_mapping,
)

In [31]:
# Show the per-ownership-type 'estimate' and 'probability' entries for NTA_ID.
nta_household_ownership

{'renter': {'estimate': array([14318]), 'probability': array([0.74244231])},
 'no_mortgage_owner': {'estimate': array([3169]),
  'probability': array([0.1643246])},
 'mortgage_owner': {'estimate': array([1798]),
  'probability': array([0.09323308])}}