In [None]:
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm

from IPython.display import display


In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are re-imported before each cell runs and local edits are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2


### Load ACS data:


In [None]:
# Directory containing the cleaned census CSV files. The trailing slash is
# required: file names are appended to this string with plain `+` concatenation.
CENSUS_DATA_ROOT = './data/clean_census_data/'


In [None]:
# --- Read the cleaned census tables (one CSV per topic) ----------------------
# Total population:
census_population = pd.read_csv(f"{CENSUS_DATA_ROOT}population_totals.csv")
# County geometries:
census_geography = pd.read_csv(f"{CENSUS_DATA_ROOT}county_geometries.csv")
# Employment status (the alternative table employment_population_ratio.csv
# is currently not loaded):
census_employment = pd.read_csv(f"{CENSUS_DATA_ROOT}employment_status.csv")
# Age information (totals):
census_age_total = pd.read_csv(f"{CENSUS_DATA_ROOT}age_information_total.csv")
# Urban/rural classification by county:
census_urban_rural = pd.read_csv(f"{CENSUS_DATA_ROOT}urban_rural_by_county.csv")
# Household income totals:
census_income = pd.read_csv(f"{CENSUS_DATA_ROOT}income_totals_household.csv")
# Commuting:
census_commuting = pd.read_csv(f"{CENSUS_DATA_ROOT}commuting.csv")

# --- Attach the census `id` to the population table --------------------------
# Build a "<county>, <state>" label; it is the join key shared with the
# employment table's 'Geographic Area Name' column.
county_label = census_population['CTYNAME'] + ', ' + census_population['STNAME']
census_population = census_population.assign(**{'Geographic Area Name': county_label})

# Collapse to one row per (label, county, state), summing every other column.
population_by_county = (
    census_population
    .groupby(['Geographic Area Name', 'CTYNAME', 'STNAME'])
    .sum()
    .reset_index()
)

# Unique (id, label) pairs taken from the employment table.
county_ids = census_employment[['id', 'Geographic Area Name']].drop_duplicates()

# Outer join: labels present in only one of the two tables are kept, with NaN
# on the side that has no match.
census_population = pd.merge(
    county_ids,
    population_by_county,
    on='Geographic Area Name',
    how='outer',
)
census_population.head()


In [None]:
# Assemble one table of county attributes by joining the census tables on the
# county `id`. Every merge below uses pandas' default inner join, so a county
# that is missing from any one table is dropped from `data`.

# Household-income brackets at or above $50,000. The lower brackets
# ('Less than $10,000' ... '$35,000 to $49,999') are left out on purpose so
# that the sum of these columns is the share of households above $50,000.
income_bracket_columns = [
    '$50,000 to $74,999',
    '$75,000 to $99,999',
    '$100,000 to $149,999',
    '$150,000 to $199,999',
    '$200,000 or more',
]

# Get geography data:
data = census_geography[['AFFGEOID', 'ALAND']].rename(columns={
    'AFFGEOID': 'id',
    'ALAND': 'area_square_meters',
})
# Get population data:
data = data.merge(census_population, on='id')
# Get employment data:
data = data.merge(census_employment, on=['id', 'Geographic Area Name'])
# Get urban/rural data:
data = data.merge(
    census_urban_rural.rename(columns={
        '2013 code': 'urbanrural_code_2013',
        'type_2013': 'urbanrural_type_2013',
    })[['id', 'urbanrural_code_2013', 'urbanrural_type_2013']],
    on='id',
)
# Add commuting data:
data = data.merge(
    census_commuting.drop(['State Name', 'County Name'], axis=1),
    on='id',
)
# Add income data:
data = data.merge(
    census_income[[
        'id',
        *income_bracket_columns,
        'Median income (dollars)',
        'Mean income (dollars)',
    ]],
    on='id',
)

# Compute metrics:
# Sum the bracket columns that were just merged into `data`. Summing them on
# `census_income` instead would align the result on census_income's row index,
# which no longer corresponds to the rows of `data` after the inner merges
# above, and would attach income shares to the wrong counties.
data['pct_above_income_50000'] = data[income_bracket_columns].sum(axis=1)
# Area is converted to square kilometres (area_square_meters / 1e6) before
# dividing, so this is population per square kilometre.
data['pop_density'] = data['TOT_POP']/(data['area_square_meters']/1e6)
# NOTE(review): if the two commuting columns are counts, out/(in+out) is
# already a fraction in [0, 1]; the extra /100 makes the result 100x smaller
# than a fraction (and 10,000x smaller than a percent). Behaviour is kept
# as-is here -- confirm the intended unit before relying on this column.
data['pct_work_out_of_county'] = data['number_work_out_of_county']/(
    data['number_work_in_county']+data['number_work_out_of_county']
)/100
# The placeholder 'N' is turned into NaN so the column can be cast to float;
# the /100 presumably converts a percent value into a fraction.
data['Labor Force Participation Rate'] = data['Labor Force Participation Rate'].replace('N', np.nan)
data['Labor Force Participation Rate'] = data['Labor Force Participation Rate'].astype(float)/100
# Codes 1-3 are labelled 'urban'; every other code (including a missing one)
# is labelled 'rural'.
data['urban_rural'] = np.where(data['urbanrural_code_2013'].isin([1, 2, 3]), 'urban', 'rural')
# Rename columns:
data = data.rename(columns={'id': 'county_id'})

data


In [None]:
# Project the county table onto the generic "location" schema: rename the
# census-specific columns, then keep only the attributes listed below.
# `rename` returns a new frame, so `data` itself is left untouched.
# Note that 'TOT_POP' is renamed to 'population' but is not part of the
# final column selection.
location_attributes = (
    data
    .rename(columns={
        'county_id': 'location_id',
        'CTYNAME': 'location_city',
        'STNAME': 'location_state',
        'Geographic Area Name': 'location_name',
        'TOT_POP': 'population',
        'urban_rural': 'density',
        'Labor Force Participation Rate': 'employment_rate',
        'pct_above_income_50000': 'wealth_rate',
    })
    .loc[:, [
        'location_id',
        'location_city',
        'location_state',
        'location_name',
        'density',
        'employment_rate',
        'wealth_rate',
    ]]
)
location_attributes
