In [1]:
import os
import sys
import getpass
import requests
import numpy as np
import pandas as pd
import geopandas as gpd

user = getpass.getuser()
sys.dont_write_bytecode = True

# sys.path.insert(0, '/Users/{}/Box/Utility Code'.format(user))

# for macOS Monterey
sys.path.insert(0, '/Users/{}/Library/CloudStorage/Box-Box/Utility Code'.format(user))

import utils_io as utils

# for macOS Monterey
sys.path.insert(0, '/Users/{}/Documents/GitHub/hess_application/data_processing/6 - affh'.format(user))

import census

## Pull Decennial Census table variables and export as csv

In [12]:
census_key = getpass.getpass()

········


In [13]:
# pull decennial census table variables from api
rq = requests.get('https://api.census.gov/data/2020/dec/pl/variables.json')
data = rq.json()

In [14]:
# create dataframe from dictionary
tbl_vars = pd.DataFrame.from_dict(data['variables'], orient='index')

In [15]:
# reset index
tbl_vars.reset_index(inplace=True)

In [16]:
# rename index column set as name (for variable name)
tbl_vars.rename(columns={'index':'name'}, inplace=True)

In [17]:
# filter variables to only include 
p2_vars = [
    'P2_001N',
    'P2_002N',
    'P2_003N',
    'P2_004N',
    'P2_005N',
    'P2_006N',
    'P2_007N',
    'P2_008N',
    'P2_009N',
    'P2_010N',
    'P2_011N'
]
p2_race_vars = tbl_vars[tbl_vars['name'].isin(p2_vars)]

In [18]:
p2_race_vars.sort_values(by='name').to_csv('../Data/decennial_census_tbl_vars_p2.csv')

## Pull Decennial Census data from Census API

In [19]:
# create list of census variables to provide as a param in next step
dc_vars_lst = p2_race_vars['name'].to_list()

In [20]:
# pull census data
dc_df = census.pull_decennial_census_data(census_api_key=census_key, select_table_vars=dc_vars_lst)

In [22]:
rename_dict = {
    'P2_001N': 'total_pop',
    'P2_002N': 'hispanic_latino_pop',
    'P2_003N': 'non_hispanic_latino_pop',
    'P2_004N': 'non_hl_pop_one_race',
    'P2_005N': 'non_hl_white',
    'P2_006N': 'non_hl_black',
    'P2_007N': 'non_hl_indian_alaska_native',
    'P2_008N': 'non_hl_asian',
    'P2_009N': 'non_hl_hawaiian_pacific_islander',
    'P2_010N': 'non_hl_other_race',
    'P2_011N': 'non_hl_multi_racial'
}
dc_df.rename(columns=rename_dict, inplace=True)

In [23]:
col_order = [
    'fipco',
    'tract_geoid',
    'total_pop',
    'hispanic_latino_pop',
    'non_hl_white',
    'non_hl_black',
    'non_hl_indian_alaska_native',
    'non_hl_asian',
    'non_hl_hawaiian_pacific_islander',
    'non_hl_other_race',
    'non_hl_multi_racial'
]
dc_df = dc_df[col_order].copy()

## Create share columns

Create columns which represent an estimate of a given race as a share of the universe which is the total population of a Census tract.

In [24]:
pop_cols = [
    'hispanic_latino_pop',
    'non_hl_white',
    'non_hl_black',
    'non_hl_indian_alaska_native',
    'non_hl_asian',
    'non_hl_hawaiian_pacific_islander',
    'non_hl_other_race',
    'non_hl_multi_racial'
]

In [25]:
# Initialize dictionary with population column names and blank values
{key: 'pct_' + key for key in pop_cols}

{'hispanic_latino_pop': 'pct_hispanic_latino_pop',
 'non_hl_white': 'pct_non_hl_white',
 'non_hl_black': 'pct_non_hl_black',
 'non_hl_indian_alaska_native': 'pct_non_hl_indian_alaska_native',
 'non_hl_asian': 'pct_non_hl_asian',
 'non_hl_hawaiian_pacific_islander': 'pct_non_hl_hawaiian_pacific_islander',
 'non_hl_other_race': 'pct_non_hl_other_race',
 'non_hl_multi_racial': 'pct_non_hl_multi_racial'}

In [26]:
dc_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1772 entries, 0 to 1771
Data columns (total 11 columns):
 #   Column                            Non-Null Count  Dtype 
---  ------                            --------------  ----- 
 0   fipco                             1772 non-null   object
 1   tract_geoid                       1772 non-null   object
 2   total_pop                         1772 non-null   int64 
 3   hispanic_latino_pop               1772 non-null   int64 
 4   non_hl_white                      1772 non-null   int64 
 5   non_hl_black                      1772 non-null   int64 
 6   non_hl_indian_alaska_native       1772 non-null   int64 
 7   non_hl_asian                      1772 non-null   int64 
 8   non_hl_hawaiian_pacific_islander  1772 non-null   int64 
 9   non_hl_other_race                 1772 non-null   int64 
 10  non_hl_multi_racial               1772 non-null   int64 
dtypes: int64(9), object(2)
memory usage: 152.4+ KB


In [27]:
pop_share_dict = {
    'hispanic_latino_pop': 'pct_hispanic_latino_pop',
    'non_hl_white': 'pct_non_hl_white',
    'non_hl_black': 'pct_non_hl_black',
    'non_hl_indian_alaska_native': 'pct_non_hl_indian_alaska_native',
    'non_hl_asian': 'pct_non_hl_asian',
    'non_hl_hawaiian_pacific_islander': 'pct_non_hl_hawaiian_pacific_islander',
    'non_hl_other_race': 'pct_non_hl_other_race',
    'non_hl_multi_racial': 'pct_non_hl_multi_racial'
}
# create share columns
dc_df = census.create_share_columns(df=dc_df, 
                                    universe_column='total_pop', 
                                    share_column_dict=pop_share_dict
                                   )

## Join tract attributes to geodata

In [28]:
tracts_gdf = census.pull_census_tracts_geodata(year=2020, cartographic=True)

In [29]:
tracts_gdf.explore()

In [30]:
dc_gdf = pd.merge(tracts_gdf, dc_df, how='inner', on='tract_geoid')

## Read Washington Ave corridor geodata & create buffer

In [31]:
work_dir = os.path.join(
    '/Users',
    user,
    'Library',
    'CloudStorage',
    'Box-Box',
    'DataViz Projects',
    'Spatial Analysis and Mapping',
    'I-880 LEP Analysis'
)

In [32]:
corridor_gdf = gpd.read_file(os.path.join(work_dir,'Data','i880_icm_washington_ave_corridor.json'))

In [33]:
corridor_gdf.explore()

In [34]:
# create a copy of the line gdf
corridor_buff_gdf = corridor_gdf.copy()

In [35]:
# create 3.5 mile (5632.7 meter) buffer
corridor_buff_gdf['geometry'] = corridor_gdf.geometry.buffer(distance=5632.7)

In [36]:
# review the results on an interactive map
corridor_buff_gdf.explore()

## Clip tracts within study area

In [37]:
corridor_buff_gdf.crs

<Derived Projected CRS: EPSG:26910>
Name: NAD83 / UTM zone 10N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: North America - between 126°W and 120°W - onshore and offshore. Canada - British Columbia; Northwest Territories; Yukon. United States (USA) - California; Oregon; Washington.
- bounds: (-126.0, 30.54, -119.99, 81.8)
Coordinate Operation:
- name: UTM zone 10N
- method: Transverse Mercator
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [38]:
# project to same system as corridor data
dc_gdf_epsg26910 = dc_gdf.to_crs('EPSG:26910')

In [39]:
# perform clip operation
dc_corridor_gdf = gpd.clip(dc_gdf_epsg26910, corridor_buff_gdf)

  clipped.loc[


In [40]:
# review results on an interactive map
dc_corridor_gdf.explore()

## Export corridor data

In [41]:
reorder_cols = [
    'fipco',
    'tract_geoid',
    'total_pop',
    'hispanic_latino_pop',
    'pct_hispanic_latino_pop',
    'non_hl_white',
    'pct_non_hl_white',
    'non_hl_black',
    'pct_non_hl_black',
    'non_hl_indian_alaska_native',
    'pct_non_hl_indian_alaska_native',
    'non_hl_asian',
    'pct_non_hl_asian',
    'non_hl_hawaiian_pacific_islander',
    'pct_non_hl_hawaiian_pacific_islander',
    'non_hl_other_race',
    'pct_non_hl_other_race',
    'non_hl_multi_racial',
    'pct_non_hl_multi_racial',
    'geometry'
]
dc_gdf = dc_gdf[reorder_cols]

In [42]:
# export geodata
path = os.path.join(work_dir, 'Data', 'Hispanic_Latino_Not_Hispanic_Latino_by_Race_Tracts_dc2020.geojson')
dc_gdf.to_file(path, driver='GeoJSON')

In [52]:
# Export tabular data
csv_cols = [
    'fipco',
    'tract_geoid',
    'total_pop',
    'hispanic_latino_pop',
    'pct_hispanic_latino_pop',
    'non_hl_white',
    'pct_non_hl_white',
    'non_hl_black',
    'pct_non_hl_black',
    'non_hl_indian_alaska_native',
    'pct_non_hl_indian_alaska_native',
    'non_hl_asian',
    'pct_non_hl_asian',
    'non_hl_hawaiian_pacific_islander',
    'pct_non_hl_hawaiian_pacific_islander',
    'non_hl_other_race',
    'pct_non_hl_other_race',
    'non_hl_multi_racial',
    'pct_non_hl_multi_racial'
]
path = os.path.join(work_dir, 'Data', 'Hispanic_Latino_Not_Hispanic_Latino_by_Race_Tracts_dc2020.csv')
dc_gdf[csv_cols].to_csv(path, index=False)

## Summarize data

In [44]:
summary_cols = [
    'total_pop',
    'hispanic_latino_pop',
    'non_hl_white',
    'non_hl_black',
    'non_hl_indian_alaska_native',
    'non_hl_asian',
    'non_hl_hawaiian_pacific_islander',
    'non_hl_other_race',
    'non_hl_multi_racial'
]
study_area_summary = dc_gdf[summary_cols].aggregate('sum').to_frame().T

In [45]:
study_area_summary

Unnamed: 0,total_pop,hispanic_latino_pop,non_hl_white,non_hl_black,non_hl_indian_alaska_native,non_hl_asian,non_hl_hawaiian_pacific_islander,non_hl_other_race,non_hl_multi_racial
0,7765640,1891985,2783589,435009,18254,2150320,43214,50035,393234


In [46]:
# create share columns
study_area_summary = census.create_share_columns(df=study_area_summary, 
                                                 universe_column='total_pop', 
                                                 share_column_dict=pop_share_dict
                                                )

In [47]:
study_area_summary.columns.to_list()

['total_pop',
 'hispanic_latino_pop',
 'non_hl_white',
 'non_hl_black',
 'non_hl_indian_alaska_native',
 'non_hl_asian',
 'non_hl_hawaiian_pacific_islander',
 'non_hl_other_race',
 'non_hl_multi_racial',
 'pct_hispanic_latino_pop',
 'pct_non_hl_white',
 'pct_non_hl_black',
 'pct_non_hl_indian_alaska_native',
 'pct_non_hl_asian',
 'pct_non_hl_hawaiian_pacific_islander',
 'pct_non_hl_other_race',
 'pct_non_hl_multi_racial']

In [48]:
# reorder summary columns
reorder_cols = [
    'total_pop',
    'hispanic_latino_pop',
    'pct_hispanic_latino_pop',
    'non_hl_white',
    'pct_non_hl_white',
    'non_hl_black',
    'pct_non_hl_black',
    'non_hl_indian_alaska_native',
    'pct_non_hl_indian_alaska_native',
    'non_hl_asian',
    'pct_non_hl_asian',
    'non_hl_hawaiian_pacific_islander',
    'pct_non_hl_hawaiian_pacific_islander',
    'non_hl_other_race',
    'pct_non_hl_other_race',
    'non_hl_multi_racial',
    'pct_non_hl_multi_racial'
]
study_area_summary = study_area_summary[reorder_cols]

In [49]:
# transpose summary data
study_area_transpose = study_area_summary.T

In [50]:
# rename index column 
study_area_transpose.rename(columns={0:'Summary_Statistics'}, inplace=True)

In [51]:
# export summary area data to csv
path = os.path.join(work_dir, 'Data', 'Hispanic_Latino_Not_Hispanic_Latino_by_Race_Summary_Statistics_dc2020.csv')
study_area_transpose.to_csv(path, index=True)