In [1]:
import os
import sys
import getpass
import numpy as np
import pandas as pd
import geopandas as gpd

user = getpass.getuser()
sys.dont_write_bytecode = True

# sys.path.insert(0, '/Users/{}/Box/Utility Code'.format(user))

# for macOS Monterey
sys.path.insert(0, '/Users/{}/Library/CloudStorage/Box-Box/Utility Code'.format(user))

import utils_io as utils

# for macOS Monterey
sys.path.insert(0, '/Users/{}/Documents/GitHub/hess_application/data_processing/6 - affh'.format(user))

import census

## Create dataframe from required Census variables

In [2]:
census_key = getpass.getpass()

········


In [3]:
# read american community survey (ACS) table variables csv
acs_vars = pd.read_csv('../data/acs_table_variables_income_language.csv',
                             dtype={'Table_Id':'str','Line_Num':'str'}
                      )

In [4]:
# Fill leading zeros for line number column to prepare for concatenation 
acs_vars['Line_Num'] = acs_vars['Line_Num'].apply(lambda x: x.zfill(3))

In [5]:
# Concat table id, line number, and add 'E' for estimate
acs_vars['ACS_Table_Variable'] = (acs_vars['Table_ID'].str.strip() + 
                                        '_' + 
                                  acs_vars['Line_Num'] + 'E')

In [6]:
# create a variable list for querying data
acs_vars_lst = acs_vars['ACS_Table_Variable'].tolist()

In [11]:
# query census api and return dataframe
acs_df = census.pull_acs_5_year_est_data(census_api_key=census_key, 
                                         acs_year=2020, 
                                         select_table_vars=acs_vars_lst
                                        )

In [12]:
#Rename columns to human-readable
cols = {'B01003_001E': 'total_population',
        'C17002_001E': 'total_pop_pov',
        'C17002_008E': 'pop_200_pct_and_over_fpl',
        'C16001_001E': 'pop_5_years_over',
        'C16001_002E': 'pop_english_only',
        'C16001_004E': 'spanish_english_vw',
        'C16001_005E': 'spanish_english_lvw',
        'C16001_007E': 'french_english_vw',
        'C16001_008E': 'french_english_lvw',
        'C16001_010E': 'german_english_vw',
        'C16001_011E': 'german_english_lvw',
        'C16001_013E': 'russian_english_vw',
        'C16001_014E': 'russian_english_lvw',
        'C16001_016E': 'other_indo_english_vw',
        'C16001_017E': 'other_indo_english_lvw',
        'C16001_019E': 'korean_english_vw',
        'C16001_020E': 'korean_english_lvw',
        'C16001_022E': 'chinese_english_vw',
        'C16001_023E': 'chinese_english_lvw',
        'C16001_025E': 'vietnamese_english_vw',
        'C16001_026E': 'vietnamese_english_lvw',
        'C16001_028E': 'tagalog_english_vw',
        'C16001_029E': 'tagalog_english_lvw',
        'C16001_031E': 'other_asian_english_vw',
        'C16001_032E': 'other_asian_english_lvw',
        'C16001_034E': 'arabic_english_vw',
        'C16001_035E': 'arabic_english_lvw',
        'C16001_037E': 'other_unspec_english_vw',
        'C16001_038E': 'other_unspec_english_lvw'
}
acs_df.rename(columns=cols, inplace=True)

In [13]:
acs_df.head(5)

Unnamed: 0,total_population,total_pop_pov,pop_200_pct_and_over_fpl,pop_5_years_over,pop_english_only,C16001_003E,spanish_english_vw,spanish_english_lvw,C16001_006E,french_english_vw,...,other_asian_english_vw,other_asian_english_lvw,C16001_033E,arabic_english_vw,arabic_english_lvw,C16001_036E,other_unspec_english_vw,other_unspec_english_lvw,fipco,tract_geoid
0,4367,4263,3019,4015,2408,754,453,301,0,0,...,5,0,130,74,56,77,77,0,13,6013313102
1,2740,2740,1819,2563,1092,740,514,226,0,0,...,0,0,187,86,101,0,0,0,13,6013313104
2,4701,4681,3464,4566,2777,1324,767,557,0,0,...,47,12,0,0,0,67,67,0,13,6013313105
3,6689,6643,5591,6165,3892,961,482,479,0,0,...,259,18,163,106,57,125,125,0,13,6013313106
4,2656,2651,2207,2452,1778,431,301,130,0,0,...,31,0,0,0,0,0,0,0,13,6013313107


## Flag low-income tracts

1. Calculate low-income population 
2. Calculate share of low-income population
3. Flag tracts .5 standard deviation from mean

This methodology follows the Equity Priority Community (EPC) methdology
[Equity Priority Community Methodology](https://github.com/BayAreaMetro/Spatial-Analysis-Mapping-Projects/tree/master/Project-Documentation/Equity-Priority-Communities)

In [15]:
#1. Calculate low income population
acs_df['pop_below_200_pct_fpl'] = (acs_df['total_pop_pov'] - 
                          acs_df['pop_200_pct_and_over_fpl'])

In [16]:
#2. Calculate share of low income population 
acs_df['share_below_200_pct_fpl'] = np.where(acs_df['total_pop_pov'] == 0,0,
                                             (acs_df['pop_below_200_pct_fpl'] / 
                                              acs_df['total_pop_pov']))

In [20]:
(acs_df['share_below_200_pct_fpl'].mean().round(decimals=2)) + (.5 * acs_df['share_below_200_pct_fpl'].std().round(decimals=2))

0.255

In [19]:
(.5 * acs_df['share_below_200_pct_fpl'].std().round(decimals=2))

0.065

In [22]:
#3. Flag tracts .5 standard deviation from mean
cond = ((acs_df['share_below_200_pct_fpl'].mean().round(decimals=2)) + 
        ((.5 * acs_df['share_below_200_pct_fpl'].std().round(decimals=2)).round(decimals=2)))
        
acs_df['low_income'] = np.where(acs_df['share_below_200_pct_fpl'] > cond, 1,0)

## Create share columns

Create columns which represent an estimate of a given language spoken as a share of the universe which is the population of people 5 years and over.

In [23]:
# Create a list of population columns
pop_columns_list = [
    'spanish_english_lvw', 
    'french_english_lvw',
    'german_english_lvw',
    'russian_english_lvw',
    'other_indo_english_lvw',
    'korean_english_lvw',
    'chinese_english_lvw',
    'vietnamese_english_lvw',
    'tagalog_english_lvw',
    'other_asian_english_lvw',
    'arabic_english_lvw',
    'other_unspec_english_lvw'
]

In [24]:
# Initialize dictionary with population column names and blank values
{key: 'pct_' + key for key in pop_columns_list}

{'spanish_english_lvw': 'pct_spanish_english_lvw',
 'french_english_lvw': 'pct_french_english_lvw',
 'german_english_lvw': 'pct_german_english_lvw',
 'russian_english_lvw': 'pct_russian_english_lvw',
 'other_indo_english_lvw': 'pct_other_indo_english_lvw',
 'korean_english_lvw': 'pct_korean_english_lvw',
 'chinese_english_lvw': 'pct_chinese_english_lvw',
 'vietnamese_english_lvw': 'pct_vietnamese_english_lvw',
 'tagalog_english_lvw': 'pct_tagalog_english_lvw',
 'other_asian_english_lvw': 'pct_other_asian_english_lvw',
 'arabic_english_lvw': 'pct_arabic_english_lvw',
 'other_unspec_english_lvw': 'pct_other_unspec_english_lvw'}

In [25]:
# Create a dictionary with population columns as keys and share columns as values
pop_share_cols_dictionary = {
    'spanish_english_lvw': 'pct_spanish_english_lvw',
    'french_english_lvw': 'pct_french_english_lvw',
    'german_english_lvw': 'pct_german_english_lvw',
    'russian_english_lvw': 'pct_russian_english_lvw',
    'other_indo_english_lvw': 'pct_other_indo_english_lvw',
    'korean_english_lvw': 'pct_korean_english_lvw',
    'chinese_english_lvw': 'pct_chinese_english_lvw',
    'vietnamese_english_lvw': 'pct_vietnamese_english_lvw',
    'tagalog_english_lvw': 'pct_tagalog_english_lvw',
    'other_asian_english_lvw': 'pct_other_asian_english_lvw',
    'arabic_english_lvw': 'pct_arabic_english_lvw',
    'other_unspec_english_lvw': 'pct_other_unspec_english_lvw'
}

In [26]:
# Run share column creation function
acs_df = census.create_share_columns(df=acs_df, 
                            universe_column='pop_5_years_over', 
                            share_column_dict=pop_share_cols_dictionary
                           )

In [27]:
# get share column values from dict and create a list
share_cols = list(pop_share_cols_dictionary.values())

In [30]:
# review share columns
acs_df[share_cols].sample(10)

Unnamed: 0,pct_spanish_english_lvw,pct_french_english_lvw,pct_german_english_lvw,pct_russian_english_lvw,pct_other_indo_english_lvw,pct_korean_english_lvw,pct_chinese_english_lvw,pct_vietnamese_english_lvw,pct_tagalog_english_lvw,pct_other_asian_english_lvw,pct_arabic_english_lvw,pct_other_unspec_english_lvw
1536,0.011719,0.0,0.0,0.001608,0.020221,0.0,0.054917,0.0,0.0,0.017463,0.002528,0.001608
1754,0.023841,0.0,0.0,0.017483,0.005033,0.004503,0.010331,0.011921,0.0,0.004238,0.0,0.002119
1533,0.012974,0.0,0.0,0.0,0.012974,0.0,0.047904,0.0,0.0,0.025948,0.0,0.0
1100,0.094482,0.0,0.004845,0.0,0.050606,0.000538,0.038762,0.002153,0.0,0.011575,0.0,0.0
1052,0.006745,0.0,0.003597,0.0,0.016862,0.009442,0.038444,0.178732,0.057329,0.017311,0.0,0.0
1085,0.000496,0.0,0.0,0.0,0.0,0.001984,0.036706,0.0,0.0,0.010913,0.0,0.0
674,0.0,0.0,0.0,0.0,0.0,0.0,0.017157,0.0,0.0,0.0,0.0,0.0
900,0.33454,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002715,0.0,0.00362
1587,0.021048,0.0,0.0,0.0,0.0,0.0,0.002383,0.0,0.0,0.0,0.0,0.0
753,0.090365,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002995,0.016382,0.0,0.0


## Flag Non-English speaking population greater than 5% threshold

In [31]:
def flag_threshold(acs_df):
    if ((acs_df['pct_spanish_english_lvw'] >= .05) | 
        (acs_df['pct_french_english_lvw'] >= .05) |
        (acs_df['pct_german_english_lvw'] >= .05) |
        (acs_df['pct_russian_english_lvw'] >= .05) |
        (acs_df['pct_other_indo_english_lvw'] >= .05) |
        (acs_df['pct_korean_english_lvw'] >= .05) |
        (acs_df['pct_chinese_english_lvw'] >= .05) |
        (acs_df['pct_vietnamese_english_lvw'] >= .05) |
        (acs_df['pct_tagalog_english_lvw'] >= .05) |
        (acs_df['pct_other_asian_english_lvw'] >= .05) |
        (acs_df['pct_arabic_english_lvw'] >= .05) |
        (acs_df['pct_other_unspec_english_lvw'] >= .05)):
        return 1
    else:
        return 0

In [32]:
acs_df['over_5_pct_thres'] = acs_df.apply(flag_threshold, axis=1)

In [33]:
# create dictonary of column names and abbriviated language values
{key: None for key in pop_share_cols_dictionary.values()}

{'pct_spanish_english_lvw': None,
 'pct_french_english_lvw': None,
 'pct_german_english_lvw': None,
 'pct_russian_english_lvw': None,
 'pct_other_indo_english_lvw': None,
 'pct_korean_english_lvw': None,
 'pct_chinese_english_lvw': None,
 'pct_vietnamese_english_lvw': None,
 'pct_tagalog_english_lvw': None,
 'pct_other_asian_english_lvw': None,
 'pct_arabic_english_lvw': None,
 'pct_other_unspec_english_lvw': None}

In [34]:
language_dict = {
    'pct_spanish_english_lvw': 'Spanish',
    'pct_french_english_lvw': 'French, Haitian, or Cajun',
    'pct_german_english_lvw': 'German or West Germanic',
    'pct_russian_english_lvw': 'Russian Polish of Slavic',
    'pct_other_indo_english_lvw': 'Other Indo-European',
    'pct_korean_english_lvw': 'Korean',
    'pct_chinese_english_lvw': 'Chinese',
    'pct_vietnamese_english_lvw': 'Vietnamese',
    'pct_tagalog_english_lvw': 'Tagalog',
    'pct_other_asian_english_lvw': 'Other Asian Pacific Island',
    'pct_arabic_english_lvw': 'Arabic',
    'pct_other_unspec_english_lvw': 'Other Unspecified'
}

In [35]:
# Create a column and classify top non-english language spoken at home
acs_df['top_non_english_language'] = (acs_df[share_cols]
                                      .apply(lambda row: language_dict[row.idxmax()], axis=1)
                                     )

In [37]:
# append newly created column to columns list
view_cols = share_cols
view_cols.append('top_non_english_language')

In [39]:
# review sample of columns for qa/qc of classify step
acs_df[view_cols].sample(10)

Unnamed: 0,pct_spanish_english_lvw,pct_french_english_lvw,pct_german_english_lvw,pct_russian_english_lvw,pct_other_indo_english_lvw,pct_korean_english_lvw,pct_chinese_english_lvw,pct_vietnamese_english_lvw,pct_tagalog_english_lvw,pct_other_asian_english_lvw,pct_arabic_english_lvw,pct_other_unspec_english_lvw,top_non_english_language
1489,0.007482,0.0,0.0,0.007656,0.040891,0.016008,0.080912,0.001392,0.0,0.044197,0.0,0.0,Chinese
1292,0.001288,0.0,0.0,0.0,0.0,0.0,0.109194,0.09606,0.023178,0.015452,0.0,0.0,Chinese
1171,0.0,0.0,0.0,0.0,0.023585,0.004245,0.072877,0.062028,0.0,0.012972,0.0,0.0,Chinese
986,0.179104,0.0,0.0,0.0,0.0,0.001866,0.127565,0.0,0.024021,0.020289,0.0,0.0,Spanish
981,0.061279,0.002879,0.0,0.0,0.002879,0.0,0.003907,0.0,0.015628,0.0,0.0,0.0,Spanish
1142,0.002975,0.0,0.0,0.0192,0.0,0.007842,0.140887,0.023256,0.001622,0.018388,0.0,0.0,Chinese
278,0.029313,0.0,0.0,0.0,0.027625,0.008646,0.03585,0.008013,0.043442,0.006959,0.0,0.004639,Tagalog
794,0.007056,0.0,0.0,0.0,0.0,0.0,0.014449,0.0,0.0,0.002352,0.0,0.0,Chinese
222,0.112101,0.0,0.0,0.0,0.0,0.008907,0.058047,0.0,0.01812,0.059275,0.0,0.0,Spanish
1155,0.0,0.0,0.0,0.0,0.017127,0.0,0.100428,0.004282,0.0,0.010121,0.0,0.0,Chinese


## Join tracts attributes to geodata

In [40]:
# pull census tracts
tracts_gdf = census.pull_census_tracts_geodata(year=2020, cartographic=True)

In [41]:
tracts_gdf.explore()

In [42]:
acs_gdf = pd.merge(tracts_gdf,
                   acs_df,
                   on='tract_geoid',
                   how='inner'
                  )

In [43]:
reorder_cols = [
    'tract_geoid',
    'total_population',
    'total_pop_pov',
    'pop_200_pct_and_over_fpl',
    'pop_below_200_pct_fpl',
    'share_below_200_pct_fpl',
    'pop_5_years_over',
    'pop_english_only',
    'spanish_english_vw',
    'spanish_english_lvw',
    'pct_spanish_english_lvw',
    'french_english_vw',
    'french_english_lvw',
    'pct_french_english_lvw',
    'german_english_vw',
    'german_english_lvw',
    'pct_german_english_lvw',
    'russian_english_vw',
    'russian_english_lvw',
    'pct_russian_english_lvw',
    'other_indo_english_vw',
    'other_indo_english_lvw',
    'pct_other_indo_english_lvw',
    'korean_english_vw',
    'korean_english_lvw',
    'pct_korean_english_lvw',
    'chinese_english_vw',
    'chinese_english_lvw',
    'pct_chinese_english_lvw',
    'vietnamese_english_vw',
    'vietnamese_english_lvw',
    'pct_vietnamese_english_lvw',
    'tagalog_english_vw',
    'tagalog_english_lvw',
    'pct_tagalog_english_lvw',
    'other_asian_english_vw',
    'other_asian_english_lvw',
    'pct_other_asian_english_lvw',
    'arabic_english_vw',
    'arabic_english_lvw',
    'pct_arabic_english_lvw',
    'other_unspec_english_vw',
    'other_unspec_english_lvw',
    'pct_other_unspec_english_lvw',
    'top_non_english_language',
    'low_income',
    'over_5_pct_thres',
    'geometry'
]
acs_gdf = acs_gdf[reorder_cols].copy()

## Read Washington Ave corridor geodata & create buffer

In [46]:
work_dir = os.path.join(
    '/Users',
    user,
    'Library',
    'CloudStorage',
    'Box-Box',
    'DataViz Projects',
    'Spatial Analysis and Mapping',
    'I-880 LEP Analysis'
)

In [47]:
# create corridor data from local file 
corridor_gdf = gpd.read_file(os.path.join(work_dir,'Data','i880_icm_washington_ave_corridor.json'))

In [48]:
# review corridor on an interactive map
corridor_gdf.explore()

In [49]:
corridor_gdf.crs

<Derived Projected CRS: EPSG:26910>
Name: NAD83 / UTM zone 10N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: North America - between 126°W and 120°W - onshore and offshore. Canada - British Columbia; Northwest Territories; Yukon. United States (USA) - California; Oregon; Washington.
- bounds: (-126.0, 30.54, -119.99, 81.8)
Coordinate Operation:
- name: UTM zone 10N
- method: Transverse Mercator
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [50]:
# create a copy of the line gdf
corridor_buff_gdf = corridor_gdf.copy()

In [51]:
# create 3.5 mile (5632.7 meter) buffer
corridor_buff_gdf['geometry'] = corridor_gdf.geometry.buffer(distance=5632.7)

In [52]:
# review corridor buffer on an interactive map
corridor_buff_gdf.explore()

In [75]:
acs_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1765 entries, 0 to 1764
Data columns (total 48 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   tract_geoid                   1765 non-null   object  
 1   total_population              1765 non-null   int64   
 2   total_pop_pov                 1765 non-null   int64   
 3   pop_200_pct_and_over_fpl      1765 non-null   int64   
 4   pop_below_200_pct_fpl         1765 non-null   int64   
 5   share_below_200_pct_fpl       1765 non-null   float64 
 6   pop_5_years_over              1765 non-null   int64   
 7   pop_english_only              1765 non-null   int64   
 8   spanish_english_vw            1765 non-null   int64   
 9   spanish_english_lvw           1765 non-null   int64   
 10  pct_spanish_english_lvw       1765 non-null   float64 
 11  french_english_vw             1765 non-null   int64   
 12  french_english_lvw            1765 non-n

In [76]:
m = acs_gdf[['total_population','pop_5_years_over','geometry']].explore(color='blue')
corridor_buff_gdf.explore(m=m, color='red')

## Clip tracts within study area

In [53]:
corridor_buff_gdf.crs

<Derived Projected CRS: EPSG:26910>
Name: NAD83 / UTM zone 10N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: North America - between 126°W and 120°W - onshore and offshore. Canada - British Columbia; Northwest Territories; Yukon. United States (USA) - California; Oregon; Washington.
- bounds: (-126.0, 30.54, -119.99, 81.8)
Coordinate Operation:
- name: UTM zone 10N
- method: Transverse Mercator
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [54]:
# project to same system as corridor data
acs_gdf_epsg26910 = acs_gdf.to_crs('EPSG:26910')

In [55]:
# perform clip operation
acs_corridor_gdf = gpd.clip(acs_gdf_epsg26910, corridor_buff_gdf)

  clipped.loc[


In [56]:
acs_corridor_gdf.explore()

In [57]:
acs_corridor_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 95 entries, 211 to 62
Data columns (total 48 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   tract_geoid                   95 non-null     object  
 1   total_population              95 non-null     int64   
 2   total_pop_pov                 95 non-null     int64   
 3   pop_200_pct_and_over_fpl      95 non-null     int64   
 4   pop_below_200_pct_fpl         95 non-null     int64   
 5   share_below_200_pct_fpl       95 non-null     float64 
 6   pop_5_years_over              95 non-null     int64   
 7   pop_english_only              95 non-null     int64   
 8   spanish_english_vw            95 non-null     int64   
 9   spanish_english_lvw           95 non-null     int64   
 10  pct_spanish_english_lvw       95 non-null     float64 
 11  french_english_vw             95 non-null     int64   
 12  french_english_lvw            95 non-null 

In [59]:
acs_corridor_gdf[['top_non_english_language','geometry']].explore(column='top_non_english_language')

## Export corridor data

In [60]:
# export geodata
path = os.path.join(work_dir, 'Data', 'low_income_over5_language_at_home_acs2016_2020.geojson')
acs_corridor_gdf.to_file(path, driver='GeoJSON')

In [61]:
csv_cols = [
    'tract_geoid',
    'total_population',
    'total_pop_pov',
    'pop_200_pct_and_over_fpl',
    'pop_below_200_pct_fpl',
    'share_below_200_pct_fpl',
    'pop_5_years_over',
    'pop_english_only',
    'spanish_english_vw',
    'spanish_english_lvw',
    'french_english_vw',
    'french_english_lvw',
    'german_english_vw',
    'german_english_lvw',
    'russian_english_vw',
    'russian_english_lvw',
    'other_indo_english_vw',
    'other_indo_english_lvw',
    'korean_english_vw',
    'korean_english_lvw',
    'chinese_english_vw',
    'chinese_english_lvw',
    'vietnamese_english_vw',
    'vietnamese_english_lvw',
    'tagalog_english_vw',
    'tagalog_english_lvw',
    'other_asian_english_vw',
    'other_asian_english_lvw',
    'arabic_english_vw',
    'arabic_english_lvw',
    'other_unspec_english_vw',
    'other_unspec_english_lvw',
    'pct_spanish_english_lvw',
    'pct_french_english_lvw',
    'pct_german_english_lvw',
    'pct_russian_english_lvw',
    'pct_other_indo_english_lvw',
    'pct_korean_english_lvw',
    'pct_chinese_english_lvw',
    'pct_vietnamese_english_lvw',
    'pct_tagalog_english_lvw',
    'pct_other_asian_english_lvw',
    'pct_arabic_english_lvw',
    'pct_other_unspec_english_lvw',
    'top_non_english_language',
    'low_income',
    'over_5_pct_thres'
]

In [62]:
# export tabular data
path = os.path.join(work_dir, 'Data', 'low_income_over5_language_at_home_acs2016_2020.csv')
acs_corridor_gdf[csv_cols].to_csv(path, index=False)

## Summarize data within study area

Summarize universe populations for each ACS Survey, and populations of speakers of a language other than English at home that speak English very well.

In [63]:
summary_cols = [
 'total_population',
 'total_pop_pov',
 'pop_200_pct_and_over_fpl',
 'pop_below_200_pct_fpl',
 'pop_5_years_over',
 'pop_english_only',
 'spanish_english_vw',
 'spanish_english_lvw',
 'french_english_vw',
 'french_english_lvw',
 'german_english_vw',
 'german_english_lvw',
 'russian_english_vw',
 'russian_english_lvw',
 'other_indo_english_vw',
 'other_indo_english_lvw',
 'korean_english_vw',
 'korean_english_lvw',
 'chinese_english_vw',
 'chinese_english_lvw',
 'vietnamese_english_vw',
 'vietnamese_english_lvw',
 'tagalog_english_vw',
 'tagalog_english_lvw',
 'other_asian_english_vw',
 'other_asian_english_lvw',
 'arabic_english_vw',
 'arabic_english_lvw',
 'other_unspec_english_vw',
 'other_unspec_english_lvw'
]
pop_summaries = acs_corridor_gdf[summary_cols].aggregate('sum').to_frame().T

## Create language shares

In [64]:
pop_share_cols_dictionary = {
    'pop_english_only':'pct_english_only',
    'spanish_english_lvw': 'pct_spanish_english_lvw',
    'french_english_lvw': 'pct_french_english_lvw',
    'german_english_lvw': 'pct_german_english_lvw',
    'russian_english_lvw': 'pct_russian_english_lvw',
    'other_indo_english_lvw': 'pct_other_indo_english_lvw',
    'korean_english_lvw': 'pct_korean_english_lvw',
    'chinese_english_lvw': 'pct_chinese_english_lvw',
    'vietnamese_english_lvw': 'pct_vietnamese_english_lvw',
    'tagalog_english_lvw': 'pct_tagalog_english_lvw',
    'other_asian_english_lvw': 'pct_other_asian_english_lvw',
    'arabic_english_lvw': 'pct_arabic_english_lvw',
    'other_unspec_english_lvw': 'pct_other_unspec_english_lvw'
}

In [65]:
# Run share column creation function
pop_summaries = census.create_share_columns(df=pop_summaries,
                                            universe_column='pop_5_years_over', 
                                            share_column_dict=pop_share_cols_dictionary
                                           )

In [66]:
pop_summaries

Unnamed: 0,total_population,total_pop_pov,pop_200_pct_and_over_fpl,pop_below_200_pct_fpl,pop_5_years_over,pop_english_only,spanish_english_vw,spanish_english_lvw,french_english_vw,french_english_lvw,...,pct_german_english_lvw,pct_russian_english_lvw,pct_other_indo_english_lvw,pct_korean_english_lvw,pct_chinese_english_lvw,pct_vietnamese_english_lvw,pct_tagalog_english_lvw,pct_other_asian_english_lvw,pct_arabic_english_lvw,pct_other_unspec_english_lvw
0,453332,448127,334254,113873,426264,215335,71632,44677,1438,380,...,0.00035,0.001802,0.011756,0.002684,0.045812,0.011469,0.014468,0.006731,0.002461,0.00308


In [67]:
# Rename pop below 200% fpl to population low income
pop_summaries.rename(columns={'pop_below_200_pct_fpl':'pop_low_income'},
                     inplace=True
                    )

In [68]:
# Create low income share
pop_summaries['low_income_pct'] = pop_summaries['pop_low_income'] / pop_summaries['total_pop_pov']

In [69]:
reorder_cols = [
    'total_population',
    'total_pop_pov',
    'pop_200_pct_and_over_fpl',
    'pop_low_income',
    'low_income_pct',
    'pop_5_years_over',
    'pop_english_only',
    'pct_english_only',
    'spanish_english_vw',
    'spanish_english_lvw',
    'pct_spanish_english_lvw',
    'french_english_vw',
    'french_english_lvw',
    'pct_french_english_lvw',
    'german_english_vw',
    'german_english_lvw',
    'pct_german_english_lvw',
    'russian_english_vw',
    'russian_english_lvw',
    'pct_russian_english_lvw',
    'other_indo_english_vw',
    'other_indo_english_lvw',
    'pct_other_indo_english_lvw',
    'korean_english_vw',
    'korean_english_lvw',
    'pct_korean_english_lvw',
    'chinese_english_vw',
    'chinese_english_lvw',
    'pct_chinese_english_lvw',
    'vietnamese_english_vw',
    'vietnamese_english_lvw',
    'pct_vietnamese_english_lvw',
    'tagalog_english_vw',
    'tagalog_english_lvw',
    'pct_tagalog_english_lvw',
    'other_asian_english_vw',
    'other_asian_english_lvw',
    'pct_other_asian_english_lvw',
    'arabic_english_vw',
    'arabic_english_lvw',
    'pct_arabic_english_lvw',
    'other_unspec_english_vw',
    'other_unspec_english_lvw',
    'pct_other_unspec_english_lvw',
]
pop_summaries_final = pop_summaries[reorder_cols].copy()

In [71]:
pop_summaries_final_t = pop_summaries_final.T

In [72]:
pop_summaries_final_t.rename(columns={0:'Summary_Statistics'}, inplace=True)

In [73]:
pop_summaries_final_t

Unnamed: 0,Summary_Statistics
total_population,453332.0
total_pop_pov,448127.0
pop_200_pct_and_over_fpl,334254.0
pop_low_income,113873.0
low_income_pct,0.254109
pop_5_years_over,426264.0
pop_english_only,215335.0
pct_english_only,0.505168
spanish_english_vw,71632.0
spanish_english_lvw,44677.0


In [65]:
# export tabular data
path = os.path.join(work_dir, 'Data', 'study_area_low_income_over5_language_at_home_acs2016_2020.csv')
pop_summaries_final_t.to_csv(path, index=True)