# Express Lanes START Program LEP and Income Analysis 

Using American Community Survey 5-Year Estimates, map the top non-English languages spoken in
low-income census tracts in the I-880 corridor. Additionally, provide tabular data for every tract within the I-880 corridor: the total tract population, the total low-income population, the population that speaks English 'Less than Very Well', and the share of the population represented by each language spoken at home.

In [1]:
import sys
import getpass
import requests 
import json
import os

# OS account name; used to build the per-user paths below
user = getpass.getuser()
# Do not write .pyc files when importing the helper modules
sys.dont_write_bytecode = True

# for macOS Monterey
sys.path.insert(0, '/Users/{}/Library/CloudStorage/Box-Box/Utility Code'.format(user))

# NOTE(review): `pd` is used later in this notebook but is never imported explicitly;
# presumably it is provided by this star import — confirm against utils_io.
from utils_io import *

# for macOS Monterey
sys.path.insert(0, '/Users/{}/Documents/GitHub/hess_application/data_processing/6 - affh'.format(user))

# Project-local helper module, presumably resolved through the path inserted just above
import census

#Geoimports
import geopandas as gp

In [2]:
# Project folder under the current user's Box-Box CloudStorage directory
work_dir = os.path.join(
    '/Users', user, 'Library', 'CloudStorage', 'Box-Box',
    'DataViz Projects',
    'Spatial Analysis and Mapping',
    'I-880 Express Lane START LEP Analysis',
)

## Query American Community Survey API

#### [Census American Community Survey 5-Year Data API Documentation](https://www.census.gov/data/developers/data-sets/acs-5year.html)

In [3]:
# Ask for the Census API key without echoing it. An explicit prompt replaces the
# generic "Password: " default so it is clear which secret is expected.
census_api_key = getpass.getpass(prompt='Census API key: ')

········


In [4]:
# Load the lookup of ACS tables / line numbers to request. Both identifier columns are
# read as strings so Line_Num can be zero-padded afterwards. The dtype key must match the
# CSV header 'Table_ID' exactly; a key that names no column has no effect.
acs_table_vars = pd.read_csv('data/acs_table_variables_income_language.csv',
                             dtype={'Table_ID': 'str', 'Line_Num': 'str'})

In [5]:
#Fill leading zeros for line number column (e.g. '1' -> '001') to prepare for concatenation.
#The vectorized .str accessor keeps missing values as NaN instead of raising on them.
acs_table_vars['Line_Num'] = acs_table_vars['Line_Num'].str.zfill(3)

In [6]:
#Build the API variable name as <table id>_<line number>E ('E' marks the estimate)
table_id_clean = acs_table_vars['Table_ID'].str.strip()
acs_table_vars['ACS_Table_Variable'] = table_id_clean + '_' + acs_table_vars['Line_Num'] + 'E'

In [7]:
# Preview the first rows, including the ACS_Table_Variable column
acs_table_vars.head(5)

Unnamed: 0,Table_ID,Line_Num,ACS_Variable_Definitnion,ACS_Table_Definition,ACS_Table_Variable
0,B01003,1,Universe: Total population,Total Population,B01003_001E
1,C17002,1,Universe: Total population for whom poverty st...,Ratio of Income to Poverty Level in the Past 1...,C17002_001E
2,C17002,8,2.00 and over,Ratio of Income to Poverty Level in the Past 1...,C17002_008E
3,C16001,1,Universe: Population 5 years and over,Language Spoken at Home for the Population 5 Y...,C16001_001E
4,C16001,2,Speak only English,Language Spoken at Home for the Population 5 Y...,C16001_002E


In [8]:
# Plain list of ACS variable names to request
acs_vars_lst = list(acs_table_vars['ACS_Table_Variable'])

In [9]:
# Request the selected variables for ACS year 2020 through the project helper
acs_df = census.pull_acs_5_year_est_data(
    census_api_key=census_api_key,
    acs_year=2020,
    select_table_vars=acs_vars_lst,
)

In [10]:
#Rename columns to human-readable names.
#Suffix convention used below: _vw / _lvw are the two English-proficiency counts per
#language group ("very well" / "less than very well").
cols = {
    # B01003: total population
    'B01003_001E': 'total_population',
    # C17002: ratio of income to poverty level
    'C17002_001E': 'total_pop_pov',
    'C17002_008E': 'pop_200_pct_and_over_fpl',
    # C16001: language spoken at home, population 5 years and over
    'C16001_001E': 'pop_5_years_over',
    'C16001_002E': 'pop_english_only',
    'C16001_004E': 'spanish_english_vw',
    'C16001_005E': 'spanish_english_lvw',
    'C16001_007E': 'french_english_vw',
    'C16001_008E': 'french_english_lvw',
    'C16001_010E': 'german_english_vw',
    'C16001_011E': 'german_english_lvw',
    'C16001_013E': 'russian_english_vw',
    'C16001_014E': 'russian_english_lvw',
    'C16001_016E': 'other_indo_english_vw',
    'C16001_017E': 'other_indo_english_lvw',
    'C16001_019E': 'korean_english_vw',
    'C16001_020E': 'korean_english_lvw',
    'C16001_022E': 'chinese_english_vw',
    'C16001_023E': 'chinese_english_lvw',
    'C16001_025E': 'vietnamese_english_vw',
    'C16001_026E': 'vietnamese_english_lvw',
    'C16001_028E': 'tagalog_english_vw',
    'C16001_029E': 'tagalog_english_lvw',
    'C16001_031E': 'other_asian_english_vw',
    'C16001_032E': 'other_asian_english_lvw',
    'C16001_034E': 'arabic_english_vw',
    'C16001_035E': 'arabic_english_lvw',
    'C16001_037E': 'other_unspec_english_vw',
    'C16001_038E': 'other_unspec_english_lvw',
}
# Reassign instead of mutating in place; columns not listed in `cols` keep their names.
acs_df = acs_df.rename(columns=cols)

In [11]:
# Preview the first rows after renaming
acs_df.head(5)

Unnamed: 0,total_population,total_pop_pov,pop_200_pct_and_over_fpl,pop_5_years_over,pop_english_only,C16001_003E,spanish_english_vw,spanish_english_lvw,C16001_006E,french_english_vw,...,other_asian_english_vw,other_asian_english_lvw,C16001_033E,arabic_english_vw,arabic_english_lvw,C16001_036E,other_unspec_english_vw,other_unspec_english_lvw,fipco,tract_geoid
0,4367,4263,3019,4015,2408,754,453,301,0,0,...,5,0,130,74,56,77,77,0,13,6013313102
1,2740,2740,1819,2563,1092,740,514,226,0,0,...,0,0,187,86,101,0,0,0,13,6013313104
2,4701,4681,3464,4566,2777,1324,767,557,0,0,...,47,12,0,0,0,67,67,0,13,6013313105
3,6689,6643,5591,6165,3892,961,482,479,0,0,...,259,18,163,106,57,125,125,0,13,6013313106
4,2656,2651,2207,2452,1778,431,301,130,0,0,...,31,0,0,0,0,0,0,0,13,6013313107


In [12]:
#Subset columns: tract id, population universes, and the vw/lvw counts per language group
sub_cols = [
    'tract_geoid',
    'total_population',
    'total_pop_pov',
    'pop_200_pct_and_over_fpl',
    'pop_5_years_over',
    'pop_english_only',
] + [
    f'{language}_english_{proficiency}'
    for language in (
        'spanish', 'french', 'german', 'russian', 'other_indo', 'korean',
        'chinese', 'vietnamese', 'tagalog', 'other_asian', 'arabic', 'other_unspec',
    )
    for proficiency in ('vw', 'lvw')
]
acs_df = acs_df.loc[:, sub_cols].copy()

## Create share columns

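Create share columns: the low-income population as a share of the population for whom poverty status is determined, and, for each language group, the population that speaks English 'Less than Very Well' as a share of the population 5 years and over.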

In [13]:
#1. Calculate low income population: the poverty-status universe minus the
#   population at or above 200% of the federal poverty level
acs_df['pop_below_200_pct_fpl'] = (acs_df['total_pop_pov'] - 
                          acs_df['pop_200_pct_and_over_fpl'])

In [14]:
#2. Calculate share of low income population
#   (presumably the helper divides the keyed column by universe_column — confirm in census module)
acs_df = census.create_share_columns(
    df=acs_df,
    universe_column='total_pop_pov',
    share_column_dict={'pop_below_200_pct_fpl': 'share_below_200_pct_fpl'},
)

In [15]:
# "Less than very well" (lvw) count columns, one per language group
pop_columns_list = [
    f'{language}_english_lvw'
    for language in (
        'spanish', 'french', 'german', 'russian', 'other_indo', 'korean',
        'chinese', 'vietnamese', 'tagalog', 'other_asian', 'arabic', 'other_unspec',
    )
]

In [16]:
#Scratch cell: preview a dictionary mapping each population column name to its
#'pct_'-prefixed share column name (displayed only; the result is not assigned)
{key: 'pct_' + key for key in pop_columns_list}

{'spanish_english_lvw': 'pct_spanish_english_lvw',
 'french_english_lvw': 'pct_french_english_lvw',
 'german_english_lvw': 'pct_german_english_lvw',
 'russian_english_lvw': 'pct_russian_english_lvw',
 'other_indo_english_lvw': 'pct_other_indo_english_lvw',
 'korean_english_lvw': 'pct_korean_english_lvw',
 'chinese_english_lvw': 'pct_chinese_english_lvw',
 'vietnamese_english_lvw': 'pct_vietnamese_english_lvw',
 'tagalog_english_lvw': 'pct_tagalog_english_lvw',
 'other_asian_english_lvw': 'pct_other_asian_english_lvw',
 'arabic_english_lvw': 'pct_arabic_english_lvw',
 'other_unspec_english_lvw': 'pct_other_unspec_english_lvw'}

In [17]:
#Map each lvw population column (key) to the name of its share column (value)
pop_share_cols_dictionary = {
    f'{language}_english_lvw': f'pct_{language}_english_lvw'
    for language in (
        'spanish', 'french', 'german', 'russian', 'other_indo', 'korean',
        'chinese', 'vietnamese', 'tagalog', 'other_asian', 'arabic', 'other_unspec',
    )
}

In [18]:
#Create the per-language share columns, using the population 5 years and over as universe
acs_df = census.create_share_columns(
    df=acs_df,
    universe_column='pop_5_years_over',
    share_column_dict=pop_share_cols_dictionary,
)

In [19]:
# Names of the share columns, in dictionary order
share_cols = [*pop_share_cols_dictionary.values()]

In [20]:
# Spot-check ten rows of the share columns; a fixed random_state keeps the displayed
# sample identical across Restart & Run All
acs_df[share_cols].sample(10, random_state=0)

Unnamed: 0,pct_spanish_english_lvw,pct_french_english_lvw,pct_german_english_lvw,pct_russian_english_lvw,pct_other_indo_english_lvw,pct_korean_english_lvw,pct_chinese_english_lvw,pct_vietnamese_english_lvw,pct_tagalog_english_lvw,pct_other_asian_english_lvw,pct_arabic_english_lvw,pct_other_unspec_english_lvw
588,0.062191,0.0,0.0,0.0,0.011944,0.0,0.021005,0.0,0.0,0.007825,0.0,0.0
740,0.112444,0.0,0.0,0.001499,0.001,0.0,0.0,0.0,0.003998,0.0,0.0,0.0
162,0.009278,0.004343,0.0,0.006909,0.0,0.038689,0.099684,0.003356,0.016581,0.009278,0.0,0.0
238,0.096538,0.0,0.000333,0.0,0.00233,0.0,0.068908,0.0,0.01032,0.01498,0.0,0.00466
1420,0.073373,0.0,0.0,0.0,0.027742,0.003889,0.030853,0.0,0.013223,0.006741,0.0,0.0
628,0.024006,0.0,0.0,0.0,0.0,0.0,0.041322,0.008658,0.0,0.0,0.0,0.0122
1500,0.001334,0.0,0.0,0.01147,0.015738,0.017338,0.147773,0.0,0.0016,0.0,0.0,0.004268
475,0.023499,0.0,0.0,0.0,0.036554,0.002321,0.0,0.0,0.005802,0.0,0.0,0.0
1401,0.036834,0.0,0.001873,0.015358,0.024597,0.002997,0.072169,0.00924,0.027844,0.04882,0.001998,0.0
937,0.009447,0.0,0.000804,0.0,0.003015,0.001407,0.028141,0.001608,0.0,0.00201,0.0,0.002814


## Flag tracts where any language group's 'Less than Very Well' share is at or above the 5% threshold

In [21]:
def flag_threshold(acs_df, threshold=.05, share_columns=None):
    """Flag one row whose share in any listed column meets the threshold.

    Intended for ``DataFrame.apply(flag_threshold, axis=1)``: despite its name,
    ``acs_df`` is a single row (any mapping from column name to numeric value).

    Parameters
    ----------
    acs_df : mapping or pandas.Series
        One row holding the share values, indexed by column name.
    threshold : float, optional
        Minimum share (inclusive) that triggers the flag. Defaults to 0.05.
    share_columns : iterable of str, optional
        Columns to test. Defaults to the twelve ``pct_*_english_lvw`` columns.

    Returns
    -------
    int
        1 if at least one tested value is >= ``threshold``, otherwise 0.
        Missing values (NaN) never trigger the flag.

    Raises
    ------
    KeyError
        If any tested column is absent from the row.
    """
    if share_columns is None:
        share_columns = (
            'pct_spanish_english_lvw',
            'pct_french_english_lvw',
            'pct_german_english_lvw',
            'pct_russian_english_lvw',
            'pct_other_indo_english_lvw',
            'pct_korean_english_lvw',
            'pct_chinese_english_lvw',
            'pct_vietnamese_english_lvw',
            'pct_tagalog_english_lvw',
            'pct_other_asian_english_lvw',
            'pct_arabic_english_lvw',
            'pct_other_unspec_english_lvw',
        )
    # Evaluate every column before reducing so that a missing column always
    # raises KeyError rather than being hidden by short-circuiting.
    meets_threshold = [acs_df[column] >= threshold for column in share_columns]
    return 1 if any(meets_threshold) else 0

In [22]:
# Apply flag_threshold to every row (axis=1) and store its 1/0 result
acs_df['over_5_pct_thres'] = acs_df.apply(flag_threshold, axis=1)

In [23]:
# Show only the rows where over_5_pct_thres equals 1
acs_df[acs_df['over_5_pct_thres'] == 1]

Unnamed: 0,tract_geoid,total_population,total_pop_pov,pop_200_pct_and_over_fpl,pop_5_years_over,pop_english_only,spanish_english_vw,spanish_english_lvw,french_english_vw,french_english_lvw,...,pct_russian_english_lvw,pct_other_indo_english_lvw,pct_korean_english_lvw,pct_chinese_english_lvw,pct_vietnamese_english_lvw,pct_tagalog_english_lvw,pct_other_asian_english_lvw,pct_arabic_english_lvw,pct_other_unspec_english_lvw,over_5_pct_thres
0,06013313102,4367,4263,3019,4015,2408,453,301,0,0,...,0.000000,0.011208,0.000000,0.013200,0.000000,0.040598,0.000000,0.013948,0.000000,1
1,06013313104,2740,2740,1819,2563,1092,514,226,0,0,...,0.000000,0.093640,0.000000,0.000000,0.000000,0.015997,0.000000,0.039407,0.000000,1
2,06013313105,4701,4681,3464,4566,2777,767,557,0,0,...,0.000000,0.011827,0.000000,0.000000,0.007227,0.020149,0.002628,0.000000,0.000000,1
3,06013313106,6689,6643,5591,6165,3892,482,479,0,0,...,0.000000,0.008435,0.000000,0.013301,0.000000,0.020600,0.002920,0.009246,0.000000,1
4,06013313107,2656,2651,2207,2452,1778,301,130,0,0,...,0.000000,0.016721,0.000000,0.000000,0.011011,0.005302,0.000000,0.000000,0.000000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1759,06085512057,4116,4116,3668,3880,1629,806,574,0,0,...,0.000000,0.000000,0.000000,0.000000,0.099485,0.012629,0.000000,0.008505,0.000000,1
1762,06085512100,1468,1468,982,1383,619,360,197,0,0,...,0.000000,0.003615,0.000000,0.030369,0.007954,0.000000,0.018800,0.000000,0.000000,1
1768,06085512310,5328,5328,4191,4669,2984,530,366,0,0,...,0.005354,0.012637,0.001499,0.017563,0.016492,0.014993,0.000000,0.001071,0.000000,1
1769,06085512311,3931,3920,3422,3700,2209,586,413,0,0,...,0.000000,0.008649,0.000000,0.005676,0.005676,0.002703,0.001622,0.000000,0.000000,1


In [24]:
# Scratch cell: display a dictionary keyed by the share column names with None
# placeholder values (displayed only; the result is not assigned)
{key: None for key in pop_share_cols_dictionary.values()}

{'pct_spanish_english_lvw': None,
 'pct_french_english_lvw': None,
 'pct_german_english_lvw': None,
 'pct_russian_english_lvw': None,
 'pct_other_indo_english_lvw': None,
 'pct_korean_english_lvw': None,
 'pct_chinese_english_lvw': None,
 'pct_vietnamese_english_lvw': None,
 'pct_tagalog_english_lvw': None,
 'pct_other_asian_english_lvw': None,
 'pct_arabic_english_lvw': None,
 'pct_other_unspec_english_lvw': None}

In [25]:
# Display label for each share column; used to name the top language per tract.
# The Russian/Slavic label previously read 'Russian Polish of Slavic' (typo).
language_dict = {
    'pct_spanish_english_lvw': 'Spanish',
    'pct_french_english_lvw': 'French, Haitian, or Cajun',
    'pct_german_english_lvw': 'German or West Germanic',
    'pct_russian_english_lvw': 'Russian, Polish, or other Slavic',
    'pct_other_indo_english_lvw': 'Other Indo-European',
    'pct_korean_english_lvw': 'Korean',
    'pct_chinese_english_lvw': 'Chinese',
    'pct_vietnamese_english_lvw': 'Vietnamese',
    'pct_tagalog_english_lvw': 'Tagalog',
    'pct_other_asian_english_lvw': 'Other Asian Pacific Island',
    'pct_arabic_english_lvw': 'Arabic',
    'pct_other_unspec_english_lvw': 'Other Unspecified'
}

In [26]:
#Create a column to classify top non-english language spoken at home.
#The share columns are selected through language_dict's keys so this cell does not depend
#on the current contents of share_cols. A tract where no language group has a positive
#share gets no label (None) instead of defaulting to the first column.
acs_df['top_non_english_language'] = (
    acs_df[list(language_dict)]
    .apply(lambda row: language_dict[row.idxmax()] if row.max() > 0 else None, axis=1)
)

In [27]:
# Display the share column names held in share_cols
share_cols

['pct_spanish_english_lvw',
 'pct_french_english_lvw',
 'pct_german_english_lvw',
 'pct_russian_english_lvw',
 'pct_other_indo_english_lvw',
 'pct_korean_english_lvw',
 'pct_chinese_english_lvw',
 'pct_vietnamese_english_lvw',
 'pct_tagalog_english_lvw',
 'pct_other_asian_english_lvw',
 'pct_arabic_english_lvw',
 'pct_other_unspec_english_lvw']

In [28]:
# Build a new list rather than aliasing share_cols: appending to an alias would also add
# the label column to share_cols and append a duplicate every time this cell is re-run.
view_cols = share_cols + ['top_non_english_language']

In [29]:
# Display the columns listed in view_cols
acs_df[view_cols]

Unnamed: 0,pct_spanish_english_lvw,pct_french_english_lvw,pct_german_english_lvw,pct_russian_english_lvw,pct_other_indo_english_lvw,pct_korean_english_lvw,pct_chinese_english_lvw,pct_vietnamese_english_lvw,pct_tagalog_english_lvw,pct_other_asian_english_lvw,pct_arabic_english_lvw,pct_other_unspec_english_lvw,top_non_english_language
0,0.074969,0.000000,0.000000,0.000000,0.011208,0.000000,0.013200,0.000000,0.040598,0.000000,0.013948,0.000000,Spanish
1,0.088178,0.000000,0.000000,0.000000,0.093640,0.000000,0.000000,0.000000,0.015997,0.000000,0.039407,0.000000,Other Indo-European
2,0.121989,0.000000,0.000000,0.000000,0.011827,0.000000,0.000000,0.007227,0.020149,0.002628,0.000000,0.000000,Spanish
3,0.077697,0.000000,0.005353,0.000000,0.008435,0.000000,0.013301,0.000000,0.020600,0.002920,0.009246,0.000000,Spanish
4,0.053018,0.000000,0.000000,0.000000,0.016721,0.000000,0.000000,0.011011,0.005302,0.000000,0.000000,0.000000,Spanish
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1767,0.045307,0.002844,0.000000,0.002032,0.002438,0.000813,0.005892,0.003454,0.000000,0.001422,0.000000,0.000000,Spanish
1768,0.078389,0.000000,0.000000,0.005354,0.012637,0.001499,0.017563,0.016492,0.014993,0.000000,0.001071,0.000000,Spanish
1769,0.111622,0.000000,0.000000,0.000000,0.008649,0.000000,0.005676,0.005676,0.002703,0.001622,0.000000,0.000000,Spanish
1770,0.009592,0.000000,0.000000,0.000000,0.010791,0.002398,0.022182,0.003597,0.001799,0.000000,0.000000,0.000000,Chinese


## Pull American Community Survey Tracts Geodata

### Query Census TIGER Tracts to get Bay Area Census Tract geography

#### [Census TIGERweb REST API Documentation](https://tigerweb.geo.census.gov/tigerwebmain/TIGERweb_restmapservice.html)

## Join acs_df to tracts geodata

In [30]:
# Fetch census tract geometries for 2020 through the project helper
# NOTE(review): cartographic=True presumably requests generalized cartographic boundaries — confirm in census module
tracts_gdf = census.pull_census_tracts_geodata(year=2020, cartographic=True)

In [31]:
# Attach the ACS attributes to the tract geometries; the inner join keeps only tracts
# present in both tables
acs_inc_lang_geo = pd.merge(left=tracts_gdf, right=acs_df, how='inner', on='tract_geoid')

In [32]:
# Output column order: identifiers and universes, vw/lvw counts per language group,
# lvw shares per language group, derived labels, then geometry
lang_groups = (
    'spanish', 'french', 'german', 'russian', 'other_indo', 'korean',
    'chinese', 'vietnamese', 'tagalog', 'other_asian', 'arabic', 'other_unspec',
)
reorder_cols = (
    [
        'tract_geoid',
        'total_population',
        'total_pop_pov',
        'pop_200_pct_and_over_fpl',
        'pop_below_200_pct_fpl',
        'share_below_200_pct_fpl',
        'pop_5_years_over',
        'pop_english_only',
    ]
    + [f'{language}_english_{proficiency}'
       for language in lang_groups
       for proficiency in ('vw', 'lvw')]
    + [f'pct_{language}_english_lvw' for language in lang_groups]
    + ['top_non_english_language', 'over_5_pct_thres', 'geometry']
)
acs_lang_geo = acs_inc_lang_geo.loc[:, reorder_cols].copy()

In [33]:
# Write the tract table with geometry to the project data folder as GeoJSON
geojson_path = os.path.join(work_dir,
                            'data',
                            'Language_at_Home_Low_Income_Tracts_acs2016_2020.geojson')
acs_lang_geo.to_file(geojson_path, driver='GeoJSON')

## Read corridor area data

In [34]:
# Path to the ArcGIS file geodatabase inside the project folder
fgdb = os.path.join(
    work_dir, 'arcgis_projects',
    'FastTrak START Pilot Mapping', 'FastTrak START Pilot Mapping.gdb',
)

In [35]:
# Read the corridor study-area layer from the file geodatabase
corridor_study_area = gp.read_file(
    fgdb,
    layer='interstate_880_corridor_study_area',
    driver='FileGDB',
)

In [36]:
# Reproject the study area to EPSG:4326
# NOTE(review): presumably this matches the CRS of the tract geometries it is clipped against — confirm
corridor_area_epsg4326 = corridor_study_area.to_crs('EPSG:4326')

## Clip tracts to study area

In [37]:
# Clip tract geometries to the corridor study area. Clipping trims geometries only:
# attribute values such as population counts are carried over unchanged, so a tract
# that is only partly inside the study area keeps its whole-tract totals.
acs_clip_gdb = gp.clip(acs_lang_geo, corridor_area_epsg4326)

  clipped.loc[


In [38]:
# Interactive map of the clipped tracts for a visual check
acs_clip_gdb.explore()

## Summarize data within study area buffer

In [81]:
#Export study area data to csv (all attribute columns, without geometry)
lang_groups = (
    'spanish', 'french', 'german', 'russian', 'other_indo', 'korean',
    'chinese', 'vietnamese', 'tagalog', 'other_asian', 'arabic', 'other_unspec',
)
export_cols = (
    [
        'tract_geoid',
        'total_population',
        'total_pop_pov',
        'pop_200_pct_and_over_fpl',
        'pop_below_200_pct_fpl',
        'share_below_200_pct_fpl',
        'pop_5_years_over',
        'pop_english_only',
    ]
    + [f'{language}_english_{proficiency}'
       for language in lang_groups
       for proficiency in ('vw', 'lvw')]
    + [f'pct_{language}_english_lvw' for language in lang_groups]
    + ['top_non_english_language', 'over_5_pct_thres']
)
out_path = os.path.join(work_dir, 'data', 'Language_at_Home_Low_Income_Tracts_acs2016_2020.csv')
acs_clip_gdb.loc[:, export_cols].to_csv(out_path, index=False)

In [83]:
# Count columns to total across all tracts in the clipped study area
summary_cols = [
    'total_population',
    'total_pop_pov',
    'pop_200_pct_and_over_fpl',
    'pop_below_200_pct_fpl',
    'pop_5_years_over',
    'pop_english_only',
] + [
    f'{language}_english_{proficiency}'
    for language in (
        'spanish', 'french', 'german', 'russian', 'other_indo', 'korean',
        'chinese', 'vietnamese', 'tagalog', 'other_asian', 'arabic', 'other_unspec',
    )
    for proficiency in ('vw', 'lvw')
]
# Column sums, reshaped into a single-row frame (one column per statistic)
column_totals = acs_clip_gdb.loc[:, summary_cols].aggregate('sum')
pop_summaries = column_totals.to_frame().T

In [84]:
# Display the single-row frame of study-area totals
pop_summaries

Unnamed: 0,total_population,total_pop_pov,pop_200_pct_and_over_fpl,pop_below_200_pct_fpl,pop_5_years_over,pop_english_only,spanish_english_vw,spanish_english_lvw,french_english_vw,french_english_lvw,...,vietnamese_english_vw,vietnamese_english_lvw,tagalog_english_vw,tagalog_english_lvw,other_asian_english_vw,other_asian_english_lvw,arabic_english_vw,arabic_english_lvw,other_unspec_english_vw,other_unspec_english_lvw
0,2146648,2110852,1668481,442371,2018853,981222,228017,135340,10159,1667,...,30572,45431,55962,27027,65353,22337,9031,3645,14238,6998


### create language shares

In [85]:
# Population column -> share column names for the study-area summary: English-only
# first, then the lvw column of every language group
pop_share_cols_dictionary = {'pop_english_only': 'pct_english_only'}
pop_share_cols_dictionary.update(
    (f'{language}_english_lvw', f'pct_{language}_english_lvw')
    for language in (
        'spanish', 'french', 'german', 'russian', 'other_indo', 'korean',
        'chinese', 'vietnamese', 'tagalog', 'other_asian', 'arabic', 'other_unspec',
    )
)

In [86]:
# Create the language share columns on the summary row, using the population
# 5 years and over as universe
pop_summaries = census.create_share_columns(
    df=pop_summaries,
    universe_column='pop_5_years_over',
    share_column_dict=pop_share_cols_dictionary,
)

In [87]:
# Rename pop below 200% fpl to population low income.
# Reassign instead of mutating in place; if the column was already renamed by an
# earlier run of this cell, the rename is simply a no-op.
pop_summaries = pop_summaries.rename(columns={'pop_below_200_pct_fpl': 'pop_low_income'})

In [88]:
# Create the low-income share column on the summary row, using the poverty-status
# universe as denominator column
pop_summaries = census.create_share_columns(
    df=pop_summaries,
    universe_column='total_pop_pov',
    share_column_dict={'pop_low_income': 'low_income_pct'},
)

In [89]:
# Summary column order: universes and low-income figures first, then for every
# language group its vw count, lvw count and lvw share side by side
reorder_cols = [
    'total_population',
    'total_pop_pov',
    'pop_200_pct_and_over_fpl',
    'pop_low_income',
    'low_income_pct',
    'pop_5_years_over',
    'pop_english_only',
    'pct_english_only',
] + [
    column
    for language in (
        'spanish', 'french', 'german', 'russian', 'other_indo', 'korean',
        'chinese', 'vietnamese', 'tagalog', 'other_asian', 'arabic', 'other_unspec',
    )
    for column in (
        f'{language}_english_vw',
        f'{language}_english_lvw',
        f'pct_{language}_english_lvw',
    )
]
pop_summaries_final = pop_summaries.loc[:, reorder_cols].copy()

In [90]:
# Transpose so each statistic becomes a row
pop_summaries_final_t = pop_summaries_final.T

In [91]:
# Label the value column (index label 0 after the transpose); reassign instead of
# mutating in place
pop_summaries_final_t = pop_summaries_final_t.rename(columns={0: 'Summary_Statistics'})

In [95]:
# Write the transposed summary table to the project data folder
summary_csv_path = os.path.join(
    work_dir, 'data', 'Language_at_Home_Low_Income_Summary_Statistics_acs2016_2020.csv'
)
pop_summaries_final_t.to_csv(summary_csv_path)