# I-80 Express Lane START Limited English Proficiency (LEP) Analysis

Using American Community Survey 5-Year Estimates, map the top non-English languages spoken within the I-80 corridor and identify low-income census tracts. Additionally, provide tabular data including total tract population and total low-income population. For every tract within the I-80 corridor, provide the total population that speaks English 'Less than Very Well' and the share of the population by language spoken at home.

In [33]:
import pandas as pd
import geopandas as gpd
from pathlib import Path
from mtcpy.census import pull_acs_data, create_share_columns

In [34]:
# Project working directory on the analyst's Box drive; the notebook's "Data" folder
# (corridor geodatabase, exports) is resolved relative to it.
box_projects_root = Path("/Users/jcroff/Library/CloudStorage/Box-Box/DSA Projects")
work_dir = (
    box_projects_root
    / "Spatial Analysis and Mapping"
    / "I-80 Express Lane START LEP Analysis"
)

## Limited English Proficiency Analysis

In [35]:
# Read the lookup table of ACS table IDs / line numbers to request.
# Both identifier columns are read as strings so the line numbers can be zero-padded
# later. The dtype keys must match the CSV headers exactly ("Table_ID", not
# "Table_Id"); a key that matches no column has no effect.
acs_vars = pd.read_csv(
    "data/acs_table_variables_language.csv", dtype={"Table_ID": "str", "Line_Num": "str"}
)

In [36]:
# Preview the ACS variable lookup table
acs_vars

Unnamed: 0,Table_ID,Line_Num,ACS_Variable_Definitnion,ACS_Table_Definition
0,B01003,1,Universe: Total population,Total Population
1,C16001,1,Universe: Population 5 years and over,Language Spoken at Home for the Population 5 Y...
2,C16001,2,Speak only English,Language Spoken at Home for the Population 5 Y...
3,C16001,3,Spanish:,Language Spoken at Home for the Population 5 Y...
4,C16001,4,Speak English very well,Language Spoken at Home for the Population 5 Y...
5,C16001,5,Speak English less than very well,Language Spoken at Home for the Population 5 Y...
6,C16001,6,"French, Haitian, or Cajun:",Language Spoken at Home for the Population 5 Y...
7,C16001,7,Speak English very well,Language Spoken at Home for the Population 5 Y...
8,C16001,8,Speak English less than very well,Language Spoken at Home for the Population 5 Y...
9,C16001,9,German or other West Germanic languages:,Language Spoken at Home for the Population 5 Y...


In [37]:
# Zero-pad line numbers to three digits (e.g. "1" -> "001") so they match the
# <table>_<line>E variable names used below. The vectorized .str accessor replaces
# the per-element lambda.
acs_vars['Line_Num'] = acs_vars['Line_Num'].str.zfill(3)

# Build the variable name: table id + '_' + line number + 'E' for estimate
acs_vars['ACS_Table_Variable'] = (
    acs_vars['Table_ID'].str.strip() + '_' + acs_vars['Line_Num'] + 'E'
)

# List of variable names to request from the ACS pull
acs_vars_list = acs_vars['ACS_Table_Variable'].to_list()

In [38]:
# Pull tract-level estimates for the requested variables from the 2023 ACS 5-year
# detailed tables
acs_pull_params = {
    "year": 2023,
    "acs_type": "acs5",
    "tbl_prof_type": "Detailed",
    "variable_list": acs_vars_list,
    "geography_level": "tract",
}
acs_data_df = pull_acs_data(**acs_pull_params)

# Equity Priority Communities (EPC) tract layer, requested as GeoJSON from the
# ArcGIS feature service (all fields, all features)
epc_service_url = "https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/draft_equity_priority_communities_pba2050plus_acs2022a/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"
epc_df = gpd.read_file(epc_service_url)

In [39]:
# Shortened EPC field names mapped to human-readable column names
epc_field_names = {
    "tot_pop_po": "total_pop_poverty",
    "pop_below2": "pop_below_200_pct_fpl",
    "pct_below2": "pct_below_200_pct_fpl",
    "below2_1_2": "below_200_pct_fpl_epc_thres_flag",
}

# Keep only the tract id, the fields above and the geometry, then apply the new names
epc_keep_cols = ["tract_geoid", *epc_field_names, "geometry"]
epc_df = epc_df[epc_keep_cols].rename(columns=epc_field_names)

In [40]:
# Human-readable names for the ACS estimate columns.
# Totals and the English-only count are listed explicitly.
cols = {
    'B01003_001E': 'total_population',
    'C16001_001E': 'pop_5_years_over',
    'C16001_002E': 'pop_english_only',
}

# In table C16001 each language group occupies three consecutive lines starting at
# line 3: a group header (not renamed here), then 'speak English very well' (vw) and
# 'speak English less than very well' (lvw).
c16001_language_groups = [
    'spanish',
    'french',
    'german',
    'russian',
    'other_indo',
    'korean',
    'chinese',
    'vietnamese',
    'tagalog',
    'other_asian',
    'arabic',
    'other_unspec',
]
for group_idx, language in enumerate(c16001_language_groups):
    header_line = 3 + 3 * group_idx
    cols[f'C16001_{header_line + 1:03d}E'] = f'{language}_english_vw'
    cols[f'C16001_{header_line + 2:03d}E'] = f'{language}_english_lvw'
# Apply the human-readable names to the ACS estimate columns
acs_data_df = acs_data_df.rename(columns=cols)

In [41]:
# Population columns that get a share column: English-only speakers plus the
# 'less than very well' (lvw) count for each language group
lvw_language_groups = (
    'spanish',
    'french',
    'german',
    'russian',
    'other_indo',
    'korean',
    'chinese',
    'vietnamese',
    'tagalog',
    'other_asian',
    'arabic',
    'other_unspec',
)
pop_columns_list = ['pop_english_only'] + [
    f'{language}_english_lvw' for language in lvw_language_groups
]

# Map each population column to the name of its share column ('pct_' prefix)
share_column_names = [f'pct_{pop_col}' for pop_col in pop_columns_list]
pop_share_col_dict = dict(zip(pop_columns_list, share_column_names))

In [42]:
# Add one 'pct_*' column per population column, with the population 5 years and over
# passed as the universe (presumably the denominator inside create_share_columns --
# confirm against mtcpy.census)
tract_share_kwargs = {
    "universe_column": "pop_5_years_over",
    "share_column_dict": pop_share_col_dict,
}
acs_data_df = create_share_columns(df=acs_data_df, **tract_share_kwargs)

In [43]:
# flag non-english proficient tracts

# Share columns (one per non-English language group) tested against the LEP threshold
LEP_SHARE_COLUMNS = (
    "pct_spanish_english_lvw",
    "pct_french_english_lvw",
    "pct_german_english_lvw",
    "pct_russian_english_lvw",
    "pct_other_indo_english_lvw",
    "pct_korean_english_lvw",
    "pct_chinese_english_lvw",
    "pct_vietnamese_english_lvw",
    "pct_tagalog_english_lvw",
    "pct_other_asian_english_lvw",
    "pct_arabic_english_lvw",
    "pct_other_unspec_english_lvw",
)


def flag_threshold(acs_df, threshold=0.05, share_columns=LEP_SHARE_COLUMNS):
    """Flag a tract whose 'less than very well' share meets the threshold for any language.

    Written for row-wise use, e.g. ``df.apply(flag_threshold, axis=1)``.

    Args:
        acs_df: A single tract record (a DataFrame row or any mapping) holding the
            share columns.
        threshold: Minimum share, on the same scale as the share columns, that
            triggers the flag. Defaults to 0.05, the value that was hard-coded before.
        share_columns: Names of the share columns to test. Defaults to the twelve
            language-group 'less than very well' share columns.

    Returns:
        1 if at least one share column is >= ``threshold``, otherwise 0.
    """
    exceeds = False
    for column in share_columns:
        # Combine with `|` rather than `or` so every column is looked up, exactly as
        # in the original chained expression (a missing column still raises KeyError).
        exceeds = exceeds | (acs_df[column] >= threshold)
    return 1 if exceeds else 0
    
# Row-wise 0/1 flag: does any language group's 'less than very well' share reach the threshold?
lep_threshold_flags = acs_data_df.apply(flag_threshold, axis=1)
acs_data_df['over_5_pct_thres'] = lep_threshold_flags

In [44]:
# Display a template dictionary of share column names (values left as None) to fill in
# with abbreviated language labels
dict.fromkeys(pop_share_col_dict.values())

{'pct_pop_english_only': None,
 'pct_spanish_english_lvw': None,
 'pct_french_english_lvw': None,
 'pct_german_english_lvw': None,
 'pct_russian_english_lvw': None,
 'pct_other_indo_english_lvw': None,
 'pct_korean_english_lvw': None,
 'pct_chinese_english_lvw': None,
 'pct_vietnamese_english_lvw': None,
 'pct_tagalog_english_lvw': None,
 'pct_other_asian_english_lvw': None,
 'pct_arabic_english_lvw': None,
 'pct_other_unspec_english_lvw': None}

In [45]:
# Display label for each language group's 'less than very well' share column.
# These labels are written to the top_non_english_language column and therefore appear
# in the exported data and map legends.
language_dict = {
    'pct_spanish_english_lvw': 'Spanish',
    'pct_french_english_lvw': 'French, Haitian, or Cajun',
    'pct_german_english_lvw': 'German or West Germanic',
    # Fixed typo: was 'Russian Polish of Slavic'
    'pct_russian_english_lvw': 'Russian, Polish, or other Slavic',
    'pct_other_indo_english_lvw': 'Other Indo-European',
    'pct_korean_english_lvw': 'Korean',
    'pct_chinese_english_lvw': 'Chinese',
    'pct_vietnamese_english_lvw': 'Vietnamese',
    'pct_tagalog_english_lvw': 'Tagalog',
    'pct_other_asian_english_lvw': 'Other Asian Pacific Island',
    'pct_arabic_english_lvw': 'Arabic',
    'pct_other_unspec_english_lvw': 'Other Unspecified'
}

In [46]:
# Create a column classifying the top non-English language group in each tract, i.e.
# the language whose 'less than very well' share is largest.
# Share columns come from the share dict, excluding the English-only share.
share_cols = [col for col in pop_share_col_dict.values() if col != 'pct_pop_english_only']

# Treat missing shares as zero so idxmax never sees an all-NaN row
lep_shares = acs_data_df[share_cols].fillna(0)

# A tract where every share is zero has no top language. Without this mask idxmax
# would return the first column and the tract would be labelled 'Spanish'.
has_lep_speakers = lep_shares.max(axis=1) > 0

# Vectorized: column name of the row maximum -> display label; unlabelled where masked
acs_data_df["top_non_english_language"] = (
    lep_shares.idxmax(axis=1).map(language_dict).where(has_lep_speakers)
)

## Join ACS Data to EPC Geodata

In [47]:
# Attach the ACS attributes to the EPC tract geometries. Left join on tract id, called
# from the spatial frame so every EPC tract and its geometry is kept.
merge_gdf = epc_df.merge(acs_data_df, how="left", on="tract_geoid")

In [48]:
# Output column order: tract id and headline population/poverty fields first, then for
# each language group its 'very well' count, 'less than very well' count and
# 'less than very well' share, followed by the classification/flag columns and geometry
leading_cols = [
    "tract_geoid",
    "total_population",
    "total_pop_poverty",
    "pop_5_years_over",
    "pop_below_200_pct_fpl",
    "pct_below_200_pct_fpl",
    "pop_english_only",
    "pct_pop_english_only",
]
trailing_cols = [
    "top_non_english_language",
    "over_5_pct_thres",
    "below_200_pct_fpl_epc_thres_flag",
    "geometry",
]
ordered_language_groups = (
    "spanish",
    "french",
    "german",
    "russian",
    "other_indo",
    "korean",
    "chinese",
    "vietnamese",
    "tagalog",
    "other_asian",
    "arabic",
    "other_unspec",
)

language_cols = []
for language in ordered_language_groups:
    language_cols += [
        f"{language}_english_vw",
        f"{language}_english_lvw",
        f"pct_{language}_english_lvw",
    ]

reorder_cols = leading_cols + language_cols + trailing_cols
# Keep only the listed columns, in that order, as an independent copy
merge_gdf = merge_gdf.loc[:, reorder_cols].copy()

## Read Corridor Area and Clip Tracts

In [49]:
# Corridor study area polygon(s), read from a layer of the project file geodatabase
# (the layer name suggests a 7-mile buffer of the I-80 corridor in Solano County)
fgdb_path = work_dir.joinpath("Data", "I80_Express_Lane_START.gdb")
layer_name = "I80_Corridor_Solano_7_mi_Buffer"

corridor_area_gdf = gpd.read_file(fgdb_path, layer=layer_name)

In [50]:
# Preview the corridor area on an interactive map, reprojected to EPSG:4326
corridor_wgs84_gdf = corridor_area_gdf.to_crs("EPSG:4326")
corridor_wgs84_gdf.explore()

In [51]:
# perform clip operation
# Work in EPSG:26910 (a projected CRS) for the tract layer
acs_gdf_epsg26910 = merge_gdf.to_crs("EPSG:26910")

# gpd.clip expects both layers in the same CRS. Reproject the corridor mask when it
# differs instead of relying on the geodatabase layer already being in EPSG:26910.
corridor_mask_gdf = corridor_area_gdf
if corridor_mask_gdf.crs is not None and corridor_mask_gdf.crs != acs_gdf_epsg26910.crs:
    corridor_mask_gdf = corridor_mask_gdf.to_crs(acs_gdf_epsg26910.crs)

# Clip tract geometries to the corridor area; attribute values are carried over unchanged
acs_corridor_gdf = gpd.clip(acs_gdf_epsg26910, corridor_mask_gdf)

In [52]:
# Visual check: draw all tracts, then add the corridor area in red on the same map
m = merge_gdf.explore()
corridor_area_gdf.explore(color="red", m=m)

In [53]:
# Inspect the tracts clipped to the corridor area
acs_corridor_gdf.explore()

In [54]:
# Map the clipped tracts colored by their top non-English language
top_language_field = 'top_non_english_language'
acs_corridor_gdf[[top_language_field, 'geometry']].explore(column=top_language_field)

## Summarize and Export Corridor Data

In [55]:
# Count columns to total across the whole corridor area: headline population/poverty
# counts, English-only speakers, and the 'very well' / 'less than very well' counts for
# each language group
headline_count_cols = [
    "total_population",
    "total_pop_poverty",
    "pop_5_years_over",
    "pop_below_200_pct_fpl",
    "pop_english_only",
]
summary_language_groups = (
    "spanish",
    "french",
    "german",
    "russian",
    "other_indo",
    "korean",
    "chinese",
    "vietnamese",
    "tagalog",
    "other_asian",
    "arabic",
    "other_unspec",
)
summary_cols = headline_count_cols + [
    f"{language}_english_{proficiency}"
    for language in summary_language_groups
    for proficiency in ("vw", "lvw")
]
# Total each count column over the clipped tracts.
# NOTE(review): clipping trims geometry only, so a tract partly inside the corridor
# still contributes its full counts to these totals -- confirm this is intended.
corridor_totals = acs_corridor_gdf[summary_cols].sum()

# Long format: one row per demographic variable with its corridor total
summary_df = corridor_totals.reset_index().rename(
    columns={"index": "demographic_variable", 0: "total"}
)

# Wide format: demographic variables as columns, a single row of totals
pivoted_summary_df = summary_df.pivot_table(
    values='total', columns='demographic_variable'
).reset_index(drop=True)

In [56]:
# Create the corridor-wide language share columns from the summed counts, with the
# population 5 years and over as the universe
corridor_share_kwargs = {
    "universe_column": "pop_5_years_over",
    "share_column_dict": pop_share_col_dict,
}
pivoted_summary_df = create_share_columns(df=pivoted_summary_df, **corridor_share_kwargs)

In [57]:
# Create the corridor-wide low-income share column, with the poverty-status population
# total as the universe
low_income_share_map = {"pop_below_200_pct_fpl": "pct_below_200_pct_fpl"}
pivoted_summary_df = create_share_columns(
    df=pivoted_summary_df,
    universe_column="total_pop_poverty",
    share_column_dict=low_income_share_map,
)

In [58]:
# Tract-level id, classification/flag columns and geometry are not part of the
# corridor summary; every other column keeps the tract-table order
rm_cols = [
    "tract_geoid",
    "top_non_english_language",
    "over_5_pct_thres",
    "below_200_pct_fpl_epc_thres_flag",
    "geometry",
]
summary_reorder_cols = [name for name in reorder_cols if name not in rm_cols]
pivoted_summary_df = pivoted_summary_df[summary_reorder_cols].copy()

In [59]:
# export data
geojson_path = work_dir / "Data" / "Language_at_Home_Low_Income_Tracts_acs2019_2023.geojson"
excel_path = work_dir / "Data" / "Language_at_Home_Low_Income_Tracts_acs2019_2023.xlsx"
# Geometry objects are not written to the workbook
excel_cols = [col for col in acs_corridor_gdf.columns if col != 'geometry']

# Export GeoJSON. RFC 7946 GeoJSON coordinates are WGS 84 longitude/latitude, so
# reproject the clipped tracts (held in the projected EPSG:26910) before writing.
acs_corridor_gdf.to_crs("EPSG:4326").to_file(geojson_path, driver="GeoJSON")

# Export Excel with multiple sheets using ExcelWriter:
# tract-level table on one sheet, corridor-wide summary on another
with pd.ExcelWriter(excel_path, engine='openpyxl') as writer:
    acs_corridor_gdf[excel_cols].to_excel(writer, sheet_name="tracts", index=False)
    pivoted_summary_df.to_excel(writer, sheet_name="corridor_summary", index=False)