This file assigns IRA status to block groups (BGs) and computes, for each BG, the share of its neighboring area that has disadvantaged status.

In [None]:
import pandas as pd
import numpy as np
import geopandas as geopd
from shapely.ops import unary_union
import us
from tqdm import tqdm
import state_name_crs_mappings_ML as crsm
import matplotlib.pyplot as plt

In [None]:
# Directory layout; every location is built relative to `root`
# (an empty `root` makes all paths relative to the working directory).
root = ''
path = f'{root}Data/'                                      # general input data
path_US_data = f'{root}Data/geodata/'                      # BG shapefiles are read from here
result_path = f'{root}final_data/'                         # outputs are written below this folder
path_IRA = f'{root}Data/IRA/1.0-shapefile-codebook/usa/'   # IRA shapefiles

In [None]:
# All states: abbreviations of every entry in us.states.STATES, plus DC
states = [state.abbr for state in us.states.STATES]
# DC is added explicitly. Guard against a duplicate in case this `us`
# installation already lists DC in STATES, so DC is not processed twice
# by the per-state loops below.
if 'DC' not in states:
    states.append('DC')
print(states)

# Label BGs as disadvantaged or not according to IRA

In [None]:
# Assign BG-level to IRA/CT
# For every state: overlay the state's BGs with the IRA areas flagged SN_C == 1,
# store the share of each BG's area covered by those areas ('share_IRA') and
# flag BGs whose share exceeds 0.8 ('is_disadvantaged_IRA'). One CSV per state.
for state in states:
    print(state)
    # 'DC' is resolved by hand instead of through us.states.lookup
    if state == 'DC':
        state_full = 'District of Columbia'
        fips = '11'
    else:
        state_info = us.states.lookup(state)
        state_full = state_info.name
        fips = state_info.fips

    # Read BG shapefile; keep only identifier and geometry.
    # rename() is chained rather than applied inplace on a column subset.
    file_bg = path_US_data + 'tl_bg/tl_2020_'+fips+'_bg/tl_2020_'+fips+'_bg.shp'
    gdf_bg = geopd.read_file(file_bg)[['GEOID','geometry']].rename(columns={'GEOID':'BGFIPS'})

    # Read IRA data and bring it into the CRS of the BG layer
    gdf_ira_state = geopd.read_file(path_IRA + state_full.replace(' ','') + '.shp')
    gdf_ira_state = gdf_ira_state.loc[gdf_ira_state['SN_C'] == 1] # Only IRA BGs
    gdf_ira_state = gdf_ira_state.to_crs(crs=gdf_bg.crs)

    # Sjoin: with how='left' every BG appears at least once; BGs without any
    # intersecting IRA polygon carry a missing 'index_right'.
    gdf_BG_IRA = geopd.sjoin(gdf_bg, gdf_ira_state, how='left', predicate='intersects')
    BGFIPS_IRA = gdf_BG_IRA.loc[gdf_BG_IRA['index_right'].notna(), 'BGFIPS'].unique()

    # Assign share of IRA territory
    # BGs that intersect no IRA polygon keep a share of 0.
    ira_only_state = unary_union(gdf_ira_state['geometry'])
    gdf_bg = gdf_bg.set_index('BGFIPS')
    gdf_bg['share_IRA'] = 0.
    if len(BGFIPS_IRA) > 0:
        # Vectorised over all intersecting BGs instead of one .loc lookup per BG.
        # A zero-area BG gives NaN here (and flag 0 below) instead of raising.
        # NOTE(review): areas are taken in the CRS of the BG shapefile; if that
        # CRS is geographic (degrees) the ratio is only approximate - consider
        # projecting first, as the neighbour computation in this file does.
        geoms_ira = gdf_bg.loc[BGFIPS_IRA, 'geometry']
        gdf_bg.loc[BGFIPS_IRA, 'share_IRA'] = geoms_ira.intersection(ira_only_state).area / geoms_ira.area

    # Disadvantaged if more than 80% of the BG area is IRA territory
    gdf_bg['is_disadvantaged_IRA'] = (gdf_bg['share_IRA'] > 0.8).astype(int)

    # Save (BGFIPS is the index and becomes the first CSV column)
    df_bg = gdf_bg.drop(columns=['geometry'])
    df_bg.to_csv(result_path + 'BGlevel/level_BG_'+state+'_IRA.csv')


# IRA neighbours

In [None]:
# IRA file: load the shapefile and display how many rows it holds
file = 'usa.shp'
gdf_ira = geopd.read_file(f'{path_IRA}{file}')
gdf_ira.shape[0]

In [None]:
# Only disadvantaged communities: keep the rows whose SN_C equals 1
# and display how many remain
gdf_ira_dis = gdf_ira[gdf_ira['SN_C'].eq(1)]
gdf_ira_dis.shape[0]

In [None]:
# Combine all IRA areas to one geometry
# union_all() replaces the deprecated `unary_union` property and matches the
# union_all() calls used in the per-state neighbour loop of this file.
IRA_area = gdf_ira_dis.union_all()

In [None]:
# Find nb states
# nb_states_FIPS = list(gdf_bg_inclnb.STATEFP.unique())
# nb_states = [us.states.lookup(fips).name for fips in nb_states_FIPS]
# print(nb_states)

In [None]:
# Distances: buffer radii in km (each value is multiplied by 1000 before
# buffering and appears in the output column names as '<d>km')
distances = [10, 50]

In [None]:
# Reproject to relevant metric system
# For every state and every buffer distance d: compute per BG
#   neighbArea_<d>km    - area of the ring between the BG and its d-km buffer,
#                         clipped to the union of the BGs in gdf_bg_inclnb
#   neighbAreaIRA_<d>km - part of that ring covered by disadvantaged IRA areas
#   shareIRA_<d>km      - ratio of the two
# and write one CSV per state.
for state in tqdm(states):

    # State ('DC' is resolved by hand instead of through us.states.lookup)
    if state == 'DC':
        state_full = 'District of Columbia'
        fips = '11'
    else:
        state_info = us.states.lookup(state)
        state_full = state_info.name
        fips = state_info.fips
    crs = crsm.us_state_to_crs[state_full]

    # Read BG shapefile; rename() is chained rather than applied inplace on a column subset
    file_bg = path_US_data + 'tl_bg/tl_2020_'+fips+'_bg/tl_2020_'+fips+'_bg.shp'
    gdf_bg = geopd.read_file(file_bg)[['GEOID','geometry']].rename(columns={'GEOID':'BGFIPS'})
    gdf_bg = gdf_bg.to_crs(crs=crs)

    # Read nb BGs
    try:
        gdf_bg_inclnb = geopd.read_file(result_path + 'neighboring_bgs/tl_2020_'+fips+'_bg_neighbors.shp')
    except Exception:
        # If there are no neighbours, there is no file (AK for instance): fall
        # back to the state's own BGs. `Exception` instead of a bare except so
        # that KeyboardInterrupt / SystemExit still stop the run.
        gdf_bg_inclnb = geopd.read_file(file_bg)
    gdf_bg_inclnb = gdf_bg_inclnb.to_crs(crs=crs)
    land_area = gdf_bg_inclnb['geometry'].union_all()

    # Clip for IRA area
    # Filter for states in gdf_bg_inclnb. The loop variable is named nb_fips so
    # it does not overwrite the `fips` of the state being processed.
    nb_states_FIPS = list(gdf_bg_inclnb.STATEFP.unique())
    nb_states = [
        'District of Columbia' if nb_fips == '11' else us.states.lookup(nb_fips).name
        for nb_fips in nb_states_FIPS
    ]
    gdf_ira_dis_nbstates = gdf_ira_dis[gdf_ira_dis['SF'].isin(nb_states)]

    # Skip the state unless every (neighbouring) state is present in the IRA data
    if len(nb_states) != len(gdf_ira_dis_nbstates['SF'].unique()):
        print('Not all states are in IRA data')
        print(state + ' not compiled')
        continue

    # Then convert crs
    gdf_ira_dis_nbstates = gdf_ira_dis_nbstates.to_crs(crs=crs)
    IRA_area_state = gdf_ira_dis_nbstates.union_all()

    # Look at nb areas
    for d in distances:
        # Create buffers (can extend beyond state borders + into the sea)
        gdf_bg_buffer = gdf_bg.copy()
        # d km, expressed as d*1000 CRS units (assumes the state CRS is in metres - TODO confirm)
        gdf_bg_buffer['geometry'] = gdf_bg_buffer['geometry'].buffer(d*1000)

        # Clip to land borders to get rid of seaside areas
        gdf_bg_buffer['geometry'] = gdf_bg_buffer['geometry'].clip(land_area) # intersection
        # Deduct original BG geometry (aligned on the shared row index)
        gdf_bg_buffer['geometry'] = gdf_bg_buffer['geometry'].difference(gdf_bg['geometry'])
        # Compute nb area
        gdf_bg['neighbArea_'+str(d)+'km'] = gdf_bg_buffer['geometry'].area

        # Clip buffered area to IRA area
        gdf_bg_buffer['geometry'] = gdf_bg_buffer['geometry'].intersection(IRA_area_state)
        gdf_bg['neighbAreaIRA_'+str(d)+'km'] = gdf_bg_buffer['geometry'].area
        # Compute share of IRA area
        gdf_bg['shareIRA_'+str(d)+'km'] = gdf_bg['neighbAreaIRA_'+str(d)+'km']/gdf_bg['neighbArea_'+str(d)+'km']

    # Save
    df_bg = gdf_bg.drop(columns=['geometry'])
    df_bg.to_csv(result_path + 'BGlevel/level_BG_'+state+'_nbIRA.csv')
