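This notebook prepares the computation of the neighboring income of each block group (BG): for every state it creates a shapefile containing the state's own BGs plus the BGs of neighboring states that lie within 100 km of the state.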

In [None]:
import os
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import geopandas as geopds
from tqdm import tqdm
import matplotlib.pyplot as plt
import state_name_crs_mappings_ML as crsm
import us

In [None]:
# Project root and the two data folders derived from it
root = ''
result_path = f'{root}final_data/'
path_geodata = f'{root}Data/geodata/'

In [None]:
# Make sure the output folder exists. exist_ok=True creates the folder when it
# is missing and does nothing when it is already there, so no separate
# existence check (and no gap between check and creation) is needed.
os.makedirs(result_path + 'neighboring_bgs/', exist_ok=True)

# Read original data

In [None]:
# Load the BG-level socioeconomic table (output of 10b) and preview the first rows
bg_level_csv = result_path + 'BGlevel/level_BG.csv'
df_socioecon = pd.read_csv(bg_level_csv)
df_socioecon.head(3)

In [None]:
# Convert FIPS codes to strings, left-padded with zeros to a fixed width
fips_widths = {'BGFP': 12, 'STATEFP': 2, 'COUNTYFP': 5}
for fips_col, width in fips_widths.items():
    df_socioecon[fips_col] = df_socioecon[fips_col].astype(str).str.zfill(width)

In [None]:
# Use the block-group FIPS code as the row index
df_socioecon = df_socioecon.set_index('BGFP')

# Create shapefiles of neighboring BGs

In [None]:
# List all states by their postal abbreviation
states = [st.abbr for st in us.states.STATES]
# NOTE(review): whether us.states.STATES already contains DC depends on the
# installed `us` version/configuration - add it only when missing so that DC
# is never processed twice.
if 'DC' not in states:
    states.append('DC')
len(states)

In [None]:
# Load the state geodata from the cb_2018_us_state_500k shapefile
states_shp = path_geodata + 'cb_2018_us_state_500k/cb_2018_us_state_500k.shp'
gdf_states = geopds.read_file(states_shp)

In [None]:
# Compile dataset of neighboring bgs.
# For every state: take the state's own block groups, add the block groups of
# every other state clipped to a 100 km buffer around the state, and write the
# combined layer to '<result_path>neighboring_bgs/tl_2020_<fips>_bg_neighbors.shp'.
for state in tqdm(states):
    # FIPS code and full state name (DC is handled explicitly)
    if state == 'DC':
        fips = '11'
        state_name = 'District of Columbia'
    else:
        state_info = us.states.lookup(state)
        fips = state_info.fips
        state_name = state_info.name

    # Reproject to relevant metric system.
    # gdf_states is rebound each iteration, so it is always re-projected from
    # the CRS of the previously processed state.
    crs = crsm.us_state_to_crs[state_name]
    gdf_states = gdf_states.to_crs(crs=crs)

    # Buffer 100km around the state (100 km as upper commuting bound)
    state_100 = gdf_states[gdf_states['STUSPS'] == state].buffer(100000)
    print(len(state_100))

    # Every state intersecting the buffer, except the state itself, is a neighbor
    gdf_neighbors = gdf_states.clip(state_100)
    neighbor_fips = [nb for nb in gdf_neighbors['STATEFP'] if nb != fips]
    # print(neighbor_fips)

    # Read state's bg
    file_bg = path_geodata + 'tl_bg/tl_2020_'+fips+'_bg/tl_2020_'+fips+'_bg.shp'
    gdf_bg = geopds.read_file(file_bg)
    gdf_bg = gdf_bg.to_crs(crs=crs)

    # Collect the state's own bgs plus the clipped bgs of each neighboring state;
    # they are concatenated once after the loop instead of growing the frame
    # inside it.
    bg_frames = [gdf_bg]
    for nb_fips in tqdm(neighbor_fips):
        # Read bgs
        file_bg_nb = path_geodata + 'tl_bg/tl_2020_'+nb_fips+'_bg/tl_2020_'+nb_fips+'_bg.shp'
        gdf_bg_nb = geopds.read_file(file_bg_nb)
        gdf_bg_nb = gdf_bg_nb.to_crs(crs=crs)
        # Clip to the 100 km buffer
        gdf_bg_nb = gdf_bg_nb.clip(state_100)
        bg_frames.append(gdf_bg_nb)

    # Concat: state + neighboring states
    gdf_bg = geopds.GeoDataFrame(pd.concat(bg_frames, ignore_index=True), crs=crs)

    # Write once per state, after all neighbors were added. Writing outside the
    # neighbor loop avoids rewriting the shapefile after every neighbor and
    # also produces a file for states that have no neighbor within 100 km.
    gdf_bg.to_file(result_path + 'neighboring_bgs/tl_2020_'+fips+'_bg_neighbors.shp')