In [56]:
import pandas as pd
import geopandas as gpd
import numpy as np

In [57]:
# Tract-level shortage layer.
shortage_tracts_gdf = gpd.read_file('./data/shortage_tracts_gdf_1pcp220.geojson')

# Prefix the county code with "36" so it matches the FIPS_CODE values of the
# county layer, which is the key the two layers are joined on.
# NOTE(review): "36" is presumably the state FIPS prefix — confirm against the data.
shortage_tracts_gdf["COUNTYFP"] = "36" + shortage_tracts_gdf["COUNTYFP"]

# County boundaries, reprojected on load into the CRS of the tract layer.
counties_gdf = gpd.read_file('./data/32counties.geojson').to_crs(shortage_tracts_gdf.crs)

In [59]:
# Step 1: Total the unserved-population counts over all rows sharing a COUNTYFP.
columns_to_sum = ['unserved_medicaid', 'unserved_commercial']
summed_data = shortage_tracts_gdf.groupby('COUNTYFP')[columns_to_sum].sum().reset_index()

# Step 2: Attach the per-county totals to the county polygons.
# The left join keeps every county; a county with no matching COUNTYFP gets NaN
# totals. Column selection and renaming are chained (no `inplace`) so that
# `merged_gdf` is a fresh frame and later column assignments cannot trigger
# SettingWithCopyWarning.
merged_gdf = (
    counties_gdf
    .merge(summed_data, how='left', left_on='FIPS_CODE', right_on='COUNTYFP')
    [['NAME', 'Shape_Area', 'geometry', 'FIPS_CODE', *columns_to_sum]]
    .rename(columns={'FIPS_CODE': 'id'})
)

# Step 3: Calculate unserved population per km2.
# NOTE(review): the scale factor assumes Shape_Area is stored in square metres —
# confirm against the county layer before relying on the densities.
SQ_M_PER_SQ_KM = 1_000_000
merged_gdf = merged_gdf.assign(**{
    f'{count_column}_per_km2': (
        merged_gdf[count_column] / merged_gdf['Shape_Area'] * SQ_M_PER_SQ_KM
    ).round(2)
    for count_column in columns_to_sum
})

# The combined density is the sum of the two rounded densities; round the sum
# as well so binary floating-point noise (e.g. 0.30000000000000004) does not
# leak into the exported values.
merged_gdf['unserved_population_per_km2'] = (
    merged_gdf['unserved_medicaid_per_km2'] + merged_gdf['unserved_commercial_per_km2']
).round(2)

In [63]:
# Reproject the GeoDataFrame to EPSG:4326
merged_gdf = merged_gdf.to_crs(epsg=4326)

# Replace NaN with 0 in the numeric columns only. A blanket `fillna(0)` would
# also write the number 0 into non-numeric attributes (e.g. NAME) and would try
# to fill missing geometries with a non-geometry value.
numeric_columns = merged_gdf.select_dtypes(include='number').columns
merged_gdf = merged_gdf.fillna(dict.fromkeys(numeric_columns, 0))

In [65]:
# Write the county-level result as GeoJSON; the path is relative, so the file
# lands in the current working directory.
merged_gdf.to_file(
    'shortage_counties.geojson',
    driver='GeoJSON',
)