In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np

In [2]:
shortage_tracts_gdf = gpd.read_file('./data/shortage_tracts_gdf_1pcp220.geojson')
counties_gdf = gpd.read_file('./data/32counties.geojson')
counties_gdf = counties_gdf.to_crs(shortage_tracts_gdf.crs)
shortage_tracts_gdf["COUNTYFP"] = "36" + shortage_tracts_gdf["COUNTYFP"]

In [3]:
# Step 1: Group by 'COUNTYFP' and sum multiple columns
columns_to_sum = ['unserved_medicaid', 'unserved_commercial']
summed_data = shortage_tracts_gdf.groupby('COUNTYFP')[columns_to_sum].sum().reset_index()

# Step 2: Merge the summed data with 'counties_gdf'
merged_gdf = counties_gdf.merge(summed_data, how='left', left_on='FIPS_CODE', right_on='COUNTYFP')
merged_gdf = merged_gdf[[
    'NAME', 'Shape_Area', 'geometry', 'FIPS_CODE', 'unserved_medicaid', 'unserved_commercial'
]]
merged_gdf.rename(columns={'FIPS_CODE': 'id'}, inplace=True)


# Caclculate for unserved population / km2
merged_gdf['unserved_medicaid_per_km2'] = (merged_gdf['unserved_medicaid'] / merged_gdf['Shape_Area'] * 1000000).round(2)
merged_gdf['unserved_commercial_per_km2'] = (merged_gdf['unserved_commercial'] / merged_gdf['Shape_Area'] * 1000000).round(2)
merged_gdf['unserved_population_per_km2'] = merged_gdf['unserved_medicaid_per_km2'] + merged_gdf['unserved_commercial_per_km2']

In [4]:
# Reproject the GeoDataFrame to EPSG:4326
merged_gdf = merged_gdf.to_crs(epsg=4326)

# Replace NaN with 0
merged_gdf = merged_gdf.fillna(0)

In [65]:
merged_gdf.to_file('shortage_counties.geojson', driver='GeoJSON')

In [6]:
from shapely.geometry import Polygon, MultiPolygon

def largest_polygon(geometry):
    if isinstance(geometry, MultiPolygon):
        # Return the largest polygon by area within the MultiPolygon
        # This will iterate over each polygon within the MultiPolygon to find the one with the largest area
        largest = max(geometry, key=lambda polygon: polygon.area)
        return largest
    elif isinstance(geometry, Polygon):
        # If the geometry is already a Polygon, return it as is
        return geometry
    else:
        # If the geometry is neither Polygon nor MultiPolygon, return it as is or handle accordingly
        return geometry

# Apply the function to each geometry in the GeoDataFrame
edited_gdf = merged_gdf.copy()  # Making a copy to preserve the original data
edited_gdf['geometry'] = edited_gdf['geometry'].apply(largest_polygon)

TypeError: 'MultiPolygon' object is not iterable