In [553]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import maup
from shapely.geometry import Polygon, MultiPolygon
from shapely.geometry.polygon import orient

# Import dataframes, dictionaries, and functions

In [554]:
economic_columns = ['MEDN_INC22', 'TOT_HOUS22', '0_35K', '35K_60K', '60K-100K', '100K_125K',
                     '125K_150K', '150K_MORE']
race_columns = ['TOT_POP22',
                     'NHSP_POP22', 'HSP_POP22', 'WHT_NHSP22', 'BLK_NHSP22', 'AIA_NHSP22',
                     'ASN_NHSP22', 'HPI_NHSP22', 'OTH_NHSP22']

In [555]:
def orient_geometry(geom):
    if geom.type == 'Polygon':
        return orient(geom, sign=1.0)
    elif geom.type == 'MultiPolygon':
        return MultiPolygon([orient(part, sign=1.0) for part in geom.geoms])
    else:
        return geom

In [556]:
def aggregate_economic_data(block_gdf, precincts_gdf, variables):
    """
    Parameters:
    - block_gdf (GeoDataFrame): GeoDataFrame containing the economic data per census block
    - precincts_gdf (GeoDataFrame): GeoDataFrame representing precincts to which data will be aggregated
    - variables (list): List of column names to be aggregated.

    Returns:
    - GeoDataFrame: The updated precincts_gdf with aggregated economic data.
    """
    precincts_gdf['geometry'] =  precincts_gdf['geometry'].apply(orient_geometry)
    precincts_gdf =  precincts_gdf.to_crs(epsg=4326)
    block_gdf['geometry'] = block_gdf['geometry'].apply(orient_geometry)
    block_gdf = block_gdf.to_crs(epsg=4326)
    assignment = maup.assign(block_gdf, precincts_gdf)
    
    precincts_gdf[variables] = block_gdf[variables].groupby(assignment).sum()
    
    weighted_sum = (block_gdf['MEDN_INC22'] * block_gdf['TOT_HOUS22']).groupby(assignment).sum()
    total_households = block_gdf['TOT_HOUS22'].groupby(assignment).sum()
    
    precincts_gdf['MEDN_INC22'] = weighted_sum / total_households
    precincts_gdf = precincts_gdf.fillna(0)
    
    return precincts_gdf


In [557]:
def aggregate_racial_data(block_gdf, precincts_gdf, variables):
    """
    Parameters:
    - block_gdf (GeoDataFrame): GeoDataFrame containing the racial data per census block
    - precincts_gdf (GeoDataFrame): GeoDataFrame representing precincts to which data will be aggregated
    - variables (list): List of column names to be aggregated

    Returns:
    - GeoDataFrame: The updated precincts_gdf with aggregated economic data.
    """
    precincts_gdf['geometry'] =  precincts_gdf['geometry'].apply(orient_geometry)
    precincts_gdf =  precincts_gdf.to_crs(epsg=4326)
    block_gdf['geometry'] = block_gdf['geometry'].apply(orient_geometry)
    block_gdf = block_gdf.to_crs(epsg=4326)
    assignment = maup.assign(block_gdf, precincts_gdf)
    
    precincts_gdf[variables] = block_gdf[variables].groupby(assignment).sum()
    
    if 'TOT_POP22' in variables:
        variables.remove('TOT_POP22')
    precincts_gdf['TOT_POP22'] = precincts_gdf[['NHSP_POP22', 'HSP_POP22']].sum(axis=1)
    
    precincts_gdf = precincts_gdf.fillna(0)
    
    return precincts_gdf


# Aggregate Census Block data to precincts for South Carolina:

## Get block level geometry

In [558]:
sc_block_inc_gdf = gpd.read_file('raw/census_block/income/sc_inc_2022_bg_shape_file/sc_inc_2022_bg.shp')
sc_block_inc_gdf['geometry'] = sc_block_inc_gdf ['geometry'].apply(orient_geometry)
sc_block_inc_gdf = sc_block_inc_gdf .to_crs(epsg=4326)
print(len(sc_block_inc_gdf.columns))

  if geom.type == 'Polygon':
  elif geom.type == 'MultiPolygon':


24


In [559]:
sc_block_geometry_gdf = sc_block_inc_gdf[['GEOID','STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'geometry']]

## Get precincts level geoDataFrames

In [560]:
sc_precincts_gdf = gpd.read_file('states/south_carolina/geodata/south_carolina_precincts.geojson')
print(len(sc_precincts_gdf.columns))

386


## Get Census Block categories csv

In [561]:
sc_econ_df = pd.read_csv('processed_individual/sc_econ_block.csv')
sc_race_df = pd.read_csv('processed_individual/sc_race_block.csv')

## Merging census block geometry with economical data

In [562]:
sc_econ_df = sc_econ_df.drop(columns=['STATEFP', 'STATE', 'COUNTYFP', 'COUNTY'])
sc_block_geometry_gdf['GEOID'] = sc_block_geometry_gdf['GEOID'].astype(str)
sc_econ_df['GEOID'] = sc_econ_df['GEOID'].astype(str)
sc_block_geometry_gdf = sc_block_geometry_gdf.merge(sc_econ_df, on='GEOID')
print(sc_block_geometry_gdf.columns)

Index(['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'geometry',
       'MEDN_INC22', 'TOT_HOUS22', '0_35K', '35K_60K', '60K-100K', '100K_125K',
       '125K_150K', '150K_MORE'],
      dtype='object')


In [563]:
sc_economic_gdf = sc_block_geometry_gdf[['GEOID', 'geometry'] + economic_columns].copy()
sc_economic_gdf['category'] = 'economic'

In [564]:
sc_block_geometry_gdf['geometry'] = sc_block_geometry_gdf['geometry'].apply(orient_geometry)
final_sc_block_gdf = sc_block_geometry_gdf.to_crs(epsg=4326)

  if geom.type == 'Polygon':
  elif geom.type == 'MultiPolygon':


In [565]:
print(sc_economic_gdf.columns)

Index(['GEOID', 'geometry', 'MEDN_INC22', 'TOT_HOUS22', '0_35K', '35K_60K',
       '60K-100K', '100K_125K', '125K_150K', '150K_MORE', 'category'],
      dtype='object')


In [566]:
print(sc_economic_gdf.crs)
print(sc_precincts_gdf.crs)
# economic_gdf['geometry'] = economic_gdf['geometry'].buffer(0)
# precincts_gdf['geometry'] = precincts_gdf['geometry'].buffer(0)

EPSG:4326
EPSG:4326


In [567]:
variables = ['MEDN_INC22', 'TOT_HOUS22', '0_35K', '35K_60K', '60K-100K', '100K_125K',
             '125K_150K', '150K_MORE']

In [568]:
sc_precincts_gdf = aggregate_economic_data(final_sc_block_gdf, sc_precincts_gdf, variables)


  if geom.type == 'Polygon':
  elif geom.type == 'MultiPolygon':
  if geom.type == 'Polygon':
  elif geom.type == 'MultiPolygon':

  df = df[df.area > area_cutoff].reset_index(drop=True)

  geometries = geometries[geometries.area > area_cutoff]

  return assign_to_max(intersections(sources, targets, area_cutoff=0).area)


## Merging census block geometry with racial data

In [569]:
sc_race_df = sc_race_df.drop(columns=['STATEFP', 'STATE', 'COUNTYFP', 'COUNTY'])
sc_block_geometry_gdf['GEOID'] = sc_block_geometry_gdf['GEOID'].astype(str)
sc_race_df['GEOID'] = sc_race_df['GEOID'].astype(str)
sc_block_geometry_gdf = sc_block_geometry_gdf.merge(sc_race_df, on='GEOID')
print(sc_block_geometry_gdf.columns)

Index(['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'geometry',
       'MEDN_INC22', 'TOT_HOUS22', '0_35K', '35K_60K', '60K-100K', '100K_125K',
       '125K_150K', '150K_MORE', 'TOT_POP22', 'NHSP_POP22', 'HSP_POP22',
       'WHT_NHSP22', 'BLK_NHSP22', 'AIA_NHSP22', 'ASN_NHSP22', 'HPI_NHSP22',
       'OTH_NHSP22'],
      dtype='object')


In [570]:
sc_race_gdf = sc_block_geometry_gdf[['GEOID', 'geometry'] + race_columns].copy()
sc_race_gdf['category'] = 'race'

In [571]:
print(sc_race_gdf.crs)
print(sc_precincts_gdf.crs)

EPSG:4326
EPSG:4326


In [572]:
print(sc_race_gdf.columns)

Index(['GEOID', 'geometry', 'TOT_POP22', 'NHSP_POP22', 'HSP_POP22',
       'WHT_NHSP22', 'BLK_NHSP22', 'AIA_NHSP22', 'ASN_NHSP22', 'HPI_NHSP22',
       'OTH_NHSP22', 'category'],
      dtype='object')


In [573]:
variables = ['TOT_POP22', 'NHSP_POP22', 'HSP_POP22',
       'WHT_NHSP22', 'BLK_NHSP22', 'AIA_NHSP22', 'ASN_NHSP22', 'HPI_NHSP22',
       'OTH_NHSP22']

In [574]:
sc_precincts_gdf = aggregate_racial_data(sc_race_gdf, sc_precincts_gdf, variables)

  if geom.type == 'Polygon':
  elif geom.type == 'MultiPolygon':
  if geom.type == 'Polygon':
  elif geom.type == 'MultiPolygon':

  df = df[df.area > area_cutoff].reset_index(drop=True)

  geometries = geometries[geometries.area > area_cutoff]

  return assign_to_max(intersections(sources, targets, area_cutoff=0).area)


## Output final file

In [575]:
sc_precincts_gdf.to_file("states/south_carolina/geodata/south_carolina_precincts.geojson", driver="GeoJSON")

# Aggregate Census Block data to precincts for Maryland:

## Get block level geometry

In [576]:
md_block_inc_gdf = gpd.read_file('raw/census_block/income/md_inc_2022_bg_shape_file/md_inc_2022_bg.shp')
md_block_inc_gdf['geometry'] = md_block_inc_gdf ['geometry'].apply(orient_geometry)
md_block_inc_gdf = md_block_inc_gdf .to_crs(epsg=4326)
print(len(md_block_inc_gdf.columns))

  if geom.type == 'Polygon':
  elif geom.type == 'MultiPolygon':


24


In [577]:
md_block_geometry_gdf = md_block_inc_gdf[['GEOID','STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'geometry']]

## Get precincts level geoDataFrames

In [578]:
md_precincts_gdf = gpd.read_file('states/maryland/geodata/maryland_precincts.geojson')
print(len(md_precincts_gdf.columns))

28


## Get Census Block categories csv

In [579]:
md_econ_df = pd.read_csv('processed_individual/md_econ_block.csv')
md_race_df = pd.read_csv('processed_individual/md_race_block.csv')

## Merging census block geometry with economical data

In [580]:
md_econ_df = md_econ_df.drop(columns=['STATEFP', 'STATE', 'COUNTYFP', 'COUNTY'])
md_block_geometry_gdf['GEOID'] = md_block_geometry_gdf['GEOID'].astype(str)
md_econ_df['GEOID'] = md_econ_df['GEOID'].astype(str)
md_block_geometry_gdf = md_block_geometry_gdf.merge(md_econ_df, on='GEOID')
print(md_block_geometry_gdf.columns)

Index(['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'geometry',
       'MEDN_INC22', 'TOT_HOUS22', '0_35K', '35K_60K', '60K-100K', '100K_125K',
       '125K_150K', '150K_MORE'],
      dtype='object')


In [581]:
md_economic_gdf = md_block_geometry_gdf[['GEOID', 'geometry'] + economic_columns].copy()
md_economic_gdf['category'] = 'economic'

In [582]:
md_block_geometry_gdf['geometry'] = md_block_geometry_gdf['geometry'].apply(orient_geometry)
final_md_block_gdf = md_block_geometry_gdf.to_crs(epsg=4326)

  if geom.type == 'Polygon':
  elif geom.type == 'MultiPolygon':


In [583]:
variables = ['MEDN_INC22', 'TOT_HOUS22', '0_35K', '35K_60K', '60K-100K', '100K_125K',
             '125K_150K', '150K_MORE']

In [584]:
md_precincts_gdf = aggregate_economic_data(md_economic_gdf, md_precincts_gdf, variables)

  if geom.type == 'Polygon':
  elif geom.type == 'MultiPolygon':
  if geom.type == 'Polygon':
  elif geom.type == 'MultiPolygon':

  df = df[df.area > area_cutoff].reset_index(drop=True)

  geometries = geometries[geometries.area > area_cutoff]

  return assign_to_max(intersections(sources, targets, area_cutoff=0).area)


## Merging census block geometry with racial data

In [585]:
md_race_df = md_race_df.drop(columns=['STATEFP', 'STATE', 'COUNTYFP', 'COUNTY'])
md_block_geometry_gdf['GEOID'] = sc_block_geometry_gdf['GEOID'].astype(str)
md_race_df['GEOID'] = md_race_df['GEOID'].astype(str)
md_block_geometry_gdf = md_block_geometry_gdf.merge(md_race_df, on='GEOID')
print(md_block_geometry_gdf.columns)

Index(['GEOID', 'STATEFP', 'STATE', 'COUNTYFP', 'COUNTY', 'geometry',
       'MEDN_INC22', 'TOT_HOUS22', '0_35K', '35K_60K', '60K-100K', '100K_125K',
       '125K_150K', '150K_MORE', 'TOT_POP22', 'NHSP_POP22', 'HSP_POP22',
       'WHT_NHSP22', 'BLK_NHSP22', 'AIA_NHSP22', 'ASN_NHSP22', 'HPI_NHSP22',
       'OTH_NHSP22'],
      dtype='object')


In [586]:
md_race_gdf = md_block_geometry_gdf[['GEOID', 'geometry'] + race_columns].copy()
md_race_gdf['category'] = 'race'

In [587]:
variables = ['TOT_POP22', 'NHSP_POP22', 'HSP_POP22',
       'WHT_NHSP22', 'BLK_NHSP22', 'AIA_NHSP22', 'ASN_NHSP22', 'HPI_NHSP22',
       'OTH_NHSP22']

In [588]:
md_precincts_gdf = aggregate_racial_data(md_race_gdf, md_precincts_gdf, variables)

  if geom.type == 'Polygon':
  elif geom.type == 'MultiPolygon':


## Export dataframe to GeoJSON

In [589]:
md_precincts_gdf.to_file("states/maryland/geodata/maryland_precincts.geojson", driver="GeoJSON")