# Week 3: the modifiable areal unit problem (MAUP), change over time

In [None]:
!pip -q install maup census

In [None]:
%config InlineBackend.figure_formats = ["retina"]

import matplotlib.pyplot as plt
import maup
import pandas as pd
import geopandas as gpd

from math import isnan
from census import Census
from collections import Counter

## Introduction to Census blocks

In [None]:
# Study area, identified by zero-padded FIPS code strings. They are used in
# the TIGER download URLs, the Census API queries, and the county filters.
state_fips = "36"    # New York
county_fips = "047"  # Kings County
county_name = "Kings County"  # i.e. Brooklyn (Ithaca is in Tompkins County, not here)

In [None]:
# Census API client whose default data year is 2020. An empty API key is
# passed (presumably relying on unauthenticated access — TODO confirm limits).
census = Census("", year=2020)

In [None]:
# Download the statewide block shapefile, reproject it to EPSG:2263, and key
# the rows by the GEOID20 column.
block_shapefile_url = (
    "https://www2.census.gov/geo/tiger/TIGER2024/TABBLOCK20/"
    f"tl_2024_{state_fips}_tabblock20.zip"
)
block_gdf = gpd.read_file(block_shapefile_url)
block_gdf = block_gdf.to_crs("EPSG:2263")
block_gdf = block_gdf.set_index("GEOID20")

In [None]:
# Keep only the blocks whose county FIPS attribute matches the study county.
block_in_county = block_gdf["COUNTYFP20"] == county_fips
county_block_gdf = block_gdf.loc[block_in_county]

In [None]:
# Plain outline map of every block in the county (no data attached yet).
fig, ax = plt.subplots(figsize=(40, 20))
county_block_gdf.plot(
    ax=ax,
    color="#e1f1fd",
    edgecolor="0.1",
    linewidth=1,
)
ax.set_title(f"{county_name} (blocks)", fontsize=18)
ax.axis('off')  # hide the axes frame and ticks; only the map is of interest
plt.show()

### Block-level population data

In [None]:
# Census variable codes (the P1_* fields requested from `census.pl`) mapped
# to the short column names used throughout the notebook.
p1_population_columns = dict(
    P1_003N="white",        # White alone
    P1_004N="black",        # Black or African American alone
    P1_005N="amin",         # American Indian and Alaska Native alone
    P1_006N="asian",        # Asian alone
    P1_007N="nhpi",         # Native Hawaiian and Other Pacific Islander alone
    P1_008N="other",        # Some Other Race alone
    P1_009N="two_or_more",  # Two or more races
)

In [None]:
# Request the name plus every P1 race count for each block in the county.
requested_fields = ("NAME", *p1_population_columns)
block_geo_filter = {
    "for": "block:*",
    "in": f"county:{county_fips} state:{state_fips}",
}
block_populations = census.pl.get(requested_fields, geo=block_geo_filter)

In [None]:
# Tabulate the API rows and swap the Census variable codes for readable names.
readable_names = {"NAME": "name"}
readable_names.update(p1_population_columns)
race_df = pd.DataFrame(block_populations)
race_df = race_df.rename(columns=readable_names)

In [None]:
# Build the block GEOID by concatenating the state, county, tract, and block
# identifier columns (in that order), index by it, and drop the now-redundant
# identifier columns along with the name.
geoid_parts = ["state", "county", "tract", "block"]
block_geoid = race_df[geoid_parts[0]]
for part in geoid_parts[1:]:
    block_geoid = block_geoid + race_df[part]
race_df["GEOID20"] = block_geoid
race_df = race_df.set_index("GEOID20")
race_df = race_df.drop(columns=["name", *geoid_parts])

In [None]:
# Short race-category column names, in the order they were declared.
categories = [*p1_population_columns.values()]

In [None]:
# Per-block total: the row-wise sum of all race-category counts.
block_total = race_df[categories].sum(axis="columns")
race_df["total"] = block_total

In [None]:
# Display the block-level counts for inspection.
race_df

In [None]:
# Work on an independent copy so the raw counts in race_df stay untouched.
race_with_pcts_df = race_df.copy(deep=True)

In [None]:
# Add a "<category>_pct" column for each race category. Blocks with a zero
# total yield 0/0 = NaN, which is reported as 0%.
block_totals = race_df["total"]
for category in categories:
    share = race_df[category].mul(100).div(block_totals)
    race_with_pcts_df[f"{category}_pct"] = share.fillna(0)

In [None]:
# Display the counts together with the derived percentage columns.
race_with_pcts_df

In [None]:
# Attach the race counts/percentages to the block geometries (index join),
# then derive the subset of blocks with a positive total population.
county_block_with_race_gdf = county_block_gdf.join(race_with_pcts_df)
has_residents = county_block_with_race_gdf["total"] > 0
county_block_with_race_populated_gdf = county_block_with_race_gdf[has_residents]

In [None]:
# Keyword arguments shared by every choropleth in the notebook.
choropleth_style = {
    "edgecolor": "0.1",
    "linewidth": 0.2,
    "cmap": "Blues",
    "legend": True,
    "legend_kwds": {"shrink": 0.4},
}

In [None]:
# Block-level choropleth of the Black population share, on a fixed 0-100
# scale. Only populated blocks are drawn.
fig, ax = plt.subplots(figsize=(40, 20))
percent_scale = {"vmin": 0, "vmax": 100}
county_block_with_race_populated_gdf.plot(
    ax=ax,
    column="black_pct",
    **percent_scale,
    **choropleth_style,
)
ax.set_title(f"{county_name} Black population % (blocks)", fontsize=18)
ax.axis('off')
plt.show()

## Scale effects: blocks vs. tracts

In [None]:
# Download the statewide tract shapefile, reproject it to EPSG:2263 (the same
# CRS used for the blocks), and key the rows by the GEOID column.
tract_shapefile_url = (
    "https://www2.census.gov/geo/tiger/TIGER2024/TRACT/"
    f"tl_2024_{state_fips}_tract.zip"
)
tract_gdf = gpd.read_file(tract_shapefile_url)
tract_gdf = tract_gdf.to_crs("EPSG:2263")
tract_gdf = tract_gdf.set_index("GEOID")

In [None]:
# Keep only the tracts whose county FIPS attribute matches the study county.
tract_in_county = tract_gdf["COUNTYFP"] == county_fips
county_tract_gdf = tract_gdf.loc[tract_in_county]

In [None]:
# Plain outline map of every tract in the county.
fig, ax = plt.subplots(figsize=(40, 20))
county_tract_gdf.plot(
    ax=ax,
    color="#e1f1fd",
    edgecolor="0.1",
    linewidth=1,
)
ax.set_title(f"{county_name} (tracts)", fontsize=18)
ax.axis('off')  # hide the axes frame and ticks
plt.show()

In [None]:
# Display the first tract record to see which attributes the layer carries.
county_tract_gdf.iloc[0]

In [None]:
# Label each block with the first 11 characters of its GEOID; this prefix is
# used below as the tract key.
block_geoids = county_block_with_race_gdf.index
county_block_with_race_gdf["tract"] = block_geoids.str[:11]

In [None]:
# Aggregate the block counts up to tracts by summing within each tract key.
tract_input_columns = [*categories, "total", "tract"]
county_tract_race_df = (
    county_block_with_race_gdf[tract_input_columns]
    .groupby("tract")
    .sum()
)
county_tract_race_df

In [None]:
# Attach the aggregated counts to the tract geometries. This is an index join
# that keeps every tract in the county layer.
county_tract_with_race_gdf = county_tract_gdf.join(county_tract_race_df, how="left")
county_tract_with_race_gdf

In [None]:
# Add a "<category>_pct" column per race category at the tract level. Tracts
# where the division is undefined (NaN) are reported as 0%.
tract_totals = county_tract_with_race_gdf["total"]
for category in categories:
    share = county_tract_with_race_gdf[category].mul(100).div(tract_totals)
    county_tract_with_race_gdf[f"{category}_pct"] = share.fillna(0)

In [None]:
# Tract-level counterpart of the block-level Black population map, on the
# same fixed 0-100 scale so the two maps can be compared directly.
fig, ax = plt.subplots(figsize=(40, 20))
ax.axis('off')
ax.set_title(f"{county_name} Black population % (tracts)", fontsize=18)
county_tract_with_race_gdf.plot(
    ax=ax,
    # Must match the title; the original plotted "asian_pct" under a
    # "Black population %" heading.
    column="black_pct",
    vmin=0,
    vmax=100,
    **choropleth_style,
)
plt.show()

In [None]:
# Side-by-side block vs. tract choropleths for every race category. Both
# panels share one color scale so that differences come from the geography,
# not from the legend.
for col in categories:
    pct_column = f"{col}_pct"
    fig, axes = plt.subplots(1, 2, figsize=(40, 20))

    # Cap the scale at the 99.9th percentile of the block-level percentages
    # so a handful of extreme blocks do not wash out the rest of the map.
    vmax = county_block_with_race_populated_gdf[pct_column].quantile(.999)

    axes[0].axis('off')
    axes[0].set_title(f"{col} % (blocks)", fontsize=18)
    county_block_with_race_populated_gdf.plot(
        ax=axes[0],
        column=pct_column,
        vmin=0,
        vmax=vmax,
        **choropleth_style,
    )

    axes[1].axis('off')
    # The right panel plots the tract layer; the title previously said
    # "block groups".
    axes[1].set_title(f"{col} % (tracts)", fontsize=18)
    county_tract_with_race_gdf.plot(
        ax=axes[1],
        column=pct_column,
        vmin=0,
        vmax=vmax,
        **choropleth_style,
    )

    plt.show()

## Introduction to the American Community Survey (ACS)

In [None]:
# Variable codes requested from `census.acs5` below; each comment gives the
# variable's label.

# Estimate!!Median age --!!Total:
median_age_column = "B01002_001E"

# Estimate!!Median income in the past 12 months --!!Total:
median_income_column = "B06011_001E"

In [None]:
# Fetch tract-level median age and median income from the 2023 ACS 5-year
# data for the study county.
county_tract_acs_df = pd.DataFrame(
    census.acs5.get(
        (median_age_column, median_income_column),
        geo={
            "for": "tract:*",
            "in": f"county:{county_fips} state:{state_fips}",
        },
        year=2023,
    )
)

# Index by the concatenated state + county + tract identifiers so the table
# can be joined to the tract geometries by index.
county_tract_acs_df["GEOID20"] = (
    county_tract_acs_df["state"]
    + county_tract_acs_df["county"]
    + county_tract_acs_df["tract"]
)
county_tract_acs_df = county_tract_acs_df.set_index("GEOID20").drop(
    columns=["state", "county", "tract"]
).rename(
    columns={
        median_age_column: "median_age",
        median_income_column: "median_income",
    }
)

# The ACS API encodes "estimate not available" as large negative sentinel
# values (e.g. -666666666) rather than nulls. Neither a median age nor a
# median income can legitimately be negative, so mask such values to NaN;
# otherwise the maps (which pin vmin=0) would draw those tracts as a real 0.
# NOTE(review): confirm against the returned data that no valid value is < 0.
county_tract_acs_df = county_tract_acs_df.apply(pd.to_numeric, errors="coerce")
county_tract_acs_df = county_tract_acs_df.where(county_tract_acs_df >= 0)
county_tract_acs_df

In [None]:
# Attach the ACS medians to the tract layer that already carries race data
# (index join keeping every tract).
county_tract_with_acs_gdf = county_tract_with_race_gdf.join(county_tract_acs_df, how="left")

In [None]:
# Two tract-level choropleths side by side: median age and median income.
# Each color scale starts at 0 and tops out at that column's own maximum.
fig, axes = plt.subplots(1, 2, figsize=(40, 20))
acs_panels = [
    ("median_age", f"{county_name}: median age"),
    ("median_income", f"{county_name}: median income ($)"),
]
for panel_ax, (acs_column, panel_title) in zip(axes, acs_panels):
    panel_ax.axis('off')
    panel_ax.set_title(panel_title, fontsize=18)
    county_tract_with_acs_gdf.plot(
        ax=panel_ax,
        column=acs_column,
        vmin=0,
        **choropleth_style,
    )

plt.show()

## Introduction to precincts

In [None]:
!curl -OL https://github.com/PUBPOL-2130/notebooks/raw/refs/heads/main/data/week3-NY-precincts.zip
!unzip week3-NY-precincts.zip

In [None]:
# Load the precinct layer (presumably the directory extracted from
# week3-NY-precincts.zip — verify the archive layout).
ny_precinct_gdf = gpd.read_file("week3-NY-precincts")

In [None]:
# Display the precinct table to see the available columns.
ny_precinct_gdf

In [None]:
# Every column whose name starts with "USSen_" (presumably one vote-count
# column per party/candidate — TODO confirm against the data dictionary).
sen_columns = [name for name in ny_precinct_gdf.columns if name.startswith("USSen_")]

In [None]:
# Row-wise sum of all Senate vote columns, stored as a new column; the new
# column is then tracked alongside the others in sen_columns.
senate_vote_total = ny_precinct_gdf[sen_columns].sum(axis="columns")
ny_precinct_gdf["USSen_total"] = senate_vote_total
sen_columns += ["USSen_total"]

In [None]:
# Restrict the statewide precinct layer to the study county. An explicit copy
# is taken because the geometry column is reassigned on the next line; writing
# to a boolean-mask slice of ny_precinct_gdf can raise SettingWithCopyWarning.
county_precinct_gdf = ny_precinct_gdf[ny_precinct_gdf.CountyFP == county_fips].copy()
# buffer(0) rebuilds each geometry; it is commonly used to repair invalid
# polygons (presumably needed before the overlay step — TODO confirm).
county_precinct_gdf.geometry = county_precinct_gdf.geometry.buffer(0)

In [None]:
# Display the county's precinct records.
county_precinct_gdf

In [None]:
# Plain outline map of every precinct in the county.
fig, ax = plt.subplots(figsize=(40, 20))
county_precinct_gdf.plot(
    ax=ax,
    color="#e1f1fd",
    edgecolor="0.1",
    linewidth=1,
)
ax.set_title(f"{county_name} (precincts)", fontsize=18)
ax.axis('off')  # hide the axes frame and ticks
plt.show()

In [None]:
# Assign each block to a precinct, drop blocks that received no assignment,
# and store the remaining precinct labels as integers.
block_to_precinct_assignment = maup.assign(county_block_gdf, county_precinct_gdf)
block_to_precinct_assignment = block_to_precinct_assignment.dropna().astype(int)
block_to_precinct_assignment

In [None]:
# Weight of each populated block = its population divided by the summed
# population of all populated blocks assigned to the same precinct. Blocks
# with no computable weight (NaN) get weight 0.
block_pop = county_block_with_race_populated_gdf["total"]
precinct_pop = block_pop.groupby(block_to_precinct_assignment).sum()
precinct_pop_per_block = block_to_precinct_assignment.map(precinct_pop)
pop_weights = (block_pop / precinct_pop_per_block).fillna(0)

In [None]:
# Display the per-block population weights.
pop_weights

In [None]:
# Distribute each precinct's Senate vote columns down to its blocks using the
# population weights.
precinct_senate_votes = county_precinct_gdf[sen_columns]
prorated_votes_df = maup.prorate(
    block_to_precinct_assignment,
    precinct_senate_votes,
    weights=pop_weights,
)

In [None]:
# Display the block-level (prorated) vote estimates.
prorated_votes_df

In [None]:
# Attach the prorated Democratic and total Senate votes to the block layer and
# derive the Democratic share. Blocks with 0 total votes produce NaN here.
prorated_senate = prorated_votes_df[["USSen_DEM", "USSen_total"]]
county_block_with_election_gdf = county_block_with_race_gdf.join(prorated_senate)
block_dem_votes = county_block_with_election_gdf["USSen_DEM"]
block_all_votes = county_block_with_election_gdf["USSen_total"]
county_block_with_election_gdf["sen_dem_pct"] = 100 * block_dem_votes / block_all_votes

In [None]:
# Block-level Senate Dem % (skipping blocks with no defined share), with the
# precinct boundaries drawn on top as thick outlines.
fig, ax = plt.subplots(figsize=(40, 20))
ax.set_title(f"{county_name} 2022 Senate Dem % (disaggregated to blocks)", fontsize=18)
ax.axis('off')

blocks_with_share = county_block_with_election_gdf.dropna(subset=["sen_dem_pct"])
blocks_with_share.plot(
    ax=ax,
    column="sen_dem_pct",
    vmin=0,
    vmax=100,
    **choropleth_style,
)

precinct_outlines = county_precinct_gdf.boundary
precinct_outlines.plot(ax=ax, edgecolor="0.1", linewidth=2.5)
plt.show()

In [None]:
# Label each prorated block row with the first 11 characters of its index,
# which serve as the tract key for re-aggregation.
prorated_block_ids = prorated_votes_df.index
prorated_votes_df["tract"] = prorated_block_ids.str[:11]

In [None]:
# Re-aggregate the block-level vote estimates up to tracts.
votes_by_tract = prorated_votes_df.groupby("tract")
prorated_tract_votes_df = votes_by_tract[sen_columns].sum()
prorated_tract_votes_df

In [None]:
# Democratic share of the Senate vote per tract, as a percentage.
tract_dem_votes = prorated_tract_votes_df["USSen_DEM"]
tract_all_votes = prorated_tract_votes_df["USSen_total"]
prorated_tract_votes_df["sen_dem_pct"] = 100 * tract_dem_votes / tract_all_votes

In [None]:
# Tract-level Senate Dem % on a fixed 0-100 scale.
fig, ax = plt.subplots(figsize=(40, 20))
tracts_with_votes = county_tract_gdf.join(prorated_tract_votes_df)
tracts_with_votes.plot(
    ax=ax,
    column="sen_dem_pct",
    vmin=0,
    vmax=100,
    **choropleth_style,
)
ax.set_title(f"{county_name} 2022 Senate Dem % (aggregated up to tracts)", fontsize=18)
ax.axis('off')
plt.show()

In [None]:
# Three tract-level maps side by side: median age, median income, and the
# 2022 Senate Democratic share.
fig, axes = plt.subplots(1, 3, figsize=(40, 20))
axes[0].axis('off')
axes[0].set_title(f"{county_name}: median age", fontsize=18)

county_tract_with_acs_gdf.plot(
    ax=axes[0],
    column="median_age",
    vmin=0,
    **choropleth_style,
)

axes[1].axis('off')
axes[1].set_title(f"{county_name}: median income ($)", fontsize=18)
county_tract_with_acs_gdf.plot(
    ax=axes[1],
    column="median_income",
    vmin=0,
    **choropleth_style,
)

axes[2].axis('off')
axes[2].set_title(f"{county_name}: 2022 Senate Dem %", fontsize=18)
county_tract_gdf.join(prorated_tract_votes_df).plot(
    ax=axes[2],
    column="sen_dem_pct",
    # Pin the percentage scale to 0-100, matching the standalone tract-level
    # Senate map; without it the colors are stretched over the observed range
    # and the two renderings of the same data disagree.
    vmin=0,
    vmax=100,
    **choropleth_style,
)
plt.show()