In [None]:
import os

import pandas as pd
import requests

# ==========================
# CONFIGURATION
# ==========================
# The Census API key is read from the CENSUS_API_KEY environment variable so
# that it is never stored in the notebook. If the variable is unset the "key"
# parameter is omitted; the Census API documents a small daily quota for
# keyless requests.
API_KEY = os.environ.get("CENSUS_API_KEY", "")
OUTPUT_CSV = "census_zip_pop.csv"
REQUEST_TIMEOUT = 120  # seconds; the nationwide ZCTA response is large

# Variables to pull from ACS 5-year 2022
# B01003_001E = Total population
# B17001_002E = Population below poverty
# B19013_001E = Median household income
# B25064_001E = Median gross rent (dollars per month)
# NOTE: B25070_001E, used previously, is the universe total of the
# "gross rent as a percentage of household income" table, not a rent amount.
VARS = ["B01003_001E", "B17001_002E", "B19013_001E", "B25064_001E"]

# Endpoint for ACS 5-year ZCTA
BASE_URL = "https://api.census.gov/data/2022/acs/acs5"

# ==========================
# FETCH DATA
# ==========================
params = {
    "get": ",".join(VARS),
    "for": "zip code tabulation area:*",
}
if API_KEY:
    params["key"] = API_KEY

print("Fetching Census data...")
# A timeout keeps the cell from hanging forever if the API stops responding.
response = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)
response.raise_for_status()  # Raise error if request fails

# The API returns a JSON array of arrays: the first row is the header,
# every following row is one ZCTA.
data = response.json()
columns = data[0]
rows = data[1:]

df = pd.DataFrame(rows, columns=columns)

# ==========================
# CLEAN AND PROCESS
# ==========================
# Convert numeric columns (the API delivers every value as a string)
num_cols = VARS
df[num_cols] = df[num_cols].apply(pd.to_numeric, errors="coerce")

# ACS marks "estimate not available" with large negative sentinel values
# (for example -666666666). None of the requested variables can legitimately
# be negative, so every negative value is treated as missing.
df[num_cols] = df[num_cols].where(df[num_cols] >= 0)

# Rename columns
df = df.rename(columns={
    "B01003_001E": "population",
    "B17001_002E": "poverty_count",
    "B19013_001E": "median_income",
    "B25064_001E": "median_gross_rent",
    "zip code tabulation area": "zipcode"
})

# Compute derived metrics. Zero denominators are masked so the ratios come
# out as NaN instead of inf.
# NOTE(review): poverty_rate divides by total population; the poverty-status
# universe (B17001_001E) is slightly smaller - confirm which is intended.
df["poverty_rate"] = df["poverty_count"] / df["population"].where(df["population"] > 0)
# NOTE(review): this is monthly median rent over annual median income;
# multiply by 12 if an annual share of income is wanted.
df["rent_burden"] = df["median_gross_rent"] / df["median_income"].where(df["median_income"] > 0)

# Keep only useful columns
df = df[["zipcode", "population", "poverty_rate", "median_income", "median_gross_rent", "rent_burden"]]

# Sort by ZIP (zipcode is a string, so leading zeros are preserved)
df = df.sort_values("zipcode").reset_index(drop=True)

# Save to CSV
df.to_csv(OUTPUT_CSV, index=False)
print(f"Census data saved to {OUTPUT_CSV}")


Fetching Census data...
Census data saved to census_zip_pop.csv


In [3]:
# ZIP codes treated as Boston in this notebook. They are kept as strings so
# the leading zero is preserved.
boston_zips = """
    02026 02108 02109 02110 02111 02113 02114 02115 02116
    02118 02119 02120 02121 02122 02124 02125 02126 02127
    02128 02129 02130 02131 02132 02134 02135 02136 02152
    02163 02186 02199 02210 02215 02445 02446 02458 02467
""".split()

# The full US ZCTA shapefile

- I downloaded the nationwide ZCTA shapefile (TIGER/Line) from the Census website:
  https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html

- I then filtered it down to just the Boston ZIP codes and saved the result as a GeoJSON file.


In [4]:
from pathlib import Path

import geopandas as gpd

# Load the full US ZCTA shapefile
# The path is built from the current user's home directory rather than a
# hardcoded user name, so the cell also runs on other machines that keep the
# download in ~/Downloads. Kept as a str so it behaves like the previous value.
shp_path = str(
    Path.home() / "Downloads" / "tl_2025_us_zcta520" / "tl_2025_us_zcta520.shp"
)  # adjust path if inside zip
zcta_gdf = gpd.read_file(shp_path)

# Ensure ZIP code column is string and zero-padded
zcta_gdf['ZCTA5CE20'] = zcta_gdf['ZCTA5CE20'].astype(str).str.zfill(5)

# Filter only Boston ZIPs
boston_gdf = zcta_gdf[zcta_gdf['ZCTA5CE20'].isin(boston_zips)].copy()

# Not every ZIP code has a ZCTA polygon; report any requested code that was
# not found instead of dropping it silently.
missing_zips = sorted(set(boston_zips) - set(boston_gdf['ZCTA5CE20']))
if missing_zips:
    print("Warning: no ZCTA geometry found for:", ", ".join(missing_zips))

# GeoJSON (RFC 7946) expects WGS84 longitude/latitude, so reproject before
# writing. Skipped if the shapefile carries no CRS information.
if boston_gdf.crs is not None:
    boston_gdf = boston_gdf.to_crs(epsg=4326)

# Save as GeoJSON for easy plotting
boston_gdf.to_file("boston_zcta.geojson", driver="GeoJSON")

print("Saved Boston ZCTA GeoJSON with", len(boston_gdf), "ZIPs")

Saved Boston ZCTA GeoJSON with 36 ZIPs
