In [None]:
# ACS Climate & Socioeconomic Analysis

This notebook explores socioeconomic characteristics of U.S. cities using the American Community Survey (ACS) 5-year estimates (2021), with a focus on factors that influence climate resilience.

**Key questions:**
- How does median household income vary across U.S. cities?
- Which cities have high renter populations (less housing stability)?
- How do commute patterns correlate with income and housing?
- Which cities may be structurally vulnerable to climate impacts?

**Future extension:** Overlay NOAA climate risk data (heat, flooding) to identify high-risk populations.

In [6]:
import os
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# display settings
pd.set_option('display.max_columns', 50)
pd.set_option('display.width',120)

# plotting style
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12,6)

In [10]:
## Data Source & API Configuration

data_source = "U.S. Census Bureau ACS 5-Year Estimates (2021)" 
geography = "Census Places (cities)"

In [12]:
API_KEY = os.getenv("be1583b7bba6d6bfd6a31b7e809ecdde2acbf1d7")

print("✓ API key loaded")

✓ API key loaded


In [18]:
## Data Acquisition

##"We're pulling city-level data for:"
##"Population"
##"Median household income"
##"Housing tenure (owner vs renter)"
##"Commute methods"


In [19]:
BASE_URL = "https://api.census.gov/data/2021/acs/acs5"

variables = [
    "NAME",
    "B01001_001E",  # Total population
    "B19013_001E",  # Median household income
    "B25003_002E",  # Owner-occupied units
    "B25003_003E",  # Renter-occupied units
    "B08301_001E",  # Total commuters
    "B08301_010E",  # Public transit commuters
    "B08301_019E",  # Walk/bike commuters
]

params = {
    "get": ",".join(variables),
    "for": "place:*",
    "key": API_KEY,
}

response = requests.get(BASE_URL, params=params)
response.raise_for_status()

data = response.json()
print(f"✓ Pulled data for {len(data) - 1} places")

✓ Pulled data for 31908 places


In [20]:
columns = data[0]
rows = data[1:]

df = pd.DataFrame(rows, columns=columns)
df.head()

Unnamed: 0,NAME,B01001_001E,B19013_001E,B25003_002E,B25003_003E,B08301_001E,B08301_010E,B08301_019E,state,place
0,"Abanda CDP, Alabama",231,25787,67,0,92,0,0,1,100
1,"Abbeville city, Alabama",2231,36944,591,331,706,15,7,1,124
2,"Adamsville city, Alabama",4381,46150,1182,308,1546,0,19,1,460
3,"Addison town, Alabama",697,44000,215,81,347,0,0,1,484
4,"Akron town, Alabama",385,28438,79,13,172,0,0,1,676


In [21]:
# Define which columns should be numeric

numeric_cols = [
    "B01001_001E",  # Total population
    "B19013_001E",  # Median household income
    "B25003_002E",  # Owner-occupied units
    "B25003_003E",  # Renter-occupied units
    "B08301_001E",  # Total commuters
    "B08301_010E",  # Public transit commuters
    "B08301_019E",  # Walk/bike commuters
]

# Convert to numeric
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')


# Rename to meaningful names
df.rename(columns={
    "NAME": "Place",
    "B01001_001E": "Total_Population",
    "B19013_001E": "Median_Household_Income",
    "B25003_002E": "Owner_Occupied_Units",
    "B25003_003E": "Renter_Occupied_Units",
    "B08301_001E": "Total_Commuters",
    "B08301_010E": "Public_Transit_Commuters",
    "B08301_019E": "Walk_Bike_Commuters",
}, inplace=True)

# Drop rows with missing income data
df.dropna(subset=["Median_Household_Income"], inplace=True)

print("✓ Data cleaned and prepped for analysis")
df.head()

✓ Data cleaned and prepped for analysis


Unnamed: 0,Place,Total_Population,Median_Household_Income,Owner_Occupied_Units,Renter_Occupied_Units,Total_Commuters,Public_Transit_Commuters,Walk_Bike_Commuters,state,place
0,"Abanda CDP, Alabama",231,25787,67,0,92,0,0,1,100
1,"Abbeville city, Alabama",2231,36944,591,331,706,15,7,1,124
2,"Adamsville city, Alabama",4381,46150,1182,308,1546,0,19,1,460
3,"Addison town, Alabama",697,44000,215,81,347,0,0,1,484
4,"Akron town, Alabama",385,28438,79,13,172,0,0,1,676
