# Census Data Analysis: Median Age by Census Tract

This notebook analyzes and visualizes median age across census tracts using the Census Bureau's API through the `cenpy` library. We'll fetch tract-level median age and total population from the 2021 ACS 5-year estimates (using California as the example state), summarize the results, and create visualizations to understand how age is distributed across census tracts.

In [None]:
# Import required libraries
import cenpy
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

# Set plotting style.
# Both calls update matplotlib's global settings, so their order matters:
# reset to matplotlib's built-in defaults first, then layer seaborn's theme on top.
plt.style.use('default')  # 'default' rather than 'seaborn': the 'seaborn' style name is rejected with an OSError
sns.set_theme()  # Apply seaborn's default theme to all subsequent plots

OSError: 'seaborn' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)

In [None]:
# Open a connection to the 2021 ACS 5-year dataset on the Census API
conn = cenpy.remote.APIConnection('ACSDT5Y2021')

# Census variable codes requested by the rest of the notebook
variables = ['B01002_001E', 'B01001_001E']  # median age, total population

# Look up the metadata rows for those codes so the reader can confirm
# what each one means before any data is pulled
var_desc = conn.variables.loc[variables]
print("Variables we're retrieving:", var_desc, sep="\n")

In [None]:
# Fetch tract-level data for California as an example.
# Change the state FIPS code below to analyze a different state.
ca_tracts = conn.query(
    cols=variables,
    geo_unit='tract:*',  # explicit wildcard: request every tract within the filter
    geo_filter={'state': '06'}  # FIPS code for California
)

# Make sure the requested columns are numeric; anything unparseable becomes NaN.
for var in variables:
    ca_tracts[var] = pd.to_numeric(ca_tracts[var], errors='coerce')

# ACS marks estimates it cannot compute (e.g. the median age of a tract with
# too few or no residents) with large negative sentinel values rather than
# leaving them blank. They parse as ordinary numbers, so without this step
# they would distort the summary statistics, the histogram bins, and the
# minimum median age. Treat them as missing instead.
ACS_MISSING_VALUES = [
    -999999999, -888888888, -666666666,
    -555555555, -333333333, -222222222,
]
ca_tracts[variables] = ca_tracts[variables].mask(
    ca_tracts[variables].isin(ACS_MISSING_VALUES)
)

# Add readable column names
ca_tracts = ca_tracts.rename(columns={
    'B01002_001E': 'median_age',
    'B01001_001E': 'total_population'
})

print("First few rows of our data:")
print(ca_tracts.head())

print("\nSummary statistics:")
print(ca_tracts.describe())

In [None]:
# Visualize how median age varies across census tracts.
# Each figure is built on an explicit Axes object so labels are attached to
# the plot they describe rather than to pyplot's "current" figure.

# 1. Histogram of tract-level median age
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(data=ca_tracts, x='median_age', bins=50, ax=ax)
ax.set_title('Distribution of Median Age Across Census Tracts')
ax.set_xlabel('Median Age (years)')
ax.set_ylabel('Count of Census Tracts')
plt.show()

# 2. Median age against tract population (semi-transparent points so dense
#    regions remain readable)
fig, ax = plt.subplots(figsize=(12, 6))
sns.scatterplot(data=ca_tracts, x='total_population', y='median_age', alpha=0.5, ax=ax)
ax.set_title('Median Age vs Total Population by Census Tract')
ax.set_xlabel('Total Population')
ax.set_ylabel('Median Age (years)')
plt.show()

# Headline numbers: unweighted mean, minimum and maximum of the tract medians
print("\nKey Statistics:")
print(f"Average median age across all tracts: {ca_tracts['median_age'].mean():.1f} years")
print(f"Youngest tract median age: {ca_tracts['median_age'].min():.1f} years")
print(f"Oldest tract median age: {ca_tracts['median_age'].max():.1f} years")