In [2]:
# Final Project: Three Years of Mental Health
# Depression and Health Behaviors Across U.S. Counties (2021-2023)
# Hunter Tzou

# =============================================================================
# PART 1: SETUP AND DATA LOADING
# =============================================================================

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns

# Load the three releases
print("Loading datasets...")
places_2023 = pd.read_csv(r"C:\Users\HunterTzou\OneDrive - Axle Informatics\Documents\000 - SCHOOL\000 - Datasets\PLACES__Local_Data_for_Better_Health__County_Data_2023_release.csv")
places_2024 = pd.read_csv(r"C:\Users\HunterTzou\OneDrive - Axle Informatics\Documents\000 - SCHOOL\000 - Datasets\PLACES__Local_Data_for_Better_Health,_County_Data_2024_release_20251218.csv")
places_2025 = pd.read_csv(r"C:\Users\HunterTzou\OneDrive - Axle Informatics\Documents\000 - SCHOOL\000 - Datasets\PLACES__Local_Data_for_Better_Health,_County_Data,_2025_release_20251218.csv")

print("✓ Datasets loaded successfully!")

# =============================================================================
# PART 2: DATA FILTERING AND PREPARATION
# =============================================================================

# Our 6 key measures
key_measures = ['DEPRESSION', 'LPA', 'OBESITY', 'ACCESS2', 'CHECKUP', 'CSMOKING']

# Filter each dataset for our needs
def filter_places_data(df, year):
    """Filter PLACES data for age-adjusted prevalence and key measures"""
    filtered = df[
        (df['Year'] == year) &
        (df['Data_Value_Type'] == 'Age-adjusted prevalence') &
        (df['MeasureId'].isin(key_measures))
    ].copy()
    
    # Select only needed columns
    filtered = filtered[['StateAbbr', 'StateDesc', 'LocationName', 'LocationID', 
                         'MeasureId', 'Data_Value', 'Year']]
    
    return filtered

# Extract data for each year
print("\nFiltering data by year...")
data_2021 = filter_places_data(places_2023, 2021)
data_2022 = filter_places_data(places_2024, 2022)
data_2023 = filter_places_data(places_2025, 2023)

print(f"2021: {len(data_2021)} rows")
print(f"2022: {len(data_2022)} rows")
print(f"2023: {len(data_2023)} rows")

# Combine all years
combined_data = pd.concat([data_2021, data_2022, data_2023], ignore_index=True)
print(f"\nCombined dataset: {len(combined_data)} rows")

# =============================================================================
# PART 3: RESHAPE DATA TO WIDE FORMAT
# =============================================================================

# Pivot to wide format: one row per county per year
wide_data = combined_data.pivot_table(
    index=['LocationID', 'LocationName', 'StateAbbr', 'StateDesc', 'Year'],
    columns='MeasureId',
    values='Data_Value',
    aggfunc='first'
).reset_index()

# Rename columns to be more descriptive
wide_data.columns.name = None
wide_data = wide_data.rename(columns={
    'DEPRESSION': 'depression',
    'LPA': 'physical_inactivity',
    'OBESITY': 'obesity',
    'ACCESS2': 'no_insurance',
    'CHECKUP': 'routine_checkup',
    'CSMOKING': 'smoking'
})

# Remove rows with any missing values
wide_data_clean = wide_data.dropna()

print(f"\nWide format data: {len(wide_data_clean)} county-year observations")
print(f"Counties per year: {len(wide_data_clean[wide_data_clean['Year']==2021])}")

# =============================================================================
# PART 4: CALCULATE CHANGE SCORES (2023 - 2021)
# =============================================================================

# Separate by year
df_2021 = wide_data_clean[wide_data_clean['Year'] == 2021].copy()
df_2023 = wide_data_clean[wide_data_clean['Year'] == 2023].copy()

# Merge to calculate changes
change_data = df_2021.merge(
    df_2023,
    on=['LocationID', 'LocationName', 'StateAbbr', 'StateDesc'],
    suffixes=('_2021', '_2023')
)

# Calculate change scores
change_data['depression_change'] = change_data['depression_2023'] - change_data['depression_2021']
change_data['smoking_change'] = change_data['smoking_2023'] - change_data['smoking_2021']




Loading datasets...


  places_2024 = pd.read_csv(r"C:\Users\HunterTzou\OneDrive - Axle Informatics\Documents\000 - SCHOOL\000 - Datasets\PLACES__Local_Data_for_Better_Health,_County_Data_2024_release_20251218.csv")


✓ Datasets loaded successfully!

Filtering data by year...
2021: 18462 rows
2022: 18870 rows
2023: 17748 rows

Combined dataset: 55080 rows

Wide format data: 9176 county-year observations
Counties per year: 3076


  places_2025 = pd.read_csv(r"C:\Users\HunterTzou\OneDrive - Axle Informatics\Documents\000 - SCHOOL\000 - Datasets\PLACES__Local_Data_for_Better_Health,_County_Data,_2025_release_20251218.csv")


In [3]:
# Continue with the rest of the filtering...
key_measures = ['DEPRESSION', 'LPA', 'OBESITY', 'ACCESS2', 'CHECKUP', 'CSMOKING']

def filter_places_data(df, year):
    filtered = df[
        (df['Year'] == year) &
        (df['Data_Value_Type'] == 'Age-adjusted prevalence') &
        (df['MeasureId'].isin(key_measures))
    ].copy()
    
    filtered = filtered[['StateAbbr', 'StateDesc', 'LocationName', 'LocationID', 
                         'MeasureId', 'Data_Value', 'Year']]
    
    return filtered

print("Filtering data by year...")
data_2021 = filter_places_data(places_2023, 2021)
data_2022 = filter_places_data(places_2024, 2022)
data_2023 = filter_places_data(places_2025, 2023)

print(f"2021: {len(data_2021)} rows")
print(f"2022: {len(data_2022)} rows")
print(f"2023: {len(data_2023)} rows\n")

# Now let me print the KEY STATS you need for the report:
combined_data = pd.concat([data_2021, data_2022, data_2023], ignore_index=True)

wide_data = combined_data.pivot_table(
    index=['LocationID', 'LocationName', 'StateAbbr', 'StateDesc', 'Year'],
    columns='MeasureId',
    values='Data_Value',
    aggfunc='first'
).reset_index()

wide_data.columns.name = None
wide_data = wide_data.rename(columns={
    'DEPRESSION': 'depression',
    'LPA': 'physical_inactivity',
    'OBESITY': 'obesity',
    'ACCESS2': 'no_insurance',
    'CHECKUP': 'routine_checkup',
    'CSMOKING': 'smoking'
})

wide_data_clean = wide_data.dropna()

print("="*70)
print("KEY STATISTICS FOR YOUR REPORT")
print("="*70)

# Depression by year
print("\n1. DEPRESSION PREVALENCE BY YEAR:")
for year in [2021, 2022, 2023]:
    year_data = wide_data_clean[wide_data_clean['Year'] == year]
    print(f"   {year}: Mean = {year_data['depression'].mean():.2f}%, Std = {year_data['depression'].std():.2f}%, n = {len(year_data)} counties")

# Calculate changes
df_2021 = wide_data_clean[wide_data_clean['Year'] == 2021].copy()
df_2023 = wide_data_clean[wide_data_clean['Year'] == 2023].copy()

change_data = df_2021.merge(
    df_2023,
    on=['LocationID', 'LocationName', 'StateAbbr', 'StateDesc'],
    suffixes=('_2021', '_2023')
)

change_data['depression_change'] = change_data['depression_2023'] - change_data['depression_2021']
change_data['smoking_change'] = change_data['smoking_2023'] - change_data['smoking_2021']

print(f"\n2. OVERALL CHANGE (2021 → 2023):")
print(f"   Average change: +{change_data['depression_change'].mean():.2f} percentage points")
print(f"   Biggest increase: +{change_data['depression_change'].max():.2f} points")
print(f"   Biggest decrease: {change_data['depression_change'].min():.2f} points")

improved = len(change_data[change_data['depression_change'] < 0])
worsened = len(change_data[change_data['depression_change'] > 0])
print(f"   Counties improved: {improved} ({improved/len(change_data)*100:.1f}%)")
print(f"   Counties worsened: {worsened} ({worsened/len(change_data)*100:.1f}%)")

# Correlations
print(f"\n3. SMOKING-DEPRESSION CORRELATION BY YEAR:")
for year in [2021, 2022, 2023]:
    year_data = wide_data_clean[wide_data_clean['Year'] == year]
    corr = year_data['smoking'].corr(year_data['depression'])
    print(f"   {year}: r = {corr:.3f}")

# State-level changes
state_changes = change_data.groupby('StateAbbr')['depression_change'].mean().reset_index()
state_changes = state_changes.sort_values('depression_change')

print(f"\n4. TOP 5 STATES - BIGGEST INCREASES:")
top_increases = state_changes.nlargest(5, 'depression_change')
for idx, row in top_increases.iterrows():
    print(f"   {row['StateAbbr']}: +{row['depression_change']:.2f} points")

print(f"\n5. TOP 5 STATES - BIGGEST DECREASES:")
top_decreases = state_changes.nsmallest(5, 'depression_change')
for idx, row in top_decreases.iterrows():
    print(f"   {row['StateAbbr']}: {row['depression_change']:.2f} points")

print("\n" + "="*70)

Filtering data by year...
2021: 18462 rows
2022: 18870 rows
2023: 17748 rows

KEY STATISTICS FOR YOUR REPORT

1. DEPRESSION PREVALENCE BY YEAR:
   2021: Mean = 23.04%, Std = 3.28%, n = 3076 counties
   2022: Mean = 24.29%, Std = 3.45%, n = 3144 counties
   2023: Mean = 24.06%, Std = 3.46%, n = 2956 counties

2. OVERALL CHANGE (2021 → 2023):
   Average change: +1.36 percentage points
   Biggest increase: +6.40 points
   Biggest decrease: -5.50 points
   Counties improved: 553 (19.2%)
   Counties worsened: 2270 (79.0%)

3. SMOKING-DEPRESSION CORRELATION BY YEAR:
   2021: r = 0.516
   2022: r = 0.462
   2023: r = 0.382

4. TOP 5 STATES - BIGGEST INCREASES:
   SD: +4.49 points
   DE: +4.07 points
   WV: +4.04 points
   HI: +3.94 points
   CA: +3.70 points

5. TOP 5 STATES - BIGGEST DECREASES:
   NJ: -3.01 points
   NY: -2.29 points
   MS: -1.70 points
   RI: -0.62 points
   GA: -0.47 points



In [29]:
# =============================================================================
# VISUALIZATION 1: INTERACTIVE CHOROPLETH MAP (PLOTLY)
# =============================================================================

print("\n" + "="*70)
print("CREATING VISUALIZATION 1: Interactive Choropleth Map")
print("="*70)

# Prepare data for map - need FIPS codes
# Extract numeric FIPS from LocationID (format: varies by dataset)
wide_data_clean['fips'] = wide_data_clean['LocationID'].astype(str).str.zfill(5)

# Create formatted county name with state for hover
wide_data_clean['county_state'] = wide_data_clean['LocationName'] + ', ' + wide_data_clean['StateAbbr']

# Create interactive choropleth with animation
fig1 = px.choropleth(
    wide_data_clean,
    geojson="https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json",
    locations='fips',
    color='depression',
    animation_frame='Year',
    color_continuous_scale='YlOrRd',
    range_color=(10, 30),
    scope="usa",
    hover_name='county_state',
    hover_data={
        'fips': False,
        'county_state': False,  # Already in hover_name
        'depression': ':.1f',
        'smoking': ':.1f',
        'StateAbbr': False
    },
    labels={
        'depression': 'Depression %',
        'smoking': 'Smoking %'
    },
    title='Depression Prevalence Across U.S. Counties (2021-2023)'
)

fig1.update_layout(
    geo=dict(bgcolor='rgba(0,0,0,0)'),
    height=600,
    margin=dict(l=0, r=0, t=50, b=0)
)

fig1.write_html("viz1_depression_map.html")
print("✓ Saved: viz1_depression_map.html")

# =============================================================================
# VISUALIZATION 2: INTERACTIVE SCATTER PLOT (PLOTLY)
# =============================================================================

print("\n" + "="*70)
print("CREATING VISUALIZATION 2: Interactive Scatter Plot")
print("="*70)

# Create formatted county name with state for hover
if 'county_state' not in wide_data_clean.columns:
    wide_data_clean['county_state'] = wide_data_clean['LocationName'] + ', ' + wide_data_clean['StateAbbr']

# Create animated scatter plot: Smoking vs Depression
fig2 = px.scatter(
    wide_data_clean,
    x='smoking',
    y='depression',
    animation_frame='Year',
    hover_name='county_state',
    hover_data={
        'county_state': False,  # Already in hover_name
        'smoking': ':.1f',
        'depression': ':.1f',
        'obesity': ':.1f',
        'StateAbbr': False
    },
    color='depression',  # Color by depression instead of state
    color_continuous_scale='Reds',
    size='obesity',
    size_max=15,
    opacity=0.6,
    title='Smoking vs Depression Prevalence by County (2021-2023)<br><sub>Point size = Obesity prevalence | Color intensity = Depression level</sub>',
    labels={
        'smoking': 'Smoking Prevalence (%)',
        'depression': 'Depression Prevalence (%)',
        'obesity': 'Obesity %'
    },
    range_x=[5, 45],
    range_y=[10, 35]
)

# Update layout with clear legend
fig2.update_layout(
    height=650,
    coloraxis_colorbar=dict(
        title="Depression<br>Prevalence (%)",
        thicknessmode="pixels",
        thickness=15,
        lenmode="pixels",
        len=300
    ),
    annotations=[
        dict(
            text="<b>How to read this chart:</b><br>" +
                 "• Each dot = one U.S. county<br>" +
                 "• Dot SIZE = Obesity prevalence (bigger = more obesity)<br>" +
                 "• Dot COLOR = Depression level (darker red = more depression)<br>" +
                 "• Hover over any dot to see county details",
            xref="paper", yref="paper",
            x=0.02, y=0.98,
            xanchor='left', yanchor='top',
            showarrow=False,
            bgcolor="rgba(255, 255, 255, 0.8)",
            bordercolor="black",
            borderwidth=1,
            font=dict(size=10)
        )
    ]
)

fig2.write_html("viz2_smoking_depression_scatter.html")
print("✓ Saved: viz2_smoking_depression_scatter.html")

# =============================================================================
# VISUALIZATION 3: SMALL MULTIPLES MAP (STATIC)
# =============================================================================

print("\n" + "="*70)
print("CREATING VISUALIZATION 3: Small Multiples Comparison")
print("="*70)

# Create side-by-side static maps for each year
fig3, axes = plt.subplots(1, 3, figsize=(20, 6))

for idx, year in enumerate([2021, 2022, 2023]):
    year_data = wide_data_clean[wide_data_clean['Year'] == year]
    
    # Group by state and get average depression
    state_avg = year_data.groupby('StateAbbr')['depression'].mean().reset_index()
    
    # Create bar chart for this year
    state_avg_sorted = state_avg.sort_values('depression', ascending=False).head(15)
    
    axes[idx].barh(state_avg_sorted['StateAbbr'], state_avg_sorted['depression'], 
                   color='coral')
    axes[idx].set_xlabel('Average Depression Prevalence (%)')
    axes[idx].set_title(f'{year}')
    axes[idx].invert_yaxis()
    axes[idx].grid(axis='x', alpha=0.3)

plt.suptitle('Top 15 States by Depression Prevalence (2021-2023)', 
             fontsize=16, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig('viz3_state_comparison.png', dpi=300, bbox_inches='tight')
plt.close()
print("✓ Saved: viz3_state_comparison.png")

# =============================================================================
# VISUALIZATION 4: CHANGE ANALYSIS LINE CHART (STATIC)
# =============================================================================

print("\n" + "="*70)
print("CREATING VISUALIZATION 4: Depression Change by State")
print("="*70)

# Calculate state-level changes
state_changes = change_data.groupby('StateAbbr')['depression_change'].mean().reset_index()
state_changes = state_changes.sort_values('depression_change')

# Separate increases and decreases
increases = state_changes[state_changes['depression_change'] > 0]
decreases = state_changes[state_changes['depression_change'] <= 0]

# Create diverging bar chart
fig4, ax = plt.subplots(figsize=(12, 10))

# Plot increases (red) and decreases (green)
ax.barh(increases['StateAbbr'], increases['depression_change'], color='#d62728', label='Increased')
ax.barh(decreases['StateAbbr'], decreases['depression_change'], color='#2ca02c', label='Decreased')

ax.axvline(x=0, color='black', linestyle='-', linewidth=0.8)
ax.set_xlabel('Change in Depression Prevalence (2023 - 2021, percentage points)', fontsize=12)
ax.set_title('Change in Depression Prevalence by State (2021-2023)', 
             fontsize=14, fontweight='bold', pad=20)
ax.legend(loc='lower right')
ax.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.savefig('viz4_depression_change.png', dpi=300, bbox_inches='tight')
plt.close()
print("✓ Saved: viz4_depression_change.png")

# =============================================================================
# VISUALIZATION 5: CORRELATION HEATMAP COMPARISON (STATIC)
# =============================================================================

print("\n" + "="*70)
print("CREATING VISUALIZATION 5: Correlation Comparison")
print("="*70)

# Calculate correlations for 2021 and 2023
variables = ['depression', 'smoking', 'obesity', 'physical_inactivity', 
             'no_insurance', 'routine_checkup']

corr_2021 = df_2021[variables].corr()
corr_2023 = df_2023[variables].corr()

# Create side-by-side heatmaps
fig5, axes = plt.subplots(1, 2, figsize=(16, 7))

# 2021 heatmap
sns.heatmap(corr_2021, annot=True, fmt='.2f', cmap='coolwarm', center=0,
            square=True, ax=axes[0], vmin=-1, vmax=1, cbar_kws={'shrink': 0.8})
axes[0].set_title('2021 Correlations', fontsize=14, fontweight='bold', pad=20)

# 2023 heatmap
sns.heatmap(corr_2023, annot=True, fmt='.2f', cmap='coolwarm', center=0,
            square=True, ax=axes[1], vmin=-1, vmax=1, cbar_kws={'shrink': 0.8})
axes[1].set_title('2023 Correlations', fontsize=14, fontweight='bold', pad=20)

plt.suptitle('Correlation Matrix Comparison: Health Behaviors and Depression', 
             fontsize=16, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig('viz5_correlation_comparison.png', dpi=300, bbox_inches='tight')
plt.close()
print("✓ Saved: viz5_correlation_comparison.png")

# =============================================================================
# VISUALIZATION 6: DISTRIBUTION OF CHANGES (BONUS STATIC)
# =============================================================================

print("\n" + "="*70)
print("CREATING VISUALIZATION 6: Distribution of Depression Changes")
print("="*70)

fig6, ax = plt.subplots(figsize=(12, 6))

# Histogram of depression changes
ax.hist(change_data['depression_change'], bins=50, color='steelblue', 
        edgecolor='black', alpha=0.7)
ax.axvline(x=0, color='red', linestyle='--', linewidth=2, label='No change')
ax.axvline(x=change_data['depression_change'].mean(), color='orange', 
           linestyle='--', linewidth=2, label=f'Mean change: {change_data["depression_change"].mean():.2f}')

ax.set_xlabel('Change in Depression Prevalence (2023 - 2021, percentage points)', fontsize=12)
ax.set_ylabel('Number of Counties', fontsize=12)
ax.set_title('Distribution of Depression Prevalence Changes Across U.S. Counties (2021-2023)', 
             fontsize=14, fontweight='bold', pad=20)
ax.legend(fontsize=11)
ax.grid(axis='y', alpha=0.3)

# Add text annotation
improved = len(change_data[change_data['depression_change'] < 0])
worsened = len(change_data[change_data['depression_change'] > 0])
ax.text(0.02, 0.98, f'Improved: {improved} counties\nWorsened: {worsened} counties',
        transform=ax.transAxes, verticalalignment='top',
        bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5),
        fontsize=11)

plt.tight_layout()
plt.savefig('viz6_depression_distribution.png', dpi=300, bbox_inches='tight')
plt.close()
print("✓ Saved: viz6_depression_distribution.png")

# =============================================================================
# PART 6: KEY FINDINGS SUMMARY
# =============================================================================

print("\n" + "="*70)
print("KEY FINDINGS SUMMARY")
print("="*70)

print(f"\n1. OVERALL TRENDS:")
print(f"   - Average depression 2021: {df_2021['depression'].mean():.2f}%")
print(f"   - Average depression 2023: {df_2023['depression'].mean():.2f}%")
print(f"   - Average change: {change_data['depression_change'].mean():.2f} percentage points")

print(f"\n2. GEOGRAPHIC PATTERNS:")
print(f"   - Counties with increased depression: {worsened} ({worsened/len(change_data)*100:.1f}%)")
print(f"   - Counties with decreased depression: {improved} ({improved/len(change_data)*100:.1f}%)")

print(f"\n3. TOP 5 STATES WITH BIGGEST INCREASES:")
top_increases = state_changes.nlargest(5, 'depression_change')
for idx, row in top_increases.iterrows():
    print(f"   - {row['StateAbbr']}: +{row['depression_change']:.2f} points")

print(f"\n4. TOP 5 STATES WITH BIGGEST DECREASES:")
top_decreases = state_changes.nsmallest(5, 'depression_change')
for idx, row in top_decreases.iterrows():
    print(f"   - {row['StateAbbr']}: {row['depression_change']:.2f} points")

print(f"\n5. SMOKING-DEPRESSION RELATIONSHIP:")
for year in [2021, 2023]:
    year_data = wide_data_clean[wide_data_clean['Year'] == year]
    corr = year_data['smoking'].corr(year_data['depression'])
    print(f"   - {year} correlation: {corr:.3f}")

print("\n" + "="*70)
print("✓ ALL VISUALIZATIONS CREATED SUCCESSFULLY!")
print("="*70)
print("\nGenerated files:")
print("  - viz1_depression_map.html (Interactive)")
print("  - viz2_smoking_depression_scatter.html (Interactive)")
print("  - viz3_state_comparison.png")
print("  - viz4_depression_change.png")
print("  - viz5_correlation_comparison.png")
print("  - viz6_depression_distribution.png")


CREATING VISUALIZATION 1: Interactive Choropleth Map
✓ Saved: viz1_depression_map.html

CREATING VISUALIZATION 2: Interactive Scatter Plot
✓ Saved: viz2_smoking_depression_scatter.html

CREATING VISUALIZATION 3: Small Multiples Comparison
✓ Saved: viz3_state_comparison.png

CREATING VISUALIZATION 4: Depression Change by State
✓ Saved: viz4_depression_change.png

CREATING VISUALIZATION 5: Correlation Comparison
✓ Saved: viz5_correlation_comparison.png

CREATING VISUALIZATION 6: Distribution of Depression Changes
✓ Saved: viz6_depression_distribution.png

KEY FINDINGS SUMMARY

1. OVERALL TRENDS:
   - Average depression 2021: 23.04%
   - Average depression 2023: 24.06%
   - Average change: 1.36 percentage points

2. GEOGRAPHIC PATTERNS:
   - Counties with increased depression: 2270 (79.0%)
   - Counties with decreased depression: 553 (19.2%)

3. TOP 5 STATES WITH BIGGEST INCREASES:
   - SD: +4.49 points
   - DE: +4.07 points
   - WV: +4.04 points
   - HI: +3.94 points
   - CA: +3.70 poin

In [27]:
# Check which specific measures are missing in each year
key_measures = ['DEPRESSION', 'LPA', 'OBESITY', 'ACCESS2', 'CHECKUP', 'CSMOKING']

print("=== DETAILED MEASURE CHECK ===\n")

# 2023 release - Year 2021
print("2023 release - Year 2021:")
year_2021_2023 = places_2023[places_2023['Year'] == 2021]
for measure in key_measures:
    exists = measure in year_2021_2023['MeasureId'].values
    print(f"  {measure}: {'✓' if exists else '✗ MISSING'}")

print("\n2023 release - Year 2020:")
year_2020 = places_2023[places_2023['Year'] == 2020]
for measure in key_measures:
    exists = measure in year_2020['MeasureId'].values
    print(f"  {measure}: {'✓' if exists else '✗ MISSING'}")

print("\n2024 release - Year 2022:")
year_2022_2024 = places_2024[places_2024['Year'] == 2022]
for measure in key_measures:
    exists = measure in year_2022_2024['MeasureId'].values
    print(f"  {measure}: {'✓' if exists else '✗ MISSING'}")

print("\n2024 release - Year 2021:")
year_2021_2024 = places_2024[places_2024['Year'] == 2021]
for measure in key_measures:
    exists = measure in year_2021_2024['MeasureId'].values
    print(f"  {measure}: {'✓' if exists else '✗ MISSING'}")

print("\n2025 release - Year 2023:")
year_2023 = places_2025[places_2025['Year'] == 2023]
for measure in key_measures:
    exists = measure in year_2023['MeasureId'].values
    print(f"  {measure}: {'✓' if exists else '✗ MISSING'}")

print("\n2025 release - Year 2022:")
year_2022_2025 = places_2025[places_2025['Year'] == 2022]
for measure in key_measures:
    exists = measure in year_2022_2025['MeasureId'].values
    print(f"  {measure}: {'✓' if exists else '✗ MISSING'}")

=== DETAILED MEASURE CHECK ===

2023 release - Year 2021:
  DEPRESSION: ✓
  LPA: ✓
  OBESITY: ✓
  ACCESS2: ✓
  CHECKUP: ✓
  CSMOKING: ✓

2023 release - Year 2020:
  DEPRESSION: ✗ MISSING
  LPA: ✗ MISSING
  OBESITY: ✗ MISSING
  ACCESS2: ✗ MISSING
  CHECKUP: ✗ MISSING
  CSMOKING: ✗ MISSING

2024 release - Year 2022:
  DEPRESSION: ✓
  LPA: ✓
  OBESITY: ✓
  ACCESS2: ✓
  CHECKUP: ✓
  CSMOKING: ✓

2024 release - Year 2021:
  DEPRESSION: ✗ MISSING
  LPA: ✗ MISSING
  OBESITY: ✗ MISSING
  ACCESS2: ✗ MISSING
  CHECKUP: ✗ MISSING
  CSMOKING: ✗ MISSING

2025 release - Year 2023:
  DEPRESSION: ✓
  LPA: ✓
  OBESITY: ✓
  ACCESS2: ✓
  CHECKUP: ✓
  CSMOKING: ✓

2025 release - Year 2022:
  DEPRESSION: ✗ MISSING
  LPA: ✗ MISSING
  OBESITY: ✗ MISSING
  ACCESS2: ✗ MISSING
  CHECKUP: ✗ MISSING
  CSMOKING: ✗ MISSING


In [28]:
import os

# Set your desired output folder
output_folder = r"C:\Users\HunterTzou\OneDrive - Axle Informatics\Documents\000 - SCHOOL\DATA 110\Final_Project\FinalProject_Visualizations"

# Create the folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Change working directory to that folder
os.chdir(output_folder)

print(f"Visualizations will be saved to: {output_folder}")

Visualizations will be saved to: C:\Users\HunterTzou\OneDrive - Axle Informatics\Documents\000 - SCHOOL\DATA 110\Final_Project\FinalProject_Visualizations
