In [None]:
# Mount Google Drive only when running inside Colab. Outside Colab the
# `google.colab` package is not installed, and an unconditional import would
# abort a "Restart & Run All" before any chart is produced.
try:
    from google.colab import drive
except ImportError:
    IN_COLAB = False
    print('google.colab not available; skipping Drive mount.')
else:
    IN_COLAB = True
    drive.mount('/content/drive')

# Slide Visuals Generator

This notebook produces exportable charts and images for the Mastercard Data Challenge 2025 slide deck. It loads Inclusive Growth Score (IGS) and American Community Survey (ACS) data, computes key comparisons between Tract 105 and Tract 1100, and saves each figure as PNG and SVG into `generated_images/` for easy copy/paste into slides.

Outputs include:
- IGS gap and socioeconomic comparisons (income, poverty, unemployment, housing)
- IGS trends (2017–2024) with threshold line at 45
- Strategic pillar gap bars (per metric)
- 5-year baseline vs what-if projections using the Ridge regression model
- High-contrast stat tiles for key headline numbers


In [None]:
# Setup: imports, paths, and helpers
from __future__ import annotations
import os
import json
from pathlib import Path
from typing import Dict, Any, List

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Project root resolution.
# `__file__` is not defined inside a notebook kernel, so a bare
# `Path(__file__)` raises NameError there. Resolution order:
#   1. SLIDE_VISUALS_REPO_ROOT environment variable (explicit override)
#   2. two levels above this file when `__file__` exists (run as a script
#      located under <repo>/notebooks)
#   3. the nearest ancestor of the working directory that contains `data/`
#   4. the parent of the working directory (notebook assumed to be started
#      from <repo>/notebooks)
def _find_repo_root() -> Path:
    """Return the repository root directory used to locate data and outputs."""
    override = os.environ.get('SLIDE_VISUALS_REPO_ROOT')
    if override:
        return Path(override).expanduser().resolve()
    try:
        return Path(__file__).resolve().parents[1]
    except NameError:
        pass
    cwd = Path.cwd().resolve()
    for candidate in (cwd, *cwd.parents):
        if (candidate / 'data').is_dir():
            return candidate
    return cwd.parent


REPO_ROOT = _find_repo_root()
DATA_DIR = REPO_ROOT / 'data'
PUBLIC_DIR = REPO_ROOT / 'public' / 'data'
OUT_DIR = REPO_ROOT / 'generated_images'
# parents=True so a not-yet-existing root does not break output directory creation
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Global plotting style: seaborn "whitegrid" theme, then matplotlib rcParams
# overrides for on-screen DPI, export DPI, and font sizes.
sns.set_theme(style='whitegrid')
plt.rcParams['figure.dpi'] = 140
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.size'] = 11
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['axes.labelsize'] = 12

# Hex colors shared by the chart cells below.
PRIMARY, ACCENT = '#1e40af', '#00d9ff'
SECONDARY, GRAY = '#8b5cf6', '#64748b'


def savefig(fig: plt.Figure, name: str) -> None:
    """Write ``fig`` into ``OUT_DIR`` as ``<name>.png`` and ``<name>.svg``.

    The layout is tightened once, both files are written with a tight
    bounding box, and then the path of each written file is printed.
    """
    fig.tight_layout()
    targets = [OUT_DIR / f"{name}.{ext}" for ext in ('png', 'svg')]
    for target in targets:
        fig.savefig(target, bbox_inches='tight')
    for target in targets:
        print(f"Saved: {target}")



In [None]:
# Load data: IGS CSV (required), plus ACS CSV and tract_comparison.json when present
igs_csv = DATA_DIR / 'igs_talladega_tracts.csv'
acs_csv = DATA_DIR / 'acs_talladega_tracts.csv'
json_path = PUBLIC_DIR / 'tract_comparison.json'

# Read FIPS codes as strings and left-pad them with zeros to 10 characters,
# the same width as the TRACT_105 / TRACT_1100 constants below.
igs_df = pd.read_csv(igs_csv, dtype={'Census Tract FIPS code': str})
igs_df['Census Tract FIPS code'] = igs_df['Census Tract FIPS code'].str.zfill(10)

# ACS data is optional; downstream cells check `acs_df is None`.
# ACS tract ids are padded to 11 characters and later matched by suffix.
acs_df = None
if acs_csv.exists():
    acs_df = pd.read_csv(acs_csv, dtype={'tract_fips': str})
    acs_df['tract_fips'] = acs_df['tract_fips'].str.zfill(11)

# The comparison JSON is optional as well.
comparison = None
if json_path.exists():
    # Decode explicitly as UTF-8 rather than the platform default encoding,
    # which would garble non-ASCII text on systems that default to another codec.
    with open(json_path, 'r', encoding='utf-8') as f:
        comparison = json.load(f)

# The two tracts compared throughout the notebook (10-character FIPS form).
TRACT_105 = '1121010500'
TRACT_1100 = '1121011100'

# Quick sanity
print(f"IGS rows: {len(igs_df)} | Years: {igs_df['Year'].nunique()} | Tracts: {igs_df['Census Tract FIPS code'].nunique()}")
if acs_df is not None:
    print(f"ACS rows: {len(acs_df)}")
if comparison:
    print("comparison JSON loaded")



In [None]:
# Helper: extract ACS values for selected metrics for both tracts
# ACS column name -> display label for the metrics shown in the comparison chart.
sel_metrics = dict(
    median_household_income='Median Household Income ($)',
    poverty_rate='Poverty Rate (%)',
    unemployment_rate='Unemployment Rate (%)',
    housing_cost_burden_rate='Housing Cost Burden (%)',
)

def acs_row_for(ten_digit_fips: str):
    """Return the first ACS row whose ``tract_fips`` ends with ``ten_digit_fips``.

    Parameters
    ----------
    ten_digit_fips:
        Tract identifier in the 10-character form used by the IGS data; it is
        matched as a suffix of the 11-character ``tract_fips`` column.

    Returns
    -------
    pandas.Series or None
        The first matching row, or ``None`` when the ACS table was not loaded
        (``acs_df is None``) or no row matches.
    """
    if acs_df is None:
        return None
    # na=False: rows with a missing tract_fips count as "no match". Without it
    # the mask contains NaN and boolean indexing with .loc raises ValueError.
    mask = acs_df['tract_fips'].str.endswith(ten_digit_fips, na=False)
    matches = acs_df.loc[mask]
    return matches.iloc[0] if not matches.empty else None

# Look up the ACS record for each tract; either may be None.
acs_105, acs_1100 = (acs_row_for(fips) for fips in (TRACT_105, TRACT_1100))

# Warn only when ACS data was loaded but a tract could not be found in it.
if acs_df is not None:
    if any(record is None for record in (acs_105, acs_1100)):
        print("Warning: Missing ACS records for one or both tracts.")



In [None]:
# Chart 1: IGS gap (latest values) — bar comparison
# Latest IGS per tract: sort by year, then take the last score in each tract
# group (GroupBy.last skips missing values by default).
latest_igs = (
    igs_df.sort_values('Year')
    .groupby('Census Tract FIPS code')['Inclusive Growth Score']
    .last()
)
# A tract absent from the CSV yields NaN, which also makes the gap NaN.
igs_105 = float(latest_igs.get(TRACT_105, np.nan))
igs_1100 = float(latest_igs.get(TRACT_1100, np.nan))
igs_gap = igs_1100 - igs_105 if np.isfinite(igs_105) and np.isfinite(igs_1100) else np.nan

fig, ax = plt.subplots(figsize=(6, 3.6))
vals = [igs_105, igs_1100]
labels = ['Tract 105', 'Tract 1100']
colors = [PRIMARY, SECONDARY]
# Plain matplotlib bars with explicit per-bar colors: seaborn 0.13 deprecates
# passing `palette` to barplot without `hue`, and this needs no seaborn
# version-specific arguments.
ax.bar(labels, vals, color=colors)
ax.axhline(45, color=GRAY, linestyle='--', linewidth=1.5, label='IGS threshold (45)')
ax.set_ylabel('Inclusive Growth Score')
ax.set_title(f'IGS Gap: {igs_gap:.1f} pts' if np.isfinite(igs_gap) else 'IGS Comparison')
ax.legend(loc='upper left')
for i, v in enumerate(vals):
    # Skip the value label when a tract has no score; otherwise the text
    # "nan" would be placed at a NaN coordinate.
    if np.isfinite(v):
        ax.text(i, v + 1, f"{v:.1f}", ha='center', va='bottom', fontsize=11)

savefig(fig, '01_igs_gap_bar')
plt.close(fig)



In [None]:
# Chart 2: Socioeconomic comparisons (income, poverty, unemployment, housing)
# Each metric gets its own panel with an independent y-axis: income is labelled
# in dollars while the other three are percentages, so a single shared axis
# would squash the rate bars to near zero next to the income bars.
if acs_df is not None and acs_105 is not None and acs_1100 is not None:
    rows = []
    for key, label in sel_metrics.items():
        for tract_name, record in (('105', acs_105), ('1100', acs_1100)):
            raw = record.get(key, np.nan)  # column may be absent from the ACS file
            rows.append({
                'Metric': label,
                'Tract': tract_name,
                'Value': float(raw) if pd.notna(raw) else np.nan,
            })
    df_plot = pd.DataFrame(rows)

    metric_labels = list(sel_metrics.values())
    # squeeze=False keeps `axes` two-dimensional even for a single metric
    fig, axes = plt.subplots(
        1, len(metric_labels),
        figsize=(2.6 * len(metric_labels), 4.2),
        squeeze=False,
    )
    for ax, label in zip(axes[0], metric_labels):
        panel = df_plot[df_plot['Metric'] == label]
        ax.bar(panel['Tract'], panel['Value'], color=[PRIMARY, SECONDARY])
        ax.set_title(label, fontsize=10)
        ax.set_xlabel('Tract')
    fig.suptitle('Socioeconomic Comparisons: Tract 105 vs 1100')
    savefig(fig, '02_socioeconomic_comparisons')
    plt.close(fig)
else:
    print('Skipping socioeconomic comparison (ACS data not available).')



In [None]:
# Chart 3: IGS trends (2017–2024) with threshold
# Step 1: obtain the year axis and one score series per tract, preferring the
# precomputed comparison JSON and falling back to the IGS CSV.
if comparison and 'time_series' in comparison:
    ts = comparison['time_series']
    years = ts['years']
    y105 = ts['tract_105_igs']
    y1100 = ts['tract_1100_igs']
else:
    # Fallback: compute from CSV
    years = sorted(igs_df['Year'].dropna().unique())

    def series_for(fips):
        """Return one tract's IGS values aligned to ``years``.

        Years with no row for the tract become NaN, so the returned list always
        has the same length as ``years`` and can be plotted against it.
        """
        tract_rows = igs_df[igs_df['Census Tract FIPS code'] == fips]
        by_year = tract_rows.groupby('Year')['Inclusive Growth Score'].last()
        return by_year.reindex(years).tolist()

    y105 = series_for(TRACT_105)
    y1100 = series_for(TRACT_1100)

# Step 2: draw once, regardless of where the series came from.
fig, ax = plt.subplots(figsize=(7.5, 4.5))
ax.plot(years, y105, color=ACCENT, linewidth=2.8, label='Tract 105')
ax.plot(years, y1100, color=SECONDARY, linewidth=2.8, label='Tract 1100')
ax.axhline(45, color=GRAY, linestyle='--', linewidth=1.5, label='Threshold 45')
ax.set_title('IGS Trends (2017–2024)')
ax.set_xlabel('Year')
ax.set_ylabel('IGS')
ax.set_ylim(0, 100)
ax.legend(loc='best')
savefig(fig, '03_igs_trends')
plt.close(fig)



In [None]:
# Chart 4: Strategic pillar gap bars (per metric)
# Uses comparison['strategic_pillars'] if available
if comparison and 'strategic_pillars' in comparison:
    pillars = comparison['strategic_pillars']
    # Flatten into rows: pillar, metric, gap
    rows = []
    for pkey, pdata in pillars.items():
        # Tolerate a null pillar entry or a null/absent "metrics" mapping in the JSON
        metrics = (pdata or {}).get('metrics') or {}
        for mname, gap in metrics.items():
            if gap is not None:
                rows.append({'Pillar': pkey.replace('_', ' ').title(), 'Metric': mname, 'Gap': float(gap)})
    p_df = pd.DataFrame(rows)

    # One facet per pillar, two facets per row
    if not p_df.empty:
        # color= gives every bar the same color; seaborn 0.13 deprecates
        # passing `palette` without assigning `hue`.
        g = sns.catplot(
            data=p_df, kind='bar', x='Gap', y='Metric', col='Pillar', col_wrap=2,
            sharex=False, height=3.6, aspect=1.1, color=PRIMARY
        )
        g.set_titles('{col_name}')
        for ax in g.axes.flatten():
            ax.axvline(0, color=GRAY, linewidth=1)  # zero-gap reference line
        g.fig.suptitle('Strategic Pillar Gaps (Tract 1100 − Tract 105)', y=1.02)
        savefig(g.fig, '04_pillar_gaps')
        plt.close(g.fig)
    else:
        print('No pillar gaps available to plot.')
else:
    print('Skipping pillar gaps (comparison JSON not available).')



In [None]:
# Chart 5: 5-year baseline vs what-if projections for Tract 105
# Uses the existing Ridge model and intervention deltas
# `sys` must be imported here: without it `sys.path.insert` raises NameError,
# the broad `except` below swallows it, and the chart is silently never made.
import sys
from importlib import import_module

try:
    src_dir = str((REPO_ROOT / 'src').resolve())
    if src_dir not in sys.path:  # avoid stacking duplicate entries on re-runs
        sys.path.insert(0, src_dir)
    pred_mod = import_module('analysis.predict_future_igs')
    IGSPredictor = getattr(pred_mod, 'IGSPredictor')

    predictor = IGSPredictor()
    try:
        predictor.load_model()
    except FileNotFoundError:
        # No saved model on disk: train one instead
        predictor.train_model()

    years_ahead = 5
    # NOTE(review): predict() is assumed to return one value per projected year
    # (it is plotted against `years` below) — confirm against IGSPredictor.
    baseline = predictor.predict(TRACT_105, interventions=None, years_ahead=years_ahead)
    scenario = predictor.predict(TRACT_105, interventions=['digital','housing','entrepreneurship','workforce'], years_ahead=years_ahead)
    years = list(range(1, years_ahead+1))

    fig, ax = plt.subplots(figsize=(7.2, 4.3))
    ax.plot(years, baseline, color=GRAY, linestyle='--', linewidth=2.2, label='Baseline')
    ax.plot(years, scenario, color=PRIMARY, linewidth=3.0, label='What‑If (All Pillars)')
    ax.set_xticks(years)
    ax.set_xlabel('Years Ahead')
    ax.set_ylabel('Projected IGS')
    ax.set_title('5‑Year Projection: Tract 105')
    ax.set_ylim(0, 100)
    # Delta at year 5
    if baseline[-1] is not None and scenario[-1] is not None:
        delta5 = scenario[-1] - baseline[-1]
        # The "+" format flag prints the actual sign; a hard-coded "+" prefix
        # would render a negative delta as "+-1.23".
        ax.text(years[-1]+0.05, scenario[-1], f"{delta5:+.2f}", color=PRIMARY, va='center', fontweight='bold')
    ax.legend(loc='best')
    savefig(fig, '05_projection_baseline_vs_whatif')
    plt.close(fig)
except Exception as e:
    # Best-effort cell: the rest of the notebook does not depend on this chart.
    print(f'Skipping projections (model import/predict failed): {e}')



In [None]:
# Chart 6: Stat tiles for headline numbers (IGS gap, income gap, poverty multiple, unemployment multiple, housing burden)

def draw_stat_tile(title: str, value_str: str, filename: str, color: str = PRIMARY):
    """Render a small axis-free tile with a title line and a large value.

    The title is drawn in ``GRAY`` near the top and ``value_str`` in bold
    ``color`` below it, both in axes coordinates. The figure is written via
    ``savefig(fig, filename)`` and then closed.
    """
    fig, ax = plt.subplots(figsize=(3.6, 2.2))
    ax.axis('off')
    # (vertical position, text, text styling) for each line on the tile
    text_specs = (
        (0.85, title, dict(fontsize=11, color=GRAY)),
        (0.35, value_str, dict(fontsize=28, color=color, fontweight='bold')),
    )
    for y_pos, text, style in text_specs:
        ax.text(0.02, y_pos, text, transform=ax.transAxes, **style)
    savefig(fig, filename)
    plt.close(fig)

# IGS gap tile (only when the gap is a finite number)
if np.isfinite(igs_gap):
    draw_stat_tile('IGS Gap', f"{igs_gap:.1f} pts", 'tile_igs_gap')

# ACS-derived tiles require the records of both tracts
if acs_105 is not None and acs_1100 is not None:
    # Income gap: Tract 1100 minus Tract 105, NaN if either value is missing
    if pd.notna(acs_105['median_household_income']) and pd.notna(acs_1100['median_household_income']):
        inc_gap = acs_1100['median_household_income'] - acs_105['median_household_income']
    else:
        inc_gap = np.nan
    if pd.notna(inc_gap):
        draw_stat_tile('Income Gap', f"${inc_gap:,.0f}", 'tile_income_gap', color=SECONDARY)

    # Poverty multiple: Tract 105 rate divided by Tract 1100 rate (needs a positive denominator)
    pov_105, pov_1100 = acs_105['poverty_rate'], acs_1100['poverty_rate']
    if pd.notna(pov_105) and pd.notna(pov_1100) and pov_1100 > 0:
        pov_mult = pov_105 / pov_1100
        draw_stat_tile('Poverty (x of Tract 1100)', f"{pov_mult:.1f}×", 'tile_poverty_multiple', color=PRIMARY)

    # Unemployment multiple: same ratio for the unemployment rate
    unemp_105, unemp_1100 = acs_105['unemployment_rate'], acs_1100['unemployment_rate']
    if pd.notna(unemp_105) and pd.notna(unemp_1100) and unemp_1100 > 0:
        unemp_mult = unemp_105 / unemp_1100
        draw_stat_tile('Unemployment (x of Tract 1100)', f"{unemp_mult:.1f}×", 'tile_unemployment_multiple', color=PRIMARY)

    # Housing burden: Tract 105 value only
    burden_105 = acs_105['housing_cost_burden_rate']
    if pd.notna(burden_105):
        draw_stat_tile('Housing Burden (Tract 105)', f"{burden_105:.1f}%", 'tile_housing_burden', color=SECONDARY)



In [None]:
# Export manifest: sorted file names of every PNG/SVG currently in OUT_DIR
from pprint import pprint

# Keep only entries whose suffix (case-insensitive) is .png or .svg
image_paths = filter(
    lambda path: path.suffix.lower() in {'.png', '.svg'},
    OUT_DIR.glob('*.*'),
)
manifest = sorted(str(path.name) for path in image_paths)
pprint(manifest)
print(f"Total images: {len(manifest)}")
