# 🧭 Day 5 · Capstone Storyboard
Students weave together the week’s skills to produce a polished CO₂ trend narrative with checkpoints for integrity and storytelling.

## 🔄 How to Use This Solution
- Treat each loop as a milestone: prep the data, construct the baseline visual, then add narrative layers.
- Encourage students to draft captions using the Claim → Evidence → Visual → Takeaway scaffold before final polish.

> ### 🗂️ Data Card — Global CO₂ Emissions (Global Carbon Project)
> - **Source:** Our World in Data compilation of Global Carbon Project historical emissions.
> - **Temporal coverage:** 1750–2022.
> - **Metric:** Annual fossil CO₂ emissions (million tonnes of CO₂).
> - **Refresh cadence:** Annual update; retrieved January 2024.
> - **Caveats:** Excludes land-use change emissions in this series; values are global aggregates.
> - **Ethics & framing:** Emphasise cumulative responsibility and the difference between annual flow vs. cumulative stock.

In [None]:

from __future__ import annotations
from pathlib import Path
from typing import Mapping, Sequence

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display

# Input and output folders are resolved against the directory the notebook
# is launched from.
DATA_DIR = Path.cwd().joinpath("data")
PLOTS_DIR = Path.cwd().joinpath("plots")
# Create the plots folder up front so later saves have somewhere to write.
PLOTS_DIR.mkdir(exist_ok=True, parents=True)

# Apply the seaborn theme first, then layer the Matplotlib overrides on top.
sns.set_theme(context="notebook", style="whitegrid", palette="colorblind")
plt.rcParams["figure.dpi"] = 120
plt.rcParams["axes.titlesize"] = 18
plt.rcParams["axes.labelsize"] = 13
plt.rcParams["axes.titleweight"] = "semibold"
plt.rcParams["axes.grid"] = True


def baseline_style() -> None:
    """Re-apply the seaborn theme and the shared Matplotlib rcParams overrides.

    Mutates global plotting state only; returns nothing.
    """
    sns.set_theme(context="notebook", palette="colorblind", style="whitegrid")
    course_overrides = {
        "figure.dpi": 120,
        "font.size": 12,
        "axes.grid": True,
        # Hide the top and right spines.
        "axes.spines.top": False,
        "axes.spines.right": False,
    }
    plt.rcParams.update(course_overrides)


def load_data(path: Path, *, read_kwargs: Mapping[str, object] | None = None) -> pd.DataFrame:
    """Read a CSV file into a DataFrame and print a confirmation line.

    Args:
        path: Location of the CSV file.
        read_kwargs: Optional keyword arguments forwarded to ``pd.read_csv``.
            The mapping is copied, so the caller's object is never modified.

    Returns:
        The loaded DataFrame.
    """
    options = dict(read_kwargs) if read_kwargs else {}
    frame = pd.read_csv(path, **options)
    print(f"✅ Loaded {path.name} with shape {frame.shape}")
    return frame


def validate_columns(df: pd.DataFrame, required: Sequence[str]) -> None:
    """Confirm that every required column is present in ``df``.

    Args:
        df: Frame to inspect.
        required: Column names that must exist.

    Raises:
        ValueError: If one or more required columns are absent; the message
            lists them in the order they were requested.
    """
    present = df.columns
    absent = []
    for name in required:
        if name not in present:
            absent.append(name)
    if absent:
        raise ValueError(f"Missing columns: {absent}")
    print("✅ Column check passed:", ", ".join(required))


def expect_rows_between(df: pd.DataFrame, lower: int, upper: int) -> None:
    """Check that the number of rows in ``df`` lies in ``[lower, upper]``.

    Both bounds are inclusive.

    Raises:
        ValueError: If the row count falls outside the range.
    """
    row_count = len(df)
    if row_count < lower or row_count > upper:
        raise ValueError(f"Expected between {lower} and {upper} rows, got {row_count}")
    print(f"✅ Row count within expected range ({row_count} rows)")


def quick_diagnose(df: pd.DataFrame, *, label: str = "Data preview", n: int = 5) -> None:
    """Show a quick preview, numeric summary, and null report for ``df``.

    Args:
        df: Frame (or slice of a frame) to inspect.
        label: Heading printed above the preview.
        n: Number of leading rows to display.
    """
    # The line break must be written as an escape: an ordinary quoted string
    # cannot span physical lines.
    print(f"\n🔍 {label}")
    display(df.head(n))
    numeric = df.select_dtypes(include="number")
    # Skip the summary table when there is no numeric data to describe.
    if not numeric.empty:
        display(numeric.describe().T)
    nulls = df.isna().sum()
    if nulls.any():
        # Only list the columns that actually contain nulls.
        print("⚠️ Null values detected:\n", nulls[nulls > 0])
    else:
        print("✅ No null values detected in this slice.")


def accessibility_check(ax: plt.Axes) -> None:
    """Verify that ``ax`` has a title and both axis labels.

    When the x-axis carries more than 12 tick labels, they are rotated by
    35 degrees.

    Raises:
        ValueError: If the title, x-label, or y-label is empty.
    """
    has_title = bool(ax.get_title())
    has_axis_labels = bool(ax.get_xlabel()) and bool(ax.get_ylabel())
    if not has_title or not has_axis_labels:
        raise ValueError("Add a descriptive title and axis labels before proceeding.")
    tick_texts = [label.get_text() for label in ax.get_xticklabels()]
    crowded = len(tick_texts) > 12
    if crowded:
        ax.tick_params(axis="x", labelrotation=35)
    print("✅ Accessibility check: title, labels, and readable ticks confirmed.")


def annotate_latest_point(ax: plt.Axes, x: float, y: float, text: str) -> None:
    """Mark the point ``(x, y)`` on ``ax`` and attach an arrowed caption.

    The marker reuses the colour of the first line drawn on ``ax``. The
    caption is placed at a fixed position given in axes-fraction coordinates.
    """
    marker_colour = ax.lines[0].get_color()
    ax.scatter([x], [y], color=marker_colour, s=60, zorder=5)

    caption_style = dict(
        xy=(x, y),
        xytext=(0.96, 0.85),
        textcoords="axes fraction",
        ha="right",
        arrowprops=dict(arrowstyle="->", color="#333"),
        fontsize=11,
    )
    ax.annotate(text, **caption_style)


def require_story_elements(story: Mapping[str, str]) -> None:
    """Ensure every field of the storytelling scaffold has been filled in.

    A field counts as missing when its key is absent or its value is empty
    or whitespace-only.

    Raises:
        ValueError: Listing the missing fields in scaffold order.
    """
    scaffold_fields = ("claim", "evidence", "visual", "takeaway", "source", "units")
    unfilled = []
    for field in scaffold_fields:
        value = story.get(field, "")
        if not value.strip():
            unfilled.append(field)
    if unfilled:
        raise ValueError(f"Fill in the storytelling scaffold: missing {unfilled}")
    print("✅ Story scaffold complete.")


def save_last_fig(filename: str, fig: plt.Figure | None = None) -> None:
    """Save a figure into ``PLOTS_DIR`` and print where it was written.

    Args:
        filename: File name (including extension) to create inside
            ``PLOTS_DIR``.
        fig: Figure to save. When omitted, the current Matplotlib figure is
            used.

    Raises:
        ValueError: If the figure has no axes.
    """
    if fig is None:
        fig = plt.gcf()
    if not fig.axes:
        raise ValueError("No Matplotlib figure found to save.")
    output_path = PLOTS_DIR / filename
    fig.savefig(output_path, bbox_inches="tight")
    # Prefer a short, cwd-relative path in the message, but never fail after
    # the file is already on disk: relative_to raises ValueError when the
    # working directory is no longer an ancestor of PLOTS_DIR.
    try:
        shown_path = output_path.relative_to(Path.cwd())
    except ValueError:
        shown_path = output_path
    print(f"💾 Figure saved to {shown_path}")


## Loop 1 · Load & Inspect
Goal: load the CO₂ series, clean column names, and confirm coverage before deriving any metrics.

✅ **You should see:** 273 rows (years 1750–2022) with no nulls.

In [None]:

# Start from the shared plot style.
baseline_style()

# Load the raw series and run the integrity checkpoints before deriving anything.
co2 = load_data(DATA_DIR.joinpath("global_co2.csv"))
validate_columns(co2, ["Year", "CO2"])
expect_rows_between(co2, 250, 300)

# Preview both ends of the series (first and last five rows).
quick_diagnose(co2.iloc[:5], label="CO₂ head")
quick_diagnose(co2.iloc[-5:], label="CO₂ tail")

# Cast Year to int and add a float copy of the emissions column named co2_mt.
co2 = co2.assign(
    Year=co2["Year"].astype(int),
    co2_mt=co2["CO2"].astype(float),
)


## Loop 2 · Derive Helper Metrics
Goal: compute rolling means and cumulative totals to give students narrative options.

✅ **You should see:** Columns for five-year rolling mean and cumulative emissions.

In [None]:

# Five-year rolling mean; min_periods=1 keeps the earliest years instead of
# leaving them as NaN.
rolling_mean = co2["co2_mt"].rolling(window=5, min_periods=1).mean()
# Running total of annual emissions.
running_total = co2["co2_mt"].cumsum()

co2 = co2.assign(co2_5yr_avg=rolling_mean, co2_cumulative=running_total)
quick_diagnose(co2.tail(), label="Derived metrics tail")


## Loop 3 · Final Story Figure
Goal: produce a dual-panel figure highlighting annual emissions and cumulative load, backed by the full storytelling scaffold.

In [None]:

TITLE = "Global CO₂ Emissions Hit New Highs"
SUBTITLE = "Annual fossil CO₂ emissions and cumulative total, 1750–2022"
SOURCE = "Source: Global Carbon Project via Our World in Data (downloaded Jan 2024)"
UNITS = "Million tonnes of CO₂"

# Look the 2022 value up once; it feeds both the evidence text and the annotation.
# .item() raises unless exactly one row matches.
latest_mt = co2.loc[co2["Year"] == 2022, "co2_mt"].item()

story = {
    "claim": "Annual CO₂ emissions have grown exponentially, pushing the cumulative total past 1.7 trillion tonnes.",
    "evidence": f"Emissions reached {latest_mt:,.0f} Mt in 2022—over 70× higher than in 1850.",
    "visual": "Dual-panel line chart showing annual emissions and cumulative total.",
    "takeaway": "Stabilising climate requires bending the annual curve down; cumulative emissions already lock in significant warming.",
    "source": SOURCE,
    "units": UNITS,
}
require_story_elements(story)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 9), sharex=True)

# Top panel: annual flow. accessibility_check requires a title and both axis
# labels on every panel, so set them before running the check.
ax1.plot(co2["Year"], co2["co2_mt"], linewidth=2.2)
ax1.set_title("Annual fossil CO₂ emissions")
ax1.set_xlabel("Year")
ax1.set_ylabel("Annual emissions (Mt CO₂)")
annotate_latest_point(ax1, 2022, latest_mt, "2022 high")
accessibility_check(ax1)

# Bottom panel: cumulative stock, drawn in the second palette colour.
ax2.plot(co2["Year"], co2["co2_cumulative"], linewidth=2.2, color=sns.color_palette()[1])
ax2.set_title("Cumulative fossil CO₂ emissions")
ax2.set_xlabel("Year")
ax2.set_ylabel("Cumulative emissions (Mt CO₂)")
accessibility_check(ax2)

# Two-line headline: the line break must be an escape inside the f-string.
fig.suptitle(f"{TITLE}\n{SUBTITLE}")
fig.text(0.01, 0.01, SOURCE, fontsize=9, color="#555")
# Reserve space at the bottom for the source note and at the top for the
# two-line headline above the first panel title.
fig.tight_layout(rect=[0, 0.03, 1, 0.92])

# Keep a handle so a later cell can save this figure.
last_fig = fig
plt.show()

display(pd.DataFrame([story]).T.rename(columns={0: "Story Scaffold"}))


## Loop 4 · Capstone Reflection
Suggested prompts:
- What policy levers target the annual curve vs. the cumulative curve?
- How might you integrate a third panel (e.g., per capita emissions) as an extension?
- Draft an accompanying narrative paragraph using Claim → Evidence → Visual → Takeaway.

In [None]:
# Write the capstone figure kept in last_fig to disk.
save_last_fig(fig=last_fig, filename="day05_solution_plot.png")