# 🌎 Day 1 · Global Temperature Signals
Welcome back! Each notebook follows the same heartbeat: learn a bite-sized idea, try it immediately, and check your work before moving on. Today we tackle NASA's global temperature anomaly record and practise deliberate, well-instrumented plotting.

**Cadence for every loop**
1. Skim the concept notes (2–3 min)
2. Run the guided code cell (5–7 min)
3. Trigger the diagnostic/check cell (2–3 min)

Stay curious—small wins stack up quickly.

## 🔄 How to Use This Solution
- Run cells from top to bottom; each loop is self-contained.
- Use the provided utilities for loading, validating, and storytelling—they repeat throughout the course.
- The solution shows *one* clean path. When teaching, pause after each check cell to surface questions or extensions.

> ### 🗂️ Data Card — NASA GISTEMP v4
> - **Source:** NASA Goddard Institute for Space Studies, Global Surface Temperature Analysis (GISTEMP v4).
> - **Temporal coverage:** 1880–2024 fully observed; 2025 contains partial monthly updates (marked `***`).
> - **Metric:** Annual mean temperature anomaly (°C) relative to the 1951–1980 baseline.
> - **Refresh cadence:** Updated monthly; downloaded February 2025.
> - **Caveats:** Global mean masks regional variation; anomalies compare to a historical baseline rather than absolute temperatures.
> - **Ethics & framing:** Use clear units, avoid implying certainty beyond the data, and note partial years when reporting.

In [None]:

from __future__ import annotations
from pathlib import Path
from typing import Mapping, Sequence

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display

# Input files and rendered figures live in folders under the working directory.
DATA_DIR = Path.cwd().joinpath("data")
PLOTS_DIR = Path.cwd().joinpath("plots")
# Make sure the output folder exists; this is a no-op when it already does.
PLOTS_DIR.mkdir(exist_ok=True, parents=True)

# Shared look for every chart: seaborn theme first, then Matplotlib overrides
# grouped by rcParams family.
sns.set_theme(context="notebook", style="whitegrid", palette="colorblind")
plt.rc("figure", dpi=120)
plt.rc("axes", titlesize=18, labelsize=13, titleweight="semibold", grid=True)


def baseline_style() -> None:
    """Apply the course seaborn theme, then layer Matplotlib overrides on top."""
    overrides = {
        "figure.dpi": 120,
        "font.size": 12,
        "axes.grid": True,
        "axes.spines.top": False,
        "axes.spines.right": False,
    }
    sns.set_theme(context="notebook", style="whitegrid", palette="colorblind")
    # Applied after the theme so these values take precedence over seaborn's.
    plt.rcParams.update(overrides)


def load_data(path: Path, *, read_kwargs: Mapping[str, object] | None = None) -> pd.DataFrame:
    read_kwargs = dict(read_kwargs or {})
    df = pd.read_csv(path, **read_kwargs)
    print(f"✅ Loaded {path.name} with shape {df.shape}")
    return df


def validate_columns(df: pd.DataFrame, required: Sequence[str]) -> None:
    """Confirm that every name in ``required`` is a column of ``df``.

    Raises:
        ValueError: If any required column is absent; the message lists the
            absent names in the order they were requested.
    """
    absent = [name for name in required if name not in df.columns]
    if not absent:
        print("✅ Column check passed:", ", ".join(required))
        return
    raise ValueError(f"Missing columns: {absent}")


def expect_rows_between(df: pd.DataFrame, lower: int, upper: int) -> None:
    """Check that ``df`` has between ``lower`` and ``upper`` rows, inclusive.

    Raises:
        ValueError: If the row count falls outside the inclusive range.
    """
    n_rows = len(df)
    too_few = n_rows < lower
    too_many = n_rows > upper
    if too_few or too_many:
        raise ValueError(f"Expected between {lower} and {upper} rows, got {n_rows}")
    print(f"✅ Row count within expected range ({n_rows} rows)")


def quick_diagnose(df: pd.DataFrame, *, label: str = "Data preview", n: int = 5) -> None:
    """Show a quick preview of ``df``: head, numeric summary, and null counts.

    Args:
        df: Frame to inspect.
        label: Heading printed above the preview.
        n: Number of leading rows to display.
    """
    # The leading "\n" separates this report from whatever was printed before.
    print(f"\n🔍 {label}")
    display(df.head(n))
    numeric = df.select_dtypes(include="number")
    if not numeric.empty:
        # Transposed so each numeric column becomes one row of statistics.
        display(numeric.describe().T)
    nulls = df.isna().sum()
    if nulls.any():
        # Only list the columns that actually contain nulls.
        print("⚠️ Null values detected:\n", nulls[nulls > 0])
    else:
        print("✅ No null values detected in this slice.")


def accessibility_check(ax: plt.Axes) -> None:
    """Require a title and both axis labels, and rotate crowded x tick labels.

    Raises:
        ValueError: If the title, the x label, or the y label is empty.
    """
    has_title = bool(ax.get_title())
    has_axis_labels = bool(ax.get_xlabel()) and bool(ax.get_ylabel())
    if not has_title or not has_axis_labels:
        raise ValueError("Add a descriptive title and axis labels before proceeding.")
    # More than a dozen x tick labels get rotated so they are less likely to overlap.
    tick_label_count = len(ax.get_xticklabels())
    if tick_label_count > 12:
        ax.tick_params(axis='x', labelrotation=35)
    print("✅ Accessibility check: title, labels, and readable ticks confirmed.")


def annotate_latest_point(ax: plt.Axes, x: float, y: float, text: str) -> None:
    """Mark the point ``(x, y)`` and attach an arrowed text label to it.

    The marker reuses the colour of the first line on ``ax`` so it reads as
    part of that series. If the axes has no lines yet, Matplotlib's default
    colour is used instead of raising ``IndexError``.

    Args:
        ax: Axes to draw on.
        x: Data-space x coordinate of the point.
        y: Data-space y coordinate of the point.
        text: Label shown next to the arrow.
    """
    # color=None lets scatter fall back to its own default colour handling.
    marker_color = ax.lines[0].get_color() if ax.lines else None
    ax.scatter([x], [y], color=marker_color, s=60, zorder=5)
    ax.annotate(
        text,
        xy=(x, y),
        # Text sits at a fixed spot in axes-fraction coordinates; the arrow
        # points from there to the data point.
        xytext=(0.96, 0.85),
        textcoords="axes fraction",
        ha="right",
        arrowprops={"arrowstyle": "->", "color": "#333"},
        fontsize=11,
    )


def require_story_elements(story: Mapping[str, str]) -> None:
    """Ensure every storytelling field is present and not blank.

    Raises:
        ValueError: If any expected key is absent or maps to an empty or
            whitespace-only string; the message lists the offending keys.
    """
    expected_fields = ("claim", "evidence", "visual", "takeaway", "source", "units")
    blanks = [field for field in expected_fields if story.get(field, "").strip() == ""]
    if not blanks:
        print("✅ Story scaffold complete.")
        return
    raise ValueError(f"Fill in the storytelling scaffold: missing {blanks}")


def save_last_fig(filename: str, fig: plt.Figure | None = None) -> None:
    """Save a figure into ``PLOTS_DIR`` and print where it was written.

    Args:
        filename: File name, relative to ``PLOTS_DIR``, for the saved image.
        fig: Figure to save. When omitted, Matplotlib's current figure is used.

    Raises:
        ValueError: If the chosen figure has no axes, i.e. nothing was drawn.
    """
    fig = fig if fig is not None else plt.gcf()
    if not fig.axes:
        raise ValueError("No Matplotlib figure found to save.")
    output_path = PLOTS_DIR / filename
    # Recreate the folder in case it was removed after the setup cell ran.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(output_path, bbox_inches="tight")
    try:
        shown_path = output_path.relative_to(Path.cwd())
    except ValueError:
        # The working directory is no longer a parent of the output path.
        # Report the full path rather than failing after the file is written.
        shown_path = output_path
    print(f"💾 Figure saved to {shown_path}")


## Loop 1 · Load & Inspect
Goal: bring the CSV into pandas, keep only the columns we need, and confirm the structure before touching visuals.

✅ **You should see:** ~145 rows, columns `Year` and `global_temp_anomaly_C`, no nulls after cleaning.

In [None]:

baseline_style()

# skiprows=1 drops the first physical line of the file before parsing.
# NOTE(review): presumably a title row above the real header — confirm against the CSV.
temperature_path = DATA_DIR / "GLB.Ts+dSST.csv"
raw_temperature = load_data(temperature_path, read_kwargs={"skiprows": 1})

# Keep the year and the "J-D" column, which this notebook uses as the annual anomaly.
# Entries that are not numbers become NaN via errors="coerce" and are dropped,
# so only rows with a usable annual value remain. The rename happens last.
temperature = (
    raw_temperature.loc[:, ["Year", "J-D"]]
    .assign(**{"J-D": lambda frame: pd.to_numeric(frame["J-D"], errors="coerce")})
    .dropna(subset=["J-D"])
    .assign(Year=lambda frame: frame["Year"].astype(int))
    .rename(columns={"J-D": "global_temp_anomaly_C"})
)

# Structural checks before any plotting.
validate_columns(temperature, ["Year", "global_temp_anomaly_C"])
expect_rows_between(temperature, 140, 200)
quick_diagnose(temperature, label="NASA anomalies (first rows)")


## Loop 2 · Reasonableness Checks
Goal: sanity-check ranges and create a helper frame for recent context. This builds the habit of interpreting numbers before plotting them.

✅ **You should see:** Values mostly between −1.0 and +1.5 °C and a recent five-year mean above +1.1 °C.

In [None]:

# Sanity guards: every anomaly must lie in [-1.5, 2.5] and the years must
# still be in ascending order after cleaning.
assert (
    temperature["global_temp_anomaly_C"].ge(-1.5) & temperature["global_temp_anomaly_C"].le(2.5)
).all(), "Unexpected anomaly outside expected bounds."
assert temperature["Year"].is_monotonic_increasing, "Years should be increasing after cleaning."

# The recent window covers the latest year plus the four years before it.
latest_year = int(temperature["Year"].max())
recent_window = temperature.loc[temperature["Year"] >= latest_year - 4]
recent_mean = recent_window["global_temp_anomaly_C"].mean()

print(f"Most recent full year: {latest_year}")
print(f"Five-year mean anomaly: {recent_mean:.2f} °C")
quick_diagnose(recent_window, label="Recent five years")


## Loop 3 · Craft the Visual
Goal: build a single, legible line chart with annotation and visual emphasis around the crossing above 0 °C.

Use the storytelling scaffold before rendering to force clear intent.

In [None]:

# Text used on the chart and in the story scaffold.
TITLE = "Global Temperature Anomalies Keep Climbing"
SUBTITLE = f"Annual deviation from the 1951–1980 average, {temperature['Year'].min()}–{latest_year}"
ANNOTATION = f"{latest_year}: {recent_window.iloc[-1]['global_temp_anomaly_C']:.2f} °C above baseline"
SOURCE = "Source: NASA GISTEMP v4 (downloaded Feb 2025)"
UNITS = "Temperature anomaly in °C"

# State the intent of the chart before drawing it.
story = {
    "claim": "Global surface temperatures are now more than 1 °C warmer than the mid-20th century baseline.",
    "evidence": f"The five-year average anomaly ending {latest_year} is {recent_mean:.2f} °C above the 1951–1980 baseline.",
    "visual": "Line chart of NASA global temperature anomalies with shading above/below the zero line.",
    "takeaway": "The persistent upward trend underscores how quickly the climate system has warmed.",
    "source": SOURCE,
    "units": UNITS,
}

# Fails fast if any scaffold field is blank.
require_story_elements(story)

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(temperature["Year"], temperature["global_temp_anomaly_C"], linewidth=2.5)
# Shade the area between the zero line and the curve wherever the anomaly is non-negative.
ax.fill_between(
    temperature["Year"],
    0,
    temperature["global_temp_anomaly_C"],
    where=temperature["global_temp_anomaly_C"] >= 0,
    alpha=0.15,
)
# Dashed reference line at the baseline (0 °C anomaly).
ax.axhline(0, color="#444", linewidth=1, linestyle="--")
# Title and subtitle share one string; "\n" puts the subtitle on its own line.
ax.set_title(f"{TITLE}\n{SUBTITLE}")
ax.set_xlabel("Year")
ax.set_ylabel(UNITS)
# Source credit in the lower-left corner, positioned in axes coordinates.
ax.text(
    0.01,
    0.04,
    SOURCE,
    transform=ax.transAxes,
    fontsize=9,
    color="#555",
)
# Highlight the last row of the cleaned series.
annotate_latest_point(
    ax,
    x=temperature["Year"].iloc[-1],
    y=temperature["global_temp_anomaly_C"].iloc[-1],
    text=ANNOTATION,
)
accessibility_check(ax)

# Keep a handle on the figure so a later cell can save it after plt.show().
last_fig = fig
plt.show()

display(pd.DataFrame([story]).T.rename(columns={0: "Story Scaffold"}))


## Loop 4 · Reflect & Extend
Prompt students to discuss:
- Where does the shading change sign, and why does that matter?
- How might uncertainty or regional variation change the narrative?
- Optional extension: segment by decade and compute decadal slopes.

In [None]:
# Write the Loop 3 figure to the plots folder using the handle kept in `last_fig`.
save_last_fig(filename="day01_solution_plot.png", fig=last_fig)