## 🔗 Open This Notebook in Google Colab

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DavidLangworthy/ds4s/blob/master/Day%201_%20Introduction%20%E2%80%93%20Climate%20Change%20%26%20Basic%20Plotting.ipynb)

# 🌎 Day 1 – Visualizing Global Warming
### Introduction to Jupyter, Python, and Climate Data

Welcome! Today you will:
1. Load NASA’s global temperature anomaly data.
2. Practice quick diagnostics so mistakes are caught early.
3. Build a climate line chart with a clear claim-evidence-takeaway story.

### 🗂️ Data Card
| Field | Details |
| --- | --- |
| **Dataset** | NASA GISTEMP Global Surface Temperature Anomalies |
| **Source & link** | NASA Goddard Institute for Space Studies — [GISTEMP v4](https://data.giss.nasa.gov/gistemp/) |
| **Temporal / spatial coverage** | Global, annual mean anomaly, 1880–present |
| **Key units** | Temperature anomaly in °C relative to 1951–1980 baseline |
| **Method & caveats** | Annual means compiled from meteorological stations and sea-surface temperatures. 2024 values are preliminary and may be revised. |

### ⏱️ Learning Path for Today

Each loop takes about 10–15 minutes:
- [ ] Set up folders and shared utilities (you run this once).
- [ ] Load and validate the temperature dataset.
- [ ] Run quick diagnostics (shape, columns, nulls, ranges).
- [ ] Smooth the signal with a 5-year rolling average.
- [ ] Build the annotated climate line chart and log accessibility checks.

> 👩‍🏫 **Teacher tip:** Use these checkpoints for quick formative assessment. Have students raise a colored card after each check cell to signal confidence or questions.

> ### 👩‍🏫 Teacher Sidebar
> **Suggested timing:** ~45 minutes including discussion pauses.
>
> **Likely misconceptions:** Confusing absolute temperature with anomaly; forgetting that the record only begins in 1880, so no earlier values exist.
>
> **Fast finisher extension:** Compare land-only vs. ocean-only anomalies using additional columns in the CSV.

In [None]:
from __future__ import annotations

from pathlib import Path
from typing import Mapping, Sequence

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

try:
    import plotly.express as px  # noqa: F401 - imported for student use
except ModuleNotFoundError:  # pragma: no cover - Plotly installed in Colab
    px = None

# Render floats with two decimal places whenever pandas displays them.
pd.options.display.float_format = "{:.2f}".format
# Apply the seaborn theme first, then override individual matplotlib settings.
sns.set_theme(style="whitegrid", context="talk")
plt.rcParams.update(
    {
        # Title and axis-label typography.
        "axes.titlesize": 18,
        "axes.titleweight": "bold",
        "axes.labelsize": 13,
        # Keep a grid, but make it faint.
        "axes.grid": True,
        "grid.alpha": 0.25,
        "figure.dpi": 120,
        # Hide the top and right spines.
        "axes.spines.top": False,
        "axes.spines.right": False,
    }
)

# Fields every story dict must fill in; story_fields_are_complete() iterates
# over this tuple to find the ones that are missing or blank.
STORY_KEYS = (
    "title",
    "subtitle",
    "claim",
    "evidence",
    "visual",
    "takeaway",
    "source",
    "units",
    "annotation",
    "alt_text",
)


def load_csv(
    path: Path | str, *, description: str = "", **read_kwargs
) -> pd.DataFrame:
    """Read a CSV file into a DataFrame and print a one-line confirmation.

    Args:
        path: Location of the CSV file. Both ``Path`` objects and plain
            strings are accepted.
        description: Label used in the confirmation message. When empty,
            the file name taken from ``path`` is used instead.
        **read_kwargs: Extra keyword arguments forwarded to ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    df = pd.read_csv(path, **read_kwargs)
    # Wrap in Path so a plain string path also yields a file name; calling
    # ``.name`` directly on a str raises AttributeError.
    label = description or Path(path).name
    n_rows, n_cols = df.shape
    print(f"✅ Loaded {label} with shape {n_rows} rows × {n_cols} columns.")
    return df


def validate_columns(
    df: pd.DataFrame, required: Sequence[str], *, df_name: str = "DataFrame"
) -> None:
    """Confirm that every required column is present in ``df``.

    Args:
        df: DataFrame whose columns are checked.
        required: Column labels that must exist. A single bare string is
            treated as one column name rather than a sequence of characters.
        df_name: Human-readable name used in the printed and raised messages.

    Raises:
        ValueError: If one or more required columns are absent.
    """
    # A lone string is itself a sequence of characters; wrap it so "Year"
    # is checked as one column instead of "Y", "e", "a", "r".
    if isinstance(required, str):
        required = [required]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise ValueError(f"{df_name} is missing columns: {missing}")
    # str() each label so non-string column labels (e.g. ints) can be joined.
    listed = ", ".join(str(col) for col in required)
    print(f"✅ {df_name} includes required columns: {listed}")


def expect_rows_between(
    df: pd.DataFrame,
    lower: int,
    upper: int,
    *,
    df_name: str = "DataFrame",
) -> None:
    """Check that the row count of ``df`` lies in ``[lower, upper]``.

    Args:
        df: Table whose length is checked.
        lower: Smallest acceptable row count (inclusive).
        upper: Largest acceptable row count (inclusive).
        df_name: Human-readable name used in the messages.

    Raises:
        ValueError: If the row count falls outside the inclusive range.
    """
    n_rows = len(df)
    too_few = n_rows < lower
    too_many = n_rows > upper
    if too_few or too_many:
        message = f"{df_name} has {n_rows} rows; expected between {lower} and {upper}."
        raise ValueError(message)
    print(f"✅ {df_name} row count {n_rows} within [{lower}, {upper}].")


def quick_null_check(df: pd.DataFrame, *, df_name: str = "DataFrame") -> pd.Series:
    """Print and return the number of missing values in each column.

    Args:
        df: Table to inspect.
        df_name: Human-readable name used in the printed header.

    Returns:
        A Series indexed by column label holding each column's null count.
    """
    null_counts = df.isnull().sum()
    print(f"{df_name} missing values per column:", null_counts, sep="\n")
    return null_counts


def quick_preview(
    df: pd.DataFrame, *, n: int = 5, df_name: str = "DataFrame"
) -> pd.DataFrame:
    """Announce a preview and return the first ``n`` rows of ``df``.

    Args:
        df: Table to preview.
        n: Number of leading rows to return.
        df_name: Human-readable name used in the printed header.

    Returns:
        ``df.head(n)``.
    """
    header = "🔍 Previewing {} (first {} rows):".format(df_name, n)
    print(header)
    first_rows = df.head(n)
    return first_rows


def numeric_sanity_check(
    series: pd.Series,
    *,
    minimum: float | None = None,
    maximum: float | None = None,
    name: str = "Series",
) -> None:
    """Verify that a numeric series stays within optional bounds.

    Args:
        series: Values to check.
        minimum: Lowest acceptable value (inclusive), or ``None`` to skip.
        maximum: Highest acceptable value (inclusive), or ``None`` to skip.
        name: Human-readable name used in the messages.

    Raises:
        ValueError: If a bound is given and the series has no non-missing
            values, or if any value lies outside the requested bounds.
    """
    has_bounds = minimum is not None or maximum is not None
    # min()/max() of an empty or all-NaN series is NaN, and every comparison
    # with NaN is False, so the range checks below would pass without having
    # looked at any data. Fail loudly instead of reporting a false success.
    if has_bounds and series.dropna().empty:
        raise ValueError(f"{name} has no non-missing values to check.")
    if minimum is not None and series.min() < minimum:
        raise ValueError(
            f"{name} has values below the expected minimum of {minimum}."
        )
    if maximum is not None and series.max() > maximum:
        raise ValueError(
            f"{name} has values above the expected maximum of {maximum}."
        )
    # Build the bound description from the parts actually supplied so a
    # maximum-only check does not read "within expected range and ≤ ...".
    bounds = []
    if minimum is not None:
        bounds.append(f"≥ {minimum}")
    if maximum is not None:
        bounds.append(f"≤ {maximum}")
    suffix = " " + " and ".join(bounds) if bounds else ""
    print(f"✅ {name} within expected range{suffix}.")


def story_fields_are_complete(story: Mapping[str, str]) -> None:
    """Ensure every key in ``STORY_KEYS`` has a non-blank value in ``story``.

    Args:
        story: Mapping of storytelling fields to their text.

    Raises:
        ValueError: If any field is absent, ``None``, or only whitespace.
            The message lists the offending keys.
    """
    missing = []
    for key in STORY_KEYS:
        value = story.get(key)
        # Test for None explicitly: str(None) is the non-empty text "None",
        # which would otherwise let an unset field pass as complete.
        if value is None or not str(value).strip():
            missing.append(key)
    if missing:
        raise ValueError(
            "Please complete the storytelling scaffold before plotting: "
            + ", ".join(missing)
        )
    print(
        "✅ Story scaffold complete (title, subtitle, claim, evidence, visual,"
        " takeaway, source, units, annotation, alt text)."
    )


def print_story_scaffold(story: Mapping[str, str]) -> None:
    """Validate ``story`` and print its claim/evidence/takeaway summary.

    Args:
        story: Mapping of storytelling fields; checked with
            ``story_fields_are_complete`` before anything is printed.
    """
    story_fields_are_complete(story)
    print("\n📖 Story Scaffold")
    labelled_fields = (
        ("Claim", "claim"),
        ("Evidence", "evidence"),
        ("Visual focus", "visual"),
        ("Takeaway", "takeaway"),
    )
    for label, key in labelled_fields:
        print(f"{label}: {story[key]}")
    # Source and units share the final line.
    print(f"Source: {story['source']} ({story['units']})")


def apply_matplotlib_story(ax: plt.Axes, story: Mapping[str, str]) -> None:
    """Put the story's title and caption onto a matplotlib chart.

    Args:
        ax: Axes that receives the left-aligned two-line title; the caption
            is written on the figure that owns it.
        story: Mapping of storytelling fields; checked with
            ``story_fields_are_complete`` first.
    """
    story_fields_are_complete(story)

    heading = f"{story['title']}\n{story['subtitle']}"
    ax.set_title(heading, loc="left", pad=18)

    # Caption: claim/evidence/takeaway on one line, source and units below.
    caption = (
        f"Claim: {story['claim']} | Evidence: {story['evidence']}"
        f" | Takeaway: {story['takeaway']}"
        f"\nSource: {story['source']} • Units: {story['units']}"
    )
    # The negative y value places the text below the bottom edge of the figure.
    ax.figure.text(0.01, -0.08, caption, ha="left", fontsize=10)


def annotate_callout(
    ax: plt.Axes,
    *,
    xy: tuple[float, float],
    xytext: tuple[float, float],
    text: str,
) -> None:
    """Add a boxed text label with an arrow to ``ax``.

    Args:
        ax: Axes to annotate.
        xy: Point the arrow targets.
        xytext: Position of the label text.
        text: Label content.
    """
    arrow_style = {"arrowstyle": "->", "color": "black", "lw": 1}
    label_box = {
        "boxstyle": "round,pad=0.3",
        "fc": "white",
        "ec": "black",
        "alpha": 0.8,
    }
    ax.annotate(text, xy=xy, xytext=xytext, arrowprops=arrow_style, bbox=label_box)


def record_alt_text(text: str) -> None:
    """Print a confirmation line that echoes the chart's alt text."""
    message = "📝 Alt text ready: {}".format(text)
    print(message)


def accessibility_checklist(
    *, palette: str, has_alt_text: bool, contrast_passed: bool = True
) -> None:
    """Print a short accessibility report for the current chart.

    Args:
        palette: Description of the color palette in use.
        has_alt_text: Whether alt text has been written.
        contrast_passed: Whether the color contrast was judged adequate.
    """
    if has_alt_text:
        alt_status = "yes"
    else:
        alt_status = "add alt text before sharing"
    if contrast_passed:
        contrast_status = "yes"
    else:
        contrast_status = "adjust colors"

    report = (
        "♿ Accessibility checklist:",
        f" • Palette: {palette}",
        f" • Alt text provided: {alt_status}",
        f" • Contrast OK: {contrast_status}",
    )
    for line in report:
        print(line)


def save_figure(fig: plt.Figure, filename: str) -> Path:
    """Save ``fig`` into a ``plots`` folder under the current working directory.

    Args:
        fig: Figure to write; its ``savefig`` method is called.
        filename: File name to use inside the ``plots`` folder.

    Returns:
        The full path the figure was written to.
    """
    folder = Path.cwd().joinpath("plots")
    # Create the folder on first use; an existing one is left untouched.
    folder.mkdir(exist_ok=True, parents=True)
    destination = folder.joinpath(filename)
    fig.savefig(destination, bbox_inches="tight", dpi=300)
    print("💾 Saved figure to {}".format(destination))
    return destination


def save_plotly_figure(fig, filename: str) -> Path:
    """Save a Plotly figure as HTML and, when possible, as a static image.

    Both files go into a ``plots`` folder under the current working
    directory. The HTML file reuses ``filename`` with its extension swapped
    for ``.html``; the static image keeps ``filename`` as given.

    Args:
        fig: Object exposing ``write_html`` and ``write_image``.
        filename: Name of the static image file, e.g. ``"chart.png"``.

    Returns:
        Path of the HTML file that was written.
    """
    plots_dir = Path.cwd() / "plots"
    plots_dir.mkdir(parents=True, exist_ok=True)
    static_path = plots_dir / filename
    # Swap the real extension instead of text-replacing ".png": the old
    # replace() left a non-.png name unchanged, so the HTML and the image
    # were both aimed at the same file.
    html_path = static_path.with_suffix(".html")
    fig.write_html(html_path)
    print(f"💾 Saved interactive figure to {html_path}")
    if static_path == html_path:
        # The caller asked for an .html name; writing an image there would
        # overwrite the page that was just saved.
        print("⚠️ Static export skipped: filename has an .html extension")
        return html_path
    try:
        fig.write_image(str(static_path))
        print(f"💾 Saved static image to {static_path}")
    # Deliberately broad: static export is best-effort and must not abort
    # the notebook when the image backend is unavailable.
    except Exception as exc:  # pragma: no cover - depends on kaleido
        print(f"⚠️ Static export skipped: {exc}")
    return html_path

In [None]:
from pathlib import Path

# Both folders are resolved against the directory the notebook runs from.
DATA_DIR = Path.cwd().joinpath("data")
PLOTS_DIR = Path.cwd().joinpath("plots")
# Only the plots folder is created here; the data folder is merely referenced.
PLOTS_DIR.mkdir(exist_ok=True, parents=True)

print("Data directory: {}".format(DATA_DIR))
print("Plots directory: {}".format(PLOTS_DIR))

## Loop 1 · Load & Label the Data
Learn how to read the CSV and confirm the schema.

In [None]:
# The CSV is looked up inside the shared data folder (DATA_DIR).
temperature_path = DATA_DIR / "GLB.Ts+dSST.csv"

# skiprows=1 drops the first line of the file; header=0 then consumes the
# next line as the header row, and `names` replaces it with our own labels.
# usecols=[0, 13] keeps only the 1st and 14th columns.
# NOTE(review): presumably those are the year and the annual mean anomaly —
# confirm against the downloaded file.
df_raw = load_csv(
    temperature_path,
    description="NASA GISTEMP anomalies",
    skiprows=1,
    usecols=[0, 13],
    names=["Year", "TempAnomaly"],
    header=0,
)

# Fail fast if the schema or the size of the table is not what we expect.
validate_columns(df_raw, ["Year", "TempAnomaly"], df_name="temperature anomalies")
expect_rows_between(df_raw, 140, 200, df_name="temperature anomalies")
# errors="coerce" turns any entry that cannot be parsed as a number into NaN.
df_raw["TempAnomaly"] = pd.to_numeric(df_raw["TempAnomaly"], errors="coerce")

In [None]:
# A notebook cell only renders the value of its final expression. The null
# counts are already printed by quick_null_check itself, so it runs first and
# the preview table goes last, where it is actually displayed.
quick_null_check(df_raw, df_name="temperature anomalies")
quick_preview(df_raw, n=5, df_name="temperature anomalies")

## Loop 2 · Clean & Focus the Time Series
Tighten the data to the years we can trust and prepare helper columns.

In [None]:
# Drop rows without an anomaly value, keep 1880 onward, and renumber the index.
df_temp = df_raw.dropna(subset=["TempAnomaly"])
df_temp = df_temp.query("Year >= 1880")
df_temp = df_temp.reset_index(drop=True)

# Centered 5-year mean of the annual anomaly; the first and last two rows
# have no full window and therefore stay NaN.
df_temp["Rolling5"] = df_temp["TempAnomaly"].rolling(5, center=True).mean()

# Sanity checks on table size and on the value range of both series.
expect_rows_between(df_temp, 140, 170, df_name="clean anomalies")
numeric_sanity_check(
    df_temp["TempAnomaly"],
    minimum=-1.0,
    maximum=1.5,
    name="TempAnomaly (°C)",
)
numeric_sanity_check(
    df_temp["Rolling5"].dropna(),
    minimum=-1.0,
    maximum=1.5,
    name="Rolling5 (°C)",
)

In [None]:
# A notebook cell only renders the value of its final expression, so the
# first preview is printed explicitly; otherwise its table would be dropped.
print(quick_preview(df_temp, n=5, df_name="clean anomalies"))
quick_preview(df_temp.tail(), n=5, df_name="recent anomalies")

## Loop 3 · Plot with Story-first Scaffolding
Fill out the storytelling checklist **before** drawing the figure.

In [None]:
# The last row of the cleaned table is the most recent year on record.
latest_year = int(df_temp["Year"].iloc[-1])
# Use the annual anomaly of that same year. The centered 5-year rolling mean
# is NaN for the final two years, so its last valid value belongs to an
# earlier year and would not match a label or callout placed at latest_year.
latest_value = df_temp["TempAnomaly"].iloc[-1]

# Every key listed in STORY_KEYS must be filled in before plotting.
story = {
    "title": "Earth is Warming Faster Than the 20th Century Baseline",
    "subtitle": f"Global surface temperature anomalies, 1880–{latest_year}",
    "claim": "Recent decades are the warmest in the 140+ year record.",
    "evidence": "5-year rolling average now sits over 1°C above the 1951–1980 baseline.",
    "visual": "Line chart comparing annual anomalies with a smoothed trend.",
    "takeaway": "Warming is persistent and accelerating, underscoring urgency for mitigation.",
    "source": "NASA GISTEMP v4",
    "units": "Temperature anomaly (°C relative to 1951–1980)",
    "annotation": f"{latest_year}: {latest_value:.2f}°C above baseline",
    "alt_text": (
        "Line chart showing annual global temperature anomalies from 1880 to "
        f"{latest_year} with a 5-year rolling average climbing from near 0°C to over "
        f"1°C above the mid-20th century baseline."
    ),
}

# Validates the scaffold and prints the claim/evidence/takeaway summary.
print_story_scaffold(story)

In [None]:
fig, ax = plt.subplots(figsize=(11, 6))
# Raw annual values, drawn semi-transparent so the smoothed line stands out.
ax.plot(
    df_temp["Year"],
    df_temp["TempAnomaly"],
    label="Annual anomaly",
    color="#d1495b",
    alpha=0.4,
)
# Smoothed trend, drawn second with a thicker line.
ax.plot(
    df_temp["Year"],
    df_temp["Rolling5"],
    label="5-year rolling average",
    color="#00798c",
    linewidth=3,
)
# Dashed reference line at zero anomaly.
ax.axhline(0, color="black", linestyle="--", linewidth=1, alpha=0.7)
ax.set_xlabel("Year")
ax.set_ylabel("Temperature anomaly (°C)")
ax.legend(loc="upper left")

# Title and caption come from the story dict; this also re-validates it.
apply_matplotlib_story(ax, story)
# Arrow targets (latest_year, latest_value); the label sits at (1975, 1.2).
annotate_callout(
    ax,
    xy=(latest_year, latest_value),
    xytext=(1975, 1.2),
    text=story["annotation"],
)

# Log the accessibility information that accompanies the figure.
record_alt_text(story["alt_text"])
accessibility_checklist(palette="Colorbrewer warm/cool contrast", has_alt_text=True)

In [None]:
# Write the finished chart into the plots folder via the save_figure helper.
save_figure(fig, "day01_solution_plot.png")