## 🔗 Open This Notebook in Google Colab

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DavidLangworthy/ds4s/blob/master/Day%202%20%E2%80%93%20Fossil%20Fuels%20vs.%20Renewables.ipynb)

# ⚡ Day 2 – Exploring Energy Trends
### Fossil Fuels vs. Renewables in a Warming World

Today you will investigate how quickly the world is scaling up renewable energy. The micro-loops walk you through loading multiple datasets, merging them safely, and communicating uncertainty.

### 🗂️ Data Card
| Field | Details |
| --- | --- |
| **Dataset** | Our World in Data – Renewable Energy Share Datasets |
| **Source & link** | OWID Energy Data Explorer — [Renewables share](https://ourworldindata.org/grapher/renewable-share-energy) |
| **Temporal / spatial coverage** | Global (World aggregate), annual values 1965–2023 |
| **Key units** | Share of primary energy from renewables (% of total) |
| **Method & caveats** | Individual CSVs per technology. Shares are calculated from primary energy equivalents; small discrepancies occur across series due to rounding. |

### ⏱️ Learning Path for Today

Each loop takes about 10–15 minutes:
- [ ] Inventory and load each renewable energy CSV.
- [ ] Filter to the world aggregate and align the schemas.
- [ ] Merge sources into a tidy long-form table with diagnostics.
- [ ] Plot stacked area + line overlays with storytelling scaffold.

> 👩‍🏫 **Teacher tip:** Use these checkpoints for quick formative assessment. Have students raise a colored card after each check cell to signal confidence or questions.

> ### 👩‍🏫 Teacher Sidebar
> **Suggested timing:** ~50 minutes including discussion.
>
> **Likely misconceptions:** Mixing percentage units with absolute energy; forgetting to align on the same year column.
>
> **Fast finisher extension:** Have students add biofuels or geothermal series and compare regional splits.

In [None]:
from __future__ import annotations

from pathlib import Path
from typing import Mapping, Sequence

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plotly is optional: when the import fails, ``px`` is bound to ``None`` so the
# name still exists for later cells.
try:
    import plotly.express as px  # noqa: F401 - imported for student use
except ModuleNotFoundError:  # pragma: no cover - Plotly installed in Colab
    px = None

# Render floats with two decimal places in pandas output.
pd.options.display.float_format = "{:.2f}".format
sns.set_theme(style="whitegrid", context="talk")
# NOTE(review): these overrides presumably need to stay after ``sns.set_theme``
# so the theme does not reset them — confirm before reordering.
plt.rcParams.update(
    {
        "axes.titlesize": 18,
        "axes.titleweight": "bold",
        "axes.labelsize": 13,
        "axes.grid": True,
        "grid.alpha": 0.25,
        "figure.dpi": 120,
        "axes.spines.top": False,
        "axes.spines.right": False,
    }
)

# Keys that every story dictionary must fill in; checked by
# ``story_fields_are_complete`` before any scaffold is printed or plotted.
STORY_KEYS = (
    "title",
    "subtitle",
    "claim",
    "evidence",
    "visual",
    "takeaway",
    "source",
    "units",
    "annotation",
    "alt_text",
)


def load_csv(path: str | Path, *, description: str = "", **read_kwargs) -> pd.DataFrame:
    """Read a CSV into a DataFrame and print a one-line load summary.

    Args:
        path: Location of the CSV. A ``Path`` or a plain string is accepted;
            it is handed to ``pd.read_csv`` unchanged.
        description: Label used in the summary. When empty, the file name
            taken from ``path`` is used instead.
        **read_kwargs: Extra keyword arguments forwarded to ``pd.read_csv``.

    Returns:
        The loaded DataFrame.
    """
    df = pd.read_csv(path, **read_kwargs)
    # Wrap in Path only for the label, so a string path no longer fails on
    # ``.name`` after the file has already been read successfully.
    label = description or Path(path).name
    print(
        f"✅ Loaded {label} with shape {df.shape[0]} rows × {df.shape[1]} columns."
    )
    return df


def validate_columns(
    df: pd.DataFrame, required: Sequence[str], *, df_name: str = "DataFrame"
) -> None:
    """Check that every required column is present in ``df``.

    Args:
        df: DataFrame to inspect.
        required: Column labels that must appear in ``df.columns``.
        df_name: Human-readable name used in the printed or raised message.

    Raises:
        ValueError: If one or more required columns are absent; the message
            lists the missing labels.
    """
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise ValueError(f"{df_name} is missing columns: {missing}")
    # Stringify labels so non-string column names (e.g. integer years) do not
    # break the confirmation message.
    print(
        f"✅ {df_name} includes required columns: {', '.join(map(str, required))}"
    )


def expect_rows_between(
    df: pd.DataFrame,
    lower: int,
    upper: int,
    *,
    df_name: str = "DataFrame",
) -> None:
    """Confirm that ``df`` has a row count inside ``[lower, upper]``.

    Both bounds are inclusive. A confirmation line is printed on success.

    Raises:
        ValueError: If the row count is below ``lower`` or above ``upper``.
    """
    n_rows = len(df)
    too_few = n_rows < lower
    too_many = n_rows > upper
    if too_few or too_many:
        message = f"{df_name} has {n_rows} rows; expected between {lower} and {upper}."
        raise ValueError(message)
    print(f"✅ {df_name} row count {n_rows} within [{lower}, {upper}].")


def quick_null_check(df: pd.DataFrame, *, df_name: str = "DataFrame") -> pd.Series:
    """Print and return the number of missing values in each column of ``df``."""
    missing_per_column = df.isna().sum()
    report = f"{df_name} missing values per column:\n{missing_per_column}"
    print(report)
    return missing_per_column


def quick_preview(
    df: pd.DataFrame, *, n: int = 5, df_name: str = "DataFrame"
) -> pd.DataFrame:
    """Announce a preview of ``df`` and return its first ``n`` rows."""
    announcement = f"🔍 Previewing {df_name} (first {n} rows):"
    print(announcement)
    top_rows = df.head(n)
    return top_rows


def numeric_sanity_check(
    series: pd.Series,
    *,
    minimum: float | None = None,
    maximum: float | None = None,
    name: str = "Series",
) -> None:
    """Verify that the values of ``series`` stay within optional bounds.

    Args:
        series: Values to check.
        minimum: Lowest acceptable value; skipped when ``None``.
        maximum: Highest acceptable value; skipped when ``None``.
        name: Label used in the printed or raised message.

    Raises:
        ValueError: If the smallest value is below ``minimum`` or the largest
            value is above ``maximum``.
    """
    if minimum is not None and series.min() < minimum:
        raise ValueError(
            f"{name} has values below the expected minimum of {minimum}."
        )
    if maximum is not None and series.max() > maximum:
        raise ValueError(
            f"{name} has values above the expected maximum of {maximum}."
        )
    # List only the bounds that were actually checked, so a maximum-only call
    # no longer prints a dangling "and".
    bounds = []
    if minimum is not None:
        bounds.append(f"≥ {minimum}")
    if maximum is not None:
        bounds.append(f"≤ {maximum}")
    suffix = f" {' and '.join(bounds)}" if bounds else ""
    print(f"✅ {name} within expected range{suffix}.")


def story_fields_are_complete(story: Mapping[str, str]) -> None:
    """Ensure every key in ``STORY_KEYS`` has a non-blank value in ``story``.

    A field counts as missing when the key is absent, its value is ``None``,
    or its string form is empty or whitespace only.

    Raises:
        ValueError: If any field is missing; the message lists the keys.
    """
    missing = []
    for key in STORY_KEYS:
        value = story.get(key)
        # Check None explicitly: str(None) is "None", which would otherwise
        # pass as filled-in text.
        if value is None or not str(value).strip():
            missing.append(key)
    if missing:
        raise ValueError(
            "Please complete the storytelling scaffold before plotting: "
            + ", ".join(missing)
        )
    print(
        "✅ Story scaffold complete (title, subtitle, claim, evidence, visual,"
        " takeaway, source, units, annotation, alt text)."
    )


def print_story_scaffold(story: Mapping[str, str]) -> None:
    """Validate ``story`` and print its claim, evidence, visual, takeaway and source.

    Validation is delegated to ``story_fields_are_complete``, which raises
    ``ValueError`` when a field is missing.
    """
    story_fields_are_complete(story)
    scaffold_lines = [
        "\n📖 Story Scaffold",
        f"Claim: {story['claim']}",
        f"Evidence: {story['evidence']}",
        f"Visual focus: {story['visual']}",
        f"Takeaway: {story['takeaway']}",
        f"Source: {story['source']} ({story['units']})",
    ]
    for line in scaffold_lines:
        print(line)


def apply_matplotlib_story(ax: plt.Axes, story: Mapping[str, str]) -> None:
    """Put the story's title on ``ax`` and a caption on its figure.

    The title combines ``title`` and ``subtitle`` on two left-aligned lines.
    The caption carries claim, evidence and takeaway on one line, then source
    and units on the next. ``story`` is validated first via
    ``story_fields_are_complete``.
    """
    story_fields_are_complete(story)

    heading = f"{story['title']}\n{story['subtitle']}"
    ax.set_title(heading, loc="left", pad=18)

    narrative = " | ".join(
        [
            f"Claim: {story['claim']}",
            f"Evidence: {story['evidence']}",
            f"Takeaway: {story['takeaway']}",
        ]
    )
    credits = f"Source: {story['source']} • Units: {story['units']}"
    # Figure coordinates: x just inside the left edge, y below the bottom edge.
    ax.figure.text(0.01, -0.08, f"{narrative}\n{credits}", ha="left", fontsize=10)


def annotate_callout(
    ax: plt.Axes,
    *,
    xy: tuple[float, float],
    xytext: tuple[float, float],
    text: str,
) -> None:
    """Draw a boxed text label at ``xytext`` with an arrow pointing to ``xy``."""
    arrow_style = {"arrowstyle": "->", "color": "black", "lw": 1}
    box_style = {
        "boxstyle": "round,pad=0.3",
        "fc": "white",
        "ec": "black",
        "alpha": 0.8,
    }
    ax.annotate(text, xy=xy, xytext=xytext, arrowprops=arrow_style, bbox=box_style)


def record_alt_text(text: str) -> None:
    """Print the alt text so it is recorded alongside the figure output."""
    confirmation = f"📝 Alt text ready: {text}"
    print(confirmation)


def accessibility_checklist(
    *, palette: str, has_alt_text: bool, contrast_passed: bool = True
) -> None:
    """Print a three-item accessibility checklist for a figure.

    Args:
        palette: Description of the colour palette in use.
        has_alt_text: Whether alt text has been written.
        contrast_passed: Whether the colour contrast was judged acceptable.
    """
    if has_alt_text:
        alt_status = "yes"
    else:
        alt_status = "add alt text before sharing"

    if contrast_passed:
        contrast_status = "yes"
    else:
        contrast_status = "adjust colors"

    print("♿ Accessibility checklist:")
    print(f" • Palette: {palette}")
    print(f" • Alt text provided: {alt_status}")
    print(f" • Contrast OK: {contrast_status}")


def save_figure(fig: plt.Figure, filename: str) -> Path:
    """Save ``fig`` under ``plots/`` in the current working directory.

    The ``plots`` directory is created if needed. The figure is written at
    300 dpi with a tight bounding box.

    Returns:
        The path the figure was written to.
    """
    target_dir = Path.cwd().joinpath("plots")
    target_dir.mkdir(parents=True, exist_ok=True)
    destination = target_dir.joinpath(filename)
    fig.savefig(destination, dpi=300, bbox_inches="tight")
    print(f"💾 Saved figure to {destination}")
    return destination


def save_plotly_figure(fig, filename: str) -> Path:
    """Save a Plotly figure as HTML and, when possible, as a static image.

    Both files go under ``plots/`` in the current working directory, which is
    created if needed. The HTML file takes ``filename`` with its extension
    replaced by ``.html``; the static image uses ``filename`` as given.

    Args:
        fig: Object exposing ``write_html`` and ``write_image``.
        filename: Name of the static image file, e.g. ``"chart.png"``.

    Returns:
        The path of the HTML file.
    """
    plots_dir = Path.cwd() / "plots"
    plots_dir.mkdir(parents=True, exist_ok=True)
    static_path = plots_dir / filename
    # Swap the real extension rather than string-replacing ".png": otherwise a
    # non-PNG name gives both exports the same path and the image overwrites
    # the HTML.
    html_path = static_path.with_suffix(".html")
    fig.write_html(html_path)
    print(f"💾 Saved interactive figure to {html_path}")
    # Static export is best-effort: a failure is reported, never raised.
    try:
        fig.write_image(str(static_path))
        print(f"💾 Saved static image to {static_path}")
    except Exception as exc:  # pragma: no cover - depends on kaleido
        print(f"⚠️ Static export skipped: {exc}")
    return html_path

In [None]:
from pathlib import Path

# Both working folders live directly under the current working directory.
DATA_DIR, PLOTS_DIR = (Path.cwd().joinpath(folder) for folder in ("data", "plots"))
# Only the plots folder is created here; the data folder is just referenced.
PLOTS_DIR.mkdir(parents=True, exist_ok=True)

print(f"Data directory: {DATA_DIR}")
print(f"Plots directory: {PLOTS_DIR}")

## Loop 1 · Load Renewable Series
Read each CSV and document its structure so merges are predictable.

In [None]:
# Short label -> CSV file name inside DATA_DIR.
renewable_files = dict(
    total="01 renewable-share-energy.csv",
    hydro="06 hydro-share-energy.csv",
    wind="10 wind-share-energy.csv",
    solar="14 solar-share-energy.csv",
)

# Load each file, check the identifying columns, and keep the World rows only.
dfs_world = {}
for label, filename in renewable_files.items():
    df = load_csv(
        DATA_DIR.joinpath(filename), description=f"OWID {label} renewables share"
    )
    validate_columns(df, ["Entity", "Code", "Year"], df_name=f"{label} share")
    dfs_world[label] = df.loc[df["Entity"] == "World"].copy()
    # Each world series is expected to hold between 50 and 70 rows.
    expect_rows_between(dfs_world[label], 50, 70, df_name=f"world {label}")

In [None]:
# Three-row preview of every world-level frame, keyed by technology label.
# Kept as the cell's final expression so the notebook echoes the result.
{
    name: quick_preview(frame, n=3, df_name=f"world {name}")
    for name, frame in dfs_world.items()
}

## Loop 2 · Harmonise & Merge
Keep only the needed columns, rename consistently, and combine for plotting.

In [None]:
# Total series: keep the year and the share column, under a short name.
df_total = dfs_world["total"][["Year", "Renewables (% equivalent primary energy)"]].rename(
    columns={"Renewables (% equivalent primary energy)": "Total Renewable"}
)

# Component series: dfs_world key -> column label, which is also the prefix of
# the source column name.
df_components = []
rename_map = {
    "hydro": "Hydro",
    "wind": "Wind",
    "solar": "Solar",
}
for key, label in rename_map.items():
    df_part = dfs_world[key][["Year", f"{label} (% equivalent primary energy)"]].rename(
        columns={f"{label} (% equivalent primary energy)": label}
    )
    df_components.append(df_part)

# Inner-join on Year so only years present in every series remain.
# validate="one_to_one" makes pandas raise if either side repeats a year,
# instead of silently multiplying rows.
df_merged = df_total.copy()
for component in df_components:
    df_merged = df_merged.merge(
        component, on="Year", how="inner", validate="one_to_one"
    )

expect_rows_between(df_merged, 50, 70, df_name="merged renewables")
quick_null_check(df_merged, df_name="merged renewables")

In [None]:
# First five rows of the merged table.
quick_preview(df=df_merged, df_name="merged renewables", n=5)

## Loop 3 · Reshape for Storytelling
Move to a tidy, long format and compute contribution shares.

In [None]:
# Reshape from one column per technology to one row per (Year, Technology).
value_columns = ["Hydro", "Wind", "Solar"]
df_long = pd.melt(
    df_merged,
    id_vars="Year",
    value_vars=value_columns,
    var_name="Technology",
    value_name="Share",
)
# Shares are percentages; anything outside 0-50 is treated as an error here.
numeric_sanity_check(
    df_long["Share"], minimum=0, maximum=50, name="Renewables share (%)"
)
quick_preview(df_long, n=6, df_name="tidy renewables")

## Loop 4 · Visualise & Annotate
Use the storytelling scaffold before drawing the figure.

In [None]:
# Read the year range from the data rather than from row position, so the
# story stays correct even if the merged table is not sorted by Year.
first_year = int(df_merged["Year"].min())
latest_year = int(df_merged["Year"].max())
latest_total = df_merged.loc[
    df_merged["Year"] == latest_year, "Total Renewable"
].iloc[-1]

# Every key listed in STORY_KEYS must be filled in before plotting.
story = {
    "title": "Renewables Are Growing — But Fossil Fuels Still Dominate",
    "subtitle": f"World share of primary energy from renewables, {first_year}–{latest_year}",
    "claim": "Renewable energy has more than doubled since 2000, led by wind and solar.",
    "evidence": (
        f"Total renewable share reached {latest_total:.1f}% globally with solar/wind rising fastest."
    ),
    "visual": "Stacked area for hydro/wind/solar with total share overlay.",
    "takeaway": "Growth is accelerating yet absolute share remains below 15%, highlighting decarbonisation gap.",
    "source": "Our World in Data (BP Statistical Review)",
    "units": "% of primary energy",
    "annotation": f"{latest_year}: {latest_total:.1f}% of world energy",
    "alt_text": (
        "Stacked area chart showing hydro stable near 6%, with wind and solar climbing sharply after 2005, bringing"
        f" total renewables to roughly {latest_total:.0f}% of global primary energy by {latest_year}."
    ),
}

print_story_scaffold(story)

In [None]:
fig, ax = plt.subplots(figsize=(11, 6))

# Stacking order (bottom to top) and the colour assigned to each technology.
technologies = ["Hydro", "Wind", "Solar"]
colors = dict(Hydro="#4c78a8", Wind="#9ecae9", Solar="#f28e2b")

# Stacked areas for the three component shares.
ax.stackplot(
    df_merged["Year"],
    [df_merged[name] for name in technologies],
    labels=[f"{name} share" for name in technologies],
    colors=[colors[name] for name in technologies],
    alpha=0.8,
)
# Total renewable share drawn as a line over the stacked areas.
ax.plot(
    df_merged["Year"],
    df_merged["Total Renewable"],
    color="#2f4b7c",
    linewidth=3,
    label="Total renewable share",
)

ax.set_xlabel("Year")
ax.set_ylabel("Share of primary energy (%)")
ax.set_ylim(bottom=0)
ax.legend(loc="upper left", ncol=2)

# Title, caption and callout all come from the story dictionary.
apply_matplotlib_story(ax, story)
annotate_callout(
    ax,
    text=story["annotation"],
    xy=(latest_year, latest_total),
    xytext=(1995, latest_total + 5),
)

record_alt_text(story["alt_text"])
accessibility_checklist(
    palette="Colorblind-safe blues and orange overlay",
    has_alt_text=True,
)

In [None]:
# Write the finished chart into the plots folder.
save_figure(fig=fig, filename="day02_solution_plot.png")