## 🔗 Open This Notebook in Google Colab

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DavidLangworthy/ds4s/blob/master/Day%202%20%E2%80%93%20Fossil%20Fuels%20vs.%20Renewables.ipynb)

# ⚡ Day 2 · Charting the Energy Transition

Today's focus is building comparisons that stay trustworthy under classroom time pressure. We'll hop between quick context and hands-on code so the data never feels like a cliff. By the end you will:

- Assemble a tidy table of global renewable energy shares
- Verify contributions from hydro, wind, and solar
- Tell a story about how the mix has (and has not) changed

## 🗂️ Data Card · Our World in Data Energy Shares
| Field | Details |
| --- | --- |
| Source | [Our World in Data – BP Statistical Review & IEA](https://ourworldindata.org/energy-mix) |
| Temporal coverage | 1965–2021 |
| Geographic scope | Global (World aggregate) |
| Units | % of primary energy supply |
| Update cadence | Annual |
| Caveats | Hydropower dominates early years; dataset lags by ~2 years; technologies beyond hydro/wind/solar (e.g., bioenergy, geothermal) are not broken out. |
| What this chart cannot show | Country-level disparities, seasonal variability, or total demand growth. Pair with demand or emissions data for deeper context. |

In [None]:
# 🔁 Shared scaffolds used across DS4S notebooks
from __future__ import annotations

import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Notebook-wide Matplotlib defaults so every chart in the session shares one look.
plt.rcParams.update(
    {
        # Canvas
        "figure.dpi": 120,
        "axes.facecolor": "#f8f9fb",
        # Grid: on by default, but faint and dashed so it stays in the background
        "axes.grid": True,
        "grid.linestyle": "--",
        "grid.alpha": 0.25,
        # Title and axis-label typography
        "axes.titlesize": 18,
        "axes.titleweight": "bold",
        "axes.labelsize": 12,
        "font.family": "DejaVu Sans",
        # Legend
        "legend.fontsize": 11,
        "legend.frameon": False,
    }
)

def quick_diagnostics(
    df: pd.DataFrame,
    dataset_name: str,
    *,
    expected_columns: list[str] | None = None,
    expected_rows: tuple[int, int] | None = None,
) -> None:
    """Print a short health report for ``df``; problems warn instead of raising.

    The report shows the dataset name, shape, column list, per-column null
    counts and the first rows, followed by a separator line.

    Args:
        df: Table to inspect.
        dataset_name: Label printed at the top of the report.
        expected_columns: Column names that should be present. Any that are
            absent are reported through ``warnings.warn``.
        expected_rows: Inclusive ``(low, high)`` bounds for ``len(df)``. A row
            count outside the bounds is reported through ``warnings.warn``.
    """
    print(f"\n🔍 {dataset_name}")
    print(f"Shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")

    if expected_columns is not None:
        absent = [name for name in expected_columns if name not in df.columns]
        if absent:
            warnings.warn(f"Missing expected columns: {absent}")

    if expected_rows is not None:
        minimum, maximum = expected_rows
        row_count = len(df)
        if minimum <= row_count <= maximum:
            print(f"Row count within expected range {expected_rows}.")
        else:
            warnings.warn(f"Row count {row_count} outside expected range {expected_rows}")

    print("Null counts:")
    print(df.isna().sum())
    print("Preview:")
    print(df.head())
    print("-" * 60)

def expect_value_range(series: pd.Series, *, lower: float | None = None, upper: float | None = None, context: str = "") -> None:
    """Warn when values fall outside an expected numeric window.

    Args:
        series: Values to check.
        lower: Smallest acceptable value; ``None`` skips the lower check.
        upper: Largest acceptable value; ``None`` skips the upper check.
        context: Label used in messages. Falls back to ``series.name`` and
            then to the literal ``"series"``.

    The observed minimum and maximum are always printed. Out-of-range values
    and series with nothing to check are reported through ``warnings.warn``
    rather than raised.
    """
    label = context or series.name or "series"
    # Compute the extremes once; they feed the checks, the warnings and the print.
    observed_min = series.min()
    observed_max = series.max()

    # Comparisons against NaN are always False, so an empty or all-missing
    # series would otherwise look as if it passed the range check.
    if pd.isna(observed_min) or pd.isna(observed_max):
        warnings.warn(f"{label}: no non-missing values to range-check")

    if lower is not None and float(observed_min) < lower:
        warnings.warn(f"{label}: minimum {observed_min:.3f} is below expected {lower}")
    if upper is not None and float(observed_max) > upper:
        warnings.warn(f"{label}: maximum {observed_max:.3f} is above expected {upper}")
    print(f"{label}: {observed_min:.3f} → {observed_max:.3f}")

def validate_story_elements(*, title: str, subtitle: str, annotation: str, source: str, units: str) -> None:
    """Confirm the storytelling scaffold is filled before plotting.

    A field counts as missing when it is ``None`` or when its string form is
    empty or whitespace only. Missing fields are listed in a single warning;
    when every field is filled a confirmation line is printed instead.

    Args:
        title: Chart headline.
        subtitle: Supporting line for the headline.
        annotation: Takeaway sentence for the chart.
        source: Data attribution text.
        units: Unit description.
    """
    elements = {
        "TITLE": title,
        "SUBTITLE": subtitle,
        "ANNOTATION": annotation,
        "SOURCE": source,
        "UNITS": units,
    }
    # str(None) is the non-empty text "None", so None needs its own check or
    # an unfilled field would pass as complete.
    missing = [
        key
        for key, value in elements.items()
        if value is None or not str(value).strip()
    ]
    if missing:
        warnings.warn(f"Please fill these storytelling fields: {', '.join(missing)}")
    else:
        print("👍 Story scaffold complete.")

def baseline_style(ax: plt.Axes | None = None) -> plt.Axes:
    """Standardise axes styling for consistency across notebooks.

    Hides the top and right spines and sets the axes face colour to white.

    Args:
        ax: Axes to style. When ``None``, the current axes from ``plt.gca()``
            is used.

    Returns:
        The styled axes, so the call can be chained or reassigned.
    """
    # Compare against None explicitly: a truthiness test would silently swap
    # in plt.gca() for any axes-like object that happens to evaluate falsy.
    if ax is None:
        ax = plt.gca()
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.set_facecolor("#ffffff")
    return ax

def save_last_visual(fig, filename: str, *, subfolder: str = "plots") -> None:
    """Persist the most recent Matplotlib or Plotly figure without failing the run.

    The figure is written to ``Path.cwd() / subfolder / filename``. Objects
    exposing ``write_image`` are saved through it (receiving the path as a
    string); otherwise objects exposing ``savefig`` are saved at 300 dpi with
    a tight bounding box. Anything else is skipped with a warning.

    Args:
        fig: Figure-like object to export.
        filename: Name of the output file inside the target folder.
        subfolder: Folder, resolved against the current working directory,
            that receives the file. Created if it does not exist.
    """
    plots_dir = Path.cwd() / subfolder
    output_path = plots_dir / filename
    try:
        # Directory creation lives inside the try block so that a filesystem
        # problem (read-only location, invalid name) is downgraded to a
        # warning like every other export failure.
        plots_dir.mkdir(parents=True, exist_ok=True)
        if hasattr(fig, "write_image"):
            fig.write_image(str(output_path))
        elif hasattr(fig, "savefig"):
            fig.savefig(output_path, dpi=300, bbox_inches="tight")
        else:
            warnings.warn("Figure type not supported for export; skipping save.")
            return
        print(f"Saved visual to {output_path}")
    except Exception as exc:
        # Deliberately broad: exporting is best-effort and must never stop the
        # notebook, whichever backend raised.
        warnings.warn(f"Plot export skipped: {exc}")


## Step 1 · Load the global energy share tables
Import each CSV and run diagnostics immediately. Check that each file has the expected columns (`Entity`, `Code`, `Year`, plus its share column) and roughly 5,000–6,000 rows covering all countries and years.

In [None]:
# CSVs are read from a "data" folder under the current working directory.
data_dir = Path.cwd() / "data"

# label -> (CSV file name, name of the share column inside that file)
energy_files = {
    "renewable_total": ("01 renewable-share-energy.csv", "Renewables (% equivalent primary energy)"),
    "hydro": ("06 hydro-share-energy.csv", "Hydro (% equivalent primary energy)"),
    "wind": ("10 wind-share-energy.csv", "Wind (% equivalent primary energy)"),
    "solar": ("14 solar-share-energy.csv", "Solar (% equivalent primary energy)"),
}

# Read each file, report on it straight away, then keep the raw table by label.
datasets = {}
for series_key, (csv_name, share_column) in energy_files.items():
    raw_table = pd.read_csv(data_dir / csv_name)
    required_columns = ["Entity", "Code", "Year", share_column]
    quick_diagnostics(
        raw_table,
        f"{series_key} raw",
        expected_columns=required_columns,
        expected_rows=(5000, 6000),
    )
    datasets[series_key] = raw_table


## Step 2 · Isolate the world aggregate and combine sources
Filter to the `World` entity, rename columns for clarity, and merge into a single tidy frame. Confirm we still have one row per year.

In [None]:
# Reduce every raw table to the World aggregate: one (Year, <label>) frame each.
world_frames = {}
for label, (_filename, column) in energy_files.items():  # file name is not needed here
    frame = datasets[label]
    world_only = frame.loc[frame["Entity"] == "World", ["Year", column]].copy()
    world_frames[label] = world_only.rename(columns={column: label})
    quick_diagnostics(
        world_frames[label],
        f"{label} · World",
        expected_columns=["Year", label],
        expected_rows=(50, 70),
    )

# The total-renewables table is the spine of the merge: left joins keep each of
# its years even when a technology table has no row for that year.
df_mix = (
    world_frames["renewable_total"]
    .merge(world_frames["hydro"], on="Year", how="left")
    .merge(world_frames["wind"], on="Year", how="left")
    .merge(world_frames["solar"], on="Year", how="left")
    .sort_values("Year")
    .reset_index(drop=True)
)

# A repeated Year in any input multiplies rows in a left merge, so check the
# "one row per year" expectation explicitly. Warn rather than raise, in line
# with the other diagnostics in this notebook.
duplicate_years = df_mix.loc[df_mix["Year"].duplicated(), "Year"].unique().tolist()
if duplicate_years:
    warnings.warn(f"World renewable mix has repeated years: {duplicate_years}")

column_map = {
    "renewable_total": "RenewableShare",
    "hydro": "HydroShare",
    "wind": "WindShare",
    "solar": "SolarShare",
}
df_mix = df_mix.rename(columns=column_map)

# Years with no wind or solar value are treated as a 0% share.
df_mix[["WindShare", "SolarShare"]] = df_mix[["WindShare", "SolarShare"]].fillna(0)

# TechTotal is what the three named technologies add up to; the remainder of
# the reported renewable total is kept as UnattributedShare.
df_mix["TechTotal"] = df_mix[["HydroShare", "WindShare", "SolarShare"]].sum(axis=1)
df_mix["UnattributedShare"] = df_mix["RenewableShare"] - df_mix["TechTotal"]

quick_diagnostics(
    df_mix,
    "World renewable mix",
    expected_columns=[
        "Year",
        "RenewableShare",
        "HydroShare",
        "WindShare",
        "SolarShare",
        "TechTotal",
        "UnattributedShare",
    ],
    expected_rows=(50, 70),
)


## Step 3 · Sanity-check contributions
Are all shares in plausible ranges? Does the technology sum roughly match the total? Use these quick prints to self-diagnose before plotting.

In [None]:
# Upper limit (in percent) passed to the range check for each share column;
# the lower limit is 0 for all of them.
share_ceilings = {
    "RenewableShare": 40,
    "HydroShare": 35,
    "WindShare": 10,
    "SolarShare": 10,
}
for share_name, ceiling in share_ceilings.items():
    expect_value_range(
        df_mix[share_name],
        lower=0,
        upper=ceiling,
        context=f"{share_name} (%)",
    )

# Show the latest rows, then summarise the gap between the reported total and
# the hydro + wind + solar sum.
print("Recent years focus:")
print(df_mix.tail())
print("Unattributed share (should stay near zero):")
print(df_mix["UnattributedShare"].describe())


### Expected trend preview
Hydro dominates early decades, with wind and solar rising recently. Use the preview below to orient your final plot.

![Preview of the finished chart](../../../plots/day02_solution_plot.png)

## Step 4 · Tell the energy story with a stacked visual
Build a stacked area chart for the technology mix, overlay the total renewable share, and narrate what the audience should notice.

In [None]:
import seaborn as sns

# Storytelling scaffold: every piece of text that appears on the chart.
TITLE = "Renewables nearly doubled their share since 2000"
SUBTITLE = "Global mix of hydro, wind, and solar as share of primary energy (1965–2021)"
ANNOTATION = "Wind and solar now supply about 4.6% of global energy—hydro still carries most renewables."
SOURCE = "Source: Our World in Data (BP Statistical Review & IEA, accessed 2025-01-05)"
UNITS = "Share of global primary energy (%)"

validate_story_elements(
    title=TITLE,
    subtitle=SUBTITLE,
    annotation=ANNOTATION,
    source=SOURCE,
    units=UNITS,
)

# One colour per stacked technology, taken from seaborn's "colorblind" palette.
palette = sns.color_palette("colorblind", 3)
# df_mix is sorted by Year, so the last row is the most recent year.
latest_row = df_mix.iloc[-1]

fig_energy, ax = plt.subplots(figsize=(12, 7))
ax = baseline_style(ax)

# Stacked areas show the hydro / wind / solar breakdown ...
ax.stackplot(
    df_mix["Year"],
    df_mix["HydroShare"],
    df_mix["WindShare"],
    df_mix["SolarShare"],
    labels=["Hydro", "Wind", "Solar"],
    colors=palette,
    alpha=0.85,
)
# ... and the line on top shows the reported total renewable share.
ax.plot(
    df_mix["Year"],
    df_mix["RenewableShare"],
    color="#1f2d3d",
    linewidth=2.5,
    label="Total renewable share",
)

ax.set_title(TITLE, loc="left", pad=18)
# The subtitle is placed in axes coordinates, just above the plotting area.
ax.text(0.0, 1.03, SUBTITLE, transform=ax.transAxes, fontsize=12, color="#555555")
ax.set_xlabel("Year")
ax.set_ylabel(UNITS)
# Keep at least 18% of vertical range, or two points of headroom above the peak.
ax.set_ylim(0, max(18, df_mix["RenewableShare"].max() + 2))
ax.legend(loc="upper left", ncol=2)

# Call out the latest data point; the label sits up and to the left of it.
ax.annotate(
    f"{int(latest_row['Year'])}: {latest_row['RenewableShare']:.1f}% renewables",
    xy=(latest_row["Year"], latest_row["RenewableShare"]),
    xytext=(latest_row["Year"] - 15, latest_row["RenewableShare"] + 3),
    arrowprops=dict(arrowstyle="->", color="#333333"),
    fontsize=12,
    color="#333333",
)

# Footer below the axes: takeaway on the first line, source and units on the
# second. The line break must be the "\n" escape, because a single-quoted
# f-string cannot contain a raw newline.
ax.text(
    0.01,
    -0.20,
    f"{ANNOTATION}\n{SOURCE} · Units: {UNITS}",
    transform=ax.transAxes,
    fontsize=10,
    color="#555555",
    va="top",
)

plt.tight_layout()
plt.show()


In [None]:
# Export the finished chart into the "plots" folder under the working directory.
save_last_visual(
    fig_energy,
    filename="day02_solution_plot.png",
    subfolder="plots",
)

## 🔍 Reflection & limitations
- Remaining question: the chart does not reveal total energy demand—renewable share can rise while absolute fossil use stays high. Pair with demand or emissions data to discuss sufficiency.
- Teaching note: invite fast finishers to break out country-level leaders while others catch up using the shared diagnostics.
- Ethical lens: discuss communities affected by hydro build-out and where new renewable capacity is (or isn't) being deployed.