# Ambulance Station to Hospital Travel Time Analysis

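This notebook builds a directed bipartite graph connecting ambulance stations to acute hospitals. Edge weights are LSOA-to-LSOA car travel times where a travel matrix is available, with a straight-line (British National Grid) fallback estimate otherwise. It then produces summary analytics, a static map, and export artefacts (CSV, GraphML, and a JSON metadata receipt).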

In [1]:
# Step 0 — Status banner & utilities
# - Purpose: central helpers for paths, logging, and safe "status banner".
# - High level, no secrets: show versions, commit SHA, and environment sanity.
# - Back-checks: project root discovery; graceful fallback if not git repo.
# - Design: no side effects on import; pure helpers called by later steps.

from __future__ import annotations

import json
import os
import subprocess
import sys
import textwrap
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Tuple, Optional

import numpy as np
import pandas as pd
import geopandas as gpd


# Files/directories whose presence marks a directory as the project root.
PROJECT_MARKERS = {".git", "README.md", "pyproject.toml", "requirements.txt"}


def find_project_root(start: Optional[Path] = None) -> Path:
    """Walk up from ``start`` (or the CWD) until a project marker is found.

    Parameters
    ----------
    start:
        Directory to begin the search from. May be relative; it is resolved
        to an absolute path first, because a relative path such as
        ``Path(".")`` has no ``.parents`` and would otherwise never be
        walked upwards. Defaults to the current working directory.

    Returns
    -------
    Path
        The nearest directory (starting directory included) that contains any
        entry of ``PROJECT_MARKERS``; the starting directory itself when no
        ancestor qualifies.
    """
    here = Path.cwd() if start is None else Path(start).resolve()
    for candidate in (here, *here.parents):
        if any((candidate / marker).exists() for marker in PROJECT_MARKERS):
            return candidate
    return here  # fallback: nothing found, stay where we started


def get_git_sha(root: Path) -> str:
    """Return the short git SHA of ``root``'s HEAD, or ``'unknown'``.

    This is a best-effort lookup for the status banner and metadata receipt:
    any failure to run git (not installed, ``root`` missing, not a repository,
    no commits yet) yields ``'unknown'`` rather than an exception.

    Parameters
    ----------
    root:
        Directory in which ``git rev-parse --short HEAD`` is executed.
    """
    try:
        raw = subprocess.check_output(
            ["git", "rev-parse", "--short", "HEAD"],
            cwd=str(root),
            # Keep git's "fatal: not a git repository" noise out of the
            # notebook output; the fallback value already conveys it.
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.SubprocessError, OSError):
        return "unknown"
    sha = raw.decode("utf-8", errors="replace").strip()
    return sha or "unknown"


def banner(msg: str) -> None:
    """Print ``msg`` wrapped to 78 columns between two 80-character rules.

    A blank line is emitted before the top rule and after the bottom rule so
    the banner stands apart from surrounding cell output.
    """
    rule = "=" * 80
    wrapped = textwrap.fill(msg, width=78)
    # One print call; its implicit trailing newline supplies the final blank line.
    print(f"\n{rule}\n{wrapped}\n{rule}\n")


def now_stamp() -> str:
    """Return the local time as ``YYYYMMDD-HHMM`` for naming output folders.

    Resolution is one minute, so two calls within the same minute return the
    same stamp.
    """
    return f"{datetime.now():%Y%m%d-%H%M}"


# Resolve the project root and commit once; later cells reuse ROOT and SHA.
ROOT = find_project_root()
SHA = get_git_sha(ROOT)

# Report where we are running and with which library versions.
banner(
    "".join(
        (
            f"01b_directed_bipartite_graph — GeoPandas (BNG) | Root={ROOT} | SHA={SHA} | ",
            f"Python {sys.version.split()[0]} | Pandas {pd.__version__} | GeoPandas {gpd.__version__}",
        )
    )
)



01b_directed_bipartite_graph — GeoPandas (BNG) |
Root=/Users/rosstaylor/Downloads/Code Repositories/REACH Map (NHS SW)/GitHub
Repo/REACH-Map-NHS-SW | SHA=41b45a5 | Python 3.11.13 | Pandas 2.3.2 |
GeoPandas 1.1.1



In [2]:
# Step 1 — Configuration & file contracts (no absolute paths)
# - Purpose: centralise constants, CRS, thresholds, and I/O locations.
# - Back-checks: assert directories exist; guide users with clear actions.
# - Design: small dataclass for config; easy reuse in later notebooks.
# - No hard-coded user paths; everything relative to project ROOT.

BNG_EPSG = 27700  # British National Grid
WGS84_EPSG = 4326  # WGS84 lat/lon; the CRS assigned to the input CSV coordinates


@dataclass(frozen=True)
class Config:
    """Immutable bundle of input paths, output paths, and travel-time parameters.

    Frozen so that later cells cannot accidentally mutate the configuration.
    Field order defines the positional constructor signature.
    """

    # Inputs (produced by step 01 and test slice)
    data_dir: Path  # top-level data directory
    test_dir: Path  # directory holding the test-slice input files
    stations_csv: Path  # ambulance stations; needs code/name/latitude/longitude columns
    hospitals_csv: Path  # acute hospitals; same required columns as stations
    travel_matrix_csv: Path  # optional LSOA→LSOA travel times; skipped if the file is absent
    lsoa_gpkg: Path  # GeoPackage containing the LSOA polygons
    lsoa_layer: str  # layer name passed to gpd.read_file when loading lsoa_gpkg

    # Outputs
    out_dir: Path  # base output directory for this notebook
    out_stamp_dir: Path  # per-run subdirectory that all artefacts are written into

    # Edge/time params
    max_minutes: int  # station→hospital pairs with a longer travel time are dropped
    v_kph: float  # speed (km/h) used to convert straight-line distance to fallback minutes
    fudge: float  # multiplier applied to the fallback minutes
    top_k: int  # number of fastest hospitals kept per station in the top-K table


# Shared path prefixes, all relative to the discovered project ROOT.
_test_slice_dir = ROOT / "data" / "raw" / "test_data_ICB_level"
_notebook_out_dir = ROOT / "outputs" / "01b_directed_bipartite_graph"

cfg = Config(
    data_dir=ROOT / "data",
    test_dir=_test_slice_dir,
    stations_csv=_test_slice_dir / "ambulance_stations_icb.csv",
    hospitals_csv=_test_slice_dir / "acute_hospitals_icb.csv",
    travel_matrix_csv=_test_slice_dir / "travel_matrix_lsoa_icb.csv",
    lsoa_gpkg=_test_slice_dir / "demographics_age_continuous_icb.gpkg",
    lsoa_layer="LSOA_continuous_age_icb",
    out_dir=_notebook_out_dir,
    out_stamp_dir=_notebook_out_dir / now_stamp(),
    max_minutes=120,
    v_kph=50.0,
    fudge=1.30,
    top_k=3,
)

# Create the base and per-run output folders (no-op when they already exist).
for _out_path in (cfg.out_dir, cfg.out_stamp_dir):
    _out_path.mkdir(parents=True, exist_ok=True)

# Only the mandatory inputs are checked here; the travel matrix is optional.
missing = [
    path
    for path in (cfg.stations_csv, cfg.hospitals_csv, cfg.lsoa_gpkg)
    if not path.exists()
]
if missing:
    missing_listing = "\n  - ".join(map(str, missing))
    raise FileNotFoundError(
        "Missing required input files:\n  - " + missing_listing +
        "\nPlease run `01_develop_test_data_cornwall_icb` and/or place test slice files."
    )

banner(
    f"Config OK — outputs → {cfg.out_stamp_dir}\n"
    f"max_minutes={cfg.max_minutes}, v_kph={cfg.v_kph}, fudge={cfg.fudge}, top_k={cfg.top_k}"
)


Config OK — outputs → /Users/rosstaylor/Downloads/Code Repositories/REACH Map
(NHS SW)/GitHub Repo/REACH-Map-NHS-
SW/outputs/01b_directed_bipartite_graph/20251026-2010 max_minutes=120,
v_kph=50.0, fudge=1.3, top_k=3



In [3]:
# Step 2 — Load inputs (stations, hospitals, LSOA polygons, optional travel)
# - Purpose: read CSV/GPKG files; keep only canonical columns; light cleaning.
# - Back-checks: basic schema checks; explain remediation if columns missing.
# - Lat/Lon not trusted yet: we will validate and reproject later.
# - Travel matrix optional: proceed without if not present.

# Columns each input must expose after header normalisation.
REQ_STATION_COLS = {"code", "name", "latitude", "longitude"}
REQ_HOSP_COLS = {"code", "name", "latitude", "longitude"}
TRAVEL_COLS = {"origin_lsoa", "dest_lsoa", "time_car_min"}

# Read everything as text and blank out missing cells; coordinates are
# converted to numbers later, when the geometries are built.
stations_raw, hospitals_raw = (
    pd.read_csv(csv_path, dtype="string").fillna("")
    for csv_path in (cfg.stations_csv, cfg.hospitals_csv)
)
banner(f"Loaded stations={len(stations_raw):,}, hospitals={len(hospitals_raw):,}")


def normalise_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with lower-cased, stripped, harmonised headers.

    Common aliases are mapped to the canonical names used by this notebook
    (``lat`` → ``latitude``, ``lon``/``long`` → ``longitude``, ``id`` →
    ``code``). An alias is renamed only when its canonical name is not already
    taken, so the result never gains duplicate column labels (e.g. a frame
    with both ``id`` and ``code`` keeps ``code`` and leaves ``id`` untouched).

    Parameters
    ----------
    df:
        Input frame; it is not modified.

    Returns
    -------
    pd.DataFrame
        A new frame with the normalised column labels.
    """
    aliases = {
        "lat": "latitude",
        "lon": "longitude",
        "long": "longitude",
        "id": "code",
    }
    out = df.copy()
    # str() guards against non-string labels (e.g. integer headers).
    out.columns = [str(col).strip().lower() for col in out.columns]

    taken = set(out.columns)
    mapping: Dict[str, str] = {}
    for alias, canonical in aliases.items():
        if alias in taken and canonical not in taken:
            mapping[alias] = canonical
            taken.add(canonical)  # first alias wins; later ones are left as-is
    return out.rename(columns=mapping)


stations_raw = normalise_cols(stations_raw)
hospitals_raw = normalise_cols(hospitals_raw)

# Fail early, naming exactly which required columns are absent.
missing_station_cols = REQ_STATION_COLS - set(stations_raw.columns)
if missing_station_cols:
    raise ValueError(
        f"Stations CSV missing columns: {missing_station_cols}"
    )
missing_hosp_cols = REQ_HOSP_COLS - set(hospitals_raw.columns)
if missing_hosp_cols:
    raise ValueError(
        f"Hospitals CSV missing columns: {missing_hosp_cols}"
    )

# LSOA polygons (BNG expected); reproject when the layer uses another CRS.
lsoa_g = gpd.read_file(cfg.lsoa_gpkg, layer=cfg.lsoa_layer)
if lsoa_g.crs is None:
    raise ValueError("LSOA layer has no CRS. Expected EPSG:27700 (BNG).")
if lsoa_g.crs.to_epsg() != BNG_EPSG:
    lsoa_g = lsoa_g.to_crs(epsg=BNG_EPSG)

# Optional travel matrix: stays None when the file is absent or malformed.
travel = None
if not cfg.travel_matrix_csv.exists():
    banner("No travel matrix found — will compute fallback times from BNG distance.")
else:
    travel = pd.read_csv(cfg.travel_matrix_csv, dtype="string")
    travel.columns = [c.strip().lower() for c in travel.columns]
    if TRAVEL_COLS.issubset(travel.columns):
        # Normalise LSOA codes, then coerce minutes to numbers and drop
        # rows whose time could not be parsed.
        for col in ("origin_lsoa", "dest_lsoa"):
            travel[col] = travel[col].str.strip().str.upper()
        travel["time_car_min"] = pd.to_numeric(travel["time_car_min"], errors="coerce")
        travel = travel.dropna(subset=["time_car_min"]).reset_index(drop=True)
        banner(f"Travel matrix loaded, rows={len(travel):,}")
    else:
        travel = None
        banner("Travel matrix present but columns mismatch; ignoring for now.")



Loaded stations=14, hospitals=3


Travel matrix loaded, rows=112,560



In [4]:
# Step 3 — Lat/Lon sanity, dedupe, and geometry build (WGS84 → BNG)
# - Purpose: canonicalise coordinates; auto-swap obviously inverted lat/lon.
# - Back-checks: Cornwall bounds check; drop duplicates; assert unique codes.
# - Output: GeoDataFrames in both WGS84 and BNG with x_bng/y_bng columns.

# Plausible coordinate window used to detect swapped latitude/longitude.
SW_LON_MIN, SW_LON_MAX = -6.5, -3.0
SW_LAT_MIN, SW_LAT_MAX = 49.0, 52.0


def fix_latlon(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with numeric coordinates, un-swapping where needed.

    ``latitude`` and ``longitude`` are coerced to numbers (unparseable values
    become missing). A row is swapped only when it falls outside the SW bounds
    as given *and* would fall inside them with the two values exchanged. Rows
    that are out of bounds either way are returned unchanged. A banner reports
    the number of swapped rows when there are any.
    """
    out = df.copy()
    for coord in ("latitude", "longitude"):
        out[coord] = pd.to_numeric(out[coord], errors="coerce")

    lat, lon = out["latitude"], out["longitude"]
    within_bounds = (
        lon.between(SW_LON_MIN, SW_LON_MAX) & lat.between(SW_LAT_MIN, SW_LAT_MAX)
    )
    within_if_swapped = (
        lat.between(SW_LON_MIN, SW_LON_MAX) & lon.between(SW_LAT_MIN, SW_LAT_MAX)
    )
    needs_swap = ~within_bounds & within_if_swapped

    if needs_swap.any():
        # Read both columns in reversed order, then write them back positionally.
        exchanged = out.loc[needs_swap, ["longitude", "latitude"]].values
        out.loc[needs_swap, ["latitude", "longitude"]] = exchanged
        banner(f"Auto-swapped lat/lon for {needs_swap.sum()} rows based on bounds check.")
    return out


def to_geoms(df: pd.DataFrame, code_name: str) -> Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """Return ``(WGS84 gdf, BNG gdf)`` with a consistent schema and unique codes.

    Steps: sanitise coordinates with ``fix_latlon``, keep the four canonical
    columns, drop rows with any missing value, keep the first row per ``code``,
    build point geometries in WGS84 and reproject to BNG. The BNG frame also
    gets ``x_bng``/``y_bng`` coordinate columns.

    Rows removed for missing values or duplicate codes are reported in a
    banner (labelled with ``code_name``) instead of vanishing silently. The
    earlier post-deduplication uniqueness check could never fail and has been
    replaced by this report.

    Parameters
    ----------
    df:
        Frame with ``code``, ``name``, ``latitude`` and ``longitude`` columns.
    code_name:
        Human-readable label (e.g. ``"station"``) used in the warning text.
    """
    cleaned = fix_latlon(df)[["code", "name", "latitude", "longitude"]]
    complete = cleaned.dropna()
    n_incomplete = len(cleaned) - len(complete)

    # duplicated() flags every occurrence after the first, matching
    # drop_duplicates(subset=["code"]) with its default keep="first".
    is_repeat = complete["code"].duplicated()
    n_repeats = int(is_repeat.sum())
    if n_incomplete or n_repeats:
        banner(
            f"Warning: dropped {n_incomplete} {code_name} row(s) with missing values "
            f"and {n_repeats} row(s) with a duplicate {code_name} code."
        )
    base = complete.loc[~is_repeat].reset_index(drop=True)

    g_wgs = gpd.GeoDataFrame(
        base,
        geometry=gpd.points_from_xy(base["longitude"], base["latitude"]),
        crs=f"EPSG:{WGS84_EPSG}",
    )
    g_bng = g_wgs.to_crs(epsg=BNG_EPSG)
    g_bng["x_bng"] = g_bng.geometry.x
    g_bng["y_bng"] = g_bng.geometry.y
    return g_wgs, g_bng


# Build both CRS variants for each point set (stations first, then hospitals).
(stations_wgs, stations_bng), (hospitals_wgs, hospitals_bng) = (
    to_geoms(stations_raw, "station"),
    to_geoms(hospitals_raw, "hospital"),
)

n_stations, n_hospitals = len(stations_bng), len(hospitals_bng)
banner(
    f"Geoms OK — stations={n_stations:,}, hospitals={n_hospitals:,}, "
    f"CRS=BNG EPSG:{BNG_EPSG}"
)



Auto-swapped lat/lon for 14 rows based on bounds check.


Auto-swapped lat/lon for 3 rows based on bounds check.


Geoms OK — stations=14, hospitals=3, CRS=BNG EPSG:27700



In [5]:
# Step 4 — Map points to nearest LSOA (for optional official travel use)
# - Purpose: attach nearest LSOA code to each station/hospital (BNG nearest).
# - Back-checks: ensure LSOA has code column; warn if not found.
# - Use: enables LSOA→LSOA time lookups when travel table present.

# Candidate names for the LSOA code column, in priority order. The 2021
# ONS names are appended after the original candidates so that any layer
# that matched before still resolves to the same column.
_LSOA_CODE_CANDIDATES = (
    "lsoa_code", "LSOA11CD", "lsoa11cd", "code", "LSOA21CD", "lsoa21cd",
)
LSOA_CODE_COL = next(
    (c for c in _LSOA_CODE_CANDIDATES if c in lsoa_g.columns),
    None,
)
if LSOA_CODE_COL is None:
    raise KeyError("Cannot find an LSOA code column in the GPKG layer.")

# One point per LSOA for nearest-neighbour matching. representative_point()
# is used rather than the centroid; despite the variable name these are
# representative points.
lsoa_centroids = lsoa_g.copy()
lsoa_centroids["geometry"] = lsoa_centroids.geometry.representative_point()


def attach_nearest_lsoa(points_bng: gpd.GeoDataFrame, label: str) -> gpd.GeoDataFrame:
    """Attach the nearest LSOA code and its distance to each point.

    Each point is matched to the nearest LSOA point in ``lsoa_centroids``.
    The result is ``points_bng`` plus two columns: ``lsoa_code`` and
    ``nearest_m`` (the join distance, in the units of the layers' CRS).

    Guarantees compared with a plain join-and-merge:

    * exactly one row per ``code`` even when several LSOA points are
      equidistant (``sjoin_nearest`` returns one row per tied neighbour);
      ties resolve to the lowest ``lsoa_code`` so the result is deterministic;
    * works when the LSOA code column is itself called ``code`` (it is renamed
      before the join, avoiding a name clash with the point ``code``);
    * safe to re-run on an already-annotated frame: stale ``lsoa_code`` /
      ``nearest_m`` columns are replaced instead of producing ``_x``/``_y``
      suffixed duplicates.

    Parameters
    ----------
    points_bng:
        Point layer with ``code``, ``name`` and ``geometry`` columns.
    label:
        Name of the point set, used only in the warning banner.
    """
    lsoa_points = lsoa_centroids[[LSOA_CODE_COL, "geometry"]].rename(
        columns={LSOA_CODE_COL: "lsoa_code"}
    )
    joined = gpd.sjoin_nearest(
        points_bng[["code", "name", "geometry"]],
        lsoa_points,
        how="left",
        distance_col="nearest_m",
    )
    if joined["lsoa_code"].isna().any():
        banner(f"Warning: {label} rows lack a nearest LSOA; check geometries/CRS.")

    nearest = (
        joined[["code", "lsoa_code", "nearest_m"]]
        .sort_values("lsoa_code", kind="stable")
        .drop_duplicates(subset="code")
    )
    stale = [c for c in ("lsoa_code", "nearest_m") if c in points_bng.columns]
    return points_bng.drop(columns=stale).merge(nearest, on="code")


# Annotate both point layers with their nearest LSOA (stations, then hospitals).
stations_bng, hospitals_bng = (
    attach_nearest_lsoa(points, label)
    for points, label in ((stations_bng, "stations"), (hospitals_bng, "hospitals"))
)

banner("Nearest LSOA attached to stations and hospitals.")


Nearest LSOA attached to stations and hospitals.



In [6]:
# Step 5 — Build station×hospital pairs and compute travel minutes
# - Purpose: construct bipartite candidate edges with time attributes.
# - Back-checks: use official LSOA→LSOA times when available; else fallback.
# - Fallback: BNG straight-line → minutes via v_kph × fudge; cap by max_minutes.

def bng_dist_m(a_x: np.ndarray, a_y: np.ndarray, b_x: np.ndarray, b_y: np.ndarray) -> np.ndarray:
    """Element-wise Euclidean distance between points ``a`` and ``b``.

    All arguments broadcast against each other, so ``b_x``/``b_y`` may be
    arrays matching ``a_x``/``a_y`` (as the caller in this notebook passes) or
    scalars. The result is in the same units as the inputs.
    """
    squared = np.square(a_x - b_x) + np.square(a_y - b_y)
    return np.sqrt(squared)


# Per-side attribute tables with side-specific column names. rename()
# returns a new frame, so the source GeoDataFrames are never modified.
# (The original called .copy() on the column *list*, not on the frame.)
_pair_cols = ["code", "name", "x_bng", "y_bng", "lsoa_code"]
stations_b = stations_bng[_pair_cols].rename(
    columns={"code": "station_code", "name": "station_name", "lsoa_code": "station_lsoa"}
)
hospitals_b = hospitals_bng[_pair_cols].rename(
    columns={"code": "hospital_code", "name": "hospital_name", "lsoa_code": "hospital_lsoa"}
)

# Every station paired with every hospital. The shared x_bng/y_bng columns
# receive the default suffixes: _x = station side, _y = hospital side.
pairs = stations_b.merge(hospitals_b, how="cross")

# Optional official travel minutes via LSOA→LSOA
if travel is not None:
    od_cols = ["origin_lsoa", "dest_lsoa"]
    tt = travel.rename(columns=str.lower)[od_cols + ["time_car_min"]].copy()
    # Normalise code formatting for safer joins
    for col in od_cols:
        tt[col] = tt[col].str.strip().str.upper()
    for col in ("station_lsoa", "hospital_lsoa"):
        pairs[col] = pairs[col].str.strip().str.upper()

    # A repeated origin/destination row would multiply station×hospital
    # pairs in the left join below, so collapse repeats first.
    n_repeated = int(tt.duplicated(subset=od_cols).sum())
    if n_repeated:
        banner(
            f"Warning: travel matrix has {n_repeated} repeated origin/destination "
            "row(s); keeping the shortest time for each pair."
        )
        tt = tt.sort_values("time_car_min").drop_duplicates(subset=od_cols)

    pairs = pairs.merge(
        tt,
        left_on=["station_lsoa", "hospital_lsoa"],
        right_on=od_cols,
        how="left",
    )
    pairs["has_official_time"] = pairs["time_car_min"].notna()
else:
    pairs["time_car_min"] = np.nan
    pairs["has_official_time"] = False

# Store minutes as plain float64 so fractional fallback values can be written
# even if the matrix column arrived with a nullable or integer dtype.
pairs["time_car_min"] = pairs["time_car_min"].astype("float64")

# Fallback minutes from BNG straight-line distance
fallback_mask = pairs["time_car_min"].isna()
if fallback_mask.any():
    dist_m = bng_dist_m(
        pairs.loc[fallback_mask, "x_bng_x"].to_numpy(),
        pairs.loc[fallback_mask, "y_bng_x"].to_numpy(),
        pairs.loc[fallback_mask, "x_bng_y"].to_numpy(),
        pairs.loc[fallback_mask, "y_bng_y"].to_numpy(),
    )
    km = dist_m / 1000.0
    # distance / speed → hours; ×60 → minutes; ×fudge scales the estimate.
    fallback_min = (km / cfg.v_kph) * 60.0 * cfg.fudge
    pairs.loc[fallback_mask, "time_car_min"] = fallback_min

# Quality filter: keep pairs within max_minutes
pairs = pairs[pairs["time_car_min"] <= cfg.max_minutes].reset_index(drop=True)

banner(
    f"Pairs built: {len(pairs):,} edges | "
    f"{pairs['has_official_time'].mean():.0%} official times | "
    f"Max minutes={cfg.max_minutes}"
)


Pairs built: 42 edges | 100% official times | Max minutes=120



In [7]:
# Step 6 — Build directed bipartite graph (NetworkX attributes)
# - Purpose: encode stations and hospitals as bipartite nodes; edges carry minutes.
# - Back-checks: unique codes; consistent node attributes; simple sanity metrics.
# - Output: in-memory graph, plus summary tables used for exports and mapping.

import networkx as nx

G = nx.DiGraph()

# Node ids are the raw codes, so a code shared by a station and a hospital
# would collapse into a single node (the hospital attributes overwriting the
# station's). Refuse to build a graph that is silently wrong.
_shared_codes = set(stations_bng["code"]) & set(hospitals_bng["code"])
if _shared_codes:
    raise ValueError(
        "Station and hospital codes overlap; node ids must be unique across "
        f"both sets: {sorted(_shared_codes)}"
    )

# Add stations (bipartite=0) with name attribute
station_nodes = [
    (code, {"bipartite": 0, "kind": "station", "name": name})
    for code, name in zip(stations_bng["code"], stations_bng["name"])
]
G.add_nodes_from(station_nodes)

# Add hospitals (bipartite=1) with name attribute
hospital_nodes = [
    (code, {"bipartite": 1, "kind": "hospital", "name": name})
    for code, name in zip(hospitals_bng["code"], hospitals_bng["name"])
]
G.add_nodes_from(hospital_nodes)

# Add edges (station -> hospital); attributes are cast to plain Python
# types so they serialise cleanly.
G.add_edges_from(
    (
        r.station_code,
        r.hospital_code,
        {
            "time_min": float(r.time_car_min),
            "has_official": bool(r.has_official_time),
            "station_lsoa": r.station_lsoa,
            "hospital_lsoa": r.hospital_lsoa,
        },
    )
    for r in pairs.itertuples(index=False)
)

if G.number_of_nodes() == 0 or G.number_of_edges() == 0:
    raise RuntimeError("Empty graph — check inputs and pair filtering.")

banner(
    f"Graph OK — nodes={G.number_of_nodes():,}, edges={G.number_of_edges():,} "
    f"(stations={stations_bng.shape[0]:,}, hospitals={hospitals_bng.shape[0]:,})"
)



Graph OK — nodes=17, edges=42 (stations=14, hospitals=3)



In [8]:
# Step 7 — Quick analytics (best hospital per station; top-K table)
# - Purpose: convenient summaries for dashboards and mapping overlays.
# - Back-checks: ensure each station has ≥1 reachable hospital.
# - Outputs: two DataFrames for export: best and top-K by time_min.


def best_by_time(df: pd.DataFrame) -> pd.DataFrame:
    """Return one row per station: its hospital with the lowest ``time_car_min``.

    The result holds the station/hospital code and name columns plus
    ``time_car_min`` and ``has_official_time``, ordered by ascending time with
    a fresh 0..n-1 index.
    """
    keep_cols = [
        "station_code", "station_name", "hospital_code", "hospital_name",
        "time_car_min", "has_official_time"
    ]
    # Index label of the fastest row within each station group.
    fastest_idx = df.groupby("station_code")["time_car_min"].idxmin()
    fastest = df.loc[fastest_idx, keep_cols]
    return fastest.sort_values("time_car_min").reset_index(drop=True)


def top_k_by_time(df: pd.DataFrame, k: int) -> pd.DataFrame:
    """Return up to ``k`` fastest rows per station.

    Rows are ordered by ``station_code`` and then ``time_car_min``; the first
    ``k`` rows of each station are kept, all columns retained, and the index
    is reset.
    """
    ordered = df.sort_values(["station_code", "time_car_min"])
    leading = ordered.groupby("station_code").head(k)
    return leading.reset_index(drop=True)


# Slim copy of the pair table holding only the columns used in the summaries.
_summary_cols = [
    "station_code", "station_name", "hospital_code", "hospital_name",
    "time_car_min", "has_official_time"
]
pairs_view = pairs.loc[:, _summary_cols].copy()

best = best_by_time(pairs_view)
topk = top_k_by_time(pairs_view, cfg.top_k)

# Stations that were candidates but lost every pair to the max_minutes filter.
uncovered = set(stations_b["station_code"]).difference(best["station_code"])
if uncovered:
    banner(
        f"Warning: {len(uncovered)} station(s) lack reachable hospital within "
        f"{cfg.max_minutes} min. Consider raising max_minutes."
    )

banner(f"Top-K summary prepared (K={cfg.top_k}).")



Top-K summary prepared (K=3).



In [9]:
# Step 8 — GeoPandas map (BNG): LSOA boundary, stations, hospitals, optional lines
# - Purpose: static, tile-free map in BNG suitable for reports/exports.
# - Back-checks: CRS alignment; graceful skip if no pairs.
# - Output: saved PNG/SVG; lightweight styling to remain readable.

import matplotlib.pyplot as plt
from shapely.geometry import LineString
import matplotlib.lines as mlines

# LSOA outlines as the backdrop, explicitly in BNG.
base = lsoa_g[["geometry"]].to_crs(epsg=BNG_EPSG)

fig, ax = plt.subplots(figsize=(10, 10))
base.plot(ax=ax, linewidth=0.2, edgecolor="0.8", facecolor="none")

# Plot stations (circle) and hospitals (triangle) — default colours
stations_bng.plot(ax=ax, markersize=20, alpha=0.9, label="Stations")
hospitals_bng.plot(ax=ax, markersize=30, alpha=0.9, label="Hospitals", marker="^")

# Optional: draw best station→hospital lines for readability
if not best.empty:
    # code → point lookups; iterating in reverse makes the first occurrence
    # of a code the one that ends up in the dict.
    station_pts = dict(zip(stations_bng["code"][::-1], stations_bng["geometry"][::-1]))
    hospital_pts = dict(zip(hospitals_bng["code"][::-1], hospitals_bng["geometry"][::-1]))
    line_geoms: List[LineString] = [
        LineString([station_pts[s_code], hospital_pts[h_code]])
        for s_code, h_code in zip(best["station_code"], best["hospital_code"])
    ]
    lines_g = gpd.GeoDataFrame(geometry=line_geoms, crs=f"EPSG:{BNG_EPSG}")
    lines_g.plot(ax=ax, linewidth=0.4, alpha=0.5)

ax.set_title("Stations → Hospitals (best by travel minutes)\nBNG (EPSG:27700)")
ax.set_axis_off()

# Custom legend handles (GeoPandas doesn't auto-collect plot labels)
legend_handles = [
    mlines.Line2D([], [], linestyle="None", marker="o", markersize=7, label="Stations"),
    mlines.Line2D([], [], linestyle="None", marker="^", markersize=8, label="Hospitals"),
    mlines.Line2D([], [], linewidth=0.8, label="Best edges"),
]
station_handle, hospital_handle, edge_handle = legend_handles
ax.legend(handles=legend_handles, loc="lower right")

# Save into the per-run folder and release the figure.
out_png = cfg.out_stamp_dir / "01b_visual.png"
fig.savefig(out_png, dpi=300, bbox_inches="tight")
plt.close(fig)

banner(f"Map saved to {out_png}")


Map saved to /Users/rosstaylor/Downloads/Code Repositories/REACH Map (NHS
SW)/GitHub Repo/REACH-Map-NHS-
SW/outputs/01b_directed_bipartite_graph/20251026-2010/01b_visual.png



In [10]:
# Step 9 — Exports: nodes, edges, graphml, summaries, metadata receipt
# - Purpose: persist clean artefacts for downstream 02a/02b/02c use.
# - Back-checks: row counts > 0; write metadata receipt for reproducibility.
# - Contract: stable column names; avoid surprises across notebooks.

# Nodes (stations + hospitals): same columns for both, tagged by kind.
_node_cols = ["code", "name", "x_bng", "y_bng"]
nodes_station = stations_bng[_node_cols].assign(kind="station")
nodes_hospital = hospitals_bng[_node_cols].assign(kind="hospital")
nodes = pd.concat([nodes_station, nodes_hospital], ignore_index=True)

# Edges: export-facing column names
edges = pairs_view.rename(columns={"time_car_min": "minutes", "has_official_time": "has_official"})

# Write CSVs (all without the index column)
for _frame, _filename in (
    (nodes, "nodes_station_hospital.csv"),
    (edges, "edges_station_to_hospital.csv"),
    (best, "station_best_hospital.csv"),
    (topk, f"station_top_{cfg.top_k}_hospitals.csv"),
):
    _frame.to_csv(cfg.out_stamp_dir / _filename, index=False)

# GraphML
nx.write_graphml(G, cfg.out_stamp_dir / "station_hospital.graphml")

# Summary: one metric per row
_summary_rows = [
    ("nodes", G.number_of_nodes()),
    ("edges", G.number_of_edges()),
    ("pct_official_times", round(edges["has_official"].mean() * 100, 1)),
    ("max_minutes", cfg.max_minutes),
    ("top_k", cfg.top_k),
]
summary = pd.DataFrame({
    "metric": [metric for metric, _ in _summary_rows],
    "value": [value for _, value in _summary_rows],
})
summary.to_csv(cfg.out_stamp_dir / "01b_summary.csv", index=False)

In [11]:
# Metadata receipt: records what was run, with which inputs and parameters,
# so a set of outputs can be traced back to the run that produced it.
receipt = {
    "notebook": "01b_directed_bipartite_graph",
    "git_sha": SHA,
    # Timezone-aware "now" in UTC; datetime.utcnow() is deprecated from
    # Python 3.12. The format matches the previous output,
    # e.g. 2025-10-26T20:10:33Z.
    "created_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    "crs": {"map_epsg": BNG_EPSG, "input_epsg": WGS84_EPSG},
    "config": {
        "max_minutes": cfg.max_minutes,
        "v_kph": cfg.v_kph,
        "fudge": cfg.fudge,
        "top_k": cfg.top_k,
    },
    "inputs": {
        "stations_csv": str(cfg.stations_csv),
        "hospitals_csv": str(cfg.hospitals_csv),
        # None when the optional travel matrix file is not on disk.
        "travel_matrix_csv": str(cfg.travel_matrix_csv) if cfg.travel_matrix_csv.exists() else None,
        "lsoa_gpkg": str(cfg.lsoa_gpkg),
        "lsoa_layer": cfg.lsoa_layer,
    },
    "outputs_dir": str(cfg.out_stamp_dir),
    "counts": {
        "stations": int(stations_bng.shape[0]),
        "hospitals": int(hospitals_bng.shape[0]),
        "edges": int(edges.shape[0]),
    },
}
# Explicit encoding so the file does not depend on the platform default.
with open(cfg.out_stamp_dir / "01b_metadata.json", "w", encoding="utf-8") as f:
    json.dump(receipt, f, indent=2)

banner("Exports complete — CSVs, GraphML, summary, and metadata receipt written.")



Exports complete — CSVs, GraphML, summary, and metadata receipt written.



In [12]:

# Step 10 — Quality gates & friendly guidance
# - Purpose: final assertions + hints to fix common issues quickly.
# - Back-checks: minimum viable artefacts; uncover likely misconfigurations.
# - Output: human-readable prompts for next steps (02a/02b/02c).

# Hard gates: the exported tables must not be empty.
assert len(nodes) > 0, "No nodes exported — check station/hospital inputs."
assert len(edges) > 0, (
    "No edges exported — try increasing max_minutes or review coordinates."
)

# Soft gate: every edge under 10 minutes suggests a unit or CRS problem.
longest_minutes = edges["minutes"].max()
if longest_minutes < 10:
    banner("Heads-up: unusually small minutes; check units/CRS and fudge factor.")

# Soft gate: a travel matrix exists on disk but matched under half the edges.
official_share = edges["has_official"].mean()
if official_share < 0.5 and cfg.travel_matrix_csv.exists():
    banner(
        "Note: <50% official times used despite a travel matrix being present.\n"
        "Check LSOA code matching. Consider harmonising LSOA code formats."
    )

banner(
    "Ready for 02a/02b/02c.\n"
    "- 02a: population coverage vs thresholds.\n"
    "- 02b: sparse matrices (R and C) + caching.\n"
    "- 02c: MCLP/p-median with equity weights."
)



Ready for 02a/02b/02c. - 02a: population coverage vs thresholds. - 02b: sparse
matrices (R and C) + caching. - 02c: MCLP/p-median with equity weights.

