diff --git a/scripts/generate_model_card.py b/scripts/generate_model_card.py
new file mode 100644
index 0000000..597e7d2
--- /dev/null
+++ b/scripts/generate_model_card.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+"""
+Generate a Model Card for a ClimateVision release.
+
+Usage:
+    python scripts/generate_model_card.py \\
+        --config config.yaml \\
+        --metrics outputs/eval/metrics.json \\
+        --fairness outputs/governance/fairness.json \\
+        --output-dir outputs/model_cards/
+
+The script is intended to run inside the release CI pipeline so that
+every model version published has a card committed alongside it.
+"""
+
+from __future__ import annotations
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+
+from climatevision.governance.model_card import generate
+
+logger = logging.getLogger("generate_model_card")
+
+
+def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--config", type=Path, required=True, help="Training config (yaml/json)")
+    parser.add_argument("--metrics", type=Path, required=True, help="Evaluation metrics JSON")
+    parser.add_argument("--fairness", type=Path, default=None, help="Fairness report JSON")
+    parser.add_argument("--output-dir", type=Path, default=None, help="Where to write the card")
+    parser.add_argument("--name", default=None, help="Override model name")
+    parser.add_argument("--version", default=None, help="Override model version")
+    parser.add_argument("-v", "--verbose", action="store_true")
+    return parser.parse_args(argv)
+
+
+def main(argv: list[str] | None = None) -> int:
+    args = parse_args(argv)
+    logging.basicConfig(
+        level=logging.DEBUG if args.verbose else logging.INFO,
+        format="%(asctime)s %(levelname)s %(name)s %(message)s",
+    )
+
+    paths = generate(
+        config=args.config,
+        metrics=args.metrics,
+        fairness_report=args.fairness,
+        output_dir=args.output_dir,
+        name=args.name,
+        version=args.version,
+    )
+    for label, path in paths.items():
+        print(f"{label}: {path}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/climatevision/governance/__init__.py b/src/climatevision/governance/__init__.py
index 4f93a95..bb42723 100644
--- a/src/climatevision/governance/__init__.py
+++ b/src/climatevision/governance/__init__.py
@@ -27,6 +27,13 @@
     AuditLogger,
     log_prediction,
 )
+from .model_card import (
+    ModelCard,
+    build_model_card,
+    generate as generate_model_card,
+    render_markdown,
+    write_model_card,
+)
 
 __all__ = [
     "explain_prediction",
@@ -42,4 +49,9 @@
     "AuditEntry",
     "AuditLogger",
     "log_prediction",
+    "ModelCard",
+    "build_model_card",
+    "generate_model_card",
+    "render_markdown",
+    "write_model_card",
 ]
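With the `__init__.py` re-exports above, callers can import the generator from the governance package instead of reaching into the submodule. A minimal sketch of that surface; the config and metrics values below are made up for illustration:

```python
# Illustrative use of the re-exported API; values are placeholders.
from climatevision.governance import build_model_card

card = build_model_card(
    {"analysis_type": "deforestation"},                           # in-memory config dict
    {"iou": 0.80, "f1": 0.85, "precision": 0.90, "recall": 0.81},
)
print(card.name, card.version)  # -> climatevision-deforestation 0.0.0 (fallback defaults)
```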
+""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Optional, Union + +logger = logging.getLogger(__name__) + +_PROJECT_ROOT = Path(__file__).resolve().parents[4] +_DEFAULT_OUTPUT_DIR = _PROJECT_ROOT / "outputs" / "model_cards" + +REQUIRED_METRICS = ("iou", "f1", "precision", "recall") + + +@dataclass +class ModelCard: + name: str + version: str + analysis_type: str + description: str + intended_use: str + out_of_scope_uses: list[str] + training_data: dict + evaluation_data: dict + metrics: dict + fairness: dict + limitations: list[str] + ethical_considerations: list[str] + contact: str + generated_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat()) + + def to_dict(self) -> dict: + return { + "name": self.name, + "version": self.version, + "analysis_type": self.analysis_type, + "description": self.description, + "intended_use": self.intended_use, + "out_of_scope_uses": self.out_of_scope_uses, + "training_data": self.training_data, + "evaluation_data": self.evaluation_data, + "metrics": self.metrics, + "fairness": self.fairness, + "limitations": self.limitations, + "ethical_considerations": self.ethical_considerations, + "contact": self.contact, + "generated_at": self.generated_at, + } + + +_DEFAULT_INTENDED_USE = ( + "Detection of {analysis_type} in satellite imagery for use by " + "conservation organisations, NGOs, and government agencies. The " + "model produces per-pixel probability scores intended to be reviewed " + "alongside ground-truth reference data and analyst judgement." +) + +_DEFAULT_OUT_OF_SCOPE = [ + "Real-time legal enforcement decisions without analyst review.", + "Carbon credit issuance without independent ground-truth validation.", + "Use on imagery from sensors not represented in the training set.", +] + +_DEFAULT_LIMITATIONS = [ + "Performance degrades on cloud cover above the masking threshold used in preprocessing.", + "Geographic coverage limited to regions present in the training set.", + "Temporal generalisation to seasons or years outside the training window is unverified.", +] + +_DEFAULT_ETHICS = [ + "Model outputs may carry geographic bias; downstream users must run " + "the bias audit pipeline before distributing results across regions.", + "Predictions should never be the sole basis for actions affecting " + "indigenous land rights or local communities.", +] + + +def _coerce_config(config: Union[dict, str, Path]) -> dict: + if isinstance(config, dict): + return config + path = Path(config) + text = path.read_text() + if path.suffix in {".yml", ".yaml"}: + try: + import yaml + except ImportError as exc: # pragma: no cover - import guard + raise RuntimeError("PyYAML is required to load YAML configs") from exc + return yaml.safe_load(text) + return json.loads(text) + + +def _validate_metrics(metrics: dict) -> None: + missing = [m for m in REQUIRED_METRICS if m not in metrics] + if missing: + raise ValueError(f"metrics missing required keys: {missing}") + + +def build_model_card( + config: Union[dict, str, Path], + metrics: dict, + fairness_report: Optional[dict] = None, + *, + name: Optional[str] = None, + version: Optional[str] = None, + description: Optional[str] = None, + contact: str = "ClimateVision Governance ", +) -> ModelCard: + cfg = _coerce_config(config) + _validate_metrics(metrics) + + analysis_type = cfg.get("analysis_type") or cfg.get("analysis", {}).get("type", 
"deforestation") + resolved_name = name or cfg.get("model", {}).get("name") or f"climatevision-{analysis_type}" + resolved_version = version or cfg.get("model", {}).get("version") or "0.0.0" + + return ModelCard( + name=resolved_name, + version=resolved_version, + analysis_type=analysis_type, + description=description or f"U-Net segmentation model for {analysis_type}.", + intended_use=_DEFAULT_INTENDED_USE.format(analysis_type=analysis_type), + out_of_scope_uses=list(_DEFAULT_OUT_OF_SCOPE), + training_data=cfg.get("training_data", cfg.get("data", {})), + evaluation_data=cfg.get("evaluation_data", {}), + metrics=dict(metrics), + fairness=fairness_report or {}, + limitations=list(_DEFAULT_LIMITATIONS), + ethical_considerations=list(_DEFAULT_ETHICS), + contact=contact, + ) + + +def render_markdown(card: ModelCard) -> str: + metrics_rows = "\n".join( + f"| {k} | {v} |" for k, v in sorted(card.metrics.items()) + ) + fairness_block = ( + "\n".join(f"- **{k}**: {v}" for k, v in card.fairness.items()) + or "_No fairness report attached._" + ) + + sections = [ + f"# Model Card: {card.name} ({card.version})", + f"_Generated {card.generated_at}_", + "", + "## Description", + card.description, + "", + "## Intended Use", + card.intended_use, + "", + "### Out-of-Scope Uses", + "\n".join(f"- {u}" for u in card.out_of_scope_uses), + "", + "## Training Data", + f"```json\n{json.dumps(card.training_data, indent=2)}\n```", + "", + "## Evaluation", + "| Metric | Value |", + "| --- | --- |", + metrics_rows, + "", + "## Fairness", + fairness_block, + "", + "## Limitations", + "\n".join(f"- {l}" for l in card.limitations), + "", + "## Ethical Considerations", + "\n".join(f"- {e}" for e in card.ethical_considerations), + "", + "## Contact", + card.contact, + ] + return "\n".join(sections) + "\n" + + +def write_model_card( + card: ModelCard, + output_dir: Optional[Union[str, Path]] = None, +) -> dict[str, Path]: + output_dir = Path(output_dir) if output_dir else _DEFAULT_OUTPUT_DIR + output_dir.mkdir(parents=True, exist_ok=True) + + base = f"{card.name}_{card.version}" + md_path = output_dir / f"{base}.md" + json_path = output_dir / f"{base}.json" + + md_path.write_text(render_markdown(card)) + json_path.write_text(json.dumps(card.to_dict(), indent=2)) + + logger.info("Wrote model card to %s and %s", md_path, json_path) + return {"markdown": md_path, "json": json_path} + + +def generate( + config: Union[dict, str, Path], + metrics: Union[dict, str, Path], + fairness_report: Optional[Union[dict, str, Path]] = None, + output_dir: Optional[Union[str, Path]] = None, + **kwargs: Any, +) -> dict[str, Path]: + """End-to-end: load inputs, build the card, render to disk.""" + metrics_dict = _coerce_config(metrics) if not isinstance(metrics, dict) else metrics + fairness_dict = ( + _coerce_config(fairness_report) + if fairness_report is not None and not isinstance(fairness_report, dict) + else fairness_report + ) + card = build_model_card(config, metrics_dict, fairness_dict, **kwargs) + return write_model_card(card, output_dir=output_dir) diff --git a/tests/test_model_card.py b/tests/test_model_card.py new file mode 100644 index 0000000..b3a4211 --- /dev/null +++ b/tests/test_model_card.py @@ -0,0 +1,95 @@ +"""Tests for governance.model_card.""" + +from __future__ import annotations + +import json + +import pytest + +from climatevision.governance.model_card import ( + REQUIRED_METRICS, + build_model_card, + generate, + render_markdown, + write_model_card, +) + + +def _config(): + return { + "model": {"name": 
"unet-deforestation", "version": "1.2.0"}, + "analysis_type": "deforestation", + "training_data": { + "regions": ["amazon", "congo"], + "tile_count": 12000, + }, + "evaluation_data": {"regions": ["southeast_asia"], "tile_count": 1500}, + } + + +def _metrics(): + return {"iou": 0.81, "f1": 0.86, "precision": 0.88, "recall": 0.85} + + +def test_build_card_uses_config_values(): + card = build_model_card(_config(), _metrics()) + assert card.name == "unet-deforestation" + assert card.version == "1.2.0" + assert card.analysis_type == "deforestation" + assert card.metrics == _metrics() + assert card.training_data["tile_count"] == 12000 + + +def test_missing_metric_raises(): + bad = {"iou": 0.5, "f1": 0.5} + with pytest.raises(ValueError): + build_model_card(_config(), bad) + + +def test_required_metric_set_is_documented(): + assert set(REQUIRED_METRICS) <= set(_metrics()) + + +def test_render_markdown_includes_all_sections(): + card = build_model_card(_config(), _metrics(), fairness_report={"score": 0.92}) + md = render_markdown(card) + for heading in [ + "# Model Card:", + "## Description", + "## Intended Use", + "## Training Data", + "## Evaluation", + "## Fairness", + "## Limitations", + "## Ethical Considerations", + "## Contact", + ]: + assert heading in md + assert "score" in md + + +def test_write_model_card_emits_md_and_json(tmp_path): + card = build_model_card(_config(), _metrics()) + paths = write_model_card(card, output_dir=tmp_path) + + assert paths["markdown"].exists() + assert paths["json"].exists() + + payload = json.loads(paths["json"].read_text()) + assert payload["version"] == "1.2.0" + assert payload["metrics"]["iou"] == pytest.approx(0.81) + + +def test_generate_loads_files_from_disk(tmp_path): + cfg_path = tmp_path / "config.json" + metrics_path = tmp_path / "metrics.json" + cfg_path.write_text(json.dumps(_config())) + metrics_path.write_text(json.dumps(_metrics())) + + paths = generate( + config=cfg_path, + metrics=metrics_path, + output_dir=tmp_path / "cards", + ) + assert paths["markdown"].exists() + assert paths["json"].exists()