diff --git a/docs/benchmarks/layered_admissibility.md b/docs/benchmarks/layered_admissibility.md index 30bb1f9..e6d098c 100644 --- a/docs/benchmarks/layered_admissibility.md +++ b/docs/benchmarks/layered_admissibility.md @@ -27,8 +27,14 @@ Deterministically compare admissibility outcomes across fixture bundles using Co - no fuzzy matching - no semantic equivalence +## Visualization + +![Layered admissibility degradation curve](../media/layered_admissibility_curve.svg) + +This SVG is a deterministic benchmark artifact generated directly from `artifacts/layered_admissibility_results.json` via the hand-written renderer (`src/visualization/svg_curve_renderer.py`). Rendering is pure SVG text generation with fixed canvas geometry, stable ordering, and fixed float precision (three decimals), so output is CI-friendly and reproducible with no stochastic rendering. + ## Future - add more fixture families -- add progressive degradation levels -- add SVG curve visualization later +- extend deterministic benchmark artifacts +- keep visualization static and reproducible diff --git a/docs/media/layered_admissibility_curve.svg b/docs/media/layered_admissibility_curve.svg new file mode 100644 index 0000000..ee88b66 --- /dev/null +++ b/docs/media/layered_admissibility_curve.svg @@ -0,0 +1,36 @@ + + + Layered Admissibility Degradation Curve + + + + 0.000 + + 0.500 + + 1.000 + positive + mild + moderate + severe + + Fixture progression + overall_admissibility_score + + coding_workflow_pr_review_v1 | 1.000 + + coding_workflow_pr_review_mild_v1 | 0.917 + + coding_workflow_pr_review_moderate_v1 | 0.833 + + coding_workflow_pr_review_degraded_v1 | 0.500 + RECOVERY_PATH_INVALID + RECOVERY_PATH_INVALID, CAUSAL_DEPENDENCY_LOSS + RECOVERY_PATH_INVALID, CAUSAL_DEPENDENCY_LOSS, POLICY_ORDER_BROKEN, INVARIANT_VIOLATION + + Legend (component scores) + - structural + - relational + - operational + - governance + diff --git a/scripts/render_curve_svg.py b/scripts/render_curve_svg.py new file mode 100644 index 0000000..032f128 --- /dev/null +++ b/scripts/render_curve_svg.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import json +from pathlib import Path +import sys + +PROJECT_ROOT = Path(__file__).resolve().parent.parent +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + +from src.visualization.svg_curve_renderer import SVGCurveRenderer + +INPUT_PATH = Path("artifacts/layered_admissibility_results.json") +OUTPUT_PATH = Path("docs/media/layered_admissibility_curve.svg") + + +if __name__ == "__main__": + payload = json.loads(INPUT_PATH.read_text(encoding="utf-8")) + svg = SVGCurveRenderer().render(payload) + OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) + OUTPUT_PATH.write_text(svg, encoding="utf-8") diff --git a/src/visualization/__init__.py b/src/visualization/__init__.py new file mode 100644 index 0000000..703e279 --- /dev/null +++ b/src/visualization/__init__.py @@ -0,0 +1 @@ +"""Deterministic visualization helpers.""" diff --git a/src/visualization/svg_curve_renderer.py b/src/visualization/svg_curve_renderer.py new file mode 100644 index 0000000..ece999a --- /dev/null +++ b/src/visualization/svg_curve_renderer.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +from dataclasses import dataclass +from html import escape + + +@dataclass(frozen=True, slots=True) +class _PointLayout: + fixture_id: str + score: float + x: float + y: float + failure_labels: tuple[str, ...] + + +class SVGCurveRenderer: + WIDTH = 1000 + HEIGHT = 520 + MARGIN_LEFT = 90 + MARGIN_RIGHT = 40 + MARGIN_TOP = 70 + MARGIN_BOTTOM = 140 + + TITLE = "Layered Admissibility Degradation Curve" + X_LABEL = "Fixture progression" + Y_LABEL = "overall_admissibility_score" + + X_TICKS: tuple[tuple[str, str], ...] = ( + ("coding_workflow_pr_review_v1", "positive"), + ("coding_workflow_pr_review_mild_v1", "mild"), + ("coding_workflow_pr_review_moderate_v1", "moderate"), + ("coding_workflow_pr_review_degraded_v1", "severe"), + ) + + LEGEND_ITEMS: tuple[str, ...] = ("structural", "relational", "operational", "governance") + + FAILURE_ANNOTATION_ORDER: tuple[str, ...] = ( + "RECOVERY_PATH_INVALID", + "CAUSAL_DEPENDENCY_LOSS", + "POLICY_ORDER_BROKEN", + "INVARIANT_VIOLATION", + ) + + def _fmt(self, value: float) -> str: + return f"{value:.3f}" + + def _layout_points(self, curve_json: dict) -> tuple[_PointLayout, ...]: + points_by_fixture = {point["fixture_id"]: point for point in curve_json["points"]} + plot_width = self.WIDTH - self.MARGIN_LEFT - self.MARGIN_RIGHT + plot_height = self.HEIGHT - self.MARGIN_TOP - self.MARGIN_BOTTOM + + layouts: list[_PointLayout] = [] + for index, (fixture_id, _) in enumerate(self.X_TICKS): + point = points_by_fixture[fixture_id] + score = float(point["overall_admissibility_score"]) + x = self.MARGIN_LEFT + (plot_width * index / (len(self.X_TICKS) - 1)) + y = self.MARGIN_TOP + ((1.0 - score) * plot_height) + layouts.append( + _PointLayout( + fixture_id=fixture_id, + score=score, + x=x, + y=y, + failure_labels=tuple(sorted(point["failure_labels"])), + ) + ) + return tuple(layouts) + + def render(self, curve_json: dict) -> str: + layouts = self._layout_points(curve_json) + plot_bottom = self.HEIGHT - self.MARGIN_BOTTOM + plot_right = self.WIDTH - self.MARGIN_RIGHT + + polyline_points = " ".join(f"{self._fmt(p.x)},{self._fmt(p.y)}" for p in layouts) + elements: list[str] = [ + f'', + ' ', + f' {self.TITLE}', + f' ', + f' ', + ] + + for tick_score in (0.0, 0.5, 1.0): + y = self.MARGIN_TOP + ((1.0 - tick_score) * (self.HEIGHT - self.MARGIN_TOP - self.MARGIN_BOTTOM)) + elements.append( + f' ' + ) + elements.append( + f' {self._fmt(tick_score)}' + ) + + for point, (_, stage_name) in zip(layouts, self.X_TICKS): + elements.append( + f' {stage_name}' + ) + + elements.extend( + [ + f' ', + f' {self.X_LABEL}', + f' {self.Y_LABEL}', + ] + ) + + for point in layouts: + elements.append( + f' ' + ) + elements.append( + f' {escape(point.fixture_id)} | {self._fmt(point.score)}' + ) + + y_base = plot_bottom + 44 + for point in layouts[1:]: + ordered_labels = [label for label in self.FAILURE_ANNOTATION_ORDER if label in point.failure_labels] + if ordered_labels: + elements.append( + f' {", ".join(ordered_labels)}' + ) + + legend_x = 700 + legend_y = 84 + elements.append(f' ') + elements.append(f' Legend (component scores)') + for idx, item in enumerate(self.LEGEND_ITEMS): + elements.append( + f' - {item}' + ) + + elements.append("") + return "\n".join(elements) + "\n" diff --git a/tests/test_svg_curve_renderer.py b/tests/test_svg_curve_renderer.py new file mode 100644 index 0000000..f647864 --- /dev/null +++ b/tests/test_svg_curve_renderer.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import json +import re +from pathlib import Path + +from src.visualization.svg_curve_renderer import SVGCurveRenderer + +INPUT_PATH = Path("artifacts/layered_admissibility_results.json") +SVG_PATH = Path("docs/media/layered_admissibility_curve.svg") + + +def _render() -> str: + payload = json.loads(INPUT_PATH.read_text(encoding="utf-8")) + return SVGCurveRenderer().render(payload) + + +def test_svg_render_is_deterministic() -> None: + assert _render() == _render() + + +def test_svg_root_exists() -> None: + output = _render() + assert output.startswith('") + + +def test_svg_contains_fixture_labels() -> None: + output = _render() + assert "coding_workflow_pr_review_v1" in output + assert "coding_workflow_pr_review_mild_v1" in output + assert "coding_workflow_pr_review_moderate_v1" in output + assert "coding_workflow_pr_review_degraded_v1" in output + + +def test_svg_contains_expected_failure_annotations() -> None: + output = _render() + for label in [ + "RECOVERY_PATH_INVALID", + "CAUSAL_DEPENDENCY_LOSS", + "POLICY_ORDER_BROKEN", + "INVARIANT_VIOLATION", + ]: + assert label in output + + +def test_svg_polyline_coordinates_monotonic_degradation() -> None: + output = _render() + match = re.search(r' None: + output = _render() + assert "960.000,225.000" in output + assert "380.000,95.833" in output + assert "0.917" in output + + +def test_rendered_svg_matches_committed_artifact() -> None: + generated = _render() + committed = SVG_PATH.read_text(encoding="utf-8") + assert generated == committed