diff --git a/src/agents_shipgate/cli/verify/orchestrator.py b/src/agents_shipgate/cli/verify/orchestrator.py index fff58504..9aa62619 100644 --- a/src/agents_shipgate/cli/verify/orchestrator.py +++ b/src/agents_shipgate/cli/verify/orchestrator.py @@ -22,7 +22,7 @@ VerifierBaseStatus, VerifierHumanReview, VerifierNextAction, - map_merge_verdict, + merge_verdict_for, ) from agents_shipgate.triggers import evaluate @@ -513,12 +513,6 @@ def _map_optional_tree_path( return tree_dir / relative -def _merge_verdict(*, decision: str | None, head_status: str) -> MergeVerdict: - if decision is not None: - return map_merge_verdict(decision) - return "mergeable" if head_status == "skipped" else "unknown" - - def _can_merge_without_human( *, merge_verdict: MergeVerdict, release_decision: ReleaseDecision | None ) -> bool: @@ -642,7 +636,7 @@ def _build_verifier( include_scan_artifacts=report is not None, ) decision = release_decision_model.decision if release_decision_model else None - merge_verdict = _merge_verdict(decision=decision, head_status=head_status) + merge_verdict = merge_verdict_for(decision=decision, head_status=head_status) human_review = _human_review( merge_verdict=merge_verdict, release_decision=release_decision_model ) diff --git a/src/agents_shipgate/schemas/capability_change.py b/src/agents_shipgate/schemas/capability_change.py index 3f9fb03b..9fa055fb 100644 --- a/src/agents_shipgate/schemas/capability_change.py +++ b/src/agents_shipgate/schemas/capability_change.py @@ -35,7 +35,7 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator -from agents_shipgate.schemas.common import Confidence +from agents_shipgate.schemas.common import Confidence, ReleaseDecisionStatus # --- Capability change ------------------------------------------------------- @@ -327,12 +327,11 @@ def _sort_lists(self) -> HumanAck: # --- Verifier summary -------------------------------------------------------- -VerifierVerdict = Literal[ - "blocked", - "review_required", - "insufficient_evidence", - "passed", -] +# Back-compat alias for the canonical verdict vocabulary. ``VerifierSummary.verdict`` +# MUST mirror ``release_decision.decision`` (Principle 2), so it reuses the exact +# same ``ReleaseDecisionStatus`` enum rather than re-spelling it. Kept as a named +# alias because callers and tests import ``VerifierVerdict`` directly. +VerifierVerdict = ReleaseDecisionStatus class VerifierCapabilityDeltaSummary(BaseModel): diff --git a/src/agents_shipgate/schemas/common.py b/src/agents_shipgate/schemas/common.py index 40571e76..82d5279b 100644 --- a/src/agents_shipgate/schemas/common.py +++ b/src/agents_shipgate/schemas/common.py @@ -7,6 +7,19 @@ Severity = Literal["info", "low", "medium", "high", "critical"] Confidence = Literal["low", "medium", "high"] BaselineStatus = Literal["new", "matched", "resolved"] +# The canonical release-verdict vocabulary — the ONE enum the whole system +# gates on. ``build_release_decision()`` is the only place that computes it; +# every other "verdict"/"decision" field (AgentSummary, ReviewerSummary, +# VerifierSummary, ReleaseConsequence) re-uses this exact alias so the +# vocabulary can never be re-spelled or drift out of lockstep. The +# agent-facing ``MergeVerdict`` (schemas/verifier.py) is a deterministic +# projection of this via ``map_merge_verdict()``. +ReleaseDecisionStatus = Literal[ + "blocked", + "review_required", + "insufficient_evidence", + "passed", +] # v0.15: per-finding provenance kind. Independent of `confidence` — # `confidence` records how sure a rule is; `provenance_kind` records # *what kind of rule fired* (and what artifact it inspected). Lets diff --git a/src/agents_shipgate/schemas/report.py b/src/agents_shipgate/schemas/report.py index 5632dd46..f0d075f6 100644 --- a/src/agents_shipgate/schemas/report.py +++ b/src/agents_shipgate/schemas/report.py @@ -17,6 +17,7 @@ BaselineStatus, Confidence, ProvenanceKind, + ReleaseDecisionStatus, Severity, SourceReference, ) @@ -149,14 +150,11 @@ class BaselineSummary(BaseModel): # v0.8: release_decision block — see docs/STABILITY.md for the -# divergence contract with summary.status (which stays baseline-blind -# for backwards compatibility). -ReleaseDecisionStatus = Literal[ - "blocked", - "review_required", - "insufficient_evidence", - "passed", -] +# divergence contract with summary.status (which stays baseline-blind for +# backwards compatibility). The ``ReleaseDecisionStatus`` verdict vocabulary +# is now defined once in schemas/common.py and imported above, so the report +# summaries, ReleaseConsequence, and the verifier projection all share the +# exact same enum (one decision engine — no re-spelling, no drift). class ReleaseDecisionItem(BaseModel): @@ -386,12 +384,7 @@ class AgentSummary(BaseModel): model_config = ConfigDict(extra="forbid") - verdict: Literal[ - "blocked", - "review_required", - "insufficient_evidence", - "passed", - ] + verdict: ReleaseDecisionStatus headline: str blocker_count: int = 0 review_item_count: int = 0 @@ -464,15 +457,11 @@ class ReviewerSummary(BaseModel): model_config = ConfigDict(extra="forbid") - # Mirror the release verdict for at-a-glance context. Same enum as - # ``AgentSummary.verdict`` so a downstream consumer can switch on - # either block without re-deriving. - verdict: Literal[ - "blocked", - "review_required", - "insufficient_evidence", - "passed", - ] + # Mirror the release verdict for at-a-glance context. The exact same + # ``ReleaseDecisionStatus`` alias as ``AgentSummary.verdict`` and + # ``release_decision.decision`` so a downstream consumer can switch on + # any block without re-deriving — and the vocabulary cannot drift. + verdict: ReleaseDecisionStatus headline: str # Per-lens activity counts. Each is the cheapest "did this lens diff --git a/src/agents_shipgate/schemas/verifier.py b/src/agents_shipgate/schemas/verifier.py index c84191d1..f616f145 100644 --- a/src/agents_shipgate/schemas/verifier.py +++ b/src/agents_shipgate/schemas/verifier.py @@ -2,7 +2,9 @@ from typing import Any, Literal -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, model_validator + +from agents_shipgate.schemas.common import ReleaseDecisionStatus VerifierBaseStatus = Literal[ "not_requested", @@ -32,7 +34,14 @@ "none", ] -_DECISION_TO_VERDICT: dict[str, MergeVerdict] = { +# The projection from the canonical release verdict (``ReleaseDecisionStatus``, +# the ONE thing ``build_release_decision`` computes) onto the agent-facing +# ``MergeVerdict``. Keyed with ``ReleaseDecisionStatus`` so a key that is not a +# real release status is a type error, and covered by a totality test +# (tests/test_verdict_contract.py) so adding a release status without a mapping +# fails CI rather than silently falling back. This dict is the only bridge +# between the two vocabularies. +_DECISION_TO_VERDICT: dict[ReleaseDecisionStatus, MergeVerdict] = { "passed": "mergeable", "review_required": "human_review_required", "insufficient_evidence": "insufficient_evidence", @@ -41,10 +50,30 @@ def map_merge_verdict(decision: str | None) -> MergeVerdict: - """Project ``release_decision.decision`` onto a merge verdict.""" + """Project ``release_decision.decision`` onto a merge verdict. + + ``None`` (no head scan / no decision) is ``unknown``. A decision string + outside the canonical vocabulary fails safe to ``human_review_required`` + rather than ``mergeable`` — an unrecognized verdict must never auto-pass. + """ if decision is None: return "unknown" - return _DECISION_TO_VERDICT.get(decision, "human_review_required") + return _DECISION_TO_VERDICT.get(decision, "human_review_required") # type: ignore[arg-type] + + +def merge_verdict_for(*, decision: str | None, head_status: str) -> MergeVerdict: + """Single authority for deriving a ``MergeVerdict`` for a verify run. + + When the head scan produced a ``release_decision`` the verdict is a pure + projection of it (``map_merge_verdict``). With no decision the verdict + reflects *why*: a skipped head (Shipgate had nothing to gate) is + ``mergeable``; any other no-decision state (scan failed, or not yet run) + is ``unknown``. Centralized here so the orchestrator — or any future + caller — cannot invent a second, inconsistent rule. + """ + if decision is not None: + return map_merge_verdict(decision) + return "mergeable" if head_status == "skipped" else "unknown" class VerifierNextAction(BaseModel): @@ -141,6 +170,36 @@ class VerifierArtifact(BaseModel): first_next_action: VerifierNextAction | None = None artifacts: dict[str, str] = Field(default_factory=dict) + @model_validator(mode="after") + def _verdict_projects_release_decision(self) -> VerifierArtifact: + """Lock the one-decision-engine contract structurally. + + Whenever a head ``release_decision`` is present, the agent-facing + ``merge_verdict`` and the convenience ``decision`` copy MUST be exact + projections of it — never an independently computed second opinion. + Construction-time enforcement makes an inconsistent artifact + impossible to emit. (No release_decision — skipped / failed / preview + — is left unconstrained: there is no substrate to project.) + """ + if self.release_decision is None: + return self + substrate = self.release_decision.get("decision") + if self.decision != substrate: + raise ValueError( + "VerifierArtifact.decision must equal " + "release_decision['decision'] (one decision engine): " + f"{self.decision!r} != {substrate!r}" + ) + expected = map_merge_verdict(substrate) + if self.merge_verdict != expected: + raise ValueError( + "VerifierArtifact.merge_verdict must be the projection of " + f"release_decision['decision']={substrate!r} via " + f"map_merge_verdict (expected {expected!r}, got " + f"{self.merge_verdict!r})" + ) + return self + __all__ = [ "CapabilityChangeBucket", @@ -154,4 +213,5 @@ class VerifierArtifact(BaseModel): "VerifierHumanReview", "VerifierNextAction", "map_merge_verdict", + "merge_verdict_for", ] diff --git a/tests/test_verdict_contract.py b/tests/test_verdict_contract.py new file mode 100644 index 00000000..11a5883a --- /dev/null +++ b/tests/test_verdict_contract.py @@ -0,0 +1,167 @@ +"""PR-A contract lock: one verdict engine, one drift-proof projection. + +These tests are the structural guarantee behind the "one decision engine" +discipline. ``build_release_decision()`` computes the canonical +``ReleaseDecisionStatus``; everything else — the report summary blocks +(``AgentSummary`` / ``ReviewerSummary`` / ``VerifierSummary``), the +``ReleaseConsequence`` lens, and the agent-facing ``MergeVerdict`` in +verifier.json — must be a *projection* of it that cannot silently drift. If +any of these fail, two parts of the system can disagree about whether a PR +can merge, which is exactly the failure this product exists to prevent. +""" + +from __future__ import annotations + +from typing import get_args + +import pytest +from pydantic import ValidationError + +from agents_shipgate.schemas.capability_change import VerifierSummary, VerifierVerdict +from agents_shipgate.schemas.common import ReleaseDecisionStatus +from agents_shipgate.schemas.report import ( + AgentSummary, + ReleaseConsequence, + ReleaseDecision, + ReviewerSummary, +) +from agents_shipgate.schemas.verifier import ( + _DECISION_TO_VERDICT, + VerifierArtifact, + map_merge_verdict, + merge_verdict_for, +) + +CANONICAL = set(get_args(ReleaseDecisionStatus)) + + +# --- One vocabulary: every verdict surface shares the canonical enum -------- + + +def test_canonical_vocabulary_is_the_expected_four() -> None: + assert CANONICAL == { + "blocked", + "review_required", + "insufficient_evidence", + "passed", + } + + +@pytest.mark.parametrize( + "model, field", + [ + (ReleaseDecision, "decision"), + (ReleaseConsequence, "decision"), + (AgentSummary, "verdict"), + (ReviewerSummary, "verdict"), + (VerifierSummary, "verdict"), + ], +) +def test_every_verdict_field_uses_the_canonical_enum(model, field) -> None: + members = set(get_args(model.model_fields[field].annotation)) + assert members == CANONICAL, ( + f"{model.__name__}.{field} re-spells the verdict vocabulary instead " + "of reusing ReleaseDecisionStatus; the enums can now drift apart." + ) + + +def test_verifier_verdict_alias_is_the_canonical_enum() -> None: + assert set(get_args(VerifierVerdict)) == CANONICAL + + +# --- The projection is total and drift-proof -------------------------------- + + +def test_projection_is_total_over_release_status() -> None: + for status in get_args(ReleaseDecisionStatus): + assert status in _DECISION_TO_VERDICT, ( + f"release status {status!r} has no explicit MergeVerdict mapping; " + "add it to _DECISION_TO_VERDICT (do not rely on the fail-safe " + "fallback for a known status)." + ) + + +def test_projection_table_is_pinned() -> None: + # Pin the exact bridge so changing a mapping is a deliberate, reviewed edit. + assert _DECISION_TO_VERDICT == { + "passed": "mergeable", + "review_required": "human_review_required", + "insufficient_evidence": "insufficient_evidence", + "blocked": "blocked", + } + + +def test_map_merge_verdict_none_is_unknown() -> None: + assert map_merge_verdict(None) == "unknown" + + +def test_map_merge_verdict_unknown_status_fails_safe_not_mergeable() -> None: + # An out-of-contract decision string must never auto-pass. + assert map_merge_verdict("definitely-not-a-status") == "human_review_required" + + +@pytest.mark.parametrize( + "decision, head_status, expected", + [ + ("passed", "succeeded", "mergeable"), + ("review_required", "succeeded", "human_review_required"), + ("insufficient_evidence", "succeeded", "insufficient_evidence"), + ("blocked", "succeeded", "blocked"), + (None, "skipped", "mergeable"), # nothing to gate + (None, "succeeded", "unknown"), # ran but produced no decision + (None, "failed", "unknown"), # scan failed + ], +) +def test_merge_verdict_for_matrix(decision, head_status, expected) -> None: + assert merge_verdict_for(decision=decision, head_status=head_status) == expected + + +# --- The artifact cannot disagree with its substrate (structural lock) ------ + + +def _artifact(**overrides) -> VerifierArtifact: + base: dict = { + "workspace": "/tmp/w", + "config": "shipgate.yaml", + "head_status": "succeeded", + } + base.update(overrides) + return VerifierArtifact(**base) + + +@pytest.mark.parametrize("status", sorted(CANONICAL)) +def test_consistent_artifact_is_accepted(status) -> None: + art = _artifact( + release_decision={"decision": status}, + decision=status, + merge_verdict=map_merge_verdict(status), + ) + assert art.merge_verdict == map_merge_verdict(status) + + +def test_artifact_rejects_merge_verdict_inconsistent_with_decision() -> None: + with pytest.raises(ValidationError): + _artifact( + release_decision={"decision": "blocked"}, + decision="blocked", + merge_verdict="mergeable", # lie: blocked must project to "blocked" + ) + + +def test_artifact_rejects_top_level_decision_mismatch() -> None: + with pytest.raises(ValidationError): + _artifact( + release_decision={"decision": "passed"}, + decision="blocked", # disagrees with the substrate + merge_verdict="mergeable", + ) + + +def test_artifact_without_release_decision_is_unconstrained() -> None: + # preview / skipped / failed paths have no substrate to project. + art = _artifact(release_decision=None, merge_verdict="unknown") + assert art.merge_verdict == "unknown" + art2 = _artifact( + release_decision=None, head_status="skipped", merge_verdict="mergeable" + ) + assert art2.merge_verdict == "mergeable"