diff --git a/artifacts/layered_admissibility_results.json b/artifacts/layered_admissibility_results.json index 059a7ff..88d82de 100644 --- a/artifacts/layered_admissibility_results.json +++ b/artifacts/layered_admissibility_results.json @@ -22,6 +22,53 @@ "relational_score": 1.0, "structural_score": 1.0 }, + { + "expected_admissible": false, + "failed_contracts": [ + "recovery_path_available" + ], + "failure_labels": [ + "RECOVERY_PATH_INVALID" + ], + "fixture_id": "coding_workflow_pr_review_mild_v1", + "fixture_path": "fixtures/coding_workflow_pr_review_mild_v1", + "fixture_version": "1.0.0", + "governance_score": 1.0, + "observed_admissible": false, + "operational_score": 1.0, + "overall_admissibility_score": 0.9166666666666666, + "passed_contracts": [ + "no_orphan_tool_calls", + "pre_merge_review", + "security_causal_block" + ], + "relational_score": 0.6666666666666666, + "structural_score": 1.0 + }, + { + "expected_admissible": false, + "failed_contracts": [ + "recovery_path_available", + "security_causal_block" + ], + "failure_labels": [ + "CAUSAL_DEPENDENCY_LOSS", + "RECOVERY_PATH_INVALID" + ], + "fixture_id": "coding_workflow_pr_review_moderate_v1", + "fixture_path": "fixtures/coding_workflow_pr_review_moderate_v1", + "fixture_version": "1.0.0", + "governance_score": 1.0, + "observed_admissible": false, + "operational_score": 1.0, + "overall_admissibility_score": 0.8333333333333334, + "passed_contracts": [ + "no_orphan_tool_calls", + "pre_merge_review" + ], + "relational_score": 0.3333333333333333, + "structural_score": 1.0 + }, { "expected_admissible": false, "failed_contracts": [ diff --git a/docs/benchmarks/layered_admissibility.md b/docs/benchmarks/layered_admissibility.md index c67950d..30bb1f9 100644 --- a/docs/benchmarks/layered_admissibility.md +++ b/docs/benchmarks/layered_admissibility.md @@ -9,11 +9,16 @@ Deterministically compare admissibility outcomes across fixture bundles using Co | fixture_id | expected_admissible | observed_admissible | 
structural_score | relational_score | operational_score | governance_score | overall_admissibility_score | failure_labels | | --- | --- | --- | --- | --- | --- | --- | --- | --- | | coding_workflow_pr_review_v1 | true | true | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | none | +| coding_workflow_pr_review_mild_v1 | false | false | 1.000 | 0.667 | 1.000 | 1.000 | 0.917 | RECOVERY_PATH_INVALID | +| coding_workflow_pr_review_moderate_v1 | false | false | 1.000 | 0.333 | 1.000 | 1.000 | 0.833 | CAUSAL_DEPENDENCY_LOSS, RECOVERY_PATH_INVALID | | coding_workflow_pr_review_degraded_v1 | false | false | 1.000 | 0.000 | 0.000 | 1.000 | 0.500 | CAUSAL_DEPENDENCY_LOSS, INVARIANT_VIOLATION, POLICY_ORDER_BROKEN, RECOVERY_PATH_INVALID | ## Interpretation -The positive fixture remains fully admissible while the degraded fixture shows deterministic score loss and explicit failure labels. +- positive fixture remains fully admissible +- mild fixture isolates recovery reachability loss +- moderate fixture combines recovery and causality loss +- severe fixture combines relational and operational failures ## Non-goals diff --git a/fixtures/coding_workflow_pr_review_mild_v1/README.md b/fixtures/coding_workflow_pr_review_mild_v1/README.md new file mode 100644 index 0000000..eb991b4 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_mild_v1/README.md @@ -0,0 +1,18 @@ +# coding_workflow_pr_review_mild_v1 + +Deterministic mild degraded fixture for coding workflow replay-validation contracts. + +## Intentional degradations + +1. **Reachability degradation**: the reconstructed dependency graph replaces the `RECOVERY` edges from `test_failure` to `rollback` and `escalate_to_human` with `TEMPORAL` edges from `run_tests`, leaving no path from `test_failure` to either target and violating `recovery_path_available`. + +## Preserved properties + +- The required ordering sequence remains intact in the reconstructed trace (the extra leading `setup_workspace` event does not disturb it). +- The `no_orphan_tool_calls` invariant (no orphan dependencies) is preserved. + +## Expected failures + +- `RECOVERY_PATH_INVALID` + +This fixture is intentionally synthetic, deterministic, and scoped to this fixture family. 
diff --git a/fixtures/coding_workflow_pr_review_mild_v1/expected/admissibility.json b/fixtures/coding_workflow_pr_review_mild_v1/expected/admissibility.json new file mode 100644 index 0000000..44d3ddf --- /dev/null +++ b/fixtures/coding_workflow_pr_review_mild_v1/expected/admissibility.json @@ -0,0 +1,18 @@ +{ + "fixture_id": "coding_workflow_pr_review_mild_v1", + "fixture_version": "1.0.0", + "expected_admissible": false, + "expected_layer_scores": { + "structural": 1.0, + "relational": 0.6666666666666666, + "operational": 1.0, + "governance": 1.0 + }, + "notes": "Mild degraded fixture isolating recovery-path reachability loss.", + "must_fail_contracts": [ + "recovery_path_available" + ], + "expected_failure_labels": [ + "RECOVERY_PATH_INVALID" + ] +} diff --git a/fixtures/coding_workflow_pr_review_mild_v1/expected/failures.json b/fixtures/coding_workflow_pr_review_mild_v1/expected/failures.json new file mode 100644 index 0000000..ca1eb52 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_mild_v1/expected/failures.json @@ -0,0 +1,18 @@ +{ + "expected_failures": [ + "RECOVERY_PATH_INVALID" + ], + "allowed_failures": [ + "ORPHAN_DEPENDENCY", + "DETACHED_DEPENDENCY", + "GRAPH_FRAGMENTATION", + "TEMPORAL_ORDER_VIOLATION" + ], + "disallowed_failures": [ + "POLICY_ORDER_BROKEN", + "INVARIANT_VIOLATION", + "CYCLE_INTRODUCED", + "REPLAY_NON_REPRODUCIBLE", + "ARTIFACT_INTEGRITY_VIOLATION" + ] +} diff --git a/fixtures/coding_workflow_pr_review_mild_v1/original/contracts/no_orphan_tool_calls.json b/fixtures/coding_workflow_pr_review_mild_v1/original/contracts/no_orphan_tool_calls.json new file mode 100644 index 0000000..95b77e5 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_mild_v1/original/contracts/no_orphan_tool_calls.json @@ -0,0 +1,9 @@ +{ + "contract_id": "no_orphan_tool_calls", + "layer": "relational", + "type": "invariant", + "definition": { + "rule": "no_orphan_dependencies" + }, + "severity": "HIGH" +} diff --git 
a/fixtures/coding_workflow_pr_review_mild_v1/original/contracts/pre_merge_review.json b/fixtures/coding_workflow_pr_review_mild_v1/original/contracts/pre_merge_review.json new file mode 100644 index 0000000..d7192a8 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_mild_v1/original/contracts/pre_merge_review.json @@ -0,0 +1,14 @@ +{ + "contract_id": "pre_merge_review", + "layer": "operational", + "type": "ordering", + "definition": { + "required_sequence": [ + "generate_patch", + "run_tests", + "human_review", + "merge" + ] + }, + "severity": "CRITICAL" +} diff --git a/fixtures/coding_workflow_pr_review_mild_v1/original/contracts/recovery_path_available.json b/fixtures/coding_workflow_pr_review_mild_v1/original/contracts/recovery_path_available.json new file mode 100644 index 0000000..32c79ba --- /dev/null +++ b/fixtures/coding_workflow_pr_review_mild_v1/original/contracts/recovery_path_available.json @@ -0,0 +1,14 @@ +{ + "contract_id": "recovery_path_available", + "layer": "relational", + "type": "reachability", + "definition": { + "from": "test_failure", + "to": [ + "rollback", + "escalate_to_human" + ], + "min_paths": 1 + }, + "severity": "HIGH" +} diff --git a/fixtures/coding_workflow_pr_review_mild_v1/original/contracts/security_causal_block.json b/fixtures/coding_workflow_pr_review_mild_v1/original/contracts/security_causal_block.json new file mode 100644 index 0000000..b5e5d6f --- /dev/null +++ b/fixtures/coding_workflow_pr_review_mild_v1/original/contracts/security_causal_block.json @@ -0,0 +1,11 @@ +{ + "contract_id": "security_causal_block", + "layer": "relational", + "type": "causality", + "definition": { + "required_causal_edges": [ + ["security_scan_failed", "deploy_blocked"] + ] + }, + "severity": "HIGH" +} diff --git a/fixtures/coding_workflow_pr_review_mild_v1/original/dependency_graph.json b/fixtures/coding_workflow_pr_review_mild_v1/original/dependency_graph.json new file mode 100644 index 0000000..106fdea --- /dev/null +++ 
b/fixtures/coding_workflow_pr_review_mild_v1/original/dependency_graph.json @@ -0,0 +1,27 @@ +{ + "graph_version": "1.0", + "nodes": [ + {"node_id": "generate_patch", "label": "Generate patch", "metadata": {"phase": "build"}}, + {"node_id": "run_tests", "label": "Run tests", "metadata": {"phase": "verify"}}, + {"node_id": "test_failure", "label": "Test failure", "metadata": {"phase": "verify"}}, + {"node_id": "rollback", "label": "Rollback", "metadata": {"phase": "recovery"}}, + {"node_id": "security_scan_failed", "label": "Security scan failed", "metadata": {"phase": "security"}}, + {"node_id": "deploy_blocked", "label": "Deploy blocked", "metadata": {"phase": "security"}}, + {"node_id": "escalate_to_human", "label": "Escalate to human", "metadata": {"phase": "recovery"}}, + {"node_id": "human_review", "label": "Human review", "metadata": {"phase": "governance"}}, + {"node_id": "merge", "label": "Merge", "metadata": {"phase": "release"}} + ], + "edges": [ + {"source": "generate_patch", "target": "run_tests", "relation": "PREREQUISITE", "metadata": {}}, + {"source": "run_tests", "target": "test_failure", "relation": "CAUSAL", "metadata": {}}, + {"source": "run_tests", "target": "security_scan_failed", "relation": "DATA_FLOW", "metadata": {}}, + {"source": "test_failure", "target": "rollback", "relation": "RECOVERY", "metadata": {}}, + {"source": "test_failure", "target": "escalate_to_human", "relation": "RECOVERY", "metadata": {}}, + {"source": "security_scan_failed", "target": "deploy_blocked", "relation": "CAUSAL", "metadata": {}}, + {"source": "rollback", "target": "human_review", "relation": "TEMPORAL", "metadata": {}}, + {"source": "escalate_to_human", "target": "human_review", "relation": "TEMPORAL", "metadata": {}}, + {"source": "human_review", "target": "merge", "relation": "PREREQUISITE", "metadata": {}}, + {"source": "run_tests", "target": "merge", "relation": "PREREQUISITE", "metadata": {}}, + {"source": "deploy_blocked", "target": "merge", "relation": 
"BLOCKER", "metadata": {"state": "prevented"}} + ] +} diff --git a/fixtures/coding_workflow_pr_review_mild_v1/original/state.json b/fixtures/coding_workflow_pr_review_mild_v1/original/state.json new file mode 100644 index 0000000..4e272a7 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_mild_v1/original/state.json @@ -0,0 +1,28 @@ +{ + "evidence": { + "pr_id": "PR-122-fixture", + "test_suite": "unit", + "security_gate": "required" + }, + "constraints": { + "requires_human_review": true, + "requires_clean_tests_before_merge": true + }, + "blockers": [ + "test_failure", + "security_scan_failed", + "deploy_blocked" + ], + "recovery_paths": { + "test_failure": [ + "rollback", + "escalate_to_human" + ] + }, + "dependencies": { + "merge": [ + "human_review", + "run_tests" + ] + } +} diff --git a/fixtures/coding_workflow_pr_review_mild_v1/original/trace.json b/fixtures/coding_workflow_pr_review_mild_v1/original/trace.json new file mode 100644 index 0000000..5a71c2c --- /dev/null +++ b/fixtures/coding_workflow_pr_review_mild_v1/original/trace.json @@ -0,0 +1,13 @@ +{ + "events": [ + {"action": "generate_patch", "step": 1}, + {"action": "run_tests", "step": 2}, + {"action": "test_failure", "step": 3}, + {"action": "rollback", "step": 4}, + {"action": "security_scan_failed", "step": 5}, + {"action": "deploy_blocked", "step": 6}, + {"action": "escalate_to_human", "step": 7}, + {"action": "human_review", "step": 8}, + {"action": "merge", "step": 9} + ] +} diff --git a/fixtures/coding_workflow_pr_review_mild_v1/reconstructed/dependency_graph.json b/fixtures/coding_workflow_pr_review_mild_v1/reconstructed/dependency_graph.json new file mode 100644 index 0000000..0e67c52 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_mild_v1/reconstructed/dependency_graph.json @@ -0,0 +1,138 @@ +{ + "graph_version": "1.0", + "nodes": [ + { + "node_id": "generate_patch", + "label": "Generate patch", + "metadata": { + "phase": "build" + } + }, + { + "node_id": "run_tests", + "label": 
"Run tests", + "metadata": { + "phase": "verify" + } + }, + { + "node_id": "test_failure", + "label": "Test failure", + "metadata": { + "phase": "verify" + } + }, + { + "node_id": "rollback", + "label": "Rollback", + "metadata": { + "phase": "recovery" + } + }, + { + "node_id": "security_scan_failed", + "label": "Security scan failed", + "metadata": { + "phase": "security" + } + }, + { + "node_id": "deploy_blocked", + "label": "Deploy blocked", + "metadata": { + "phase": "security" + } + }, + { + "node_id": "escalate_to_human", + "label": "Escalate to human", + "metadata": { + "phase": "recovery" + } + }, + { + "node_id": "human_review", + "label": "Human review", + "metadata": { + "phase": "governance" + } + }, + { + "node_id": "merge", + "label": "Merge", + "metadata": { + "phase": "release" + } + } + ], + "edges": [ + { + "source": "generate_patch", + "target": "run_tests", + "relation": "PREREQUISITE", + "metadata": {} + }, + { + "source": "run_tests", + "target": "test_failure", + "relation": "CAUSAL", + "metadata": {} + }, + { + "source": "run_tests", + "target": "security_scan_failed", + "relation": "DATA_FLOW", + "metadata": {} + }, + { + "source": "security_scan_failed", + "target": "deploy_blocked", + "relation": "CAUSAL", + "metadata": {} + }, + { + "source": "rollback", + "target": "human_review", + "relation": "TEMPORAL", + "metadata": {} + }, + { + "source": "escalate_to_human", + "target": "human_review", + "relation": "TEMPORAL", + "metadata": {} + }, + { + "source": "human_review", + "target": "merge", + "relation": "PREREQUISITE", + "metadata": {} + }, + { + "source": "run_tests", + "target": "merge", + "relation": "PREREQUISITE", + "metadata": {} + }, + { + "source": "deploy_blocked", + "target": "merge", + "relation": "BLOCKER", + "metadata": { + "state": "prevented" + } + }, + { + "source": "run_tests", + "target": "rollback", + "relation": "TEMPORAL", + "metadata": {} + }, + { + "source": "run_tests", + "target": "escalate_to_human", + 
"relation": "TEMPORAL", + "metadata": {} + } + ] +} diff --git a/fixtures/coding_workflow_pr_review_mild_v1/reconstructed/state.json b/fixtures/coding_workflow_pr_review_mild_v1/reconstructed/state.json new file mode 100644 index 0000000..4e272a7 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_mild_v1/reconstructed/state.json @@ -0,0 +1,28 @@ +{ + "evidence": { + "pr_id": "PR-122-fixture", + "test_suite": "unit", + "security_gate": "required" + }, + "constraints": { + "requires_human_review": true, + "requires_clean_tests_before_merge": true + }, + "blockers": [ + "test_failure", + "security_scan_failed", + "deploy_blocked" + ], + "recovery_paths": { + "test_failure": [ + "rollback", + "escalate_to_human" + ] + }, + "dependencies": { + "merge": [ + "human_review", + "run_tests" + ] + } +} diff --git a/fixtures/coding_workflow_pr_review_mild_v1/reconstructed/trace.json b/fixtures/coding_workflow_pr_review_mild_v1/reconstructed/trace.json new file mode 100644 index 0000000..d0fa556 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_mild_v1/reconstructed/trace.json @@ -0,0 +1,14 @@ +{ + "events": [ + {"action": "setup_workspace", "step": 0}, + {"action": "generate_patch", "step": 1}, + {"action": "run_tests", "step": 2}, + {"action": "test_failure", "step": 3}, + {"action": "rollback", "step": 4}, + {"action": "security_scan_failed", "step": 5}, + {"action": "deploy_blocked", "step": 6}, + {"action": "escalate_to_human", "step": 7}, + {"action": "human_review", "step": 8}, + {"action": "merge", "step": 9} + ] +} diff --git a/fixtures/coding_workflow_pr_review_moderate_v1/README.md b/fixtures/coding_workflow_pr_review_moderate_v1/README.md new file mode 100644 index 0000000..91e86c3 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/README.md @@ -0,0 +1,20 @@ +# coding_workflow_pr_review_moderate_v1 + +Deterministic moderate degraded fixture for coding workflow replay-validation contracts. + +## Intentional degradations + +1. 
**Reachability degradation**: the reconstructed dependency graph replaces the `RECOVERY` edges from `test_failure` to `rollback` and `escalate_to_human` with `TEMPORAL` edges from `run_tests`, leaving no path from `test_failure` to either target and violating `recovery_path_available`. +2. **Causality degradation**: the reconstructed dependency graph replaces the `CAUSAL` edge `security_scan_failed -> deploy_blocked` with a `TEMPORAL` edge `run_tests -> deploy_blocked`, violating `security_causal_block`. + +## Preserved properties + +- The required ordering sequence remains intact in the reconstructed trace (the extra leading `setup_workspace` event does not disturb it). +- The `no_orphan_tool_calls` invariant (no orphan dependencies) is preserved. + +## Expected failures + +- `RECOVERY_PATH_INVALID` +- `CAUSAL_DEPENDENCY_LOSS` + +This fixture is intentionally synthetic, deterministic, and scoped to this fixture family. diff --git a/fixtures/coding_workflow_pr_review_moderate_v1/expected/admissibility.json b/fixtures/coding_workflow_pr_review_moderate_v1/expected/admissibility.json new file mode 100644 index 0000000..5f0bccc --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/expected/admissibility.json @@ -0,0 +1,20 @@ +{ + "fixture_id": "coding_workflow_pr_review_moderate_v1", + "fixture_version": "1.0.0", + "expected_admissible": false, + "expected_layer_scores": { + "structural": 1.0, + "relational": 0.3333333333333333, + "operational": 1.0, + "governance": 1.0 + }, + "notes": "Moderate degraded fixture combining recovery-path and causality loss.", + "must_fail_contracts": [ + "recovery_path_available", + "security_causal_block" + ], + "expected_failure_labels": [ + "RECOVERY_PATH_INVALID", + "CAUSAL_DEPENDENCY_LOSS" + ] +} diff --git a/fixtures/coding_workflow_pr_review_moderate_v1/expected/failures.json b/fixtures/coding_workflow_pr_review_moderate_v1/expected/failures.json new file mode 100644 index 0000000..b0d9223 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/expected/failures.json @@ -0,0 +1,19 @@ +{ + "expected_failures": [ + "RECOVERY_PATH_INVALID", + "CAUSAL_DEPENDENCY_LOSS" + ], + "allowed_failures": [ + "ORPHAN_DEPENDENCY", + "DETACHED_DEPENDENCY", + "GRAPH_FRAGMENTATION", + "TEMPORAL_ORDER_VIOLATION" + 
], + "disallowed_failures": [ + "POLICY_ORDER_BROKEN", + "INVARIANT_VIOLATION", + "CYCLE_INTRODUCED", + "REPLAY_NON_REPRODUCIBLE", + "ARTIFACT_INTEGRITY_VIOLATION" + ] +} diff --git a/fixtures/coding_workflow_pr_review_moderate_v1/original/contracts/no_orphan_tool_calls.json b/fixtures/coding_workflow_pr_review_moderate_v1/original/contracts/no_orphan_tool_calls.json new file mode 100644 index 0000000..95b77e5 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/original/contracts/no_orphan_tool_calls.json @@ -0,0 +1,9 @@ +{ + "contract_id": "no_orphan_tool_calls", + "layer": "relational", + "type": "invariant", + "definition": { + "rule": "no_orphan_dependencies" + }, + "severity": "HIGH" +} diff --git a/fixtures/coding_workflow_pr_review_moderate_v1/original/contracts/pre_merge_review.json b/fixtures/coding_workflow_pr_review_moderate_v1/original/contracts/pre_merge_review.json new file mode 100644 index 0000000..d7192a8 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/original/contracts/pre_merge_review.json @@ -0,0 +1,14 @@ +{ + "contract_id": "pre_merge_review", + "layer": "operational", + "type": "ordering", + "definition": { + "required_sequence": [ + "generate_patch", + "run_tests", + "human_review", + "merge" + ] + }, + "severity": "CRITICAL" +} diff --git a/fixtures/coding_workflow_pr_review_moderate_v1/original/contracts/recovery_path_available.json b/fixtures/coding_workflow_pr_review_moderate_v1/original/contracts/recovery_path_available.json new file mode 100644 index 0000000..32c79ba --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/original/contracts/recovery_path_available.json @@ -0,0 +1,14 @@ +{ + "contract_id": "recovery_path_available", + "layer": "relational", + "type": "reachability", + "definition": { + "from": "test_failure", + "to": [ + "rollback", + "escalate_to_human" + ], + "min_paths": 1 + }, + "severity": "HIGH" +} diff --git 
a/fixtures/coding_workflow_pr_review_moderate_v1/original/contracts/security_causal_block.json b/fixtures/coding_workflow_pr_review_moderate_v1/original/contracts/security_causal_block.json new file mode 100644 index 0000000..b5e5d6f --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/original/contracts/security_causal_block.json @@ -0,0 +1,11 @@ +{ + "contract_id": "security_causal_block", + "layer": "relational", + "type": "causality", + "definition": { + "required_causal_edges": [ + ["security_scan_failed", "deploy_blocked"] + ] + }, + "severity": "HIGH" +} diff --git a/fixtures/coding_workflow_pr_review_moderate_v1/original/dependency_graph.json b/fixtures/coding_workflow_pr_review_moderate_v1/original/dependency_graph.json new file mode 100644 index 0000000..106fdea --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/original/dependency_graph.json @@ -0,0 +1,27 @@ +{ + "graph_version": "1.0", + "nodes": [ + {"node_id": "generate_patch", "label": "Generate patch", "metadata": {"phase": "build"}}, + {"node_id": "run_tests", "label": "Run tests", "metadata": {"phase": "verify"}}, + {"node_id": "test_failure", "label": "Test failure", "metadata": {"phase": "verify"}}, + {"node_id": "rollback", "label": "Rollback", "metadata": {"phase": "recovery"}}, + {"node_id": "security_scan_failed", "label": "Security scan failed", "metadata": {"phase": "security"}}, + {"node_id": "deploy_blocked", "label": "Deploy blocked", "metadata": {"phase": "security"}}, + {"node_id": "escalate_to_human", "label": "Escalate to human", "metadata": {"phase": "recovery"}}, + {"node_id": "human_review", "label": "Human review", "metadata": {"phase": "governance"}}, + {"node_id": "merge", "label": "Merge", "metadata": {"phase": "release"}} + ], + "edges": [ + {"source": "generate_patch", "target": "run_tests", "relation": "PREREQUISITE", "metadata": {}}, + {"source": "run_tests", "target": "test_failure", "relation": "CAUSAL", "metadata": {}}, + {"source": 
"run_tests", "target": "security_scan_failed", "relation": "DATA_FLOW", "metadata": {}}, + {"source": "test_failure", "target": "rollback", "relation": "RECOVERY", "metadata": {}}, + {"source": "test_failure", "target": "escalate_to_human", "relation": "RECOVERY", "metadata": {}}, + {"source": "security_scan_failed", "target": "deploy_blocked", "relation": "CAUSAL", "metadata": {}}, + {"source": "rollback", "target": "human_review", "relation": "TEMPORAL", "metadata": {}}, + {"source": "escalate_to_human", "target": "human_review", "relation": "TEMPORAL", "metadata": {}}, + {"source": "human_review", "target": "merge", "relation": "PREREQUISITE", "metadata": {}}, + {"source": "run_tests", "target": "merge", "relation": "PREREQUISITE", "metadata": {}}, + {"source": "deploy_blocked", "target": "merge", "relation": "BLOCKER", "metadata": {"state": "prevented"}} + ] +} diff --git a/fixtures/coding_workflow_pr_review_moderate_v1/original/state.json b/fixtures/coding_workflow_pr_review_moderate_v1/original/state.json new file mode 100644 index 0000000..4e272a7 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/original/state.json @@ -0,0 +1,28 @@ +{ + "evidence": { + "pr_id": "PR-122-fixture", + "test_suite": "unit", + "security_gate": "required" + }, + "constraints": { + "requires_human_review": true, + "requires_clean_tests_before_merge": true + }, + "blockers": [ + "test_failure", + "security_scan_failed", + "deploy_blocked" + ], + "recovery_paths": { + "test_failure": [ + "rollback", + "escalate_to_human" + ] + }, + "dependencies": { + "merge": [ + "human_review", + "run_tests" + ] + } +} diff --git a/fixtures/coding_workflow_pr_review_moderate_v1/original/trace.json b/fixtures/coding_workflow_pr_review_moderate_v1/original/trace.json new file mode 100644 index 0000000..5a71c2c --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/original/trace.json @@ -0,0 +1,13 @@ +{ + "events": [ + {"action": "generate_patch", "step": 1}, + {"action": 
"run_tests", "step": 2}, + {"action": "test_failure", "step": 3}, + {"action": "rollback", "step": 4}, + {"action": "security_scan_failed", "step": 5}, + {"action": "deploy_blocked", "step": 6}, + {"action": "escalate_to_human", "step": 7}, + {"action": "human_review", "step": 8}, + {"action": "merge", "step": 9} + ] +} diff --git a/fixtures/coding_workflow_pr_review_moderate_v1/reconstructed/dependency_graph.json b/fixtures/coding_workflow_pr_review_moderate_v1/reconstructed/dependency_graph.json new file mode 100644 index 0000000..445b32a --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/reconstructed/dependency_graph.json @@ -0,0 +1,138 @@ +{ + "graph_version": "1.0", + "nodes": [ + { + "node_id": "generate_patch", + "label": "Generate patch", + "metadata": { + "phase": "build" + } + }, + { + "node_id": "run_tests", + "label": "Run tests", + "metadata": { + "phase": "verify" + } + }, + { + "node_id": "test_failure", + "label": "Test failure", + "metadata": { + "phase": "verify" + } + }, + { + "node_id": "rollback", + "label": "Rollback", + "metadata": { + "phase": "recovery" + } + }, + { + "node_id": "security_scan_failed", + "label": "Security scan failed", + "metadata": { + "phase": "security" + } + }, + { + "node_id": "deploy_blocked", + "label": "Deploy blocked", + "metadata": { + "phase": "security" + } + }, + { + "node_id": "escalate_to_human", + "label": "Escalate to human", + "metadata": { + "phase": "recovery" + } + }, + { + "node_id": "human_review", + "label": "Human review", + "metadata": { + "phase": "governance" + } + }, + { + "node_id": "merge", + "label": "Merge", + "metadata": { + "phase": "release" + } + } + ], + "edges": [ + { + "source": "generate_patch", + "target": "run_tests", + "relation": "PREREQUISITE", + "metadata": {} + }, + { + "source": "run_tests", + "target": "test_failure", + "relation": "CAUSAL", + "metadata": {} + }, + { + "source": "run_tests", + "target": "security_scan_failed", + "relation": "DATA_FLOW", + 
"metadata": {} + }, + { + "source": "rollback", + "target": "human_review", + "relation": "TEMPORAL", + "metadata": {} + }, + { + "source": "escalate_to_human", + "target": "human_review", + "relation": "TEMPORAL", + "metadata": {} + }, + { + "source": "human_review", + "target": "merge", + "relation": "PREREQUISITE", + "metadata": {} + }, + { + "source": "run_tests", + "target": "merge", + "relation": "PREREQUISITE", + "metadata": {} + }, + { + "source": "deploy_blocked", + "target": "merge", + "relation": "BLOCKER", + "metadata": { + "state": "prevented" + } + }, + { + "source": "run_tests", + "target": "rollback", + "relation": "TEMPORAL", + "metadata": {} + }, + { + "source": "run_tests", + "target": "escalate_to_human", + "relation": "TEMPORAL", + "metadata": {} + }, + { + "source": "run_tests", + "target": "deploy_blocked", + "relation": "TEMPORAL", + "metadata": {} + } + ] +} diff --git a/fixtures/coding_workflow_pr_review_moderate_v1/reconstructed/state.json b/fixtures/coding_workflow_pr_review_moderate_v1/reconstructed/state.json new file mode 100644 index 0000000..4e272a7 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/reconstructed/state.json @@ -0,0 +1,28 @@ +{ + "evidence": { + "pr_id": "PR-122-fixture", + "test_suite": "unit", + "security_gate": "required" + }, + "constraints": { + "requires_human_review": true, + "requires_clean_tests_before_merge": true + }, + "blockers": [ + "test_failure", + "security_scan_failed", + "deploy_blocked" + ], + "recovery_paths": { + "test_failure": [ + "rollback", + "escalate_to_human" + ] + }, + "dependencies": { + "merge": [ + "human_review", + "run_tests" + ] + } +} diff --git a/fixtures/coding_workflow_pr_review_moderate_v1/reconstructed/trace.json b/fixtures/coding_workflow_pr_review_moderate_v1/reconstructed/trace.json new file mode 100644 index 0000000..d0fa556 --- /dev/null +++ b/fixtures/coding_workflow_pr_review_moderate_v1/reconstructed/trace.json @@ -0,0 +1,14 @@ +{ + "events": [ + 
{"action": "setup_workspace", "step": 0}, + {"action": "generate_patch", "step": 1}, + {"action": "run_tests", "step": 2}, + {"action": "test_failure", "step": 3}, + {"action": "rollback", "step": 4}, + {"action": "security_scan_failed", "step": 5}, + {"action": "deploy_blocked", "step": 6}, + {"action": "escalate_to_human", "step": 7}, + {"action": "human_review", "step": 8}, + {"action": "merge", "step": 9} + ] +} diff --git a/tests/test_degradation_curve_generator.py b/tests/test_degradation_curve_generator.py index 8e47728..3d45741 100644 --- a/tests/test_degradation_curve_generator.py +++ b/tests/test_degradation_curve_generator.py @@ -9,6 +9,8 @@ POS_FIXTURE = Path("fixtures/coding_workflow_pr_review_v1") +MILD_FIXTURE = Path("fixtures/coding_workflow_pr_review_mild_v1") +MODERATE_FIXTURE = Path("fixtures/coding_workflow_pr_review_moderate_v1") NEG_FIXTURE = Path("fixtures/coding_workflow_pr_review_degraded_v1") ARTIFACT_PATH = Path("artifacts/layered_admissibility_results.json") CURVE_ID = "coding_workflow_pr_review_curve_v1" @@ -38,7 +40,7 @@ def test_evaluate_negative_fixture_detects_expected_failures() -> None: def test_generate_curve_is_deterministic() -> None: generator = DegradationCurveGenerator() - fixtures = [POS_FIXTURE, NEG_FIXTURE] + fixtures = [POS_FIXTURE, MILD_FIXTURE, MODERATE_FIXTURE, NEG_FIXTURE] assert generator.to_dict(generator.generate(fixtures, curve_id=CURVE_ID)) == generator.to_dict( generator.generate(fixtures, curve_id=CURVE_ID) ) @@ -46,18 +48,20 @@ def test_generate_curve_is_deterministic() -> None: def test_to_dict_is_json_compatible_and_sorted() -> None: generator = DegradationCurveGenerator() - curve = generator.generate([POS_FIXTURE, NEG_FIXTURE], curve_id=CURVE_ID) + curve = generator.generate([POS_FIXTURE, MILD_FIXTURE, MODERATE_FIXTURE, NEG_FIXTURE], curve_id=CURVE_ID) curve_dict = generator.to_dict(curve) json.dumps(curve_dict, sort_keys=True) assert [point["fixture_path"] for point in curve_dict["points"]] == [ 
POS_FIXTURE.as_posix(), + MILD_FIXTURE.as_posix(), + MODERATE_FIXTURE.as_posix(), NEG_FIXTURE.as_posix(), ] def test_write_json_matches_committed_artifact(tmp_path: Path) -> None: generator = DegradationCurveGenerator() - curve = generator.generate([POS_FIXTURE, NEG_FIXTURE], curve_id=CURVE_ID) + curve = generator.generate([POS_FIXTURE, MILD_FIXTURE, MODERATE_FIXTURE, NEG_FIXTURE], curve_id=CURVE_ID) generated_path = tmp_path / "layered_admissibility_results.json" generator.write_json(curve, generated_path) @@ -68,12 +72,14 @@ def test_write_json_matches_committed_artifact(tmp_path: Path) -> None: def test_write_markdown_contains_fixture_rows(tmp_path: Path) -> None: generator = DegradationCurveGenerator() - curve = generator.generate([POS_FIXTURE, NEG_FIXTURE], curve_id=CURVE_ID) + curve = generator.generate([POS_FIXTURE, MILD_FIXTURE, MODERATE_FIXTURE, NEG_FIXTURE], curve_id=CURVE_ID) markdown_path = tmp_path / "layered_admissibility.md" generator.write_markdown(curve, markdown_path) content = markdown_path.read_text(encoding="utf-8") assert "coding_workflow_pr_review_v1" in content + assert "coding_workflow_pr_review_mild_v1" in content + assert "coding_workflow_pr_review_moderate_v1" in content assert "coding_workflow_pr_review_degraded_v1" in content assert "POLICY_ORDER_BROKEN" in content assert "RECOVERY_PATH_INVALID" in content @@ -106,3 +112,35 @@ def test_disallowed_failure_label_raises_clear_error() -> None: {"expected_failures": [], "disallowed_failures": ["DISALLOWED_FAILURE"]}, ("DISALLOWED_FAILURE",), ) + + +def test_progressive_curve_scores_are_monotonic_or_non_increasing() -> None: + generator = DegradationCurveGenerator() + curve = generator.generate([POS_FIXTURE, MILD_FIXTURE, MODERATE_FIXTURE, NEG_FIXTURE], curve_id=CURVE_ID) + points = {point.fixture_id: point for point in curve.points} + + assert points["coding_workflow_pr_review_v1"].overall_admissibility_score == 1.0 + assert 
points["coding_workflow_pr_review_mild_v1"].overall_admissibility_score < points["coding_workflow_pr_review_v1"].overall_admissibility_score + assert points["coding_workflow_pr_review_moderate_v1"].overall_admissibility_score <= points["coding_workflow_pr_review_mild_v1"].overall_admissibility_score + assert points["coding_workflow_pr_review_degraded_v1"].overall_admissibility_score <= points["coding_workflow_pr_review_moderate_v1"].overall_admissibility_score + + +def test_mild_fixture_only_expected_recovery_failure() -> None: + point = DegradationCurveGenerator().evaluate_fixture(MILD_FIXTURE) + labels = set(point.failure_labels) + + assert point.observed_admissible is False + assert "RECOVERY_PATH_INVALID" in labels + assert "POLICY_ORDER_BROKEN" not in labels + assert "CAUSAL_DEPENDENCY_LOSS" not in labels + assert "INVARIANT_VIOLATION" not in labels + + +def test_moderate_fixture_expected_recovery_and_causality_failures() -> None: + point = DegradationCurveGenerator().evaluate_fixture(MODERATE_FIXTURE) + labels = set(point.failure_labels) + + assert "RECOVERY_PATH_INVALID" in labels + assert "CAUSAL_DEPENDENCY_LOSS" in labels + assert "POLICY_ORDER_BROKEN" not in labels + assert "INVARIANT_VIOLATION" not in labels