DevOpsMadDog · DevOpsMadDog · Oct 4, 2025 · Oct 3, 2025
diff --git a/data/feeds/golden_regression_cases.json b/data/feeds/golden_regression_cases.json
@@ -0,0 +1,152 @@
+[
+    {
+        "id": "payments-critical-rce",
+        "cve_id": "CVE-2024-22201",
+        "title": "Remote code execution in payment gateway",
+        "expected": {
+            "decision": "BLOCK",
+            "confidence": 0.92,
+            "reason": "Exploit confirmed in production traffic"
+        },
+        "context": {
+            "service_name": "payments-gateway",
+            "environment": "production",
+            "business_context": {
+                "service_tier": "tier-0",
+                "owner": "fraud-response",
+                "regulatory_commitments": [
+                    "PCI-DSS"
+                ],
+                "change_window": "emergency"
+            },
+            "security_findings": [
+                {
+                    "source": "runtime-detector",
+                    "severity": "CRITICAL",
+                    "evidence": "Exploit shell spawned from payments container"
+                },
+                {
+                    "source": "threat-intel",
+                    "severity": "HIGH",
+                    "evidence": "Active ransomware campaign targeting CVE-2024-22201"
+                }
+            ],
+            "runtime_data": {
+                "exploit_attempts": 17,
+                "observed_latency_ms": 580,
+                "error_rate": 0.34
+            },
+            "sbom_data": {
+                "component": "payments-core",
+                "version": "5.4.1",
+                "patch_available": false
+            },
+            "threat_model": {
+                "attack_path": "internet > api gateway > pod",
+                "blast_radius": "customer payments"
+            }
+        },
+        "metadata": {
+            "customer": "GlobalPay",
+            "playbook": "CVE-2024-22201 emergency response",
+            "notes": "Exploit blocked by WAF only; FixOps expected to halt deployment"
+        }
+    },
+    {
+        "id": "inventory-patched-lib",
+        "cve_id": "CVE-2023-45008",
+        "title": "Patched library flagged in SBOM",
+        "expected": {
+            "decision": "ALLOW",
+            "confidence": 0.78,
+            "reason": "Runtime guarded by feature flag and mitigation deployed"
+        },
+        "context": {
+            "service_name": "inventory-api",
+            "environment": "staging",
+            "business_context": {
+                "service_tier": "tier-2",
+                "owner": "supply-chain",
+                "change_window": "standard"
+            },
+            "security_findings": [
+                {
+                    "source": "sbom-scan",
+                    "severity": "MEDIUM",
+                    "evidence": "Dependency vulnerable but patched version already in use"
+                }
+            ],
+            "runtime_data": {
+                "exploit_attempts": 0,
+                "observed_latency_ms": 120,
+                "error_rate": 0.02
+            },
+            "sbom_data": {
+                "component": "inventory-lib",
+                "version": "2.9.4",
+                "patch_available": true,
+                "patch_status": "applied"
+            },
+            "threat_model": {
+                "attack_path": "internal > api > database",
+                "blast_radius": "inventory counts"
+            }
+        },
+        "metadata": {
+            "customer": "Logistix",
+            "playbook": "Mitigation verification",
+            "notes": "Regression should prove FixOps would not block patched release"
+        }
+    },
+    {
+        "id": "authn-thirdparty-dependency",
+        "cve_id": "CVE-2022-31899",
+        "title": "Authentication dependency with upstream SLA",
+        "expected": {
+            "decision": "DEFER",
+            "confidence": 0.6,
+            "reason": "Awaiting vendor patch; monitored by runtime anomaly detection"
+        },
+        "context": {
+            "service_name": "auth-service",
+            "environment": "production",
+            "business_context": {
+                "service_tier": "tier-1",
+                "owner": "identity-platform",
+                "change_window": "coordinated",
+                "vendor_sla_hours": 48
+            },
+            "security_findings": [
+                {
+                    "source": "vendor-advisory",
+                    "severity": "HIGH",
+                    "evidence": "Vendor committed fix within SLA"
+                },
+                {
+                    "source": "runtime-detector",
+                    "severity": "MEDIUM",
+                    "evidence": "No exploit traffic observed"
+                }
+            ],
+            "runtime_data": {
+                "exploit_attempts": 0,
+                "observed_latency_ms": 210,
+                "error_rate": 0.04
+            },
+            "sbom_data": {
+                "component": "oauth-broker",
+                "version": "3.1.0",
+                "patch_available": false
+            },
+            "threat_model": {
+                "attack_path": "internet > auth > upstream",
+                "blast_radius": "session tokens"
+            }
+        },
+        "metadata": {
+            "customer": "ContosoID",
+            "playbook": "3rd party deferment",
+            "notes": "Regression ensures FixOps escalates but does not block with SLA in flight"
+        }
+    }
+]
diff --git a/fixops-blended-enterprise/scripts/run_real_cve_playbook.py b/fixops-blended-enterprise/scripts/run_real_cve_playbook.py
@@ -0,0 +1,79 @@
+"""Replay golden regression CVE cases against the FixOps decision engine."""
+
+from __future__ import annotations
+
+import asyncio
+import sys
+from pathlib import Path
+from typing import Any, Dict
+
+
+def _bootstrap_path() -> None:
+    """Make the project's ``src`` package importable when run as a script.
+
+    The project imports that follow this call use the ``src.services...``
+    form, which resolves only when the directory *containing* ``src`` (the
+    project root, one level above this script's directory) is on
+    ``sys.path``. The ``src`` directory itself is still added, as before,
+    in case other modules rely on it being importable directly.
+
+    Entries already present on ``sys.path`` are left where they are.
+    """
+    root = Path(__file__).resolve().parents[1]
+    # Insert the root first and ``src`` second so that ``src`` ends up at
+    # index 0, exactly where the previous implementation placed it.
+    for entry in (str(root), str(root / "src")):
+        if entry not in sys.path:
+            sys.path.insert(0, entry)
+
+
+# Must run before the project imports below: it extends sys.path at import
+# time, which is why those imports sit below the top of the file (E402).
+_bootstrap_path()
+
+from src.services.decision_engine import DecisionEngine  # noqa: E402
+from src.services.golden_regression_store import GoldenRegressionStore  # noqa: E402
+
+
+def _format_confidence(value: Any) -> str:
+    """Render a confidence score with two decimals, or ``"n/a"`` if absent.
+
+    ``value`` is passed through ``float()``, so numeric strings are accepted;
+    anything ``float()`` rejects raises from that call.
+    """
+    return "n/a" if value is None else format(float(value), ".2f")
+
+
+def _format_delta(delta: Dict[str, Any]) -> str:
+    """Return the signed two-decimal confidence delta, or ``"n/a"``.
+
+    Reads ``delta["confidence_delta"]``. A missing or ``None`` entry yields
+    ``"n/a"``; values that compare ``>= 0`` get an explicit ``+`` prefix,
+    all others are formatted as-is.
+    """
+    change = delta.get("confidence_delta")
+    if change is None:
+        return "n/a"
+    if change >= 0:
+        return f"+{change:.2f}"
+    return f"{change:.2f}"
+
+
+async def main() -> None:
+    """Evaluate the golden regression cases and print a text report.
+
+    Creates a ``DecisionEngine`` and a ``GoldenRegressionStore``, awaits
+    ``store.evaluate(engine, initialize_engine=True)``, then prints the
+    summary totals followed by one entry per case: expected vs. actual
+    decision and confidence, the confidence delta, and the reasoning and
+    metadata when they are present.
+    """
+    decision_engine = DecisionEngine()
+    regression_store = GoldenRegressionStore()
+
+    results = await regression_store.evaluate(decision_engine, initialize_engine=True)
+    totals = results["summary"]
+
+    print("FixOps Golden Regression Report")
+    print("=" * 34)
+    totals_line = (
+        f"Total cases: {totals['total_cases']} | Matches: {totals['matches']} | "
+        f"Mismatches: {totals['mismatches']} | Accuracy: {totals['accuracy']:.1%}"
+    )
+    print(totals_line)
+    print()
+
+    for entry in results["cases"]:
+        matched = entry["match"]
+        marker = "✅" if matched else "❌"
+        want = entry["expected"]
+        got = entry["actual"]
+        diff = entry["delta"]
+
+        headline = (
+            f"{marker} {entry['case_id']} ({entry.get('cve_id', 'n/a')}): "
+            f"expected {want['decision']} (conf {_format_confidence(want.get('confidence'))}) vs "
+            f"actual {got.get('decision', 'UNKNOWN')} (conf {_format_confidence(got.get('confidence'))})"
+        )
+        print(headline)
+
+        verdict = "match" if matched else "changed"
+        print(f"    Δ decision: {verdict} | Δ confidence: {_format_delta(diff)}")
+
+        # Optional details are printed only when present and non-empty.
+        reasoning = got.get("reasoning")
+        if reasoning:
+            print(f"    Reasoning: {reasoning}")
+        metadata = entry.get("metadata")
+        if metadata:
+            print(f"    Metadata: {metadata}")
+        print()
+
+
+if __name__ == "__main__":
+    try:
+        # asyncio.run drives the async main() coroutine to completion.
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        # Ctrl-C: print a short notice instead of a traceback.
+        print("Interrupted")