RocketBus · trentas · May 13, 2026 · May 13, 2026
diff --git a/docs/METRICS.md b/docs/METRICS.md
@@ -695,6 +695,45 @@ Confidence rules:
 
 ---
 
+## DORA (real) — Datadog-derived
+
+Populated when the org has an active Datadog integration and the CLI
+fetched events from `GET /api/integrations/datadog/events` for the
+analysis window. When the integration is absent or returns zero events,
+every `dora_*` field is null and the report's DORA section is empty.
+
+| Field | Unit | Source | Nullable when |
+|---|---|---|---|
+| `dora_source` | `"datadog"` | `analysis/dora_real.py` | no active integration / no events fetched |
+| `dora_deployments_total` | int ≥ 0 | same | same |
+| `dora_deployments_failed` | int ≥ 0 | same | same |
+| `dora_deployments_pending_evaluation` | int ≥ 0 | same | same |
+| `dora_incidents_total` | int ≥ 0 | same | same |
+| `dora_cfr` | float `0.0–1.0` | same | no deploy in the window has been evaluated: there are no deploys, or every deploy is `change_failure=null` (still pending Datadog evaluation) |
+| `dora_mttr_per_deploy_seconds_median` | seconds | same | no failed deploy carries `recovery_time_sec` |
+| `dora_mttr_per_deploy_seconds_p90` | seconds | same | same |
+| `dora_mttr_per_incident_seconds_median` | seconds | same | no incident carries `time_to_restore_seconds` |
+| `dora_mttr_per_incident_seconds_p90` | seconds | same | same |
+| `dora_rollback_rate` | float `0.0–1.0` | same | no failed deploys in the window |
+| `dora_rollbacks_total` | int ≥ 0 | same | (always set when `dora_source` is) |
+| `dora_lead_time_seconds_median` | seconds | same | no deploy carries `commits[].change_lead_time` |
+| `dora_deploy_frequency_per_day` | float ≥ 0 | same | `ExternalDORAData.window_from`/`window_to` not provided, or the window span is not positive |
+| `dora_remediation_distribution` | `Record<remediation_type, int>` | same | no failed deploys in the window |
+
+Tri-state `change_failure`:
+
+- Datadog evaluates each deploy and assigns `change_failure ∈ {true, false, null}`.
+- `null` means "still inside Datadog's evaluation window, no verdict yet".
+- The CFR denominator is `count(change_failure in {true, false})` — `null` is excluded.
+- Surfaced separately as `dora_deployments_pending_evaluation` so the dashboard can show *what we don't know yet*.
+
+MTTR has two flavors that come from different events:
+
+- **Per-deploy** (`dora_mttr_per_deploy_seconds_*`) — median/p90 of `recovery_time_sec` over failed deploys. Joins cleanly back to commits via `commit_sha`; this is the per-deploy lens the AI-vs-human correlation card uses (slice 5).
+- **Per-incident** (`dora_mttr_per_incident_seconds_*`) — median/p90 of `time_to_restore_seconds` over failure events. Canonical DORA-reporting number.
+
+---
+
 ## Module → fields map
 
 | Module | Fields populated |
@@ -716,6 +755,7 @@ Confidence rules:
 | `analysis/pr_lifecycle.py` | `pr_merged_count`, `pr_median_time_to_merge_hours`, `pr_median_size_files`, `pr_median_size_lines`, `pr_review_rounds_median`, `pr_single_pass_rate` |
 | `analysis/flow_load.py` | `flow_load` |
 | `analysis/flow_efficiency.py` | `flow_efficiency_median`, `median_time_to_first_review_hours`, `time_in_phase_median_hours`, `flow_efficiency_by_intent`, `flow_efficiency_by_origin` |
+| `analysis/dora_real.py` | `dora_source`, `dora_deployments_total`, `dora_deployments_failed`, `dora_deployments_pending_evaluation`, `dora_incidents_total`, `dora_cfr`, `dora_mttr_per_deploy_seconds_median`, `dora_mttr_per_deploy_seconds_p90`, `dora_mttr_per_incident_seconds_median`, `dora_mttr_per_incident_seconds_p90`, `dora_rollback_rate`, `dora_rollbacks_total`, `dora_lead_time_seconds_median`, `dora_deploy_frequency_per_day`, `dora_remediation_distribution` |
 | `analysis/duplicate_detector.py` | `duplicate_block_rate`, `duplicate_block_count`, `duplicate_median_block_size`, `duplicate_by_origin`, `duplicate_by_tool` |
 | `analysis/move_detector.py` | `moved_code_pct`, `refactoring_ratio`, `move_by_origin` |
 | `analysis/code_provenance.py` | `revision_age_distribution`, `pct_revising_new_code`, `pct_revising_mature_code`, `provenance_by_origin` |

diff --git a/iris/analysis/dora_real.py b/iris/analysis/dora_real.py
@@ -0,0 +1,190 @@
+"""DORA metrics computed from Datadog-derived external events.
+
+The engine consumes pre-fetched events (see ``iris.models.external``);
+it makes no network calls itself. It treats Datadog's tri-state
+``change_failure`` correctly — ``None`` deployments are excluded from
+the CFR denominator and surfaced as a "pending evaluation" bucket so
+the dashboard can show *what we don't know yet* alongside what we do.
+
+Metrics produced (see :class:`DORARealResult`):
+
+- **CFR** — failed deploys / evaluated deploys (excludes pending).
+- **MTTR per-deploy** — median (p50) and p90 of ``recovery_time_sec`` over
+  failed deploys. The per-deploy lens is what powers the AI-vs-human
+  correlation in slice 5 because it joins cleanly through
+  ``commit_sha``.
+- **MTTR per-incident** — median (p50) and p90 of ``time_to_restore_seconds``
+  over failure events. The canonical DORA-reporting number.
+- **Rollback rate** — free byproduct of ``remediation.type``; the
+  fraction of failed deploys that ended in a rollback.
+- **Lead time** — median of ``commits[].change_lead_time`` across every
+  commit on every deploy.
+- **Deploy frequency** — deploys per day when a window is provided.
+"""
+
+from dataclasses import dataclass, field
+from statistics import median
+
+from iris.models.external import (
+    ExternalDeployment,
+    ExternalDORAData,
+    ExternalIncident,
+)
+
+
+@dataclass(frozen=True)
+class DORAPercentile:
+    """Immutable median/p90 pair summarising one list of values.
+
+    Instances are produced by ``_percentiles``.
+    """
+
+    # Median of the values.
+    p50: float
+    # 90th percentile of the values, as selected by ``_percentile(..., 0.9)``.
+    p90: float
+
+
+@dataclass(frozen=True)
+class DORARealResult:
+    """Datadog-derived DORA metrics for the analysis window.
+
+    Built by ``analyze_dora_real``. The count fields are always set; each
+    optional metric is ``None`` when the input carried nothing to compute
+    it from (the exact condition is noted next to the field).
+    """
+
+    source: str  # copied from the input's ``source`` (e.g. "datadog")
+    deployments_total: int  # every deployment in the input
+    deployments_failed: int  # deployments whose change_failure is True
+    deployments_pending_evaluation: int  # deployments whose change_failure is None
+    incidents_total: int  # every incident in the input
+
+    # Failed deploys / evaluated deploys. None when no deployment has been
+    # evaluated (there are no deploys, or every deploy is still pending).
+    cfr: float | None = None
+
+    # Seconds. None when no failed deploys carry recovery_time_sec.
+    mttr_per_deploy_seconds_median: float | None = None
+    mttr_per_deploy_seconds_p90: float | None = None
+
+    # Seconds. None when no incidents carry time_to_restore_seconds.
+    mttr_per_incident_seconds_median: float | None = None
+    mttr_per_incident_seconds_p90: float | None = None
+
+    # Fraction of failed deploys whose remediation_type is "rollback".
+    # None when no failed deploys exist.
+    rollback_rate: float | None = None
+    # Number of failed deploys whose remediation_type is "rollback".
+    rollbacks_total: int = 0
+
+    # Median of commits[].change_lead_time over every commit on every deploy.
+    # None when no commits carry it.
+    lead_time_seconds_median: float | None = None
+
+    # Deploys per 24-hour day of window span, rounded to 4 decimals. None when
+    # either window bound is missing or the span is not positive.
+    deploy_frequency_per_day: float | None = None
+
+    # Count of failed deploys per remediation type
+    # (e.g. {"rollback": 12, "hotfix": 3}); failed deploys without a
+    # remediation type are counted under "unknown".
+    remediation_distribution: dict[str, int] = field(default_factory=dict)
+
+
+def analyze_dora_real(data: ExternalDORAData) -> DORARealResult:
+    """Derive the DORA metric set from already-fetched external events.
+
+    An empty ``data`` is fine: counts come back as 0 and every optional
+    metric as ``None``, so the aggregator can always wire the result in.
+    """
+    all_deploys = list(data.deployments)
+    all_incidents = list(data.incidents)
+
+    # Tri-state change_failure: True = failed, None = not yet evaluated;
+    # any other value counts as evaluated but not failed.
+    failed_deploys = []
+    pending_count = 0
+    evaluated_count = 0
+    for deploy in all_deploys:
+        if deploy.change_failure is None:
+            pending_count += 1
+            continue
+        evaluated_count += 1
+        if deploy.change_failure is True:
+            failed_deploys.append(deploy)
+
+    # Pending deploys are kept out of the CFR denominator.
+    failure_rate = None
+    if evaluated_count:
+        failure_rate = len(failed_deploys) / evaluated_count
+
+    # Per-deploy MTTR: recovery times of failed deploys that report one.
+    recovery_seconds = []
+    for deploy in failed_deploys:
+        if deploy.recovery_time_sec is not None:
+            recovery_seconds.append(deploy.recovery_time_sec)
+    deploy_mttr = _percentiles(recovery_seconds)
+
+    # Per-incident MTTR: restore times of incidents that report one.
+    restore_seconds = []
+    for incident in all_incidents:
+        if incident.time_to_restore_seconds is not None:
+            restore_seconds.append(incident.time_to_restore_seconds)
+    incident_mttr = _percentiles(restore_seconds)
+
+    # One pass over failed deploys yields both the rollback count and the
+    # per-remediation histogram.
+    rollback_count = 0
+    by_remediation: dict[str, int] = {}
+    for deploy in failed_deploys:
+        if deploy.remediation_type == "rollback":
+            rollback_count += 1
+        bucket = deploy.remediation_type or "unknown"
+        by_remediation[bucket] = by_remediation.get(bucket, 0) + 1
+    rollback_share = (
+        rollback_count / len(failed_deploys) if failed_deploys else None
+    )
+
+    # Lead time pools every commit of every deploy, failed or not.
+    commit_lead_times = []
+    for deploy in all_deploys:
+        for commit in deploy.commits:
+            if commit.change_lead_time is not None:
+                commit_lead_times.append(commit.change_lead_time)
+    lead_time_p50 = float(median(commit_lead_times)) if commit_lead_times else None
+
+    frequency = _deploy_frequency_per_day(
+        len(all_deploys), data.window_from, data.window_to
+    )
+
+    deploy_p50 = deploy_p90 = None
+    if deploy_mttr:
+        deploy_p50, deploy_p90 = deploy_mttr.p50, deploy_mttr.p90
+
+    incident_p50 = incident_p90 = None
+    if incident_mttr:
+        incident_p50, incident_p90 = incident_mttr.p50, incident_mttr.p90
+
+    return DORARealResult(
+        source=data.source,
+        deployments_total=len(all_deploys),
+        deployments_failed=len(failed_deploys),
+        deployments_pending_evaluation=pending_count,
+        incidents_total=len(all_incidents),
+        cfr=failure_rate,
+        mttr_per_deploy_seconds_median=deploy_p50,
+        mttr_per_deploy_seconds_p90=deploy_p90,
+        mttr_per_incident_seconds_median=incident_p50,
+        mttr_per_incident_seconds_p90=incident_p90,
+        rollback_rate=rollback_share,
+        rollbacks_total=rollback_count,
+        lead_time_seconds_median=lead_time_p50,
+        deploy_frequency_per_day=frequency,
+        remediation_distribution=by_remediation,
+    )
+
+
+def _percentiles(values: list[int]) -> DORAPercentile | None:
+    """Summarise ``values`` as a median/p90 pair, or ``None`` when empty."""
+    if values:
+        ordered = sorted(values)
+        return DORAPercentile(
+            p50=float(median(ordered)),
+            p90=float(_percentile(ordered, 0.9)),
+        )
+    return None
+
+
+def _percentile(sorted_values: list[int], q: float) -> float:
+    """Return the ``q``-quantile of an ascending list by picking a rank.
+
+    The 1-based rank is ``q * n`` rounded to the nearest integer with ties
+    rounded up, then clamped to ``[1, n]``. No interpolation is done, so
+    the result is always one of the input values.
+
+    Args:
+        sorted_values: Non-empty list, already sorted ascending.
+        q: Quantile to select, e.g. ``0.9`` for p90.
+
+    Raises:
+        ValueError: If ``sorted_values`` is empty.
+    """
+    if not sorted_values:
+        raise ValueError("empty input")
+    count = len(sorted_values)
+    if count == 1:
+        return float(sorted_values[0])
+    # int(x + 0.5) rounds half up. The built-in round() rounds half to even,
+    # which would make the tie-break depend on the parity of the rank
+    # (4.5 -> 4 for p90 of 5 values, but 13.5 -> 14 for p90 of 15).
+    rank = int(q * count + 0.5)
+    rank = min(max(rank, 1), count)
+    return float(sorted_values[rank - 1])
+
+
+def _deploy_frequency_per_day(
+    deploy_count: int,
+    window_from,
+    window_to,
+) -> float | None:
+    """Return deploys per 24-hour day over the window, rounded to 4 places.
+
+    ``None`` is returned when either bound is missing or when the window
+    does not have a positive duration.
+    """
+    bounds_missing = window_from is None or window_to is None
+    if bounds_missing:
+        return None
+    elapsed = window_to - window_from
+    window_days = elapsed.total_seconds() / 86400.0
+    # A zero or negative window has no meaningful rate.
+    if window_days <= 0:
+        return None
+    return round(deploy_count / window_days, 4)
+
+
+# Public API of this module.
+__all__ = [
+    "DORARealResult",
+    "DORAPercentile",
+    "analyze_dora_real",
+]
+
+
+# Module-level aliases that give tests a handle on the private helpers
+# without naming them directly. They are not listed in ``__all__``.
+_TESTING_PERCENTILES = _percentiles
+_TESTING_DEPLOY_FREQ = _deploy_frequency_per_day
diff --git a/iris/cli.py b/iris/cli.py
@@ -179,6 +179,35 @@ def _merge_quality_metrics(metrics, dup_result, move_result, ops_result, provena
     return ReportMetrics(**{k: v for k, v in d.items()})
 
 
+def _fetch_external_dora_if_available(commits, days: int):
+    """Pull DORA events from the platform when the user is logged in.
+
+    Best-effort: any failure (platform modules not importable, no auth,
+    or an error raised while fetching) falls through with None so the
+    analysis still ships without the dora_* metrics. The window matches
+    the commit-analysis window so deploys can be joined to commits by
+    commit_sha downstream.
+
+    Args:
+        commits: Currently unused; kept so existing callers keep working.
+        days: Length of the look-back window in days, ending now (UTC).
+
+    Returns:
+        Whatever ``fetch_external_dora`` returns, or None on any failure.
+    """
+    try:
+        from iris.platform.config import get_auth
+        from iris.ingestion.external_reader import fetch_external_dora
+    except ImportError:
+        return None
+
+    auth = get_auth()
+    if not auth:
+        return None
+    server_url, token = auth
+
+    now = datetime.now(timezone.utc)
+    window_from = now - timedelta(days=days)
+    try:
+        return fetch_external_dora(
+            server_url=server_url,
+            token=token,
+            window_from=window_from,
+            window_to=now,
+        )
+    except Exception as exc:
+        # Optional enrichment must never abort the analysis run; record
+        # the reason at debug level and continue without dora_* metrics.
+        import logging
+
+        logging.getLogger(__name__).debug("External DORA fetch skipped: %s", exc)
+        return None
+
+
 def _is_git_repo(path: str) -> bool:
     """Check if path contains a .git directory."""
     return os.path.isdir(os.path.join(path, ".git"))
@@ -385,9 +414,20 @@ def _tick(label: str) -> None:
     # Step 3: Classify commits by intent
     print(s["cli_classifying"], end=" ", flush=True)
 
+    # Optional: pull DORA events from a connected platform integration
+    # (Datadog) so the aggregator can populate the dora_* metrics.
+    external_data = None
+    if will_push:
+        external_data = _fetch_external_dora_if_available(commits, args.days)
+
     # Step 4: Analyze and aggregate metrics
     with span("analysis.aggregate", {"repo": repo_name, "commits": len(commits)}):
-        metrics = aggregate(commits, churn_days=args.churn_days, prs=prs or None)
+        metrics = aggregate(
+            commits,
+            churn_days=args.churn_days,
+            prs=prs or None,
+            external_data=external_data,
+        )
     print(s["cli_classified"].format(count=len(commits)))
     _tick("aggregate analysis done")
 

diff --git a/iris/i18n.py b/iris/i18n.py
@@ -173,6 +173,24 @@
         "on new work; revisit alongside merged-feature throughput."
     ),
 
+    # DORA (real) findings — populated only when external integration data is present
+    "finding_dora_cfr_descriptive": (
+        "Change Failure Rate (Datadog): {cfr_pct} "
+        "({failed} failed / {evaluated} evaluated deployments)."
+    ),
+    "finding_dora_cfr_all_pending": (
+        "{pending} deployments are still pending Datadog change-failure "
+        "evaluation — CFR will firm up once they settle."
+    ),
+    "finding_dora_mttr_descriptive": (
+        "Median time to recovery (per failed deploy): {hours:.1f}h "
+        "across {failed} failed deployments."
+    ),
+    "finding_dora_rollback_rate": (
+        "{pct} of failed deployments were resolved via rollback "
+        "({rollbacks} of the failed deploys in the window)."
+    ),
+
     # PR findings
     "finding_pr_volume": (
         "{count} PRs merged in this period with a "
@@ -954,6 +972,24 @@
         "trabalho novo; cruze com o throughput de features merged."
     ),
 
+    # Descobertas DORA (real) — populadas apenas quando há integração externa ativa
+    "finding_dora_cfr_descriptive": (
+        "Change Failure Rate (Datadog): {cfr_pct} "
+        "({failed} falhas em {evaluated} deploys avaliados)."
+    ),
+    "finding_dora_cfr_all_pending": (
+        "{pending} deploys ainda estão pendentes de avaliação de falha pelo "
+        "Datadog — o CFR vai se consolidar quando eles forem classificados."
+    ),
+    "finding_dora_mttr_descriptive": (
+        "Mediana do tempo de recuperação (por deploy com falha): {hours:.1f}h "
+        "em {failed} deploys com falha."
+    ),
+    "finding_dora_rollback_rate": (
+        "{pct} dos deploys com falha foram resolvidos via rollback "
+        "({rollbacks} dos deploys com falha na janela)."
+    ),
+
     # Descobertas de PR
     "finding_pr_volume": (
         "{count} PRs merged neste período com "