From b58823e66bdebfeab154d84774e35c40c72ac394 Mon Sep 17 00:00:00 2001 From: Fabio Wakim Trentini Date: Wed, 13 May 2026 17:22:51 -0300 Subject: [PATCH] =?UTF-8?q?feat(metric):=20DORA=20(real)=20=E2=80=94=20Dat?= =?UTF-8?q?adog-derived=20CFR=20+=20MTTR=20+=20rollback=20rate=20(#15)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Slice 4 of the Datadog integration. Wires the engine to consume the external events ingested in slice 3 and emit a new dora_* metric family on every analysis run that has an active integration. Engine (Python): - iris/models/external.py — dataclasses for the pre-fetched payload (ExternalDeployment / ExternalDeploymentCommit / ExternalIncident / ExternalDORAData) the aggregator consumes. - iris/analysis/dora_real.py — computes CFR, per-deploy MTTR (p50/p90), per-incident MTTR (p50/p90), rollback rate, lead time, deploy frequency, and a remediation_type distribution. Tri-state change_failure is handled correctly: null deploys are excluded from the CFR denominator and surfaced as a separate "pending" bucket. - iris/metrics/aggregator.py — new optional external_data param that routes through dora_real and merges the result into ReportMetrics. - iris/models/metrics.py — adds the fifteen dora_* fields. - iris/reports/narrative.py + iris/i18n.py — descriptive findings (CFR, MTTR per-deploy, rollback rate; en + pt-br copy) emitted when the metric is populated. - iris/ingestion/external_reader.py + iris/cli.py — opportunistic fetch from the platform when the CLI is logged in. Any failure (no auth, no integration, network, malformed payload) falls through with None, so standalone `iris .` runs are unaffected. Platform: - src/app/api/integrations/datadog/events/route.ts — new GET endpoint scoped by api token. Returns deployments (with their commits) and incidents for the org in the requested window. 
Distinguishes "no active integration" (source: null, empty arrays) from "no events in window" (source: "datadog", empty arrays). - src/types/metrics.ts — mirrors the new ReportMetrics fields. Docs/tests: - docs/METRICS.md — full entries for the dora_* family with the tri-state semantics and the dual-MTTR (per-deploy vs per-incident) story. - tests/test_dora_real.py — 10 tests covering tri-state CFR, MTTR per deploy/incident, rollback rate (incl. null when no failures), lead-time aggregation, and deploy-frequency windowing. Out of scope (slice 5): dashboard surfacing, "CFR by code origin" correlation card, and the rollback-by-origin breakdown — those are platform-side joins of external_deployment_commits against commit_origin and don't need the engine. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/METRICS.md | 40 ++++ iris/analysis/dora_real.py | 190 +++++++++++++++ iris/cli.py | 42 +++- iris/i18n.py | 36 +++ iris/ingestion/external_reader.py | 180 ++++++++++++++ iris/metrics/aggregator.py | 32 +++ iris/models/external.py | 86 +++++++ iris/models/metrics.py | 19 ++ iris/reports/narrative.py | 53 +++++ .../api/integrations/datadog/events/route.ts | 221 ++++++++++++++++++ platform/src/types/metrics.ts | 19 ++ tests/test_dora_real.py | 191 +++++++++++++++ 12 files changed, 1108 insertions(+), 1 deletion(-) create mode 100644 iris/analysis/dora_real.py create mode 100644 iris/ingestion/external_reader.py create mode 100644 iris/models/external.py create mode 100644 platform/src/app/api/integrations/datadog/events/route.ts create mode 100644 tests/test_dora_real.py diff --git a/docs/METRICS.md b/docs/METRICS.md index ef31c47..b624f7a 100644 --- a/docs/METRICS.md +++ b/docs/METRICS.md @@ -695,6 +695,45 @@ Confidence rules: --- +## DORA (real) — Datadog-derived + +Populated when the org has an active Datadog integration and the CLI +fetched events from `GET /api/integrations/datadog/events` for the +analysis window. 
When the integration is absent or the fetch fails, +every `dora_*` field is null and the report's DORA section is empty. When the integration is active but the window has no events, `dora_source` is still `"datadog"`, the counts are 0, and the fields that need at least one event (CFR, MTTR, rollback rate, lead time) are null. + +| Field | Unit | Source | Nullable when | +|---|---|---|---| +| `dora_source` | `"datadog"` | `analysis/dora_real.py` | no active integration / no events fetched | +| `dora_deployments_total` | int ≥ 0 | same | same | +| `dora_deployments_failed` | int ≥ 0 | same | same | +| `dora_deployments_pending_evaluation` | int ≥ 0 | same | same | +| `dora_incidents_total` | int ≥ 0 | same | same | +| `dora_cfr` | float `0.0–1.0` | same | every deploy in the window is `change_failure=null` (still pending Datadog evaluation) | +| `dora_mttr_per_deploy_seconds_median` | seconds | same | no failed deploy carries `recovery_time_sec` | +| `dora_mttr_per_deploy_seconds_p90` | seconds | same | same | +| `dora_mttr_per_incident_seconds_median` | seconds | same | no incident carries `time_to_restore_seconds` | +| `dora_mttr_per_incident_seconds_p90` | seconds | same | same | +| `dora_rollback_rate` | float `0.0–1.0` | same | no failed deploys in the window | +| `dora_rollbacks_total` | int ≥ 0 | same | (always set when `dora_source` is) | +| `dora_lead_time_seconds_median` | seconds | same | no deploy carries `commits[].change_lead_time` | +| `dora_deploy_frequency_per_day` | float ≥ 0 | same | `ExternalDORAData.window_from`/`window_to` not provided | +| `dora_remediation_distribution` | `Record<string, number>` | same | no failed deploys in the window | + +Tri-state `change_failure`: + +- Datadog evaluates each deploy and assigns `change_failure ∈ {true, false, null}`. +- `null` means "still inside Datadog's evaluation window, no verdict yet". +- The CFR denominator is `count(change_failure in {true, false})` — `null` is excluded. +- Surfaced separately as `dora_deployments_pending_evaluation` so the dashboard can show *what we don't know yet*. 
+ +MTTR has two flavors that come from different events: + +- **Per-deploy** (`dora_mttr_per_deploy_seconds_*`) — median/p90 of `recovery_time_sec` over failed deploys. Joins cleanly back to commits via `commit_sha`; this is the per-deploy lens the AI-vs-human correlation card uses (slice 5). +- **Per-incident** (`dora_mttr_per_incident_seconds_*`) — median/p90 of `time_to_restore_seconds` over failure events. Canonical DORA-reporting number. + +--- + ## Module → fields map | Module | Fields populated | @@ -716,6 +755,7 @@ Confidence rules: | `analysis/pr_lifecycle.py` | `pr_merged_count`, `pr_median_time_to_merge_hours`, `pr_median_size_files`, `pr_median_size_lines`, `pr_review_rounds_median`, `pr_single_pass_rate` | | `analysis/flow_load.py` | `flow_load` | | `analysis/flow_efficiency.py` | `flow_efficiency_median`, `median_time_to_first_review_hours`, `time_in_phase_median_hours`, `flow_efficiency_by_intent`, `flow_efficiency_by_origin` | +| `analysis/dora_real.py` | `dora_source`, `dora_deployments_total`, `dora_deployments_failed`, `dora_deployments_pending_evaluation`, `dora_incidents_total`, `dora_cfr`, `dora_mttr_per_deploy_seconds_median`, `dora_mttr_per_deploy_seconds_p90`, `dora_mttr_per_incident_seconds_median`, `dora_mttr_per_incident_seconds_p90`, `dora_rollback_rate`, `dora_rollbacks_total`, `dora_lead_time_seconds_median`, `dora_deploy_frequency_per_day`, `dora_remediation_distribution` | | `analysis/duplicate_detector.py` | `duplicate_block_rate`, `duplicate_block_count`, `duplicate_median_block_size`, `duplicate_by_origin`, `duplicate_by_tool` | | `analysis/move_detector.py` | `moved_code_pct`, `refactoring_ratio`, `move_by_origin` | | `analysis/code_provenance.py` | `revision_age_distribution`, `pct_revising_new_code`, `pct_revising_mature_code`, `provenance_by_origin` | diff --git a/iris/analysis/dora_real.py b/iris/analysis/dora_real.py new file mode 100644 index 0000000..fd9da0e --- /dev/null +++ b/iris/analysis/dora_real.py @@ -0,0 +1,190 @@ 
+"""DORA metrics computed from Datadog-derived external events. + +The engine consumes pre-fetched events (see ``iris.models.external``); +Datadog talks to nobody here. Treats Datadog's tri-state +``change_failure`` correctly — ``None`` deployments are excluded from +the CFR denominator and surfaced as a "pending evaluation" bucket so +the dashboard can show *what we don't know yet* alongside what we do. + +Metrics produced (see :class:`DORARealResult`): + +- **CFR** — failed deploys / evaluated deploys (excludes pending). +- **MTTR per-deploy** — median/p50/p90 of ``recovery_time_sec`` over + failed deploys. The per-deploy lens is what powers the AI-vs-human + correlation in slice 5 because it joins cleanly through + ``commit_sha``. +- **MTTR per-incident** — median/p50/p90 of ``time_to_restore_seconds`` + over failure events. The canonical DORA-reporting number. +- **Rollback rate** — free byproduct of ``remediation.type``; the + fraction of failed deploys that ended in a rollback. +- **Lead time** — median of ``commits[].change_lead_time`` across every + commit on every deploy. +- **Deploy frequency** — deploys per day when a window is provided. +""" + +from dataclasses import dataclass, field +from statistics import median + +from iris.models.external import ( + ExternalDeployment, + ExternalDORAData, + ExternalIncident, +) + + +@dataclass(frozen=True) +class DORAPercentile: + p50: float + p90: float + + +@dataclass(frozen=True) +class DORARealResult: + """Datadog-derived DORA metrics for the analysis window.""" + + source: str # "datadog" + deployments_total: int + deployments_failed: int + deployments_pending_evaluation: int + incidents_total: int + + # CFR is None when no deployments have been evaluated (everything is pending). + cfr: float | None = None + + # Seconds. None when no failed deploys carry recovery_time_sec. + mttr_per_deploy_seconds_median: float | None = None + mttr_per_deploy_seconds_p90: float | None = None + + # Seconds. 
None when no incidents carry time_to_restore_seconds. + mttr_per_incident_seconds_median: float | None = None + mttr_per_incident_seconds_p90: float | None = None + + # Rollback rate is None when no failed deploys exist. + rollback_rate: float | None = None + rollbacks_total: int = 0 + + # Lead time over commits[].change_lead_time. None when no commits carry it. + lead_time_seconds_median: float | None = None + + # Deploys per calendar day across the window. None when the caller didn't + # supply window timestamps. + deploy_frequency_per_day: float | None = None + + # Per-remediation breakdown (e.g. {"rollback": 12, "hotfix": 3}). + remediation_distribution: dict[str, int] = field(default_factory=dict) + + +def analyze_dora_real(data: ExternalDORAData) -> DORARealResult: + """Compute DORA metrics from the pre-fetched external events. + + Empty input is supported — every metric defaults to ``None`` / 0 + so the aggregator can wire the result unconditionally. + """ + deploys = list(data.deployments) + incidents = list(data.incidents) + + failed = [d for d in deploys if d.change_failure is True] + pending = [d for d in deploys if d.change_failure is None] + evaluated = [d for d in deploys if d.change_failure is not None] + + cfr = (len(failed) / len(evaluated)) if evaluated else None + + deploy_recoveries = [ + d.recovery_time_sec for d in failed if d.recovery_time_sec is not None + ] + mttr_deploy = _percentiles(deploy_recoveries) + + incident_restores = [ + i.time_to_restore_seconds + for i in incidents + if i.time_to_restore_seconds is not None + ] + mttr_incident = _percentiles(incident_restores) + + rollbacks = [d for d in failed if d.remediation_type == "rollback"] + rollback_rate = (len(rollbacks) / len(failed)) if failed else None + + remediation_distribution: dict[str, int] = {} + for d in failed: + key = d.remediation_type or "unknown" + remediation_distribution[key] = remediation_distribution.get(key, 0) + 1 + + lead_times = [ + c.change_lead_time + for d 
in deploys + for c in d.commits + if c.change_lead_time is not None + ] + lead_time_median = median(lead_times) if lead_times else None + + deploy_freq = _deploy_frequency_per_day( + len(deploys), data.window_from, data.window_to + ) + + return DORARealResult( + source=data.source, + deployments_total=len(deploys), + deployments_failed=len(failed), + deployments_pending_evaluation=len(pending), + incidents_total=len(incidents), + cfr=cfr, + mttr_per_deploy_seconds_median=mttr_deploy.p50 if mttr_deploy else None, + mttr_per_deploy_seconds_p90=mttr_deploy.p90 if mttr_deploy else None, + mttr_per_incident_seconds_median=mttr_incident.p50 + if mttr_incident + else None, + mttr_per_incident_seconds_p90=mttr_incident.p90 if mttr_incident else None, + rollback_rate=rollback_rate, + rollbacks_total=len(rollbacks), + lead_time_seconds_median=float(lead_time_median) + if lead_time_median is not None + else None, + deploy_frequency_per_day=deploy_freq, + remediation_distribution=remediation_distribution, + ) + + +def _percentiles(values: list[int]) -> DORAPercentile | None: + if not values: + return None + sorted_values = sorted(values) + p50 = float(median(sorted_values)) + p90 = float(_percentile(sorted_values, 0.9)) + return DORAPercentile(p50=p50, p90=p90) + + +def _percentile(sorted_values: list[int], q: float) -> float: + if not sorted_values: + raise ValueError("empty input") + if len(sorted_values) == 1: + return float(sorted_values[0]) + # Rounded-rank percentile: rank = round(q * n), clamped to [1, n]. Not the textbook nearest-rank (ceil(q * n)), and it can differ from pandas' interpolation="nearest". 
+ rank = max(1, int(round(q * len(sorted_values)))) + rank = min(rank, len(sorted_values)) + return float(sorted_values[rank - 1]) + + +def _deploy_frequency_per_day( + deploy_count: int, + window_from, + window_to, +) -> float | None: + if window_from is None or window_to is None: + return None + span_seconds = (window_to - window_from).total_seconds() + if span_seconds <= 0: + return None + days = span_seconds / 86400.0 + return round(deploy_count / days, 4) + + +__all__ = [ + "DORARealResult", + "DORAPercentile", + "analyze_dora_real", +] + + +# These names are re-exported for tests that don't want to peek at private helpers. +_TESTING_PERCENTILES = _percentiles +_TESTING_DEPLOY_FREQ = _deploy_frequency_per_day diff --git a/iris/cli.py b/iris/cli.py index cfb2ed7..5b02d32 100644 --- a/iris/cli.py +++ b/iris/cli.py @@ -179,6 +179,35 @@ def _merge_quality_metrics(metrics, dup_result, move_result, ops_result, provena return ReportMetrics(**{k: v for k, v in d.items()}) +def _fetch_external_dora_if_available(commits, days: int): + """Pull DORA events from the platform when the user is logged in. + + Best-effort: any failure (no auth, no integration, network error) + falls through with None so the analysis still ships without the + dora_* metrics. The window matches the commit-analysis window so + deploys can be joined to commits by commit_sha downstream. 
+ """ + try: + from iris.platform.config import get_auth + from iris.ingestion.external_reader import fetch_external_dora + except ImportError: + return None + + auth = get_auth() + if not auth: + return None + server_url, token = auth + + now = datetime.now(timezone.utc) + window_from = now - timedelta(days=days) + return fetch_external_dora( + server_url=server_url, + token=token, + window_from=window_from, + window_to=now, + ) + + def _is_git_repo(path: str) -> bool: """Check if path contains a .git directory.""" return os.path.isdir(os.path.join(path, ".git")) @@ -385,9 +414,20 @@ def _tick(label: str) -> None: # Step 3: Classify commits by intent print(s["cli_classifying"], end=" ", flush=True) + # Optional: pull DORA events from a connected platform integration + # (Datadog) so the aggregator can populate the dora_* metrics. + external_data = None + if will_push: + external_data = _fetch_external_dora_if_available(commits, args.days) + # Step 4: Analyze and aggregate metrics with span("analysis.aggregate", {"repo": repo_name, "commits": len(commits)}): - metrics = aggregate(commits, churn_days=args.churn_days, prs=prs or None) + metrics = aggregate( + commits, + churn_days=args.churn_days, + prs=prs or None, + external_data=external_data, + ) print(s["cli_classified"].format(count=len(commits))) _tick("aggregate analysis done") diff --git a/iris/i18n.py b/iris/i18n.py index eb5d131..5067c56 100644 --- a/iris/i18n.py +++ b/iris/i18n.py @@ -173,6 +173,24 @@ "on new work; revisit alongside merged-feature throughput." ), + # DORA (real) findings — populated only when external integration data is present + "finding_dora_cfr_descriptive": ( + "Change Failure Rate (Datadog): {cfr_pct} " + "({failed} failed / {evaluated} evaluated deployments)." + ), + "finding_dora_cfr_all_pending": ( + "{pending} deployments are still pending Datadog change-failure " + "evaluation — CFR will firm up once they settle." 
+ ), + "finding_dora_mttr_descriptive": ( + "Median time to recovery (per failed deploy): {hours:.1f}h " + "across {failed} failed deployments." + ), + "finding_dora_rollback_rate": ( + "{pct} of failed deployments were resolved via rollback " + "({rollbacks} of the failed deploys in the window)." + ), + # PR findings "finding_pr_volume": ( "{count} PRs merged in this period with a " @@ -954,6 +972,24 @@ "trabalho novo; cruze com o throughput de features merged." ), + # Descobertas DORA (real) — populadas apenas quando há integração externa ativa + "finding_dora_cfr_descriptive": ( + "Change Failure Rate (Datadog): {cfr_pct} " + "({failed} falhas em {evaluated} deploys avaliados)." + ), + "finding_dora_cfr_all_pending": ( + "{pending} deploys ainda estão pendentes de avaliação de falha pelo " + "Datadog — o CFR vai se consolidar quando eles forem classificados." + ), + "finding_dora_mttr_descriptive": ( + "Mediana do tempo de recuperação (por deploy com falha): {hours:.1f}h " + "em {failed} deploys com falha." + ), + "finding_dora_rollback_rate": ( + "{pct} dos deploys com falha foram resolvidos via rollback " + "({rollbacks} dos deploys com falha na janela)." + ), + # Descobertas de PR "finding_pr_volume": ( "{count} PRs merged neste período com " diff --git a/iris/ingestion/external_reader.py b/iris/ingestion/external_reader.py new file mode 100644 index 0000000..fe3a578 --- /dev/null +++ b/iris/ingestion/external_reader.py @@ -0,0 +1,180 @@ +"""Fetch external DORA events from the Iris platform. + +The engine never talks to Datadog directly. When the CLI is logged in to +a platform and the org has an active Datadog integration, this module +pulls pre-synced events from ``GET /api/integrations/datadog/events`` +and hands back an :class:`ExternalDORAData` for the aggregator. + +The fetch is opportunistic: on any failure (no auth, no integration, +network error, malformed payload) the CLI logs a warning and falls +through with ``None``. 
The analysis still ships, the DORA section in +the report just stays empty. +""" + +from __future__ import annotations + +import json +import logging +import urllib.error +import urllib.parse +import urllib.request +from datetime import datetime, timezone + +from iris.models.external import ( + ExternalDeployment, + ExternalDeploymentCommit, + ExternalDORAData, + ExternalIncident, +) + + +logger = logging.getLogger(__name__) + + +def fetch_external_dora( + server_url: str, + token: str, + window_from: datetime, + window_to: datetime, + repository_id: str | None = None, + timeout_seconds: float = 30.0, +) -> ExternalDORAData | None: + """Fetch DORA events from the platform. + + Returns ``None`` on any failure or when the org has no active + Datadog integration. The caller should pass ``None`` to the + aggregator in that case. + """ + params = { + "from": _isoformat_utc(window_from), + "to": _isoformat_utc(window_to), + } + if repository_id: + params["repository_id"] = repository_id + + url = ( + f"{server_url.rstrip('/')}/api/integrations/datadog/events" + f"?{urllib.parse.urlencode(params)}" + ) + + req = urllib.request.Request( + url, + headers={ + "Authorization": f"Bearer {token}", + "Accept": "application/json", + }, + method="GET", + ) + + try: + with urllib.request.urlopen(req, timeout=timeout_seconds) as resp: + payload = json.loads(resp.read().decode("utf-8")) + except urllib.error.HTTPError as e: + body = e.read().decode("utf-8", errors="replace") + logger.warning( + "External DORA fetch returned HTTP %s; skipping. 
Body: %s", + e.code, + body[:200], + ) + return None + except (urllib.error.URLError, json.JSONDecodeError, TimeoutError) as e: + logger.warning("External DORA fetch failed: %s; skipping.", e) + return None + + if not isinstance(payload, dict): + logger.warning("External DORA response was not an object; skipping.") + return None + + source = payload.get("source") + if source != "datadog": + # No active integration — return None so the aggregator skips + # the DORA fields entirely. + return None + + deployments = [ + _deployment_from_payload(d) + for d in payload.get("deployments", []) + if isinstance(d, dict) + ] + incidents = [ + _incident_from_payload(i) + for i in payload.get("incidents", []) + if isinstance(i, dict) + ] + + return ExternalDORAData( + deployments=deployments, + incidents=incidents, + source="datadog", + window_from=window_from, + window_to=window_to, + ) + + +def _deployment_from_payload(p: dict) -> ExternalDeployment: + commits = [ + ExternalDeploymentCommit( + commit_sha=c.get("commit_sha", ""), + commit_timestamp=_parse_iso(c.get("commit_timestamp")), + author_email=c.get("author_email"), + author_canonical_email=c.get("author_canonical_email"), + is_bot=c.get("is_bot"), + change_lead_time=c.get("change_lead_time"), + time_to_deploy=c.get("time_to_deploy"), + ) + for c in p.get("commits") or [] + if isinstance(c, dict) and c.get("commit_sha") + ] + return ExternalDeployment( + provider_event_id=p.get("provider_event_id", ""), + started_at=_parse_iso(p.get("started_at")) or datetime.fromtimestamp(0, tz=timezone.utc), + finished_at=_parse_iso(p.get("finished_at")), + service=p.get("service"), + env=p.get("env"), + team=p.get("team"), + version=p.get("version"), + commit_sha=p.get("commit_sha"), + change_failure=p.get("change_failure"), + recovery_time_sec=p.get("recovery_time_sec"), + remediation_type=p.get("remediation_type"), + deployment_type=p.get("deployment_type"), + source=p.get("source"), + duration_seconds=p.get("duration_seconds"), + 
number_of_commits=p.get("number_of_commits"), + number_of_pull_requests=p.get("number_of_pull_requests"), + commits=commits, + dd_repository_id=p.get("dd_repository_id"), + repository_matched=p.get("repository_id") is not None, + ) + + +def _incident_from_payload(p: dict) -> ExternalIncident: + return ExternalIncident( + provider_event_id=p.get("provider_event_id", ""), + started_at=_parse_iso(p.get("started_at")) or datetime.fromtimestamp(0, tz=timezone.utc), + finished_at=_parse_iso(p.get("finished_at")), + name=p.get("name"), + severity=p.get("severity"), + time_to_restore_seconds=p.get("time_to_restore_seconds"), + services=tuple(p.get("service") or ()), + envs=tuple(p.get("env") or ()), + teams=tuple(p.get("team") or ()), + source=p.get("source"), + ) + + +def _isoformat_utc(d: datetime) -> str: + if d.tzinfo is None: + d = d.replace(tzinfo=timezone.utc) + return d.astimezone(timezone.utc).isoformat().replace("+00:00", "Z") + + +def _parse_iso(s: str | None) -> datetime | None: + if not s: + return None + try: + # fromisoformat accepts trailing "Z" only from Python 3.11+; we + # support 3.13 globally but stay defensive. 
+ return datetime.fromisoformat(s.replace("Z", "+00:00")) + except (ValueError, AttributeError): + return None diff --git a/iris/metrics/aggregator.py b/iris/metrics/aggregator.py index 963d706..f1da791 100644 --- a/iris/metrics/aggregator.py +++ b/iris/metrics/aggregator.py @@ -14,6 +14,7 @@ from iris.analysis.churn_detail import calculate_churn_detail, render_chain from iris.analysis.churn_calculator import calculate_churn from iris.analysis.commit_shape import analyze_commit_shapes +from iris.analysis.dora_real import analyze_dora_real from iris.analysis.fix_latency import calculate_fix_latency from iris.analysis.flow_efficiency import analyze_flow_efficiency from iris.analysis.flow_load import analyze_flow_load @@ -35,6 +36,7 @@ from iris.analysis.revert_detector import detect_reverts from iris.metrics.stabilization import calculate_stabilization from iris.models.commit import Commit +from iris.models.external import ExternalDORAData from iris.models.metrics import ReportMetrics from iris.models.pull_request import PullRequest @@ -43,6 +45,7 @@ def aggregate( commits: list[Commit], churn_days: int, prs: list[PullRequest] | None = None, + external_data: ExternalDORAData | None = None, ) -> ReportMetrics: """Run all analyses on commits and return the combined ReportMetrics. @@ -50,6 +53,11 @@ def aggregate( commits: Commits from git_reader (sorted by date ascending). churn_days: Churn/stabilization window in days. prs: Optional list of merged PRs from github_reader. + external_data: Optional pre-fetched DORA events (deployments + + incidents) from a connected provider — currently Datadog. When + provided, populates the ``dora_*`` fields on ReportMetrics; + when None, those fields stay None and the report renders the + DORA section as "not available". Returns: ReportMetrics with all fields populated. 
PR fields are None @@ -407,4 +415,28 @@ def aggregate( **pr_kwargs, **flow_load_kwargs, **flow_efficiency_kwargs, + **_dora_real_kwargs(external_data), ) + + +def _dora_real_kwargs(external_data: ExternalDORAData | None) -> dict: + if external_data is None: + return {} + result = analyze_dora_real(external_data) + return { + "dora_source": result.source, + "dora_deployments_total": result.deployments_total, + "dora_deployments_failed": result.deployments_failed, + "dora_deployments_pending_evaluation": result.deployments_pending_evaluation, + "dora_incidents_total": result.incidents_total, + "dora_cfr": result.cfr, + "dora_mttr_per_deploy_seconds_median": result.mttr_per_deploy_seconds_median, + "dora_mttr_per_deploy_seconds_p90": result.mttr_per_deploy_seconds_p90, + "dora_mttr_per_incident_seconds_median": result.mttr_per_incident_seconds_median, + "dora_mttr_per_incident_seconds_p90": result.mttr_per_incident_seconds_p90, + "dora_rollback_rate": result.rollback_rate, + "dora_rollbacks_total": result.rollbacks_total, + "dora_lead_time_seconds_median": result.lead_time_seconds_median, + "dora_deploy_frequency_per_day": result.deploy_frequency_per_day, + "dora_remediation_distribution": result.remediation_distribution or None, + } diff --git a/iris/models/external.py b/iris/models/external.py new file mode 100644 index 0000000..e20890b --- /dev/null +++ b/iris/models/external.py @@ -0,0 +1,86 @@ +"""External provider data (Datadog DORA events) consumed by the engine. + +The engine never talks to Datadog directly. The platform ingests events +into Supabase (see `platform/lib/integrations/datadog/sync.ts`) and the +CLI fetches them via `GET /api/integrations/datadog/events` before +invoking the aggregator. These dataclasses are the in-memory shape the +analysis modules consume. 
+""" + +from dataclasses import dataclass, field +from datetime import datetime + + +@dataclass(frozen=True) +class ExternalDeploymentCommit: + """Per-commit detail unpacked from a Datadog deployment event.""" + + commit_sha: str + commit_timestamp: datetime | None = None + author_email: str | None = None + author_canonical_email: str | None = None + is_bot: bool | None = None + # Seconds. From `attributes.commits[].change_lead_time`. + change_lead_time: int | None = None + # Seconds. From `attributes.commits[].time_to_deploy`. + time_to_deploy: int | None = None + + +@dataclass(frozen=True) +class ExternalDeployment: + """A single DORA deployment event from Datadog (one row in `external_deployments`).""" + + provider_event_id: str + started_at: datetime + finished_at: datetime | None = None + service: str | None = None + env: str | None = None + team: str | None = None + version: str | None = None + commit_sha: str | None = None + # Tri-state: True | False | None (None = Datadog hasn't evaluated yet). + # CFR denominator must exclude None deployments. + change_failure: bool | None = None + # Present only when change_failure is True. + recovery_time_sec: int | None = None + remediation_type: str | None = None # "rollback" observed; "hotfix"/"forward_fix" documented + deployment_type: str | None = None + source: str | None = None + duration_seconds: int | None = None + number_of_commits: int | None = None + number_of_pull_requests: int | None = None + commits: list[ExternalDeploymentCommit] = field(default_factory=list) + dd_repository_id: str | None = None + repository_matched: bool = False + + +@dataclass(frozen=True) +class ExternalIncident: + """A single DORA failure event from Datadog (one row in `external_incidents`).""" + + provider_event_id: str + started_at: datetime + finished_at: datetime | None = None + name: str | None = None + severity: str | None = None + # Seconds. From `attributes.time_to_restore`. 
+ time_to_restore_seconds: int | None = None + services: tuple[str, ...] = field(default_factory=tuple) + envs: tuple[str, ...] = field(default_factory=tuple) + teams: tuple[str, ...] = field(default_factory=tuple) + source: str | None = None + + +@dataclass(frozen=True) +class ExternalDORAData: + """Pre-fetched DORA events for the analysis window.""" + + deployments: list[ExternalDeployment] = field(default_factory=list) + incidents: list[ExternalIncident] = field(default_factory=list) + # Provider identifier ("datadog"); persisted on the metric as `dora_source`. + source: str = "datadog" + # The window the events were fetched against. The engine doesn't enforce + # it — the caller is responsible for fetching events that already match + # the analysis window. Stored for traceability; also used to compute deploy frequency. + window_from: datetime | None = None + window_to: datetime | None = None diff --git a/iris/models/metrics.py b/iris/models/metrics.py index c06486c..66bd88b 100644 --- a/iris/models/metrics.py +++ b/iris/models/metrics.py @@ -126,6 +126,25 @@ class ReportMetrics: time_in_phase_median_hours: dict[str, float] | None = None median_time_to_first_review_hours: float | None = None + # DORA (real) — populated only when external Datadog events were fetched + # for this run. None across the board when the org has no active Datadog + # integration. ``dora_source`` is "datadog" when populated, None otherwise. 
+ dora_source: str | None = None + dora_deployments_total: int | None = None + dora_deployments_failed: int | None = None + dora_deployments_pending_evaluation: int | None = None + dora_incidents_total: int | None = None + dora_cfr: float | None = None + dora_mttr_per_deploy_seconds_median: float | None = None + dora_mttr_per_deploy_seconds_p90: float | None = None + dora_mttr_per_incident_seconds_median: float | None = None + dora_mttr_per_incident_seconds_p90: float | None = None + dora_rollback_rate: float | None = None + dora_rollbacks_total: int | None = None + dora_lead_time_seconds_median: float | None = None + dora_deploy_frequency_per_day: float | None = None + dora_remediation_distribution: dict[str, int] | None = None + def to_dict(self) -> dict: d = asdict(self) # Exclude None fields for backward compatibility diff --git a/iris/reports/narrative.py b/iris/reports/narrative.py index e3e8efd..ef801f1 100644 --- a/iris/reports/narrative.py +++ b/iris/reports/narrative.py @@ -162,6 +162,9 @@ def generate_key_findings(metrics: ReportMetrics, lang: str = "en") -> str: flow_findings = _flow_load_findings(metrics, s) findings.extend(flow_findings) + # DORA (real) — descriptive bullets when external integration delivers data + findings.extend(_dora_real_findings(metrics, s)) + # Volume context findings.append(s["finding_volume"].format( commits=metrics.commits_total, @@ -253,6 +256,56 @@ def _flow_load_findings(metrics: ReportMetrics, s: dict) -> list[str]: return findings +def _dora_real_findings(metrics: ReportMetrics, s: dict) -> list[str]: + """Build DORA (real) findings (0-3 bullets) when external integration data is present. + + Always descriptive — no thresholds yet, since 30 days of data on a new + integration isn't enough to calibrate "good" vs "bad" CFR/MTTR per + repo. The dashboard handles the visual story; narrative just states + the headline numbers so the report.md reader sees them. 
+    """
+    if metrics.dora_source is None or metrics.dora_deployments_total is None:
+        return []
+
+    findings: list[str] = []
+
+    if metrics.dora_cfr is not None:
+        findings.append(
+            s["finding_dora_cfr_descriptive"].format(
+                cfr_pct=f"{metrics.dora_cfr:.0%}",
+                failed=metrics.dora_deployments_failed or 0,
+                evaluated=(
+                    (metrics.dora_deployments_total or 0)
+                    - (metrics.dora_deployments_pending_evaluation or 0)
+                ),
+            )
+        )
+    elif metrics.dora_deployments_pending_evaluation:
+        findings.append(
+            s["finding_dora_cfr_all_pending"].format(
+                pending=metrics.dora_deployments_pending_evaluation,
+            )
+        )
+
+    if metrics.dora_mttr_per_deploy_seconds_median is not None:
+        findings.append(
+            s["finding_dora_mttr_descriptive"].format(
+                hours=metrics.dora_mttr_per_deploy_seconds_median / 3600.0,
+                failed=metrics.dora_deployments_failed or 0,
+            )
+        )
+
+    if metrics.dora_rollback_rate is not None and metrics.dora_rollback_rate > 0:
+        findings.append(
+            s["finding_dora_rollback_rate"].format(
+                pct=f"{metrics.dora_rollback_rate:.0%}",
+                rollbacks=metrics.dora_rollbacks_total or 0,
+            )
+        )
+
+    return findings
+
+
 def generate_pr_findings(metrics: ReportMetrics, lang: str = "en") -> str:
     """Generate PR-specific findings (1-2 bullet points).
 
diff --git a/platform/src/app/api/integrations/datadog/events/route.ts b/platform/src/app/api/integrations/datadog/events/route.ts
new file mode 100644
index 0000000..9ce0f90
--- /dev/null
+++ b/platform/src/app/api/integrations/datadog/events/route.ts
@@ -0,0 +1,278 @@
+import { NextRequest, NextResponse } from "next/server";
+
+import { logger } from "@/lib/debug";
+import { supabaseAdmin } from "@/lib/supabase";
+import { validateToken } from "@/lib/tokens";
+
+export const maxDuration = 60;
+export const dynamic = "force-dynamic";
+
+// PostgREST silently truncates every response at the project's max-rows
+// setting (1000 by default on Supabase), so all reads below are paged.
+const PAGE_SIZE = 1000;
+// `.in()` filters are serialized into the request URL; chunk the id list so
+// a busy window cannot push the URL past gateway length limits.
+const ID_CHUNK_SIZE = 100;
+
+type QueryError = { message: string };
+
+type CommitRow = {
+  deployment_id: string;
+  commit_sha: string;
+  commit_timestamp: string | null;
+  author_email: string | null;
+  author_canonical_email: string | null;
+  is_bot: boolean | null;
+  change_lead_time: number | null;
+  time_to_deploy: number | null;
+};
+
+/**
+ * Drains a paged query into one array. `fetchPage` receives an inclusive,
+ * zero-based row range and must apply a deterministic ORDER BY so pages
+ * neither overlap nor skip rows. The offset advances by the number of rows
+ * actually returned, which keeps the loop correct even when the server cap
+ * is smaller than PAGE_SIZE; an empty page marks the end.
+ */
+async function fetchAllRows<T>(
+  fetchPage: (
+    from: number,
+    to: number,
+  ) => PromiseLike<{ data: T[] | null; error: QueryError | null }>,
+): Promise<{ rows: T[]; error: QueryError | null }> {
+  const rows: T[] = [];
+  for (;;) {
+    const { data, error } = await fetchPage(
+      rows.length,
+      rows.length + PAGE_SIZE - 1,
+    );
+    if (error) return { rows: [], error };
+    if (!data || data.length === 0) return { rows, error: null };
+    rows.push(...data);
+  }
+}
+
+/**
+ * Returns pre-fetched Datadog DORA events for the caller's organization,
+ * scoped to an analysis window. Consumed by the Iris CLI (Python engine)
+ * before invoking the aggregator — the engine never talks to Datadog
+ * directly.
+ *
+ * Auth: `Authorization: Bearer <token>` (same token shape as /api/ingest).
+ *
+ * Query params:
+ *   - `from` ISO 8601 (required) — inclusive lower bound on `started_at`.
+ *   - `to`   ISO 8601 (required) — exclusive upper bound on `started_at`.
+ *   - `repository_id` (optional) — when provided, deployments are scoped
+ *     to that Iris repository_id. Incidents are always org-wide because
+ *     Datadog failures don't carry repository attribution.
+ *
+ * The response shape mirrors `iris.models.external.ExternalDORAData` so
+ * the CLI can hydrate dataclasses directly.
+ */
+export async function GET(request: NextRequest) {
+  const authHeader = request.headers.get("authorization");
+  if (!authHeader?.startsWith("Bearer ")) {
+    return NextResponse.json(
+      { error: "Missing or invalid Authorization header" },
+      { status: 401 },
+    );
+  }
+
+  const token = await validateToken(authHeader.slice(7));
+  if (!token) {
+    return NextResponse.json(
+      { error: "Invalid or revoked token" },
+      { status: 401 },
+    );
+  }
+  const organizationId = token.organization_id;
+
+  const url = new URL(request.url);
+  const from = url.searchParams.get("from");
+  const to = url.searchParams.get("to");
+  const repositoryId = url.searchParams.get("repository_id");
+
+  if (!from || !to) {
+    return NextResponse.json(
+      { error: "`from` and `to` query params are required (ISO 8601)" },
+      { status: 400 },
+    );
+  }
+
+  const fromDate = new Date(from);
+  const toDate = new Date(to);
+  if (Number.isNaN(fromDate.getTime()) || Number.isNaN(toDate.getTime())) {
+    return NextResponse.json(
+      { error: "`from` / `to` must be valid ISO 8601 timestamps" },
+      { status: 400 },
+    );
+  }
+  if (fromDate >= toDate) {
+    return NextResponse.json(
+      { error: "`from` must be earlier than `to`" },
+      { status: 400 },
+    );
+  }
+  const fromIso = fromDate.toISOString();
+  const toIso = toDate.toISOString();
+
+  // Confirm the org has an active Datadog integration so the CLI can
+  // distinguish "no integration" from "no events in window" via the
+  // response shape — `source: null` vs `source: "datadog"`.
+  const { data: integration, error: integrationError } = await supabaseAdmin
+    .from("org_integrations")
+    .select("status")
+    .eq("organization_id", organizationId)
+    .eq("provider", "datadog")
+    .maybeSingle();
+
+  if (integrationError) {
+    logger.error("integrations/datadog/events: load integration", {
+      error: integrationError.message,
+    });
+    return NextResponse.json(
+      { error: "Failed to load integration" },
+      { status: 500 },
+    );
+  }
+
+  if (!integration || integration.status !== "active") {
+    return NextResponse.json({
+      source: null,
+      window: { from: fromIso, to: toIso },
+      deployments: [],
+      incidents: [],
+    });
+  }
+
+  // `id` breaks ties between deployments that share a `started_at`, so the
+  // page boundaries are stable across requests.
+  const { rows: deployments, error: deploymentsError } = await fetchAllRows(
+    (lo, hi) => {
+      let query = supabaseAdmin
+        .from("external_deployments")
+        .select(
+          "id, provider_event_id, repository_id, dd_repository_id, service, env, team, version, commit_sha, change_failure, deployment_type, source, started_at, finished_at, duration_seconds, number_of_commits, number_of_pull_requests, recovery_time_sec, remediation_type, remediation_id",
+        )
+        .eq("organization_id", organizationId)
+        .eq("provider", "datadog")
+        .gte("started_at", fromIso)
+        .lt("started_at", toIso)
+        .order("started_at", { ascending: true })
+        .order("id", { ascending: true });
+      if (repositoryId) {
+        query = query.eq("repository_id", repositoryId);
+      }
+      return query.range(lo, hi);
+    },
+  );
+  if (deploymentsError) {
+    logger.error("integrations/datadog/events: load deployments", {
+      error: deploymentsError.message,
+    });
+    return NextResponse.json(
+      { error: "Failed to load deployments" },
+      { status: 500 },
+    );
+  }
+
+  const deploymentIds = deployments.map((d) => d.id);
+  const commits: CommitRow[] = [];
+  for (let i = 0; i < deploymentIds.length; i += ID_CHUNK_SIZE) {
+    const idChunk = deploymentIds.slice(i, i + ID_CHUNK_SIZE);
+    // Assumes (deployment_id, commit_sha) identifies a row, which makes the
+    // ordering total — TODO confirm against the table's unique constraint.
+    const { rows: commitRows, error: commitsError } = await fetchAllRows(
+      (lo, hi) =>
+        supabaseAdmin
+          .from("external_deployment_commits")
+          .select(
+            "deployment_id, commit_sha, commit_timestamp, author_email, author_canonical_email, is_bot, change_lead_time, time_to_deploy",
+          )
+          .in("deployment_id", idChunk)
+          .order("deployment_id", { ascending: true })
+          .order("commit_sha", { ascending: true })
+          .range(lo, hi),
+    );
+    if (commitsError) {
+      logger.error("integrations/datadog/events: load commits", {
+        error: commitsError.message,
+      });
+      return NextResponse.json(
+        { error: "Failed to load deployment commits" },
+        { status: 500 },
+      );
+    }
+    for (const row of commitRows) commits.push(row);
+  }
+
+  const commitsByDeploymentId = new Map<string, CommitRow[]>();
+  for (const c of commits) {
+    const bucket = commitsByDeploymentId.get(c.deployment_id) ?? [];
+    bucket.push(c);
+    commitsByDeploymentId.set(c.deployment_id, bucket);
+  }
+
+  const { rows: incidents, error: incidentsError } = await fetchAllRows(
+    (lo, hi) =>
+      supabaseAdmin
+        .from("external_incidents")
+        .select(
+          "provider_event_id, service, env, team, name, severity, started_at, finished_at, time_to_restore_seconds, source",
+        )
+        .eq("organization_id", organizationId)
+        .eq("provider", "datadog")
+        .gte("started_at", fromIso)
+        .lt("started_at", toIso)
+        .order("started_at", { ascending: true })
+        .order("provider_event_id", { ascending: true })
+        .range(lo, hi),
+  );
+
+  if (incidentsError) {
+    logger.error("integrations/datadog/events: load incidents", {
+      error: incidentsError.message,
+    });
+    return NextResponse.json(
+      { error: "Failed to load incidents" },
+      { status: 500 },
+    );
+  }
+
+  return NextResponse.json({
+    source: "datadog",
+    window: { from: fromIso, to: toIso },
+    deployments: deployments.map((d) => ({
+      provider_event_id: d.provider_event_id,
+      repository_id: d.repository_id,
+      dd_repository_id: d.dd_repository_id,
+      service: d.service,
+      env: d.env,
+      team: d.team,
+      version: d.version,
+      commit_sha: d.commit_sha,
+      change_failure: d.change_failure,
+      deployment_type: d.deployment_type,
+      source: d.source,
+      started_at: d.started_at,
+      finished_at: d.finished_at,
+      duration_seconds: d.duration_seconds,
+      number_of_commits: d.number_of_commits,
+      number_of_pull_requests: d.number_of_pull_requests,
+      recovery_time_sec: d.recovery_time_sec,
+      remediation_type: d.remediation_type,
+      remediation_id: d.remediation_id,
+      commits: (commitsByDeploymentId.get(d.id) ?? []).map((c) => ({
+        commit_sha: c.commit_sha,
+        commit_timestamp: c.commit_timestamp,
+        author_email: c.author_email,
+        author_canonical_email: c.author_canonical_email,
+        is_bot: c.is_bot,
+        change_lead_time: c.change_lead_time,
+        time_to_deploy: c.time_to_deploy,
+      })),
+    })),
+    incidents,
+  });
+}
diff --git a/platform/src/types/metrics.ts b/platform/src/types/metrics.ts
index e487ec8..5a7799a 100644
--- a/platform/src/types/metrics.ts
+++ b/platform/src/types/metrics.ts
@@ -262,6 +262,25 @@ export interface ReportMetrics {
   };
   median_time_to_first_review_hours?: number;
 
+  // DORA (real) — populated when the org has an active Datadog integration
+  // and the engine fetched events for the analysis window. Every field
+  // here is optional; `dora_source` is the canonical "is there data?" flag.
+  dora_source?: "datadog";
+  dora_deployments_total?: number;
+  dora_deployments_failed?: number;
+  dora_deployments_pending_evaluation?: number;
+  dora_incidents_total?: number;
+  dora_cfr?: number; // 0.0–1.0
+  dora_mttr_per_deploy_seconds_median?: number;
+  dora_mttr_per_deploy_seconds_p90?: number;
+  dora_mttr_per_incident_seconds_median?: number;
+  dora_mttr_per_incident_seconds_p90?: number;
+  dora_rollback_rate?: number; // 0.0–1.0
+  dora_rollbacks_total?: number;
+  dora_lead_time_seconds_median?: number;
+  dora_deploy_frequency_per_day?: number;
+  dora_remediation_distribution?: Record<string, number>;
+
   // PR lifecycle
   pr_merged_count?: number;
   pr_median_time_to_merge_hours?: number;
diff --git a/tests/test_dora_real.py b/tests/test_dora_real.py
new file mode 100644
index 0000000..436c14f
--- /dev/null
+++ b/tests/test_dora_real.py
@@ -0,0 +1,191 @@
+"""Tests for the Datadog-derived DORA analysis module.
+
+Runnable as: `python -m pytest tests/test_dora_real.py -v`
+"""
+
+import sys
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from iris.analysis.dora_real import analyze_dora_real
+from iris.models.external import (
+    ExternalDeployment,
+    ExternalDeploymentCommit,
+    ExternalDORAData,
+    ExternalIncident,
+)
+
+
+_BASE = datetime(2026, 4, 1, 0, 0, tzinfo=timezone.utc)
+
+
+def _deploy(
+    *,
+    event_id: str = "d-1",
+    change_failure: bool | None = False,
+    recovery_time_sec: int | None = None,
+    remediation_type: str | None = None,
+    lead_times: list[int] | None = None,
+    started_offset_hours: float = 0.0,
+) -> ExternalDeployment:
+    commits = [
+        ExternalDeploymentCommit(commit_sha=f"sha-{i}", change_lead_time=lt)
+        for i, lt in enumerate(lead_times or [])
+    ]
+    return ExternalDeployment(
+        provider_event_id=event_id,
+        started_at=_BASE + timedelta(hours=started_offset_hours),
+        change_failure=change_failure,
+        recovery_time_sec=recovery_time_sec,
+        remediation_type=remediation_type,
+        commits=commits,
+    )
+
+
+def _incident(
+    *,
+    event_id: str = "i-1",
+    time_to_restore_seconds: int | None = None,
+    started_offset_hours: float = 0.0,
+) -> ExternalIncident:
+    return ExternalIncident(
+        provider_event_id=event_id,
+        started_at=_BASE + timedelta(hours=started_offset_hours),
+        time_to_restore_seconds=time_to_restore_seconds,
+    )
+
+
+def test_empty_input_defaults():
+    """No data → result is structured but everything sits at zero / None."""
+    result = analyze_dora_real(ExternalDORAData())
+    assert result.deployments_total == 0
+    assert result.cfr is None
+    assert result.rollback_rate is None
+    assert result.mttr_per_deploy_seconds_median is None
+    assert result.lead_time_seconds_median is None
+    assert result.deploy_frequency_per_day is None
+
+
+def test_cfr_excludes_pending_from_denominator():
+    """Tri-state: null change_failure must NOT count toward the denominator."""
+    data = ExternalDORAData(
+        deployments=[
+            _deploy(event_id=f"d-{i}", change_failure=False) for i in range(8)
+        ] + [
+            _deploy(event_id="d-f1", change_failure=True, recovery_time_sec=600),
+            _deploy(event_id="d-f2", change_failure=True, recovery_time_sec=1200),
+            _deploy(event_id="d-p1", change_failure=None),
+            _deploy(event_id="d-p2", change_failure=None),
+        ]
+    )
+
+    result = analyze_dora_real(data)
+
+    assert result.deployments_total == 12
+    assert result.deployments_failed == 2
+    assert result.deployments_pending_evaluation == 2
+    # 2 failed / 10 evaluated (8 + 2)
+    assert result.cfr == 0.2
+
+
+def test_cfr_none_when_everything_pending():
+    """All deploys pending evaluation → CFR is undefined, not 0."""
+    data = ExternalDORAData(
+        deployments=[
+            _deploy(event_id=f"d-{i}", change_failure=None) for i in range(5)
+        ]
+    )
+    result = analyze_dora_real(data)
+    assert result.deployments_pending_evaluation == 5
+    assert result.cfr is None
+
+
+def test_mttr_per_deploy_uses_recovery_time_sec_on_failed_only():
+    """Recovery time is meaningful only when change_failure is True."""
+    data = ExternalDORAData(
+        deployments=[
+            _deploy(event_id="ok-1", change_failure=False, recovery_time_sec=999_999),
+            _deploy(event_id="f-1", change_failure=True, recovery_time_sec=300),
+            _deploy(event_id="f-2", change_failure=True, recovery_time_sec=900),
+            _deploy(event_id="f-3", change_failure=True, recovery_time_sec=600),
+        ]
+    )
+    result = analyze_dora_real(data)
+    assert result.mttr_per_deploy_seconds_median == 600.0
+
+
+def test_mttr_per_incident_independent_of_deploys():
+    """Incident-level MTTR is the canonical DORA number; comes from failures table."""
+    data = ExternalDORAData(
+        incidents=[
+            _incident(event_id="i-1", time_to_restore_seconds=520_167),
+            _incident(event_id="i-2", time_to_restore_seconds=120_000),
+        ]
+    )
+    result = analyze_dora_real(data)
+    assert result.incidents_total == 2
+    assert result.mttr_per_incident_seconds_median == 320_083.5
+
+
+def test_rollback_rate_is_share_of_failed_deploys():
+    """rollback_rate = rollbacks / failed; null when no failures."""
+    data = ExternalDORAData(
+        deployments=[
+            _deploy(event_id="f-1", change_failure=True, remediation_type="rollback"),
+            _deploy(event_id="f-2", change_failure=True, remediation_type="rollback"),
+            _deploy(event_id="f-3", change_failure=True, remediation_type="hotfix"),
+            _deploy(event_id="f-4", change_failure=True, remediation_type=None),
+        ]
+    )
+    result = analyze_dora_real(data)
+    assert result.deployments_failed == 4
+    assert result.rollbacks_total == 2
+    assert result.rollback_rate == 0.5
+    assert result.remediation_distribution == {
+        "rollback": 2,
+        "hotfix": 1,
+        "unknown": 1,
+    }
+
+
+def test_rollback_rate_none_when_no_failures():
+    data = ExternalDORAData(
+        deployments=[_deploy(event_id="ok-1", change_failure=False)]
+    )
+    result = analyze_dora_real(data)
+    assert result.deployments_failed == 0
+    assert result.rollback_rate is None
+
+
+def test_lead_time_median_across_all_commits():
+    """Lead time is per-commit, aggregated across deploys."""
+    data = ExternalDORAData(
+        deployments=[
+            _deploy(event_id="d-1", lead_times=[3600, 7200]),
+            _deploy(event_id="d-2", lead_times=[1800]),
+            _deploy(event_id="d-3", lead_times=[]),
+        ]
+    )
+    result = analyze_dora_real(data)
+    assert result.lead_time_seconds_median == 3600.0
+
+
+def test_deploy_frequency_per_day_uses_window():
+    """Deploy frequency = deploys / window-days; None if window missing."""
+    data = ExternalDORAData(
+        deployments=[_deploy(event_id=f"d-{i}") for i in range(30)],
+        window_from=_BASE,
+        window_to=_BASE + timedelta(days=10),
+    )
+    result = analyze_dora_real(data)
+    assert result.deploy_frequency_per_day == 3.0
+
+
+def test_deploy_frequency_none_without_window():
+    data = ExternalDORAData(
+        deployments=[_deploy(event_id="d-1")],
+    )
+    result = analyze_dora_real(data)
+    assert result.deploy_frequency_per_day is None