From 239a70c5f716378fe72146d4ec16757323a9df20 Mon Sep 17 00:00:00 2001
From: Morgan Wowk
Date: Fri, 22 May 2026 15:24:17 -0700
Subject: [PATCH] Add k8s pod/cluster attributes to PENDING status span

k8s.pod.name, k8s.namespace.name, k8s.cluster.url sourced from
ContainerExecution.launcher_data. Handles kubernetes, kubernetes_job,
and non-k8s launchers (SkyPilot) gracefully via early-return guards.
---
 .../instrumentation/execution_tracing.py      | 33 +++++++++++++++++++++++++++++++++
 .../instrumentation/test_execution_tracing.py | 27 +++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/cloud_pipelines_backend/instrumentation/execution_tracing.py b/cloud_pipelines_backend/instrumentation/execution_tracing.py
index e5e8afe..efda67e 100644
--- a/cloud_pipelines_backend/instrumentation/execution_tracing.py
+++ b/cloud_pipelines_backend/instrumentation/execution_tracing.py
@@ -61,6 +61,38 @@ def _error_attrs(*, execution: bts.ExecutionNode, status: str) -> dict[str, obje
 _EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
 
 
+def _launcher_pod_attrs(
+    *, execution: bts.ExecutionNode, status: str
+) -> dict[str, object]:
+    """Return k8s pod/cluster attributes for the PENDING status span.
+
+    Reads the ``kubernetes`` (or, failing that, ``kubernetes_job``) entry of
+    the container execution's ``launcher_data``; ``job_name`` stands in for a
+    missing ``pod_name``. Returns ``{}`` for any other status, or when there
+    is no container execution, no launcher data, or no k8s entry.
+    """
+    if status != bts.ContainerExecutionStatus.PENDING:
+        return {}
+    if execution.container_execution_id is None:
+        return {}
+    ce = execution.container_execution
+    if ce is None or ce.launcher_data is None:
+        return {}
+    k8s = (
+        ce.launcher_data.get("kubernetes")
+        or ce.launcher_data.get("kubernetes_job")
+        or {}
+    )
+    attrs: dict[str, object] = {}
+    if pod_name := k8s.get("pod_name") or k8s.get("job_name"):
+        attrs["k8s.pod.name"] = pod_name
+    if namespace := k8s.get("namespace"):
+        attrs["k8s.namespace.name"] = namespace
+    if cluster_url := k8s.get("cluster_server"):
+        attrs["k8s.cluster.url"] = cluster_url
+    return attrs
+
+
 def _ns(*, dt: datetime.datetime) -> int:
     """Return *dt* as nanoseconds since the Unix epoch (required by OTel SDK).
 
@@ -106,6 +138,7 @@ def try_emit_execution_trace(*, execution: bts.ExecutionNode) -> None:
                 "execution.id": execution.id,
                 "execution.status": entry["status"],
                 **_error_attrs(execution=execution, status=entry["status"]),
+                **_launcher_pod_attrs(execution=execution, status=entry["status"]),
             }
             _tracer.start_span(
                 f"execution.status {entry['status']}",
diff --git a/tests/instrumentation/test_execution_tracing.py b/tests/instrumentation/test_execution_tracing.py
index 4b74770..15761cc 100644
--- a/tests/instrumentation/test_execution_tracing.py
+++ b/tests/instrumentation/test_execution_tracing.py
@@ -210,3 +210,30 @@ def test_root_span_not_marked_error_on_succeeded(
             s for s in span_exporter.get_finished_spans() if s.name == "execution"
         )
         assert root.status.status_code != StatusCode.ERROR
+
+
+class TestLauncherPodAttrs:
+    def test_pending_span_without_container_execution_has_no_k8s_attributes(
+        self, span_exporter: InMemorySpanExporter
+    ) -> None:
+        execution = _make_execution(
+            statuses=["QUEUED", "PENDING", "RUNNING", "SUCCEEDED"]
+        )
+        execution_tracing.try_emit_execution_trace(execution=execution)
+
+        pending_span = next(
+            s
+            for s in span_exporter.get_finished_spans()
+            if s.attributes.get("execution.status") == "PENDING"
+        )
+        # No container_execution set on this stub — attrs should simply be absent.
+        assert "k8s.pod.name" not in (pending_span.attributes or {})
+
+    def test_non_pending_span_has_no_k8s_attributes(
+        self, span_exporter: InMemorySpanExporter
+    ) -> None:
+        execution = _make_execution(statuses=["QUEUED", "SUCCEEDED"])
+        execution_tracing.try_emit_execution_trace(execution=execution)
+
+        for span in span_exporter.get_finished_spans():
+            assert "k8s.pod.name" not in (span.attributes or {})