Skip to content

Commit

Permalink
fix(tracing): ensure p is on the tracestate of active spans (#8569)
Browse files Browse the repository at this point in the history
Currently last datadog parent id is added to the tracestate in
`Htttpropagator.inject`.

We should only set the `p` tracestate field if a span is active.

This PR simplifies this feature by ensure `dd=p:..........;....` is
always set in the tracestate of otel spans

## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.
- [x] If change touches code that signs or publishes builds or packages,
or handles credentials of any kind, I've requested a review from
`@DataDog/security-design-and-guidance`.

## Reviewer Checklist

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
  • Loading branch information
mabdinur committed Mar 7, 2024
1 parent 2f33a9f commit 90a3e3f
Show file tree
Hide file tree
Showing 12 changed files with 114 additions and 39 deletions.
20 changes: 14 additions & 6 deletions ddtrace/_trace/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@
Optional[int], # span_id
_MetaDictType, # _meta
_MetricDictType, # _metrics
list[SpanLink],
dict[str, Any],
list[SpanLink], # span_links
dict[str, Any], # baggage
bool, # is_remote
]


Expand All @@ -45,7 +46,7 @@ class Context(object):
boundaries.
"""

__slots__ = ["trace_id", "span_id", "_lock", "_meta", "_metrics", "_span_links", "_baggage"]
__slots__ = ["trace_id", "span_id", "_lock", "_meta", "_metrics", "_span_links", "_baggage", "_is_remote"]

def __init__(
self,
Expand All @@ -58,13 +59,15 @@ def __init__(
lock=None, # type: Optional[threading.RLock]
span_links=None, # type: Optional[list[SpanLink]]
baggage=None, # type: Optional[dict[str, Any]]
is_remote=True, # type: bool
):
self._meta = meta if meta is not None else {} # type: _MetaDictType
self._metrics = metrics if metrics is not None else {} # type: _MetricDictType
self._baggage = baggage if baggage is not None else {} # type: dict[str, Any]

self.trace_id = trace_id # type: Optional[int]
self.span_id = span_id # type: Optional[int]
self._is_remote = is_remote # type: bool

if dd_origin is not None and _DD_ORIGIN_INVALID_CHARS_REGEX.search(dd_origin) is None:
self._meta[ORIGIN_KEY] = dd_origin
Expand All @@ -91,13 +94,14 @@ def __getstate__(self):
self._meta,
self._metrics,
self._span_links,
self._baggage
self._baggage,
self._is_remote,
# Note: self._lock is not serializable
)

def __setstate__(self, state):
# type: (_ContextState) -> None
self.trace_id, self.span_id, self._meta, self._metrics, self._span_links, self._baggage = state
self.trace_id, self.span_id, self._meta, self._metrics, self._span_links, self._baggage, self._is_remote = state
# We cannot serialize and lock, so we must recreate it unless we already have one
self._lock = threading.RLock()

Expand All @@ -111,6 +115,8 @@ def _with_span(self, span):
metrics=self._metrics,
lock=self._lock,
baggage=self._baggage,
span_links=self._span_links,
is_remote=self._is_remote,
)

def _update_tags(self, span):
Expand Down Expand Up @@ -251,18 +257,20 @@ def __eq__(self, other):
and self._metrics == other._metrics
and self._span_links == other._span_links
and self._baggage == other._baggage
and self._is_remote == other._is_remote
)
return False

def __repr__(self):
# type: () -> str
return "Context(trace_id=%s, span_id=%s, _meta=%s, _metrics=%s, _span_links=%s, _baggage=%s)" % (
return "Context(trace_id=%s, span_id=%s, _meta=%s, _metrics=%s, _span_links=%s, _baggage=%s, _is_remote=%s)" % (
self.trace_id,
self.span_id,
self._meta,
self._metrics,
self._span_links,
self._baggage,
self._is_remote,
)

__str__ = __repr__
5 changes: 5 additions & 0 deletions ddtrace/_trace/processor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from ddtrace.constants import USER_KEEP
from ddtrace.internal import gitmetadata
from ddtrace.internal.constants import HIGHER_ORDER_TRACE_ID_BITS
from ddtrace.internal.constants import LAST_DD_PARENT_ID_KEY
from ddtrace.internal.constants import MAX_UINT_64BITS
from ddtrace.internal.logger import get_logger
from ddtrace.internal.sampling import SpanSamplingRule
Expand Down Expand Up @@ -218,6 +219,10 @@ def process_trace(self, trace):
if chunk_root.trace_id > MAX_UINT_64BITS:
trace_id_hob = _get_64_highest_order_bits_as_hex(chunk_root.trace_id)
chunk_root.set_tag_str(HIGHER_ORDER_TRACE_ID_BITS, trace_id_hob)

if LAST_DD_PARENT_ID_KEY in chunk_root._meta and chunk_root._parent is not None:
# we should only set the last parent id on local root spans
del chunk_root._meta[LAST_DD_PARENT_ID_KEY]
return trace


Expand Down
2 changes: 1 addition & 1 deletion ddtrace/_trace/span.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ def context(self):
# type: () -> Context
"""Return the trace context for this span."""
if self._context is None:
self._context = Context(trace_id=self.trace_id, span_id=self.span_id)
self._context = Context(trace_id=self.trace_id, span_id=self.span_id, is_remote=False)
return self._context

def link_span(self, context, attributes=None):
Expand Down
3 changes: 2 additions & 1 deletion ddtrace/_trace/tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,7 @@ def _start_span(
sampling_priority=child_of.context.sampling_priority,
span_id=child_of.span_id,
trace_id=child_of.trace_id,
is_remote=False,
)

# If the child_of span was active then activate the new context
Expand All @@ -658,7 +659,7 @@ def _start_span(
context = child_of.context
parent = child_of
else:
context = Context()
context = Context(is_remote=False)

trace_id = context.trace_id
parent_id = context.span_id
Expand Down
2 changes: 2 additions & 0 deletions ddtrace/internal/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@
)
W3C_TRACESTATE_KEY = "tracestate"
W3C_TRACEPARENT_KEY = "traceparent"
W3C_TRACESTATE_PARENT_ID_KEY = "p"
W3C_TRACESTATE_ORIGIN_KEY = "o"
W3C_TRACESTATE_SAMPLING_PRIORITY_KEY = "s"
DEFAULT_SAMPLING_RATE_LIMIT = 100
SAMPLING_DECISION_TRACE_TAG_KEY = "_dd.p.dm"
LAST_DD_PARENT_ID_KEY = "_dd.parent_id"
DEFAULT_SERVICE_NAME = "unnamed-python-service"
# Used to set the name of an integration on a span
COMPONENT = "component"
Expand Down
11 changes: 11 additions & 0 deletions ddtrace/internal/utils/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from ddtrace.internal.constants import DEFAULT_TIMEOUT
from ddtrace.internal.constants import SAMPLING_DECISION_TRACE_TAG_KEY
from ddtrace.internal.constants import W3C_TRACESTATE_ORIGIN_KEY
from ddtrace.internal.constants import W3C_TRACESTATE_PARENT_ID_KEY
from ddtrace.internal.constants import W3C_TRACESTATE_SAMPLING_PRIORITY_KEY
from ddtrace.internal.http import HTTPConnection
from ddtrace.internal.http import HTTPSConnection
Expand Down Expand Up @@ -205,6 +206,16 @@ def w3c_encode_tag(args):
return tag_val.replace("=", "~")


def w3c_tracestate_add_p(tracestate, span_id):
# Adds last datadog parent_id to tracestate. This tag is used to reconnect a trace with non-datadog spans
p_member = "{}:{:016x}".format(W3C_TRACESTATE_PARENT_ID_KEY, span_id)
if "dd=" in tracestate:
return tracestate.replace("dd=", f"dd={p_member};")
elif tracestate:
return f"dd={p_member},{tracestate}"
return f"dd={p_member}"


class Response(object):
"""
Custom API Response object to represent a response from calling the API.
Expand Down
4 changes: 3 additions & 1 deletion ddtrace/opentelemetry/_span.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from ddtrace.internal.logger import get_logger
from ddtrace.internal.utils.formats import flatten_key_value
from ddtrace.internal.utils.formats import is_sequence
from ddtrace.internal.utils.http import w3c_tracestate_add_p


if TYPE_CHECKING:
Expand Down Expand Up @@ -137,7 +138,8 @@ def get_span_context(self):
ts = None
tf = TraceFlags.DEFAULT
if self._ddspan.context:
ts = TraceState.from_header([self._ddspan.context._tracestate])
ts_str = w3c_tracestate_add_p(self._ddspan.context._tracestate, self._ddspan.span_id)
ts = TraceState.from_header([ts_str])
if self._ddspan.context.sampling_priority and self._ddspan.context.sampling_priority > 0:
tf = TraceFlags.SAMPLED

Expand Down
22 changes: 13 additions & 9 deletions ddtrace/propagation/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from ..internal.constants import _PROPAGATION_STYLE_NONE
from ..internal.constants import _PROPAGATION_STYLE_W3C_TRACECONTEXT
from ..internal.constants import HIGHER_ORDER_TRACE_ID_BITS as _HIGHER_ORDER_TRACE_ID_BITS
from ..internal.constants import LAST_DD_PARENT_ID_KEY
from ..internal.constants import MAX_UINT_64BITS as _MAX_UINT_64BITS
from ..internal.constants import PROPAGATION_STYLE_B3_MULTI
from ..internal.constants import PROPAGATION_STYLE_B3_SINGLE
Expand All @@ -45,6 +46,7 @@
from ..internal.sampling import SAMPLING_DECISION_TRACE_TAG_KEY
from ..internal.sampling import SamplingMechanism
from ..internal.sampling import validate_sampling_decision
from ..internal.utils.http import w3c_tracestate_add_p
from ._utils import get_wsgi_header


Expand Down Expand Up @@ -815,7 +817,7 @@ def _get_context(trace_id, span_id, trace_flag, ts, meta=None):
sampling_priority_ts, other_propagated_tags, origin, lpid = tracestate_values
meta.update(other_propagated_tags.items())
if lpid:
meta["_dd.parent_id"] = lpid
meta[LAST_DD_PARENT_ID_KEY] = lpid

sampling_priority = _TraceContext._get_sampling_priority(trace_flag, sampling_priority_ts, origin)
else:
Expand All @@ -835,15 +837,17 @@ def _inject(span_context, headers):
tp = span_context._traceparent
if tp:
headers[_HTTP_HEADER_TRACEPARENT] = tp
# only inject tracestate if traceparent injected: https://www.w3.org/TR/trace-context/#tracestate-header
ts = span_context._tracestate
# Adds last datadog parent_id to tracestate. This tag is used to reconnect a trace with non-datadog spans
if "dd=" in ts:
ts = ts.replace("dd=", "dd=p:{:016x};".format(span_context.span_id or 0))
if span_context._is_remote is False:
# Datadog Span is active, so the current span_id is the last datadog span_id
headers[_HTTP_HEADER_TRACESTATE] = w3c_tracestate_add_p(
span_context._tracestate, span_context.span_id or 0
)
elif LAST_DD_PARENT_ID_KEY in span_context._meta:
# Datadog Span is not active, propagate the last datadog span_id
span_id = int(span_context._meta[LAST_DD_PARENT_ID_KEY], 16)
headers[_HTTP_HEADER_TRACESTATE] = w3c_tracestate_add_p(span_context._tracestate, span_id)
else:
ts = "dd=p:{:016x}".format(span_context.span_id or 0)

headers[_HTTP_HEADER_TRACESTATE] = ts
headers[_HTTP_HEADER_TRACESTATE] = span_context._tracestate


class _NOP_Propagator:
Expand Down
4 changes: 4 additions & 0 deletions releasenotes/notes/always-add-p-to-ts-ff6c6c68c3cb9bfc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
otel: Ensures that the last datadog parent_id is added to w3c distributed tracing headers generated by the OpenTelemetry API.
4 changes: 2 additions & 2 deletions tests/opentelemetry/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def _subprocess_task(parent_span_context, errors):
ot_tracer._tracer.flush()


@pytest.mark.snapshot
@pytest.mark.snapshot(ignores=["meta.tracestate"])
def test_otel_trace_across_fork(oteltracer):
errors = multiprocessing.Queue()
with oteltracer.start_as_current_span("root") as root:
Expand All @@ -102,7 +102,7 @@ def test_otel_trace_across_fork(oteltracer):
assert errors.empty(), errors.get()


@pytest.mark.snapshot(wait_for_num_traces=1)
@pytest.mark.snapshot(wait_for_num_traces=1, ignores=["meta.tracestate"])
@pytest.mark.parametrize("decision", [MANUAL_KEEP_KEY, MANUAL_DROP_KEY], ids=["manual.keep", "manual.drop"])
def test_sampling_decisions_across_processes(oteltracer, decision):
# sampling decision in the subprocess task should be the same as the parent
Expand Down
7 changes: 5 additions & 2 deletions tests/opentelemetry/test_span.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def test_otel_get_span_context(oteltracer):
# By default ddtrace set sampled=True for all spans
assert span_context.trace_flags == TraceFlags.SAMPLED
# Default tracestate values set on all Datadog Spans
assert span_context.trace_state.to_header() == "dd=s:1;t.dm:-0"
assert span_context.trace_state.to_header() == "dd=p:{:016x};s:1;t.dm:-0".format(span_context.span_id)


def test_otel_get_span_context_with_multiple_tracesates(oteltracer):
Expand All @@ -206,7 +206,10 @@ def test_otel_get_span_context_with_multiple_tracesates(oteltracer):
otelspan._ddspan._context._meta["_dd.p.some_val"] = "tehehe"

span_context = otelspan.get_span_context()
assert span_context.trace_state.to_header() == "dd=s:1;t.dm:-0;t.congo:t61rcWkgMzE;t.some_val:tehehe"
assert (
span_context.trace_state.to_header()
== "dd=p:{:016x};s:1;t.dm:-0;t.congo:t61rcWkgMzE;t.some_val:tehehe".format(span_context.span_id)
)


def test_otel_get_span_context_with_default_trace_state(oteltracer):
Expand Down
Loading

0 comments on commit 90a3e3f

Please sign in to comment.