chore: make span.sampled reference span.context.sampling_priority and…

… deprecate it (#8461) With `span.sampled` being outdated, and no longer reliable as a way to tell if a span is actually being sampled, we make this change to make its value reference `span.context.sampling_priority` which is what actually matters when it comes to sampling. This deprecates span.sampled and removes its use internally, replacing it with checks for span.context.sampling_priority to mimic behavior. If the value of `span.context.sampling_priority`, is `None` we know that sampling hasn't yet run, and then maintain the original behavior of `span.sampled` being `True` until sampling is run, if it's >0 we know that the span has been sampled, if <=0 we know the span has not been sampled. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. - [x] If change touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. ## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: Munir Abdinur <munir.abdinur@datadoghq.com>
DataDog · Feb 27, 2024 · 81511c9 · 81511c9
1 parent 9fff66c
commit 81511c9
Show file tree

Hide file tree

Showing 9 changed files with 55 additions and 29 deletions.
diff --git a/ddtrace/_trace/span.py b/ddtrace/_trace/span.py
@@ -42,6 +42,8 @@
 from ddtrace.internal.logger import get_logger
 from ddtrace.internal.sampling import SamplingMechanism
 from ddtrace.internal.sampling import set_sampling_decision_maker
+from ddtrace.internal.utils.deprecations import DDTraceDeprecationWarning
+from ddtrace.vendor.debtcollector import deprecate
 
 
 _NUMERIC_TAGS = (ANALYTICS_SAMPLE_RATE_KEY,)
@@ -82,8 +84,6 @@ class Span(object):
         "span_type",
         "start_ns",
         "duration_ns",
-        # Sampler attributes
-        "sampled",
         # Internal attributes
         "_context",
         "_local_root",
@@ -168,10 +168,8 @@ def __init__(
         self.parent_id = parent_id  # type: Optional[int]
         self._on_finish_callbacks = [] if on_finish is None else on_finish
 
-        # sampling
-        self.sampled = True  # type: bool
-
         self._context = context._with_span(self) if context else None  # type: Optional[Context]
+
         self._links = {}  # type: Dict[int, SpanLink]
         if links:
             self._links = {link.span_id: link for link in links}
@@ -260,6 +258,30 @@ def duration(self, value):
         # type: (float) -> None
         self.duration_ns = int(value * 1e9)
 
+    @property
+    def sampled(self):
+        # type: () -> Optional[bool]
+        deprecate(
+            "span.sampled is deprecated and will be removed in a future version of the tracer.",
+            message="""span.sampled references the state of span.context.sampling_priority.
+            Please use span.context.sampling_priority instead to check if a span is sampled.""",
+            category=DDTraceDeprecationWarning,
+        )
+        if self.context.sampling_priority is None:
+            # this maintains original span.sampled behavior, where all spans would start
+            # with span.sampled = True until sampling runs
+            return True
+        return self.context.sampling_priority > 0
+
+    @sampled.setter
+    def sampled(self, value):
+        deprecate(
+            "span.sampled is deprecated and will be removed in a future version of the tracer.",
+            message="""span.sampled has a no-op setter.
+            Please use span.set_tag('manual.keep'/'manual.drop') to keep or drop spans.""",
+            category=DDTraceDeprecationWarning,
+        )
+
     def finish(self, finish_time=None):
         # type: (Optional[float]) -> None
         """Mark the end time of the span and submit it to the tracer.

diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py
@@ -695,7 +695,6 @@ def _start_span(
 
             # Extra attributes when from a local parent
             if parent:
-                span.sampled = parent.sampled
                 span._parent = parent
                 span._local_root = parent._local_root
 
@@ -750,7 +749,7 @@ def _start_span(
             self._services.add(service)
 
         if not trace_id:
-            span.sampled = self._sampler.sample(span, allow_false=isinstance(self._sampler, RateSampler))
+            self._sampler.sample(span, allow_false=isinstance(self._sampler, RateSampler))
 
         # Only call span processors if the tracer is enabled
         if self.enabled:

diff --git a/ddtrace/contrib/algoliasearch/patch.py b/ddtrace/contrib/algoliasearch/patch.py
@@ -125,8 +125,7 @@ def _patched_search(func, instance, wrapt_args, wrapt_kwargs):
         span.set_tag_str(SPAN_KIND, SpanKind.CLIENT)
 
         span.set_tag(SPAN_MEASURED_KEY)
-
-        if not span.sampled:
+        if span.context.sampling_priority is None or span.context.sampling_priority <= 0:
             return func(*wrapt_args, **wrapt_kwargs)
 
         if config.algoliasearch.collect_query_text:

diff --git a/ddtrace/contrib/elasticsearch/patch.py b/ddtrace/contrib/elasticsearch/patch.py
@@ -142,8 +142,8 @@ def _perform_request(func, instance, args, kwargs):
 
             span.set_tag(SPAN_MEASURED_KEY)
 
-            # Don't instrument if the trace is not sampled
-            if not span.sampled:
+            # Only instrument if trace is sampled or if we haven't tried to sample yet
+            if span.context.sampling_priority is None or span.context.sampling_priority <= 0:
                 yield func(*args, **kwargs)
                 return
 

diff --git a/ddtrace/contrib/psycopg/extensions.py b/ddtrace/contrib/psycopg/extensions.py
@@ -43,7 +43,7 @@ def execute(self, query, vars=None):  # noqa: A002
                 s.set_tag_str(SPAN_KIND, SpanKind.CLIENT)
 
                 s.set_tag(SPAN_MEASURED_KEY)
-                if not s.sampled:
+                if s.context.sampling_priority is None or s.context.sampling_priority <= 0:
                     return super(TracedCursor, self).execute(query, vars)
 
                 s.resource = query

diff --git a/ddtrace/internal/sampling.py b/ddtrace/internal/sampling.py
@@ -305,7 +305,6 @@ def _apply_rate_limit(span, sampled, limiter):
 def _set_priority(span, priority):
     # type: (Span, int) -> None
     span.context.sampling_priority = priority
-    span.sampled = priority > 0  # Positive priorities mean it was kept
 
 
 def _get_highest_precedence_rule_matching(span, rules):

diff --git a/ddtrace/llmobs/_integrations/base.py b/ddtrace/llmobs/_integrations/base.py
@@ -73,12 +73,13 @@ def llmobs_enabled(self) -> bool:
         return config._llmobs_enabled
 
     def is_pc_sampled_span(self, span: Span) -> bool:
-        if not span.sampled:
+        if span.context.sampling_priority is None or span.context.sampling_priority <= 0:
             return False
         return self._span_pc_sampler.sample(span)
 
     def is_pc_sampled_log(self, span: Span) -> bool:
-        if not self.logs_enabled or not span.sampled:
+        sampled = span.context.sampling_priority is not None or span.context.sampling_priority <= 0  # type: ignore
+        if not self.logs_enabled or not sampled:
             return False
         return self._log_pc_sampler.sample(span)
 

diff --git a/tests/tracer/test_sampler.py b/tests/tracer/test_sampler.py
@@ -104,7 +104,11 @@ def test_sample_rate_deviation(self):
 
             samples = tracer.pop()
             # non sampled spans do not have sample rate applied
-            sampled_spans = [s for s in samples if s.sampled]
+            sampled_spans = [s for s in samples if s.get_metric(SAMPLE_RATE_METRIC_KEY)]
+            if sample_rate != 1:
+                assert len(sampled_spans) != len(samples)
+            else:
+                assert len(sampled_spans) == len(samples)
             assert (
                 sampled_spans[0].get_metric(SAMPLE_RATE_METRIC_KEY) == sample_rate
             ), "Sampled span should have sample rate properly assigned"
@@ -131,7 +135,8 @@ def test_deterministic_behavior(self):
             assert (
                 len(samples) <= 1
             ), "evaluating sampling rules against a span should result in either dropping or not dropping it"
-            sampled = 1 == len([sample for sample in samples if sample.sampled is True])
+            # sample rate metric is only set on sampled spans
+            sampled = 1 == len([sample for sample in samples if sample.get_metric(SAMPLE_RATE_METRIC_KEY) is not None])
             for _ in range(10):
                 other_span = Span(str(i), trace_id=span.trace_id)
                 assert sampled == tracer._sampler.sample(
@@ -916,10 +921,11 @@ def test_datadog_sampler_sample_rules(sampler, sampling_priority, sampling_mecha
     dummy_tracer.configure(sampler=sampler)
     dummy_tracer.trace("span").finish()
     spans = dummy_tracer.pop()
-
     assert len(spans) > 0, "A tracer using DatadogSampler should always emit its spans"
     span = spans[0]
-    assert span.sampled, "A span emitted from a tracer using DatadogSampler should always have the 'sampled' flag set"
+    assert (
+        span.context.sampling_priority is not None
+    ), "A span emitted from a tracer using DatadogSampler should always have the 'sampled' flag set"
     trace_tag = "-%d" % sampling_mechanism if sampling_mechanism is not None else None
     assert_sampling_decision_tags(
         span, rule=rule, limit=limit, sampling_priority=sampling_priority, trace_tag=trace_tag

diff --git a/tests/tracer/test_tracer.py b/tests/tracer/test_tracer.py
@@ -1713,37 +1713,37 @@ def test_context_priority(tracer, test_spans):
 
 def test_spans_sampled_out(tracer, test_spans):
     with tracer.trace("root") as span:
-        span.sampled = False
+        span.context.sampling_priority = 0
         with tracer.trace("child") as span:
-            span.sampled = False
+            span.context.sampling_priority = 0
         with tracer.trace("child") as span:
-            span.sampled = False
+            span.context.sampling_priority = 0
 
     spans = test_spans.pop()
     assert len(spans) == 3
     for span in spans:
-        assert span.sampled is False
+        assert span.context.sampling_priority <= 0
 
 
 def test_spans_sampled_one(tracer, test_spans):
     with tracer.trace("root") as span:
-        span.sampled = False
+        span.context.sampling_priority = 0
         with tracer.trace("child") as span:
-            span.sampled = False
+            span.context.sampling_priority = 0
         with tracer.trace("child") as span:
-            span.sampled = True
+            span.context.sampling_priority = 1
 
     spans = test_spans.pop()
     assert len(spans) == 3
 
 
 def test_spans_sampled_all(tracer, test_spans):
     with tracer.trace("root") as span:
-        span.sampled = True
+        span.context.sampling_priority = 1
         with tracer.trace("child") as span:
-            span.sampled = True
+            span.context.sampling_priority = 1
         with tracer.trace("child") as span:
-            span.sampled = True
+            span.context.sampling_priority = 1
 
     spans = test_spans.pop()
     assert len(spans) == 3