From 81511c9557d46ddd5cd061d6202031610b707694 Mon Sep 17 00:00:00 2001
From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com>
Date: Tue, 27 Feb 2024 16:17:22 -0500
Subject: [PATCH] chore: make span.sampled reference
 span.context.sampling_priority and deprecate it (#8461)

With `span.sampled` being outdated, and no longer reliable as a way to
tell if a span is actually being sampled, we make this change to make
its value reference `span.context.sampling_priority` which is what
actually matters when it comes to sampling. This deprecates span.sampled
and removes its use internally, replacing it with checks for
span.context.sampling_priority to mimic behavior.

If the value of `span.context.sampling_priority`, is `None` we know that
sampling hasn't yet run, and then maintain the original behavior of
`span.sampled` being `True` until sampling is run, if it's >0 we know
that the span has been sampled, if <=0 we know the span has not been
sampled.

## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.
- [x] If change touches code that signs or publishes builds or packages,
or handles credentials of any kind, I've requested a review from
`@DataDog/security-design-and-guidance`.

## Reviewer Checklist

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: Munir Abdinur <munir.abdinur@datadoghq.com>
---
 ddtrace/_trace/span.py                 | 32 ++++++++++++++++++++++----
 ddtrace/_trace/tracer.py               |  3 +--
 ddtrace/contrib/algoliasearch/patch.py |  3 +--
 ddtrace/contrib/elasticsearch/patch.py |  4 ++--
 ddtrace/contrib/psycopg/extensions.py  |  2 +-
 ddtrace/internal/sampling.py           |  1 -
 ddtrace/llmobs/_integrations/base.py   |  5 ++--
 tests/tracer/test_sampler.py           | 14 +++++++----
 tests/tracer/test_tracer.py            | 20 ++++++++--------
 9 files changed, 55 insertions(+), 29 deletions(-)

diff --git a/ddtrace/_trace/span.py b/ddtrace/_trace/span.py
index 2460e8fce6..bd290c9106 100644
--- a/ddtrace/_trace/span.py
+++ b/ddtrace/_trace/span.py
@@ -42,6 +42,8 @@
 from ddtrace.internal.logger import get_logger
 from ddtrace.internal.sampling import SamplingMechanism
 from ddtrace.internal.sampling import set_sampling_decision_maker
+from ddtrace.internal.utils.deprecations import DDTraceDeprecationWarning
+from ddtrace.vendor.debtcollector import deprecate
 
 
 _NUMERIC_TAGS = (ANALYTICS_SAMPLE_RATE_KEY,)
@@ -82,8 +84,6 @@ class Span(object):
         "span_type",
         "start_ns",
         "duration_ns",
-        # Sampler attributes
-        "sampled",
         # Internal attributes
         "_context",
         "_local_root",
@@ -168,10 +168,8 @@ def __init__(
         self.parent_id = parent_id  # type: Optional[int]
         self._on_finish_callbacks = [] if on_finish is None else on_finish
 
-        # sampling
-        self.sampled = True  # type: bool
-
         self._context = context._with_span(self) if context else None  # type: Optional[Context]
+
         self._links = {}  # type: Dict[int, SpanLink]
         if links:
             self._links = {link.span_id: link for link in links}
@@ -260,6 +258,30 @@ def duration(self, value):
         # type: (float) -> None
         self.duration_ns = int(value * 1e9)
 
+    @property
+    def sampled(self):
+        # type: () -> Optional[bool]
+        deprecate(
+            "span.sampled is deprecated and will be removed in a future version of the tracer.",
+            message="""span.sampled references the state of span.context.sampling_priority.
+            Please use span.context.sampling_priority instead to check if a span is sampled.""",
+            category=DDTraceDeprecationWarning,
+        )
+        if self.context.sampling_priority is None:
+            # this maintains original span.sampled behavior, where all spans would start
+            # with span.sampled = True until sampling runs
+            return True
+        return self.context.sampling_priority > 0
+
+    @sampled.setter
+    def sampled(self, value):
+        deprecate(
+            "span.sampled is deprecated and will be removed in a future version of the tracer.",
+            message="""span.sampled has a no-op setter.
+            Please use span.set_tag('manual.keep'/'manual.drop') to keep or drop spans.""",
+            category=DDTraceDeprecationWarning,
+        )
+
     def finish(self, finish_time=None):
         # type: (Optional[float]) -> None
         """Mark the end time of the span and submit it to the tracer.
diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py
index e7ad1b07f5..dca5fe1b8d 100644
--- a/ddtrace/_trace/tracer.py
+++ b/ddtrace/_trace/tracer.py
@@ -695,7 +695,6 @@ def _start_span(
 
             # Extra attributes when from a local parent
             if parent:
-                span.sampled = parent.sampled
                 span._parent = parent
                 span._local_root = parent._local_root
 
@@ -750,7 +749,7 @@ def _start_span(
             self._services.add(service)
 
         if not trace_id:
-            span.sampled = self._sampler.sample(span, allow_false=isinstance(self._sampler, RateSampler))
+            self._sampler.sample(span, allow_false=isinstance(self._sampler, RateSampler))
 
         # Only call span processors if the tracer is enabled
         if self.enabled:
diff --git a/ddtrace/contrib/algoliasearch/patch.py b/ddtrace/contrib/algoliasearch/patch.py
index b576e4d108..5408504fc7 100644
--- a/ddtrace/contrib/algoliasearch/patch.py
+++ b/ddtrace/contrib/algoliasearch/patch.py
@@ -125,8 +125,7 @@ def _patched_search(func, instance, wrapt_args, wrapt_kwargs):
         span.set_tag_str(SPAN_KIND, SpanKind.CLIENT)
 
         span.set_tag(SPAN_MEASURED_KEY)
-
-        if not span.sampled:
+        if span.context.sampling_priority is None or span.context.sampling_priority <= 0:
             return func(*wrapt_args, **wrapt_kwargs)
 
         if config.algoliasearch.collect_query_text:
diff --git a/ddtrace/contrib/elasticsearch/patch.py b/ddtrace/contrib/elasticsearch/patch.py
index f23a997147..c2383965eb 100644
--- a/ddtrace/contrib/elasticsearch/patch.py
+++ b/ddtrace/contrib/elasticsearch/patch.py
@@ -142,8 +142,8 @@ def _perform_request(func, instance, args, kwargs):
 
             span.set_tag(SPAN_MEASURED_KEY)
 
-            # Don't instrument if the trace is not sampled
-            if not span.sampled:
+            # Only instrument if trace is sampled or if we haven't tried to sample yet
+            if span.context.sampling_priority is None or span.context.sampling_priority <= 0:
                 yield func(*args, **kwargs)
                 return
 
diff --git a/ddtrace/contrib/psycopg/extensions.py b/ddtrace/contrib/psycopg/extensions.py
index a801114bff..7acbb34b90 100644
--- a/ddtrace/contrib/psycopg/extensions.py
+++ b/ddtrace/contrib/psycopg/extensions.py
@@ -43,7 +43,7 @@ def execute(self, query, vars=None):  # noqa: A002
                 s.set_tag_str(SPAN_KIND, SpanKind.CLIENT)
 
                 s.set_tag(SPAN_MEASURED_KEY)
-                if not s.sampled:
+                if s.context.sampling_priority is None or s.context.sampling_priority <= 0:
                     return super(TracedCursor, self).execute(query, vars)
 
                 s.resource = query
diff --git a/ddtrace/internal/sampling.py b/ddtrace/internal/sampling.py
index c935bf5a14..0d5aa1a278 100644
--- a/ddtrace/internal/sampling.py
+++ b/ddtrace/internal/sampling.py
@@ -305,7 +305,6 @@ def _apply_rate_limit(span, sampled, limiter):
 def _set_priority(span, priority):
     # type: (Span, int) -> None
     span.context.sampling_priority = priority
-    span.sampled = priority > 0  # Positive priorities mean it was kept
 
 
 def _get_highest_precedence_rule_matching(span, rules):
diff --git a/ddtrace/llmobs/_integrations/base.py b/ddtrace/llmobs/_integrations/base.py
index 148adfea06..58cd262038 100644
--- a/ddtrace/llmobs/_integrations/base.py
+++ b/ddtrace/llmobs/_integrations/base.py
@@ -73,12 +73,13 @@ def llmobs_enabled(self) -> bool:
         return config._llmobs_enabled
 
     def is_pc_sampled_span(self, span: Span) -> bool:
-        if not span.sampled:
+        if span.context.sampling_priority is None or span.context.sampling_priority <= 0:
             return False
         return self._span_pc_sampler.sample(span)
 
     def is_pc_sampled_log(self, span: Span) -> bool:
-        if not self.logs_enabled or not span.sampled:
+        sampled = span.context.sampling_priority is not None or span.context.sampling_priority <= 0  # type: ignore
+        if not self.logs_enabled or not sampled:
             return False
         return self._log_pc_sampler.sample(span)
 
diff --git a/tests/tracer/test_sampler.py b/tests/tracer/test_sampler.py
index 86497b0794..2839a1c9a5 100644
--- a/tests/tracer/test_sampler.py
+++ b/tests/tracer/test_sampler.py
@@ -104,7 +104,11 @@ def test_sample_rate_deviation(self):
 
             samples = tracer.pop()
             # non sampled spans do not have sample rate applied
-            sampled_spans = [s for s in samples if s.sampled]
+            sampled_spans = [s for s in samples if s.get_metric(SAMPLE_RATE_METRIC_KEY)]
+            if sample_rate != 1:
+                assert len(sampled_spans) != len(samples)
+            else:
+                assert len(sampled_spans) == len(samples)
             assert (
                 sampled_spans[0].get_metric(SAMPLE_RATE_METRIC_KEY) == sample_rate
             ), "Sampled span should have sample rate properly assigned"
@@ -131,7 +135,8 @@ def test_deterministic_behavior(self):
             assert (
                 len(samples) <= 1
             ), "evaluating sampling rules against a span should result in either dropping or not dropping it"
-            sampled = 1 == len([sample for sample in samples if sample.sampled is True])
+            # sample rate metric is only set on sampled spans
+            sampled = 1 == len([sample for sample in samples if sample.get_metric(SAMPLE_RATE_METRIC_KEY) is not None])
             for _ in range(10):
                 other_span = Span(str(i), trace_id=span.trace_id)
                 assert sampled == tracer._sampler.sample(
@@ -916,10 +921,11 @@ def test_datadog_sampler_sample_rules(sampler, sampling_priority, sampling_mecha
     dummy_tracer.configure(sampler=sampler)
     dummy_tracer.trace("span").finish()
     spans = dummy_tracer.pop()
-
     assert len(spans) > 0, "A tracer using DatadogSampler should always emit its spans"
     span = spans[0]
-    assert span.sampled, "A span emitted from a tracer using DatadogSampler should always have the 'sampled' flag set"
+    assert (
+        span.context.sampling_priority is not None
+    ), "A span emitted from a tracer using DatadogSampler should always have the 'sampled' flag set"
     trace_tag = "-%d" % sampling_mechanism if sampling_mechanism is not None else None
     assert_sampling_decision_tags(
         span, rule=rule, limit=limit, sampling_priority=sampling_priority, trace_tag=trace_tag
diff --git a/tests/tracer/test_tracer.py b/tests/tracer/test_tracer.py
index dad41a74cb..d43a35f2a2 100644
--- a/tests/tracer/test_tracer.py
+++ b/tests/tracer/test_tracer.py
@@ -1713,25 +1713,25 @@ def test_context_priority(tracer, test_spans):
 
 def test_spans_sampled_out(tracer, test_spans):
     with tracer.trace("root") as span:
-        span.sampled = False
+        span.context.sampling_priority = 0
         with tracer.trace("child") as span:
-            span.sampled = False
+            span.context.sampling_priority = 0
         with tracer.trace("child") as span:
-            span.sampled = False
+            span.context.sampling_priority = 0
 
     spans = test_spans.pop()
     assert len(spans) == 3
     for span in spans:
-        assert span.sampled is False
+        assert span.context.sampling_priority <= 0
 
 
 def test_spans_sampled_one(tracer, test_spans):
     with tracer.trace("root") as span:
-        span.sampled = False
+        span.context.sampling_priority = 0
         with tracer.trace("child") as span:
-            span.sampled = False
+            span.context.sampling_priority = 0
         with tracer.trace("child") as span:
-            span.sampled = True
+            span.context.sampling_priority = 1
 
     spans = test_spans.pop()
     assert len(spans) == 3
@@ -1739,11 +1739,11 @@ def test_spans_sampled_one(tracer, test_spans):
 
 def test_spans_sampled_all(tracer, test_spans):
     with tracer.trace("root") as span:
-        span.sampled = True
+        span.context.sampling_priority = 1
         with tracer.trace("child") as span:
-            span.sampled = True
+            span.context.sampling_priority = 1
         with tracer.trace("child") as span:
-            span.sampled = True
+            span.context.sampling_priority = 1
 
     spans = test_spans.pop()
     assert len(spans) == 3