From 81511c9557d46ddd5cd061d6202031610b707694 Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Tue, 27 Feb 2024 16:17:22 -0500 Subject: [PATCH] chore: make span.sampled reference span.context.sampling_priority and deprecate it (#8461) With `span.sampled` being outdated, and no longer reliable as a way to tell if a span is actually being sampled, we make this change to make its value reference `span.context.sampling_priority` which is what actually matters when it comes to sampling. This deprecates span.sampled and removes its use internally, replacing it with checks for span.context.sampling_priority to mimic behavior. If the value of `span.context.sampling_priority`, is `None` we know that sampling hasn't yet run, and then maintain the original behavior of `span.sampled` being `True` until sampling is run, if it's >0 we know that the span has been sampled, if <=0 we know the span has not been sampled. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. - [x] If change touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. ## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: Munir Abdinur --- ddtrace/_trace/span.py | 32 ++++++++++++++++++++++---- ddtrace/_trace/tracer.py | 3 +-- ddtrace/contrib/algoliasearch/patch.py | 3 +-- ddtrace/contrib/elasticsearch/patch.py | 4 ++-- ddtrace/contrib/psycopg/extensions.py | 2 +- ddtrace/internal/sampling.py | 1 - ddtrace/llmobs/_integrations/base.py | 5 ++-- tests/tracer/test_sampler.py | 14 +++++++---- tests/tracer/test_tracer.py | 20 ++++++++-------- 9 files changed, 55 insertions(+), 29 deletions(-) diff --git a/ddtrace/_trace/span.py b/ddtrace/_trace/span.py index 2460e8fce6..bd290c9106 100644 --- a/ddtrace/_trace/span.py +++ b/ddtrace/_trace/span.py @@ -42,6 +42,8 @@ from ddtrace.internal.logger import get_logger from ddtrace.internal.sampling import SamplingMechanism from ddtrace.internal.sampling import set_sampling_decision_maker +from ddtrace.internal.utils.deprecations import DDTraceDeprecationWarning +from ddtrace.vendor.debtcollector import deprecate _NUMERIC_TAGS = (ANALYTICS_SAMPLE_RATE_KEY,) @@ -82,8 +84,6 @@ class Span(object): "span_type", "start_ns", "duration_ns", - # Sampler attributes - "sampled", # Internal attributes "_context", "_local_root", @@ -168,10 +168,8 @@ def __init__( self.parent_id = parent_id # type: Optional[int] self._on_finish_callbacks = [] if on_finish is None else on_finish - # sampling - self.sampled = True # type: bool - self._context = context._with_span(self) if context else None # type: Optional[Context] + self._links = {} # type: Dict[int, SpanLink] if links: self._links = {link.span_id: link for link in links} @@ -260,6 +258,30 @@ def duration(self, value): # type: (float) -> None self.duration_ns = int(value * 1e9) + @property + def sampled(self): + # type: () -> Optional[bool] + deprecate( + "span.sampled is deprecated and will be removed in a future version of the tracer.", + message="""span.sampled references the state of span.context.sampling_priority. + Please use span.context.sampling_priority instead to check if a span is sampled.""", + category=DDTraceDeprecationWarning, + ) + if self.context.sampling_priority is None: + # this maintains original span.sampled behavior, where all spans would start + # with span.sampled = True until sampling runs + return True + return self.context.sampling_priority > 0 + + @sampled.setter + def sampled(self, value): + deprecate( + "span.sampled is deprecated and will be removed in a future version of the tracer.", + message="""span.sampled has a no-op setter. + Please use span.set_tag('manual.keep'/'manual.drop') to keep or drop spans.""", + category=DDTraceDeprecationWarning, + ) + def finish(self, finish_time=None): # type: (Optional[float]) -> None """Mark the end time of the span and submit it to the tracer. diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index e7ad1b07f5..dca5fe1b8d 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -695,7 +695,6 @@ def _start_span( # Extra attributes when from a local parent if parent: - span.sampled = parent.sampled span._parent = parent span._local_root = parent._local_root @@ -750,7 +749,7 @@ def _start_span( self._services.add(service) if not trace_id: - span.sampled = self._sampler.sample(span, allow_false=isinstance(self._sampler, RateSampler)) + self._sampler.sample(span, allow_false=isinstance(self._sampler, RateSampler)) # Only call span processors if the tracer is enabled if self.enabled: diff --git a/ddtrace/contrib/algoliasearch/patch.py b/ddtrace/contrib/algoliasearch/patch.py index b576e4d108..5408504fc7 100644 --- a/ddtrace/contrib/algoliasearch/patch.py +++ b/ddtrace/contrib/algoliasearch/patch.py @@ -125,8 +125,7 @@ def _patched_search(func, instance, wrapt_args, wrapt_kwargs): span.set_tag_str(SPAN_KIND, SpanKind.CLIENT) span.set_tag(SPAN_MEASURED_KEY) - - if not span.sampled: + if span.context.sampling_priority is None or span.context.sampling_priority <= 0: return func(*wrapt_args, **wrapt_kwargs) if config.algoliasearch.collect_query_text: diff --git a/ddtrace/contrib/elasticsearch/patch.py b/ddtrace/contrib/elasticsearch/patch.py index f23a997147..c2383965eb 100644 --- a/ddtrace/contrib/elasticsearch/patch.py +++ b/ddtrace/contrib/elasticsearch/patch.py @@ -142,8 +142,8 @@ def _perform_request(func, instance, args, kwargs): span.set_tag(SPAN_MEASURED_KEY) - # Don't instrument if the trace is not sampled - if not span.sampled: + # Only instrument if trace is sampled or if we haven't tried to sample yet + if span.context.sampling_priority is None or span.context.sampling_priority <= 0: yield func(*args, **kwargs) return diff --git a/ddtrace/contrib/psycopg/extensions.py b/ddtrace/contrib/psycopg/extensions.py index a801114bff..7acbb34b90 100644 --- a/ddtrace/contrib/psycopg/extensions.py +++ b/ddtrace/contrib/psycopg/extensions.py @@ -43,7 +43,7 @@ def execute(self, query, vars=None): # noqa: A002 s.set_tag_str(SPAN_KIND, SpanKind.CLIENT) s.set_tag(SPAN_MEASURED_KEY) - if not s.sampled: + if s.context.sampling_priority is None or s.context.sampling_priority <= 0: return super(TracedCursor, self).execute(query, vars) s.resource = query diff --git a/ddtrace/internal/sampling.py b/ddtrace/internal/sampling.py index c935bf5a14..0d5aa1a278 100644 --- a/ddtrace/internal/sampling.py +++ b/ddtrace/internal/sampling.py @@ -305,7 +305,6 @@ def _apply_rate_limit(span, sampled, limiter): def _set_priority(span, priority): # type: (Span, int) -> None span.context.sampling_priority = priority - span.sampled = priority > 0 # Positive priorities mean it was kept def _get_highest_precedence_rule_matching(span, rules): diff --git a/ddtrace/llmobs/_integrations/base.py b/ddtrace/llmobs/_integrations/base.py index 148adfea06..58cd262038 100644 --- a/ddtrace/llmobs/_integrations/base.py +++ b/ddtrace/llmobs/_integrations/base.py @@ -73,12 +73,13 @@ def llmobs_enabled(self) -> bool: return config._llmobs_enabled def is_pc_sampled_span(self, span: Span) -> bool: - if not span.sampled: + if span.context.sampling_priority is None or span.context.sampling_priority <= 0: return False return self._span_pc_sampler.sample(span) def is_pc_sampled_log(self, span: Span) -> bool: - if not self.logs_enabled or not span.sampled: + sampled = span.context.sampling_priority is not None or span.context.sampling_priority <= 0 # type: ignore + if not self.logs_enabled or not sampled: return False return self._log_pc_sampler.sample(span) diff --git a/tests/tracer/test_sampler.py b/tests/tracer/test_sampler.py index 86497b0794..2839a1c9a5 100644 --- a/tests/tracer/test_sampler.py +++ b/tests/tracer/test_sampler.py @@ -104,7 +104,11 @@ def test_sample_rate_deviation(self): samples = tracer.pop() # non sampled spans do not have sample rate applied - sampled_spans = [s for s in samples if s.sampled] + sampled_spans = [s for s in samples if s.get_metric(SAMPLE_RATE_METRIC_KEY)] + if sample_rate != 1: + assert len(sampled_spans) != len(samples) + else: + assert len(sampled_spans) == len(samples) assert ( sampled_spans[0].get_metric(SAMPLE_RATE_METRIC_KEY) == sample_rate ), "Sampled span should have sample rate properly assigned" @@ -131,7 +135,8 @@ def test_deterministic_behavior(self): assert ( len(samples) <= 1 ), "evaluating sampling rules against a span should result in either dropping or not dropping it" - sampled = 1 == len([sample for sample in samples if sample.sampled is True]) + # sample rate metric is only set on sampled spans + sampled = 1 == len([sample for sample in samples if sample.get_metric(SAMPLE_RATE_METRIC_KEY) is not None]) for _ in range(10): other_span = Span(str(i), trace_id=span.trace_id) assert sampled == tracer._sampler.sample( @@ -916,10 +921,11 @@ def test_datadog_sampler_sample_rules(sampler, sampling_priority, sampling_mecha dummy_tracer.configure(sampler=sampler) dummy_tracer.trace("span").finish() spans = dummy_tracer.pop() - assert len(spans) > 0, "A tracer using DatadogSampler should always emit its spans" span = spans[0] - assert span.sampled, "A span emitted from a tracer using DatadogSampler should always have the 'sampled' flag set" + assert ( + span.context.sampling_priority is not None + ), "A span emitted from a tracer using DatadogSampler should always have the 'sampled' flag set" trace_tag = "-%d" % sampling_mechanism if sampling_mechanism is not None else None assert_sampling_decision_tags( span, rule=rule, limit=limit, sampling_priority=sampling_priority, trace_tag=trace_tag diff --git a/tests/tracer/test_tracer.py b/tests/tracer/test_tracer.py index dad41a74cb..d43a35f2a2 100644 --- a/tests/tracer/test_tracer.py +++ b/tests/tracer/test_tracer.py @@ -1713,25 +1713,25 @@ def test_context_priority(tracer, test_spans): def test_spans_sampled_out(tracer, test_spans): with tracer.trace("root") as span: - span.sampled = False + span.context.sampling_priority = 0 with tracer.trace("child") as span: - span.sampled = False + span.context.sampling_priority = 0 with tracer.trace("child") as span: - span.sampled = False + span.context.sampling_priority = 0 spans = test_spans.pop() assert len(spans) == 3 for span in spans: - assert span.sampled is False + assert span.context.sampling_priority <= 0 def test_spans_sampled_one(tracer, test_spans): with tracer.trace("root") as span: - span.sampled = False + span.context.sampling_priority = 0 with tracer.trace("child") as span: - span.sampled = False + span.context.sampling_priority = 0 with tracer.trace("child") as span: - span.sampled = True + span.context.sampling_priority = 1 spans = test_spans.pop() assert len(spans) == 3 @@ -1739,11 +1739,11 @@ def test_spans_sampled_one(tracer, test_spans): def test_spans_sampled_all(tracer, test_spans): with tracer.trace("root") as span: - span.sampled = True + span.context.sampling_priority = 1 with tracer.trace("child") as span: - span.sampled = True + span.context.sampling_priority = 1 with tracer.trace("child") as span: - span.sampled = True + span.context.sampling_priority = 1 spans = test_spans.pop() assert len(spans) == 3