From 8287871dc66d26a663c6b74481d813d85b464e55 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 20 Feb 2024 13:52:03 -0500 Subject: [PATCH 01/69] fix history --- ddtrace/_trace/processor/__init__.py | 17 ++++++++++-- ddtrace/_trace/span.py | 2 ++ ddtrace/_trace/tracer.py | 27 ++++++++++++++----- ddtrace/contrib/aiohttp/patch.py | 2 +- ddtrace/contrib/botocore/patch.py | 4 +-- ddtrace/contrib/botocore/services/kinesis.py | 18 +++++++------ ddtrace/contrib/botocore/services/sqs.py | 16 ++++++----- .../botocore/services/stepfunctions.py | 9 ++++--- ddtrace/contrib/botocore/utils.py | 10 +++---- ddtrace/contrib/celery/signals.py | 2 +- .../contrib/grpc/aio_client_interceptor.py | 2 +- ddtrace/contrib/grpc/client_interceptor.py | 2 +- ddtrace/contrib/httplib/patch.py | 2 +- ddtrace/contrib/httpx/patch.py | 10 +++---- ddtrace/contrib/kafka/patch.py | 2 +- ddtrace/contrib/requests/connection.py | 2 +- ddtrace/contrib/rq/__init__.py | 2 +- ddtrace/contrib/urllib3/patch.py | 2 +- ddtrace/propagation/http.py | 11 ++++++-- ...tracetagsprocessor_only_adds_new_tags.json | 10 +++---- ..._single_span_sampling[sampling_rule0].json | 14 ++++++---- 21 files changed, 107 insertions(+), 59 deletions(-) diff --git a/ddtrace/_trace/processor/__init__.py b/ddtrace/_trace/processor/__init__.py index cf8fdc9fd24..af61a1c8159 100644 --- a/ddtrace/_trace/processor/__init__.py +++ b/ddtrace/_trace/processor/__init__.py @@ -143,14 +143,23 @@ class TraceSamplingProcessor(TraceProcessor): """ _compute_stats_enabled = attr.ib(type=bool) + sampler = attr.ib() def process_trace(self, trace): # type: (List[Span]) -> Optional[List[Span]] if trace: + chunk_root = trace[0] + root_ctx = chunk_root._context + + # only sample if we haven't already sampled + # this sampling decision can still be overridden manually + if root_ctx and not root_ctx.sampling_priority: + _update_span_tags_from_context(chunk_root, root_ctx) + self.sampler.sample(trace[0]) # When stats computation is enabled in the tracer then we can # safely drop the traces. if self._compute_stats_enabled: - priority = trace[0]._context.sampling_priority if trace[0]._context is not None else None + priority = root_ctx.sampling_priority if root_ctx is not None else None if priority is not None and priority <= 0: # When any span is marked as keep by a single span sampling # decision then we still send all and only those spans. @@ -188,6 +197,10 @@ def on_span_finish(self, span): span.set_metric("_dd.top_level", 1) +def _update_span_tags_from_context(span, context): + span._context._update_tags(span) + + @attr.s class TraceTagsProcessor(TraceProcessor): """Processor that applies trace-level tags to the trace.""" @@ -211,7 +224,7 @@ def process_trace(self, trace): if not ctx: return trace - ctx._update_tags(chunk_root) + _update_span_tags_from_context(chunk_root, ctx) self._set_git_metadata(chunk_root) chunk_root.set_tag_str("language", "python") # for 128 bit trace ids diff --git a/ddtrace/_trace/span.py b/ddtrace/_trace/span.py index 8fc7e298efe..0f6021e4ab6 100644 --- a/ddtrace/_trace/span.py +++ b/ddtrace/_trace/span.py @@ -286,6 +286,8 @@ def _finish_ns(self, finish_time_ns): def _override_sampling_decision(self, decision): self.context.sampling_priority = decision set_sampling_decision_maker(self.context, SamplingMechanism.MANUAL) + # need to update the root span to keep track of the sampler ran there + # if user has set manual keep, we can avoid running sampling later for key in (SAMPLING_RULE_DECISION, SAMPLING_AGENT_DECISION, SAMPLING_LIMIT_DECISION): if key in self._local_root._metrics: del self._local_root._metrics[key] diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index 636a1aae8a2..0950383a9b5 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -64,7 +64,6 @@ from ddtrace.sampler import BasePrioritySampler from ddtrace.sampler import BaseSampler from ddtrace.sampler import DatadogSampler -from ddtrace.sampler import RateSampler from ddtrace.settings.asm import config as asm_config from ddtrace.settings.peer_service import _ps_config @@ -113,13 +112,17 @@ def _default_span_processors_factory( compute_stats_enabled: bool, single_span_sampling_rules: List[SpanSamplingRule], agent_url: str, + trace_sampler: BaseSampler, profiling_span_processor: EndpointCallCounterProcessor, ) -> Tuple[List[SpanProcessor], Optional[Any], List[SpanProcessor]]: # FIXME: type should be AppsecSpanProcessor but we have a cyclic import here """Construct the default list of span processors to use.""" trace_processors: List[TraceProcessor] = [] - trace_processors += [TraceTagsProcessor(), PeerServiceProcessor(_ps_config), BaseServiceProcessor()] - trace_processors += [TraceSamplingProcessor(compute_stats_enabled)] + trace_processors += [PeerServiceProcessor(_ps_config), BaseServiceProcessor()] + trace_processors += [ + TraceSamplingProcessor(compute_stats_enabled, trace_sampler), + TraceTagsProcessor(), + ] trace_processors += trace_filters span_processors: List[SpanProcessor] = [] @@ -262,6 +265,7 @@ def __init__( self._compute_stats, self._single_span_sampling_rules, self._agent_url, + self._sampler, self._endpoint_call_counter_span_processor, ) if config._data_streams_enabled: @@ -497,6 +501,7 @@ def configure( self._compute_stats, self._single_span_sampling_rules, self._agent_url, + self._sampler, self._endpoint_call_counter_span_processor, ) @@ -562,6 +567,7 @@ def _child_after_fork(self): self._compute_stats, self._single_span_sampling_rules, self._agent_url, + self._sampler, self._endpoint_call_counter_span_processor, ) @@ -747,9 +753,6 @@ def _start_span( if service and service not in self._services and self._is_span_internal(span): self._services.add(service) - if not trace_id: - span.sampled = self._sampler.sample(span, allow_false=isinstance(self._sampler, RateSampler)) - # Only call span processors if the tracer is enabled if self.enabled: for p in chain(self._span_processors, SpanProcessor.__processors__, self._deferred_processors): @@ -1075,8 +1078,20 @@ def _on_global_config_update(self, cfg, items): sample_rate = cfg._trace_sample_rate else: sample_rate = None + sampler = DatadogSampler(default_sample_rate=sample_rate) + self._sampler = sampler + # we need to update the processor that uses the span + # alternatively we could re-run tracer.configure() + for aggregator in self._deferred_processors: + if type(aggregator) == SpanAggregator: + for processor in aggregator._trace_processors: + if type(processor) == TraceSamplingProcessor: + processor.sampler = self._sampler + break + else: + log.debug("No TraceSamplingProcessor available to update sampling rate from remote config") if "tags" in items: self._tags = cfg.tags.copy() diff --git a/ddtrace/contrib/aiohttp/patch.py b/ddtrace/contrib/aiohttp/patch.py index 3830ee4ace5..a776bdef6d1 100644 --- a/ddtrace/contrib/aiohttp/patch.py +++ b/ddtrace/contrib/aiohttp/patch.py @@ -88,7 +88,7 @@ async def _traced_clientsession_request(aiohttp, pin, func, instance, args, kwar span.service = url.host if pin._config["distributed_tracing"]: - HTTPPropagator.inject(span.context, headers) + HTTPPropagator.inject(span.context, headers, sampler=pin.tracer._sampler, span=span) kwargs["headers"] = headers span.set_tag_str(COMPONENT, config.aiohttp_client.integration_name) diff --git a/ddtrace/contrib/botocore/patch.py b/ddtrace/contrib/botocore/patch.py index 81f2809e02e..cd203c85446 100644 --- a/ddtrace/contrib/botocore/patch.py +++ b/ddtrace/contrib/botocore/patch.py @@ -209,12 +209,12 @@ def patched_api_call_fallback(original_func, instance, args, kwargs, function_va if config.botocore["distributed_tracing"]: try: if endpoint_name == "lambda" and operation == "Invoke": - inject_trace_to_client_context(params, span) + inject_trace_to_client_context(params, span, pin.tracer) span.name = schematize_cloud_faas_operation( trace_operation, cloud_provider="aws", cloud_service="lambda" ) if endpoint_name == "events" and operation == "PutEvents": - inject_trace_to_eventbridge_detail(params, span) + inject_trace_to_eventbridge_detail(params, span, pin.tracer) span.name = schematize_cloud_messaging_operation( trace_operation, cloud_provider="aws", diff --git a/ddtrace/contrib/botocore/services/kinesis.py b/ddtrace/contrib/botocore/services/kinesis.py index 2785545d8c1..34a2272cf4c 100644 --- a/ddtrace/contrib/botocore/services/kinesis.py +++ b/ddtrace/contrib/botocore/services/kinesis.py @@ -36,11 +36,12 @@ class TraceInjectionSizeExceed(Exception): pass -def inject_trace_to_kinesis_stream_data(record, span): - # type: (Dict[str, Any], Span) -> None +def inject_trace_to_kinesis_stream_data(record, span, tracer): + # type: (Dict[str, Any], Span, Any) -> None """ :record: contains args for the current botocore action, Kinesis record is at index 1 :span: the span which provides the trace context to be propagated + :tracer: the tracer which provices the sampler for sampling Inject trace headers into the Kinesis record's Data field in addition to the existing data. Only possible if the existing data is JSON string or base64 encoded JSON string Max data size per record is 1MB (https://aws.amazon.com/kinesis/data-streams/faqs/) @@ -53,7 +54,7 @@ def inject_trace_to_kinesis_stream_data(record, span): line_break, data_obj = get_kinesis_data_object(data) if data_obj is not None: data_obj["_datadog"] = {} - HTTPPropagator.inject(span.context, data_obj["_datadog"]) + HTTPPropagator.inject(span.context, data_obj["_datadog"], sampler=tracer._sampler, span=span) data_json = json.dumps(data_obj) # if original string had a line break, add it back @@ -70,11 +71,12 @@ def inject_trace_to_kinesis_stream_data(record, span): record["Data"] = data_json -def inject_trace_to_kinesis_stream(params, span): - # type: (List[Any], Span) -> None +def inject_trace_to_kinesis_stream(params, span, tracer): + # type: (List[Any], Span, Any) -> None """ :params: contains the params for the current botocore action :span: the span which provides the trace context to be propagated + :tracer: the tracer which provices the sampler for sampling Max data size per record is 1MB (https://aws.amazon.com/kinesis/data-streams/faqs/) """ core.dispatch("botocore.kinesis.start", [params]) @@ -83,9 +85,9 @@ def inject_trace_to_kinesis_stream(params, span): if records: record = records[0] - inject_trace_to_kinesis_stream_data(record, span) + inject_trace_to_kinesis_stream_data(record, span, tracer) elif "Data" in params: - inject_trace_to_kinesis_stream_data(params, span) + inject_trace_to_kinesis_stream_data(params, span, tracer) def patched_kinesis_api_call(original_func, instance, args, kwargs, function_vars): @@ -141,7 +143,7 @@ def patched_kinesis_api_call(original_func, instance, args, kwargs, function_var if args and config.botocore["distributed_tracing"]: try: if endpoint_name == "kinesis" and operation in {"PutRecord", "PutRecords"}: - inject_trace_to_kinesis_stream(params, span) + inject_trace_to_kinesis_stream(params, span, pin.tracer) span.name = schematize_cloud_messaging_operation( trace_operation, cloud_provider="aws", diff --git a/ddtrace/contrib/botocore/services/sqs.py b/ddtrace/contrib/botocore/services/sqs.py index 21f9636768d..864a3204a77 100644 --- a/ddtrace/contrib/botocore/services/sqs.py +++ b/ddtrace/contrib/botocore/services/sqs.py @@ -66,17 +66,18 @@ def inject_trace_data_to_message_attributes(trace_data, entry, endpoint_service= log.warning("skipping trace injection, max number (10) of MessageAttributes exceeded") -def inject_trace_to_sqs_or_sns_batch_message(params, span, endpoint_service=None): - # type: (Any, Span, Optional[str]) -> None +def inject_trace_to_sqs_or_sns_batch_message(params, span, tracer, endpoint_service=None): + # type: (Any, Span, Optional[str], Any) -> None """ :params: contains the params for the current botocore action :span: the span which provides the trace context to be propagated + :tracer: the tracer which provices the sampler for sampling :endpoint_service: endpoint of message, "sqs" or "sns" Inject trace headers info into MessageAttributes for all SQS or SNS records inside a batch """ trace_data = {} - HTTPPropagator.inject(span.context, trace_data) + HTTPPropagator.inject(span.context, trace_data, sampler=tracer._sampler, span=span) # An entry here is an SNS or SQS record, and depending on how it was published, # it could either show up under Entries (in case of PutRecords), @@ -90,16 +91,17 @@ def inject_trace_to_sqs_or_sns_batch_message(params, span, endpoint_service=None log.warning("Skipping injecting Datadog attributes to records, no records available") -def inject_trace_to_sqs_or_sns_message(params, span, endpoint_service=None): - # type: (Any, Span, Optional[str]) -> None +def inject_trace_to_sqs_or_sns_message(params, span, tracer, endpoint_service=None): + # type: (Any, Span, Optional[str], Any) -> None """ :params: contains the params for the current botocore action :span: the span which provides the trace context to be propagated + :tracer: the tracer which provices the sampler for sampling :endpoint_service: endpoint of message, "sqs" or "sns" Inject trace headers info into MessageAttributes for the SQS or SNS record """ trace_data = {} - HTTPPropagator.inject(span.context, trace_data) + HTTPPropagator.inject(span.context, trace_data, sampler=tracer._sampler, span=span) core.dispatch("botocore.sqs_sns.start", [endpoint_service, trace_data, params]) inject_trace_data_to_message_attributes(trace_data, params, endpoint_service) @@ -169,6 +171,7 @@ def patched_sqs_api_call(original_func, instance, args, kwargs, function_vars): inject_trace_to_sqs_or_sns_message( params, span, + pin.tracer, endpoint_service=endpoint_name, ) span.name = schematize_cloud_messaging_operation( @@ -181,6 +184,7 @@ def patched_sqs_api_call(original_func, instance, args, kwargs, function_vars): inject_trace_to_sqs_or_sns_batch_message( params, span, + pin.tracer, endpoint_service=endpoint_name, ) span.name = schematize_cloud_messaging_operation( diff --git a/ddtrace/contrib/botocore/services/stepfunctions.py b/ddtrace/contrib/botocore/services/stepfunctions.py index 7aa32a514e3..36c28797505 100644 --- a/ddtrace/contrib/botocore/services/stepfunctions.py +++ b/ddtrace/contrib/botocore/services/stepfunctions.py @@ -21,11 +21,12 @@ log = get_logger(__name__) -def inject_trace_to_stepfunction_input(params, span): - # type: (Any, Span) -> None +def inject_trace_to_stepfunction_input(params, span, tracer): + # type: (Any, Span, Any) -> None """ :params: contains the params for the current botocore action :span: the span which provides the trace context to be propagated + :tracer: the tracer which provices the sampler for sampling Inject the trace headers into the StepFunction input if the input is a JSON string """ @@ -42,7 +43,7 @@ def inject_trace_to_stepfunction_input(params, span): log.warning("Input already has trace context.") return params["input"]["_datadog"] = {} - HTTPPropagator.inject(span.context, params["input"]["_datadog"]) + HTTPPropagator.inject(span.context, params["input"]["_datadog"], tracer._sampler, span=span) return elif isinstance(params["input"], str): @@ -54,7 +55,7 @@ def inject_trace_to_stepfunction_input(params, span): if isinstance(input_obj, dict): input_obj["_datadog"] = {} - HTTPPropagator.inject(span.context, input_obj["_datadog"]) + HTTPPropagator.inject(span.context, input_obj["_datadog"], tracer._sampler, span=span) input_json = json.dumps(input_obj) params["input"] = input_json diff --git a/ddtrace/contrib/botocore/utils.py b/ddtrace/contrib/botocore/utils.py index 46a5e4ba362..c243c429abb 100644 --- a/ddtrace/contrib/botocore/utils.py +++ b/ddtrace/contrib/botocore/utils.py @@ -74,8 +74,8 @@ def get_kinesis_data_object(data): return None, None -def inject_trace_to_eventbridge_detail(params, span): - # type: (Any, Span) -> None +def inject_trace_to_eventbridge_detail(params, span, tracer): + # type: (Any, Span, Any) -> None """ :params: contains the params for the current botocore action :span: the span which provides the trace context to be propagated @@ -96,7 +96,7 @@ def inject_trace_to_eventbridge_detail(params, span): continue detail["_datadog"] = {} - HTTPPropagator.inject(span.context, detail["_datadog"]) + HTTPPropagator.inject(span.context, detail["_datadog"], sampler=tracer._sampler, span=span) detail_json = json.dumps(detail) # check if detail size will exceed max size with headers @@ -118,9 +118,9 @@ def modify_client_context(client_context_object, trace_headers): client_context_object["custom"] = trace_headers -def inject_trace_to_client_context(params, span): +def inject_trace_to_client_context(params, span, tracer): trace_headers = {} - HTTPPropagator.inject(span.context, trace_headers) + HTTPPropagator.inject(span.context, trace_headers, sampler=tracer._sampler, span=span) client_context_object = {} if "ClientContext" in params: try: diff --git a/ddtrace/contrib/celery/signals.py b/ddtrace/contrib/celery/signals.py index e16f5ecace9..625d230ac5b 100644 --- a/ddtrace/contrib/celery/signals.py +++ b/ddtrace/contrib/celery/signals.py @@ -137,7 +137,7 @@ def trace_before_publish(*args, **kwargs): if config.celery["distributed_tracing"]: trace_headers = {} - propagator.inject(span.context, trace_headers) + propagator.inject(span.context, trace_headers, sampler=pin.tracer._sampler, span=span) # put distributed trace headers where celery will propagate them task_headers = kwargs.get("headers") or {} diff --git a/ddtrace/contrib/grpc/aio_client_interceptor.py b/ddtrace/contrib/grpc/aio_client_interceptor.py index 2e08c64ffc4..3fef8bfa947 100644 --- a/ddtrace/contrib/grpc/aio_client_interceptor.py +++ b/ddtrace/contrib/grpc/aio_client_interceptor.py @@ -125,7 +125,7 @@ def _intercept_client_call(self, method_kind, client_call_details): # propagate distributed tracing headers if available headers = {} if config.grpc_aio_client.distributed_tracing_enabled: - HTTPPropagator.inject(span.context, headers) + HTTPPropagator.inject(span.context, headers, tracer._sampler, span=span) metadata = [] if client_call_details.metadata is not None: diff --git a/ddtrace/contrib/grpc/client_interceptor.py b/ddtrace/contrib/grpc/client_interceptor.py index 59dc4ab10fa..253b29116fe 100644 --- a/ddtrace/contrib/grpc/client_interceptor.py +++ b/ddtrace/contrib/grpc/client_interceptor.py @@ -206,7 +206,7 @@ def _intercept_client_call(self, method_kind, client_call_details): # propagate distributed tracing headers if available headers = {} if config.grpc.distributed_tracing_enabled: - HTTPPropagator.inject(span.context, headers) + HTTPPropagator.inject(span.context, headers, sampler=tracer._sampler, span=span) metadata = [] if client_call_details.metadata is not None: diff --git a/ddtrace/contrib/httplib/patch.py b/ddtrace/contrib/httplib/patch.py index 47aa60bb03f..0fbd248484c 100644 --- a/ddtrace/contrib/httplib/patch.py +++ b/ddtrace/contrib/httplib/patch.py @@ -98,7 +98,7 @@ def _wrap_request(func, instance, args, kwargs): headers = args[3] else: headers = kwargs.setdefault("headers", {}) - HTTPPropagator.inject(span.context, headers) + HTTPPropagator.inject(span.context, headers, pin.tracer._sampler, span) except Exception: log.debug("error configuring request", exc_info=True) span = getattr(instance, "_datadog_span", None) diff --git a/ddtrace/contrib/httpx/patch.py b/ddtrace/contrib/httpx/patch.py index 11647da55e1..050ce68b117 100644 --- a/ddtrace/contrib/httpx/patch.py +++ b/ddtrace/contrib/httpx/patch.py @@ -84,12 +84,12 @@ def _get_service_name(pin, request): return ext_service(pin, config.httpx) -def _init_span(span, request): - # type: (Span, httpx.Request) -> None +def _init_span(span, request, tracer): + # type: (Span, httpx.Request, Any) -> None span.set_tag(SPAN_MEASURED_KEY) if distributed_tracing_enabled(config.httpx): - HTTPPropagator.inject(span.context, request.headers) + HTTPPropagator.inject(span.context, request.headers, sampler=tracer._sampler, span=span) sample_rate = config.httpx.get_analytics_sample_rate(use_global_config=True) if sample_rate is not None: @@ -131,7 +131,7 @@ async def _wrapped_async_send( # set span.kind to the operation type being performed span.set_tag_str(SPAN_KIND, SpanKind.CLIENT) - _init_span(span, req) + _init_span(span, req, pin.tracer) resp = None try: resp = await wrapped(*args, **kwargs) @@ -160,7 +160,7 @@ def _wrapped_sync_send( # set span.kind to the operation type being performed span.set_tag_str(SPAN_KIND, SpanKind.CLIENT) - _init_span(span, req) + _init_span(span, req, pin.tracer) resp = None try: resp = wrapped(*args, **kwargs) diff --git a/ddtrace/contrib/kafka/patch.py b/ddtrace/contrib/kafka/patch.py index 4218b11b4c2..25bd719e5eb 100644 --- a/ddtrace/contrib/kafka/patch.py +++ b/ddtrace/contrib/kafka/patch.py @@ -189,7 +189,7 @@ def traced_produce(func, instance, args, kwargs): if config.kafka.distributed_tracing_enabled: # inject headers with Datadog tags: headers = get_argument_value(args, kwargs, 6, "headers", True) or {} - Propagator.inject(span.context, headers) + Propagator.inject(span.context, headers, sampler=pin.tracer._sampler, span=span) args, kwargs = set_argument_value(args, kwargs, 6, "headers", headers) return func(*args, **kwargs) diff --git a/ddtrace/contrib/requests/connection.py b/ddtrace/contrib/requests/connection.py index 7f1f2431883..dc27744b33e 100644 --- a/ddtrace/contrib/requests/connection.py +++ b/ddtrace/contrib/requests/connection.py @@ -115,7 +115,7 @@ def _wrap_send(func, instance, args, kwargs): # propagate distributed tracing headers if cfg.get("distributed_tracing"): - HTTPPropagator.inject(span.context, request.headers) + HTTPPropagator.inject(span.context, request.headers, tracer._sampler, span) response = response_headers = None try: diff --git a/ddtrace/contrib/rq/__init__.py b/ddtrace/contrib/rq/__init__.py index 970f45d5199..01a45452a41 100644 --- a/ddtrace/contrib/rq/__init__.py +++ b/ddtrace/contrib/rq/__init__.py @@ -151,7 +151,7 @@ def traced_queue_enqueue_job(rq, pin, func, instance, args, kwargs): # If the queue is_async then add distributed tracing headers to the job if instance.is_async and config.rq.distributed_tracing_enabled: - HTTPPropagator.inject(span.context, job.meta) + HTTPPropagator.inject(span.context, job.meta, pin.tracer._sampler, span) return func(*args, **kwargs) diff --git a/ddtrace/contrib/urllib3/patch.py b/ddtrace/contrib/urllib3/patch.py index a2ae3df30e9..975f97130d1 100644 --- a/ddtrace/contrib/urllib3/patch.py +++ b/ddtrace/contrib/urllib3/patch.py @@ -122,7 +122,7 @@ def _wrap_urlopen(func, instance, args, kwargs): if request_headers is None: request_headers = {} kwargs["headers"] = request_headers - HTTPPropagator.inject(span.context, request_headers) + HTTPPropagator.inject(span.context, request_headers, sampler=pin.tracer._sampler, span=span) if config.urllib3.analytics_enabled: span.set_tag(ANALYTICS_SAMPLE_RATE_KEY, config.urllib3.get_analytics_sample_rate()) diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index 92dc4710b4a..ce1e75cbfa9 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -1,5 +1,6 @@ import re import sys +from typing import Any # noqa:F401 from typing import Dict # noqa:F401 from typing import FrozenSet # noqa:F401 from typing import List # noqa:F401 @@ -8,6 +9,8 @@ from typing import Tuple # noqa:F401 from typing import cast # noqa:F401 +from ddtrace._trace.span import Span # noqa:F401 + if sys.version_info >= (3, 8): from typing import Literal # noqa:F401 @@ -890,8 +893,8 @@ def _resolve_contexts(contexts, styles_w_ctx, normalized_headers): return primary_context @staticmethod - def inject(span_context, headers): - # type: (Context, Dict[str, str]) -> None + def inject(span_context, headers, sampler=None, span=None): + # type: (Context, Dict[str, str], Any, Optional[Span]) -> None """Inject Context attributes that have to be propagated as HTTP headers. Here is an example using `requests`:: @@ -919,6 +922,10 @@ def parent_call(): for key in span_context._baggage: headers[_HTTP_BAGGAGE_PREFIX + key] = span_context._baggage[key] + if sampler and span: + if not span.context.sampling_priority: + sampler.sample(span._local_root) + if PROPAGATION_STYLE_DATADOG in config._propagation_style_inject: _DatadogMultiHeader._inject(span_context, headers) if PROPAGATION_STYLE_B3_MULTI in config._propagation_style_inject: diff --git a/tests/snapshots/tests.integration.test_integration_snapshots.test_tracetagsprocessor_only_adds_new_tags.json b/tests/snapshots/tests.integration.test_integration_snapshots.test_tracetagsprocessor_only_adds_new_tags.json index f961988a878..c87d41556e5 100644 --- a/tests/snapshots/tests.integration.test_integration_snapshots.test_tracetagsprocessor_only_adds_new_tags.json +++ b/tests/snapshots/tests.integration.test_integration_snapshots.test_tracetagsprocessor_only_adds_new_tags.json @@ -9,16 +9,16 @@ "type": "", "error": 0, "meta": { - "_dd.p.dm": "-0", + "_dd.p.tid": "65d4cb4600000000", "language": "python", - "runtime-id": "6f2eeb0b30c54268b96f6eca8a2d66ba" + "runtime-id": "1c638d86e4034cf685c2df4b2c21b792" }, "metrics": { "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 2, - "process_id": 15844 + "process_id": 81660 }, - "duration": 37975, - "start": 1690574476848896124 + "duration": 66000, + "start": 1708444486203789000 }]] diff --git a/tests/snapshots/tests.integration.test_trace_stats.test_single_span_sampling[sampling_rule0].json b/tests/snapshots/tests.integration.test_trace_stats.test_single_span_sampling[sampling_rule0].json index aeecdfd65ab..d4b7f0993a0 100644 --- a/tests/snapshots/tests.integration.test_trace_stats.test_single_span_sampling[sampling_rule0].json +++ b/tests/snapshots/tests.integration.test_trace_stats.test_single_span_sampling[sampling_rule0].json @@ -5,16 +5,20 @@ "resource": "child", "trace_id": 0, "span_id": 1, - "parent_id": 13700571859906052169, + "parent_id": 6958320721841808734, "type": "", "error": 0, "meta": { - "_dd.base_service": "" + "_dd.base_service": "", + "_dd.p.dm": "-3", + "_dd.p.tid": "65cfa9ba00000000", + "language": "python" }, "metrics": { "_dd.span_sampling.mechanism": 8, - "_dd.tracer_kr": 1.0 + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": -1 }, - "duration": 14791, - "start": 1692900312248783421 + "duration": 48000, + "start": 1708108218885144000 }]] From cf4f3ee9f0811016257de6669c0d9e6391c4ce21 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 20 Feb 2024 15:29:42 -0500 Subject: [PATCH 02/69] fix botocore tests due to missing tracer pass in --- ddtrace/contrib/botocore/patch.py | 1 + ddtrace/contrib/botocore/services/sqs.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ddtrace/contrib/botocore/patch.py b/ddtrace/contrib/botocore/patch.py index cd203c85446..dfe07e06ea8 100644 --- a/ddtrace/contrib/botocore/patch.py +++ b/ddtrace/contrib/botocore/patch.py @@ -225,6 +225,7 @@ def patched_api_call_fallback(original_func, instance, args, kwargs, function_va inject_trace_to_sqs_or_sns_message( params, span, + pin.tracer, endpoint_service=endpoint_name, ) span.name = schematize_cloud_messaging_operation( diff --git a/ddtrace/contrib/botocore/services/sqs.py b/ddtrace/contrib/botocore/services/sqs.py index 864a3204a77..6b7cf17f434 100644 --- a/ddtrace/contrib/botocore/services/sqs.py +++ b/ddtrace/contrib/botocore/services/sqs.py @@ -92,7 +92,7 @@ def inject_trace_to_sqs_or_sns_batch_message(params, span, tracer, endpoint_serv def inject_trace_to_sqs_or_sns_message(params, span, tracer, endpoint_service=None): - # type: (Any, Span, Optional[str], Any) -> None + # type: (Any, Span, Any, Optional[str]) -> None """ :params: contains the params for the current botocore action :span: the span which provides the trace context to be propagated From 8d8efb334b4c2a40c2f30582c7e63a4d2145bc74 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 20 Feb 2024 16:25:24 -0500 Subject: [PATCH 03/69] sample before dbm propagation --- ddtrace/propagation/_database_monitoring.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ddtrace/propagation/_database_monitoring.py b/ddtrace/propagation/_database_monitoring.py index 9f3163d92ac..960da9b7613 100644 --- a/ddtrace/propagation/_database_monitoring.py +++ b/ddtrace/propagation/_database_monitoring.py @@ -1,6 +1,7 @@ from typing import TYPE_CHECKING # noqa:F401 from typing import Union # noqa:F401 +from ddtrace import tracer from ddtrace.internal.logger import get_logger from ddtrace.settings.peer_service import PeerServiceConfig from ddtrace.vendor.sqlcommenter import generate_sql_comment as _generate_sql_comment @@ -52,6 +53,7 @@ def __init__(self, sql_pos, sql_kw, sql_injector=default_sql_injector): def inject(self, dbspan, args, kwargs): dbm_comment = self._get_dbm_comment(dbspan) + tracer.sample(dbspan._local_root) if dbm_comment is None: # injection_mode is disabled return args, kwargs From 5b571e62612b7ef6889bcffc35d106d6e72b7f33 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 20 Feb 2024 17:10:18 -0500 Subject: [PATCH 04/69] just import tracer into propagator rather than pass in at integration level --- ddtrace/contrib/aiohttp/patch.py | 2 +- ddtrace/contrib/botocore/patch.py | 5 ++--- ddtrace/contrib/botocore/services/kinesis.py | 18 ++++++++---------- ddtrace/contrib/botocore/services/sqs.py | 16 ++++++---------- .../contrib/botocore/services/stepfunctions.py | 9 ++++----- ddtrace/contrib/botocore/utils.py | 10 +++++----- ddtrace/contrib/celery/signals.py | 2 +- ddtrace/contrib/grpc/aio_client_interceptor.py | 2 +- ddtrace/contrib/grpc/client_interceptor.py | 2 +- ddtrace/contrib/httplib/patch.py | 2 +- ddtrace/contrib/httpx/patch.py | 10 +++++----- ddtrace/contrib/kafka/patch.py | 2 +- ddtrace/contrib/requests/connection.py | 2 +- ddtrace/contrib/rq/__init__.py | 2 +- ddtrace/contrib/urllib3/patch.py | 2 +- ddtrace/propagation/http.py | 10 +++++----- 16 files changed, 44 insertions(+), 52 deletions(-) diff --git a/ddtrace/contrib/aiohttp/patch.py b/ddtrace/contrib/aiohttp/patch.py index a776bdef6d1..3830ee4ace5 100644 --- a/ddtrace/contrib/aiohttp/patch.py +++ b/ddtrace/contrib/aiohttp/patch.py @@ -88,7 +88,7 @@ async def _traced_clientsession_request(aiohttp, pin, func, instance, args, kwar span.service = url.host if pin._config["distributed_tracing"]: - HTTPPropagator.inject(span.context, headers, sampler=pin.tracer._sampler, span=span) + HTTPPropagator.inject(span.context, headers) kwargs["headers"] = headers span.set_tag_str(COMPONENT, config.aiohttp_client.integration_name) diff --git a/ddtrace/contrib/botocore/patch.py b/ddtrace/contrib/botocore/patch.py index dfe07e06ea8..81f2809e02e 100644 --- a/ddtrace/contrib/botocore/patch.py +++ b/ddtrace/contrib/botocore/patch.py @@ -209,12 +209,12 @@ def patched_api_call_fallback(original_func, instance, args, kwargs, function_va if config.botocore["distributed_tracing"]: try: if endpoint_name == "lambda" and operation == "Invoke": - inject_trace_to_client_context(params, span, pin.tracer) + inject_trace_to_client_context(params, span) span.name = schematize_cloud_faas_operation( trace_operation, cloud_provider="aws", cloud_service="lambda" ) if endpoint_name == "events" and operation == "PutEvents": - inject_trace_to_eventbridge_detail(params, span, pin.tracer) + inject_trace_to_eventbridge_detail(params, span) span.name = schematize_cloud_messaging_operation( trace_operation, cloud_provider="aws", @@ -225,7 +225,6 @@ def patched_api_call_fallback(original_func, instance, args, kwargs, function_va inject_trace_to_sqs_or_sns_message( params, span, - pin.tracer, endpoint_service=endpoint_name, ) span.name = schematize_cloud_messaging_operation( diff --git a/ddtrace/contrib/botocore/services/kinesis.py b/ddtrace/contrib/botocore/services/kinesis.py index 34a2272cf4c..2785545d8c1 100644 --- a/ddtrace/contrib/botocore/services/kinesis.py +++ b/ddtrace/contrib/botocore/services/kinesis.py @@ -36,12 +36,11 @@ class TraceInjectionSizeExceed(Exception): pass -def inject_trace_to_kinesis_stream_data(record, span, tracer): - # type: (Dict[str, Any], Span, Any) -> None +def inject_trace_to_kinesis_stream_data(record, span): + # type: (Dict[str, Any], Span) -> None """ :record: contains args for the current botocore action, Kinesis record is at index 1 :span: the span which provides the trace context to be propagated - :tracer: the tracer which provices the sampler for sampling Inject trace headers into the Kinesis record's Data field in addition to the existing data. Only possible if the existing data is JSON string or base64 encoded JSON string Max data size per record is 1MB (https://aws.amazon.com/kinesis/data-streams/faqs/) @@ -54,7 +53,7 @@ def inject_trace_to_kinesis_stream_data(record, span, tracer): line_break, data_obj = get_kinesis_data_object(data) if data_obj is not None: data_obj["_datadog"] = {} - HTTPPropagator.inject(span.context, data_obj["_datadog"], sampler=tracer._sampler, span=span) + HTTPPropagator.inject(span.context, data_obj["_datadog"]) data_json = json.dumps(data_obj) # if original string had a line break, add it back @@ -71,12 +70,11 @@ def inject_trace_to_kinesis_stream_data(record, span, tracer): record["Data"] = data_json -def inject_trace_to_kinesis_stream(params, span, tracer): - # type: (List[Any], Span, Any) -> None +def inject_trace_to_kinesis_stream(params, span): + # type: (List[Any], Span) -> None """ :params: contains the params for the current botocore action :span: the span which provides the trace context to be propagated - :tracer: the tracer which provices the sampler for sampling Max data size per record is 1MB (https://aws.amazon.com/kinesis/data-streams/faqs/) """ core.dispatch("botocore.kinesis.start", [params]) @@ -85,9 +83,9 @@ def inject_trace_to_kinesis_stream(params, span, tracer): if records: record = records[0] - inject_trace_to_kinesis_stream_data(record, span, tracer) + inject_trace_to_kinesis_stream_data(record, span) elif "Data" in params: - inject_trace_to_kinesis_stream_data(params, span, tracer) + inject_trace_to_kinesis_stream_data(params, span) def patched_kinesis_api_call(original_func, instance, args, kwargs, function_vars): @@ -143,7 +141,7 @@ def patched_kinesis_api_call(original_func, instance, args, kwargs, function_var if args and config.botocore["distributed_tracing"]: try: if endpoint_name == "kinesis" and operation in {"PutRecord", "PutRecords"}: - inject_trace_to_kinesis_stream(params, span, pin.tracer) + inject_trace_to_kinesis_stream(params, span) span.name = schematize_cloud_messaging_operation( trace_operation, cloud_provider="aws", diff --git a/ddtrace/contrib/botocore/services/sqs.py b/ddtrace/contrib/botocore/services/sqs.py index 6b7cf17f434..21f9636768d 100644 --- a/ddtrace/contrib/botocore/services/sqs.py +++ b/ddtrace/contrib/botocore/services/sqs.py @@ -66,18 +66,17 @@ def inject_trace_data_to_message_attributes(trace_data, entry, endpoint_service= log.warning("skipping trace injection, max number (10) of MessageAttributes exceeded") -def inject_trace_to_sqs_or_sns_batch_message(params, span, tracer, endpoint_service=None): - # type: (Any, Span, Optional[str], Any) -> None +def inject_trace_to_sqs_or_sns_batch_message(params, span, endpoint_service=None): + # type: (Any, Span, Optional[str]) -> None """ :params: contains the params for the current botocore action :span: the span which provides the trace context to be propagated - :tracer: the tracer which provices the sampler for sampling :endpoint_service: endpoint of message, "sqs" or "sns" Inject trace headers info into MessageAttributes for all SQS or SNS records inside a batch """ trace_data = {} - HTTPPropagator.inject(span.context, trace_data, sampler=tracer._sampler, span=span) + HTTPPropagator.inject(span.context, trace_data) # An entry here is an SNS or SQS record, and depending on how it was published, # it could either show up under Entries (in case of PutRecords), @@ -91,17 +90,16 @@ def inject_trace_to_sqs_or_sns_batch_message(params, span, tracer, endpoint_serv log.warning("Skipping injecting Datadog attributes to records, no records available") -def inject_trace_to_sqs_or_sns_message(params, span, tracer, endpoint_service=None): - # type: (Any, Span, Any, Optional[str]) -> None +def inject_trace_to_sqs_or_sns_message(params, span, endpoint_service=None): + # type: (Any, Span, Optional[str]) -> None """ :params: contains the params for the current botocore action :span: the span which provides the trace context to be propagated - :tracer: the tracer which provices the sampler for sampling :endpoint_service: endpoint of message, "sqs" or "sns" Inject trace headers info into MessageAttributes for the SQS or SNS record """ trace_data = {} - HTTPPropagator.inject(span.context, trace_data, sampler=tracer._sampler, span=span) + HTTPPropagator.inject(span.context, trace_data) core.dispatch("botocore.sqs_sns.start", [endpoint_service, trace_data, params]) inject_trace_data_to_message_attributes(trace_data, params, endpoint_service) @@ -171,7 +169,6 @@ def patched_sqs_api_call(original_func, instance, args, kwargs, function_vars): inject_trace_to_sqs_or_sns_message( params, span, - pin.tracer, endpoint_service=endpoint_name, ) span.name = schematize_cloud_messaging_operation( @@ -184,7 +181,6 @@ def patched_sqs_api_call(original_func, instance, args, kwargs, function_vars): inject_trace_to_sqs_or_sns_batch_message( params, span, - pin.tracer, endpoint_service=endpoint_name, ) span.name = schematize_cloud_messaging_operation( diff --git a/ddtrace/contrib/botocore/services/stepfunctions.py b/ddtrace/contrib/botocore/services/stepfunctions.py index 36c28797505..7aa32a514e3 100644 --- a/ddtrace/contrib/botocore/services/stepfunctions.py +++ b/ddtrace/contrib/botocore/services/stepfunctions.py @@ -21,12 +21,11 @@ log = get_logger(__name__) -def inject_trace_to_stepfunction_input(params, span, tracer): - # type: (Any, Span, Any) -> None +def inject_trace_to_stepfunction_input(params, span): + # type: (Any, Span) -> None """ :params: contains the params for the current botocore action :span: the span which provides the trace context to be propagated - :tracer: the tracer which provices the sampler for sampling Inject the trace headers into the StepFunction input if the input is a JSON string """ @@ -43,7 +42,7 @@ def inject_trace_to_stepfunction_input(params, span, tracer): log.warning("Input already has trace context.") return params["input"]["_datadog"] = {} - HTTPPropagator.inject(span.context, params["input"]["_datadog"], tracer._sampler, span=span) + HTTPPropagator.inject(span.context, params["input"]["_datadog"]) return elif isinstance(params["input"], str): @@ -55,7 +54,7 @@ def inject_trace_to_stepfunction_input(params, span, tracer): if isinstance(input_obj, dict): input_obj["_datadog"] = {} - HTTPPropagator.inject(span.context, input_obj["_datadog"], tracer._sampler, span=span) + HTTPPropagator.inject(span.context, input_obj["_datadog"]) input_json = json.dumps(input_obj) params["input"] = input_json diff --git a/ddtrace/contrib/botocore/utils.py b/ddtrace/contrib/botocore/utils.py index c243c429abb..46a5e4ba362 100644 --- a/ddtrace/contrib/botocore/utils.py +++ b/ddtrace/contrib/botocore/utils.py @@ -74,8 +74,8 @@ def get_kinesis_data_object(data): return None, None -def inject_trace_to_eventbridge_detail(params, span, tracer): - # type: (Any, Span, Any) -> None +def inject_trace_to_eventbridge_detail(params, span): + # type: (Any, Span) -> None """ :params: contains the params for the current botocore action :span: the span which provides the trace context to be propagated @@ -96,7 +96,7 @@ def inject_trace_to_eventbridge_detail(params, span, tracer): continue detail["_datadog"] = {} - HTTPPropagator.inject(span.context, detail["_datadog"], sampler=tracer._sampler, span=span) + HTTPPropagator.inject(span.context, detail["_datadog"]) detail_json = json.dumps(detail) # check if detail size will exceed max size with headers @@ -118,9 +118,9 @@ def modify_client_context(client_context_object, trace_headers): client_context_object["custom"] = trace_headers -def inject_trace_to_client_context(params, span, tracer): +def inject_trace_to_client_context(params, span): trace_headers = {} - HTTPPropagator.inject(span.context, trace_headers, sampler=tracer._sampler, span=span) + HTTPPropagator.inject(span.context, trace_headers) client_context_object = {} if "ClientContext" in params: try: diff --git a/ddtrace/contrib/celery/signals.py b/ddtrace/contrib/celery/signals.py index 625d230ac5b..e16f5ecace9 100644 --- a/ddtrace/contrib/celery/signals.py +++ b/ddtrace/contrib/celery/signals.py @@ -137,7 +137,7 @@ def trace_before_publish(*args, **kwargs): if config.celery["distributed_tracing"]: trace_headers = {} - propagator.inject(span.context, trace_headers, sampler=pin.tracer._sampler, span=span) + propagator.inject(span.context, trace_headers) # put distributed trace headers where celery will propagate them task_headers = kwargs.get("headers") or {} diff --git a/ddtrace/contrib/grpc/aio_client_interceptor.py b/ddtrace/contrib/grpc/aio_client_interceptor.py index 3fef8bfa947..2e08c64ffc4 100644 --- a/ddtrace/contrib/grpc/aio_client_interceptor.py +++ b/ddtrace/contrib/grpc/aio_client_interceptor.py @@ -125,7 +125,7 @@ def _intercept_client_call(self, method_kind, client_call_details): # propagate distributed tracing headers if available headers = {} if config.grpc_aio_client.distributed_tracing_enabled: - HTTPPropagator.inject(span.context, headers, tracer._sampler, span=span) + HTTPPropagator.inject(span.context, headers) metadata = [] if client_call_details.metadata is not None: diff --git a/ddtrace/contrib/grpc/client_interceptor.py b/ddtrace/contrib/grpc/client_interceptor.py index 253b29116fe..59dc4ab10fa 100644 --- a/ddtrace/contrib/grpc/client_interceptor.py +++ b/ddtrace/contrib/grpc/client_interceptor.py @@ -206,7 +206,7 @@ def _intercept_client_call(self, method_kind, client_call_details): # propagate distributed tracing headers if available headers = {} if config.grpc.distributed_tracing_enabled: - HTTPPropagator.inject(span.context, headers, sampler=tracer._sampler, span=span) + HTTPPropagator.inject(span.context, headers) metadata = [] if client_call_details.metadata is not None: diff --git a/ddtrace/contrib/httplib/patch.py b/ddtrace/contrib/httplib/patch.py index 0fbd248484c..47aa60bb03f 100644 --- a/ddtrace/contrib/httplib/patch.py +++ b/ddtrace/contrib/httplib/patch.py @@ -98,7 +98,7 @@ def _wrap_request(func, instance, args, kwargs): headers = args[3] else: headers = kwargs.setdefault("headers", {}) - HTTPPropagator.inject(span.context, headers, pin.tracer._sampler, span) + HTTPPropagator.inject(span.context, headers) except Exception: log.debug("error configuring request", exc_info=True) span = getattr(instance, "_datadog_span", None) diff --git a/ddtrace/contrib/httpx/patch.py b/ddtrace/contrib/httpx/patch.py index 050ce68b117..11647da55e1 100644 --- a/ddtrace/contrib/httpx/patch.py +++ b/ddtrace/contrib/httpx/patch.py @@ -84,12 +84,12 @@ def _get_service_name(pin, request): return ext_service(pin, config.httpx) -def _init_span(span, request, tracer): - # type: (Span, httpx.Request, Any) -> None +def _init_span(span, request): + # type: (Span, httpx.Request) -> None span.set_tag(SPAN_MEASURED_KEY) if distributed_tracing_enabled(config.httpx): - HTTPPropagator.inject(span.context, request.headers, sampler=tracer._sampler, span=span) + HTTPPropagator.inject(span.context, request.headers) sample_rate = config.httpx.get_analytics_sample_rate(use_global_config=True) if sample_rate is not None: @@ -131,7 +131,7 @@ async def _wrapped_async_send( # set span.kind to the operation type being performed span.set_tag_str(SPAN_KIND, SpanKind.CLIENT) - _init_span(span, req, pin.tracer) + _init_span(span, req) resp = None try: resp = await wrapped(*args, **kwargs) @@ -160,7 +160,7 @@ def _wrapped_sync_send( # set span.kind to the operation type being performed span.set_tag_str(SPAN_KIND, SpanKind.CLIENT) - _init_span(span, req, pin.tracer) + _init_span(span, req) resp = None try: resp = wrapped(*args, **kwargs) diff --git a/ddtrace/contrib/kafka/patch.py b/ddtrace/contrib/kafka/patch.py index 25bd719e5eb..4218b11b4c2 100644 --- a/ddtrace/contrib/kafka/patch.py +++ b/ddtrace/contrib/kafka/patch.py @@ -189,7 +189,7 @@ def traced_produce(func, instance, args, kwargs): if config.kafka.distributed_tracing_enabled: # inject headers with Datadog tags: headers = get_argument_value(args, kwargs, 6, "headers", True) or {} - Propagator.inject(span.context, headers, sampler=pin.tracer._sampler, span=span) + Propagator.inject(span.context, headers) args, kwargs = set_argument_value(args, kwargs, 6, "headers", headers) return func(*args, **kwargs) diff --git a/ddtrace/contrib/requests/connection.py b/ddtrace/contrib/requests/connection.py index dc27744b33e..7f1f2431883 100644 --- a/ddtrace/contrib/requests/connection.py +++ b/ddtrace/contrib/requests/connection.py @@ -115,7 +115,7 @@ def _wrap_send(func, instance, args, kwargs): # propagate distributed tracing headers if cfg.get("distributed_tracing"): - HTTPPropagator.inject(span.context, request.headers, tracer._sampler, span) + HTTPPropagator.inject(span.context, request.headers) response = response_headers = None try: diff --git a/ddtrace/contrib/rq/__init__.py b/ddtrace/contrib/rq/__init__.py index 01a45452a41..970f45d5199 100644 --- a/ddtrace/contrib/rq/__init__.py +++ b/ddtrace/contrib/rq/__init__.py @@ -151,7 +151,7 @@ def traced_queue_enqueue_job(rq, pin, func, instance, args, kwargs): # If the queue is_async then add distributed tracing headers to the job if instance.is_async and config.rq.distributed_tracing_enabled: - HTTPPropagator.inject(span.context, job.meta, pin.tracer._sampler, span) + HTTPPropagator.inject(span.context, job.meta) return func(*args, **kwargs) diff --git a/ddtrace/contrib/urllib3/patch.py b/ddtrace/contrib/urllib3/patch.py index 975f97130d1..a2ae3df30e9 100644 --- a/ddtrace/contrib/urllib3/patch.py +++ b/ddtrace/contrib/urllib3/patch.py @@ -122,7 +122,7 @@ def _wrap_urlopen(func, instance, args, kwargs): if request_headers is None: request_headers = {} kwargs["headers"] = request_headers - HTTPPropagator.inject(span.context, request_headers, sampler=pin.tracer._sampler, span=span) + HTTPPropagator.inject(span.context, request_headers) if config.urllib3.analytics_enabled: span.set_tag(ANALYTICS_SAMPLE_RATE_KEY, config.urllib3.get_analytics_sample_rate()) diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index ce1e75cbfa9..185023e7208 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -9,6 +9,7 @@ from typing import Tuple # noqa:F401 from typing import cast # noqa:F401 +from ddtrace import tracer from ddtrace._trace.span import Span # noqa:F401 @@ -893,8 +894,8 @@ def _resolve_contexts(contexts, styles_w_ctx, normalized_headers): return primary_context @staticmethod - def inject(span_context, headers, sampler=None, span=None): - # type: (Context, Dict[str, str], Any, Optional[Span]) -> None + def inject(span_context, headers, span=None): + # type: (Context, Dict[str, str], Optional[Span]) -> None """Inject Context attributes that have to be propagated as HTTP headers. Here is an example using `requests`:: @@ -921,10 +922,9 @@ def parent_call(): if config.propagation_http_baggage_enabled is True and span_context._baggage is not None: for key in span_context._baggage: headers[_HTTP_BAGGAGE_PREFIX + key] = span_context._baggage[key] - - if sampler and span: + if tracer._sampler and span: if not span.context.sampling_priority: - sampler.sample(span._local_root) + tracer._sampler.sample(span._local_root) if PROPAGATION_STYLE_DATADOG in config._propagation_style_inject: _DatadogMultiHeader._inject(span_context, headers) From 65def8b246a011b724b692bf34665b5bdf9fd448 Mon Sep 17 00:00:00 2001 From: "Tahir H. Butt" Date: Wed, 21 Feb 2024 13:02:11 -0500 Subject: [PATCH 05/69] chore(trace): handle import of ddtrace.tracer --- ddtrace/__init__.py | 9 +++++++++ ddtrace/tracer.py | 4 ++-- tests/tracer/test_tracer.py | 15 +++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/ddtrace/__init__.py b/ddtrace/__init__.py index 76c1195e42a..6002383a926 100644 --- a/ddtrace/__init__.py +++ b/ddtrace/__init__.py @@ -1,4 +1,5 @@ import sys +import warnings LOADED_MODULES = frozenset(sys.modules.keys()) @@ -41,6 +42,14 @@ from ddtrace.vendor import debtcollector from .version import get_version # noqa: E402 +# DEV: Import deprecated tracer module in order to retain side-effect of package +# initialization, which added this module to sys.modules. We catch deprecation +# warnings as this is only to retain a side effect of the package +# initialization. +with warnings.catch_warnings(): + warnings.simplefilter("ignore") + from .tracer import Tracer as _ + __version__ = get_version() diff --git a/ddtrace/tracer.py b/ddtrace/tracer.py index 3ba7e72ac55..afb4e05492d 100644 --- a/ddtrace/tracer.py +++ b/ddtrace/tracer.py @@ -4,7 +4,7 @@ deprecate( - "The tracer module is deprecated and will be moved.", - message="A new tracer interface will be provided by the trace sub-package.", + "The ddtrace.tracer module is deprecated and will be removed.", + message="A new interface will be provided by the trace sub-package.", category=DDTraceDeprecationWarning, ) diff --git a/tests/tracer/test_tracer.py b/tests/tracer/test_tracer.py index 0963c93d7f8..dad41a74cb1 100644 --- a/tests/tracer/test_tracer.py +++ b/tests/tracer/test_tracer.py @@ -1969,3 +1969,18 @@ def test_installed_excepthook(): assert sys.excepthook is telemetry._excepthook # Reset exception hooks telemetry.uninstall_excepthook() + + +@pytest.mark.subprocess(parametrize={"IMPORT_DDTRACE_TRACER": ["true", "false"]}) +def test_import_ddtrace_tracer_not_module(): + import os + + import_ddtrace_tracer = os.environ["IMPORT_DDTRACE_TRACER"] == "true" + + if import_ddtrace_tracer: + import ddtrace.tracer # noqa: F401 + + from ddtrace import Tracer + from ddtrace import tracer + + assert isinstance(tracer, Tracer) From 5c41da72cce51b64ffa232e0160d43a6ce06c61b Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Thu, 22 Feb 2024 16:09:46 -0500 Subject: [PATCH 06/69] change tracer import --- ddtrace/propagation/http.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index 980f25c6f77..54497d0efd3 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -9,7 +9,7 @@ from typing import Tuple # noqa:F401 from typing import cast # noqa:F401 -from ddtrace import tracer +import ddtrace from ddtrace._trace.span import Span # noqa:F401 @@ -926,9 +926,9 @@ def parent_call(): if config.propagation_http_baggage_enabled is True and span_context._baggage is not None: for key in span_context._baggage: headers[_HTTP_BAGGAGE_PREFIX + key] = span_context._baggage[key] - if tracer._sampler and span: + if ddtrace.tracer._sampler and span: if not span.context.sampling_priority: - tracer._sampler.sample(span._local_root) + ddtrace.tracer._sampler.sample(span._local_root) if PROPAGATION_STYLE_DATADOG in config._propagation_style_inject: _DatadogMultiHeader._inject(span_context, headers) From b0767ada1eca9b5552c030dfed2e25615731e512 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Thu, 22 Feb 2024 16:21:05 -0500 Subject: [PATCH 07/69] sample correctly before database propagation --- ddtrace/propagation/_database_monitoring.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ddtrace/propagation/_database_monitoring.py b/ddtrace/propagation/_database_monitoring.py index 960da9b7613..b7fac497216 100644 --- a/ddtrace/propagation/_database_monitoring.py +++ b/ddtrace/propagation/_database_monitoring.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING # noqa:F401 from typing import Union # noqa:F401 -from ddtrace import tracer +import ddtrace from ddtrace.internal.logger import get_logger from ddtrace.settings.peer_service import PeerServiceConfig from ddtrace.vendor.sqlcommenter import generate_sql_comment as _generate_sql_comment @@ -53,7 +53,7 @@ def __init__(self, sql_pos, sql_kw, sql_injector=default_sql_injector): def inject(self, dbspan, args, kwargs): dbm_comment = self._get_dbm_comment(dbspan) - tracer.sample(dbspan._local_root) + ddtrace.tracer._sampler.sample(dbspan._local_root) if dbm_comment is None: # injection_mode is disabled return args, kwargs From fbfaabdd9e8a9d7824bbfc9c875bf47fa26e8d58 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Thu, 22 Feb 2024 17:27:29 -0500 Subject: [PATCH 08/69] add span into inject for httppropagator --- ddtrace/contrib/aiohttp/patch.py | 2 +- ddtrace/contrib/botocore/services/kinesis.py | 2 +- ddtrace/contrib/botocore/services/sqs.py | 4 ++-- ddtrace/contrib/botocore/services/stepfunctions.py | 4 ++-- ddtrace/contrib/botocore/utils.py | 4 ++-- ddtrace/contrib/celery/signals.py | 2 +- ddtrace/contrib/grpc/aio_client_interceptor.py | 2 +- ddtrace/contrib/grpc/client_interceptor.py | 2 +- ddtrace/contrib/httplib/patch.py | 2 +- ddtrace/contrib/httpx/patch.py | 2 +- ddtrace/contrib/kafka/patch.py | 2 +- ddtrace/contrib/kombu/patch.py | 2 +- ddtrace/contrib/requests/connection.py | 2 +- ddtrace/contrib/rq/__init__.py | 2 +- ddtrace/contrib/urllib3/patch.py | 2 +- ddtrace/propagation/http.py | 2 +- tests/contrib/urllib3/test_urllib3.py | 1 - 17 files changed, 19 insertions(+), 20 deletions(-) diff --git a/ddtrace/contrib/aiohttp/patch.py b/ddtrace/contrib/aiohttp/patch.py index 3830ee4ace5..b1d230f0895 100644 --- a/ddtrace/contrib/aiohttp/patch.py +++ b/ddtrace/contrib/aiohttp/patch.py @@ -88,7 +88,7 @@ async def _traced_clientsession_request(aiohttp, pin, func, instance, args, kwar span.service = url.host if pin._config["distributed_tracing"]: - HTTPPropagator.inject(span.context, headers) + HTTPPropagator.inject(span.context, headers, span) kwargs["headers"] = headers span.set_tag_str(COMPONENT, config.aiohttp_client.integration_name) diff --git a/ddtrace/contrib/botocore/services/kinesis.py b/ddtrace/contrib/botocore/services/kinesis.py index 2785545d8c1..15755a1706b 100644 --- a/ddtrace/contrib/botocore/services/kinesis.py +++ b/ddtrace/contrib/botocore/services/kinesis.py @@ -53,7 +53,7 @@ def inject_trace_to_kinesis_stream_data(record, span): line_break, data_obj = get_kinesis_data_object(data) if data_obj is not None: data_obj["_datadog"] = {} - HTTPPropagator.inject(span.context, data_obj["_datadog"]) + HTTPPropagator.inject(span.context, data_obj["_datadog"], span) data_json = json.dumps(data_obj) # if original string had a line break, add it back diff --git a/ddtrace/contrib/botocore/services/sqs.py b/ddtrace/contrib/botocore/services/sqs.py index 21f9636768d..1920dd5b93d 100644 --- a/ddtrace/contrib/botocore/services/sqs.py +++ b/ddtrace/contrib/botocore/services/sqs.py @@ -76,7 +76,7 @@ def inject_trace_to_sqs_or_sns_batch_message(params, span, endpoint_service=None """ trace_data = {} - HTTPPropagator.inject(span.context, trace_data) + HTTPPropagator.inject(span.context, trace_data, span) # An entry here is an SNS or SQS record, and depending on how it was published, # it could either show up under Entries (in case of PutRecords), @@ -99,7 +99,7 @@ def inject_trace_to_sqs_or_sns_message(params, span, endpoint_service=None): Inject trace headers info into MessageAttributes for the SQS or SNS record """ trace_data = {} - HTTPPropagator.inject(span.context, trace_data) + HTTPPropagator.inject(span.context, trace_data, span) core.dispatch("botocore.sqs_sns.start", [endpoint_service, trace_data, params]) inject_trace_data_to_message_attributes(trace_data, params, endpoint_service) diff --git a/ddtrace/contrib/botocore/services/stepfunctions.py b/ddtrace/contrib/botocore/services/stepfunctions.py index 7aa32a514e3..2788f9cb7c9 100644 --- a/ddtrace/contrib/botocore/services/stepfunctions.py +++ b/ddtrace/contrib/botocore/services/stepfunctions.py @@ -42,7 +42,7 @@ def inject_trace_to_stepfunction_input(params, span): log.warning("Input already has trace context.") return params["input"]["_datadog"] = {} - HTTPPropagator.inject(span.context, params["input"]["_datadog"]) + HTTPPropagator.inject(span.context, params["input"]["_datadog"], span) return elif isinstance(params["input"], str): @@ -54,7 +54,7 @@ def inject_trace_to_stepfunction_input(params, span): if isinstance(input_obj, dict): input_obj["_datadog"] = {} - HTTPPropagator.inject(span.context, input_obj["_datadog"]) + HTTPPropagator.inject(span.context, input_obj["_datadog"], span) input_json = json.dumps(input_obj) params["input"] = input_json diff --git a/ddtrace/contrib/botocore/utils.py b/ddtrace/contrib/botocore/utils.py index 46a5e4ba362..1ef7b8a550e 100644 --- a/ddtrace/contrib/botocore/utils.py +++ b/ddtrace/contrib/botocore/utils.py @@ -96,7 +96,7 @@ def inject_trace_to_eventbridge_detail(params, span): continue detail["_datadog"] = {} - HTTPPropagator.inject(span.context, detail["_datadog"]) + HTTPPropagator.inject(span.context, detail["_datadog"], span) detail_json = json.dumps(detail) # check if detail size will exceed max size with headers @@ -120,7 +120,7 @@ def modify_client_context(client_context_object, trace_headers): def inject_trace_to_client_context(params, span): trace_headers = {} - HTTPPropagator.inject(span.context, trace_headers) + HTTPPropagator.inject(span.context, trace_headers, span) client_context_object = {} if "ClientContext" in params: try: diff --git a/ddtrace/contrib/celery/signals.py b/ddtrace/contrib/celery/signals.py index e16f5ecace9..70c97a0ddc1 100644 --- a/ddtrace/contrib/celery/signals.py +++ b/ddtrace/contrib/celery/signals.py @@ -137,7 +137,7 @@ def trace_before_publish(*args, **kwargs): if config.celery["distributed_tracing"]: trace_headers = {} - propagator.inject(span.context, trace_headers) + propagator.inject(span.context, trace_headers, span) # put distributed trace headers where celery will propagate them task_headers = kwargs.get("headers") or {} diff --git a/ddtrace/contrib/grpc/aio_client_interceptor.py b/ddtrace/contrib/grpc/aio_client_interceptor.py index 2e08c64ffc4..598bb814636 100644 --- a/ddtrace/contrib/grpc/aio_client_interceptor.py +++ b/ddtrace/contrib/grpc/aio_client_interceptor.py @@ -125,7 +125,7 @@ def _intercept_client_call(self, method_kind, client_call_details): # propagate distributed tracing headers if available headers = {} if config.grpc_aio_client.distributed_tracing_enabled: - HTTPPropagator.inject(span.context, headers) + HTTPPropagator.inject(span.context, headers, span) metadata = [] if client_call_details.metadata is not None: diff --git a/ddtrace/contrib/grpc/client_interceptor.py b/ddtrace/contrib/grpc/client_interceptor.py index 59dc4ab10fa..57e1dcdb9a2 100644 --- a/ddtrace/contrib/grpc/client_interceptor.py +++ b/ddtrace/contrib/grpc/client_interceptor.py @@ -206,7 +206,7 @@ def _intercept_client_call(self, method_kind, client_call_details): # propagate distributed tracing headers if available headers = {} if config.grpc.distributed_tracing_enabled: - HTTPPropagator.inject(span.context, headers) + HTTPPropagator.inject(span.context, headers, span) metadata = [] if client_call_details.metadata is not None: diff --git a/ddtrace/contrib/httplib/patch.py b/ddtrace/contrib/httplib/patch.py index 47aa60bb03f..3e39d788d2c 100644 --- a/ddtrace/contrib/httplib/patch.py +++ b/ddtrace/contrib/httplib/patch.py @@ -98,7 +98,7 @@ def _wrap_request(func, instance, args, kwargs): headers = args[3] else: headers = kwargs.setdefault("headers", {}) - HTTPPropagator.inject(span.context, headers) + HTTPPropagator.inject(span.context, headers, span) except Exception: log.debug("error configuring request", exc_info=True) span = getattr(instance, "_datadog_span", None) diff --git a/ddtrace/contrib/httpx/patch.py b/ddtrace/contrib/httpx/patch.py index 11647da55e1..7e4581abf16 100644 --- a/ddtrace/contrib/httpx/patch.py +++ b/ddtrace/contrib/httpx/patch.py @@ -89,7 +89,7 @@ def _init_span(span, request): span.set_tag(SPAN_MEASURED_KEY) if distributed_tracing_enabled(config.httpx): - HTTPPropagator.inject(span.context, request.headers) + HTTPPropagator.inject(span.context, request.headers, span) sample_rate = config.httpx.get_analytics_sample_rate(use_global_config=True) if sample_rate is not None: diff --git a/ddtrace/contrib/kafka/patch.py b/ddtrace/contrib/kafka/patch.py index 4218b11b4c2..6b8e8bfc34d 100644 --- a/ddtrace/contrib/kafka/patch.py +++ b/ddtrace/contrib/kafka/patch.py @@ -189,7 +189,7 @@ def traced_produce(func, instance, args, kwargs): if config.kafka.distributed_tracing_enabled: # inject headers with Datadog tags: headers = get_argument_value(args, kwargs, 6, "headers", True) or {} - Propagator.inject(span.context, headers) + Propagator.inject(span.context, headers, span) args, kwargs = set_argument_value(args, kwargs, 6, "headers", headers) return func(*args, **kwargs) diff --git a/ddtrace/contrib/kombu/patch.py b/ddtrace/contrib/kombu/patch.py index e1297c8d196..1ced199745d 100644 --- a/ddtrace/contrib/kombu/patch.py +++ b/ddtrace/contrib/kombu/patch.py @@ -160,7 +160,7 @@ def traced_publish(func, instance, args, kwargs): s.set_tag(ANALYTICS_SAMPLE_RATE_KEY, config.kombu.get_analytics_sample_rate()) # run the command if config.kombu.distributed_tracing_enabled: - propagator.inject(s.context, args[HEADER_POS]) + propagator.inject(s.context, args[HEADER_POS], s) core.dispatch( "kombu.amqp.publish.pre", [args, kwargs, s] ) # Has to happen after trace injection for actual payload size diff --git a/ddtrace/contrib/requests/connection.py b/ddtrace/contrib/requests/connection.py index 7f1f2431883..ef992d37895 100644 --- a/ddtrace/contrib/requests/connection.py +++ b/ddtrace/contrib/requests/connection.py @@ -115,7 +115,7 @@ def _wrap_send(func, instance, args, kwargs): # propagate distributed tracing headers if cfg.get("distributed_tracing"): - HTTPPropagator.inject(span.context, request.headers) + HTTPPropagator.inject(span.context, request.headers, span) response = response_headers = None try: diff --git a/ddtrace/contrib/rq/__init__.py b/ddtrace/contrib/rq/__init__.py index 970f45d5199..968d4048fe6 100644 --- a/ddtrace/contrib/rq/__init__.py +++ b/ddtrace/contrib/rq/__init__.py @@ -151,7 +151,7 @@ def traced_queue_enqueue_job(rq, pin, func, instance, args, kwargs): # If the queue is_async then add distributed tracing headers to the job if instance.is_async and config.rq.distributed_tracing_enabled: - HTTPPropagator.inject(span.context, job.meta) + HTTPPropagator.inject(span.context, job.meta, span) return func(*args, **kwargs) diff --git a/ddtrace/contrib/urllib3/patch.py b/ddtrace/contrib/urllib3/patch.py index a2ae3df30e9..3ffc9b76845 100644 --- a/ddtrace/contrib/urllib3/patch.py +++ b/ddtrace/contrib/urllib3/patch.py @@ -122,7 +122,7 @@ def _wrap_urlopen(func, instance, args, kwargs): if request_headers is None: request_headers = {} kwargs["headers"] = request_headers - HTTPPropagator.inject(span.context, request_headers) + HTTPPropagator.inject(span.context, request_headers, span) if config.urllib3.analytics_enabled: span.set_tag(ANALYTICS_SAMPLE_RATE_KEY, config.urllib3.get_analytics_sample_rate()) diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index 54497d0efd3..e56e6c4aec9 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -897,6 +897,7 @@ def _resolve_contexts(contexts, styles_w_ctx, normalized_headers): primary_context._span_links = links return primary_context + # DEV: Change method signature to just take span and pull out context for next major version 3.0 @staticmethod def inject(span_context, headers, span=None): # type: (Context, Dict[str, str], Optional[Span]) -> None @@ -929,7 +930,6 @@ def parent_call(): if ddtrace.tracer._sampler and span: if not span.context.sampling_priority: ddtrace.tracer._sampler.sample(span._local_root) - if PROPAGATION_STYLE_DATADOG in config._propagation_style_inject: _DatadogMultiHeader._inject(span_context, headers) if PROPAGATION_STYLE_B3_MULTI in config._propagation_style_inject: diff --git a/tests/contrib/urllib3/test_urllib3.py b/tests/contrib/urllib3/test_urllib3.py index 67a16ebcf36..b8b08c7d68e 100644 --- a/tests/contrib/urllib3/test_urllib3.py +++ b/tests/contrib/urllib3/test_urllib3.py @@ -495,7 +495,6 @@ def test_distributed_tracing_enabled(self): "traceparent": s.context._traceparent, "tracestate": s.context._tracestate, } - if int(urllib3.__version__.split(".")[0]) >= 2: m_make_request.assert_called_with( mock.ANY, From 4bb7b17e5fe70e0cb54ba8cffabe48699260d82a Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 26 Feb 2024 14:58:48 -0500 Subject: [PATCH 09/69] update tags in sampler --- ddtrace/_trace/processor/__init__.py | 1 - ddtrace/_trace/span.py | 2 -- ddtrace/sampler.py | 3 +++ 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ddtrace/_trace/processor/__init__.py b/ddtrace/_trace/processor/__init__.py index af61a1c8159..df39efd3eb0 100644 --- a/ddtrace/_trace/processor/__init__.py +++ b/ddtrace/_trace/processor/__init__.py @@ -154,7 +154,6 @@ def process_trace(self, trace): # only sample if we haven't already sampled # this sampling decision can still be overridden manually if root_ctx and not root_ctx.sampling_priority: - _update_span_tags_from_context(chunk_root, root_ctx) self.sampler.sample(trace[0]) # When stats computation is enabled in the tracer then we can # safely drop the traces. diff --git a/ddtrace/_trace/span.py b/ddtrace/_trace/span.py index 3da3c0f38e8..2460e8fce6f 100644 --- a/ddtrace/_trace/span.py +++ b/ddtrace/_trace/span.py @@ -286,8 +286,6 @@ def _finish_ns(self, finish_time_ns): def _override_sampling_decision(self, decision): self.context.sampling_priority = decision set_sampling_decision_maker(self.context, SamplingMechanism.MANUAL) - # need to update the root span to keep track of the sampler ran there - # if user has set manual keep, we can avoid running sampling later for key in (SAMPLING_RULE_DECISION, SAMPLING_AGENT_DECISION, SAMPLING_LIMIT_DECISION): if key in self._local_root._metrics: del self._local_root._metrics[key] diff --git a/ddtrace/sampler.py b/ddtrace/sampler.py index 11828f6758f..d9c1a29f894 100644 --- a/ddtrace/sampler.py +++ b/ddtrace/sampler.py @@ -304,6 +304,9 @@ def sample(self, span, allow_false=True): """ If allow_false is False, this function will return True regardless of the sampling decision """ + # update tags from the context so we don't miss any when evaluating sampling + span.context._update_tags(span) + matched_rule = _get_highest_precedence_rule_matching(span, self.rules) sampler = self._default_sampler # type: BaseSampler From 3dcbfa2b7bdf0d313603397d520ac1d53011efe2 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 26 Feb 2024 15:32:06 -0500 Subject: [PATCH 10/69] fix dbm inject to sample before generating comment --- ddtrace/propagation/_database_monitoring.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ddtrace/propagation/_database_monitoring.py b/ddtrace/propagation/_database_monitoring.py index 97e1f1fa0cf..463ac727249 100644 --- a/ddtrace/propagation/_database_monitoring.py +++ b/ddtrace/propagation/_database_monitoring.py @@ -63,8 +63,11 @@ def __init__( self.peer_db_name_tag = peer_db_name_tag def inject(self, dbspan, args, kwargs): + # run sampling before injection to propagate correct sampling priority + if ddtrace.tracer._sampler and not dbspan.context.sampling_priority: + ddtrace.tracer._sampler.sample(dbspan._local_root) + dbm_comment = self._get_dbm_comment(dbspan) - ddtrace.tracer._sampler.sample(dbspan._local_root) if dbm_comment is None: # injection_mode is disabled return args, kwargs From 60789ee8a14d3a83c9ecff5e933ed5b9f20a036a Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 26 Feb 2024 16:37:58 -0500 Subject: [PATCH 11/69] fix remote config updating sampler when resetting to user original setting --- ddtrace/_trace/processor/__init__.py | 1 - ddtrace/_trace/tracer.py | 24 ++++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/ddtrace/_trace/processor/__init__.py b/ddtrace/_trace/processor/__init__.py index df39efd3eb0..b555f75c5f3 100644 --- a/ddtrace/_trace/processor/__init__.py +++ b/ddtrace/_trace/processor/__init__.py @@ -152,7 +152,6 @@ def process_trace(self, trace): root_ctx = chunk_root._context # only sample if we haven't already sampled - # this sampling decision can still be overridden manually if root_ctx and not root_ctx.sampling_priority: self.sampler.sample(trace[0]) # When stats computation is enabled in the tracer then we can diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index 3d2b20acad6..0377ddfb34a 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -1074,6 +1074,7 @@ def _on_global_config_update(self, cfg, items): # Reset the user sampler if one exists if cfg._get_source("_trace_sample_rate") != "remote_config" and self._user_sampler: self._sampler = self._user_sampler + self._update_sampler_on_processor() return if cfg._get_source("_trace_sample_rate") != "default": @@ -1084,16 +1085,7 @@ def _on_global_config_update(self, cfg, items): sampler = DatadogSampler(default_sample_rate=sample_rate) self._sampler = sampler - # we need to update the processor that uses the span - # alternatively we could re-run tracer.configure() - for aggregator in self._deferred_processors: - if type(aggregator) == SpanAggregator: - for processor in aggregator._trace_processors: - if type(processor) == TraceSamplingProcessor: - processor.sampler = self._sampler - break - else: - log.debug("No TraceSamplingProcessor available to update sampling rate from remote config") + self._update_sampler_on_processor() if "tags" in items: self._tags = cfg.tags.copy() @@ -1116,3 +1108,15 @@ def _on_global_config_update(self, cfg, items): from ddtrace.contrib.logging import unpatch unpatch() + + def _update_sampler_on_processor(self): + # we need to update the processor that uses the span + # alternatively we could re-run tracer.configure() + for aggregator in self._deferred_processors: + if type(aggregator) == SpanAggregator: + for processor in aggregator._trace_processors: + if type(processor) == TraceSamplingProcessor: + processor.sampler = self._sampler + break + else: + log.debug("No TraceSamplingProcessor available to update sampling rate from remote config") From 33f17778889fefda5b13dd07318518ced8eaed07 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 27 Feb 2024 12:49:37 -0500 Subject: [PATCH 12/69] run sampler before generating dbm comments in tests --- tests/internal/test_database_monitoring.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/internal/test_database_monitoring.py b/tests/internal/test_database_monitoring.py index fc578c6631a..a25f0876df4 100644 --- a/tests/internal/test_database_monitoring.py +++ b/tests/internal/test_database_monitoring.py @@ -97,6 +97,10 @@ def test_dbm_propagation_full_mode(): dbspan = tracer.trace("dbname", service="orders-db") + # since inject() below will call the sampler we just call the sampler here + # so sampling priority will align in the traceparent + tracer._sampler.sample(dbspan._local_root) + # when dbm propagation mode is full sql comments should be generated with dbm tags and traceparent keys dbm_popagator = _database_monitoring._DBM_Propagator(0, "procedure") sqlcomment = dbm_popagator._get_dbm_comment(dbspan) @@ -173,6 +177,10 @@ def test_dbm_peer_entity_tags(): dbspan.set_tag("out.host", "some-hostname") dbspan.set_tag("db.name", "some-db") + # since inject() below will call the sampler we just call the sampler here + # so sampling priority will align in the traceparent + tracer._sampler.sample(dbspan._local_root) + # when dbm propagation mode is full sql comments should be generated with dbm tags and traceparent keys dbm_propagator = _database_monitoring._DBM_Propagator(0, "procedure") sqlcomment = dbm_propagator._get_dbm_comment(dbspan) From b087291e740ac2f59d0de57ad6e5c11c12bdb025 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 27 Feb 2024 16:17:55 -0500 Subject: [PATCH 13/69] fix otel tests --- tests/opentelemetry/test_span.py | 2 ++ tests/opentelemetry/test_trace.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tests/opentelemetry/test_span.py b/tests/opentelemetry/test_span.py index 0bb67a7c592..9c12ce6a548 100644 --- a/tests/opentelemetry/test_span.py +++ b/tests/opentelemetry/test_span.py @@ -187,6 +187,7 @@ def test_otel_span_exception_handling(oteltracer): def test_otel_get_span_context(oteltracer): otelspan = oteltracer.start_span("otel-server") + otelspan.end() span_context = otelspan.get_span_context() assert span_context.trace_id == otelspan._ddspan.trace_id @@ -204,6 +205,7 @@ def test_otel_get_span_context_with_multiple_tracesates(oteltracer): otelspan = oteltracer.start_span("otel-server") otelspan._ddspan._context._meta["_dd.p.congo"] = "t61rcWkgMzE" otelspan._ddspan._context._meta["_dd.p.some_val"] = "tehehe" + otelspan.end() span_context = otelspan.get_span_context() assert span_context.trace_state.to_header() == "dd=s:1;t.dm:-0;t.congo:t61rcWkgMzE;t.some_val:tehehe" diff --git a/tests/opentelemetry/test_trace.py b/tests/opentelemetry/test_trace.py index 376ef432197..caaced37fae 100644 --- a/tests/opentelemetry/test_trace.py +++ b/tests/opentelemetry/test_trace.py @@ -162,6 +162,10 @@ def test_otel_start_current_span_without_default_args(oteltracer): @pytest.mark.snapshot(ignores=["metrics.net.peer.port", "meta.traceparent", "meta.flask.version"]) def test_distributed_trace_with_flask_app(flask_client, oteltracer): # noqa:F811 with oteltracer.start_as_current_span("test-otel-distributed-trace") as otel_span: + # need to manually run sampling before running the inject() since it's difficult + # to pass the span into the inject() method in this test + oteltracer._tracer._sampler.sample(otel_span._ddspan) + headers = { "traceparent": otel_span._ddspan.context._traceparent, "tracestate": otel_span._ddspan.context._tracestate, From 840564bdee64d021ae8a6052ab86a22a44ef61ef Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 27 Feb 2024 17:06:39 -0500 Subject: [PATCH 14/69] fix tests after merge of .sampled change and fix otel context test --- ddtrace/contrib/algoliasearch/patch.py | 5 +++-- ddtrace/contrib/elasticsearch/patch.py | 7 ++++--- ddtrace/llmobs/_integrations/base.py | 11 ++++++++--- tests/opentelemetry/test_context.py | 1 + 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/ddtrace/contrib/algoliasearch/patch.py b/ddtrace/contrib/algoliasearch/patch.py index 5408504fc72..8a32e07bfc3 100644 --- a/ddtrace/contrib/algoliasearch/patch.py +++ b/ddtrace/contrib/algoliasearch/patch.py @@ -125,8 +125,9 @@ def _patched_search(func, instance, wrapt_args, wrapt_kwargs): span.set_tag_str(SPAN_KIND, SpanKind.CLIENT) span.set_tag(SPAN_MEASURED_KEY) - if span.context.sampling_priority is None or span.context.sampling_priority <= 0: - return func(*wrapt_args, **wrapt_kwargs) + if span.context.sampling_priority is not None: + if span.context.sampling_priority <= 0: + return func(*wrapt_args, **wrapt_kwargs) if config.algoliasearch.collect_query_text: span.set_tag_str("query.text", wrapt_kwargs.get("query", wrapt_args[0])) diff --git a/ddtrace/contrib/elasticsearch/patch.py b/ddtrace/contrib/elasticsearch/patch.py index c2383965ebf..dbed39eab72 100644 --- a/ddtrace/contrib/elasticsearch/patch.py +++ b/ddtrace/contrib/elasticsearch/patch.py @@ -143,9 +143,10 @@ def _perform_request(func, instance, args, kwargs): span.set_tag(SPAN_MEASURED_KEY) # Only instrument if trace is sampled or if we haven't tried to sample yet - if span.context.sampling_priority is None or span.context.sampling_priority <= 0: - yield func(*args, **kwargs) - return + if span.context.sampling_priority is not None: + if span.context.sampling_priority <= 0: + yield func(*args, **kwargs) + return method, target = args params = kwargs.get("params") diff --git a/ddtrace/llmobs/_integrations/base.py b/ddtrace/llmobs/_integrations/base.py index 58cd2620386..d3ab8effc45 100644 --- a/ddtrace/llmobs/_integrations/base.py +++ b/ddtrace/llmobs/_integrations/base.py @@ -73,12 +73,17 @@ def llmobs_enabled(self) -> bool: return config._llmobs_enabled def is_pc_sampled_span(self, span: Span) -> bool: - if span.context.sampling_priority is None or span.context.sampling_priority <= 0: - return False + if span.context.sampling_priority is not None: + if span.context.sampling_priority <= 0: + return False return self._span_pc_sampler.sample(span) def is_pc_sampled_log(self, span: Span) -> bool: - sampled = span.context.sampling_priority is not None or span.context.sampling_priority <= 0 # type: ignore + if span.context.sampling_priority is not None: + if span.context.sampling_priority <= 0: + sampled = False + sampled = True + if not self.logs_enabled or not sampled: return False return self._log_pc_sampler.sample(span) diff --git a/tests/opentelemetry/test_context.py b/tests/opentelemetry/test_context.py index 24839f22491..097c0157f1c 100644 --- a/tests/opentelemetry/test_context.py +++ b/tests/opentelemetry/test_context.py @@ -93,6 +93,7 @@ def _subprocess_task(parent_span_context, errors): def test_otel_trace_across_fork(oteltracer): errors = multiprocessing.Queue() with oteltracer.start_as_current_span("root") as root: + oteltracer._tracer._sampler.sample(root._ddspan) p = multiprocessing.Process(target=_subprocess_task, args=(root.get_span_context(), errors)) try: p.start() From 7dfc689465d537de9c14055c5b18da7de0abea99 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 27 Feb 2024 17:49:39 -0500 Subject: [PATCH 15/69] run sampler before forking in integration-snapshot tests --- tests/integration/test_integration_snapshots.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_integration_snapshots.py b/tests/integration/test_integration_snapshots.py index 7d8fa3a7d65..9c537013535 100644 --- a/tests/integration/test_integration_snapshots.py +++ b/tests/integration/test_integration_snapshots.py @@ -124,7 +124,9 @@ def task(tracer): pass tracer.shutdown() - with tracer.trace("parent"): + with tracer.trace("parent") as parent_span: + # need to run sampling before forking + tracer._sampler.sample(parent_span) p = multiprocessing.Process(target=task, args=(tracer,)) p.start() p.join() From b38d646d0feab1fb67367d7c7a4d070292ba1625 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 27 Feb 2024 17:53:01 -0500 Subject: [PATCH 16/69] fix logic for openai checking if sampled --- ddtrace/llmobs/_integrations/base.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ddtrace/llmobs/_integrations/base.py b/ddtrace/llmobs/_integrations/base.py index d3ab8effc45..7e0f103c168 100644 --- a/ddtrace/llmobs/_integrations/base.py +++ b/ddtrace/llmobs/_integrations/base.py @@ -81,10 +81,9 @@ def is_pc_sampled_span(self, span: Span) -> bool: def is_pc_sampled_log(self, span: Span) -> bool: if span.context.sampling_priority is not None: if span.context.sampling_priority <= 0: - sampled = False - sampled = True + return False - if not self.logs_enabled or not sampled: + if not self.logs_enabled: return False return self._log_pc_sampler.sample(span) From 7e873ba57a9de11b2e1faf0dd7379d5097c2ba14 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 27 Feb 2024 18:05:56 -0500 Subject: [PATCH 17/69] fix other integration_snapshot test by sampling before forking --- tests/integration/test_integration_snapshots.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_integration_snapshots.py b/tests/integration/test_integration_snapshots.py index 9c537013535..45050942590 100644 --- a/tests/integration/test_integration_snapshots.py +++ b/tests/integration/test_integration_snapshots.py @@ -154,7 +154,9 @@ def task2(tracer): p.join() tracer.shutdown() - with tracer.trace("parent"): + with tracer.trace("parent") as parent_span: + # need to sample first + tracer._sampler.sample(parent_span) p = multiprocessing.Process(target=task, args=(tracer,)) p.start() p.join() From 1d70f3a3cd2508aaaf2e884bd484fdeedf2bdbdf Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Wed, 28 Feb 2024 16:29:52 -0500 Subject: [PATCH 18/69] if HTTPPropagator.inject called without span, get current root span to run sampling with --- ddtrace/propagation/http.py | 12 +++++++++--- tests/opentelemetry/test_trace.py | 4 +--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index 0f22c22c9cb..08cc61c12c3 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -949,9 +949,15 @@ def parent_call(): if config.propagation_http_baggage_enabled is True and span_context._baggage is not None: for key in span_context._baggage: headers[_HTTP_BAGGAGE_PREFIX + key] = span_context._baggage[key] - if ddtrace.tracer._sampler and span: - if not span.context.sampling_priority: - ddtrace.tracer._sampler.sample(span._local_root) + if ddtrace.tracer._sampler: + if span is None: + # if a span is not passed in explicitly, we do our best to grab the current root span to sample + span = ddtrace.tracer.current_root_span() + else: + span = span._local_root + if span is not None: + if not span.context.sampling_priority: + ddtrace.tracer._sampler.sample(span._local_root) if PROPAGATION_STYLE_DATADOG in config._propagation_style_inject: _DatadogMultiHeader._inject(span_context, headers) if PROPAGATION_STYLE_B3_MULTI in config._propagation_style_inject: diff --git a/tests/opentelemetry/test_trace.py b/tests/opentelemetry/test_trace.py index caaced37fae..017af60ad30 100644 --- a/tests/opentelemetry/test_trace.py +++ b/tests/opentelemetry/test_trace.py @@ -162,10 +162,8 @@ def test_otel_start_current_span_without_default_args(oteltracer): @pytest.mark.snapshot(ignores=["metrics.net.peer.port", "meta.traceparent", "meta.flask.version"]) def test_distributed_trace_with_flask_app(flask_client, oteltracer): # noqa:F811 with oteltracer.start_as_current_span("test-otel-distributed-trace") as otel_span: - # need to manually run sampling before running the inject() since it's difficult - # to pass the span into the inject() method in this test + # need to manually run sampling before grabbing otel_span._ddspan.context._tracestate oteltracer._tracer._sampler.sample(otel_span._ddspan) - headers = { "traceparent": otel_span._ddspan.context._traceparent, "tracestate": otel_span._ddspan.context._tracestate, From 4350138372981487faf9e79742fa078e13c8ca66 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Wed, 28 Feb 2024 17:17:38 -0500 Subject: [PATCH 19/69] use setter for tracer._sampler to always update the processor that uses the sampler --- ddtrace/_trace/tracer.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index 772108788f5..b45b5cc86ac 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -319,6 +319,24 @@ def deregister_on_start_span(self, func: Callable) -> Callable: self._hooks.deregister(self.__class__.start_span, func) return func + @property + def _sampler(self): + return self._sampler_current + + @_sampler.setter + def _sampler(self, value): + self._sampler_current = value + # we need to update the processor that uses the sampler + if getattr(self, "_deferred_processors", None): + for aggregator in self._deferred_processors: + if type(aggregator) == SpanAggregator: + for processor in aggregator._trace_processors: + if type(processor) == TraceSamplingProcessor: + processor.sampler = value + break + else: + log.debug("No TraceSamplingProcessor available to update sampling rate") + @property def debug_logging(self): return log.isEnabledFor(logging.DEBUG) @@ -1073,7 +1091,6 @@ def _on_global_config_update(self, cfg, items): # Reset the user sampler if one exists if cfg._get_source("_trace_sample_rate") != "remote_config" and self._user_sampler: self._sampler = self._user_sampler - self._update_sampler_on_processor() return if cfg._get_source("_trace_sample_rate") != "default": @@ -1084,7 +1101,6 @@ def _on_global_config_update(self, cfg, items): sampler = DatadogSampler(default_sample_rate=sample_rate) self._sampler = sampler - self._update_sampler_on_processor() if "tags" in items: self._tags = cfg.tags.copy() @@ -1107,15 +1123,3 @@ def _on_global_config_update(self, cfg, items): from ddtrace.contrib.logging import unpatch unpatch() - - def _update_sampler_on_processor(self): - # we need to update the processor that uses the span - # alternatively we could re-run tracer.configure() - for aggregator in self._deferred_processors: - if type(aggregator) == SpanAggregator: - for processor in aggregator._trace_processors: - if type(processor) == TraceSamplingProcessor: - processor.sampler = self._sampler - break - else: - log.debug("No TraceSamplingProcessor available to update sampling rate from remote config") From 4567753d894da6ab315b063ba3fdeed07c0be17b Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Wed, 28 Feb 2024 17:51:08 -0500 Subject: [PATCH 20/69] sample before forking --- ddtrace/_trace/tracer.py | 6 ++++ ddtrace/internal/forksafe.py | 35 +++++++++++++++++++ .../integration/test_integration_snapshots.py | 8 ++--- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index b45b5cc86ac..ebdf7572bf1 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -230,6 +230,7 @@ def __init__( self.context_provider = context_provider or DefaultContextProvider() self._user_sampler: Optional[BaseSampler] = None self._sampler: BaseSampler = DatadogSampler() + forksafe.register_before_in_child(self.sample_before_fork) self._dogstatsd_url = agent.get_stats_url() if dogstatsd_url is None else dogstatsd_url self._compute_stats = config._trace_compute_stats self._agent_url: str = agent.get_trace_url() if url is None else url @@ -319,6 +320,10 @@ def deregister_on_start_span(self, func: Callable) -> Callable: self._hooks.deregister(self.__class__.start_span, func) return func + def sample_before_fork(self) -> None: + span = self.current_root_span() + self._sampler.sample(span) + @property def _sampler(self): return self._sampler_current @@ -1036,6 +1041,7 @@ def shutdown(self, timeout: Optional[float] = None) -> None: atexit.unregister(self._atexit) forksafe.unregister(self._child_after_fork) + forksafe.unregister_before_in_child(self.sample_before_fork) self.start_span = self._start_span_after_shutdown # type: ignore[assignment] diff --git a/ddtrace/internal/forksafe.py b/ddtrace/internal/forksafe.py index a3fc0d0d97a..febd43dad10 100644 --- a/ddtrace/internal/forksafe.py +++ b/ddtrace/internal/forksafe.py @@ -14,6 +14,7 @@ _registry = [] # type: typing.List[typing.Callable[[], None]] +_registry_before_in_child = [] # type: typing.List[typing.Callable[[], None]] # Some integrations might require after-fork hooks to be executed after the # actual call to os.fork with earlier versions of Python (<= 3.6), else issues @@ -50,6 +51,20 @@ def ddtrace_after_in_child(): log.exception("Exception ignored in forksafe hook %r", hook) +def ddtrace_before_in_child(): + # type: () -> None + global _registry_before_in_child + + # DEV: we make a copy of the registry to prevent hook execution from + # introducing new hooks, potentially causing an infinite loop. + for hook in list(_registry_before_in_child): + try: + hook() + except Exception: + # Mimic the behaviour of Python's fork hooks. + log.exception("Exception ignored in forksafe hook %r", hook) + + def register(after_in_child): # type: (typing.Callable[[], None]) -> typing.Callable[[], None] """Register a function to be called after fork in the child process. @@ -61,6 +76,17 @@ def register(after_in_child): return after_in_child +def register_before_in_child(before_in_child): + # type: (typing.Callable[[], None]) -> typing.Callable[[], None] + """Register a function to be called before fork in the parent process. + + Note that ``before_in_child`` will be called in all parent processes across + multiple forks unless it is unregistered. + """ + _registry_before_in_child.append(before_in_child) + return before_in_child + + def unregister(after_in_child): # type: (typing.Callable[[], None]) -> None try: @@ -69,6 +95,14 @@ def unregister(after_in_child): log.info("after_in_child hook %s was unregistered without first being registered", after_in_child.__name__) +def unregister_before_in_child(before_in_child): + # type: (typing.Callable[[], None]) -> None + try: + _registry_before_in_child.remove(before_in_child) + except ValueError: + log.info("before_in_child hook %s was unregistered without first being registered", before_in_child.__name__) + + if hasattr(os, "register_at_fork"): os.register_at_fork(after_in_child=ddtrace_after_in_child, after_in_parent=set_forked) elif hasattr(os, "fork"): @@ -88,6 +122,7 @@ def _after_fork(): _os_fork = os.fork def _fork(): + ddtrace_before_in_child() pid = _os_fork() if pid == 0 and _soft: ddtrace_after_in_child() diff --git a/tests/integration/test_integration_snapshots.py b/tests/integration/test_integration_snapshots.py index 45050942590..7d8fa3a7d65 100644 --- a/tests/integration/test_integration_snapshots.py +++ b/tests/integration/test_integration_snapshots.py @@ -124,9 +124,7 @@ def task(tracer): pass tracer.shutdown() - with tracer.trace("parent") as parent_span: - # need to run sampling before forking - tracer._sampler.sample(parent_span) + with tracer.trace("parent"): p = multiprocessing.Process(target=task, args=(tracer,)) p.start() p.join() @@ -154,9 +152,7 @@ def task2(tracer): p.join() tracer.shutdown() - with tracer.trace("parent") as parent_span: - # need to sample first - tracer._sampler.sample(parent_span) + with tracer.trace("parent"): p = multiprocessing.Process(target=task, args=(tracer,)) p.start() p.join() From f2beeea905b081f1b171eae30eeeb3f080373285 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Fri, 1 Mar 2024 13:26:06 -0500 Subject: [PATCH 21/69] add before fork sampling --- ddtrace/internal/forksafe.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ddtrace/internal/forksafe.py b/ddtrace/internal/forksafe.py index febd43dad10..8c36543c664 100644 --- a/ddtrace/internal/forksafe.py +++ b/ddtrace/internal/forksafe.py @@ -104,7 +104,9 @@ def unregister_before_in_child(before_in_child): if hasattr(os, "register_at_fork"): - os.register_at_fork(after_in_child=ddtrace_after_in_child, after_in_parent=set_forked) + os.register_at_fork( + before=ddtrace_before_in_child, after_in_child=ddtrace_after_in_child, after_in_parent=set_forked + ) elif hasattr(os, "fork"): # DEV: This "should" be the correct way of implementing this, but it doesn't # work if hooks create new threads. From efcf424173754e21924fa9cf61fe1375f199e573 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Fri, 1 Mar 2024 15:17:11 -0500 Subject: [PATCH 22/69] manually sample before multiprocessing.Process tests --- tests/integration/test_integration_snapshots.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_integration_snapshots.py b/tests/integration/test_integration_snapshots.py index 7d8fa3a7d65..4c3c5ccda72 100644 --- a/tests/integration/test_integration_snapshots.py +++ b/tests/integration/test_integration_snapshots.py @@ -124,7 +124,9 @@ def task(tracer): pass tracer.shutdown() - with tracer.trace("parent"): + with tracer.trace("parent") as parent: + # need to manually sample before Popen + tracer._sampler.sample(parent) p = multiprocessing.Process(target=task, args=(tracer,)) p.start() p.join() @@ -152,7 +154,9 @@ def task2(tracer): p.join() tracer.shutdown() - with tracer.trace("parent"): + with tracer.trace("parent") as parent: + # need to manually sample before Popen + tracer._sampler.sample(parent) p = multiprocessing.Process(target=task, args=(tracer,)) p.start() p.join() From bfc0c63b8f1563daf1d598653eeb0291ad6c9a4a Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Fri, 1 Mar 2024 15:46:12 -0500 Subject: [PATCH 23/69] check if there is an active span before calling sample for fork --- ddtrace/_trace/tracer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index ebdf7572bf1..dad7785def6 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -322,7 +322,8 @@ def deregister_on_start_span(self, func: Callable) -> Callable: def sample_before_fork(self) -> None: span = self.current_root_span() - self._sampler.sample(span) + if span is not None: + self._sampler.sample(span) @property def _sampler(self): From bba6b979a3c21d0f2e7ed5886e0759b666ffb2a8 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Fri, 1 Mar 2024 16:32:37 -0500 Subject: [PATCH 24/69] make sure we check that sampling has not already run, before trying to sample before fork --- ddtrace/_trace/tracer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index dad7785def6..0b0e4503c04 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -322,7 +322,7 @@ def deregister_on_start_span(self, func: Callable) -> Callable: def sample_before_fork(self) -> None: span = self.current_root_span() - if span is not None: + if span is not None and not span.context.sampling_priority: self._sampler.sample(span) @property From fab0b2714a55056b09a2ad04d309caf8c5dbc452 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Fri, 1 Mar 2024 17:30:05 -0500 Subject: [PATCH 25/69] check that context.sampling_priority is None, not just is --- ddtrace/_trace/processor/__init__.py | 2 +- ddtrace/_trace/tracer.py | 2 +- ddtrace/propagation/_database_monitoring.py | 2 +- ddtrace/propagation/http.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ddtrace/_trace/processor/__init__.py b/ddtrace/_trace/processor/__init__.py index b555f75c5f3..8c3ce8406fc 100644 --- a/ddtrace/_trace/processor/__init__.py +++ b/ddtrace/_trace/processor/__init__.py @@ -152,7 +152,7 @@ def process_trace(self, trace): root_ctx = chunk_root._context # only sample if we haven't already sampled - if root_ctx and not root_ctx.sampling_priority: + if root_ctx and root_ctx.sampling_priority is None: self.sampler.sample(trace[0]) # When stats computation is enabled in the tracer then we can # safely drop the traces. diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index 0b0e4503c04..5fde10a9d96 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -322,7 +322,7 @@ def deregister_on_start_span(self, func: Callable) -> Callable: def sample_before_fork(self) -> None: span = self.current_root_span() - if span is not None and not span.context.sampling_priority: + if span is not None and span.context.sampling_priority is None: self._sampler.sample(span) @property diff --git a/ddtrace/propagation/_database_monitoring.py b/ddtrace/propagation/_database_monitoring.py index 463ac727249..67a9bc11b36 100644 --- a/ddtrace/propagation/_database_monitoring.py +++ b/ddtrace/propagation/_database_monitoring.py @@ -64,7 +64,7 @@ def __init__( def inject(self, dbspan, args, kwargs): # run sampling before injection to propagate correct sampling priority - if ddtrace.tracer._sampler and not dbspan.context.sampling_priority: + if ddtrace.tracer._sampler and dbspan.context.sampling_priority is None: ddtrace.tracer._sampler.sample(dbspan._local_root) dbm_comment = self._get_dbm_comment(dbspan) diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index 235eab34ff9..ead567d60ba 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -957,7 +957,7 @@ def parent_call(): else: span = span._local_root if span is not None: - if not span.context.sampling_priority: + if span.context.sampling_priority is None: ddtrace.tracer._sampler.sample(span._local_root) if PROPAGATION_STYLE_DATADOG in config._propagation_style_inject: _DatadogMultiHeader._inject(span_context, headers) From 05d819ad72ea24ed14d217905ba01ba045ce6d0f Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 4 Mar 2024 13:09:12 -0500 Subject: [PATCH 26/69] update propagation tests where sampling runs due to inject now --- tests/tracer/test_propagation.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/tracer/test_propagation.py b/tests/tracer/test_propagation.py index 601d5b3947f..99a284a4da3 100644 --- a/tests/tracer/test_propagation.py +++ b/tests/tracer/test_propagation.py @@ -74,6 +74,7 @@ def test_inject_with_baggage_http_propagation(tracer): # noqa: F811 def test_inject_128bit_trace_id_datadog(): from ddtrace._trace.context import Context from ddtrace.internal.constants import HIGHER_ORDER_TRACE_ID_BITS + from ddtrace.internal.constants import SAMPLING_DECISION_TRACE_TAG_KEY from ddtrace.propagation.http import HTTPPropagator from tests.utils import DummyTracer @@ -91,7 +92,9 @@ def test_inject_128bit_trace_id_datadog(): assert headers == { "x-datadog-trace-id": str(span._trace_id_64bits), "x-datadog-parent-id": str(span.span_id), - "x-datadog-tags": "=".join([HIGHER_ORDER_TRACE_ID_BITS, trace_id_hob_hex]), + "x-datadog-tags": "{}=-0,".format(SAMPLING_DECISION_TRACE_TAG_KEY) + + "=".join([HIGHER_ORDER_TRACE_ID_BITS, trace_id_hob_hex]), + "x-datadog-sampling-priority": "1", } @@ -114,7 +117,7 @@ def test_inject_128bit_trace_id_b3multi(): HTTPPropagator.inject(span.context, headers) trace_id_hex = "{:032x}".format(span.trace_id) span_id_hex = "{:016x}".format(span.span_id) - assert headers == {"x-b3-traceid": trace_id_hex, "x-b3-spanid": span_id_hex} + assert headers == {"x-b3-traceid": trace_id_hex, "x-b3-spanid": span_id_hex, "x-b3-sampled": "1"} @pytest.mark.subprocess( @@ -136,7 +139,7 @@ def test_inject_128bit_trace_id_b3_single_header(): HTTPPropagator.inject(span.context, headers) trace_id_hex = "{:032x}".format(span.trace_id) span_id_hex = "{:016x}".format(span.span_id) - assert headers == {"b3": "%s-%s" % (trace_id_hex, span_id_hex)} + assert headers == {"b3": "%s-%s-1" % (trace_id_hex, span_id_hex)} @pytest.mark.subprocess( @@ -158,7 +161,7 @@ def test_inject_128bit_trace_id_tracecontext(): HTTPPropagator.inject(span.context, headers) trace_id_hex = "{:032x}".format(span.trace_id) span_id_hex = "{:016x}".format(span.span_id) - assert headers["traceparent"] == "00-%s-%s-00" % (trace_id_hex, span_id_hex) + assert headers["traceparent"] == "00-%s-%s-01" % (trace_id_hex, span_id_hex) def test_inject_tags_unicode(tracer): # noqa: F811 From 0a7ec87069ec1ccb88464ca9aea57b9bce216297 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 4 Mar 2024 15:53:12 -0500 Subject: [PATCH 27/69] default to span being sampled when sampling has yet to run for appsec --- ddtrace/appsec/_api_security/api_manager.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ddtrace/appsec/_api_security/api_manager.py b/ddtrace/appsec/_api_security/api_manager.py index 6bf5982be8e..2602d4bb931 100644 --- a/ddtrace/appsec/_api_security/api_manager.py +++ b/ddtrace/appsec/_api_security/api_manager.py @@ -128,8 +128,12 @@ def _schema_callback(self, env): try: # check both current span and root span for sampling priority + # if sampling has not yet run for the span, we default to treating it as sampled + if root.context.sampling_priority is None and env.span.context.sampling_priority is None: + priorities = (1,) + else: + priorities = (root.context.sampling_priority or 0, env.span.context.sampling_priority or 0) # if any of them is set to USER_KEEP or USER_REJECT, we should respect it - priorities = (root.context.sampling_priority or 0, env.span.context.sampling_priority or 0) if constants.USER_KEEP in priorities: priority = constants.USER_KEEP elif constants.USER_REJECT in priorities: From ccc56fd26658c571652390e7111359b7917b92b6 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 4 Mar 2024 17:25:23 -0500 Subject: [PATCH 28/69] first run at rn and docs updates --- docs/advanced_usage.rst | 20 ++++++++++++++++--- .../notes/lazy_sampling-93057adeaccbb46f.yaml | 19 ++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 7f7986d1be3..6deb160b063 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -81,7 +81,13 @@ context:: Span objects are owned by the execution in which they are created and must be finished in the same execution. The span context can be used to continue a trace in a different execution by passing it and activating it on the other - end. See the sections below for how to propagate traces across task, thread or + end. Note, that in all instances of crossing into another + execution (besides `os.fork` which is automatically instrumented), + sampling should be run manually before entering the new execution + to ensure that the sampling decision is the same across the trace. + This can be done using `tracer.sampler.sample(tracer.current_root_span())` + + See the sections below for how to propagate traces across task, thread or process boundaries. @@ -100,7 +106,9 @@ threads:: # `second_thread`s parent will be the `main_thread` span time.sleep(1) - with tracer.trace("main_thread"): + with tracer.trace("main_thread") as root_span: + # sample so the sampling_priority is the same across the trace + tracer.sampler.sample(tracer.current_root_span()) thread = threading.Thread(target=_target, args=(tracer.current_trace_context(),)) thread.start() thread.join() @@ -118,6 +126,8 @@ to :class:`~concurrent.futures.ThreadPoolExecutor` tasks:: @tracer.wrap() def eat_all_the_things(): + # sample so the sampling_priority is the same across the trace + tracer.sampler.sample(tracer.current_root_span()) with ThreadPoolExecutor() as e: e.submit(eat, "cookie") e.map(eat, ("panna cotta", "tiramisu", "gelato")) @@ -140,6 +150,8 @@ span has to be propagated as a context:: tracer.shutdown() with tracer.trace("work"): + # sample so the sampling_priority is the same across the trace + tracer.sampler.sample(tracer.current_root_span()) proc = Process(target=_target, args=(tracer.current_trace_context(),)) proc.start() time.sleep(1) @@ -226,7 +238,9 @@ To trace requests across hosts, the spans on the secondary hosts must be linked - On the server side, it means to read propagated attributes and set them to the active tracing context. - On the client side, it means to propagate the attributes, commonly as a header/metadata. -`ddtrace` already provides default propagators but you can also implement your own. +`ddtrace` already provides default propagators but you can also implement your own. If utilizing your own propagator +make sure to run `tracer.sampler.sample(tracer.current_root_span())` before propagating downstream, +to ensure that the sampling decision is the same across the trace. Web Frameworks ^^^^^^^^^^^^^^ diff --git a/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml b/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml new file mode 100644 index 00000000000..b9adef989ec --- /dev/null +++ b/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml @@ -0,0 +1,19 @@ +--- +prelude: > + tracing: This release adds support for lazy sampling, essentially moving when we make a sampling decision for + a trace to the latest possible moment, these include the following: + 1. Before encoding a trace chunk to be sent to the agent + 2. Before making an outgoing request via HTTP, gRPC, or a DB call for any automatically instrumented integration + 3. Before running ``os.fork()`` + It should be noted that if a user has application egress points (that are not automatically instrumented), + to other Datadog components (downstream instrumented services, databases, or execution context changes), + and rely on the Python tracer to make the sampling decision (don't have an upstream service doing this) + they will need to manually sample those traces. For more information please see: + https://ddtrace.readthedocs.io/en/stable/advanced_usage.html#tracing-context-management +features: + - | + tracing: Added support for lazy sampling, the benefit of which is the ability to make a sampling decision using + ``DD_TRACE_SAMPLING_RULES`` based on any span attribute (service, resource, tags, name)regardless of when the + value for the attribute is set. This change is particularly beneficial for sampling on tags, since the vast + majority of tags are set after the span is created. Since sampling was previously done at span creation time, + this meant that those tags could not be used for sampling decisions. From f0618780a11c590272f3b2a7e2bab4576bb28d7f Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 4 Mar 2024 18:25:53 -0500 Subject: [PATCH 29/69] change popen snapshot and test names, add flakey fork test --- .../integration/test_integration_snapshots.py | 27 ++++++++++- ..._tracer_trace_across_multiple_popens.json} | 0 ...ts.test_tracer_trace_across_only_fork.json | 47 +++++++++++++++++++ ...shots.test_tracer_trace_across_popen.json} | 0 4 files changed, 72 insertions(+), 2 deletions(-) rename tests/snapshots/{tests.integration.test_integration_snapshots.test_tracer_trace_across_multiple_forks.json => tests.integration.test_integration_snapshots.test_tracer_trace_across_multiple_popens.json} (100%) create mode 100644 tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_only_fork.json rename tests/snapshots/{tests.integration.test_integration_snapshots.test_tracer_trace_across_fork.json => tests.integration.test_integration_snapshots.test_tracer_trace_across_popen.json} (100%) diff --git a/tests/integration/test_integration_snapshots.py b/tests/integration/test_integration_snapshots.py index 4c3c5ccda72..e45c3217063 100644 --- a/tests/integration/test_integration_snapshots.py +++ b/tests/integration/test_integration_snapshots.py @@ -112,7 +112,7 @@ def test_synchronous_writer(): @snapshot(async_mode=False) -def test_tracer_trace_across_fork(): +def test_tracer_trace_across_popen(): """ When a trace is started in a parent process and a child process is spawned The trace should be continued in the child process @@ -135,7 +135,7 @@ def task(tracer): @snapshot(async_mode=False) -def test_tracer_trace_across_multiple_forks(): +def test_tracer_trace_across_multiple_popens(): """ When a trace is started and crosses multiple process boundaries The trace should be continued in the child processes @@ -163,6 +163,29 @@ def task2(tracer): tracer.shutdown() +@snapshot(async_mode=False) +def test_tracer_trace_across_only_fork(): + """ + When a trace is started in a parent process and a child process is spawned + The trace should be continued in the child process. The fact that + the child span has does not have '_dd.p.dm' shows that sampling was run + before fork automatically. + """ + tracer = Tracer() + + with tracer.trace("parent"): + # Create a child process + # using os.fork() method + pid = os.fork() + + # pid greater than 0 represents + # the parent process + if pid == 0: + tracer.trace("child") + + tracer.shutdown() + + @snapshot() def test_wrong_span_name_type_not_sent(): """Span names should be a text type.""" diff --git a/tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_multiple_forks.json b/tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_multiple_popens.json similarity index 100% rename from tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_multiple_forks.json rename to tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_multiple_popens.json diff --git a/tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_only_fork.json b/tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_only_fork.json new file mode 100644 index 00000000000..2d765af1d3c --- /dev/null +++ b/tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_only_fork.json @@ -0,0 +1,47 @@ +[[ + { + "name": "parent", + "service": "", + "resource": "parent", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "65e6528700000000", + "language": "python", + "runtime-id": "dc1a7b919e604e4f80d345b1332732b7" + }, + "metrics": { + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 80715 + }, + "duration": 1789000, + "start": 1709593223433895000 + }, + { + "name": "child", + "service": "", + "resource": "child", + "trace_id": 0, + "span_id": 2, + "parent_id": 1, + "type": "", + "error": 0, + "meta": { + "_dd.p.tid": "65e6528700000000", + "language": "python", + "runtime-id": "67a247dc01ba4cb3afe7312182747093" + }, + "metrics": { + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 80718 + }, + "duration": 0, + "start": 1709593223438223000 + }]] diff --git a/tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_fork.json b/tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_popen.json similarity index 100% rename from tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_fork.json rename to tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_popen.json From 568ad3147232413ef3ba4c8e8e8a495d932429bc Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 4 Mar 2024 18:29:03 -0500 Subject: [PATCH 30/69] clean up --- tests/contrib/urllib3/test_urllib3.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/contrib/urllib3/test_urllib3.py b/tests/contrib/urllib3/test_urllib3.py index 0e644e96857..6ba3a8eac1c 100644 --- a/tests/contrib/urllib3/test_urllib3.py +++ b/tests/contrib/urllib3/test_urllib3.py @@ -496,6 +496,7 @@ def test_distributed_tracing_enabled(self): # outgoing headers must contain last parent span id in tracestate "tracestate": s.context._tracestate.replace("dd=", "dd=p:{:016x};".format(s.span_id)), } + if int(urllib3.__version__.split(".")[0]) >= 2: m_make_request.assert_called_with( mock.ANY, From e24e9ba9481f172218338c6c25b0fe5e57e879e2 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 4 Mar 2024 18:48:41 -0500 Subject: [PATCH 31/69] add to rn --- releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml b/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml index b9adef989ec..63b68a2b0c9 100644 --- a/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml +++ b/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml @@ -5,10 +5,13 @@ prelude: > 1. Before encoding a trace chunk to be sent to the agent 2. Before making an outgoing request via HTTP, gRPC, or a DB call for any automatically instrumented integration 3. Before running ``os.fork()`` - It should be noted that if a user has application egress points (that are not automatically instrumented), + For most users this change shouldn't have any impact on their traces, but it does allow for more flexibility + in sampling (see ``features`` release note). + It should be noted that if a user has application egress points that are not automatically instrumented, to other Datadog components (downstream instrumented services, databases, or execution context changes), - and rely on the Python tracer to make the sampling decision (don't have an upstream service doing this) - they will need to manually sample those traces. For more information please see: + and rely on the Python tracer to make the sampling decision (don't have an upstream service doing this), + they will need to manually run the sampler for those traces, or use ``HttpPropagator.inject()``. + For more information please see: https://ddtrace.readthedocs.io/en/stable/advanced_usage.html#tracing-context-management features: - | From c0ee71e3b8ac71b8f827b1aa4a14e8442863603d Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 18 Mar 2024 15:00:47 -0400 Subject: [PATCH 32/69] decrease flakiness of test_priority_sampling tets --- tests/integration/test_priority_sampling.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_priority_sampling.py b/tests/integration/test_priority_sampling.py index 54cd10975f0..efe7f16ab9f 100644 --- a/tests/integration/test_priority_sampling.py +++ b/tests/integration/test_priority_sampling.py @@ -8,6 +8,7 @@ from ddtrace.internal.encoding import JSONEncoder from ddtrace.internal.encoding import MsgpackEncoderV03 as Encoder from ddtrace.internal.writer import AgentWriter +from ddtrace.tracer import Tracer from tests.integration.utils import parametrize_with_all_encodings from tests.integration.utils import skip_if_testagent from tests.utils import override_global_config @@ -115,7 +116,7 @@ def test_priority_sampling_response(): @pytest.mark.snapshot(agent_sample_rate_by_service={"service:test,env:": 0.9999}) def test_agent_sample_rate_keep(): """Ensure that the agent sample rate is respected when a trace is auto sampled.""" - from ddtrace import tracer + tracer = Tracer() # First trace won't actually have the sample rate applied since the response has not yet been received. with tracer.trace(""): @@ -126,6 +127,7 @@ def test_agent_sample_rate_keep(): # Subsequent traces should have the rate applied. with tracer.trace("test", service="test") as span: pass + tracer.flush() assert span.get_metric("_dd.agent_psr") == pytest.approx(0.9999) assert span.get_metric("_sampling_priority_v1") == AUTO_KEEP assert span.get_tag("_dd.p.dm") == "-1" @@ -135,7 +137,9 @@ def test_agent_sample_rate_keep(): @pytest.mark.snapshot(agent_sample_rate_by_service={"service:test,env:": 0.0001}) def test_agent_sample_rate_reject(): """Ensure that the agent sample rate is respected when a trace is auto rejected.""" - from ddtrace import tracer + from ddtrace.tracer import Tracer + + tracer = Tracer() # First trace won't actually have the sample rate applied since the response has not yet been received. with tracer.trace(""): @@ -147,6 +151,7 @@ def test_agent_sample_rate_reject(): # Subsequent traces should have the rate applied. with tracer.trace("test", service="test") as span: pass + tracer.flush() assert span.get_metric("_dd.agent_psr") == pytest.approx(0.0001) assert span.get_metric("_sampling_priority_v1") == AUTO_REJECT assert span.get_tag("_dd.p.dm") == "-1" From 0e4881bfc8e10b672b26aa5146248e8fc663f322 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 18 Mar 2024 17:03:31 -0400 Subject: [PATCH 33/69] decrease flakiness of test_settings tests --- tests/integration/test_settings.py | 92 ++++++++++++++++-------------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/tests/integration/test_settings.py b/tests/integration/test_settings.py index 47856c58c70..28d427463f1 100644 --- a/tests/integration/test_settings.py +++ b/tests/integration/test_settings.py @@ -5,17 +5,21 @@ from .test_integration import AGENT_VERSION -def _get_latest_telemetry_config_item(events, item_name): +def _get_telemetry_config_items(events, item_name): + items = [] for event in reversed(sorted(events, key=lambda e: (e["tracer_time"], e["seq_id"]))): for item in reversed(event.get("payload", {}).get("configuration", [])): if item_name == item["name"]: - return item + items.append(item) + if items: + return items return None @pytest.mark.skipif(AGENT_VERSION != "testagent", reason="Tests only compatible with a testagent") def test_setting_origin_environment(test_agent_session, run_python_code_in_subprocess): env = os.environ.copy() + env.update( { "DD_TRACE_SAMPLE_RATE": "0.1", @@ -36,31 +40,29 @@ def test_setting_origin_environment(test_agent_session, run_python_code_in_subpr assert status == 0, err events = test_agent_session.get_events() - assert _get_latest_telemetry_config_item(events, "trace_sample_rate") == { + events_trace_sample_rate = _get_telemetry_config_items(events, "trace_sample_rate") + + assert { "name": "trace_sample_rate", "value": "0.1", "origin": "env_var", - } - assert _get_latest_telemetry_config_item(events, "logs_injection_enabled") == { - "name": "logs_injection_enabled", - "value": "true", - "origin": "env_var", - } - assert _get_latest_telemetry_config_item(events, "trace_header_tags") == { + } in events_trace_sample_rate + + events_logs_injection_enabled = _get_telemetry_config_items(events, "logs_injection_enabled") + assert {"name": "logs_injection_enabled", "value": "true", "origin": "env_var"} in events_logs_injection_enabled + + events_trace_header_tags = _get_telemetry_config_items(events, "trace_header_tags") + assert { "name": "trace_header_tags", "value": "X-Header-Tag-1:header_tag_1,X-Header-Tag-2:header_tag_2", "origin": "env_var", - } - assert _get_latest_telemetry_config_item(events, "trace_tags") == { - "name": "trace_tags", - "value": "team:apm,component:web", - "origin": "env_var", - } - assert _get_latest_telemetry_config_item(events, "tracing_enabled") == { - "name": "tracing_enabled", - "value": "true", - "origin": "env_var", - } + } in events_trace_header_tags + + events_trace_tags = _get_telemetry_config_items(events, "trace_tags") + assert {"name": "trace_tags", "value": "team:apm,component:web", "origin": "env_var"} in events_trace_tags + + events_tracing_enabled = _get_telemetry_config_items(events, "tracing_enabled") + assert {"name": "tracing_enabled", "value": "true", "origin": "env_var"} in events_tracing_enabled @pytest.mark.skipif(AGENT_VERSION != "testagent", reason="Tests only compatible with a testagent") @@ -93,31 +95,40 @@ def test_setting_origin_code(test_agent_session, run_python_code_in_subprocess): assert status == 0, err events = test_agent_session.get_events() - assert _get_latest_telemetry_config_item(events, "trace_sample_rate") == { + events_trace_sample_rate = _get_telemetry_config_items(events, "trace_sample_rate") + assert { "name": "trace_sample_rate", "value": "0.2", "origin": "code", - } - assert _get_latest_telemetry_config_item(events, "logs_injection_enabled") == { + } in events_trace_sample_rate + + events_logs_injection_enabled = _get_telemetry_config_items(events, "logs_injection_enabled") + assert { "name": "logs_injection_enabled", "value": "false", "origin": "code", - } - assert _get_latest_telemetry_config_item(events, "trace_header_tags") == { + } in events_logs_injection_enabled + + events_trace_header_tags = _get_telemetry_config_items(events, "trace_header_tags") + assert { "name": "trace_header_tags", "value": "header:value", "origin": "code", - } - assert _get_latest_telemetry_config_item(events, "trace_tags") == { + } in events_trace_header_tags + + events_trace_tags = _get_telemetry_config_items(events, "trace_tags") + assert { "name": "trace_tags", "value": "header:value", "origin": "code", - } - assert _get_latest_telemetry_config_item(events, "tracing_enabled") == { + } in events_trace_tags + + events_tracing_enabled = _get_telemetry_config_items(events, "tracing_enabled") + assert { "name": "tracing_enabled", "value": "false", "origin": "code", - } + } in events_tracing_enabled @pytest.mark.skipif(AGENT_VERSION != "testagent", reason="Tests only compatible with a testagent") @@ -155,11 +166,8 @@ def test_remoteconfig_sampling_rate_default(test_agent_session, run_python_code_ assert status == 0, err events = test_agent_session.get_events() - assert _get_latest_telemetry_config_item(events, "trace_sample_rate") == { - "name": "trace_sample_rate", - "value": "1.0", - "origin": "default", - } + events_trace_sample_rate = _get_telemetry_config_items(events, "trace_sample_rate") + assert {"name": "trace_sample_rate", "value": "0.5", "origin": "remote_config"} in events_trace_sample_rate @pytest.mark.skipif(AGENT_VERSION != "testagent", reason="Tests only compatible with a testagent") @@ -178,11 +186,8 @@ def test_remoteconfig_sampling_rate_telemetry(test_agent_session, run_python_cod assert status == 0, err events = test_agent_session.get_events() - assert _get_latest_telemetry_config_item(events, "trace_sample_rate") == { - "name": "trace_sample_rate", - "value": "0.5", - "origin": "remote_config", - } + events_trace_sample_rate = _get_telemetry_config_items(events, "trace_sample_rate") + assert {"name": "trace_sample_rate", "value": "0.5", "origin": "remote_config"} in events_trace_sample_rate @pytest.mark.skipif(AGENT_VERSION != "testagent", reason="Tests only compatible with a testagent") @@ -211,8 +216,9 @@ def test_remoteconfig_header_tags_telemetry(test_agent_session, run_python_code_ assert status == 0, err events = test_agent_session.get_events() - assert _get_latest_telemetry_config_item(events, "trace_header_tags") == { + events_trace_header_tags = _get_telemetry_config_items(events, "trace_header_tags") + assert { "name": "trace_header_tags", "value": "used:header_tag_69,unused:header_tag_70,used-with-default:", "origin": "remote_config", - } + } in events_trace_header_tags From 40cd8b28d41c66e2e9b6e568ea799458f474cd9c Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 18 Mar 2024 17:16:36 -0400 Subject: [PATCH 34/69] add wrong_metric_types snapshot that must've been missed in merge conflict --- ...ace_with_wrong_metrics_types_not_sent.json | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 tests/snapshots/tests.integration.test_integration_snapshots.test_trace_with_wrong_metrics_types_not_sent.json diff --git a/tests/snapshots/tests.integration.test_integration_snapshots.test_trace_with_wrong_metrics_types_not_sent.json b/tests/snapshots/tests.integration.test_integration_snapshots.test_trace_with_wrong_metrics_types_not_sent.json new file mode 100644 index 00000000000..ee8853d06f9 --- /dev/null +++ b/tests/snapshots/tests.integration.test_integration_snapshots.test_trace_with_wrong_metrics_types_not_sent.json @@ -0,0 +1,25 @@ +[[ + { + "name": "parent", + "service": "", + "resource": "parent", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "65f8a77100000000", + "language": "python", + "runtime-id": "005360373bf04c7fb732555994db4f78" + }, + "metrics": { + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 5837 + }, + "duration": 1004386709, + "start": 1710794609240060721 + }]] From 0b43db855dacee07514ef74d7e39466d67e149d8 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 18 Mar 2024 17:30:01 -0400 Subject: [PATCH 35/69] fix flakey extended sampling snapshot test --- tests/integration/test_sampling.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_sampling.py b/tests/integration/test_sampling.py index 806bf02cb0b..95f22fd4138 100644 --- a/tests/integration/test_sampling.py +++ b/tests/integration/test_sampling.py @@ -204,10 +204,10 @@ def test_extended_sampling_w_None_meta(writer, tracer): tracer.trace("should_send1", resource="banana").finish() -@snapshot_parametrized_with_writers -def test_extended_sampling_w_metrics(writer, tracer): +@snapshot() +def test_extended_sampling_w_metrics(tracer): sampler = DatadogSampler(rules=[SamplingRule(0, tags={"test": 123}, resource=RESOURCE)]) - tracer.configure(sampler=sampler, writer=writer) + tracer.configure(sampler=sampler) tracer._tags = {"test": 123} tracer.trace("should_not_send", resource=RESOURCE).finish() From a311d0bfcad36cfbd924937ea924099237787937 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 19 Mar 2024 11:06:03 -0400 Subject: [PATCH 36/69] remove flakey snapshot directly calling fork after realizing pOpen tests test that sampling runs before fork already --- .../integration/test_integration_snapshots.py | 39 ++++----------- ...ts.test_tracer_trace_across_only_fork.json | 47 ------------------- 2 files changed, 8 insertions(+), 78 deletions(-) delete mode 100644 tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_only_fork.json diff --git a/tests/integration/test_integration_snapshots.py b/tests/integration/test_integration_snapshots.py index 7895e512981..e4b9ca4f577 100644 --- a/tests/integration/test_integration_snapshots.py +++ b/tests/integration/test_integration_snapshots.py @@ -115,7 +115,9 @@ def test_synchronous_writer(): def test_tracer_trace_across_popen(): """ When a trace is started in a parent process and a child process is spawned - The trace should be continued in the child process + The trace should be continued in the child process. The fact that + the child span has does not have '_dd.p.dm' shows that sampling was run + before fork automatically. """ tracer = Tracer() @@ -124,9 +126,7 @@ def task(tracer): pass tracer.shutdown() - with tracer.trace("parent") as parent: - # need to manually sample before Popen - tracer.sampler.sample(parent) + with tracer.trace("parent"): p = multiprocessing.Process(target=task, args=(tracer,)) p.start() p.join() @@ -138,7 +138,9 @@ def task(tracer): def test_tracer_trace_across_multiple_popens(): """ When a trace is started and crosses multiple process boundaries - The trace should be continued in the child processes + The trace should be continued in the child processes. The fact that + the child span has does not have '_dd.p.dm' shows that sampling was run + before fork automatically. """ tracer = Tracer() @@ -154,38 +156,13 @@ def task2(tracer): p.join() tracer.shutdown() - with tracer.trace("parent") as parent: - # need to manually sample before Popen - tracer.sampler.sample(parent) + with tracer.trace("parent"): p = multiprocessing.Process(target=task, args=(tracer,)) p.start() p.join() tracer.shutdown() -@snapshot(async_mode=False) -def test_tracer_trace_across_only_fork(): - """ - When a trace is started in a parent process and a child process is spawned - The trace should be continued in the child process. The fact that - the child span has does not have '_dd.p.dm' shows that sampling was run - before fork automatically. - """ - tracer = Tracer() - - with tracer.trace("parent"): - # Create a child process - # using os.fork() method - pid = os.fork() - - # pid greater than 0 represents - # the parent process - if pid == 0: - tracer.trace("child") - - tracer.shutdown() - - @snapshot() def test_wrong_span_name_type_not_sent(): """Span names should be a text type.""" diff --git a/tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_only_fork.json b/tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_only_fork.json deleted file mode 100644 index 2d765af1d3c..00000000000 --- a/tests/snapshots/tests.integration.test_integration_snapshots.test_tracer_trace_across_only_fork.json +++ /dev/null @@ -1,47 +0,0 @@ -[[ - { - "name": "parent", - "service": "", - "resource": "parent", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "65e6528700000000", - "language": "python", - "runtime-id": "dc1a7b919e604e4f80d345b1332732b7" - }, - "metrics": { - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 80715 - }, - "duration": 1789000, - "start": 1709593223433895000 - }, - { - "name": "child", - "service": "", - "resource": "child", - "trace_id": 0, - "span_id": 2, - "parent_id": 1, - "type": "", - "error": 0, - "meta": { - "_dd.p.tid": "65e6528700000000", - "language": "python", - "runtime-id": "67a247dc01ba4cb3afe7312182747093" - }, - "metrics": { - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 80718 - }, - "duration": 0, - "start": 1709593223438223000 - }]] From b07e4024a13353a9f8286f7aa50e86e791befdf5 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 19 Mar 2024 11:07:14 -0400 Subject: [PATCH 37/69] decrease flakiness of propagation multispan test --- tests/integration/test_propagation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_propagation.py b/tests/integration/test_propagation.py index 715799b5071..5bd0a122a8c 100644 --- a/tests/integration/test_propagation.py +++ b/tests/integration/test_propagation.py @@ -37,8 +37,8 @@ def tracer(request): @pytest.mark.snapshot() -def test_trace_tags_multispan(tracer): - # type: (Tracer) -> None +def test_trace_tags_multispan(): + tracer = Tracer() headers = { "x-datadog-trace-id": "1234", "x-datadog-parent-id": "5678", From 8d563c505f7d788321caf78a52c47579db1adfcb Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Tue, 19 Mar 2024 15:03:33 -0400 Subject: [PATCH 38/69] Update releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml Co-authored-by: Emmett Butler <723615+emmettbutler@users.noreply.github.com> --- releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml b/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml index 63b68a2b0c9..8efa4ecdf25 100644 --- a/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml +++ b/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml @@ -1,7 +1,7 @@ --- prelude: > tracing: This release adds support for lazy sampling, essentially moving when we make a sampling decision for - a trace to the latest possible moment, these include the following: + a trace to the latest possible moment. These include the following: 1. Before encoding a trace chunk to be sent to the agent 2. Before making an outgoing request via HTTP, gRPC, or a DB call for any automatically instrumented integration 3. Before running ``os.fork()`` From 5e824a087003b9e8815136a9d1a90f514ce593ce Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Tue, 19 Mar 2024 15:14:47 -0400 Subject: [PATCH 39/69] Update ddtrace/propagation/http.py Co-authored-by: Emmett Butler <723615+emmettbutler@users.noreply.github.com> --- ddtrace/propagation/http.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index 597c712001b..b91a55169f3 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -960,8 +960,7 @@ def parent_call(): span = ddtrace.tracer.current_root_span() else: span = span._local_root - if span is not None: - if span.context.sampling_priority is None: + if span is not None and span.context.sampling_priority is None: ddtrace.tracer.sampler.sample(span._local_root) if PROPAGATION_STYLE_DATADOG in config._propagation_style_inject: _DatadogMultiHeader._inject(span_context, headers) From 94534c9373e7efb31e27c8b90af3e5487243d096 Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Tue, 19 Mar 2024 15:15:22 -0400 Subject: [PATCH 40/69] Update docs/advanced_usage.rst Co-authored-by: Emmett Butler <723615+emmettbutler@users.noreply.github.com> --- docs/advanced_usage.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 6deb160b063..7bc6a36e300 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -81,7 +81,7 @@ context:: Span objects are owned by the execution in which they are created and must be finished in the same execution. The span context can be used to continue a trace in a different execution by passing it and activating it on the other - end. Note, that in all instances of crossing into another + end. Note that in all instances of crossing into another execution (besides `os.fork` which is automatically instrumented), sampling should be run manually before entering the new execution to ensure that the sampling decision is the same across the trace. From f5e0163841c49e2142022d965bd8a26b5a5cf0a2 Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Tue, 19 Mar 2024 15:15:42 -0400 Subject: [PATCH 41/69] Update ddtrace/contrib/elasticsearch/patch.py Co-authored-by: Emmett Butler <723615+emmettbutler@users.noreply.github.com> --- ddtrace/contrib/elasticsearch/patch.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ddtrace/contrib/elasticsearch/patch.py b/ddtrace/contrib/elasticsearch/patch.py index dbed39eab72..1c8e43f079b 100644 --- a/ddtrace/contrib/elasticsearch/patch.py +++ b/ddtrace/contrib/elasticsearch/patch.py @@ -143,10 +143,9 @@ def _perform_request(func, instance, args, kwargs): span.set_tag(SPAN_MEASURED_KEY) # Only instrument if trace is sampled or if we haven't tried to sample yet - if span.context.sampling_priority is not None: - if span.context.sampling_priority <= 0: - yield func(*args, **kwargs) - return + if span.context.sampling_priority is not None and span.context.sampling_priority <= 0: + yield func(*args, **kwargs) + return method, target = args params = kwargs.get("params") From c916d229fc16e0533e02c4a1d9638a831a4119d4 Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Tue, 19 Mar 2024 15:15:56 -0400 Subject: [PATCH 42/69] Update ddtrace/contrib/algoliasearch/patch.py Co-authored-by: Emmett Butler <723615+emmettbutler@users.noreply.github.com> --- ddtrace/contrib/algoliasearch/patch.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ddtrace/contrib/algoliasearch/patch.py b/ddtrace/contrib/algoliasearch/patch.py index 8a32e07bfc3..8b99caf98ea 100644 --- a/ddtrace/contrib/algoliasearch/patch.py +++ b/ddtrace/contrib/algoliasearch/patch.py @@ -125,9 +125,8 @@ def _patched_search(func, instance, wrapt_args, wrapt_kwargs): span.set_tag_str(SPAN_KIND, SpanKind.CLIENT) span.set_tag(SPAN_MEASURED_KEY) - if span.context.sampling_priority is not None: - if span.context.sampling_priority <= 0: - return func(*wrapt_args, **wrapt_kwargs) + if span.context.sampling_priority is not None and span.context.sampling_priority <= 0: + return func(*wrapt_args, **wrapt_kwargs) if config.algoliasearch.collect_query_text: span.set_tag_str("query.text", wrapt_kwargs.get("query", wrapt_args[0])) From b4e4106943c2cbd1bfe647952d438eba31ccca9e Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Tue, 19 Mar 2024 15:16:46 -0400 Subject: [PATCH 43/69] Update docs/advanced_usage.rst Co-authored-by: Emmett Butler <723615+emmettbutler@users.noreply.github.com> --- docs/advanced_usage.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 7bc6a36e300..3039ae2b8d3 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -82,7 +82,7 @@ context:: be finished in the same execution. The span context can be used to continue a trace in a different execution by passing it and activating it on the other end. Note that in all instances of crossing into another - execution (besides `os.fork` which is automatically instrumented), + execution, sampling should be run manually before entering the new execution to ensure that the sampling decision is the same across the trace. This can be done using `tracer.sampler.sample(tracer.current_root_span())` From bff056a0f89018f5e2bb783335cd358d41d37c5a Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Tue, 19 Mar 2024 15:18:40 -0400 Subject: [PATCH 44/69] Update ddtrace/_trace/processor/__init__.py Co-authored-by: Emmett Butler <723615+emmettbutler@users.noreply.github.com> --- ddtrace/_trace/processor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/_trace/processor/__init__.py b/ddtrace/_trace/processor/__init__.py index 0bd56d84fab..15b8393dc29 100644 --- a/ddtrace/_trace/processor/__init__.py +++ b/ddtrace/_trace/processor/__init__.py @@ -197,7 +197,7 @@ def on_span_finish(self, span): def _update_span_tags_from_context(span, context): - span._context._update_tags(span) + context._update_tags(span) @attr.s From 3164297531f72fa4a6345a4964e05494a737637a Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 19 Mar 2024 15:31:59 -0400 Subject: [PATCH 45/69] update before fork naming --- ddtrace/_trace/tracer.py | 4 ++-- ddtrace/internal/forksafe.py | 24 +++++++++++------------- ddtrace/propagation/http.py | 2 +- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index 0205d7699c7..ff4284a8686 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -233,7 +233,7 @@ def __init__( self.context_provider = context_provider or DefaultContextProvider() self._user_sampler: Optional[BaseSampler] = None self.sampler: BaseSampler = DatadogSampler() - forksafe.register_before_in_child(self.sample_before_fork) + forksafe.register_before_fork(self.sample_before_fork) self._dogstatsd_url = agent.get_stats_url() if dogstatsd_url is None else dogstatsd_url self._compute_stats = config._trace_compute_stats self._agent_url: str = agent.get_trace_url() if url is None else url @@ -1046,7 +1046,7 @@ def shutdown(self, timeout: Optional[float] = None) -> None: atexit.unregister(self._atexit) forksafe.unregister(self._child_after_fork) - forksafe.unregister_before_in_child(self.sample_before_fork) + forksafe.unregister_before_fork(self.sample_before_fork) self.start_span = self._start_span_after_shutdown # type: ignore[assignment] diff --git a/ddtrace/internal/forksafe.py b/ddtrace/internal/forksafe.py index 4b64fb906d1..75857cf2659 100644 --- a/ddtrace/internal/forksafe.py +++ b/ddtrace/internal/forksafe.py @@ -14,7 +14,7 @@ _registry = [] # type: typing.List[typing.Callable[[], None]] -_registry_before_in_child = [] # type: typing.List[typing.Callable[[], None]] +_registry_before_fork = [] # type: typing.List[typing.Callable[[], None]] # Some integrations might require after-fork hooks to be executed after the # actual call to os.fork with earlier versions of Python (<= 3.6), else issues @@ -51,13 +51,13 @@ def ddtrace_after_in_child(): log.exception("Exception ignored in forksafe hook %r", hook) -def ddtrace_before_in_child(): +def ddtrace_before_fork(): # type: () -> None - global _registry_before_in_child + global _registry_before_fork # DEV: we make a copy of the registry to prevent hook execution from # introducing new hooks, potentially causing an infinite loop. - for hook in list(_registry_before_in_child): + for hook in list(_registry_before_fork): try: hook() except Exception: @@ -76,15 +76,15 @@ def register(after_in_child): return after_in_child -def register_before_in_child(before_in_child): +def register_before_fork(before_fork): # type: (typing.Callable[[], None]) -> typing.Callable[[], None] """Register a function to be called before fork in the parent process. Note that ``before_in_child`` will be called in all parent processes across multiple forks unless it is unregistered. """ - _registry_before_in_child.append(before_in_child) - return before_in_child + _registry_before_fork.append(before_fork) + return before_fork def unregister(after_in_child): @@ -95,18 +95,16 @@ def unregister(after_in_child): log.info("after_in_child hook %s was unregistered without first being registered", after_in_child.__name__) -def unregister_before_in_child(before_in_child): +def unregister_before_fork(before_fork): # type: (typing.Callable[[], None]) -> None try: - _registry_before_in_child.remove(before_in_child) + _registry_before_fork.remove(before_fork) except ValueError: - log.info("before_in_child hook %s was unregistered without first being registered", before_in_child.__name__) + log.info("before_in_child hook %s was unregistered without first being registered", before_fork.__name__) if hasattr(os, "register_at_fork"): - os.register_at_fork( - before=ddtrace_before_in_child, after_in_child=ddtrace_after_in_child, after_in_parent=set_forked - ) + os.register_at_fork(before=ddtrace_before_fork, after_in_child=ddtrace_after_in_child, after_in_parent=set_forked) _resetable_objects = weakref.WeakSet() # type: weakref.WeakSet[ResetObject] diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index b91a55169f3..2d602cfc0a8 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -961,7 +961,7 @@ def parent_call(): else: span = span._local_root if span is not None and span.context.sampling_priority is None: - ddtrace.tracer.sampler.sample(span._local_root) + ddtrace.tracer.sampler.sample(span._local_root) if PROPAGATION_STYLE_DATADOG in config._propagation_style_inject: _DatadogMultiHeader._inject(span_context, headers) if PROPAGATION_STYLE_B3_MULTI in config._propagation_style_inject: From ebfc3503e494c8013dbaa589d8d6c79845cee447 Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Tue, 19 Mar 2024 16:11:04 -0400 Subject: [PATCH 46/69] Update ddtrace/propagation/http.py Co-authored-by: Emmett Butler <723615+emmettbutler@users.noreply.github.com> --- ddtrace/propagation/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index 2d602cfc0a8..0c63bb5f85f 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -954,7 +954,7 @@ def parent_call(): if config.propagation_http_baggage_enabled is True and span_context._baggage is not None: for key in span_context._baggage: headers[_HTTP_BAGGAGE_PREFIX + key] = span_context._baggage[key] - if ddtrace.tracer.sampler: + if hasattr(ddtrace, "tracer") and hasattr(ddtrace.tracer, "sampler") and ddtrace.tracer.sampler: if span is None: # if a span is not passed in explicitly, we do our best to grab the current root span to sample span = ddtrace.tracer.current_root_span() From 6100df47ad5619948dd069e784a52431625f4101 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 19 Mar 2024 16:18:01 -0400 Subject: [PATCH 47/69] simplify telemetry config items return --- tests/integration/test_settings.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_settings.py b/tests/integration/test_settings.py index 28d427463f1..d631b70ed9e 100644 --- a/tests/integration/test_settings.py +++ b/tests/integration/test_settings.py @@ -12,8 +12,7 @@ def _get_telemetry_config_items(events, item_name): if item_name == item["name"]: items.append(item) if items: - return items - return None + return items or None @pytest.mark.skipif(AGENT_VERSION != "testagent", reason="Tests only compatible with a testagent") From eb7fe91ae207787afc22c0d1f769882399c1a2d6 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 19 Mar 2024 16:49:21 -0400 Subject: [PATCH 48/69] remove uneeded method to update tags in processor, update tags in sampler.sample --- ddtrace/_trace/processor/__init__.py | 6 +----- ddtrace/sampler.py | 2 ++ 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/ddtrace/_trace/processor/__init__.py b/ddtrace/_trace/processor/__init__.py index 15b8393dc29..e0ec1f4d1b9 100644 --- a/ddtrace/_trace/processor/__init__.py +++ b/ddtrace/_trace/processor/__init__.py @@ -196,10 +196,6 @@ def on_span_finish(self, span): span.set_metric("_dd.top_level", 1) -def _update_span_tags_from_context(span, context): - context._update_tags(span) - - @attr.s class TraceTagsProcessor(TraceProcessor): """Processor that applies trace-level tags to the trace.""" @@ -223,7 +219,7 @@ def process_trace(self, trace): if not ctx: return trace - _update_span_tags_from_context(chunk_root, ctx) + ctx._update_tags(chunk_root) self._set_git_metadata(chunk_root) chunk_root.set_tag_str("language", "python") # for 128 bit trace ids diff --git a/ddtrace/sampler.py b/ddtrace/sampler.py index da9d660bc87..69cc58c73d7 100644 --- a/ddtrace/sampler.py +++ b/ddtrace/sampler.py @@ -293,6 +293,8 @@ def _parse_rules_from_env_variable(self, rules): return sampling_rules def sample(self, span): + span.context._update_tags(span) + matched_rule = _get_highest_precedence_rule_matching(span, self.rules) sampler = self._default_sampler # type: BaseSampler From dfeb2cbc0418b5b0ace017b68430c7d82e9a32be Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 19 Mar 2024 16:54:51 -0400 Subject: [PATCH 49/69] update system-tests ref --- .github/workflows/system-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/system-tests.yml b/.github/workflows/system-tests.yml index 03a33a1118f..29b8b805226 100644 --- a/.github/workflows/system-tests.yml +++ b/.github/workflows/system-tests.yml @@ -56,6 +56,7 @@ jobs: uses: actions/checkout@v3 with: repository: 'DataDog/system-tests' + ref: zachg/update_inject_method - name: Checkout dd-trace-py if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' @@ -174,6 +175,7 @@ jobs: uses: actions/checkout@v3 with: repository: 'DataDog/system-tests' + ref: zachg/update_inject_method - name: Checkout dd-trace-py if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' uses: actions/checkout@v3 From a80df8b42a95b2c73e54df9f4ac6ea0c1b6fc607 Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Tue, 19 Mar 2024 17:07:33 -0400 Subject: [PATCH 50/69] Update ddtrace/_trace/tracer.py Co-authored-by: Munir Abdinur --- ddtrace/_trace/tracer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index ff4284a8686..39fa0b136ff 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -118,8 +118,9 @@ def _default_span_processors_factory( # FIXME: type should be AppsecSpanProcessor but we have a cyclic import here """Construct the default list of span processors to use.""" trace_processors: List[TraceProcessor] = [] - trace_processors += [PeerServiceProcessor(_ps_config), BaseServiceProcessor()] trace_processors += [ + PeerServiceProcessor(_ps_config), + BaseServiceProcessor(), TraceSamplingProcessor(compute_stats_enabled, trace_sampler), TraceTagsProcessor(), ] From bd07f2d57bbaf8fd74eb16fd49abea18a04ff8fa Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Tue, 19 Mar 2024 17:50:48 -0400 Subject: [PATCH 51/69] nit --- ddtrace/_trace/tracer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index 39fa0b136ff..6d38108f794 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -234,7 +234,6 @@ def __init__( self.context_provider = context_provider or DefaultContextProvider() self._user_sampler: Optional[BaseSampler] = None self.sampler: BaseSampler = DatadogSampler() - forksafe.register_before_fork(self.sample_before_fork) self._dogstatsd_url = agent.get_stats_url() if dogstatsd_url is None else dogstatsd_url self._compute_stats = config._trace_compute_stats self._agent_url: str = agent.get_trace_url() if url is None else url @@ -283,6 +282,7 @@ def __init__( self._hooks = _hooks.Hooks() atexit.register(self._atexit) + forksafe.register_before_fork(self.sample_before_fork) forksafe.register(self._child_after_fork) self._shutdown_lock = RLock() From 4838639a1f30684a4ece7afaa4697c73f6b5d910 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Wed, 20 Mar 2024 10:34:15 -0400 Subject: [PATCH 52/69] change otel propagation test to test automatic sampling and injection --- tests/opentelemetry/test_trace.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/opentelemetry/test_trace.py b/tests/opentelemetry/test_trace.py index 09b6057d9c1..94415d48f09 100644 --- a/tests/opentelemetry/test_trace.py +++ b/tests/opentelemetry/test_trace.py @@ -161,14 +161,8 @@ def test_otel_start_current_span_without_default_args(oteltracer): ) @pytest.mark.snapshot(ignores=["metrics.net.peer.port", "meta.traceparent", "meta.flask.version"]) def test_distributed_trace_with_flask_app(flask_client, oteltracer): # noqa:F811 - with oteltracer.start_as_current_span("test-otel-distributed-trace") as otel_span: - # need to manually run sampling before grabbing otel_span._ddspan.context._tracestate - oteltracer._tracer.sampler.sample(otel_span._ddspan) - headers = { - "traceparent": otel_span._ddspan.context._traceparent, - "tracestate": otel_span._ddspan.context._tracestate, - } - resp = flask_client.get("/otel", headers=headers) + with oteltracer.start_as_current_span("test-otel-distributed-trace"): + resp = flask_client.get("/otel") assert resp.text == "otel" assert resp.status_code == 200 From 1fc48f34f3215c78b9c8ad217f5403b196541f81 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Wed, 20 Mar 2024 15:09:30 -0400 Subject: [PATCH 53/69] clean up --- ddtrace/_trace/tracer.py | 9 ++++----- ddtrace/contrib/botocore/services/sqs.py | 1 - ddtrace/contrib/celery/signals.py | 2 +- ddtrace/propagation/_database_monitoring.py | 2 +- ddtrace/propagation/http.py | 1 - tests/internal/test_database_monitoring.py | 4 ++-- tests/opentelemetry/test_context.py | 2 +- 7 files changed, 9 insertions(+), 12 deletions(-) diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index d7b72de5eaf..faa10fcefd5 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -271,7 +271,7 @@ def __init__( self._compute_stats, self._single_span_sampling_rules, self._agent_url, - self.sampler, + self._sampler, self._endpoint_call_counter_span_processor, ) if config._data_streams_enabled: @@ -356,12 +356,11 @@ def sample_before_fork(self) -> None: if span is not None and span.context.sampling_priority is None: self.sample(span) - @property def _sampler(self): return self._sampler_current - @sampler.setter + @_sampler.setter def _sampler(self, value): self._sampler_current = value # we need to update the processor that uses the sampler @@ -559,7 +558,7 @@ def configure( self._compute_stats, self._single_span_sampling_rules, self._agent_url, - self.sampler, + self._sampler, self._endpoint_call_counter_span_processor, ) @@ -625,7 +624,7 @@ def _child_after_fork(self): self._compute_stats, self._single_span_sampling_rules, self._agent_url, - self.sampler, + self._sampler, self._endpoint_call_counter_span_processor, ) diff --git a/ddtrace/contrib/botocore/services/sqs.py b/ddtrace/contrib/botocore/services/sqs.py index f3a307301b7..f08cd5df5f1 100644 --- a/ddtrace/contrib/botocore/services/sqs.py +++ b/ddtrace/contrib/botocore/services/sqs.py @@ -100,7 +100,6 @@ def inject_trace_to_sqs_or_sns_message(params, span, endpoint_service=None): """ trace_data = {} HTTPPropagator.inject(span.context, trace_data) - core.dispatch("botocore.sqs_sns.start", [endpoint_service, trace_data, params]) inject_trace_data_to_message_attributes(trace_data, params, endpoint_service) diff --git a/ddtrace/contrib/celery/signals.py b/ddtrace/contrib/celery/signals.py index 70c97a0ddc1..e16f5ecace9 100644 --- a/ddtrace/contrib/celery/signals.py +++ b/ddtrace/contrib/celery/signals.py @@ -137,7 +137,7 @@ def trace_before_publish(*args, **kwargs): if config.celery["distributed_tracing"]: trace_headers = {} - propagator.inject(span.context, trace_headers, span) + propagator.inject(span.context, trace_headers) # put distributed trace headers where celery will propagate them task_headers = kwargs.get("headers") or {} diff --git a/ddtrace/propagation/_database_monitoring.py b/ddtrace/propagation/_database_monitoring.py index db81328f2cb..9de7564fb07 100644 --- a/ddtrace/propagation/_database_monitoring.py +++ b/ddtrace/propagation/_database_monitoring.py @@ -66,7 +66,7 @@ def inject(self, dbspan, args, kwargs): # run sampling before injection to propagate correct sampling priority if hasattr(ddtrace, "tracer") and hasattr(ddtrace.tracer, "sample"): if dbspan.context.sampling_priority is None: - ddtrace.tracer.sample(dbspan) + ddtrace.tracer.sample(dbspan.local_root) dbm_comment = self._get_dbm_comment(dbspan) if dbm_comment is None: diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index 2772eac3039..19b919bace0 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -924,7 +924,6 @@ def _resolve_contexts(contexts, styles_w_ctx, normalized_headers): primary_context._span_links = links return primary_context - # DEV: Change method signature to just take span and pull out context for next major version 3.0 @staticmethod def inject(span_context, headers): # type: (Context, Dict[str, str]) -> None diff --git a/tests/internal/test_database_monitoring.py b/tests/internal/test_database_monitoring.py index bb5bda4ba77..f7849eab81a 100644 --- a/tests/internal/test_database_monitoring.py +++ b/tests/internal/test_database_monitoring.py @@ -99,7 +99,7 @@ def test_dbm_propagation_full_mode(): # since inject() below will call the sampler we just call the sampler here # so sampling priority will align in the traceparent - tracer.sampler.sample(dbspan._local_root) + tracer.sample(dbspan._local_root) # when dbm propagation mode is full sql comments should be generated with dbm tags and traceparent keys dbm_popagator = _database_monitoring._DBM_Propagator(0, "procedure") @@ -179,7 +179,7 @@ def test_dbm_peer_entity_tags(): # since inject() below will call the sampler we just call the sampler here # so sampling priority will align in the traceparent - tracer.sampler.sample(dbspan._local_root) + tracer.sample(dbspan._local_root) # when dbm propagation mode is full sql comments should be generated with dbm tags and traceparent keys dbm_propagator = _database_monitoring._DBM_Propagator(0, "procedure") diff --git a/tests/opentelemetry/test_context.py b/tests/opentelemetry/test_context.py index c627fbf9f53..9760cd51b2c 100644 --- a/tests/opentelemetry/test_context.py +++ b/tests/opentelemetry/test_context.py @@ -93,7 +93,7 @@ def _subprocess_task(parent_span_context, errors): def test_otel_trace_across_fork(oteltracer): errors = multiprocessing.Queue() with oteltracer.start_as_current_span("root") as root: - oteltracer._tracer.sampler.sample(root._ddspan) + oteltracer._tracer.sample(root._ddspan) p = multiprocessing.Process(target=_subprocess_task, args=(root.get_span_context(), errors)) try: p.start() From 070ee9b3c4bbe8499041b0e0d4cbc0f3a65433e7 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Wed, 20 Mar 2024 17:02:06 -0400 Subject: [PATCH 54/69] add test for changing tracer's sampler also changes the processor's sampler --- tests/tracer/test_processors.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/tracer/test_processors.py b/tests/tracer/test_processors.py index c434aa98523..34f1d77bc6a 100644 --- a/tests/tracer/test_processors.py +++ b/tests/tracer/test_processors.py @@ -10,6 +10,7 @@ from ddtrace._trace.processor import SpanProcessor from ddtrace._trace.processor import SpanSamplingProcessor from ddtrace._trace.processor import TraceProcessor +from ddtrace._trace.processor import TraceSamplingProcessor from ddtrace._trace.processor import TraceTagsProcessor from ddtrace._trace.span import Span from ddtrace.constants import _SINGLE_SPAN_SAMPLING_MAX_PER_SEC @@ -387,6 +388,24 @@ def test_span_creation_metrics_disabled_telemetry(): mock_tm.assert_not_called() +def test_changing_tracer_sampler_changes_tracesamplingprocessor_sampler(): + """Changing the tracer sampler should change the sampling processor's sampler""" + tracer = Tracer() + # get processor + for aggregator in tracer._deferred_processors: + if type(aggregator) == SpanAggregator: + for processor in aggregator._trace_processors: + if type(processor) == TraceSamplingProcessor: + sampling_processor = processor + + assert sampling_processor.sampler is tracer._sampler + + new_sampler = mock.Mock() + tracer._sampler = new_sampler + + assert sampling_processor.sampler is new_sampler + + def test_single_span_sampling_processor(): """Test that single span sampling tags are applied to spans that should get sampled""" From 50ff37dd223f0a34a9f3f677b4e077d43e524ff4 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Wed, 20 Mar 2024 17:16:38 -0400 Subject: [PATCH 55/69] clean up --- ddtrace/propagation/_database_monitoring.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/propagation/_database_monitoring.py b/ddtrace/propagation/_database_monitoring.py index 9de7564fb07..8e2612cdd09 100644 --- a/ddtrace/propagation/_database_monitoring.py +++ b/ddtrace/propagation/_database_monitoring.py @@ -66,7 +66,7 @@ def inject(self, dbspan, args, kwargs): # run sampling before injection to propagate correct sampling priority if hasattr(ddtrace, "tracer") and hasattr(ddtrace.tracer, "sample"): if dbspan.context.sampling_priority is None: - ddtrace.tracer.sample(dbspan.local_root) + ddtrace.tracer.sample(dbspan._local_root) dbm_comment = self._get_dbm_comment(dbspan) if dbm_comment is None: From ebb1e2294c5ea8e566f369371ea6222e2efa834e Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Wed, 20 Mar 2024 17:30:14 -0400 Subject: [PATCH 56/69] refactor forksafe hook running code --- ddtrace/internal/forksafe.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/ddtrace/internal/forksafe.py b/ddtrace/internal/forksafe.py index 75857cf2659..687fb2d7a49 100644 --- a/ddtrace/internal/forksafe.py +++ b/ddtrace/internal/forksafe.py @@ -1,6 +1,7 @@ """ An API to provide fork-safe functions. """ +import functools import logging import os import threading @@ -37,13 +38,9 @@ def has_forked(): return _forked -def ddtrace_after_in_child(): - # type: () -> None - global _registry - - # DEV: we make a copy of the registry to prevent hook execution from - # introducing new hooks, potentially causing an infinite loop. - for hook in list(_registry): +def run_hooks(registry): + # type: (typing.List[typing.Callable[[], None]]) -> None + for hook in registry: try: hook() except Exception: @@ -51,18 +48,8 @@ def ddtrace_after_in_child(): log.exception("Exception ignored in forksafe hook %r", hook) -def ddtrace_before_fork(): - # type: () -> None - global _registry_before_fork - - # DEV: we make a copy of the registry to prevent hook execution from - # introducing new hooks, potentially causing an infinite loop. - for hook in list(_registry_before_fork): - try: - hook() - except Exception: - # Mimic the behaviour of Python's fork hooks. - log.exception("Exception ignored in forksafe hook %r", hook) +ddtrace_before_fork = functools.partial(run_hooks, _registry_before_fork) +ddtrace_after_in_child = functools.partial(run_hooks, _registry) def register(after_in_child): From 15328e09767b72dfa8628a90abda5d74fbae427e Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Thu, 21 Mar 2024 12:21:19 -0400 Subject: [PATCH 57/69] more forksafe refactoring --- ddtrace/internal/forksafe.py | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/ddtrace/internal/forksafe.py b/ddtrace/internal/forksafe.py index 687fb2d7a49..38e5f3f71e4 100644 --- a/ddtrace/internal/forksafe.py +++ b/ddtrace/internal/forksafe.py @@ -52,26 +52,13 @@ def run_hooks(registry): ddtrace_after_in_child = functools.partial(run_hooks, _registry) -def register(after_in_child): - # type: (typing.Callable[[], None]) -> typing.Callable[[], None] - """Register a function to be called after fork in the child process. - - Note that ``after_in_child`` will be called in all child processes across - multiple forks unless it is unregistered. - """ - _registry.append(after_in_child) - return after_in_child +def register_hook(registry, hook): + _registry.append(hook) + return hook -def register_before_fork(before_fork): - # type: (typing.Callable[[], None]) -> typing.Callable[[], None] - """Register a function to be called before fork in the parent process. - - Note that ``before_in_child`` will be called in all parent processes across - multiple forks unless it is unregistered. - """ - _registry_before_fork.append(before_fork) - return before_fork +register_before_fork = functools.partial(register_hook, _registry_before_fork) +register = functools.partial(register_hook, _registry) def unregister(after_in_child): From 1da9cc7f032e58b105e56ddbdb73921aaf162041 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Thu, 21 Mar 2024 12:34:30 -0400 Subject: [PATCH 58/69] more forksafe refactoring --- ddtrace/internal/forksafe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/internal/forksafe.py b/ddtrace/internal/forksafe.py index 38e5f3f71e4..f17376aa141 100644 --- a/ddtrace/internal/forksafe.py +++ b/ddtrace/internal/forksafe.py @@ -53,7 +53,7 @@ def run_hooks(registry): def register_hook(registry, hook): - _registry.append(hook) + registry.append(hook) return hook From 2df6f37792d1f51490f677b2e04b796c5e707e2e Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Thu, 21 Mar 2024 12:58:17 -0400 Subject: [PATCH 59/69] add span arg back to inject method --- ddtrace/propagation/http.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index 19b919bace0..a173d697a1d 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -925,8 +925,8 @@ def _resolve_contexts(contexts, styles_w_ctx, normalized_headers): return primary_context @staticmethod - def inject(span_context, headers): - # type: (Context, Dict[str, str]) -> None + def inject(span_context, headers, non_active_span=None): + # type: (Context, Dict[str, str], Optional[Span]) -> None """Inject Context attributes that have to be propagated as HTTP headers. Here is an example using `requests`:: @@ -944,7 +944,18 @@ def parent_call(): :param Context span_context: Span context to propagate. :param dict headers: HTTP headers to extend with tracing attributes. + :param Span non_active_span: Only to be used if injecting a non-active span. """ + if non_active_span is not None and non_active_span.context is not span_context: + log.error( + "span_context and non_active_span.context are not the same, but should be, non_active_span.context " + "will be used to generate distributed tracing headers. span_context: {}, non_active_span.context: {}", + span_context, + non_active_span.context, + ) + + span_context = non_active_span.context + # Not a valid context to propagate if span_context.trace_id is None or span_context.span_id is None: log.debug("tried to inject invalid context %r", span_context) From 24b05fc22cc78a14c38ed6a7d006f05d02059214 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Thu, 21 Mar 2024 17:35:30 -0400 Subject: [PATCH 60/69] refactor span sampling span processor into tracesampling processor so run order is correct --- ddtrace/_trace/processor/__init__.py | 63 +++++++++++----------------- ddtrace/_trace/tracer.py | 6 +-- 2 files changed, 25 insertions(+), 44 deletions(-) diff --git a/ddtrace/_trace/processor/__init__.py b/ddtrace/_trace/processor/__init__.py index e0ec1f4d1b9..a0e379d5def 100644 --- a/ddtrace/_trace/processor/__init__.py +++ b/ddtrace/_trace/processor/__init__.py @@ -135,24 +135,28 @@ def unregister(self): @attr.s class TraceSamplingProcessor(TraceProcessor): - """Processor that keeps traces that have sampled spans. If all spans - are unsampled then ``None`` is returned. + """Processor that runs both trace and span sampling rules. - Note that this processor is only effective if complete traces are sent. If - the spans of a trace are divided in separate lists then it's possible that - parts of the trace are unsampled when the whole trace should be sampled. + * Span sampling must be applied after trace sampling priority has been set. + * Span sampling rules are specified with a sample rate or rate limit as well as glob patterns + for matching spans on service and name. + * If the span sampling decision is to keep the span, then span sampling metrics are added to the span. + * If a dropped trace includes a span that had been kept by a span sampling rule, then the span is sent to the + Agent even if the dropped trace is not (as is the case when trace stats computation is enabled). """ _compute_stats_enabled = attr.ib(type=bool) sampler = attr.ib() + single_span_rules = attr.ib(type=List[SpanSamplingRule]) def process_trace(self, trace): # type: (List[Span]) -> Optional[List[Span]] + if trace: chunk_root = trace[0] root_ctx = chunk_root._context - # only sample if we haven't already sampled + # only trace sample if we haven't already sampled if root_ctx and root_ctx.sampling_priority is None: self.sampler.sample(trace[0]) # When stats computation is enabled in the tracer then we can @@ -166,6 +170,20 @@ def process_trace(self, trace): return single_spans or None + # single span sampling rules are applied after trace sampling + if self.single_span_rules: + for span in trace: + if span.context.sampling_priority is not None and span.context.sampling_priority <= 0: + for rule in self.single_span_rules: + if rule.match(span): + rule.sample(span) + # If stats computation is enabled, we won't send all spans to the agent. + # In order to ensure that the agent does not update priority sampling rates + # due to single spans sampling, we set all of these spans to manual keep. + if config._trace_compute_stats: + span.set_metric(SAMPLING_PRIORITY_KEY, USER_KEEP) + break + return trace log.debug("dropping trace %d with %d spans", trace[0].trace_id, len(trace)) @@ -367,36 +385,3 @@ def _queue_span_count_metrics(self, metric_name, tag_name, min_count=100): TELEMETRY_NAMESPACE_TAG_TRACER, metric_name, count, tags=((tag_name, tag_value),) ) self._span_metrics[metric_name] = defaultdict(int) - - -@attr.s -class SpanSamplingProcessor(SpanProcessor): - """SpanProcessor for sampling single spans: - - * Span sampling must be applied after trace sampling priority has been set. - * Span sampling rules are specified with a sample rate or rate limit as well as glob patterns - for matching spans on service and name. - * If the span sampling decision is to keep the span, then span sampling metrics are added to the span. - * If a dropped trace includes a span that had been kept by a span sampling rule, then the span is sent to the - Agent even if the dropped trace is not (as is the case when trace stats computation is enabled). - """ - - rules = attr.ib(type=List[SpanSamplingRule]) - - def on_span_start(self, span): - # type: (Span) -> None - pass - - def on_span_finish(self, span): - # type: (Span) -> None - # only sample if the span isn't already going to be sampled by trace sampler - if span.context.sampling_priority is not None and span.context.sampling_priority <= 0: - for rule in self.rules: - if rule.match(span): - rule.sample(span) - # If stats computation is enabled, we won't send all spans to the agent. - # In order to ensure that the agent does not update priority sampling rates - # due to single spans sampling, we set all of these spans to manual keep. - if config._trace_compute_stats: - span.set_metric(SAMPLING_PRIORITY_KEY, USER_KEEP) - break diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index faa10fcefd5..c8196943fbe 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -21,7 +21,6 @@ from ddtrace._trace.context import Context from ddtrace._trace.processor import SpanAggregator from ddtrace._trace.processor import SpanProcessor -from ddtrace._trace.processor import SpanSamplingProcessor from ddtrace._trace.processor import TopLevelSpanProcessor from ddtrace._trace.processor import TraceProcessor from ddtrace._trace.processor import TraceSamplingProcessor @@ -123,7 +122,7 @@ def _default_span_processors_factory( trace_processors += [ PeerServiceProcessor(_ps_config), BaseServiceProcessor(), - TraceSamplingProcessor(compute_stats_enabled, trace_sampler), + TraceSamplingProcessor(compute_stats_enabled, trace_sampler, single_span_sampling_rules), TraceTagsProcessor(), ] trace_processors += trace_filters @@ -172,9 +171,6 @@ def _default_span_processors_factory( span_processors.append(profiling_span_processor) - if single_span_sampling_rules: - span_processors.append(SpanSamplingProcessor(single_span_sampling_rules)) - # These need to run after all the other processors deferred_processors: List[SpanProcessor] = [ SpanAggregator( From f024603f67a7043edaeb76f119d5f7783c3ed2be Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Fri, 22 Mar 2024 11:01:01 -0400 Subject: [PATCH 61/69] add logic for picking root_span in inject method --- ddtrace/propagation/http.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index a173d697a1d..f2a85030dfa 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -11,6 +11,7 @@ import ddtrace from ddtrace._trace.span import Span # noqa:F401 +from ddtrace._trace.span import _is_top_level if sys.version_info >= (3, 8): @@ -966,7 +967,14 @@ def parent_call(): headers[_HTTP_BAGGAGE_PREFIX + key] = span_context._baggage[key] if hasattr(ddtrace, "tracer") and hasattr(ddtrace.tracer, "sample"): - root_span = ddtrace.tracer.current_root_span() + if non_active_span is not None: + if _is_top_level(non_active_span): + root_span = non_active_span + else: + root_span = non_active_span._local_root # type: ignore + else: + root_span = ddtrace.tracer.current_root_span() # type: ignore + if root_span is not None and root_span.context.sampling_priority is None: ddtrace.tracer.sample(root_span) From 72b33ba9bf20abb2e25bd30ca99a91e189c65bd6 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Fri, 22 Mar 2024 11:32:41 -0400 Subject: [PATCH 62/69] reset system-tests run to target main --- .github/workflows/system-tests.yml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/system-tests.yml b/.github/workflows/system-tests.yml index 29b8b805226..72359e8a068 100644 --- a/.github/workflows/system-tests.yml +++ b/.github/workflows/system-tests.yml @@ -19,7 +19,7 @@ jobs: - id: run_needed name: Check if run is needed run: | - git fetch origin ${{ github.event.pull_request.base.sha }} + git fetch origin ${{ github.event.pull_request.base.sha || 'main' }} export PATHS=$(git diff --name-only HEAD ${{ github.event.pull_request.base.sha }}) python -c "import os,sys,fnmatch;sys.exit(not bool([_ for pattern in {'ddtrace/*', 'setup*', 'pyproject.toml', '.github/workflows/system-tests.yml'} for _ in fnmatch.filter(os.environ['PATHS'].splitlines(), pattern)]))" continue-on-error: true @@ -50,13 +50,12 @@ jobs: uses: actions/setup-python@v4 with: python-version: '3.9' - + - name: Checkout system tests if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' uses: actions/checkout@v3 with: repository: 'DataDog/system-tests' - ref: zachg/update_inject_method - name: Checkout dd-trace-py if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' @@ -70,13 +69,13 @@ jobs: if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' run: ./build.sh - - name: Run INTEGRATIONS + - name: Run INTEGRATIONS if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' - run: ./run.sh INTEGRATIONS + run: ./run.sh INTEGRATIONS - - name: Run CROSSED_TRACING_LIBRARIES + - name: Run CROSSED_TRACING_LIBRARIES if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' - run: ./run.sh CROSSED_TRACING_LIBRARIES + run: ./run.sh CROSSED_TRACING_LIBRARIES - name: Run if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' @@ -175,7 +174,6 @@ jobs: uses: actions/checkout@v3 with: repository: 'DataDog/system-tests' - ref: zachg/update_inject_method - name: Checkout dd-trace-py if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' uses: actions/checkout@v3 From 312b7d0212dc1037d389ced7926d6cde346a00d1 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Fri, 22 Mar 2024 16:54:21 -0400 Subject: [PATCH 63/69] switch out spansampling processor for tracesamplingprocessor in tests --- tests/tracer/test_processors.py | 55 ++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/tests/tracer/test_processors.py b/tests/tracer/test_processors.py index 34f1d77bc6a..deba77bf237 100644 --- a/tests/tracer/test_processors.py +++ b/tests/tracer/test_processors.py @@ -8,7 +8,6 @@ from ddtrace._trace.context import Context from ddtrace._trace.processor import SpanAggregator from ddtrace._trace.processor import SpanProcessor -from ddtrace._trace.processor import SpanSamplingProcessor from ddtrace._trace.processor import TraceProcessor from ddtrace._trace.processor import TraceSamplingProcessor from ddtrace._trace.processor import TraceTagsProcessor @@ -27,6 +26,7 @@ from ddtrace.internal.processor.endpoint_call_counter import EndpointCallCounterProcessor from ddtrace.internal.sampling import SamplingMechanism from ddtrace.internal.sampling import SpanSamplingRule +from ddtrace.sampler import DatadogSampler from tests.utils import DummyTracer from tests.utils import DummyWriter from tests.utils import override_global_config @@ -408,12 +408,11 @@ def test_changing_tracer_sampler_changes_tracesamplingprocessor_sampler(): def test_single_span_sampling_processor(): """Test that single span sampling tags are applied to spans that should get sampled""" - rule_1 = SpanSamplingRule(service="test_service", name="test_name", sample_rate=1.0, max_per_second=-1) rules = [rule_1] - processor = SpanSamplingProcessor(rules) + sampling_processor = TraceSamplingProcessor(False, DatadogSampler(default_sample_rate=0.0), rules) tracer = DummyTracer() - tracer._span_processors.append(processor) + switch_out_trace_sampling_processor(tracer, sampling_processor) span = traced_function(tracer) @@ -426,9 +425,9 @@ def test_single_span_sampling_processor_match_second_rule(): rule_1 = SpanSamplingRule(service="test_service", name="test_name", sample_rate=1.0, max_per_second=-1) rule_2 = SpanSamplingRule(service="test_service2", name="test_name2", sample_rate=1.0, max_per_second=-1) rules = [rule_1, rule_2] - processor = SpanSamplingProcessor(rules) + processor = TraceSamplingProcessor(False, DatadogSampler(default_sample_rate=0.0), rules) tracer = DummyTracer() - tracer._span_processors.append(processor) + switch_out_trace_sampling_processor(tracer, processor) span = traced_function(tracer, name="test_name2", service="test_service2") @@ -443,9 +442,9 @@ def test_single_span_sampling_processor_rule_order_drop(): rule_1 = SpanSamplingRule(service="test_service", name="test_name", sample_rate=0, max_per_second=-1) rule_2 = SpanSamplingRule(service="test_service", name="test_name", sample_rate=1.0, max_per_second=-1) rules = [rule_1, rule_2] - processor = SpanSamplingProcessor(rules) + processor = TraceSamplingProcessor(False, DatadogSampler(default_sample_rate=0.0), rules) tracer = DummyTracer() - tracer._span_processors.append(processor) + switch_out_trace_sampling_processor(tracer, processor) span = traced_function(tracer) @@ -460,9 +459,9 @@ def test_single_span_sampling_processor_rule_order_keep(): rule_1 = SpanSamplingRule(service="test_service", name="test_name", sample_rate=1.0, max_per_second=-1) rule_2 = SpanSamplingRule(service="test_service", name="test_name", sample_rate=0, max_per_second=-1) rules = [rule_1, rule_2] - processor = SpanSamplingProcessor(rules) + processor = TraceSamplingProcessor(False, DatadogSampler(default_sample_rate=0.0), rules) tracer = DummyTracer() - tracer._span_processors.append(processor) + switch_out_trace_sampling_processor(tracer, processor) span = traced_function(tracer) @@ -495,9 +494,9 @@ def test_single_span_sampling_processor_w_tracer_sampling( service="test_service", name="test_name", sample_rate=span_sample_rate_rule, max_per_second=-1 ) rules = [rule_1] - processor = SpanSamplingProcessor(rules) + processor = TraceSamplingProcessor(False, DatadogSampler(default_sample_rate=0.0), rules) tracer = DummyTracer() - tracer._span_processors.append(processor) + switch_out_trace_sampling_processor(tracer, processor) span = traced_function(tracer, trace_sampling_priority=trace_sampling_priority) @@ -510,16 +509,16 @@ def test_single_span_sampling_processor_w_tracer_sampling( def test_single_span_sampling_processor_w_tracer_sampling_after_processing(): - """Test that single span sampling tags and tracer sampling context are applied to spans - if the trace sampling is changed after the span is processed. + """Since the root span has MANUAL_KEEP_KEY set and the child span has not yet run through + the TraceSamplingProcessor, the child span will have the manual keep in its context and therefore skip single span + sampling. This leads to span sampling rates matching the reality of what span sampling + is responsible for sampling. """ - rule_1 = SpanSamplingRule(name="child", sample_rate=1.0, max_per_second=-1) rules = [rule_1] - processor = SpanSamplingProcessor(rules) + processor = TraceSamplingProcessor(False, DatadogSampler(default_sample_rate=0.0), rules) tracer = DummyTracer() - tracer._span_processors.append(processor) - + switch_out_trace_sampling_processor(tracer, processor) root = tracer.trace("root") # When trace sampling marks it as a drop @@ -529,12 +528,12 @@ def test_single_span_sampling_processor_w_tracer_sampling_after_processing(): # Child is checked against the span sampling rules, and then is kept child = tracer.trace("child") child.finish() + tracer.flush() # The trace is updated to be a keep, but we already span sampled child root.set_tag(MANUAL_KEEP_KEY) root.finish() - # We now expect the span to have both span sampling and tracer context that will sample - assert_span_sampling_decision_tags(child) + assert_span_sampling_decision_tags(child, None, None, None) assert child.context.sampling_priority == USER_KEEP @@ -557,10 +556,10 @@ def test_single_span_sampling_processor_w_stats_computation(): """Test that span processor changes _sampling_priority_v1 to 2 when stats computation is enabled""" rule_1 = SpanSamplingRule(service="test_service", name="test_name", sample_rate=1.0, max_per_second=-1) rules = [rule_1] - processor = SpanSamplingProcessor(rules) + processor = TraceSamplingProcessor(False, DatadogSampler(default_sample_rate=0.0), rules) with override_global_config(dict(_trace_compute_stats=True)): tracer = DummyTracer() - tracer._span_processors.append(processor) + switch_out_trace_sampling_processor(tracer, processor) span = traced_function(tracer) @@ -587,6 +586,18 @@ def assert_span_sampling_decision_tags( assert span.get_metric(SAMPLING_PRIORITY_KEY) == trace_sampling_priority +def switch_out_trace_sampling_processor(tracer, sampling_processor): + for aggregator in tracer._deferred_processors: + if type(aggregator) == SpanAggregator: + i = 0 + while i < len(aggregator._trace_processors): + processor = aggregator._trace_processors[i] + if type(processor) == TraceSamplingProcessor: + aggregator._trace_processors[i] = sampling_processor + break + i += 1 + + def test_endpoint_call_counter_processor(): """ProfilingSpanProcessor collects information about endpoints for profiling""" spanA = Span("spanA", resource="a", span_type=SpanTypes.WEB) From ae5d4dd50d27011454b8763f7c7390be4a967ba5 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Sun, 24 Mar 2024 17:01:55 -0400 Subject: [PATCH 64/69] cover Munir's nits and comments --- ddtrace/_trace/tracer.py | 8 +++---- ddtrace/propagation/_database_monitoring.py | 2 ++ ddtrace/propagation/http.py | 24 +++++++++---------- docs/advanced_usage.rst | 11 +++++---- .../notes/lazy_sampling-93057adeaccbb46f.yaml | 3 ++- 5 files changed, 25 insertions(+), 23 deletions(-) diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index c8196943fbe..d84b1b406c0 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -280,7 +280,7 @@ def __init__( self._hooks = _hooks.Hooks() atexit.register(self._atexit) - forksafe.register_before_fork(self.sample_before_fork) + forksafe.register_before_fork(self._sample_before_fork) forksafe.register(self._child_after_fork) self._shutdown_lock = RLock() @@ -304,7 +304,7 @@ def sample(self, span): if self._sampler is not None: self._sampler.sample(span) else: - log.debug("No sampler available to sample span") + log.error("No sampler available to sample span") @property def sampler(self): @@ -347,7 +347,7 @@ def deregister_on_start_span(self, func: Callable) -> Callable: self._hooks.deregister(self.__class__.start_span, func) return func - def sample_before_fork(self) -> None: + def _sample_before_fork(self) -> None: span = self.current_root_span() if span is not None and span.context.sampling_priority is None: self.sample(span) @@ -1070,7 +1070,7 @@ def shutdown(self, timeout: Optional[float] = None) -> None: atexit.unregister(self._atexit) forksafe.unregister(self._child_after_fork) - forksafe.unregister_before_fork(self.sample_before_fork) + forksafe.unregister_before_fork(self._sample_before_fork) self.start_span = self._start_span_after_shutdown # type: ignore[assignment] diff --git a/ddtrace/propagation/_database_monitoring.py b/ddtrace/propagation/_database_monitoring.py index 8e2612cdd09..bdeb12adc82 100644 --- a/ddtrace/propagation/_database_monitoring.py +++ b/ddtrace/propagation/_database_monitoring.py @@ -67,6 +67,8 @@ def inject(self, dbspan, args, kwargs): if hasattr(ddtrace, "tracer") and hasattr(ddtrace.tracer, "sample"): if dbspan.context.sampling_priority is None: ddtrace.tracer.sample(dbspan._local_root) + else: + log.error("ddtrace.tracer.sample is not available, unable to sample span.") dbm_comment = self._get_dbm_comment(dbspan) if dbm_comment is None: diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index f2a85030dfa..72e0e1f0b16 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -11,7 +11,6 @@ import ddtrace from ddtrace._trace.span import Span # noqa:F401 -from ddtrace._trace.span import _is_top_level if sys.version_info >= (3, 8): @@ -957,6 +956,17 @@ def parent_call(): span_context = non_active_span.context + if hasattr(ddtrace, "tracer") and hasattr(ddtrace.tracer, "sample"): + if non_active_span is not None: + root_span = non_active_span._local_root + else: + root_span = ddtrace.tracer.current_root_span() + + if root_span is not None and root_span.context.sampling_priority is None: + ddtrace.tracer.sample(root_span) + else: + log.error("ddtrace.tracer.sample is not available, unable to sample span.") + # Not a valid context to propagate if span_context.trace_id is None or span_context.span_id is None: log.debug("tried to inject invalid context %r", span_context) @@ -966,18 +976,6 @@ def parent_call(): for key in span_context._baggage: headers[_HTTP_BAGGAGE_PREFIX + key] = span_context._baggage[key] - if hasattr(ddtrace, "tracer") and hasattr(ddtrace.tracer, "sample"): - if non_active_span is not None: - if _is_top_level(non_active_span): - root_span = non_active_span - else: - root_span = non_active_span._local_root # type: ignore - else: - root_span = ddtrace.tracer.current_root_span() # type: ignore - - if root_span is not None and root_span.context.sampling_priority is None: - ddtrace.tracer.sample(root_span) - if PROPAGATION_STYLE_DATADOG in config._propagation_style_inject: _DatadogMultiHeader._inject(span_context, headers) if PROPAGATION_STYLE_B3_MULTI in config._propagation_style_inject: diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 6c40c49fa41..f9020a3ae09 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -82,8 +82,7 @@ context:: be finished in the same execution. The span context can be used to continue a trace in a different execution by passing it and activating it on the other end. Note that in all instances of crossing into another - execution, - sampling should be run manually before entering the new execution + execution, sampling should be run manually before entering the new execution to ensure that the sampling decision is the same across the trace. This can be done using `tracer.sample(tracer.current_root_span())` @@ -238,9 +237,11 @@ To trace requests across hosts, the spans on the secondary hosts must be linked - On the server side, it means to read propagated attributes and set them to the active tracing context. - On the client side, it means to propagate the attributes, commonly as a header/metadata. -`ddtrace` already provides default propagators but you can also implement your own. If utilizing your own propagator -make sure to run `tracer.sample(tracer.current_root_span())` before propagating downstream, -to ensure that the sampling decision is the same across the trace. +`ddtrace` already provides default propagators but you can also implement your own. Note that `ddtrace`` makes +use of lazy sampling, essentially making the sampling decision for a trace at the latest possible moment. This +includes before making an outgoing request via HTTP, gRPC, or a DB call for any automatically instrumented +integration. If utilizing your own propagator make sure to run `tracer.sample(tracer.current_root_span())` +before propagating downstream, to ensure that the sampling decision is the same across the trace. Web Frameworks ^^^^^^^^^^^^^^ diff --git a/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml b/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml index 8efa4ecdf25..e4398d906cf 100644 --- a/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml +++ b/releasenotes/notes/lazy_sampling-93057adeaccbb46f.yaml @@ -11,7 +11,8 @@ prelude: > to other Datadog components (downstream instrumented services, databases, or execution context changes), and rely on the Python tracer to make the sampling decision (don't have an upstream service doing this), they will need to manually run the sampler for those traces, or use ``HttpPropagator.inject()``. - For more information please see: + For more information please see the following: + https://ddtrace.readthedocs.io/en/stable/advanced_usage.html#distributed-tracing https://ddtrace.readthedocs.io/en/stable/advanced_usage.html#tracing-context-management features: - | From 3ccd8ae8ad79d4c3f67b11011bdbf803bd36cf8f Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Sun, 24 Mar 2024 18:06:31 -0400 Subject: [PATCH 65/69] add special casing for floats that have non-zero decimal --- ddtrace/sampling_rule.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ddtrace/sampling_rule.py b/ddtrace/sampling_rule.py index 49135fe4df5..5effac17648 100644 --- a/ddtrace/sampling_rule.py +++ b/ddtrace/sampling_rule.py @@ -168,6 +168,17 @@ def check_tags(self, meta, metrics): # If the value doesn't match in meta, check the metrics if tag_match is False: value = metrics.get(tag_key) + # Floats: Matching floating point values with a non-zero decimal part is not supported. + # For floating point values with a non-zero decimal part, any all * pattern always returns true. + # Other patterns always return false. + if isinstance(value, float): + if not value.is_integer(): + if self._tag_value_matchers[tag_key].pattern == "*": + tag_match = True + else: + return False + continue + tag_match = self._tag_value_matchers[tag_key].match(str(value)) else: continue From 689c59351ab96a53b8342f0743689d6e766e6373 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Sun, 24 Mar 2024 18:33:18 -0400 Subject: [PATCH 66/69] add special float case tests --- tests/integration/test_sampling.py | 22 +++++++++++++++ ...pling_float_special_case_do_not_match.json | 26 ++++++++++++++++++ ...ampling_float_special_case_match_star.json | 27 +++++++++++++++++++ 3 files changed, 75 insertions(+) create mode 100644 tests/snapshots/test_extended_sampling_float_special_case_do_not_match.json create mode 100644 tests/snapshots/test_extended_sampling_float_special_case_match_star.json diff --git a/tests/integration/test_sampling.py b/tests/integration/test_sampling.py index 95f22fd4138..ba009e03f72 100644 --- a/tests/integration/test_sampling.py +++ b/tests/integration/test_sampling.py @@ -275,3 +275,25 @@ def test_extended_sampling_tags_and_name_glob(writer, tracer): tracer._tags = {"banana": "True"} tracer.trace("should_send1").finish() tracer.trace(name="mycoolname").finish() + + +@snapshot_parametrized_with_writers +def test_extended_sampling_float_special_case_do_not_match(writer, tracer): + """A float with a non-zero decimal and a tag with a non-* pattern + # should not match the rule, and should therefore be kept + """ + sampler = DatadogSampler(rules=[SamplingRule(0, tags={"tag": "2*"})]) + tracer.configure(sampler=sampler, writer=writer) + with tracer.trace(name="should_send") as span: + span.set_tag("tag", 20.1) + + +@snapshot_parametrized_with_writers +def test_extended_sampling_float_special_case_match_star(writer, tracer): + """A float with a non-zero decimal and a tag with a * pattern + # should match the rule, and should therefore should be dropped + """ + sampler = DatadogSampler(rules=[SamplingRule(0, tags={"tag": "*"})]) + tracer.configure(sampler=sampler, writer=writer) + with tracer.trace(name="should_send") as span: + span.set_tag("tag", 20.1) diff --git a/tests/snapshots/test_extended_sampling_float_special_case_do_not_match.json b/tests/snapshots/test_extended_sampling_float_special_case_do_not_match.json new file mode 100644 index 00000000000..8fe12f17645 --- /dev/null +++ b/tests/snapshots/test_extended_sampling_float_special_case_do_not_match.json @@ -0,0 +1,26 @@ +[[ + { + "name": "should_send", + "service": "", + "resource": "should_send", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "6600aa0600000000", + "language": "python", + "runtime-id": "8eef084f89104256b5f82b95c235139c" + }, + "metrics": { + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 53744, + "tag": 20.1 + }, + "duration": 27083, + "start": 1711319558921815630 + }]] diff --git a/tests/snapshots/test_extended_sampling_float_special_case_match_star.json b/tests/snapshots/test_extended_sampling_float_special_case_match_star.json new file mode 100644 index 00000000000..3e39035636d --- /dev/null +++ b/tests/snapshots/test_extended_sampling_float_special_case_match_star.json @@ -0,0 +1,27 @@ +[[ + { + "name": "should_send", + "service": "", + "resource": "should_send", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-3", + "_dd.p.tid": "6600aa0700000000", + "language": "python", + "runtime-id": "8eef084f89104256b5f82b95c235139c" + }, + "metrics": { + "_dd.rule_psr": 0.0, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": -1, + "process_id": 53744, + "tag": 20.1 + }, + "duration": 25000, + "start": 1711319559844282297 + }]] From 1419524873442006f0585029a710c830dab76be2 Mon Sep 17 00:00:00 2001 From: ZStriker19 Date: Mon, 25 Mar 2024 11:15:19 -0400 Subject: [PATCH 67/69] fix forksafe for profiling by copying list --- ddtrace/internal/forksafe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/internal/forksafe.py b/ddtrace/internal/forksafe.py index f17376aa141..3bd6891f3ee 100644 --- a/ddtrace/internal/forksafe.py +++ b/ddtrace/internal/forksafe.py @@ -40,7 +40,7 @@ def has_forked(): def run_hooks(registry): # type: (typing.List[typing.Callable[[], None]]) -> None - for hook in registry: + for hook in list(registry): try: hook() except Exception: From 3659dab92fc43fe04fbb5e126a8010119b276c4e Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Mon, 25 Mar 2024 13:00:30 -0400 Subject: [PATCH 68/69] Update ddtrace/propagation/http.py Co-authored-by: Emmett Butler <723615+emmettbutler@users.noreply.github.com> --- ddtrace/propagation/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index 72e0e1f0b16..9a2489cadd4 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -948,7 +948,7 @@ def parent_call(): """ if non_active_span is not None and non_active_span.context is not span_context: log.error( - "span_context and non_active_span.context are not the same, but should be, non_active_span.context " + "span_context and non_active_span.context are not the same, but should be. non_active_span.context " "will be used to generate distributed tracing headers. span_context: {}, non_active_span.context: {}", span_context, non_active_span.context, From 98994f1f800c6551ff1d4457d222f629dc077a40 Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Mon, 25 Mar 2024 15:22:52 -0400 Subject: [PATCH 69/69] Update docs/advanced_usage.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: jordan gonzález <30836115+duncanista@users.noreply.github.com> --- docs/advanced_usage.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index f9020a3ae09..e9ca0a7ab2f 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -237,7 +237,7 @@ To trace requests across hosts, the spans on the secondary hosts must be linked - On the server side, it means to read propagated attributes and set them to the active tracing context. - On the client side, it means to propagate the attributes, commonly as a header/metadata. -`ddtrace` already provides default propagators but you can also implement your own. Note that `ddtrace`` makes +`ddtrace` already provides default propagators but you can also implement your own. Note that `ddtrace` makes use of lazy sampling, essentially making the sampling decision for a trace at the latest possible moment. This includes before making an outgoing request via HTTP, gRPC, or a DB call for any automatically instrumented integration. If utilizing your own propagator make sure to run `tracer.sample(tracer.current_root_span())`