[core] Enable priority sampling by default (#774)

* Enable priority sampling by default * fix broken tests * fix linting * fix pylons test * fix logic to set tracer.priority_sampler * fix pylons test * fix priority sampling docs
DataDog · Dec 12, 2018 · 445d327 · 445d327
1 parent a0bdd48
commit 445d327
Show file tree

Hide file tree

Showing 7 changed files with 56 additions and 27 deletions.
diff --git a/ddtrace/tracer.py b/ddtrace/tracer.py
@@ -98,7 +98,7 @@ def configure(self, enabled=None, hostname=None, port=None, sampler=None,
             ``Tracer.wrap()``. This is an advanced option that usually doesn't need to be changed
             from the default value
         :param priority_sampling: enable priority sampling, this is required for
-            complete distributed tracing support.
+            complete distributed tracing support. Enabled by default.
         """
         if enabled is not None:
             self.enabled = enabled
@@ -110,8 +110,12 @@ def configure(self, enabled=None, hostname=None, port=None, sampler=None,
         if sampler is not None:
             self.sampler = sampler
 
-        if priority_sampling:
+        # If priority sampling is not set or is True and no priority sampler is set yet
+        if priority_sampling in (None, True) and not self.priority_sampler:
             self.priority_sampler = RateByServiceSampler()
+        # Explicitly disable priority sampling
+        elif priority_sampling is False:
+            self.priority_sampler = None
 
         if hostname is not None or port is not None or filters is not None or \
                 priority_sampling is not None:

diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst
@@ -128,13 +128,10 @@ priority to the following values:
 - ``AUTO_REJECT``: the sampler automatically rejects the trace
 - ``AUTO_KEEP``: the sampler automatically keeps the trace
 
-Priority sampling is disabled by default. Enabling it ensures that your sampled
-distributed traces will be complete. To enable priority sampling::
-
-    tracer.configure(priority_sampling=True)
-
-Once enabled, the sampler will automatically assign a priority to your traces,
+Priority sampling is enabled by default.
+When enabled, the sampler will automatically assign a priority to your traces,
 depending on their service and volume.
+This ensures that your sampled distributed traces will be complete.
 
 You can also set this priority manually to either drop an uninteresting trace or
 to keep an important one.
@@ -323,7 +320,7 @@ for usage.
 +---------------------+---------------------------------------------------------+---------------+
 | `sampler`           | see `Sampling`_                                         | `AllSampler`  |
 +---------------------+---------------------------------------------------------+---------------+
-| `priority_sampling` | see `Priority Sampling`_                                | `False`       |
+| `priority_sampling` | see `Priority Sampling`_                                | `True`        |
 +---------------------+---------------------------------------------------------+---------------+
 | `settings`          | see `Advanced Usage`_                                   | `{}`          |
 +---------------------+---------------------------------------------------------+---------------+
@@ -448,7 +445,7 @@ The available environment variables for ``ddtrace-run`` are:
   ``localhost``)
 * ``DATADOG_TRACE_AGENT_PORT=8126``: override the port that the default tracer
   will submit to  (default: 8126)
-* ``DATADOG_PRIORITY_SAMPLING`` (default: false): enables :ref:`Priority
+* ``DATADOG_PRIORITY_SAMPLING`` (default: true): enables :ref:`Priority
   Sampling`
 
 ``ddtrace-run`` respects a variety of common entrypoints for web applications:

diff --git a/tests/contrib/grpc/test_grpc.py b/tests/contrib/grpc/test_grpc.py
@@ -45,7 +45,14 @@ def test_insecure_channel(self):
         spans = writer.pop()
         eq_(len(spans), 1)
         span = spans[0]
-        eq_(response.message, 'x-datadog-trace-id=%d;x-datadog-parent-id=%d' % (span.trace_id, span.span_id))
+        eq_(
+            response.message,
+            (
+                # DEV: Priority sampling is enabled by default
+                'x-datadog-trace-id=%d;x-datadog-parent-id=%d;x-datadog-sampling-priority=1' %
+                (span.trace_id, span.span_id)
+            ),
+        )
         _check_span(span)
 
     def test_secure_channel(self):
@@ -59,11 +66,18 @@ def test_secure_channel(self):
         eq_(len(spans), 1)
 
         span = spans[0]
-        eq_(response.message, 'x-datadog-trace-id=%d;x-datadog-parent-id=%d' % (span.trace_id, span.span_id))
+        eq_(
+            response.message,
+            (
+                # DEV: Priority sampling is enabled by default
+                'x-datadog-trace-id=%d;x-datadog-parent-id=%d;x-datadog-sampling-priority=1' %
+                (span.trace_id, span.span_id)
+            ),
+        )
         _check_span(span)
 
     def test_priority_sampling(self):
-        self._tracer.configure(priority_sampling=True)
+        # DEV: Priority sampling is enabled by default
         # Setting priority sampling reset the writer, we need to re-override it
         self._tracer.writer = DummyWriter()
 

diff --git a/tests/contrib/pylons/test_pylons.py b/tests/contrib/pylons/test_pylons.py
@@ -289,7 +289,7 @@ def test_distributed_tracing_default(self):
 
         ok_(span.trace_id != 100)
         ok_(span.parent_id != 42)
-        ok_(span.get_metric(SAMPLING_PRIORITY_KEY) is None)
+        ok_(span.get_metric(SAMPLING_PRIORITY_KEY) != 2)
 
     def test_distributed_tracing_enabled(self):
         # ensure distributed tracing propagator is working

diff --git a/tests/test_integration.py b/tests/test_integration.py
@@ -106,8 +106,8 @@ def test_worker_single_trace(self):
         self._wait_thread_flush()
         eq_(self.api._put.call_count, 1)
         # check and retrieve the right call
-        endpoint, payload = self._get_endpoint_payload(self.api._put.call_args_list, '/v0.3/traces')
-        eq_(endpoint, '/v0.3/traces')
+        endpoint, payload = self._get_endpoint_payload(self.api._put.call_args_list, '/v0.4/traces')
+        eq_(endpoint, '/v0.4/traces')
         eq_(len(payload), 1)
         eq_(len(payload[0]), 1)
         eq_(payload[0][0]['name'], 'client.testing')
@@ -124,8 +124,8 @@ def test_worker_multiple_traces(self):
         self._wait_thread_flush()
         eq_(self.api._put.call_count, 1)
         # check and retrieve the right call
-        endpoint, payload = self._get_endpoint_payload(self.api._put.call_args_list, '/v0.3/traces')
-        eq_(endpoint, '/v0.3/traces')
+        endpoint, payload = self._get_endpoint_payload(self.api._put.call_args_list, '/v0.4/traces')
+        eq_(endpoint, '/v0.4/traces')
         eq_(len(payload), 2)
         eq_(len(payload[0]), 1)
         eq_(len(payload[1]), 1)
@@ -143,8 +143,8 @@ def test_worker_single_trace_multiple_spans(self):
         self._wait_thread_flush()
         eq_(self.api._put.call_count, 1)
         # check and retrieve the right call
-        endpoint, payload = self._get_endpoint_payload(self.api._put.call_args_list, '/v0.3/traces')
-        eq_(endpoint, '/v0.3/traces')
+        endpoint, payload = self._get_endpoint_payload(self.api._put.call_args_list, '/v0.4/traces')
+        eq_(endpoint, '/v0.4/traces')
         eq_(len(payload), 1)
         eq_(len(payload[0]), 2)
         eq_(payload[0][0]['name'], 'client.testing')
@@ -208,8 +208,8 @@ def test_worker_filter_request(self):
         # Only the second trace should have been sent
         eq_(self.api._put.call_count, 1)
         # check and retrieve the right call
-        endpoint, payload = self._get_endpoint_payload(self.api._put.call_args_list, '/v0.3/traces')
-        eq_(endpoint, '/v0.3/traces')
+        endpoint, payload = self._get_endpoint_payload(self.api._put.call_args_list, '/v0.4/traces')
+        eq_(endpoint, '/v0.4/traces')
         eq_(len(payload), 1)
         eq_(payload[0][0]['name'], 'testing.nonfilteredurl')
 

diff --git a/tests/test_sampler.py b/tests/test_sampler.py
@@ -74,7 +74,7 @@ def test_sample_rate_deviation(self):
         for sample_rate in [0.1, 0.25, 0.5, 1]:
             tracer = get_dummy_tracer()
             writer = tracer.writer
-            tracer.configure(sampler=AllSampler(), priority_sampling=True)
+            tracer.configure(sampler=AllSampler())
             # We need to set the writer because tracer.configure overrides it,
             # indeed, as we enable priority sampling, we must ensure the writer
             # is priority sampling aware and pass it a reference on the
@@ -128,7 +128,7 @@ def test_set_sample_rate_by_service(self):
         ]
 
         tracer = get_dummy_tracer()
-        tracer.configure(sampler=AllSampler(), priority_sampling=True)
+        tracer.configure(sampler=AllSampler())
         priority_sampler = tracer.priority_sampler
         for case in cases:
             priority_sampler.set_sample_rate_by_service(case)

diff --git a/tests/utils/tracer.py b/tests/utils/tracer.py
@@ -6,9 +6,10 @@
 class DummyWriter(AgentWriter):
     """DummyWriter is a small fake writer used for tests. not thread-safe."""
 
-    def __init__(self):
+    def __init__(self, *args, **kwargs):
         # original call
-        super(DummyWriter, self).__init__()
+        super(DummyWriter, self).__init__(*args, **kwargs)
+
         # dummy components
         self.spans = []
         self.traces = []
@@ -57,4 +58,17 @@ class DummyTracer(Tracer):
     """
     def __init__(self, *args, **kwargs):
         super(DummyTracer, self).__init__(*args, **kwargs)
-        self.writer = DummyWriter()
+        self._update_writer()
+
+    def _update_writer(self):
+        self.writer = DummyWriter(
+                hostname=self.writer.api.hostname,
+                port=self.writer.api.port,
+                filters=self.writer._filters,
+                priority_sampler=self.writer._priority_sampler,
+        )
+
+    def configure(self, *args, **kwargs):
+        super(DummyTracer, self).configure(*args, **kwargs)
+        # `.configure()` may reset the writer
+        self._update_writer()