Skip to content
Permalink
Browse files

Add flag reporting-zipkin-sample-rate (pantsbuild#7211)

### Problem
In the current implementation, every time the pants command is run with zipkin tracing turned on all the zipkin traces will be collected. It is not very convenient when the number of runs is very big. 

### Solution
Possibility to set the sample rate will allow us to have the number of traces that fits the constraints of Zipkin server. 

### Result
A flag `reporting-zipkin-sample-rate` was added that sets the sample rate at which to sample Zipkin traces. If flags `reporting-zipkin-trace-id` and `reporting-zipkin-parent-id` are set the sample rate will always be 100.0 (no matter what is set in `reporting-zipkin-sample-rate` flag).
  • Loading branch information...
cattibrie authored and illicitonion committed Feb 6, 2019
1 parent 95638d3 commit b08c1fd225da1a678c022602888a5708e656f87e
@@ -60,6 +60,8 @@ def register_options(cls, register):
help='The 64-bit ID for a parent span that invokes Pants. '
'zipkin-trace-id and zipkin-parent-id must both either be set or not set '
'when run Pants command')
register('--zipkin-sample-rate', advanced=True, default=100.0,
help='Rate at which to sample Zipkin traces. Value 0.0 - 100.0.')

def initialize(self, run_tracker, all_options, start_time=None):
"""Initialize with the given RunTracker.
@@ -100,6 +102,7 @@ def initialize(self, run_tracker, all_options, start_time=None):
zipkin_endpoint = self.get_options().zipkin_endpoint
trace_id = self.get_options().zipkin_trace_id
parent_id = self.get_options().zipkin_parent_id
sample_rate = self.get_options().zipkin_sample_rate

if zipkin_endpoint is None and trace_id is not None and parent_id is not None:
raise ValueError(
@@ -113,7 +116,7 @@ def initialize(self, run_tracker, all_options, start_time=None):
if zipkin_endpoint is not None:
zipkin_reporter_settings = ZipkinReporter.Settings(log_level=Report.INFO)
zipkin_reporter = ZipkinReporter(
run_tracker, zipkin_reporter_settings, zipkin_endpoint, trace_id, parent_id
run_tracker, zipkin_reporter_settings, zipkin_endpoint, trace_id, parent_id, sample_rate
)
report.add_reporter('zipkin', zipkin_reporter)

@@ -10,7 +10,7 @@
from py_zipkin import Encoding
from py_zipkin.transport import BaseTransportHandler
from py_zipkin.util import generate_random_64bit_string
from py_zipkin.zipkin import ZipkinAttrs, zipkin_span
from py_zipkin.zipkin import ZipkinAttrs, create_attrs_for_span, zipkin_span

from pants.base.workunit import WorkUnitLabel
from pants.reporting.reporter import Reporter
@@ -42,7 +42,7 @@ class ZipkinReporter(Reporter):
Reporter that implements Zipkin tracing.
"""

def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id):
def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id, sample_rate):
"""
When trace_id and parent_id are set a Zipkin trace will be created with given trace_id
and parent_id. If trace_id and parent_id are set to None, a trace_id will be randomly
@@ -53,6 +53,7 @@ def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id):
:param string endpoint: The full HTTP URL of a zipkin server to which traces should be posted.
:param string trace_id: The overall 64 or 128-bit ID of the trace. May be None.
:param string parent_id: The 64-bit ID for a parent span that invokes Pants. May be None.
:param float sample_rate: Rate at which to sample Zipkin traces. Value 0.0 - 100.0.
"""
super(ZipkinReporter, self).__init__(run_tracker, settings)
# We keep track of connection between workunits and spans
@@ -61,6 +62,7 @@ def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id):
self.handler = HTTPTransportHandler(endpoint)
self.trace_id = trace_id
self.parent_id = parent_id
self.sample_rate = float(sample_rate)

def start_workunit(self, workunit):
"""Implementation of Reporter callback."""
@@ -84,13 +86,14 @@ def start_workunit(self, workunit):
is_sampled=True,
)
else:
zipkin_attrs = None
zipkin_attrs = create_attrs_for_span(
sample_rate=self.sample_rate, # Value between 0.0 and 100.0
)

span = zipkin_span(
service_name=service_name,
span_name=workunit.name,
transport_handler=self.handler,
sample_rate=100.0, # Value between 0.0 and 100.0
encoding=Encoding.V1_THRIFT,
zipkin_attrs=zipkin_attrs
)
@@ -104,7 +107,7 @@ def start_workunit(self, workunit):
# Goals and tasks save their start time at the beginning of their run.
# This start time is passed to workunit, because the workunit may be created much later.
span.start_timestamp = workunit.start_time
if first_span:
if first_span and span.zipkin_attrs.is_sampled:
span.logging_context.start_timestamp = workunit.start_time

def end_workunit(self, workunit):
@@ -220,6 +220,23 @@ def test_zipkin_reporter_with_given_trace_id_parent_id(self):
self.assertTrue(main_children)
self.assertTrue(any(span['name'] == 'cloc' for span in main_children))

def test_zipkin_reporter_with_zero_sample_rate(self):
ZipkinHandler = zipkin_handler()
with http_server(ZipkinHandler) as port:
endpoint = "http://localhost:{}".format(port)
command = [
'--reporting-zipkin-endpoint={}'.format(endpoint),
'--reporting-zipkin-sample-rate=0.0',
'cloc',
'src/python/pants:version'
]

pants_run = self.run_pants(command)
self.assert_success(pants_run)

num_of_traces = len(ZipkinHandler.traces)
self.assertEqual(num_of_traces, 0)

@staticmethod
def find_spans_by_name(trace, name):
return [span for span in trace if span['name'] == name]

0 comments on commit b08c1fd

Please sign in to comment.
You can’t perform that action at this time.