Skip to content

Commit

Permalink
feat(tracing): sampling rate scope by plugin
Browse files Browse the repository at this point in the history
Sampling rate can now be set via the OpenTelemetry plugin instead of
it just being a global setting for the gateway.
  • Loading branch information
samugi committed Nov 20, 2023
1 parent f2a8a86 commit d9a8f15
Show file tree
Hide file tree
Showing 7 changed files with 94 additions and 48 deletions.
5 changes: 5 additions & 0 deletions changelog/unreleased/kong/tracing-sampling-rate-scope.yml
@@ -0,0 +1,5 @@
message: >
Tracing Sampling Rate can now be set via the `config.sampling_rate` property
of the OpenTelemetry plugin instead of it just being a global setting for the gateway.
type: feature
scope: Plugin
7 changes: 7 additions & 0 deletions kong/clustering/compat/removed_fields.lua
Expand Up @@ -109,4 +109,11 @@ return {
"read_body_for_logout",
},
},

-- Any dataplane older than 3.6.0
[3006000000] = {
opentelemetry = {
"sampling_rate",
},
},
}
89 changes: 54 additions & 35 deletions kong/pdk/tracing.lua
Expand Up @@ -12,6 +12,7 @@ local new_tab = require "table.new"
local base = require "resty.core.base"
local utils = require "kong.tools.utils"
local phase_checker = require "kong.pdk.private.phases"
local tracing_context = require "kong.tracing.tracing_context"

local ngx = ngx
local type = type
Expand Down Expand Up @@ -64,34 +65,29 @@ local function generate_span_id()
return rand_bytes(8)
end

--- Built-in sampler
local function always_on_sampler()
return true
end

local function always_off_sampler()
return false
end

-- Fractions >= 1 will always sample. Fractions < 0 are treated as zero.
-- spec: https://github.com/c24t/opentelemetry-specification/blob/3b3d321865cf46364bdfb292c179b6444dc96bf9/specification/sdk-tracing.md#probability-sampler-algorithm
local function get_trace_id_based_sampler(rate)
if type(rate) ~= "number" then
error("invalid fraction", 2)
end
local function get_trace_id_based_sampler(options_sampling_rate)
return function(trace_id, sampling_rate)
sampling_rate = sampling_rate or options_sampling_rate

if rate >= 1 then
return always_on_sampler
end
if type(sampling_rate) ~= "number" then
error("invalid fraction", 2)
end

if rate <= 0 then
return always_off_sampler
end
-- always on sampler
if sampling_rate >= 1 then
return true
end

-- always off sampler
if sampling_rate <= 0 then
return false
end

local bound = rate * BOUND_MAX
-- probability sampler
local bound = sampling_rate * BOUND_MAX

-- TODO: is this a sound method to sample?
return function(trace_id)
if #trace_id < SAMPLING_BYTE then
error(TOO_SHORT_MESSAGE, 2)
end
Expand Down Expand Up @@ -190,9 +186,6 @@ local function create_span(tracer, options)

elseif options.should_sample ~= nil then
sampled = options.should_sample

else
sampled = tracer and tracer.sampler(trace_id)
end

span.parent_id = span.parent and span.parent.span_id
Expand All @@ -201,17 +194,15 @@ local function create_span(tracer, options)
span.span_id = generate_span_id()
span.trace_id = trace_id
span.kind = options.span_kind or SPAN_KIND.INTERNAL
-- if sampled == nil, the sampling decision is delayed to the
-- reporting phase so plugins can apply the configured sampling rate
span.should_sample = sampled

setmetatable(span, span_mt)
return span
end

local function link_span(tracer, span, name, options)
if not span.should_sample then
kong.log.debug("skipping non-sampled span")
return
end
if tracer and type(tracer) ~= "table" then
error("invalid tracer", 2)
end
Expand Down Expand Up @@ -271,8 +262,8 @@ end
-- local time = ngx.now()
-- span:finish(time * 100000000)
function span_mt:finish(end_time_ns)
if self.end_time_ns ~= nil or not self.should_sample then
-- span is finished, and already processed or not sampled
if self.end_time_ns ~= nil then
-- span is finished, and already processed
return
end

Expand Down Expand Up @@ -523,20 +514,48 @@ local function new_tracer(name, options)
end

--- Update the value of should_sample for all spans
-- Uses a parent-based sampler when the parent has sampled flag == false
-- to inherit the decision to not record the span
--
-- When the span's should_sample is nil (sampling decision was delayed),
-- apply the probability-based should_sample decision
--
-- @function kong.tracing:set_should_sample
-- @tparam bool should_sample value for the sample parameter
function self:set_should_sample(should_sample)
-- @tparam bool parent_should_sample value of the parent span sampled flag
-- extracted from the incoming tracing headers
-- @tparam number sampling_rate the sampling rate to apply for the
-- probability sampler
-- @treturn bool sampled value of sampled for this trace
function self:set_should_sample(parent_should_sample, sampling_rate)
local ctx = ngx.ctx
if not ctx.KONG_SPANS then
return
return nil, "span list is empty"
end

local trace_id = tracing_context.get_raw_trace_id(ctx)
if not trace_id then
return nil, "trace ID is unknown"
end

local sampled = true
local root_span = ctx.KONG_SPANS[1]
-- use parent-based sampler if parent has sampled == false
if parent_should_sample == false then
sampled = false

-- use probability-based sampler if sampling decision was delayed
elseif root_span.should_sample == nil then
sampled = self.sampler(trace_id, sampling_rate)
end

for _, span in ipairs(ctx.KONG_SPANS) do
-- ignore noop spans
if span.is_recording ~= false then
span.should_sample = should_sample
span.should_sample = sampled
end
end

return sampled
end

tracer_memo[name] = setmetatable(self, tracer_mt)
Expand Down
19 changes: 12 additions & 7 deletions kong/plugins/opentelemetry/handler.lua
Expand Up @@ -94,7 +94,8 @@ end
function OpenTelemetryHandler:access(conf)
local headers = ngx_get_headers()
local root_span = ngx.ctx.KONG_SPANS and ngx.ctx.KONG_SPANS[1]
local tracer = kong.tracing.new("otel")
local tracer = kong.tracing.name == "noop" and kong.tracing.new("otel")
or kong.tracing

-- make propagation work even when tracing instrumentation is not enabled
if not root_span then
Expand All @@ -105,12 +106,7 @@ function OpenTelemetryHandler:access(conf)
end

local injected_parent_span = tracing_context.get_unlinked_span("balancer") or root_span

local header_type, trace_id, span_id, parent_id, should_sample, _ = propagation_parse(headers, conf.header_type)
if should_sample == false then
tracer:set_should_sample(should_sample)
injected_parent_span.should_sample = should_sample
end
local header_type, trace_id, span_id, parent_id, parent_sampled, _ = propagation_parse(headers, conf.header_type)

-- overwrite trace id
-- as we are in a chain of existing trace
Expand All @@ -122,6 +118,15 @@ function OpenTelemetryHandler:access(conf)
tracing_context.set_raw_trace_id(trace_id)
end

-- set_should_sample takes sampling decisions based on the value
-- of the trace id (probability sampler): call it after the trace_id is
-- updated to its final value
local sampled, err = tracer:set_should_sample(parent_sampled, conf.sampling_rate)
if err then
ngx_log(ngx_ERR, _log_prefix, "failed to set sampled flag: ", err)
end
injected_parent_span.should_sample = not not sampled

-- overwrite root span's parent_id
if span_id then
root_span.parent_id = span_id
Expand Down
8 changes: 8 additions & 0 deletions kong/plugins/opentelemetry/schema.lua
Expand Up @@ -59,6 +59,14 @@ return {
required = false,
default = "preserve",
one_of = { "preserve", "ignore", "b3", "b3-single", "w3c", "jaeger", "ot", "aws", "gcp" } } },
{ sampling_rate = {
description = "Tracing sampling rate to configure the probability-based samper with.",
type = "number",
gt = 0,
between = {0, 1},
required = false,
default = nil,
} },
},
entity_checks = {
{ custom_entity_check = {
Expand Down
2 changes: 2 additions & 0 deletions spec/02-integration/09-hybrid_mode/09-config-compat_spec.lua
Expand Up @@ -212,6 +212,7 @@ describe("CP/DP config compat transformations #" .. strategy, function()
local expected_otel_prior_35 = utils.cycle_aware_deep_copy(opentelemetry)
expected_otel_prior_35.config.header_type = "preserve"
expected_otel_prior_35.config.sampling_rate = nil
do_assert(utils.uuid(), "3.4.0", expected_otel_prior_35)
-- cleanup
Expand All @@ -231,6 +232,7 @@ describe("CP/DP config compat transformations #" .. strategy, function()
local expected_otel_prior_34 = utils.cycle_aware_deep_copy(opentelemetry)
expected_otel_prior_34.config.header_type = "preserve"
expected_otel_prior_34.config.sampling_rate = nil
do_assert(utils.uuid(), "3.3.0", expected_otel_prior_34)
-- cleanup
Expand Down
Expand Up @@ -14,19 +14,19 @@ local _M = {

function _M:access(conf)
local headers = ngx.req.get_headers()
local tracer = kong.tracing.new("trace-propagator")
local tracer = kong.tracing.name == "noop" and kong.tracing.new("otel")
or kong.tracing
local root_span = ngx.ctx.KONG_SPANS and ngx.ctx.KONG_SPANS[1]
if not root_span then
root_span = tracer.start_span("root")
end
local injected_parent_span = tracing_context.get_unlinked_span("balancer") or root_span

local header_type, trace_id, span_id, parent_id, should_sample = propagation_parse(headers)
local header_type, trace_id, span_id, parent_id, parent_sampled = propagation_parse(headers)

local sampled = tracer:set_should_sample(parent_sampled, conf.sampling_rate)
injected_parent_span.should_sample = not not sampled

if should_sample == false then
tracer:set_should_sample(should_sample)
injected_parent_span.should_sample = should_sample
end

if trace_id then
injected_parent_span.trace_id = trace_id
Expand Down

0 comments on commit d9a8f15

Please sign in to comment.