diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index df45ec57dc8..9b31265477f 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -73,6 +73,23 @@ def _possible_header(header): _POSSIBLE_HTTP_HEADER_TRACESTATE = _possible_header(_HTTP_HEADER_TRACESTATE) +# https://www.w3.org/TR/trace-context/#traceparent-header-field-values +# Future proofing: The traceparent spec is additive, future traceparent versions may contain more than 4 values +# The regex below matches the version, trace id, span id, sample flag, and end-string/future values (if version>00) +_TRACEPARENT_HEX_REGEX = re.compile( + r""" + ^ # Start of string + ([a-f0-9]{2})- # 2 character hex version + ([a-f0-9]{32})- # 32 character hex trace id + ([a-f0-9]{16})- # 16 character hex span id + ([a-f0-9]{2}) # 2 character hex sample flag + (-.+)? # optional, start of any additional values + $ # end of string + """, + re.VERBOSE, +) + + def _extract_header_value(possible_header_names, headers, default=None): # type: (FrozenSet[str], Dict[str, str], Optional[str]) -> Optional[str] for header in possible_header_names: @@ -550,35 +567,41 @@ def _get_traceparent_values(tp): Otherwise we extract the trace-id, span-id, and sampling priority from the traceparent header. """ + valid_tp_values = _TRACEPARENT_HEX_REGEX.match(tp.strip()) + if valid_tp_values is None: + raise ValueError("Invalid traceparent version: %s" % tp) + + ( + version, + trace_id_hex, + span_id_hex, + trace_flags_hex, + future_vals, + ) = valid_tp_values.groups() # type: Tuple[str, str, str, str, Optional[str]] - version, trace_id_hex, span_id_hex, trace_flags_hex = tp.strip().split("-") - # check version is a valid hexadecimal, if not it's invalid we will move on to the next prop method - int(version, 16) - # https://www.w3.org/TR/trace-context/#version if version == "ff": - raise ValueError("'ff' is an invalid traceparent version") - # currently 00 is the only version format, but if future versions come up we may need to add changes - if version != "00": + # https://www.w3.org/TR/trace-context/#version + raise ValueError("ff is an invalid traceparent version: %s" % tp) + elif version != "00": + # currently 00 is the only version format, but if future versions come up we may need to add changes log.warning("unsupported traceparent version:%r, still attempting to parse", version) + elif version == "00" and future_vals is not None: + raise ValueError("Traceparents with the version `00` should contain 4 values delimited by a dash: %s" % tp) - if len(trace_id_hex) == 32 and len(span_id_hex) == 16 and len(trace_flags_hex) >= 2: - trace_id = _hex_id_to_dd_id(trace_id_hex) - span_id = _hex_id_to_dd_id(span_id_hex) + trace_id = _hex_id_to_dd_id(trace_id_hex) + span_id = _hex_id_to_dd_id(span_id_hex) - # All 0s are invalid values - if trace_id == 0: - raise ValueError("0 value for trace_id is invalid") - if span_id == 0: - raise ValueError("0 value for span_id is invalid") + # All 0s are invalid values + if trace_id == 0: + raise ValueError("0 value for trace_id is invalid") + if span_id == 0: + raise ValueError("0 value for span_id is invalid") - trace_flags = _hex_id_to_dd_id(trace_flags_hex) - # there's currently only one trace flag, which denotes sampling priority - # was set to keep "01" or drop "00" - # trace flags is a bit field: https://www.w3.org/TR/trace-context/#trace-flags - sampling_priority = trace_flags & 0x1 - - else: - raise ValueError("W3C traceparent hex length incorrect: %s" % tp) + trace_flags = _hex_id_to_dd_id(trace_flags_hex) + # there's currently only one trace flag, which denotes sampling priority + # was set to keep "01" or drop "00" + # trace flags is a bit field: https://www.w3.org/TR/trace-context/#trace-flags + sampling_priority = trace_flags & 0x1 return trace_id, span_id, sampling_priority @@ -644,10 +667,6 @@ def _extract(headers): if tp is None: log.debug("no traceparent header") return None - # uppercase char in tp makes it invalid: - # https://www.w3.org/TR/trace-context/#traceparent-header-field-values - if not tp.islower(): - raise ValueError("uppercase characters are not allowed in traceparent") trace_id, span_id, sampling_priority = _TraceContext._get_traceparent_values(tp) except (ValueError, AssertionError): log.exception("received invalid w3c traceparent: %s ", tp) diff --git a/tests/tracer/test_propagation.py b/tests/tracer/test_propagation.py index 3946a3b5fea..d91b7cd1d52 100644 --- a/tests/tracer/test_propagation.py +++ b/tests/tracer/test_propagation.py @@ -444,34 +444,52 @@ def test_tracecontext_get_sampling_priority(sampling_priority_tp, sampling_prior None, ), ( - "01-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01", + "01-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01-what-the-future-looks-like", # tp, trace_id, span_id, sampling_priority (11803532876627986230, 67667974448284343, 1), ["unsupported traceparent version:'01', still attempting to parse"], None, ), ( - "0-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01", + "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01-v00-can-not-have-future-values", # tp, trace_id, span_id, sampling_priority (11803532876627986230, 67667974448284343, 1), - ["unsupported traceparent version:'0', still attempting to parse"], + [], + ValueError, + ), + ( + "0-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01", + # tp, trace_id, span_id, sampling_priority + None, + [], + ValueError, + ), + ( + "ff-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01", + # tp, trace_id, span_id, sampling_priority None, + [], + ValueError, + ), + ( + "00-4BF92K3577B34dA6C3ce929d0e0e4736-00f067aa0ba902b7-01", + # tp, trace_id, span_id, sampling_priority + None, + [], + ValueError, ), ( "00-f92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01", # tp, trace_id, span_id, sampling_priority None, - [ - "received invalid w3c traceparent: 00-f92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01.", - "W3C traceparent hex length incorrect: 00-f92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01", - ], + [], ValueError, ), ( # we still parse the trace flag and analyze the it as a bit field "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-02", # tp, trace_id, span_id, sampling_priority (11803532876627986230, 67667974448284343, 0), - None, + [], None, ), ], @@ -481,7 +499,10 @@ def test_tracecontext_get_sampling_priority(sampling_priority_tp, sampling_prior "invalid_0_value_for_span_id", "traceflag_00", "unsupported_version", + "version_00_with_unsupported_trailing_values", "short_version", + "invalid_version", + "traceparent_contains_uppercase_chars", "short_trace_id", "unknown_trace_flag", ], @@ -490,14 +511,14 @@ def test_extract_traceparent(caplog, headers, expected_tuple, expected_logging, with caplog.at_level(logging.DEBUG): if expected_exception: with pytest.raises(expected_exception): - traceparent_values = _TraceContext._get_traceparent_values(headers) - assert traceparent_values == expected_tuple + _TraceContext._get_traceparent_values(headers) else: traceparent_values = _TraceContext._get_traceparent_values(headers) assert traceparent_values == expected_tuple - if caplog.text or expected_logging: - for expected_log in expected_logging: - assert expected_log in caplog.text + + if caplog.text or expected_logging: + for expected_log in expected_logging: + assert expected_log in caplog.text @pytest.mark.parametrize(