diff --git a/ddtrace/contrib/trace_utils.py b/ddtrace/contrib/trace_utils.py index fbf519918e9..fe6de594cfa 100644 --- a/ddtrace/contrib/trace_utils.py +++ b/ddtrace/contrib/trace_utils.py @@ -23,6 +23,7 @@ from ddtrace.ext import http from ddtrace.ext import user from ddtrace.internal import _context +from ddtrace.internal.compat import parse from ddtrace.internal.compat import six from ddtrace.internal.logger import get_logger from ddtrace.internal.utils.cache import cached @@ -280,6 +281,29 @@ def _store_response_headers(headers, span, integration_config): _store_headers(headers, span, integration_config, RESPONSE) +def _sanitized_url(url): + # type: (str) -> str + """ + Sanitize url by removing parts with potential auth info + """ + if "@" in url: + parsed = parse.urlparse(url) + netloc = parsed.netloc + netloc = netloc[netloc.index("@") + 1 :] + return parse.urlunparse( + ( + parsed.scheme, + netloc, + parsed.path, + "", + parsed.query, + "", + ) + ) + + return url + + def with_traced_module(func): """Helper for providing tracing essentials (module and pin) for tracing wrappers. @@ -417,6 +441,8 @@ def set_http_meta( span.set_tag_str(http.METHOD, method) if url is not None: + url = _sanitized_url(url) + if integration_config.http_tag_query_string: # Tagging query string in http.url if config.global_query_string_obfuscation_disabled: # No redacting of query strings span.set_tag_str(http.URL, url) diff --git a/releasenotes/notes/fix-http-url-200fca099f41ce49.yaml b/releasenotes/notes/fix-http-url-200fca099f41ce49.yaml new file mode 100644 index 00000000000..679a06fa539 --- /dev/null +++ b/releasenotes/notes/fix-http-url-200fca099f41ce49.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + This fix removes unintended url parts in the ``http.url`` tag. diff --git a/tests/contrib/aiohttp/test_aiohttp_client.py b/tests/contrib/aiohttp/test_aiohttp_client.py index 10264dd0cfe..6c5443cbecf 100644 --- a/tests/contrib/aiohttp/test_aiohttp_client.py +++ b/tests/contrib/aiohttp/test_aiohttp_client.py @@ -16,7 +16,9 @@ PORT = HTTPBIN_CONFIG["port"] SOCKET = "{}:{}".format(HOST, PORT) URL = "http://{}".format(SOCKET) +URL_AUTH = "http://user:pass@{}".format(SOCKET) URL_200 = "{}/status/200".format(URL) +URL_AUTH_200 = "{}/status/200".format(URL_AUTH) URL_500 = "{}/status/500".format(URL) @@ -35,6 +37,14 @@ async def test_200_request(snapshot_context): assert resp.status == 200 +@pytest.mark.asyncio +async def test_auth_200_request(snapshot_context): + with snapshot_context(): + async with aiohttp.ClientSession() as session: + async with session.get(URL_AUTH_200) as resp: + assert resp.status == 200 + + @pytest.mark.asyncio async def test_200_request_post(snapshot_context): with snapshot_context(): diff --git a/tests/contrib/requests/test_requests.py b/tests/contrib/requests/test_requests.py index e48ed885e9b..0c9b5a6e9b8 100644 --- a/tests/contrib/requests/test_requests.py +++ b/tests/contrib/requests/test_requests.py @@ -30,6 +30,7 @@ SOCKET = "httpbin.org" URL_200 = "http://{}/status/200".format(SOCKET) URL_500 = "http://{}/status/500".format(SOCKET) +URL_AUTH_200 = "http://user:pass@{}/status/200".format(SOCKET) class BaseRequestTestCase(object): @@ -125,6 +126,13 @@ def test_200(self): assert s.span_type == "http" assert http.QUERY_STRING not in s.get_tags() + def test_auth_200(self): + self.session.get(URL_AUTH_200) + spans = self.pop_spans() + assert len(spans) == 1 + s = spans[0] + assert s.get_tag(http.URL) == URL_200 + def test_200_send(self): # when calling send directly req = requests.Request(url=URL_200, method="GET") diff --git a/tests/snapshots/tests.contrib.aiohttp.test_aiohttp_client.test_auth_200_request.json b/tests/snapshots/tests.contrib.aiohttp.test_aiohttp_client.test_auth_200_request.json new file mode 100644 index 00000000000..c035f80daa1 --- /dev/null +++ b/tests/snapshots/tests.contrib.aiohttp.test_aiohttp_client.test_auth_200_request.json @@ -0,0 +1,37 @@ +[[ + { + "name": "aiohttp.request", + "service": null, + "resource": "aiohttp.request", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "http", + "meta": { + "_dd.p.dm": "-0", + "http.method": "GET", + "http.status_code": "200", + "http.status_msg": "OK", + "http.url": "http://localhost:8001/status/200", + "runtime-id": "7a569f0d94574038b2b11ee3579d0936" + }, + "metrics": { + "_dd.agent_psr": 1.0, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "system.pid": 57194 + }, + "duration": 8933000, + "start": 1673646330398679000 + }, + { + "name": "TCPConnector.connect", + "service": null, + "resource": "TCPConnector.connect", + "trace_id": 0, + "span_id": 2, + "parent_id": 1, + "duration": 1426000, + "start": 1673646330399204000 + }]] diff --git a/tests/tracer/test_trace_utils.py b/tests/tracer/test_trace_utils.py index a1e5459b36d..ec8412fd708 100644 --- a/tests/tracer/test_trace_utils.py +++ b/tests/tracer/test_trace_utils.py @@ -336,6 +336,9 @@ def test_ext_service(int_config, pin, config_val, default, expected): {"id": "val", "name": "vlad"}, None, ), + ("GET", "http://user:pass@localhost/", 0, None, None, None, None, None, None, None), + ("GET", "http://user@localhost/", 0, None, None, None, None, None, None, None), + ("GET", "http://user:pass@localhost/api?q=test", 0, None, None, None, None, None, None, None), ], ) def test_set_http_meta( @@ -376,10 +379,16 @@ def test_set_http_meta( assert http.METHOD not in span.get_tags() if url is not None: + if url.startswith("http://user"): + # Remove any userinfo that may be in the original url + expected_url = url[: url.index(":")] + "://" + url[url.index("@") + 1 :] + else: + expected_url = url + if query and int_config.trace_query_string: - assert span.get_tag(http.URL) == stringify(url + "?" + query) + assert span.get_tag(http.URL) == stringify(expected_url + "?" + query) else: - assert span.get_tag(http.URL) == stringify(url) + assert span.get_tag(http.URL) == stringify(expected_url) else: assert http.URL not in span.get_tags()