From c381bd289acf536645b068fbb2edf95e6a0c339f Mon Sep 17 00:00:00 2001 From: "Tahir H. Butt" Date: Fri, 2 Nov 2018 09:34:05 -0400 Subject: [PATCH 1/6] [httplib] Strip all but path from url --- ddtrace/compat.py | 4 +++- ddtrace/contrib/httplib/patch.py | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ddtrace/compat.py b/ddtrace/compat.py index 9b8e89de21c..19f818f9cc7 100644 --- a/ddtrace/compat.py +++ b/ddtrace/compat.py @@ -1,5 +1,5 @@ -import sys import platform +import sys PYTHON_VERSION_INFO = sys.version_info PY2 = sys.version_info[0] == 2 @@ -12,6 +12,7 @@ if PY2: from urllib import urlencode + from urlparse import urlparse import httplib stringify = unicode from Queue import Queue @@ -22,6 +23,7 @@ else: from queue import Queue from urllib.parse import urlencode + from urllib.parse import urlparse import http.client as httplib from io import StringIO diff --git a/ddtrace/contrib/httplib/patch.py b/ddtrace/contrib/httplib/patch.py index d87dc7ce7f5..98d8482c264 100644 --- a/ddtrace/contrib/httplib/patch.py +++ b/ddtrace/contrib/httplib/patch.py @@ -5,12 +5,11 @@ import wrapt # Project -from ...compat import httplib, PY2 +from ...compat import PY2, httplib, urlparse from ...ext import http as ext_http from ...pin import Pin from ...utils.wrappers import unwrap as _u - span_name = 'httplib.request' if PY2 else 'http.client.request' log = logging.getLogger(__name__) @@ -60,9 +59,14 @@ def _wrap_putrequest(func, instance, args, kwargs): method, path = args[:2] scheme = 'https' if isinstance(instance, httplib.HTTPSConnection) else 'http' port = ':{port}'.format(port=instance.port) + + # strip path of non-path data + path = urlparse(path).path + if (scheme == 'http' and instance.port == 80) or (scheme == 'https' and instance.port == 443): port = '' url = '{scheme}://{host}{port}{path}'.format(scheme=scheme, host=instance.host, port=port, path=path) + span.set_tag(ext_http.URL, url) span.set_tag(ext_http.METHOD, method) except Exception: From 3a302b97c1ebb057393806ef0d0b9c761d7997a0 Mon Sep 17 00:00:00 2001 From: "Tahir H. Butt" Date: Fri, 2 Nov 2018 10:02:45 -0400 Subject: [PATCH 2/6] [httplib] Fix tests --- ddtrace/compat.py | 2 -- ddtrace/contrib/httplib/patch.py | 4 ++-- tests/contrib/httplib/test_httplib.py | 7 +++---- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/ddtrace/compat.py b/ddtrace/compat.py index 19f818f9cc7..be875ff7d39 100644 --- a/ddtrace/compat.py +++ b/ddtrace/compat.py @@ -12,7 +12,6 @@ if PY2: from urllib import urlencode - from urlparse import urlparse import httplib stringify = unicode from Queue import Queue @@ -23,7 +22,6 @@ else: from queue import Queue from urllib.parse import urlencode - from urllib.parse import urlparse import http.client as httplib from io import StringIO diff --git a/ddtrace/contrib/httplib/patch.py b/ddtrace/contrib/httplib/patch.py index 98d8482c264..f34b42d8df8 100644 --- a/ddtrace/contrib/httplib/patch.py +++ b/ddtrace/contrib/httplib/patch.py @@ -5,7 +5,7 @@ import wrapt # Project -from ...compat import PY2, httplib, urlparse +from ...compat import PY2, httplib, parse from ...ext import http as ext_http from ...pin import Pin from ...utils.wrappers import unwrap as _u @@ -61,7 +61,7 @@ def _wrap_putrequest(func, instance, args, kwargs): port = ':{port}'.format(port=instance.port) # strip path of non-path data - path = urlparse(path).path + path = parse.urlparse(path).path if (scheme == 'http' and instance.port == 80) or (scheme == 'https' and instance.port == 443): port = '' diff --git a/tests/contrib/httplib/test_httplib.py b/tests/contrib/httplib/test_httplib.py index 81c6e339744..e8b21c300d4 100644 --- a/tests/contrib/httplib/test_httplib.py +++ b/tests/contrib/httplib/test_httplib.py @@ -7,16 +7,15 @@ import wrapt # Project -from ddtrace.compat import httplib, PY2 +from ddtrace.compat import PY2, httplib from ddtrace.contrib.httplib import patch, unpatch from ddtrace.contrib.httplib.patch import should_skip_request from ddtrace.pin import Pin - from tests.opentracer.utils import init_tracer + from ...test_tracer import get_dummy_tracer from ...util import assert_dict_issuperset, override_global_tracer - if PY2: from urllib2 import urlopen, build_opener, Request else: @@ -242,7 +241,7 @@ def test_httplib_request_get_request_query_string(self): { 'http.method': 'GET', 'http.status_code': '200', - 'http.url': '{}?key=value&key2=value2'.format(URL_200), + 'http.url': '{}'.format(URL_200), } ) From 83a89a3227643119c53811c8fe32c4d25d3ef638 Mon Sep 17 00:00:00 2001 From: "Tahir H. Butt" Date: Fri, 2 Nov 2018 15:12:08 -0400 Subject: [PATCH 3/6] [requests] Sanitize url --- ddtrace/contrib/requests/connection.py | 19 +++++++++++++------ tests/contrib/requests/test_requests.py | 24 +++++++++++++++++++----- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/ddtrace/contrib/requests/connection.py b/ddtrace/contrib/requests/connection.py index 8d18c2fbb82..6444492251b 100644 --- a/ddtrace/contrib/requests/connection.py +++ b/ddtrace/contrib/requests/connection.py @@ -1,14 +1,12 @@ import logging -import ddtrace +import ddtrace from ddtrace import config -from .constants import DEFAULT_SERVICE - -from ...ext import http from ...compat import parse +from ...ext import http from ...propagation.http import HTTPPropagator - +from .constants import DEFAULT_SERVICE log = logging.getLogger(__name__) @@ -55,6 +53,15 @@ def _wrap_request(func, instance, args, kwargs): url = kwargs.get('url') or args[1] headers = kwargs.get('headers', {}) parsed_uri = parse.urlparse(url) + # sanitize url of query + sanitized_url = parse.urlunparse(( + parsed_uri.scheme, + parsed_uri.netloc, + parsed_uri.path, + parsed_uri.params, + None, # drop parsed_uri.query + parsed_uri.fragment + )) with tracer.trace("requests.request", span_type=http.TYPE) as span: # update the span service name before doing any action @@ -73,7 +80,7 @@ def _wrap_request(func, instance, args, kwargs): finally: try: span.set_tag(http.METHOD, method.upper()) - span.set_tag(http.URL, url) + span.set_tag(http.URL, sanitized_url) if response is not None: span.set_tag(http.STATUS_CODE, response.status_code) # `span.error` must be an integer diff --git a/tests/contrib/requests/test_requests.py b/tests/contrib/requests/test_requests.py index f3a01ae8dd9..6836da74920 100644 --- a/tests/contrib/requests/test_requests.py +++ b/tests/contrib/requests/test_requests.py @@ -1,17 +1,17 @@ import unittest -import requests +import requests from requests import Session from requests.exceptions import MissingSchema -from nose.tools import eq_, assert_raises from ddtrace import config -from ddtrace.ext import http, errors from ddtrace.contrib.requests import patch, unpatch - +from ddtrace.ext import errors, http +from nose.tools import assert_raises, eq_ from tests.opentracer.utils import init_tracer -from ...util import override_global_tracer + from ...test_tracer import get_dummy_tracer +from ...util import override_global_tracer # socket name comes from https://english.stackexchange.com/a/44048 SOCKET = 'httpbin.org' @@ -105,6 +105,20 @@ def test_200(self): eq_(s.error, 0) eq_(s.span_type, http.TYPE) + def test_200_query_string(self): + # ensure query string is removed before adding url to metadata + out = self.session.get(URL_200 + '?key=value&key2=value2') + eq_(out.status_code, 200) + # validation + spans = self.tracer.writer.pop() + eq_(len(spans), 1) + s = spans[0] + eq_(s.get_tag(http.METHOD), 'GET') + eq_(s.get_tag(http.STATUS_CODE), '200') + eq_(s.get_tag(http.URL), URL_200) + eq_(s.error, 0) + eq_(s.span_type, http.TYPE) + def test_requests_module_200(self): # ensure the requests API is instrumented even without # using a `Session` directly From 9c5b1512af64ad7298e3236dc70da7af1cfaef43 Mon Sep 17 00:00:00 2001 From: "Tahir H. Butt" Date: Fri, 2 Nov 2018 15:15:43 -0400 Subject: [PATCH 4/6] [httplib] Add comment --- tests/contrib/httplib/test_httplib.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/contrib/httplib/test_httplib.py b/tests/contrib/httplib/test_httplib.py index e8b21c300d4..13e5a586a68 100644 --- a/tests/contrib/httplib/test_httplib.py +++ b/tests/contrib/httplib/test_httplib.py @@ -241,6 +241,7 @@ def test_httplib_request_get_request_query_string(self): { 'http.method': 'GET', 'http.status_code': '200', + # check url metadata lacks query string 'http.url': '{}'.format(URL_200), } ) From 9875c7d6b41f15b9e7c1c296758007788456dc7e Mon Sep 17 00:00:00 2001 From: "Tahir H. Butt" Date: Mon, 5 Nov 2018 16:10:15 -0500 Subject: [PATCH 5/6] Correct comment --- tests/contrib/httplib/test_httplib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/contrib/httplib/test_httplib.py b/tests/contrib/httplib/test_httplib.py index 13e5a586a68..360247581ee 100644 --- a/tests/contrib/httplib/test_httplib.py +++ b/tests/contrib/httplib/test_httplib.py @@ -220,7 +220,7 @@ def test_httplib_request_post_request(self): def test_httplib_request_get_request_query_string(self): """ When making a GET request with a query string via httplib.HTTPConnection.request - we capture a the entire url in the span + we capture the all of the url in the span except for the query string """ conn = self.get_http_connection(SOCKET) with contextlib.closing(conn): From 9d3a159f0fcabc6b61b9e5e4191c153efb6071f9 Mon Sep 17 00:00:00 2001 From: "Tahir H. Butt" Date: Mon, 5 Nov 2018 16:58:04 -0500 Subject: [PATCH 6/6] Make httlib and requests consistent --- ddtrace/contrib/httplib/patch.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ddtrace/contrib/httplib/patch.py b/ddtrace/contrib/httplib/patch.py index f34b42d8df8..fc0da6ba664 100644 --- a/ddtrace/contrib/httplib/patch.py +++ b/ddtrace/contrib/httplib/patch.py @@ -60,14 +60,22 @@ def _wrap_putrequest(func, instance, args, kwargs): scheme = 'https' if isinstance(instance, httplib.HTTPSConnection) else 'http' port = ':{port}'.format(port=instance.port) - # strip path of non-path data - path = parse.urlparse(path).path - if (scheme == 'http' and instance.port == 80) or (scheme == 'https' and instance.port == 443): port = '' url = '{scheme}://{host}{port}{path}'.format(scheme=scheme, host=instance.host, port=port, path=path) - span.set_tag(ext_http.URL, url) + # sanitize url + parsed = parse.urlparse(url) + sanitized_url = parse.urlunparse(( + parsed.scheme, + parsed.netloc, + parsed.path, + parsed.params, + None, # drop query + parsed.fragment + )) + + span.set_tag(ext_http.URL, sanitized_url) span.set_tag(ext_http.METHOD, method) except Exception: log.debug('error applying request tags', exc_info=True)