Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[httplib, requests] Sanitize urls in span metadata #688

Merged
merged 9 commits into from Nov 6, 2018
2 changes: 1 addition & 1 deletion ddtrace/compat.py
@@ -1,5 +1,5 @@
import sys
import platform
import sys

PYTHON_VERSION_INFO = sys.version_info
PY2 = sys.version_info[0] == 2
Expand Down
18 changes: 15 additions & 3 deletions ddtrace/contrib/httplib/patch.py
Expand Up @@ -5,12 +5,11 @@
import wrapt

# Project
from ...compat import httplib, PY2
from ...compat import PY2, httplib, parse
from ...ext import http as ext_http
from ...pin import Pin
from ...utils.wrappers import unwrap as _u


span_name = 'httplib.request' if PY2 else 'http.client.request'

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -60,10 +59,23 @@ def _wrap_putrequest(func, instance, args, kwargs):
method, path = args[:2]
scheme = 'https' if isinstance(instance, httplib.HTTPSConnection) else 'http'
port = ':{port}'.format(port=instance.port)

if (scheme == 'http' and instance.port == 80) or (scheme == 'https' and instance.port == 443):
port = ''
url = '{scheme}://{host}{port}{path}'.format(scheme=scheme, host=instance.host, port=port, path=path)
majorgreys marked this conversation as resolved.
Show resolved Hide resolved
span.set_tag(ext_http.URL, url)

# sanitize url
parsed = parse.urlparse(url)
sanitized_url = parse.urlunparse((
parsed.scheme,
parsed.netloc,
parsed.path,
parsed.params,
None, # drop query
parsed.fragment
))

span.set_tag(ext_http.URL, sanitized_url)
span.set_tag(ext_http.METHOD, method)
except Exception:
log.debug('error applying request tags', exc_info=True)
Expand Down
19 changes: 13 additions & 6 deletions ddtrace/contrib/requests/connection.py
@@ -1,14 +1,12 @@
import logging
import ddtrace

import ddtrace
from ddtrace import config

from .constants import DEFAULT_SERVICE

from ...ext import http
from ...compat import parse
from ...ext import http
from ...propagation.http import HTTPPropagator

from .constants import DEFAULT_SERVICE

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -55,6 +53,15 @@ def _wrap_request(func, instance, args, kwargs):
url = kwargs.get('url') or args[1]
headers = kwargs.get('headers', {})
parsed_uri = parse.urlparse(url)
# sanitize url of query
sanitized_url = parse.urlunparse((
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

I was originally thinking just parsed_uri.path or something, but this is much better.

parsed_uri.scheme,
parsed_uri.netloc,
parsed_uri.path,
parsed_uri.params,
None, # drop parsed_uri.query
parsed_uri.fragment
))

with tracer.trace("requests.request", span_type=http.TYPE) as span:
# update the span service name before doing any action
Expand All @@ -76,7 +83,7 @@ def _wrap_request(func, instance, args, kwargs):
finally:
try:
span.set_tag(http.METHOD, method.upper())
span.set_tag(http.URL, url)
span.set_tag(http.URL, sanitized_url)
if response is not None:
span.set_tag(http.STATUS_CODE, response.status_code)
# `span.error` must be an integer
Expand Down
10 changes: 5 additions & 5 deletions tests/contrib/httplib/test_httplib.py
Expand Up @@ -7,16 +7,15 @@
import wrapt

# Project
from ddtrace.compat import httplib, PY2
from ddtrace.compat import PY2, httplib
from ddtrace.contrib.httplib import patch, unpatch
from ddtrace.contrib.httplib.patch import should_skip_request
from ddtrace.pin import Pin

from tests.opentracer.utils import init_tracer

from ...test_tracer import get_dummy_tracer
from ...util import assert_dict_issuperset, override_global_tracer


if PY2:
from urllib2 import urlopen, build_opener, Request
else:
Expand Down Expand Up @@ -251,7 +250,7 @@ def test_httplib_request_post_request(self):
def test_httplib_request_get_request_query_string(self):
"""
When making a GET request with a query string via httplib.HTTPConnection.request
we capture a the entire url in the span
we capture the all of the url in the span except for the query string
"""
conn = self.get_http_connection(SOCKET)
with contextlib.closing(conn):
Expand All @@ -272,7 +271,8 @@ def test_httplib_request_get_request_query_string(self):
{
'http.method': 'GET',
'http.status_code': '200',
'http.url': '{}?key=value&key2=value2'.format(URL_200),
# check url metadata lacks query string
majorgreys marked this conversation as resolved.
Show resolved Hide resolved
'http.url': '{}'.format(URL_200),
brettlangdon marked this conversation as resolved.
Show resolved Hide resolved
}
)

Expand Down
24 changes: 19 additions & 5 deletions tests/contrib/requests/test_requests.py
@@ -1,17 +1,17 @@
import unittest
import requests

import requests
from requests import Session
from requests.exceptions import MissingSchema
from nose.tools import eq_, assert_raises

from ddtrace import config
from ddtrace.ext import http, errors
from ddtrace.contrib.requests import patch, unpatch

from ddtrace.ext import errors, http
from nose.tools import assert_raises, eq_
from tests.opentracer.utils import init_tracer
from ...util import override_global_tracer

from ...test_tracer import get_dummy_tracer
from ...util import override_global_tracer

# socket name comes from https://english.stackexchange.com/a/44048
SOCKET = 'httpbin.org'
Expand Down Expand Up @@ -105,6 +105,20 @@ def test_200(self):
eq_(s.error, 0)
eq_(s.span_type, http.TYPE)

def test_200_query_string(self):
# ensure query string is removed before adding url to metadata
out = self.session.get(URL_200 + '?key=value&key2=value2')
eq_(out.status_code, 200)
# validation
spans = self.tracer.writer.pop()
eq_(len(spans), 1)
s = spans[0]
eq_(s.get_tag(http.METHOD), 'GET')
eq_(s.get_tag(http.STATUS_CODE), '200')
eq_(s.get_tag(http.URL), URL_200)
eq_(s.error, 0)
eq_(s.span_type, http.TYPE)

def test_requests_module_200(self):
# ensure the requests API is instrumented even without
# using a `Session` directly
Expand Down