Skip to content

Commit

Permalink
[httplib] [requests] Sanitize urls in span metadata (#688)
Browse files Browse the repository at this point in the history
* [httplib] Strip all but path from url

* [httplib] Fix tests

* [requests] Sanitize url

* [httplib] Add comment

* Correct comment

* Make httlib and requests consistent
  • Loading branch information
majorgreys authored and brettlangdon committed Nov 8, 2018
1 parent 8dab248 commit be36ebb
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 20 deletions.
2 changes: 1 addition & 1 deletion ddtrace/compat.py
@@ -1,5 +1,5 @@
import sys
import platform
import sys

PYTHON_VERSION_INFO = sys.version_info
PY2 = sys.version_info[0] == 2
Expand Down
18 changes: 15 additions & 3 deletions ddtrace/contrib/httplib/patch.py
Expand Up @@ -5,12 +5,11 @@
import wrapt

# Project
from ...compat import httplib, PY2
from ...compat import PY2, httplib, parse
from ...ext import http as ext_http
from ...pin import Pin
from ...utils.wrappers import unwrap as _u


span_name = 'httplib.request' if PY2 else 'http.client.request'

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -60,10 +59,23 @@ def _wrap_putrequest(func, instance, args, kwargs):
method, path = args[:2]
scheme = 'https' if isinstance(instance, httplib.HTTPSConnection) else 'http'
port = ':{port}'.format(port=instance.port)

if (scheme == 'http' and instance.port == 80) or (scheme == 'https' and instance.port == 443):
port = ''
url = '{scheme}://{host}{port}{path}'.format(scheme=scheme, host=instance.host, port=port, path=path)
span.set_tag(ext_http.URL, url)

# sanitize url
parsed = parse.urlparse(url)
sanitized_url = parse.urlunparse((
parsed.scheme,
parsed.netloc,
parsed.path,
parsed.params,
None, # drop query
parsed.fragment
))

span.set_tag(ext_http.URL, sanitized_url)
span.set_tag(ext_http.METHOD, method)
except Exception:
log.debug('error applying request tags', exc_info=True)
Expand Down
19 changes: 13 additions & 6 deletions ddtrace/contrib/requests/connection.py
@@ -1,14 +1,12 @@
import logging
import ddtrace

import ddtrace
from ddtrace import config

from .constants import DEFAULT_SERVICE

from ...ext import http
from ...compat import parse
from ...ext import http
from ...propagation.http import HTTPPropagator

from .constants import DEFAULT_SERVICE

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -55,6 +53,15 @@ def _wrap_request(func, instance, args, kwargs):
url = kwargs.get('url') or args[1]
headers = kwargs.get('headers', {})
parsed_uri = parse.urlparse(url)
# sanitize url of query
sanitized_url = parse.urlunparse((
parsed_uri.scheme,
parsed_uri.netloc,
parsed_uri.path,
parsed_uri.params,
None, # drop parsed_uri.query
parsed_uri.fragment
))

with tracer.trace("requests.request", span_type=http.TYPE) as span:
# update the span service name before doing any action
Expand All @@ -76,7 +83,7 @@ def _wrap_request(func, instance, args, kwargs):
finally:
try:
span.set_tag(http.METHOD, method.upper())
span.set_tag(http.URL, url)
span.set_tag(http.URL, sanitized_url)
if response is not None:
span.set_tag(http.STATUS_CODE, response.status_code)
# `span.error` must be an integer
Expand Down
10 changes: 5 additions & 5 deletions tests/contrib/httplib/test_httplib.py
Expand Up @@ -7,16 +7,15 @@
import wrapt

# Project
from ddtrace.compat import httplib, PY2
from ddtrace.compat import PY2, httplib
from ddtrace.contrib.httplib import patch, unpatch
from ddtrace.contrib.httplib.patch import should_skip_request
from ddtrace.pin import Pin

from tests.opentracer.utils import init_tracer

from ...test_tracer import get_dummy_tracer
from ...util import assert_dict_issuperset, override_global_tracer


if PY2:
from urllib2 import urlopen, build_opener, Request
else:
Expand Down Expand Up @@ -221,7 +220,7 @@ def test_httplib_request_post_request(self):
def test_httplib_request_get_request_query_string(self):
"""
When making a GET request with a query string via httplib.HTTPConnection.request
we capture a the entire url in the span
we capture the all of the url in the span except for the query string
"""
conn = self.get_http_connection(SOCKET)
with contextlib.closing(conn):
Expand All @@ -242,7 +241,8 @@ def test_httplib_request_get_request_query_string(self):
{
'http.method': 'GET',
'http.status_code': '200',
'http.url': '{}?key=value&key2=value2'.format(URL_200),
# check url metadata lacks query string
'http.url': '{}'.format(URL_200),
}
)

Expand Down
24 changes: 19 additions & 5 deletions tests/contrib/requests/test_requests.py
@@ -1,17 +1,17 @@
import unittest
import requests

import requests
from requests import Session
from requests.exceptions import MissingSchema
from nose.tools import eq_, assert_raises

from ddtrace import config
from ddtrace.ext import http, errors
from ddtrace.contrib.requests import patch, unpatch

from ddtrace.ext import errors, http
from nose.tools import assert_raises, eq_
from tests.opentracer.utils import init_tracer
from ...util import override_global_tracer

from ...test_tracer import get_dummy_tracer
from ...util import override_global_tracer

# socket name comes from https://english.stackexchange.com/a/44048
SOCKET = 'httpbin.org'
Expand Down Expand Up @@ -105,6 +105,20 @@ def test_200(self):
eq_(s.error, 0)
eq_(s.span_type, http.TYPE)

def test_200_query_string(self):
# ensure query string is removed before adding url to metadata
out = self.session.get(URL_200 + '?key=value&key2=value2')
eq_(out.status_code, 200)
# validation
spans = self.tracer.writer.pop()
eq_(len(spans), 1)
s = spans[0]
eq_(s.get_tag(http.METHOD), 'GET')
eq_(s.get_tag(http.STATUS_CODE), '200')
eq_(s.get_tag(http.URL), URL_200)
eq_(s.error, 0)
eq_(s.span_type, http.TYPE)

def test_requests_module_200(self):
# ensure the requests API is instrumented even without
# using a `Session` directly
Expand Down

0 comments on commit be36ebb

Please sign in to comment.