From 39447913b7fea4982b49ffef8bfbdeb447f6de3c Mon Sep 17 00:00:00 2001 From: Jameel Al-Aziz <247849+jalaziz@users.noreply.github.com> Date: Fri, 7 Jan 2022 06:23:59 -0800 Subject: [PATCH] Fix UnicodeDecodeError in aioredis integration Escape non-unicode bytes when decoding aioredis args. This prevents the `UnicodeDecodeError` while still offering some semblance of a human-readable string for tracing. fixes #3088 (cherry picked from commit 92390b28d86e9fb8940ee2fe94da9bda7e12b686) --- ddtrace/contrib/aioredis/patch.py | 10 ++-- ddtrace/contrib/redis/util.py | 8 ++- ddtrace/internal/compat.py | 1 + ...s-unicodedecodeerror-b861b199e8757950.yaml | 6 ++ tests/contrib/aioredis/test_aioredis.py | 31 ++++++++++ ..._aioredis.test_decoding_non_utf8_args.json | 56 +++++++++++++++++++ ....test_decoding_non_utf8_pipeline_args.json | 28 ++++++++++ ...st_decoding_non_utf8_pipeline_args_13.json | 28 ++++++++++ 8 files changed, 161 insertions(+), 7 deletions(-) create mode 100644 releasenotes/notes/fix-aioredis-unicodedecodeerror-b861b199e8757950.yaml create mode 100644 tests/snapshots/tests.contrib.aioredis.test_aioredis.test_decoding_non_utf8_args.json create mode 100644 tests/snapshots/tests.contrib.aioredis.test_aioredis.test_decoding_non_utf8_pipeline_args.json create mode 100644 tests/snapshots/tests.contrib.aioredis.test_aioredis.test_decoding_non_utf8_pipeline_args_13.json diff --git a/ddtrace/contrib/aioredis/patch.py b/ddtrace/contrib/aioredis/patch.py index fad1f471525..c2752835c9f 100644 --- a/ddtrace/contrib/aioredis/patch.py +++ b/ddtrace/contrib/aioredis/patch.py @@ -66,8 +66,7 @@ async def traced_execute_command(func, instance, args, kwargs): if not pin or not pin.enabled(): return await func(*args, **kwargs) - decoded_args = [arg.decode() if isinstance(arg, bytes) else arg for arg in args] - with _trace_redis_cmd(pin, config.aioredis, instance, decoded_args): + with _trace_redis_cmd(pin, config.aioredis, instance, args): return await func(*args, **kwargs) @@ -107,7 +106,6 @@ def traced_13_execute_command(func, instance, args, kwargs): if not pin or not pin.enabled(): return func(*args, **kwargs) - decoded_args = [arg.decode() if isinstance(arg, bytes) else arg for arg in args] # Don't activate the span since this operation is performed as a future which concludes sometime later on in # execution so subsequent operations in the stack are not necessarily semantically related # (we don't want this span to be the parent of all other spans created before the future is resolved) @@ -116,7 +114,7 @@ def traced_13_execute_command(func, instance, args, kwargs): ) span.set_tag(SPAN_MEASURED_KEY) - query = format_command_args(decoded_args) + query = format_command_args(args) span.resource = query span.set_tag(redisx.RAWCMD, query) if pin.tags: @@ -129,7 +127,7 @@ def traced_13_execute_command(func, instance, args, kwargs): redisx.DB: instance.db or 0, } ) - span.set_metric(redisx.ARGS_LEN, len(decoded_args)) + span.set_metric(redisx.ARGS_LEN, len(args)) # set analytics sample rate if enabled span.set_tag(ANALYTICS_SAMPLE_RATE_KEY, config.aioredis.get_analytics_sample_rate()) @@ -157,7 +155,7 @@ async def traced_13_execute_pipeline(func, instance, args, kwargs): cmds = [] for _, cmd, cmd_args, _ in instance._pipeline: - parts = [cmd.decode() if isinstance(cmd, bytes) else cmd] + parts = [cmd] parts.extend(cmd_args) cmds.append(format_command_args(parts)) resource = "\n".join(cmds) diff --git a/ddtrace/contrib/redis/util.py b/ddtrace/contrib/redis/util.py index e4ad398bbe7..e0b041521f0 100644 --- a/ddtrace/contrib/redis/util.py +++ b/ddtrace/contrib/redis/util.py @@ -9,7 +9,10 @@ from ...ext import SpanTypes from ...ext import net from ...ext import redis as redisx +from ...internal.compat import binary_type +from ...internal.compat import ensure_text from ...internal.compat import stringify +from ...internal.compat import text_type VALUE_PLACEHOLDER = "?" @@ -41,7 +44,10 @@ def format_command_args(args): out = [] for arg in args: try: - cmd = stringify(arg) + if isinstance(arg, (binary_type, text_type)): + cmd = ensure_text(arg, errors="backslashreplace") + else: + cmd = stringify(arg) if len(cmd) > VALUE_MAX_LEN: cmd = cmd[:VALUE_MAX_LEN] + VALUE_TOO_LONG_MARK diff --git a/ddtrace/internal/compat.py b/ddtrace/internal/compat.py index 70a34a0cec6..ee06690c601 100644 --- a/ddtrace/internal/compat.py +++ b/ddtrace/internal/compat.py @@ -51,6 +51,7 @@ ensure_str = six.ensure_str stringify = six.text_type string_type = six.string_types[0] +text_type = six.text_type binary_type = six.binary_type msgpack_type = six.binary_type # DEV: `six` doesn't have `float` in `integer_types` diff --git a/releasenotes/notes/fix-aioredis-unicodedecodeerror-b861b199e8757950.yaml b/releasenotes/notes/fix-aioredis-unicodedecodeerror-b861b199e8757950.yaml new file mode 100644 index 00000000000..235715fbcc4 --- /dev/null +++ b/releasenotes/notes/fix-aioredis-unicodedecodeerror-b861b199e8757950.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Escape non-Unicode bytes when decoding aioredis args. This fixes a + ``UnicodeDecodeError`` that can be thrown from the aioredis integration + when interacting with binary-encoded data, as is done in channels-redis. diff --git a/tests/contrib/aioredis/test_aioredis.py b/tests/contrib/aioredis/test_aioredis.py index 3ef8780bcdd..33e736c2345 100644 --- a/tests/contrib/aioredis/test_aioredis.py +++ b/tests/contrib/aioredis/test_aioredis.py @@ -66,6 +66,37 @@ async def test_basic_request(redis_client): assert val is None +@pytest.mark.asyncio +@pytest.mark.snapshot +async def test_decoding_non_utf8_args(redis_client): + await redis_client.set(b"\x80foo", b"\x80abc") + val = await redis_client.get(b"\x80foo") + assert val == b"\x80abc" + + +@pytest.mark.asyncio +@pytest.mark.snapshot(variants={"": aioredis_version >= (2, 0), "13": aioredis_version < (2, 0)}) +async def test_decoding_non_utf8_pipeline_args(redis_client): + if aioredis_version >= (2, 0): + p = await redis_client.pipeline(transaction=False) + await p.set(b"\x80blah", "boo") + await p.set("foo", b"\x80abc") + await p.get(b"\x80blah") + await p.get("foo") + else: + p = redis_client.pipeline() + p.set(b"\x80blah", "boo") + p.set("foo", b"\x80abc") + p.get(b"\x80blah") + p.get("foo") + + response_list = await p.execute() + assert response_list[0] is True # response from redis.set is OK if successfully pushed + assert response_list[1] is True + assert response_list[2].decode() == "boo" + assert response_list[3] == b"\x80abc" + + @pytest.mark.asyncio @pytest.mark.snapshot async def test_long_command(redis_client): diff --git a/tests/snapshots/tests.contrib.aioredis.test_aioredis.test_decoding_non_utf8_args.json b/tests/snapshots/tests.contrib.aioredis.test_aioredis.test_decoding_non_utf8_args.json new file mode 100644 index 00000000000..dd61ecdf975 --- /dev/null +++ b/tests/snapshots/tests.contrib.aioredis.test_aioredis.test_decoding_non_utf8_args.json @@ -0,0 +1,56 @@ +[[ + { + "name": "redis.command", + "service": "redis", + "resource": "SET \\x80foo \\x80abc", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "redis", + "meta": { + "out.host": "127.0.0.1", + "redis.raw_command": "SET \\x80foo \\x80abc", + "runtime-id": "af49d807abff41089eed43f05d099627" + }, + "metrics": { + "_dd.agent_psr": 1.0, + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "out.port": 6379, + "out.redis_db": 0, + "redis.args_length": 3, + "system.pid": 36293 + }, + "duration": 6351000, + "start": 1640819680842990000 + }], +[ + { + "name": "redis.command", + "service": "redis", + "resource": "GET \\x80foo", + "trace_id": 1, + "span_id": 1, + "parent_id": 0, + "type": "redis", + "meta": { + "out.host": "127.0.0.1", + "redis.raw_command": "GET \\x80foo", + "runtime-id": "af49d807abff41089eed43f05d099627" + }, + "metrics": { + "_dd.agent_psr": 1.0, + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "out.port": 6379, + "out.redis_db": 0, + "redis.args_length": 2, + "system.pid": 36293 + }, + "duration": 1959000, + "start": 1640819680849456000 + }]] diff --git a/tests/snapshots/tests.contrib.aioredis.test_aioredis.test_decoding_non_utf8_pipeline_args.json b/tests/snapshots/tests.contrib.aioredis.test_aioredis.test_decoding_non_utf8_pipeline_args.json new file mode 100644 index 00000000000..2820d79c1ff --- /dev/null +++ b/tests/snapshots/tests.contrib.aioredis.test_aioredis.test_decoding_non_utf8_pipeline_args.json @@ -0,0 +1,28 @@ +[[ + { + "name": "redis.command", + "service": "redis", + "resource": "SET \\x80blah boo\nSET foo \\x80abc\nGET \\x80blah\nGET foo", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "redis", + "meta": { + "out.host": "127.0.0.1", + "redis.raw_command": "SET \\x80blah boo\nSET foo \\x80abc\nGET \\x80blah\nGET foo", + "runtime-id": "e6362b13a7394c2490fc39fa29c0e4af" + }, + "metrics": { + "_dd.agent_psr": 1.0, + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "out.port": 6379, + "out.redis_db": 0, + "redis.pipeline_length": 4, + "system.pid": 60727 + }, + "duration": 922000, + "start": 1640911687480019000 + }]] diff --git a/tests/snapshots/tests.contrib.aioredis.test_aioredis.test_decoding_non_utf8_pipeline_args_13.json b/tests/snapshots/tests.contrib.aioredis.test_aioredis.test_decoding_non_utf8_pipeline_args_13.json new file mode 100644 index 00000000000..1cbf6466aec --- /dev/null +++ b/tests/snapshots/tests.contrib.aioredis.test_aioredis.test_decoding_non_utf8_pipeline_args_13.json @@ -0,0 +1,28 @@ +[[ + { + "name": "redis.command", + "service": "redis", + "resource": "SET \\x80blah boo\nSET foo \\x80abc\nGET \\x80blah\nGET foo", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "redis", + "meta": { + "out.host": "127.0.0.1", + "redis.raw_command": "SET \\x80blah boo\nSET foo \\x80abc\nGET \\x80blah\nGET foo", + "runtime-id": "2c1fc87edd3b44ea8e46fe3854500d3c" + }, + "metrics": { + "_dd.agent_psr": 1.0, + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "out.port": 6379, + "out.redis_db": 0, + "redis.pipeline_length": 4, + "system.pid": 60927 + }, + "duration": 982000, + "start": 1640911730323059000 + }]]