From 28d3d7c08772504ac58564a9398b1593b40d66da Mon Sep 17 00:00:00 2001 From: Aryan Putta Date: Thu, 21 May 2026 23:26:52 -0400 Subject: [PATCH 1/4] cuda.bindings: add decode_c_str helper for #2122 - new pure-Python helper cuda.bindings._internal.strdecode.decode_c_str(data, api_name); success path unchanged from bytes.decode() - on UnicodeDecodeError, re-raises same exception type with reason extended to include CUDA API name + bounded hex preview (stop at first NUL with offset marker, cap 64 bytes with +N more) - exception type and __cause__ chain preserved so existing except UnicodeDecodeError keeps working - placed in _internal/ next to _fast_enum.py: no Cython rebuild, unit-testable without compiling extensions - generated .pyx call sites (nvml.error_string, nvvm.get_error_string, nvfatbin.get_error_string, cufile.cufileop_status_error) left untouched per cuda_bindings/CLAUDE.md; adoption belongs in next generator sync Implements the helper rwgk proposed inline on #2118. Refs #2118. Closes #2122. Signed-off-by: Aryan Putta --- .../cuda/bindings/_internal/strdecode.py | 42 +++++++ cuda_bindings/tests/test_strdecode.py | 106 ++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 cuda_bindings/cuda/bindings/_internal/strdecode.py create mode 100644 cuda_bindings/tests/test_strdecode.py diff --git a/cuda_bindings/cuda/bindings/_internal/strdecode.py b/cuda_bindings/cuda/bindings/_internal/strdecode.py new file mode 100644 index 00000000000..ed825389c04 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/strdecode.py @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +"""Decode C strings returned by CUDA libraries with actionable failure context.""" + +from __future__ import annotations + +# Cap mirrors the #2118 mojibake size; keeps exception text readable in logs. 
# Default cap on how many bytes the hex preview renders.
_PREVIEW_MAX_BYTES = 64


def _bounded_hex_preview(data: bytes, max_bytes: int = _PREVIEW_MAX_BYTES) -> str:
    """Return a short, bounded hex rendering of ``data`` for error messages.

    Only the bytes before the first NUL are considered (all of ``data`` when
    it contains no NUL), and at most ``max_bytes`` of them are rendered.

    Args:
        data: Raw bytes to describe.
        max_bytes: Maximum number of bytes rendered as hex. Negative values
            are treated as zero.

    Returns:
        ``<N bytes; hex='..'>`` where ``N`` counts the bytes before the first
        NUL. When bytes were left out and/or a NUL was found, a suffix such as
        `` ...(+K more; stopped at NUL@P)`` is added before the closing ``>``.
    """
    nul_at = data.find(b"\x00")
    has_nul = nul_at != -1
    visible_len = nul_at if has_nul else len(data)
    # Clamp the cap at zero: a negative value would otherwise turn the slice
    # below into a "drop from the end" slice, which can render bytes located
    # after the NUL and makes the "+N more" count wrong.
    shown_len = min(visible_len, max(max_bytes, 0))
    # bytes.hex(" ") already yields "" for an empty slice.
    hex_body = data[:shown_len].hex(" ")

    notes = []
    if shown_len < visible_len:
        notes.append(f"+{visible_len - shown_len} more")
    if has_nul:
        # Explicit marker so a NUL cut-off is not misread as an empty value.
        notes.append(f"stopped at NUL@{nul_at}")
    suffix = f" ...({'; '.join(notes)})" if notes else ""
    return f"<{visible_len} bytes; hex='{hex_body}'{suffix}>"


def decode_c_str(data: bytes, api_name: str) -> str:
    """Decode ``data`` as UTF-8, adding call-site context when decoding fails.

    Internal API. ``api_name`` is embedded verbatim in the error text.

    Args:
        data: Bytes to decode.
        api_name: Name of the API that produced ``data``; used only in the
            failure message.

    Returns:
        The decoded string; identical to ``data.decode("utf-8")`` on success.

    Raises:
        UnicodeDecodeError: If ``data`` is not valid UTF-8. The raised error
            keeps the original ``encoding``, ``object``, ``start`` and
            ``end``; its ``reason`` is the original reason extended with
            ``api_name`` and a bounded hex preview of ``data``. The original
            error is chained as ``__cause__``.
    """
    try:
        return data.decode("utf-8")
    except UnicodeDecodeError as err:
        # Re-raise the same exception type (not a subclass) so existing
        # ``except UnicodeDecodeError`` handlers keep catching it.
        preview = _bounded_hex_preview(data)
        reason = f"{err.reason} (returned by {api_name}; bytes={preview})"
        raise UnicodeDecodeError(err.encoding, err.object, err.start, err.end, reason) from err


# Patch metadata that shared this physical line in the original, kept verbatim:
# diff --git a/cuda_bindings/tests/test_strdecode.py b/cuda_bindings/tests/test_strdecode.py
# new file mode 100644
# index 00000000000..fd06c451c42
# --- /dev/null
# +++ b/cuda_bindings/tests/test_strdecode.py
# @@ -0,0 +1,106 @@
# +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import pytest + +from cuda.bindings._internal.strdecode import _bounded_hex_preview, decode_c_str + +WSL_MOJIBAKE_PREFIX = b"\xf8\x9a\x80\x80\xaf" + + +def test_valid_utf8_passthrough(): + assert decode_c_str(b"hello world", "fakeApi") == "hello world" + + +def test_empty_input(): + assert decode_c_str(b"", "fakeApi") == "" + + +def test_multibyte_utf8_passthrough(): + assert decode_c_str(b"caf\xc3\xa9", "fakeApi") == "café" + + +def test_invalid_bytes_raise_unicode_decode_error(): + with pytest.raises(UnicodeDecodeError): + decode_c_str(WSL_MOJIBAKE_PREFIX, "nvmlSystemGetProcessName") + + +def test_failure_reason_includes_api_name(): + with pytest.raises(UnicodeDecodeError) as excinfo: + decode_c_str(WSL_MOJIBAKE_PREFIX, "nvmlSystemGetProcessName") + assert "nvmlSystemGetProcessName" in excinfo.value.reason + + +def test_failure_reason_includes_hex_preview(): + with pytest.raises(UnicodeDecodeError) as excinfo: + decode_c_str(WSL_MOJIBAKE_PREFIX, "nvmlSystemGetProcessName") + assert "f8 9a 80 80 af" in excinfo.value.reason + + +def test_failure_chains_original_error(): + with pytest.raises(UnicodeDecodeError) as excinfo: + decode_c_str(b"\xf8", "fakeApi") + assert isinstance(excinfo.value.__cause__, UnicodeDecodeError) + + +def test_failure_preserves_codec_and_position(): + with pytest.raises(UnicodeDecodeError) as excinfo: + decode_c_str(b"\xf8\x9a", "fakeApi") + assert excinfo.value.encoding == "utf-8" + assert excinfo.value.start == 0 + assert excinfo.value.end == 1 + + +def test_preview_stops_at_first_nul(): + preview = _bounded_hex_preview(b"\xf8\xf8\x00trailing junk") + assert "f8 f8" in preview + assert "trailing" not in preview + assert "<2 bytes;" in preview + assert "stopped at NUL@2" in preview + + +def test_preview_with_leading_nul_reports_zero_bytes_and_nul_marker(): + preview = _bounded_hex_preview(b"\x00\xf8\xf8") + assert preview.startswith("<0 bytes;") + assert "hex=''" in preview + 
assert "stopped at NUL@0" in preview + + +def test_preview_no_nul_no_truncation_has_no_suffix(): + preview = _bounded_hex_preview(b"\xf8\x9a") + assert preview == "<2 bytes; hex='f8 9a'>" + + +def test_preview_caps_long_buffers(): + preview = _bounded_hex_preview(b"\xf8" * 200, max_bytes=8) + assert "f8 f8 f8 f8 f8 f8 f8 f8" in preview + assert "+192 more" in preview + assert "stopped at NUL" not in preview + + +def test_preview_combines_truncation_and_nul_markers(): + preview = _bounded_hex_preview(b"\xf8" * 20 + b"\x00rest", max_bytes=8) + assert "+12 more" in preview + assert "stopped at NUL@20" in preview + + +def test_preview_empty_input(): + assert _bounded_hex_preview(b"") == "<0 bytes; hex=''>" + + +def test_failure_preview_stops_at_embedded_nul_even_with_bad_bytes_before(): + # C-string convention: helper reports what NVML would treat as the string. + with pytest.raises(UnicodeDecodeError) as excinfo: + decode_c_str(b"\xf8\x9a\x00ignored_after_nul", "fakeApi") + reason = excinfo.value.reason + assert "f8 9a" in reason + assert "ignored_after_nul" not in reason + assert "<2 bytes;" in reason + + +def test_failure_message_stays_bounded_for_long_garbage(): + with pytest.raises(UnicodeDecodeError) as excinfo: + decode_c_str(b"\xf8" * 1024, "fakeApi") + reason = excinfo.value.reason + assert "+960 more" in reason + assert len(reason) < 500 From 3eb70d1dd75d92a3143034bbeb95795fc45ebb90 Mon Sep 17 00:00:00 2001 From: Aryan Putta Date: Thu, 21 May 2026 23:30:02 -0400 Subject: [PATCH 2/4] cuda.bindings: tighten strdecode inline comments Signed-off-by: Aryan Putta --- cuda_bindings/cuda/bindings/_internal/strdecode.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/strdecode.py b/cuda_bindings/cuda/bindings/_internal/strdecode.py index ed825389c04..a8d04c80a29 100644 --- a/cuda_bindings/cuda/bindings/_internal/strdecode.py +++ b/cuda_bindings/cuda/bindings/_internal/strdecode.py @@ -5,13 +5,13 @@ 
from __future__ import annotations -# Cap mirrors the #2118 mojibake size; keeps exception text readable in logs. +# Cap sized for the #2118 mojibake without flooding exception text. _PREVIEW_MAX_BYTES = 64 def _bounded_hex_preview(data: bytes, max_bytes: int = _PREVIEW_MAX_BYTES) -> str: - # NUL terminates C strings; bytes past it are not the returned value. - # Marker is explicit so a reader does not misread truncation as empty. + # Bytes after the first NUL are not part of the returned C string. The + # marker is explicit so truncation cannot be misread as an empty value. nul = data.find(b"\x00") nul_stopped = nul != -1 visible_end = len(data) if not nul_stopped else nul @@ -30,13 +30,12 @@ def _bounded_hex_preview(data: bytes, max_bytes: int = _PREVIEW_MAX_BYTES) -> st def decode_c_str(data: bytes, api_name: str) -> str: """Decode ``data`` as UTF-8, or raise ``UnicodeDecodeError`` with ``api_name`` and a bounded hex preview in ``reason``. - Internal API. ``api_name`` is trusted caller input and is embedded verbatim. + Internal API. ``api_name`` is trusted caller input and embedded verbatim. """ try: return data.decode("utf-8") except UnicodeDecodeError as e: - # Same exception type, not a subclass: existing `except UnicodeDecodeError` - # keeps working. Original chained via `from e`. + # Same exception type, not a subclass, so existing handlers still catch. preview = _bounded_hex_preview(data) reason = f"{e.reason} (returned by {api_name}; bytes={preview})" raise UnicodeDecodeError(e.encoding, e.object, e.start, e.end, reason) from e From ec677f4974a08b2c0ac19765eea7db06244b5bc0 Mon Sep 17 00:00:00 2001 From: Aryan Putta Date: Thu, 21 May 2026 23:51:48 -0400 Subject: [PATCH 3/4] cuda.bindings: trim strdecode tests to behavior-pinning set Drop tests that verified stdlib behavior or trivial format strings; keep the 11 that pin a non-obvious invariant (issue spec, codec/position preservation, NUL/cap markers). 
Signed-off-by: Aryan Putta --- cuda_bindings/tests/test_strdecode.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/cuda_bindings/tests/test_strdecode.py b/cuda_bindings/tests/test_strdecode.py index fd06c451c42..2d68f6cf4c1 100644 --- a/cuda_bindings/tests/test_strdecode.py +++ b/cuda_bindings/tests/test_strdecode.py @@ -12,14 +12,6 @@ def test_valid_utf8_passthrough(): assert decode_c_str(b"hello world", "fakeApi") == "hello world" -def test_empty_input(): - assert decode_c_str(b"", "fakeApi") == "" - - -def test_multibyte_utf8_passthrough(): - assert decode_c_str(b"caf\xc3\xa9", "fakeApi") == "café" - - def test_invalid_bytes_raise_unicode_decode_error(): with pytest.raises(UnicodeDecodeError): decode_c_str(WSL_MOJIBAKE_PREFIX, "nvmlSystemGetProcessName") @@ -59,18 +51,6 @@ def test_preview_stops_at_first_nul(): assert "stopped at NUL@2" in preview -def test_preview_with_leading_nul_reports_zero_bytes_and_nul_marker(): - preview = _bounded_hex_preview(b"\x00\xf8\xf8") - assert preview.startswith("<0 bytes;") - assert "hex=''" in preview - assert "stopped at NUL@0" in preview - - -def test_preview_no_nul_no_truncation_has_no_suffix(): - preview = _bounded_hex_preview(b"\xf8\x9a") - assert preview == "<2 bytes; hex='f8 9a'>" - - def test_preview_caps_long_buffers(): preview = _bounded_hex_preview(b"\xf8" * 200, max_bytes=8) assert "f8 f8 f8 f8 f8 f8 f8 f8" in preview @@ -84,18 +64,12 @@ def test_preview_combines_truncation_and_nul_markers(): assert "stopped at NUL@20" in preview -def test_preview_empty_input(): - assert _bounded_hex_preview(b"") == "<0 bytes; hex=''>" - - def test_failure_preview_stops_at_embedded_nul_even_with_bad_bytes_before(): - # C-string convention: helper reports what NVML would treat as the string. 
with pytest.raises(UnicodeDecodeError) as excinfo: decode_c_str(b"\xf8\x9a\x00ignored_after_nul", "fakeApi") reason = excinfo.value.reason assert "f8 9a" in reason assert "ignored_after_nul" not in reason - assert "<2 bytes;" in reason def test_failure_message_stays_bounded_for_long_garbage(): From eb45dac6594f4e5bf0aff033070c4bb873d4c61b Mon Sep 17 00:00:00 2001 From: Aryan Date: Fri, 22 May 2026 01:31:32 -0400 Subject: [PATCH 4/4] cuda.bindings: drop from __future__ import annotations from strdecode.py No annotation in the file uses PEP 604 union syntax or forward references, so the import is unnecessary. Matches the style of the neighboring _fast_enum.py which does not carry it. --- cuda_bindings/cuda/bindings/_internal/strdecode.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/strdecode.py b/cuda_bindings/cuda/bindings/_internal/strdecode.py index a8d04c80a29..9c723fe9d97 100644 --- a/cuda_bindings/cuda/bindings/_internal/strdecode.py +++ b/cuda_bindings/cuda/bindings/_internal/strdecode.py @@ -3,8 +3,6 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE """Decode C strings returned by CUDA libraries with actionable failure context.""" -from __future__ import annotations - # Cap sized for the #2118 mojibake without flooding exception text. _PREVIEW_MAX_BYTES = 64