diff --git a/cuda_bindings/pixi.lock b/cuda_bindings/pixi.lock
index 629d594a5b..c527f92276 100644
--- a/cuda_bindings/pixi.lock
+++ b/cuda_bindings/pixi.lock
@@ -962,7 +962,7 @@ packages:
   - python_abi 3.14.* *_cp314
   license: LicenseRef-NVIDIA-SOFTWARE-LICENSE
   input:
-    hash: 225edd459102d1f609dc61be2335826c3aaec36d76fb657faf6559efe1937aca
+    hash: 551bbbc879e5fefd687d45528e4876eb2383a17c2d292c200e92b369eead4289
     globs:
     - pyproject.toml
 - conda: .
@@ -985,7 +985,7 @@ packages:
   - python_abi 3.14.* *_cp314
   license: LicenseRef-NVIDIA-SOFTWARE-LICENSE
   input:
-    hash: 225edd459102d1f609dc61be2335826c3aaec36d76fb657faf6559efe1937aca
+    hash: 551bbbc879e5fefd687d45528e4876eb2383a17c2d292c200e92b369eead4289
     globs:
     - pyproject.toml
 - conda: .
@@ -1005,7 +1005,7 @@ packages:
   - python_abi 3.14.* *_cp314
   license: LicenseRef-NVIDIA-SOFTWARE-LICENSE
   input:
-    hash: 225edd459102d1f609dc61be2335826c3aaec36d76fb657faf6559efe1937aca
+    hash: 551bbbc879e5fefd687d45528e4876eb2383a17c2d292c200e92b369eead4289
     globs:
     - pyproject.toml
 - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_linux-64-13.0.85-ha770c72_0.conda
diff --git a/cuda_bindings/tests/conftest.py b/cuda_bindings/tests/conftest.py
new file mode 100644
index 0000000000..f0a426406a
--- /dev/null
+++ b/cuda_bindings/tests/conftest.py
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+import cuda.bindings.driver as cuda
+import pytest
+
+
+@pytest.fixture(scope="module")
+def cuda_driver():
+    (err,) = cuda.cuInit(0)
+    assert err == cuda.CUresult.CUDA_SUCCESS
+
+
+@pytest.fixture(scope="module")
+def device(cuda_driver):
+    err, device = cuda.cuDeviceGet(0)
+    assert err == cuda.CUresult.CUDA_SUCCESS
+    return device
+
+
+@pytest.fixture(scope="module", autouse=True)
+def ctx(device):
+    # Construct context
+    err, ctx = cuda.cuCtxCreate(None, 0, device)
+    assert err == cuda.CUresult.CUDA_SUCCESS
+    yield ctx
+    (err,) = cuda.cuCtxDestroy(ctx)
+    assert err == cuda.CUresult.CUDA_SUCCESS
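Usage sketch (illustrative only, not part of the patch): a test picks up the new fixtures simply by naming them as parameters, and because `ctx` is autouse and module-scoped, every test in a module already runs inside one shared CUDA context. The test name below is hypothetical; the driver calls mirror ones used elsewhere in this patch.

```python
import cuda.bindings.driver as cuda


def test_fixture_usage_example(device):  # hypothetical test; `device` resolves to the conftest.py fixture
    # The autouse `ctx` fixture has already created a context and made it current
    # for this module, so no explicit cuInit/cuCtxCreate is needed here.
    err, uuid = cuda.cuDeviceGetUuid(device)
    assert err == cuda.CUresult.CUDA_SUCCESS
    assert len(uuid.bytes) <= 16
```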
diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py
index cd723941be..888c05f930 100644
--- a/cuda_bindings/tests/test_cuda.py
+++ b/cuda_bindings/tests/test_cuda.py
@@ -39,17 +39,7 @@ def callableBinary(name):
 
 
 def test_cuda_memcpy():
-    # Init CUDA
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    # Get device
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
-    # Construct context
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
     # Allocate dev memory
     size = int(1024 * np.uint8().itemsize)
@@ -75,24 +65,14 @@ def test_cuda_memcpy():
     # Cleanup
     (err,) = cuda.cuMemFree(dptr)
     assert err == cuda.CUresult.CUDA_SUCCESS
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
 
 
 def test_cuda_array():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
     # No context created
     desc = cuda.CUDA_ARRAY_DESCRIPTOR()
     err, arr = cuda.cuArrayCreate(desc)
     assert err == cuda.CUresult.CUDA_ERROR_INVALID_CONTEXT or err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE
 
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
     # Desciption not filled
     err, arr = cuda.cuArrayCreate(desc)
     assert err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE
@@ -106,21 +86,12 @@ def test_cuda_array():
     (err,) = cuda.cuArrayDestroy(arr)
     assert err == cuda.CUresult.CUDA_SUCCESS
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
 
 
-def test_cuda_repr_primitive():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
+def test_cuda_repr_primitive(device, ctx):
     assert str(device) == "<CUdevice 0>"
     assert int(device) == 0
 
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
     assert str(ctx).startswith("<CUcontext 0x")
     assert int(ctx) > 0
     assert hex(ctx) == hex(int(ctx))
@@ -174,19 +145,9 @@ def test_cuda_repr_primitive():
     assert str(int64) == f""
     assert int(int64) == size
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
-
-def test_cuda_repr_pointer():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
+def test_cuda_repr_pointer(ctx):
     # Test 1: Classes representing pointers
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
     assert str(ctx).startswith("<CUcontext 0x")
     assert int(ctx) > 0
     assert hex(ctx) == hex(int(ctx))
@@ -203,18 +164,8 @@ def test_cuda_repr_pointer():
     assert int(b2d_cb) == func
     assert hex(b2d_cb) == hex(func)
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
-
-def test_cuda_uuid_list_access():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
+def test_cuda_uuid_list_access(device):
     err, uuid = cuda.cuDeviceGetUuid(device)
     assert err == cuda.CUresult.CUDA_SUCCESS
     assert len(uuid.bytes) <= 16
@@ -228,18 +179,8 @@ def test_cuda_uuid_list_access():
         jit_option.CU_JIT_LOG_VERBOSE: 5,
     }
 
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
 
 def test_cuda_cuModuleLoadDataEx():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
     option_keys = [
         cuda.CUjit_option.CU_JIT_INFO_LOG_BUFFER,
         cuda.CUjit_option.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
@@ -250,9 +191,6 @@ def test_cuda_cuModuleLoadDataEx():
     # FIXME: This function call raises CUDA_ERROR_INVALID_VALUE
     err, mod = cuda.cuModuleLoadDataEx(0, 0, option_keys, [])
 
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
 
 def test_cuda_repr():
     actual = cuda.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS()
@@ -323,13 +261,6 @@ def test_cuda_CUstreamBatchMemOpParams():
     driverVersionLessThan(11030) or not supportsMemoryPool(), reason="When new attributes were introduced"
 )
 def test_cuda_memPool_attr():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
     poolProps = cuda.CUmemPoolProps()
     poolProps.allocType = cuda.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED
     poolProps.location.id = 0
@@ -386,20 +317,12 @@ def test_cuda_memPool_attr():
     (err,) = cuda.cuMemPoolDestroy(pool)
     assert err == cuda.CUresult.CUDA_SUCCESS
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
 
 
 @pytest.mark.skipif(
     driverVersionLessThan(11030) or not supportsManagedMemory(), reason="When new attributes were introduced"
 )
 def test_cuda_pointer_attr():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
 
     err, ptr = cuda.cuMemAllocManaged(0x1000, cuda.CUmemAttach_flags.CU_MEM_ATTACH_GLOBAL.value)
     assert err == cuda.CUresult.CUDA_SUCCESS
@@ -445,19 +368,10 @@ def test_cuda_pointer_attr():
     (err,) = cuda.cuMemFree(ptr)
     assert err == cuda.CUresult.CUDA_SUCCESS
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
 
 
 @pytest.mark.skipif(not supportsManagedMemory(), reason="When new attributes were introduced")
-def test_cuda_mem_range_attr():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
+def test_cuda_mem_range_attr(device):
     size = 0x1000
     location_device = cuda.CUmemLocation()
     location_device.type = cuda.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
@@ -517,19 +431,10 @@ def test_cuda_mem_range_attr():
     (err,) = cuda.cuMemFree(ptr)
     assert err == cuda.CUresult.CUDA_SUCCESS
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
 
 
 @pytest.mark.skipif(driverVersionLessThan(11040) or not supportsMemoryPool(), reason="Mempool for graphs not supported")
-def test_cuda_graphMem_attr():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
+def test_cuda_graphMem_attr(device):
     err, stream = cuda.cuStreamCreate(0)
     assert err == cuda.CUresult.CUDA_SUCCESS
@@ -577,8 +482,6 @@ def test_cuda_graphMem_attr():
     assert err == cuda.CUresult.CUDA_SUCCESS
     (err,) = cuda.cuStreamDestroy(stream)
     assert err == cuda.CUresult.CUDA_SUCCESS
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
 
 
 @pytest.mark.skipif(
@@ -588,13 +491,6 @@ def test_cuda_graphMem_attr():
     reason="Coredump API not present",
 )
 def test_cuda_coredump_attr():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
     attr_list = [None] * 6
 
     (err,) = cuda.cuCoredumpSetAttributeGlobal(cuda.CUcoredumpSettings.CU_COREDUMP_TRIGGER_HOST, False)
@@ -623,18 +519,8 @@ def test_cuda_coredump_attr():
     assert attr_list[2] == b"corepipe"
     assert attr_list[3] is True
 
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
 
 def test_get_error_name_and_string():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
     err, device = cuda.cuDeviceGet(0)
     _, s = cuda.cuGetErrorString(err)
     assert s == b"no error"
@@ -646,22 +532,13 @@ def test_get_error_name_and_string():
     assert s == b"invalid device ordinal"
     _, s = cuda.cuGetErrorName(err)
     assert s == b"CUDA_ERROR_INVALID_DEVICE"
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
 
 
-@pytest.mark.skipif(not callableBinary("nvidia-smi"), reason="Binary existance needed")
-def test_device_get_name():
+@pytest.mark.skipif(not callableBinary("nvidia-smi"), reason="Binary existence needed")
+def test_device_get_name(device):
     # TODO: Refactor this test once we have nvml bindings to avoid the use of subprocess
     import subprocess
 
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
     p = subprocess.check_output(
         ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],  # noqa: S607
         shell=False,
     )
@@ -679,9 +556,6 @@ def test_device_get_name():
     else:
         assert any(got in result for result in expect)
 
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
 
 # TODO: cuStreamGetCaptureInfo_v2
 @pytest.mark.skipif(driverVersionLessThan(11030), reason="Driver too old for cuStreamGetCaptureInfo_v2")
@@ -690,18 +564,10 @@ def test_stream_capture():
 
 
 def test_profiler():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
     (err,) = cuda.cuProfilerStart()
     assert err == cuda.CUresult.CUDA_SUCCESS
     (err,) = cuda.cuProfilerStop()
     assert err == cuda.CUresult.CUDA_SUCCESS
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
 
 
 def test_eglFrame():
@@ -779,12 +645,6 @@ def test_invalid_repr_attribute():
     reason="Polymorphic graph APIs required",
 )
 def test_graph_poly():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
     err, stream = cuda.cuStreamCreate(0)
     assert err == cuda.CUresult.CUDA_SUCCESS
@@ -887,22 +747,14 @@ def test_graph_poly():
     assert err == cuda.CUresult.CUDA_SUCCESS
     (err,) = cuda.cuStreamDestroy(stream)
     assert err == cuda.CUresult.CUDA_SUCCESS
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
 
 
 @pytest.mark.skipif(
     driverVersionLessThan(12040) or not supportsCudaAPI("cuDeviceGetDevResource"),
     reason="Polymorphic graph APIs required",
 )
-def test_cuDeviceGetDevResource():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
+def test_cuDeviceGetDevResource(device):
     err, resource_in = cuda.cuDeviceGetDevResource(device, cuda.CUdevResourceType.CU_DEV_RESOURCE_TYPE_SM)
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
 
     err, res, count, rem = cuda.cuDevSmResourceSplitByCount(0, resource_in, 0, 2)
     assert err == cuda.CUresult.CUDA_SUCCESS
@@ -916,22 +768,12 @@ def test_cuDeviceGetDevResource():
     assert err == cuda.CUresult.CUDA_SUCCESS
     assert len(res) == 3
 
-    (err,) = cuda.cuCtxDestroy(ctx)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
 
 @pytest.mark.skipif(
     driverVersionLessThan(12030) or not supportsCudaAPI("cuGraphConditionalHandleCreate"),
     reason="Conditional graph APIs required",
 )
-def test_conditional():
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
+def test_conditional(ctx):
     err, graph = cuda.cuGraphCreate(0)
     assert err == cuda.CUresult.CUDA_SUCCESS
 
     err, handle = cuda.cuGraphConditionalHandleCreate(graph, ctx, 0, 0)
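All of the deletions in test_cuda.py above follow one pattern: the per-test cuInit/cuDeviceGet/cuCtxCreate preamble and the cuCtxDestroy teardown disappear because the autouse `ctx` fixture now owns the context for the whole module. A minimal sketch of what that means for a test that declares no fixture at all (hypothetical test name; the driver calls appear in the patch above):

```python
import cuda.bindings.driver as cuda


def test_alloc_without_explicit_setup():  # hypothetical; relies on the autouse module-scoped ctx fixture
    # No cuInit/cuCtxCreate here: the fixture has already made a context current.
    err, dptr = cuda.cuMemAlloc(1024)
    assert err == cuda.CUresult.CUDA_SUCCESS
    (err,) = cuda.cuMemFree(dptr)
    assert err == cuda.CUresult.CUDA_SUCCESS
```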
diff --git a/cuda_bindings/tests/test_interoperability.py b/cuda_bindings/tests/test_interoperability.py
index 0b1b830d74..cef594139a 100644
--- a/cuda_bindings/tests/test_interoperability.py
+++ b/cuda_bindings/tests/test_interoperability.py
@@ -13,13 +13,6 @@ def supportsMemoryPool():
 
 
 def test_interop_stream():
-    (err_dr,) = cuda.cuInit(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-
     # DRV to RT
     err_dr, stream = cuda.cuStreamCreate(0)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
@@ -32,18 +25,8 @@ def test_interop_stream():
     (err_dr,) = cuda.cuStreamDestroy(stream)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
 
-    (err_dr,) = cuda.cuCtxDestroy(ctx)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-
 
 def test_interop_event():
-    (err_dr,) = cuda.cuInit(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-
     # DRV to RT
     err_dr, event = cuda.cuEventCreate(0)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
@@ -56,18 +39,8 @@ def test_interop_event():
     (err_dr,) = cuda.cuEventDestroy(event)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
 
-    (err_dr,) = cuda.cuCtxDestroy(ctx)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-
 
 def test_interop_graph():
-    (err_dr,) = cuda.cuInit(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-
     # DRV to RT
     err_dr, graph = cuda.cuGraphCreate(0)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
@@ -80,18 +53,8 @@ def test_interop_graph():
     (err_dr,) = cuda.cuGraphDestroy(graph)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
 
-    (err_dr,) = cuda.cuCtxDestroy(ctx)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-
 
 def test_interop_graphNode():
-    (err_dr,) = cuda.cuInit(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-
     err_dr, graph = cuda.cuGraphCreate(0)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
@@ -109,49 +72,18 @@ def test_interop_graphNode():
     (err_rt,) = cudart.cudaGraphDestroy(graph)
     assert err_rt == cudart.cudaError_t.cudaSuccess
 
-    (err_dr,) = cuda.cuCtxDestroy(ctx)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
 
-def test_interop_userObject():
-    (err_dr,) = cuda.cuInit(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-
-    # cudaUserObject_t
-    # TODO
-
-    (err_dr,) = cuda.cuCtxDestroy(ctx)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-
-
-def test_interop_function():
-    (err_dr,) = cuda.cuInit(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
+# cudaUserObject_t
+# TODO
 
-    # cudaFunction_t
-    # TODO
 
-    (err_dr,) = cuda.cuCtxDestroy(ctx)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
+# cudaFunction_t
+# TODO
 
 
 @pytest.mark.skipif(not supportsMemoryPool(), reason="Requires mempool operations")
 def test_interop_memPool():
-    (err_dr,) = cuda.cuInit(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-
     # DRV to RT
     err_dr, pool = cuda.cuDeviceGetDefaultMemPool(0)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
@@ -164,17 +96,8 @@ def test_interop_memPool():
     (err_dr,) = cuda.cuDeviceSetMemPool(0, pool)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
 
-    (err_dr,) = cuda.cuCtxDestroy(ctx)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-
 
 def test_interop_graphExec():
-    (err_dr,) = cuda.cuInit(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    err_dr, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
     err_dr, graph = cuda.cuGraphCreate(0)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
     err_dr, node = cuda.cuGraphAddEmptyNode(graph, [], 0)
@@ -194,23 +117,9 @@ def test_interop_graphExec():
     (err_rt,) = cudart.cudaGraphDestroy(graph)
     assert err_rt == cudart.cudaError_t.cudaSuccess
 
-    (err_dr,) = cuda.cuCtxDestroy(ctx)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
 
 
 def test_interop_deviceptr():
-    # Init CUDA
-    (err,) = cuda.cuInit(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
-    # Get device
-    err, device = cuda.cuDeviceGet(0)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
-    # Construct context
-    err, ctx = cuda.cuCtxCreate(None, 0, device)
-    assert err == cuda.CUresult.CUDA_SUCCESS
-
     # Allocate dev memory
     size = 1024 * np.uint8().itemsize
     err_dr, dptr = cuda.cuMemAlloc(size)
@@ -235,5 +144,3 @@ def test_interop_deviceptr():
     # Cleanup
     (err_dr,) = cuda.cuMemFree(dptr)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
-    (err_dr,) = cuda.cuCtxDestroy(ctx)
-    assert err_dr == cuda.CUresult.CUDA_SUCCESS
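The interoperability tests keep the same shape as before, minus the boilerplate: create a handle through the driver API, hand it to a runtime API, then destroy it. A hedged sketch of that round trip (hypothetical test name; the `cudaStreamQuery` call is assumed from the runtime bindings and is not shown in the hunks above):

```python
import cuda.bindings.driver as cuda
import cuda.bindings.runtime as cudart


def test_stream_handle_interop_sketch():  # hypothetical; runs under the conftest.py fixtures
    # A stream created through the driver API should be accepted by runtime-API calls.
    err_dr, stream = cuda.cuStreamCreate(0)
    assert err_dr == cuda.CUresult.CUDA_SUCCESS
    (err_rt,) = cudart.cudaStreamQuery(stream)
    assert err_rt == cudart.cudaError_t.cudaSuccess
    (err_dr,) = cuda.cuStreamDestroy(stream)
    assert err_dr == cuda.CUresult.CUDA_SUCCESS
```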
diff --git a/cuda_bindings/tests/test_kernelParams.py b/cuda_bindings/tests/test_kernelParams.py
index 3a93ff9769..e6844607f8 100644
--- a/cuda_bindings/tests/test_kernelParams.py
+++ b/cuda_bindings/tests/test_kernelParams.py
@@ -7,6 +7,7 @@ import cuda.bindings.nvrtc as nvrtc
 import cuda.bindings.runtime as cudart
 import numpy as np
+import pytest
 
 
 def ASSERT_DRV(err):
@@ -68,14 +69,7 @@ def common_nvrtc(allKernelStrings, dev):
     return module
 
 
-def test_kernelParams_empty():
-    (err,) = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, context = cuda.cuCtxCreate(None, 0, cuDevice)
-    ASSERT_DRV(err)
-
+def test_kernelParams_empty(device):
     kernelString = """\
     static __device__ bool isDone;
     extern "C" __global__
@@ -86,7 +80,7 @@ def test_kernelParams_empty():
     }
     """
 
-    module = common_nvrtc(kernelString, cuDevice)
+    module = common_nvrtc(kernelString, device)
 
     # cudaStructs kernel
     err, kernel = cuda.cuModuleGetFunction(module, b"empty_kernel")
@@ -139,18 +133,10 @@ def test_kernelParams_empty():
     ASSERT_DRV(err)
     (err,) = cuda.cuModuleUnload(module)
     ASSERT_DRV(err)
-    (err,) = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
-
 
-def kernelParams_basic(use_ctypes_as_values):
-    (err,) = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, context = cuda.cuCtxCreate(None, 0, cuDevice)
-    ASSERT_DRV(err)
+@pytest.mark.parametrize("use_ctypes_as_values", [False, True], ids=["no-ctypes", "ctypes"])
+def test_kernelParams(use_ctypes_as_values, device):
     if use_ctypes_as_values:
         assertValues_host = (
             ctypes.c_bool(True),
@@ -283,7 +269,7 @@ def kernelParams_basic(use_ctypes_as_values):
         basicKernelString = basicKernelString.replace("{}", str(int(val)), 1)
         idx += 1
 
-    module = common_nvrtc(basicKernelString, cuDevice)
+    module = common_nvrtc(basicKernelString, device)
 
     err, kernel = cuda.cuModuleGetFunction(module, b"basic")
     ASSERT_DRV(err)
@@ -419,29 +405,11 @@ def kernelParams_basic(use_ctypes_as_values):
     ASSERT_DRV(err)
     (err,) = cuda.cuModuleUnload(module)
     ASSERT_DRV(err)
-    (err,) = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
-
-
-def test_kernelParams_basic():
-    # Kernel is given basic Python primative values as value input
-    kernelParams_basic(use_ctypes_as_values=False)
 
-def test_kernelParams_basic_ctypes():
-    # Kernel is given basic c_type instances as primative value input
-    kernelParams_basic(use_ctypes_as_values=True)
-
-
-def test_kernelParams_types_cuda():
-    (err,) = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, context = cuda.cuCtxCreate(None, 0, cuDevice)
-    ASSERT_DRV(err)
+def test_kernelParams_types_cuda(device):
     err, uvaSupported = cuda.cuDeviceGetAttribute(
-        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cuDevice
+        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, device
     )
     ASSERT_DRV(err)
@@ -494,7 +462,7 @@ def test_kernelParams_types_cuda():
     }
     """
 
-    module = common_nvrtc(kernelString, cuDevice)
+    module = common_nvrtc(kernelString, device)
 
     # cudaStructs kernel
     err, kernel = cuda.cuModuleGetFunction(module, b"structsCuda")
@@ -559,19 +527,11 @@ def test_kernelParams_types_cuda():
     ASSERT_DRV(err)
     (err,) = cuda.cuModuleUnload(module)
     ASSERT_DRV(err)
-    (err,) = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
 
 
-def test_kernelParams_struct_custom():
-    (err,) = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, context = cuda.cuCtxCreate(None, 0, cuDevice)
-    ASSERT_DRV(err)
+def test_kernelParams_struct_custom(device):
     err, uvaSupported = cuda.cuDeviceGetAttribute(
-        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cuDevice
+        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, device
     )
     ASSERT_DRV(err)
@@ -587,7 +547,7 @@ def test_kernelParams_struct_custom():
     }
     """
 
-    module = common_nvrtc(kernelString, cuDevice)
+    module = common_nvrtc(kernelString, device)
 
     err, kernel = cuda.cuModuleGetFunction(module, b"structCustom")
     ASSERT_DRV(err)
@@ -638,19 +598,12 @@ class testStruct(ctypes.Structure):
     ASSERT_DRV(err)
     (err,) = cuda.cuModuleUnload(module)
     ASSERT_DRV(err)
-    (err,) = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
 
 
-def kernelParams_buffer_protocol_ctypes_common(pass_by_address):
-    (err,) = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, context = cuda.cuCtxCreate(None, 0, cuDevice)
-    ASSERT_DRV(err)
+@pytest.mark.parametrize("pass_by_address", [False, True], ids=["not-by-address", "by-address"])
+def test_kernelParams_buffer_protocol(pass_by_address, device):
     err, uvaSupported = cuda.cuDeviceGetAttribute(
-        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cuDevice
+        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, device
     )
     ASSERT_DRV(err)
@@ -669,7 +622,7 @@ def kernelParams_buffer_protocol_ctypes_common(pass_by_address):
     }
     """
 
-    module = common_nvrtc(kernelString, cuDevice)
+    module = common_nvrtc(kernelString, device)
 
     err, kernel = cuda.cuModuleGetFunction(module, b"testkernel")
     ASSERT_DRV(err)
@@ -745,24 +698,11 @@ class testStruct(ctypes.Structure):
     ASSERT_DRV(err)
     (err,) = cuda.cuModuleUnload(module)
     ASSERT_DRV(err)
-    (err,) = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
-
-
-def test_kernelParams_buffer_protocol_ctypes():
-    kernelParams_buffer_protocol_ctypes_common(pass_by_address=True)
-    kernelParams_buffer_protocol_ctypes_common(pass_by_address=False)
 
-def test_kernelParams_buffer_protocol_numpy():
-    (err,) = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, context = cuda.cuCtxCreate(None, 0, cuDevice)
-    ASSERT_DRV(err)
+def test_kernelParams_buffer_protocol_numpy(device):
     err, uvaSupported = cuda.cuDeviceGetAttribute(
-        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cuDevice
+        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, device
     )
     ASSERT_DRV(err)
@@ -781,7 +721,7 @@ def test_kernelParams_buffer_protocol_numpy():
     }
     """
 
-    module = common_nvrtc(kernelString, cuDevice)
+    module = common_nvrtc(kernelString, device)
 
     err, kernel = cuda.cuModuleGetFunction(module, b"testkernel")
     ASSERT_DRV(err)
@@ -859,5 +799,3 @@ def __init__(self, address, typestr):
     ASSERT_DRV(err)
     (err,) = cuda.cuModuleUnload(module)
     ASSERT_DRV(err)
-    (err,) = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
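The wrapper pairs in test_kernelParams.py (`test_kernelParams_basic`/`test_kernelParams_basic_ctypes` and the buffer-protocol variants) are replaced by `@pytest.mark.parametrize`, so each variant is collected as its own test. A small self-contained sketch of the resulting behaviour (hypothetical test name; only the marker usage is taken from the patch):

```python
import pytest


@pytest.mark.parametrize("use_ctypes_as_values", [False, True], ids=["no-ctypes", "ctypes"])
def test_parametrize_example(use_ctypes_as_values):
    # pytest collects test_parametrize_example[no-ctypes] and test_parametrize_example[ctypes],
    # which can be run individually by node id.
    assert isinstance(use_ctypes_as_values, bool)
```

Note that `ids` map positionally onto the parameter values, which is why the buffer-protocol parametrization above pairs `False` with "not-by-address" and `True` with "by-address".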