From d2e782e85c016ee49ee7076866a4e640bbb26ffa Mon Sep 17 00:00:00 2001 From: Chloe Chia Date: Wed, 1 Oct 2025 17:39:01 +0000 Subject: [PATCH 1/6] Add 13.0 Tests --- cuda_bindings/tests/test_cufile.py | 1034 ++++++++++++++++++++++++---- 1 file changed, 884 insertions(+), 150 deletions(-) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index 84ed17426..da9b4894d 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -11,9 +11,8 @@ from contextlib import suppress from functools import cache -import pytest - import cuda.bindings.driver as cuda +import pytest # Configure logging to show INFO level and above logging.basicConfig( @@ -119,8 +118,6 @@ def isSupportedFilesystem(): # Global skip condition for all tests if cuFile library is not available pytestmark = pytest.mark.skipif(not cufileLibraryAvailable(), reason="cuFile library not available on this system") - - def safe_decode_string(raw_value): """Safely decode a string value from ctypes buffer.""" # Find null terminator if present @@ -1420,6 +1417,7 @@ def test_batch_io_cancel(): @pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") def test_batch_io_large_operations(): """Test batch IO with large buffer operations.""" + # Initialize CUDA (err,) = cuda.cuInit(0) assert err == cuda.CUresult.CUDA_SUCCESS @@ -1446,14 +1444,17 @@ def test_batch_io_large_operations(): read_buffers = [] all_buffers = [] # Initialize all_buffers to avoid UnboundLocalError + print("=== CUDA Memory Allocation ===") for i in range(num_operations): err, buf = cuda.cuMemAlloc(buf_size) assert err == cuda.CUresult.CUDA_SUCCESS write_buffers.append(buf) + print(f"Write buffer {i}: {hex(int(buf))} (4K aligned: {int(buf) % 4096 == 0})") err, buf = cuda.cuMemAlloc(buf_size) assert err == cuda.CUresult.CUDA_SUCCESS read_buffers.append(buf) + print(f"Read buffer {i}: {hex(int(buf))} (4K aligned: {int(buf) % 4096 == 
0})") # Allocate host memory for data verification host_buf = ctypes.create_string_buffer(buf_size) @@ -1461,12 +1462,15 @@ def test_batch_io_large_operations(): try: # Create file with O_DIRECT fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600) - # Register all buffers with cuFile all_buffers = write_buffers + read_buffers - for buf in all_buffers: + for i, buf in enumerate(all_buffers): buf_int = int(buf) - cufile.buf_register(buf_int, buf_size, 0) + try: + cufile.buf_register(buf_int, buf_size, 0) + except Exception as e: + print(f"*** Buffer {i} registration FAILED: {e} ***") + raise # Create file descriptor descr = cufile.Descr() @@ -1474,6 +1478,7 @@ def test_batch_io_large_operations(): descr.handle.fd = fd descr.fs_ops = 0 + # Register file handle handle = cufile.handle_register(descr.ptr) @@ -1499,7 +1504,7 @@ def test_batch_io_large_operations(): test_data = test_data[:buf_size] host_buf = ctypes.create_string_buffer(test_data, buf_size) cuda.cuMemcpyHtoDAsync(write_buffers[i], host_buf, buf_size, 0) - cuda.cuStreamSynchronize(0) + cuda.cuStreamSynchronize(0) # Set up write operations for i in range(num_operations): @@ -1524,52 +1529,210 @@ def test_batch_io_large_operations(): io_params[idx].u.batch.dev_ptr_offset = 0 io_params[idx].u.batch.size_ = buf_size - # Submit batch operations - cufile.batch_io_submit(batch_handle, num_operations * 2, io_params.ptr, 0) + - # Get batch status - min_nr = num_operations * 2 # Wait for all operations to complete - nr_completed = ctypes.c_uint(num_operations * 2) # Initialize to max operations posted - timeout = ctypes.c_int(10000) # 10 second timeout for large operations + + for i in range(num_operations): + print(f" Op {i}: cookie={io_params[i].cookie}, opcode={io_params[i].opcode}, offset={io_params[i].u.batch.file_offset}") + + for i in range(num_operations): + idx = i + num_operations + print(f" Op {idx}: cookie={io_params[idx].cookie}, opcode={io_params[idx].opcode}, 
offset={io_params[idx].u.batch.file_offset}") + + + # Submit writes first + cufile.batch_io_submit(batch_handle, num_operations, io_params.ptr, 0) # Only writes + + nr_completed_writes = ctypes.c_uint(num_operations) + timeout = ctypes.c_int(10000) cufile.batch_io_get_status( - batch_handle, min_nr, ctypes.addressof(nr_completed), io_events.ptr, ctypes.addressof(timeout) + batch_handle, num_operations, ctypes.addressof(nr_completed_writes), + io_events.ptr, ctypes.addressof(timeout) + ) + + + # Verify writes succeeded + for i in range(nr_completed_writes.value): + if io_events[i].status != cufile.Status.COMPLETE: + raise RuntimeError(f"Write {i} failed: {io_events[i].status}") + print(f"Write {io_events[i].cookie}: {io_events[i].ret} bytes") + + # Force file sync + os.fsync(fd) + print("File sync after writes completed") + + # Now submit reads separately + print("Submitting reads...") + read_batch_handle = cufile.batch_io_set_up(num_operations) + read_io_params = cufile.IOParams(num_operations) + read_io_events = cufile.IOEvents(num_operations) + + # Set up read operations in separate array + for i in range(num_operations): + read_io_params[i].mode = cufile.BatchMode.BATCH + read_io_params[i].fh = handle + read_io_params[i].opcode = cufile.Opcode.READ + read_io_params[i].cookie = i + 100 + read_io_params[i].u.batch.dev_ptr_base = int(read_buffers[i]) + read_io_params[i].u.batch.file_offset = i * buf_size + read_io_params[i].u.batch.dev_ptr_offset = 0 + read_io_params[i].u.batch.size_ = buf_size + + # Submit reads + cufile.batch_io_submit(read_batch_handle, num_operations, read_io_params.ptr, 0) + + # Wait for reads + nr_completed_reads = ctypes.c_uint(num_operations) + cufile.batch_io_get_status( + read_batch_handle, num_operations, ctypes.addressof(nr_completed_reads), + read_io_events.ptr, ctypes.addressof(timeout) ) + + # Check read results + for i in range(nr_completed_reads.value): + print(f"Read {read_io_events[i].cookie}: {read_io_events[i].ret} bytes") + + 
# Use read_io_events for verification instead of io_events + io_events = read_io_events # Replace for rest of test + nr_completed = nr_completed_reads + + # Clean up read batch + cufile.batch_io_destroy(read_batch_handle) + + # Enhanced operation analysis + print("=== Detailed Operation Results ===") + # Check each operation's detailed status + write_ops = [] + read_ops = [] + + for i in range(nr_completed.value): + event = io_events[i] + status_name = "UNKNOWN" + try: + status_name = cufile.Status(event.status).name + except: + pass + + print(f"Operation {i}:") + print(f" Cookie: {event.cookie}") + print(f" Status: {event.status} ({status_name})") + print(f" Result: {event.ret}") + + # Categorize operations by cookie + if event.cookie < 100: # Write operations (cookies 0, 1) + write_ops.append({ + 'index': i, + 'cookie': event.cookie, + 'result': event.ret, + 'status': event.status + }) + print(f" -> WRITE operation: {event.ret} bytes") + else: # Read operations (cookies 100, 101) + read_ops.append({ + 'index': i, + 'cookie': event.cookie, + 'result': event.ret, + 'status': event.status + }) + print(f" -> READ operation: {event.ret} bytes") + + # Check if operation failed + if event.status != cufile.Status.COMPLETE: + print(f" *** OPERATION {i} FAILED ***") + if event.status == cufile.Status.FAILED: + print(f" Error code: {event.ret}") + + print("=== Operation Analysis ===") + print(f"Write operations completed: {len(write_ops)}") + print(f"Read operations completed: {len(read_ops)}") + + # Check if all writes succeeded before analyzing reads + all_writes_success = all(op['result'] > 0 for op in write_ops) + print(f"All writes successful: {all_writes_success}") + + if all_writes_success: + print("Writes completed successfully, reads should now work") + else: + print("Some writes failed - this could explain read failures") + + # Show operation completion order + print("=== Operation Completion Order ===") + for i, event in enumerate([(io_events[j].cookie, 
io_events[j].ret) for j in range(nr_completed.value)]): + cookie, result = event + op_type = "WRITE" if cookie < 100 else "READ" + print(f"Position {i}: {op_type} (cookie {cookie}) -> {result} bytes") + + # Write completion check + print("=== Write Completion Check ===") + # Check if writes actually completed by reading file size + file_stat = os.fstat(fd) + print(f"File size after batch: {file_stat.st_size}") + + # Try a small direct read to verify data is in file + try: + test_buf_size = 1024 + err, test_buf = cuda.cuMemAlloc(test_buf_size) + cufile.buf_register(int(test_buf), test_buf_size, 0) + + # Try reading first 1KB directly + cufile.read(handle, int(test_buf), test_buf_size, 0, 0) + + # Copy back and check + test_host_buf = ctypes.create_string_buffer(test_buf_size) + cuda.cuMemcpyDtoH(test_host_buf, test_buf, test_buf_size) + test_data = test_host_buf.value + + print(f"Direct read test: {len(test_data)} bytes") + print(f"First 50 bytes: {test_data[:50]!r}") + + # Cleanup test buffer + cufile.buf_deregister(int(test_buf)) + cuda.cuMemFree(test_buf) + + except Exception as e: + print(f"Direct read test failed: {e}") + # Verify all operations completed successfully - assert nr_completed.value == num_operations * 2, ( - f"Expected {num_operations * 2} operations, got {nr_completed.value}" + assert nr_completed.value == num_operations, ( + f"Expected {num_operations} read operations, got {nr_completed.value}" ) # Collect all returned cookies returned_cookies = set() - for i in range(num_operations * 2): + for i in range(num_operations): + if io_events[i].status != cufile.Status.COMPLETE: + print(f"*** Operation {i} with cookie {io_events[i].cookie} failed with status {io_events[i].status} ***") assert io_events[i].status == cufile.Status.COMPLETE, ( f"Operation {i} failed with status {io_events[i].status}" ) returned_cookies.add(io_events[i].cookie) # Verify all expected cookies are present - expected_cookies = set(range(num_operations)) | set( - range(100, 
100 + num_operations) - ) # write cookies 0,1 + read cookies 100,101 + expected_cookies = set(range(100, 100 + num_operations)) # read cookies 100,101 assert returned_cookies == expected_cookies, ( f"Cookie mismatch. Expected {expected_cookies}, got {returned_cookies}" ) # Verify the read data matches the written data for i in range(num_operations): + # Copy read data back to host cuda.cuMemcpyDtoHAsync(host_buf, read_buffers[i], buf_size, 0) cuda.cuStreamSynchronize(0) read_data = host_buf.value + # Prepare expected data test_string = test_strings[i] test_string_len = len(test_string) repetitions = buf_size // test_string_len expected_data = (test_string * repetitions)[:buf_size] + + + if read_data != expected_data: n = 100 # Show first n bytes raise RuntimeError( @@ -1579,148 +1742,62 @@ def test_batch_io_large_operations(): f"expected {expected_data[:n]!r}" ) - # Clean up batch IO - cufile.batch_io_destroy(batch_handle) - - # Deregister file handle - cufile.handle_deregister(handle) - - # Deregister buffers - for buf in all_buffers: - buf_int = int(buf) - cufile.buf_deregister(buf_int) + print("=== Test Completed Successfully ===") finally: - # Close file - os.close(fd) - # Free CUDA memory - for buf in all_buffers: - cuda.cuMemFree(buf) - # Clean up test file + # Cleanup try: - os.unlink(file_path) - except OSError as e: - if e.errno != errno.ENOENT: - raise - # Close cuFile driver - cufile.driver_close() - cuda.cuDevicePrimaryCtxRelease(device) - - -@pytest.mark.skipif( - cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" -) -def test_set_get_parameter_size_t(): - """Test setting and getting size_t parameters with cuFile validation.""" - - # Initialize CUDA - (err,) = cuda.cuInit(0) - assert err == cuda.CUresult.CUDA_SUCCESS - - err, device = cuda.cuDeviceGet(0) - assert err == cuda.CUresult.CUDA_SUCCESS - - err, ctx = cuda.cuDevicePrimaryCtxRetain(device) - assert err == cuda.CUresult.CUDA_SUCCESS - 
(err,) = cuda.cuCtxSetCurrent(ctx) - assert err == cuda.CUresult.CUDA_SUCCESS - - try: - # Test setting and getting various size_t parameters - - # Test poll threshold size (in KB) - poll_threshold_kb = 64 # 64KB threshold - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB, poll_threshold_kb) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB) - assert retrieved_value == poll_threshold_kb, ( - f"Poll threshold mismatch: set {poll_threshold_kb}, got {retrieved_value}" - ) - - # Test max direct IO size (in KB) - max_direct_io_kb = 1024 # 1MB max direct IO size - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB, max_direct_io_kb) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB) - assert retrieved_value == max_direct_io_kb, ( - f"Max direct IO size mismatch: set {max_direct_io_kb}, got {retrieved_value}" - ) - - # Test max device cache size (in KB) - max_cache_kb = 512 # 512KB max cache size - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB, max_cache_kb) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB) - assert retrieved_value == max_cache_kb, f"Max cache size mismatch: set {max_cache_kb}, got {retrieved_value}" - - # Test per buffer cache size (in KB) - per_buffer_cache_kb = 128 # 128KB per buffer cache - cufile.set_parameter_size_t( - cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB, per_buffer_cache_kb - ) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB) - assert retrieved_value == per_buffer_cache_kb, ( - f"Per buffer cache size mismatch: set {per_buffer_cache_kb}, got {retrieved_value}" - ) - - # Test max device pinned memory size (in KB) - max_pinned_kb = 2048 # 2MB max pinned memory - 
cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB, max_pinned_kb) - retrieved_value = cufile.get_parameter_size_t( - cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB - ) - assert retrieved_value == max_pinned_kb, ( - f"Max pinned memory size mismatch: set {max_pinned_kb}, got {retrieved_value}" - ) - - # Test IO batch size - batch_size = 16 # 16 operations per batch - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE, batch_size) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE) - assert retrieved_value == batch_size, f"IO batch size mismatch: set {batch_size}, got {retrieved_value}" + if 'all_buffers' in locals(): + for buf in all_buffers: + cufile.buf_deregister(int(buf)) + cuda.cuMemFree(buf) + except Exception as e: + print(f"Cleanup error: {e}") - # Test batch IO timeout (in milliseconds) - timeout_ms = 5000 # 5 second timeout - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS, timeout_ms) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS) - assert retrieved_value == timeout_ms, f"Batch IO timeout mismatch: set {timeout_ms}, got {retrieved_value}" + try: + if 'handle' in locals(): + cufile.handle_deregister(handle) + except Exception as e: + print(f"Handle deregister error: {e}") - # Test execution parameters - max_io_queue_depth = 32 # Max 32 operations in queue - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH, max_io_queue_depth) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH) - assert retrieved_value == max_io_queue_depth, ( - f"Max IO queue depth mismatch: set {max_io_queue_depth}, got {retrieved_value}" - ) + try: + if 'batch_handle' in locals(): + cufile.batch_io_destroy(batch_handle) + except Exception as e: + 
print(f"Batch destroy error: {e}") - max_io_threads = 8 # Max 8 IO threads - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS, max_io_threads) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS) - assert retrieved_value == max_io_threads, ( - f"Max IO threads mismatch: set {max_io_threads}, got {retrieved_value}" - ) + try: + if 'read_batch_handle' in locals(): + cufile.batch_io_destroy(read_batch_handle) + except Exception as e: + print(f"Read batch destroy error: {e}") - min_io_threshold_kb = 4 # 4KB minimum IO threshold - cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB, min_io_threshold_kb) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB) - assert retrieved_value == min_io_threshold_kb, ( - f"Min IO threshold mismatch: set {min_io_threshold_kb}, got {retrieved_value}" - ) + try: + if 'fd' in locals(): + os.close(fd) + except Exception as e: + print(f"File close error: {e}") - max_request_parallelism = 4 # Max 4 parallel requests - cufile.set_parameter_size_t( - cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM, max_request_parallelism - ) - retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM) - assert retrieved_value == max_request_parallelism, ( - f"Max request parallelism mismatch: set {max_request_parallelism}, got {retrieved_value}" - ) + try: + if os.path.exists(file_path): + os.remove(file_path) + except Exception as e: + print(f"File remove error: {e}") - finally: - cuda.cuDevicePrimaryCtxRelease(device) + try: + cufile.driver_close() + except Exception as e: + print(f"Driver close error: {e}") + try: + cuda.cuDevicePrimaryCtxRelease(device) + except Exception as e: + print(f"Context release error: {e}") @pytest.mark.skipif( cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile 
library version 1.14.0 or later" ) -def test_set_get_parameter_bool(): +def test_get_parameter_bool(): """Test setting and getting boolean parameters with cuFile validation.""" # Initialize CUDA @@ -1805,7 +1882,7 @@ def test_set_get_parameter_bool(): @pytest.mark.skipif( cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" ) -def test_set_get_parameter_string(): +def test_get_parameter_string(): """Test setting and getting string parameters with cuFile validation.""" # Initialize CUDA @@ -1886,3 +1963,660 @@ def test_set_get_parameter_string(): finally: cuda.cuDevicePrimaryCtxRelease(device) + + +@pytest.mark.skipif( + cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_set_stats_level(): + """Test cuFile statistics level configuration.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + # Test setting different statistics levels + valid_levels = [0, 1, 2, 3] # 0=disabled, 1=basic, 2=detailed, 3=verbose + + for level in valid_levels: + cufile.set_stats_level(level) + + # Verify the level was set correctly + current_level = cufile.get_stats_level() + assert current_level == level, f"Expected stats level {level}, but got {current_level}" + + logging.info(f"Successfully set and verified stats level {level}") + + # Test invalid level (should raise an error) + try: + cufile.set_stats_level(-1) # Invalid negative level + assert False, "Expected an error for invalid stats level -1" + except Exception as e: + logging.info(f"Correctly caught error for invalid stats level: {e}") + + try: + 
cufile.set_stats_level(4) # Invalid level > 3 + assert False, "Expected an error for invalid stats level 4" + except Exception as e: + logging.info(f"Correctly caught error for invalid stats level: {e}") + + # Reset to level 0 (disabled) for cleanup + cufile.set_stats_level(0) + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_stats_start(): + """Test cuFile statistics collection start.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + # Set statistics level first (required before starting stats) + cufile.set_stats_level(1) # Level 1 = basic statistics + + # Start collecting cuFile statistics + cufile.stats_start() + + # Verify statistics collection is active + # Note: Additional verification would require stats_get() or similar functions + logging.info("cuFile statistics collection started successfully") + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_stats_stop(): + """Test cuFile statistics collection stop.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == 
cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + # Set statistics level first (required before starting stats) + cufile.set_stats_level(1) # Level 1 = basic statistics + # Start collecting cuFile statistics first + cufile.stats_start() + + # Stop collecting cuFile statistics + cufile.stats_stop() + + # Verify statistics collection is stopped + logging.info("cuFile statistics collection stopped successfully") + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_stats_reset(): + """Test cuFile statistics reset.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + # Set statistics level first (required before starting stats) + cufile.set_stats_level(1) # Level 1 = basic statistics + # Start collecting cuFile statistics first + + cufile.stats_start() + + # Reset cuFile statistics to clear all counters + cufile.stats_reset() + + # Verify statistics reset completed successfully + logging.info("cuFile statistics reset successfully") + + # Stop statistics collection + cufile.stats_stop() + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") +def test_get_stats_l1(): + """Test cuFile L1 statistics retrieval 
with file operations.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + # Create test file directly with O_DIRECT + file_path = "test_stats_l1.bin" + fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600) + + try: + cufile.set_stats_level(1) # L1 = basic operation counts + # Start collecting cuFile statistics + cufile.stats_start() + + # Create and initialize the descriptor + descr = cufile.Descr() + descr.type = cufile.FileHandleType.OPAQUE_FD + descr.handle.fd = fd + descr.fs_ops = 0 + + # Register the handle + handle = cufile.handle_register(descr.ptr) + + # Allocate CUDA memory + buffer_size = 4096 # 4KB, aligned to 4096 bytes + err, buf_ptr = cuda.cuMemAlloc(buffer_size) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Register the buffer with cuFile + buf_ptr_int = int(buf_ptr) + cufile.buf_register(buf_ptr_int, buffer_size, 0) + + # Prepare test data and copy to GPU buffer + test_data = b"cuFile L1 stats test data" * 100 # Fill buffer + test_data = test_data[:buffer_size] + host_buf = ctypes.create_string_buffer(test_data, buffer_size) + cuda.cuMemcpyHtoD(buf_ptr, host_buf, len(test_data)) + + # Perform cuFile operations to generate L1 statistics + cufile.write(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.read(handle, buf_ptr_int, buffer_size, 0, 0) + + # Allocate buffer for L1 statistics + stats_buffer = ctypes.create_string_buffer(1024) # Allocate sufficient space + stats_ptr = ctypes.addressof(stats_buffer) + + # Get L1 statistics (basic operation counts) + cufile.get_stats_l1(stats_ptr) + + # Verify that statistics data was written to the buffer + # Convert buffer to bytes and check that it's 
not all zeros + buffer_bytes = bytes(stats_buffer) + non_zero_bytes = sum(1 for b in buffer_bytes if b != 0) + assert non_zero_bytes > 0, f"Expected statistics data to be written to buffer, but got {non_zero_bytes} non-zero bytes" + + # Verify statistics retrieval completed successfully + logging.info("cuFile L1 statistics retrieved successfully after file operations") + + # Stop statistics collection + cufile.stats_stop() + + # Clean up cuFile resources + cufile.buf_deregister(buf_ptr_int) + cufile.handle_deregister(handle) + cuda.cuMemFree(buf_ptr) + + finally: + os.close(fd) + with suppress(OSError): + os.unlink(file_path) + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") +def test_get_stats_l2(): + """Test cuFile L2 statistics retrieval with file operations.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + # Create test file directly with O_DIRECT + file_path = "test_stats_l2.bin" + fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600) + + try: + cufile.set_stats_level(2) # L2 = detailed performance metrics + + # Start collecting cuFile statistics + cufile.stats_start() + + # Create and initialize the descriptor + descr = cufile.Descr() + descr.type = cufile.FileHandleType.OPAQUE_FD + descr.handle.fd = fd + descr.fs_ops = 0 + + # Register the handle + handle = cufile.handle_register(descr.ptr) + + # Allocate CUDA memory + buffer_size = 8192 # 
8KB for more detailed stats + err, buf_ptr = cuda.cuMemAlloc(buffer_size) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Register the buffer with cuFile + buf_ptr_int = int(buf_ptr) + cufile.buf_register(buf_ptr_int, buffer_size, 0) + + # Prepare test data and copy to GPU buffer + test_data = b"cuFile L2 detailed stats test data" * 150 # Fill buffer + test_data = test_data[:buffer_size] + host_buf = ctypes.create_string_buffer(test_data, buffer_size) + cuda.cuMemcpyHtoD(buf_ptr, host_buf, len(test_data)) + + # Perform multiple cuFile operations to generate detailed L2 statistics + cufile.write(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.read(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.write(handle, buf_ptr_int, buffer_size, buffer_size, 0) # Different offset + cufile.read(handle, buf_ptr_int, buffer_size, buffer_size, 0) + + # Allocate buffer for L2 statistics + stats_buffer = ctypes.create_string_buffer(2048) # Larger buffer for detailed stats + stats_ptr = ctypes.addressof(stats_buffer) + + # Get L2 statistics (detailed performance metrics) + cufile.get_stats_l2(stats_ptr) + + # Verify that statistics data was written to the buffer + buffer_bytes = bytes(stats_buffer) + non_zero_bytes = sum(1 for b in buffer_bytes if b != 0) + assert non_zero_bytes > 0, f"Expected statistics data to be written to buffer, but got {non_zero_bytes} non-zero bytes" + + # Verify statistics retrieval completed successfully + logging.info("cuFile L2 statistics retrieved successfully after file operations") + + # Stop statistics collection + cufile.stats_stop() + + # Clean up cuFile resources + cufile.buf_deregister(buf_ptr_int) + cufile.handle_deregister(handle) + cuda.cuMemFree(buf_ptr) + + finally: + os.close(fd) + with suppress(OSError): + os.unlink(file_path) + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) 
+@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") +def test_get_stats_l3(): + """Test cuFile L3 statistics retrieval with file operations.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + # Create test file directly with O_DIRECT + file_path = "test_stats_l3.bin" + fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600) + + try: + cufile.set_stats_level(3) # L3 = comprehensive diagnostic data + + # Start collecting cuFile statistics + cufile.stats_start() + + # Create and initialize the descriptor + descr = cufile.Descr() + descr.type = cufile.FileHandleType.OPAQUE_FD + descr.handle.fd = fd + descr.fs_ops = 0 + + # Register the handle + handle = cufile.handle_register(descr.ptr) + + # Allocate CUDA memory + buffer_size = 16384 # 16KB for comprehensive stats testing + err, buf_ptr = cuda.cuMemAlloc(buffer_size) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Register the buffer with cuFile + buf_ptr_int = int(buf_ptr) + cufile.buf_register(buf_ptr_int, buffer_size, 0) + + # Prepare test data and copy to GPU buffer + test_data = b"cuFile L3 comprehensive stats test data" * 200 # Fill buffer + test_data = test_data[:buffer_size] + host_buf = ctypes.create_string_buffer(test_data, buffer_size) + cuda.cuMemcpyHtoD(buf_ptr, host_buf, len(test_data)) + + # Perform comprehensive cuFile operations to generate L3 statistics + # Multiple writes and reads at different offsets to generate rich stats + cufile.write(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.read(handle, buf_ptr_int, buffer_size, 0, 0) + cufile.write(handle, buf_ptr_int, buffer_size, 
buffer_size, 0) # Different offset + cufile.read(handle, buf_ptr_int, buffer_size, buffer_size, 0) + cufile.write(handle, buf_ptr_int, buffer_size // 2, buffer_size * 2, 0) # Partial write + cufile.read(handle, buf_ptr_int, buffer_size // 2, buffer_size * 2, 0) # Partial read + + # Allocate buffer for L3 statistics + stats_buffer = ctypes.create_string_buffer(4096) # Largest buffer for comprehensive stats + stats_ptr = ctypes.addressof(stats_buffer) + + # Get L3 statistics (comprehensive diagnostic data) + cufile.get_stats_l3(stats_ptr) + + # Verify that statistics data was written to the buffer + buffer_bytes = bytes(stats_buffer) + non_zero_bytes = sum(1 for b in buffer_bytes if b != 0) + assert non_zero_bytes > 0, f"Expected statistics data to be written to buffer, but got {non_zero_bytes} non-zero bytes" + + # Verify statistics retrieval completed successfully + logging.info("cuFile L3 statistics retrieved successfully after file operations") + + # Stop statistics collection + cufile.stats_stop() + + # Clean up cuFile resources + cufile.buf_deregister(buf_ptr_int) + cufile.handle_deregister(handle) + cuda.cuMemFree(buf_ptr) + + finally: + os.close(fd) + with suppress(OSError): + os.unlink(file_path) + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_get_bar_size_in_kb(): + """Test cuFile BAR (Base Address Register) size retrieval.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Open cuFile driver + cufile.driver_open() + + try: + # Get BAR size in kilobytes + bar_size_kb = cufile.get_bar_size_in_kb(0) + + 
# Verify BAR size is a reasonable value + assert isinstance(bar_size_kb, int), "BAR size should be an integer" + assert bar_size_kb > 0, "BAR size should be positive" + + logging.info(f"GPU BAR size: {bar_size_kb} KB ({bar_size_kb / 1024 / 1024:.2f} GB)") + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later" +) +def test_set_parameter_posix_pool_slab_array(): + """Test cuFile POSIX pool slab array configuration.""" + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Define slab sizes for POSIX I/O pool (common I/O buffer sizes) - BEFORE driver open + import ctypes + slab_sizes = [ + 4096, # 4KB - small files + 65536, # 64KB - medium files + 1048576, # 1MB - large files + 16777216, # 16MB - very large files + ] + + # Define counts for each slab size (number of buffers) + slab_counts = [ + 10, # 10 buffers of 4KB + 5, # 5 buffers of 64KB + 3, # 3 buffers of 1MB + 2, # 2 buffers of 16MB + ] + + # Convert to ctypes arrays + size_array_type = ctypes.c_size_t * len(slab_sizes) + count_array_type = ctypes.c_size_t * len(slab_counts) + size_array = size_array_type(*slab_sizes) + count_array = count_array_type(*slab_counts) + + # Set POSIX pool slab array configuration BEFORE opening driver + cufile.set_parameter_posix_pool_slab_array(ctypes.addressof(size_array), ctypes.addressof(count_array), len(slab_sizes)) + + # Open cuFile driver AFTER setting parameters + cufile.driver_open() + + try: + # After setting parameters, retrieve them back to verify + retrieved_sizes = (ctypes.c_size_t * 
len(slab_sizes))() + retrieved_counts = (ctypes.c_size_t * len(slab_counts))() + + cufile.get_parameter_posix_pool_slab_array(ctypes.addressof(retrieved_sizes), ctypes.addressof(retrieved_counts), len(slab_sizes)) + + # Verify they match what we set + for i in range(len(slab_sizes)): + assert retrieved_sizes[i] == slab_sizes[i], f"Size mismatch at index {i}: expected {slab_sizes[i]}, got {retrieved_sizes[i]}" + assert retrieved_counts[i] == slab_counts[i], f"Count mismatch at index {i}: expected {slab_counts[i]}, got {retrieved_counts[i]}" + + # Verify configuration was accepted successfully + logging.info(f"POSIX pool slab array configured with {len(slab_sizes)} slab sizes") + logging.info(f"Slab sizes: {[f'{size//1024}KB' for size in slab_sizes]}") + logging.info("Round-trip verification successful: set and retrieved values match") + + finally: + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + + +@pytest.mark.skipif( + cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" +) +def test_set_get_parameter_size_t(): + """Test setting and getting size_t parameters with cuFile validation.""" + + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + try: + # Test setting and getting various size_t parameters + + # Test poll threshold size (in KB) + poll_threshold_kb = 64 # 64KB threshold + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB, poll_threshold_kb) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB) + assert retrieved_value == poll_threshold_kb, ( + f"Poll threshold mismatch: set {poll_threshold_kb}, got 
{retrieved_value}" + ) + + # Test max direct IO size (in KB) + max_direct_io_kb = 1024 # 1MB max direct IO size + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB, max_direct_io_kb) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB) + assert retrieved_value == max_direct_io_kb, ( + f"Max direct IO size mismatch: set {max_direct_io_kb}, got {retrieved_value}" + ) + + # Test max device cache size (in KB) + max_cache_kb = 512 # 512KB max cache size + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB, max_cache_kb) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB) + assert retrieved_value == max_cache_kb, f"Max cache size mismatch: set {max_cache_kb}, got {retrieved_value}" + + # Test per buffer cache size (in KB) + per_buffer_cache_kb = 128 # 128KB per buffer cache + cufile.set_parameter_size_t( + cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB, per_buffer_cache_kb + ) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB) + assert retrieved_value == per_buffer_cache_kb, ( + f"Per buffer cache size mismatch: set {per_buffer_cache_kb}, got {retrieved_value}" + ) + + # Test max device pinned memory size (in KB) + max_pinned_kb = 2048 # 2MB max pinned memory + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB, max_pinned_kb) + retrieved_value = cufile.get_parameter_size_t( + cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB + ) + assert retrieved_value == max_pinned_kb, ( + f"Max pinned memory size mismatch: set {max_pinned_kb}, got {retrieved_value}" + ) + + # Test IO batch size + batch_size = 16 # 16 operations per batch + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE, batch_size) + retrieved_value 
= cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE) + assert retrieved_value == batch_size, f"IO batch size mismatch: set {batch_size}, got {retrieved_value}" + + # Test batch IO timeout (in milliseconds) + timeout_ms = 5000 # 5 second timeout + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS, timeout_ms) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS) + assert retrieved_value == timeout_ms, f"Batch IO timeout mismatch: set {timeout_ms}, got {retrieved_value}" + + # Test execution parameters + max_io_queue_depth = 32 # Max 32 operations in queue + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH, max_io_queue_depth) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH) + assert retrieved_value == max_io_queue_depth, ( + f"Max IO queue depth mismatch: set {max_io_queue_depth}, got {retrieved_value}" + ) + + max_io_threads = 8 # Max 8 IO threads + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS, max_io_threads) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS) + assert retrieved_value == max_io_threads, ( + f"Max IO threads mismatch: set {max_io_threads}, got {retrieved_value}" + ) + + min_io_threshold_kb = 4 # 4KB minimum IO threshold + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB, min_io_threshold_kb) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB) + assert retrieved_value == min_io_threshold_kb, ( + f"Min IO threshold mismatch: set {min_io_threshold_kb}, got {retrieved_value}" + ) + + max_request_parallelism = 4 # Max 4 parallel requests + cufile.set_parameter_size_t( + cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM, max_request_parallelism + ) 
+ retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM) + assert retrieved_value == max_request_parallelism, ( + f"Max request parallelism mismatch: set {max_request_parallelism}, got {retrieved_value}" + ) + + finally: + cuda.cuDevicePrimaryCtxRelease(device) From 251e8886ff29a52872122d2388c653bbe5c67fc5 Mon Sep 17 00:00:00 2001 From: Chloe Chia Date: Wed, 1 Oct 2025 17:59:11 +0000 Subject: [PATCH 2/6] Add first set of generated cybind bindings --- cuda_bindings/cuda/bindings/cufile.pxd | 14 ++- cuda_bindings/cuda/bindings/cufile.pyx | 154 ++++++++++++++++++++++- cuda_bindings/cuda/bindings/cycufile.pxd | 5 +- cuda_bindings/cuda/bindings/cycufile.pyx | 14 +-- 4 files changed, 172 insertions(+), 15 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cufile.pxd b/cuda_bindings/cuda/bindings/cufile.pxd index a343caa21..67f34e4b1 100644 --- a/cuda_bindings/cuda/bindings/cufile.pxd +++ b/cuda_bindings/cuda/bindings/cufile.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.1. Do not modify it directly. +# This code was automatically generated with version 13.0.0. Do not modify it directly. from libc.stdint cimport intptr_t @@ -74,6 +74,18 @@ cpdef int get_version() except? 0 cpdef size_t get_parameter_size_t(int param) except? 0 cpdef bint get_parameter_bool(int param) except? 0 cpdef str get_parameter_string(int param, int len) +cpdef get_parameter_min_max_value(int param, intptr_t min_value, intptr_t max_value) cpdef set_parameter_size_t(int param, size_t value) cpdef set_parameter_bool(int param, bint value) cpdef set_parameter_string(int param, intptr_t desc_str) +cpdef set_stats_level(int level) +cpdef int get_stats_level() except? 
0 +cpdef stats_start() +cpdef stats_stop() +cpdef stats_reset() +cpdef get_stats_l1(intptr_t stats) +cpdef get_stats_l2(intptr_t stats) +cpdef get_stats_l3(intptr_t stats) +cpdef size_t get_bar_size_in_kb(int gpu_ind_ex) except? 0 +cpdef set_parameter_posix_pool_slab_array(intptr_t size_values, intptr_t count_values, int len) +cpdef get_parameter_posix_pool_slab_array(intptr_t size_values, intptr_t count_values, int len) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index 66b3aca2d..9fc5e787a 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.1. Do not modify it directly. +# This code was automatically generated with version 13.0.0. Do not modify it directly. cimport cython # NOQA from libc cimport errno @@ -1124,7 +1124,7 @@ cpdef driver_get_properties(intptr_t props): """Gets the Driver session properties. Args: - props (intptr_t): Properties to set. + props (intptr_t): to set. .. seealso:: `cuFileDriverGetProperties` """ @@ -1273,6 +1273,21 @@ cpdef str get_parameter_string(int param, int len): return _desc_str_.decode() +cpdef get_parameter_min_max_value(int param, intptr_t min_value, intptr_t max_value): + """Get both the minimum and maximum settable values for a given size_t parameter in a single call. + + Args: + param (SizeTConfigParameter): CUfile SizeT configuration parameter. + min_value (intptr_t): Pointer to store the minimum value. + max_value (intptr_t): Pointer to store the maximum value. + + .. 
seealso:: `cuFileGetParameterMinMaxValue` + """ + with nogil: + status = cuFileGetParameterMinMaxValue(<_SizeTConfigParameter>param, min_value, max_value) + check_status(status) + + cpdef set_parameter_size_t(int param, size_t value): with nogil: status = cuFileSetParameterSizeT(<_SizeTConfigParameter>param, value) @@ -1291,6 +1306,141 @@ cpdef set_parameter_string(int param, intptr_t desc_str): check_status(status) +cpdef set_stats_level(int level): + """Set the level of statistics collection for cuFile operations. This will override the cufile.json settings for stats. + + Args: + level (int): Statistics level (0 = disabled, 1 = basic, 2 = detailed, 3 = verbose). + + .. seealso:: `cuFileSetStatsLevel` + """ + with nogil: + status = cuFileSetStatsLevel(level) + check_status(status) + + +cpdef int get_stats_level() except? 0: + """Get the current level of statistics collection for cuFile operations. + + Returns: + int: The current statistics level. + + .. seealso:: `cuFileGetStatsLevel` + """ + cdef int level + with nogil: + status = cuFileGetStatsLevel(&level) + check_status(status) + return level + + +cpdef stats_start(): + """Start collecting cuFile statistics. + + .. seealso:: `cuFileStatsStart` + """ + with nogil: + status = cuFileStatsStart() + check_status(status) + + +cpdef stats_stop(): + """Stop collecting cuFile statistics. + + .. seealso:: `cuFileStatsStop` + """ + with nogil: + status = cuFileStatsStop() + check_status(status) + + +cpdef stats_reset(): + """Reset all cuFile statistics counters. + + .. seealso:: `cuFileStatsReset` + """ + with nogil: + status = cuFileStatsReset() + check_status(status) + + +cpdef get_stats_l1(intptr_t stats): + """Get Level 1 cuFile statistics. + + Args: + stats (intptr_t): Pointer to CUfileStatsLevel1_t structure to be filled. + + ..
seealso:: `cuFileGetStatsL1` + """ + with nogil: + status = cuFileGetStatsL1(stats) + check_status(status) + + +cpdef get_stats_l2(intptr_t stats): + """Get Level 2 cuFile statistics. + + Args: + stats (intptr_t): Pointer to CUfileStatsLevel2_t structure to be filled. + + .. seealso:: `cuFileGetStatsL2` + """ + with nogil: + status = cuFileGetStatsL2(stats) + check_status(status) + + +cpdef get_stats_l3(intptr_t stats): + """Get Level 3 cuFile statistics. + + Args: + stats (intptr_t): Pointer to CUfileStatsLevel3_t structure to be filled. + + .. seealso:: `cuFileGetStatsL3` + """ + with nogil: + status = cuFileGetStatsL3(stats) + check_status(status) + + +cpdef size_t get_bar_size_in_kb(int gpu_ind_ex) except? 0: + cdef size_t bar_size + with nogil: + status = cuFileGetBARSizeInKB(gpu_ind_ex, &bar_size) + check_status(status) + return bar_size + + +cpdef set_parameter_posix_pool_slab_array(intptr_t size_values, intptr_t count_values, int len): + """Set both POSIX pool slab size and count parameters as a pair. + + Args: + size_values (intptr_t): Array of slab sizes in KB. + count_values (intptr_t): Array of slab counts. + len (int): Length of both arrays (must be the same). + + .. seealso:: `cuFileSetParameterPosixPoolSlabArray` + """ + with nogil: + status = cuFileSetParameterPosixPoolSlabArray(size_values, count_values, len) + check_status(status) + + +cpdef get_parameter_posix_pool_slab_array(intptr_t size_values, intptr_t count_values, int len): + """Get both POSIX pool slab size and count parameters as a pair. + + Args: + size_values (intptr_t): Buffer to receive slab sizes in KB. + count_values (intptr_t): Buffer to receive slab counts. + len (int): Buffer size (must match the actual parameter length). + + .. seealso:: `cuFileGetParameterPosixPoolSlabArray` + """ + with nogil: + status = cuFileGetParameterPosixPoolSlabArray(size_values, count_values, len) + check_status(status) + + cpdef str op_status_error(int status): """cufileop status string. 
diff --git a/cuda_bindings/cuda/bindings/cycufile.pxd b/cuda_bindings/cuda/bindings/cycufile.pxd index 39142aa1f..11cf737f0 100644 --- a/cuda_bindings/cuda/bindings/cycufile.pxd +++ b/cuda_bindings/cuda/bindings/cycufile.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.1. Do not modify it directly. +# This code was automatically generated with version 13.0.0. Do not modify it directly. from libc.stdint cimport uint32_t, uint64_t from libc.time cimport time_t @@ -353,11 +353,10 @@ cdef CUfileError_t cuFileGetVersion(int* version) except?CUFILE_L cdef CUfileError_t cuFileGetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t* value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileGetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool* value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileGetParameterString(CUFileStringConfigParameter_t param, char* desc_str, int len) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileSetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileSetParameterString(CUFileStringConfigParameter_t param, const char* desc_str) except?CUFILE_LOADING_ERROR nogil -cdef CUfileError_t cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil -cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileGetStatsLevel(int* level) 
except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t cuFileStatsStart() except?CUFILE_LOADING_ERROR nogil diff --git a/cuda_bindings/cuda/bindings/cycufile.pyx b/cuda_bindings/cuda/bindings/cycufile.pyx index d6bbb2745..e23177137 100644 --- a/cuda_bindings/cuda/bindings/cycufile.pyx +++ b/cuda_bindings/cuda/bindings/cycufile.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.1. Do not modify it directly. +# This code was automatically generated with version 13.0.0. Do not modify it directly. from ._internal cimport cufile as _cufile @@ -122,6 +122,10 @@ cdef CUfileError_t cuFileGetParameterString(CUFileStringConfigParameter_t param, return _cufile._cuFileGetParameterString(param, desc_str, len) +cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetParameterMinMaxValue(param, min_value, max_value) + + cdef CUfileError_t cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) except?CUFILE_LOADING_ERROR nogil: return _cufile._cuFileSetParameterSizeT(param, value) @@ -134,14 +138,6 @@ cdef CUfileError_t cuFileSetParameterString(CUFileStringConfigParameter_t param, return _cufile._cuFileSetParameterString(param, desc_str) -cdef CUfileError_t cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil: - return _cufile._cuFileDriverClose() - - -cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil: - return _cufile._cuFileGetParameterMinMaxValue(param, min_value, max_value) - - cdef CUfileError_t cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil: return _cufile._cuFileSetStatsLevel(level) From 3ea65c8cd3e4a1c2a175fda53e5cc0d86149281a Mon Sep 17 00:00:00 2001 From: Chloe Chia Date: Wed, 1 Oct 2025 
18:03:48 +0000 Subject: [PATCH 3/6] Add _internal cybind generated bindings --- .../cuda/bindings/_internal/cufile.pxd | 5 +- .../cuda/bindings/_internal/cufile_linux.pyx | 151 ++++--- .../cuda/bindings/_internal/cycufile.pxd | 370 ++++++++++++++++++ .../cuda/bindings/_internal/cycufile.pyx | 182 +++++++++ 4 files changed, 620 insertions(+), 88 deletions(-) create mode 100644 cuda_bindings/cuda/bindings/_internal/cycufile.pxd create mode 100644 cuda_bindings/cuda/bindings/_internal/cycufile.pyx diff --git a/cuda_bindings/cuda/bindings/_internal/cufile.pxd b/cuda_bindings/cuda/bindings/_internal/cufile.pxd index 97b1b387f..585fefe3f 100644 --- a/cuda_bindings/cuda/bindings/_internal/cufile.pxd +++ b/cuda_bindings/cuda/bindings/_internal/cufile.pxd @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.1. Do not modify it directly. +# This code was automatically generated with version 13.0.0. Do not modify it directly. 
from ..cycufile cimport * @@ -38,11 +38,10 @@ cdef CUfileError_t _cuFileGetVersion(int* version) except?CUFILE_ cdef CUfileError_t _cuFileGetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t* value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileGetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool* value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileGetParameterString(CUFileStringConfigParameter_t param, char* desc_str, int len) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t _cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileSetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileSetParameterString(CUFileStringConfigParameter_t param, const char* desc_str) except?CUFILE_LOADING_ERROR nogil -cdef CUfileError_t _cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil -cdef CUfileError_t _cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileGetStatsLevel(int* level) except?CUFILE_LOADING_ERROR nogil cdef CUfileError_t _cuFileStatsStart() except?CUFILE_LOADING_ERROR nogil diff --git a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx index ffc92f228..92ca46b76 100644 --- a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.9.0 to 13.0.1. 
Do not modify it directly. +# This code was automatically generated with version 13.0.0. Do not modify it directly. from libc.stdint cimport intptr_t, uintptr_t import threading @@ -85,11 +85,10 @@ cdef void* __cuFileGetVersion = NULL cdef void* __cuFileGetParameterSizeT = NULL cdef void* __cuFileGetParameterBool = NULL cdef void* __cuFileGetParameterString = NULL +cdef void* __cuFileGetParameterMinMaxValue = NULL cdef void* __cuFileSetParameterSizeT = NULL cdef void* __cuFileSetParameterBool = NULL cdef void* __cuFileSetParameterString = NULL -cdef void* __cuFileDriverClose = NULL -cdef void* __cuFileGetParameterMinMaxValue = NULL cdef void* __cuFileSetStatsLevel = NULL cdef void* __cuFileGetStatsLevel = NULL cdef void* __cuFileStatsStart = NULL @@ -103,7 +102,7 @@ cdef void* __cuFileSetParameterPosixPoolSlabArray = NULL cdef void* __cuFileGetParameterPosixPoolSlabArray = NULL -cdef void* load_library() except* with gil: +cdef void* load_library(const int driver_ver) except* with gil: cdef uintptr_t handle = load_nvidia_dynamic_lib("cufile")._handle_uint return handle @@ -116,306 +115,301 @@ cdef int _check_or_init_cufile() except -1 nogil: cdef void* handle = NULL with gil, __symbol_lock: + driver_ver = get_cuda_version() + # Load function global __cuFileHandleRegister __cuFileHandleRegister = dlsym(RTLD_DEFAULT, 'cuFileHandleRegister') if __cuFileHandleRegister == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileHandleRegister = dlsym(handle, 'cuFileHandleRegister') global __cuFileHandleDeregister __cuFileHandleDeregister = dlsym(RTLD_DEFAULT, 'cuFileHandleDeregister') if __cuFileHandleDeregister == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileHandleDeregister = dlsym(handle, 'cuFileHandleDeregister') global __cuFileBufRegister __cuFileBufRegister = dlsym(RTLD_DEFAULT, 'cuFileBufRegister') if __cuFileBufRegister == NULL: if handle == NULL: - handle = load_library() + 
handle = load_library(driver_ver) __cuFileBufRegister = dlsym(handle, 'cuFileBufRegister') global __cuFileBufDeregister __cuFileBufDeregister = dlsym(RTLD_DEFAULT, 'cuFileBufDeregister') if __cuFileBufDeregister == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBufDeregister = dlsym(handle, 'cuFileBufDeregister') global __cuFileRead __cuFileRead = dlsym(RTLD_DEFAULT, 'cuFileRead') if __cuFileRead == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileRead = dlsym(handle, 'cuFileRead') global __cuFileWrite __cuFileWrite = dlsym(RTLD_DEFAULT, 'cuFileWrite') if __cuFileWrite == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileWrite = dlsym(handle, 'cuFileWrite') global __cuFileDriverOpen __cuFileDriverOpen = dlsym(RTLD_DEFAULT, 'cuFileDriverOpen') if __cuFileDriverOpen == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverOpen = dlsym(handle, 'cuFileDriverOpen') global __cuFileDriverClose_v2 __cuFileDriverClose_v2 = dlsym(RTLD_DEFAULT, 'cuFileDriverClose_v2') if __cuFileDriverClose_v2 == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverClose_v2 = dlsym(handle, 'cuFileDriverClose_v2') global __cuFileUseCount __cuFileUseCount = dlsym(RTLD_DEFAULT, 'cuFileUseCount') if __cuFileUseCount == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileUseCount = dlsym(handle, 'cuFileUseCount') global __cuFileDriverGetProperties __cuFileDriverGetProperties = dlsym(RTLD_DEFAULT, 'cuFileDriverGetProperties') if __cuFileDriverGetProperties == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverGetProperties = dlsym(handle, 'cuFileDriverGetProperties') global __cuFileDriverSetPollMode __cuFileDriverSetPollMode = dlsym(RTLD_DEFAULT, 'cuFileDriverSetPollMode') if 
__cuFileDriverSetPollMode == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverSetPollMode = dlsym(handle, 'cuFileDriverSetPollMode') global __cuFileDriverSetMaxDirectIOSize __cuFileDriverSetMaxDirectIOSize = dlsym(RTLD_DEFAULT, 'cuFileDriverSetMaxDirectIOSize') if __cuFileDriverSetMaxDirectIOSize == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverSetMaxDirectIOSize = dlsym(handle, 'cuFileDriverSetMaxDirectIOSize') global __cuFileDriverSetMaxCacheSize __cuFileDriverSetMaxCacheSize = dlsym(RTLD_DEFAULT, 'cuFileDriverSetMaxCacheSize') if __cuFileDriverSetMaxCacheSize == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverSetMaxCacheSize = dlsym(handle, 'cuFileDriverSetMaxCacheSize') global __cuFileDriverSetMaxPinnedMemSize __cuFileDriverSetMaxPinnedMemSize = dlsym(RTLD_DEFAULT, 'cuFileDriverSetMaxPinnedMemSize') if __cuFileDriverSetMaxPinnedMemSize == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileDriverSetMaxPinnedMemSize = dlsym(handle, 'cuFileDriverSetMaxPinnedMemSize') global __cuFileBatchIOSetUp __cuFileBatchIOSetUp = dlsym(RTLD_DEFAULT, 'cuFileBatchIOSetUp') if __cuFileBatchIOSetUp == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBatchIOSetUp = dlsym(handle, 'cuFileBatchIOSetUp') global __cuFileBatchIOSubmit __cuFileBatchIOSubmit = dlsym(RTLD_DEFAULT, 'cuFileBatchIOSubmit') if __cuFileBatchIOSubmit == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBatchIOSubmit = dlsym(handle, 'cuFileBatchIOSubmit') global __cuFileBatchIOGetStatus __cuFileBatchIOGetStatus = dlsym(RTLD_DEFAULT, 'cuFileBatchIOGetStatus') if __cuFileBatchIOGetStatus == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBatchIOGetStatus = dlsym(handle, 
'cuFileBatchIOGetStatus') global __cuFileBatchIOCancel __cuFileBatchIOCancel = dlsym(RTLD_DEFAULT, 'cuFileBatchIOCancel') if __cuFileBatchIOCancel == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBatchIOCancel = dlsym(handle, 'cuFileBatchIOCancel') global __cuFileBatchIODestroy __cuFileBatchIODestroy = dlsym(RTLD_DEFAULT, 'cuFileBatchIODestroy') if __cuFileBatchIODestroy == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileBatchIODestroy = dlsym(handle, 'cuFileBatchIODestroy') global __cuFileReadAsync __cuFileReadAsync = dlsym(RTLD_DEFAULT, 'cuFileReadAsync') if __cuFileReadAsync == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileReadAsync = dlsym(handle, 'cuFileReadAsync') global __cuFileWriteAsync __cuFileWriteAsync = dlsym(RTLD_DEFAULT, 'cuFileWriteAsync') if __cuFileWriteAsync == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileWriteAsync = dlsym(handle, 'cuFileWriteAsync') global __cuFileStreamRegister __cuFileStreamRegister = dlsym(RTLD_DEFAULT, 'cuFileStreamRegister') if __cuFileStreamRegister == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileStreamRegister = dlsym(handle, 'cuFileStreamRegister') global __cuFileStreamDeregister __cuFileStreamDeregister = dlsym(RTLD_DEFAULT, 'cuFileStreamDeregister') if __cuFileStreamDeregister == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileStreamDeregister = dlsym(handle, 'cuFileStreamDeregister') global __cuFileGetVersion __cuFileGetVersion = dlsym(RTLD_DEFAULT, 'cuFileGetVersion') if __cuFileGetVersion == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetVersion = dlsym(handle, 'cuFileGetVersion') global __cuFileGetParameterSizeT __cuFileGetParameterSizeT = dlsym(RTLD_DEFAULT, 
'cuFileGetParameterSizeT') if __cuFileGetParameterSizeT == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetParameterSizeT = dlsym(handle, 'cuFileGetParameterSizeT') global __cuFileGetParameterBool __cuFileGetParameterBool = dlsym(RTLD_DEFAULT, 'cuFileGetParameterBool') if __cuFileGetParameterBool == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetParameterBool = dlsym(handle, 'cuFileGetParameterBool') global __cuFileGetParameterString __cuFileGetParameterString = dlsym(RTLD_DEFAULT, 'cuFileGetParameterString') if __cuFileGetParameterString == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetParameterString = dlsym(handle, 'cuFileGetParameterString') + global __cuFileGetParameterMinMaxValue + __cuFileGetParameterMinMaxValue = dlsym(RTLD_DEFAULT, 'cuFileGetParameterMinMaxValue') + if __cuFileGetParameterMinMaxValue == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __cuFileGetParameterMinMaxValue = dlsym(handle, 'cuFileGetParameterMinMaxValue') + global __cuFileSetParameterSizeT __cuFileSetParameterSizeT = dlsym(RTLD_DEFAULT, 'cuFileSetParameterSizeT') if __cuFileSetParameterSizeT == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileSetParameterSizeT = dlsym(handle, 'cuFileSetParameterSizeT') global __cuFileSetParameterBool __cuFileSetParameterBool = dlsym(RTLD_DEFAULT, 'cuFileSetParameterBool') if __cuFileSetParameterBool == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileSetParameterBool = dlsym(handle, 'cuFileSetParameterBool') global __cuFileSetParameterString __cuFileSetParameterString = dlsym(RTLD_DEFAULT, 'cuFileSetParameterString') if __cuFileSetParameterString == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileSetParameterString = dlsym(handle, 
'cuFileSetParameterString') - global __cuFileDriverClose - __cuFileDriverClose = dlsym(RTLD_DEFAULT, 'cuFileDriverClose') - if __cuFileDriverClose == NULL: - if handle == NULL: - handle = load_library() - __cuFileDriverClose = dlsym(handle, 'cuFileDriverClose') - - global __cuFileGetParameterMinMaxValue - __cuFileGetParameterMinMaxValue = dlsym(RTLD_DEFAULT, 'cuFileGetParameterMinMaxValue') - if __cuFileGetParameterMinMaxValue == NULL: - if handle == NULL: - handle = load_library() - __cuFileGetParameterMinMaxValue = dlsym(handle, 'cuFileGetParameterMinMaxValue') - global __cuFileSetStatsLevel __cuFileSetStatsLevel = dlsym(RTLD_DEFAULT, 'cuFileSetStatsLevel') if __cuFileSetStatsLevel == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileSetStatsLevel = dlsym(handle, 'cuFileSetStatsLevel') global __cuFileGetStatsLevel __cuFileGetStatsLevel = dlsym(RTLD_DEFAULT, 'cuFileGetStatsLevel') if __cuFileGetStatsLevel == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetStatsLevel = dlsym(handle, 'cuFileGetStatsLevel') global __cuFileStatsStart __cuFileStatsStart = dlsym(RTLD_DEFAULT, 'cuFileStatsStart') if __cuFileStatsStart == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileStatsStart = dlsym(handle, 'cuFileStatsStart') global __cuFileStatsStop __cuFileStatsStop = dlsym(RTLD_DEFAULT, 'cuFileStatsStop') if __cuFileStatsStop == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileStatsStop = dlsym(handle, 'cuFileStatsStop') global __cuFileStatsReset __cuFileStatsReset = dlsym(RTLD_DEFAULT, 'cuFileStatsReset') if __cuFileStatsReset == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileStatsReset = dlsym(handle, 'cuFileStatsReset') global __cuFileGetStatsL1 __cuFileGetStatsL1 = dlsym(RTLD_DEFAULT, 'cuFileGetStatsL1') if __cuFileGetStatsL1 == NULL: if handle 
== NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetStatsL1 = dlsym(handle, 'cuFileGetStatsL1') global __cuFileGetStatsL2 __cuFileGetStatsL2 = dlsym(RTLD_DEFAULT, 'cuFileGetStatsL2') if __cuFileGetStatsL2 == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetStatsL2 = dlsym(handle, 'cuFileGetStatsL2') global __cuFileGetStatsL3 __cuFileGetStatsL3 = dlsym(RTLD_DEFAULT, 'cuFileGetStatsL3') if __cuFileGetStatsL3 == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetStatsL3 = dlsym(handle, 'cuFileGetStatsL3') global __cuFileGetBARSizeInKB __cuFileGetBARSizeInKB = dlsym(RTLD_DEFAULT, 'cuFileGetBARSizeInKB') if __cuFileGetBARSizeInKB == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetBARSizeInKB = dlsym(handle, 'cuFileGetBARSizeInKB') global __cuFileSetParameterPosixPoolSlabArray __cuFileSetParameterPosixPoolSlabArray = dlsym(RTLD_DEFAULT, 'cuFileSetParameterPosixPoolSlabArray') if __cuFileSetParameterPosixPoolSlabArray == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileSetParameterPosixPoolSlabArray = dlsym(handle, 'cuFileSetParameterPosixPoolSlabArray') global __cuFileGetParameterPosixPoolSlabArray __cuFileGetParameterPosixPoolSlabArray = dlsym(RTLD_DEFAULT, 'cuFileGetParameterPosixPoolSlabArray') if __cuFileGetParameterPosixPoolSlabArray == NULL: if handle == NULL: - handle = load_library() + handle = load_library(driver_ver) __cuFileGetParameterPosixPoolSlabArray = dlsym(handle, 'cuFileGetParameterPosixPoolSlabArray') __py_cufile_init = True @@ -514,6 +508,9 @@ cpdef dict _inspect_function_pointers(): global __cuFileGetParameterString data["__cuFileGetParameterString"] = __cuFileGetParameterString + global __cuFileGetParameterMinMaxValue + data["__cuFileGetParameterMinMaxValue"] = __cuFileGetParameterMinMaxValue + global __cuFileSetParameterSizeT 
data["__cuFileSetParameterSizeT"] = __cuFileSetParameterSizeT @@ -523,12 +520,6 @@ cpdef dict _inspect_function_pointers(): global __cuFileSetParameterString data["__cuFileSetParameterString"] = __cuFileSetParameterString - global __cuFileDriverClose - data["__cuFileDriverClose"] = __cuFileDriverClose - - global __cuFileGetParameterMinMaxValue - data["__cuFileGetParameterMinMaxValue"] = __cuFileGetParameterMinMaxValue - global __cuFileSetStatsLevel data["__cuFileSetStatsLevel"] = __cuFileSetStatsLevel @@ -849,6 +840,16 @@ cdef CUfileError_t _cuFileGetParameterString(CUFileStringConfigParameter_t param param, desc_str, len) +cdef CUfileError_t _cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil: + global __cuFileGetParameterMinMaxValue + _check_or_init_cufile() + if __cuFileGetParameterMinMaxValue == NULL: + with gil: + raise FunctionNotFoundError("function cuFileGetParameterMinMaxValue is not found") + return (<CUfileError_t (*)(CUFileSizeTConfigParameter_t, size_t*, size_t*) noexcept nogil>__cuFileGetParameterMinMaxValue)( + param, min_value, max_value) + + cdef CUfileError_t _cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) except?CUFILE_LOADING_ERROR nogil: global __cuFileSetParameterSizeT _check_or_init_cufile() @@ -879,26 +880,6 @@ cdef CUfileError_t _cuFileSetParameterString(CUFileStringConfigParameter_t param param, desc_str) -cdef CUfileError_t _cuFileDriverClose() except?CUFILE_LOADING_ERROR nogil: - global __cuFileDriverClose - _check_or_init_cufile() - if __cuFileDriverClose == NULL: - with gil: - raise FunctionNotFoundError("function cuFileDriverClose is not found") - return (<CUfileError_t (*)() noexcept nogil>__cuFileDriverClose)( - ) - - -cdef CUfileError_t _cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil: - global __cuFileGetParameterMinMaxValue - 
_check_or_init_cufile() - if __cuFileGetParameterMinMaxValue == NULL: - with gil: - raise FunctionNotFoundError("function 
cuFileGetParameterMinMaxValue is not found") - return (<CUfileError_t (*)(CUFileSizeTConfigParameter_t, size_t*, size_t*) noexcept nogil>__cuFileGetParameterMinMaxValue)( - param, min_value, max_value) - - cdef CUfileError_t _cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil: global __cuFileSetStatsLevel _check_or_init_cufile() diff --git a/cuda_bindings/cuda/bindings/_internal/cycufile.pxd b/cuda_bindings/cuda/bindings/_internal/cycufile.pxd new file mode 100644 index 000000000..11cf737f0 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/cycufile.pxd @@ -0,0 +1,370 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. + +from libc.stdint cimport uint32_t, uint64_t +from libc.time cimport time_t +from libcpp cimport bool as cpp_bool +from posix.types cimport off_t + +cimport cuda.bindings.cydriver +from cuda.bindings.cydriver cimport CUresult + + +############################################################################### +# Types (structs, enums, ...) 
+############################################################################### + +# TODO: switch to "from libc.time cimport timespec" once we can use recent +# Cython to build +cdef extern from "<time.h>": + cdef struct timespec: + time_t tv_sec + long tv_nsec +cdef extern from "<sys/socket.h>": + cdef struct sockaddr: + unsigned short sa_family + char sa_data[14] + ctypedef sockaddr sockaddr_t + + +cdef extern from '<cufile.h>': + # enums + ctypedef enum CUfileOpError: + CU_FILE_SUCCESS + CU_FILE_DRIVER_NOT_INITIALIZED + CU_FILE_DRIVER_INVALID_PROPS + CU_FILE_DRIVER_UNSUPPORTED_LIMIT + CU_FILE_DRIVER_VERSION_MISMATCH + CU_FILE_DRIVER_VERSION_READ_ERROR + CU_FILE_DRIVER_CLOSING + CU_FILE_PLATFORM_NOT_SUPPORTED + CU_FILE_IO_NOT_SUPPORTED + CU_FILE_DEVICE_NOT_SUPPORTED + CU_FILE_NVFS_DRIVER_ERROR + CU_FILE_CUDA_DRIVER_ERROR + CU_FILE_CUDA_POINTER_INVALID + CU_FILE_CUDA_MEMORY_TYPE_INVALID + CU_FILE_CUDA_POINTER_RANGE_ERROR + CU_FILE_CUDA_CONTEXT_MISMATCH + CU_FILE_INVALID_MAPPING_SIZE + CU_FILE_INVALID_MAPPING_RANGE + CU_FILE_INVALID_FILE_TYPE + CU_FILE_INVALID_FILE_OPEN_FLAG + CU_FILE_DIO_NOT_SET + CU_FILE_INVALID_VALUE + CU_FILE_MEMORY_ALREADY_REGISTERED + CU_FILE_MEMORY_NOT_REGISTERED + CU_FILE_PERMISSION_DENIED + CU_FILE_DRIVER_ALREADY_OPEN + CU_FILE_HANDLE_NOT_REGISTERED + CU_FILE_HANDLE_ALREADY_REGISTERED + CU_FILE_DEVICE_NOT_FOUND + CU_FILE_INTERNAL_ERROR + CU_FILE_GETNEWFD_FAILED + CU_FILE_NVFS_SETUP_ERROR + CU_FILE_IO_DISABLED + CU_FILE_BATCH_SUBMIT_FAILED + CU_FILE_GPU_MEMORY_PINNING_FAILED + CU_FILE_BATCH_FULL + CU_FILE_ASYNC_NOT_SUPPORTED + CU_FILE_INTERNAL_BATCH_SETUP_ERROR + CU_FILE_INTERNAL_BATCH_SUBMIT_ERROR + CU_FILE_INTERNAL_BATCH_GETSTATUS_ERROR + CU_FILE_INTERNAL_BATCH_CANCEL_ERROR + CU_FILE_NOMEM_ERROR + CU_FILE_IO_ERROR + CU_FILE_INTERNAL_BUF_REGISTER_ERROR + CU_FILE_HASH_OPR_ERROR + CU_FILE_INVALID_CONTEXT_ERROR + CU_FILE_NVFS_INTERNAL_DRIVER_ERROR + CU_FILE_BATCH_NOCOMPAT_ERROR + CU_FILE_IO_MAX_ERROR + + ctypedef enum 
CUfileDriverStatusFlags_t: + CU_FILE_LUSTRE_SUPPORTED + 
CU_FILE_WEKAFS_SUPPORTED + CU_FILE_NFS_SUPPORTED + CU_FILE_GPFS_SUPPORTED + CU_FILE_NVME_SUPPORTED + CU_FILE_NVMEOF_SUPPORTED + CU_FILE_SCSI_SUPPORTED + CU_FILE_SCALEFLUX_CSD_SUPPORTED + CU_FILE_NVMESH_SUPPORTED + CU_FILE_BEEGFS_SUPPORTED + CU_FILE_NVME_P2P_SUPPORTED + CU_FILE_SCATEFS_SUPPORTED + + ctypedef enum CUfileDriverControlFlags_t: + CU_FILE_USE_POLL_MODE + CU_FILE_ALLOW_COMPAT_MODE + + ctypedef enum CUfileFeatureFlags_t: + CU_FILE_DYN_ROUTING_SUPPORTED + CU_FILE_BATCH_IO_SUPPORTED + CU_FILE_STREAMS_SUPPORTED + CU_FILE_PARALLEL_IO_SUPPORTED + + ctypedef enum CUfileFileHandleType: + CU_FILE_HANDLE_TYPE_OPAQUE_FD + CU_FILE_HANDLE_TYPE_OPAQUE_WIN32 + CU_FILE_HANDLE_TYPE_USERSPACE_FS + + ctypedef enum CUfileOpcode_t: + CUFILE_READ + CUFILE_WRITE + + ctypedef enum CUfileStatus_t: + CUFILE_WAITING + CUFILE_PENDING + CUFILE_INVALID + CUFILE_CANCELED + CUFILE_COMPLETE + CUFILE_TIMEOUT + CUFILE_FAILED + + ctypedef enum CUfileBatchMode_t: + CUFILE_BATCH + + ctypedef enum CUFileSizeTConfigParameter_t: + CUFILE_PARAM_PROFILE_STATS + CUFILE_PARAM_EXECUTION_MAX_IO_QUEUE_DEPTH + CUFILE_PARAM_EXECUTION_MAX_IO_THREADS + CUFILE_PARAM_EXECUTION_MIN_IO_THRESHOLD_SIZE_KB + CUFILE_PARAM_EXECUTION_MAX_REQUEST_PARALLELISM + CUFILE_PARAM_PROPERTIES_MAX_DIRECT_IO_SIZE_KB + CUFILE_PARAM_PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB + CUFILE_PARAM_PROPERTIES_PER_BUFFER_CACHE_SIZE_KB + CUFILE_PARAM_PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB + CUFILE_PARAM_PROPERTIES_IO_BATCHSIZE + CUFILE_PARAM_POLLTHRESHOLD_SIZE_KB + CUFILE_PARAM_PROPERTIES_BATCH_IO_TIMEOUT_MS + + ctypedef enum CUFileBoolConfigParameter_t: + CUFILE_PARAM_PROPERTIES_USE_POLL_MODE + CUFILE_PARAM_PROPERTIES_ALLOW_COMPAT_MODE + CUFILE_PARAM_FORCE_COMPAT_MODE + CUFILE_PARAM_FS_MISC_API_CHECK_AGGRESSIVE + CUFILE_PARAM_EXECUTION_PARALLEL_IO + CUFILE_PARAM_PROFILE_NVTX + CUFILE_PARAM_PROPERTIES_ALLOW_SYSTEM_MEMORY + CUFILE_PARAM_USE_PCIP2PDMA + CUFILE_PARAM_PREFER_IO_URING + CUFILE_PARAM_FORCE_ODIRECT_MODE + 
CUFILE_PARAM_SKIP_TOPOLOGY_DETECTION + CUFILE_PARAM_STREAM_MEMOPS_BYPASS + + ctypedef enum CUFileStringConfigParameter_t: + CUFILE_PARAM_LOGGING_LEVEL + CUFILE_PARAM_ENV_LOGFILE_PATH + CUFILE_PARAM_LOG_DIR + + ctypedef enum CUFileArrayConfigParameter_t: + CUFILE_PARAM_POSIX_POOL_SLAB_SIZE_KB + CUFILE_PARAM_POSIX_POOL_SLAB_COUNT + + # types + ctypedef void* CUfileHandle_t 'CUfileHandle_t' + ctypedef void* CUfileBatchHandle_t 'CUfileBatchHandle_t' + ctypedef struct CUfileError_t 'CUfileError_t': + CUfileOpError err + CUresult cu_err + cdef struct _anon_pod0 '_anon_pod0': + unsigned int major_version + unsigned int minor_version + size_t poll_thresh_size + size_t max_direct_io_size + unsigned int dstatusflags + unsigned int dcontrolflags + ctypedef struct cufileRDMAInfo_t 'cufileRDMAInfo_t': + int version + int desc_len + char* desc_str + ctypedef struct CUfileFSOps_t 'CUfileFSOps_t': + char* (*fs_type)(void*) + int (*getRDMADeviceList)(void*, sockaddr_t**) + int (*getRDMADevicePriority)(void*, char*, size_t, loff_t, sockaddr_t*) + ssize_t (*read)(void*, char*, size_t, loff_t, cufileRDMAInfo_t*) + ssize_t (*write)(void*, const char*, size_t, loff_t, cufileRDMAInfo_t*) + cdef union _anon_pod1 '_anon_pod1': + int fd + void* handle + cdef struct _anon_pod3 '_anon_pod3': + void* devPtr_base + off_t file_offset + off_t devPtr_offset + size_t size + ctypedef struct CUfileIOEvents_t 'CUfileIOEvents_t': + void* cookie + CUfileStatus_t status + size_t ret + ctypedef struct CUfileOpCounter_t 'CUfileOpCounter_t': + uint64_t ok + uint64_t err + ctypedef struct CUfilePerGpuStats_t 'CUfilePerGpuStats_t': + char uuid[16] + uint64_t read_bytes + uint64_t read_bw_bytes_per_sec + uint64_t read_utilization + uint64_t read_duration_us + uint64_t n_total_reads + uint64_t n_p2p_reads + uint64_t n_nvfs_reads + uint64_t n_posix_reads + uint64_t n_unaligned_reads + uint64_t n_dr_reads + uint64_t n_sparse_regions + uint64_t n_inline_regions + uint64_t n_reads_err + uint64_t writes_bytes + 
uint64_t write_bw_bytes_per_sec + uint64_t write_utilization + uint64_t write_duration_us + uint64_t n_total_writes + uint64_t n_p2p_writes + uint64_t n_nvfs_writes + uint64_t n_posix_writes + uint64_t n_unaligned_writes + uint64_t n_dr_writes + uint64_t n_writes_err + uint64_t n_mmap + uint64_t n_mmap_ok + uint64_t n_mmap_err + uint64_t n_mmap_free + uint64_t reg_bytes + ctypedef struct CUfileDrvProps_t 'CUfileDrvProps_t': + _anon_pod0 nvfs + unsigned int fflags + unsigned int max_device_cache_size + unsigned int per_buffer_cache_size + unsigned int max_device_pinned_mem_size + unsigned int max_batch_io_size + unsigned int max_batch_io_timeout_msecs + ctypedef struct CUfileDescr_t 'CUfileDescr_t': + CUfileFileHandleType type + _anon_pod1 handle + CUfileFSOps_t* fs_ops + cdef union _anon_pod2 '_anon_pod2': + _anon_pod3 batch + ctypedef struct CUfileStatsLevel1_t 'CUfileStatsLevel1_t': + CUfileOpCounter_t read_ops + CUfileOpCounter_t write_ops + CUfileOpCounter_t hdl_register_ops + CUfileOpCounter_t hdl_deregister_ops + CUfileOpCounter_t buf_register_ops + CUfileOpCounter_t buf_deregister_ops + uint64_t read_bytes + uint64_t write_bytes + uint64_t read_bw_bytes_per_sec + uint64_t write_bw_bytes_per_sec + uint64_t read_lat_avg_us + uint64_t write_lat_avg_us + uint64_t read_ops_per_sec + uint64_t write_ops_per_sec + uint64_t read_lat_sum_us + uint64_t write_lat_sum_us + CUfileOpCounter_t batch_submit_ops + CUfileOpCounter_t batch_complete_ops + CUfileOpCounter_t batch_setup_ops + CUfileOpCounter_t batch_cancel_ops + CUfileOpCounter_t batch_destroy_ops + CUfileOpCounter_t batch_enqueued_ops + CUfileOpCounter_t batch_posix_enqueued_ops + CUfileOpCounter_t batch_processed_ops + CUfileOpCounter_t batch_posix_processed_ops + CUfileOpCounter_t batch_nvfs_submit_ops + CUfileOpCounter_t batch_p2p_submit_ops + CUfileOpCounter_t batch_aio_submit_ops + CUfileOpCounter_t batch_iouring_submit_ops + CUfileOpCounter_t batch_mixed_io_submit_ops + CUfileOpCounter_t 
batch_total_submit_ops + uint64_t batch_read_bytes + uint64_t batch_write_bytes + uint64_t batch_read_bw_bytes + uint64_t batch_write_bw_bytes + uint64_t batch_submit_lat_avg_us + uint64_t batch_completion_lat_avg_us + uint64_t batch_submit_ops_per_sec + uint64_t batch_complete_ops_per_sec + uint64_t batch_submit_lat_sum_us + uint64_t batch_completion_lat_sum_us + uint64_t last_batch_read_bytes + uint64_t last_batch_write_bytes + ctypedef struct CUfileIOParams_t 'CUfileIOParams_t': + CUfileBatchMode_t mode + _anon_pod2 u + CUfileHandle_t fh + CUfileOpcode_t opcode + void* cookie + ctypedef struct CUfileStatsLevel2_t 'CUfileStatsLevel2_t': + CUfileStatsLevel1_t basic + uint64_t read_size_kb_hist[32] + uint64_t write_size_kb_hist[32] + ctypedef struct CUfileStatsLevel3_t 'CUfileStatsLevel3_t': + CUfileStatsLevel2_t detailed + uint32_t num_gpus + CUfilePerGpuStats_t per_gpu_stats[16] + + +cdef extern from *: + """ + // This is the missing piece we need to supply to help Cython & C++ compilers. 
+ inline bool operator==(const CUfileError_t& lhs, const CUfileError_t& rhs) { + return (lhs.err == rhs.err) && (lhs.cu_err == rhs.cu_err); + } + static CUfileError_t CUFILE_LOADING_ERROR{(CUfileOpError)-1, (CUresult)-1}; + """ + const CUfileError_t CUFILE_LOADING_ERROR + ctypedef void* CUstream "CUstream" + + const char* cufileop_status_error(CUfileOpError) + + +############################################################################### +# Functions +############################################################################### + +cdef CUfileError_t cuFileHandleRegister(CUfileHandle_t* fh, CUfileDescr_t* descr) except?CUFILE_LOADING_ERROR nogil +cdef void cuFileHandleDeregister(CUfileHandle_t fh) except* nogil +cdef CUfileError_t cuFileBufRegister(const void* bufPtr_base, size_t length, int flags) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileBufDeregister(const void* bufPtr_base) except?CUFILE_LOADING_ERROR nogil +cdef ssize_t cuFileRead(CUfileHandle_t fh, void* bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset) except* nogil +cdef ssize_t cuFileWrite(CUfileHandle_t fh, const void* bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset) except* nogil +cdef CUfileError_t cuFileDriverOpen() except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileDriverClose_v2() except?CUFILE_LOADING_ERROR nogil +cdef long cuFileUseCount() except* nogil +cdef CUfileError_t cuFileDriverGetProperties(CUfileDrvProps_t* props) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileDriverSetPollMode(cpp_bool poll, size_t poll_threshold_size) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileDriverSetMaxDirectIOSize(size_t max_direct_io_size) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileDriverSetMaxCacheSize(size_t max_cache_size) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileDriverSetMaxPinnedMemSize(size_t max_pinned_size) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t 
cuFileBatchIOSetUp(CUfileBatchHandle_t* batch_idp, unsigned nr) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileBatchIOSubmit(CUfileBatchHandle_t batch_idp, unsigned nr, CUfileIOParams_t* iocbp, unsigned int flags) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileBatchIOGetStatus(CUfileBatchHandle_t batch_idp, unsigned min_nr, unsigned* nr, CUfileIOEvents_t* iocbp, timespec* timeout) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileBatchIOCancel(CUfileBatchHandle_t batch_idp) except?CUFILE_LOADING_ERROR nogil +cdef void cuFileBatchIODestroy(CUfileBatchHandle_t batch_idp) except* nogil +cdef CUfileError_t cuFileReadAsync(CUfileHandle_t fh, void* bufPtr_base, size_t* size_p, off_t* file_offset_p, off_t* bufPtr_offset_p, ssize_t* bytes_read_p, CUstream stream) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileWriteAsync(CUfileHandle_t fh, void* bufPtr_base, size_t* size_p, off_t* file_offset_p, off_t* bufPtr_offset_p, ssize_t* bytes_written_p, CUstream stream) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileStreamRegister(CUstream stream, unsigned flags) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileStreamDeregister(CUstream stream) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetVersion(int* version) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t* value) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool* value) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetParameterString(CUFileStringConfigParameter_t param, char* desc_str, int len) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) 
except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileSetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool value) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileSetParameterString(CUFileStringConfigParameter_t param, const char* desc_str) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetStatsLevel(int* level) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileStatsStart() except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileStatsStop() except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileStatsReset() except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetStatsL1(CUfileStatsLevel1_t* stats) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetStatsL2(CUfileStatsLevel2_t* stats) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetStatsL3(CUfileStatsLevel3_t* stats) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetBARSizeInKB(int gpuIndex, size_t* barSize) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileSetParameterPosixPoolSlabArray(const size_t* size_values, const size_t* count_values, int len) except?CUFILE_LOADING_ERROR nogil +cdef CUfileError_t cuFileGetParameterPosixPoolSlabArray(size_t* size_values, size_t* count_values, int len) except?CUFILE_LOADING_ERROR nogil diff --git a/cuda_bindings/cuda/bindings/_internal/cycufile.pyx b/cuda_bindings/cuda/bindings/_internal/cycufile.pyx new file mode 100644 index 000000000..e23177137 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/cycufile.pyx @@ -0,0 +1,182 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated with version 13.0.0. Do not modify it directly. 
+ +from ._internal cimport cufile as _cufile + +import cython + +############################################################################### +# Wrapper functions +############################################################################### + +cdef CUfileError_t cuFileHandleRegister(CUfileHandle_t* fh, CUfileDescr_t* descr) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileHandleRegister(fh, descr) + + +@cython.show_performance_hints(False) +cdef void cuFileHandleDeregister(CUfileHandle_t fh) except* nogil: + _cufile._cuFileHandleDeregister(fh) + + +cdef CUfileError_t cuFileBufRegister(const void* bufPtr_base, size_t length, int flags) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileBufRegister(bufPtr_base, length, flags) + + +cdef CUfileError_t cuFileBufDeregister(const void* bufPtr_base) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileBufDeregister(bufPtr_base) + + +cdef ssize_t cuFileRead(CUfileHandle_t fh, void* bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset) except* nogil: + return _cufile._cuFileRead(fh, bufPtr_base, size, file_offset, bufPtr_offset) + + +cdef ssize_t cuFileWrite(CUfileHandle_t fh, const void* bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset) except* nogil: + return _cufile._cuFileWrite(fh, bufPtr_base, size, file_offset, bufPtr_offset) + + +cdef CUfileError_t cuFileDriverOpen() except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverOpen() + + +cdef CUfileError_t cuFileDriverClose_v2() except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverClose_v2() + + +cdef long cuFileUseCount() except* nogil: + return _cufile._cuFileUseCount() + + +cdef CUfileError_t cuFileDriverGetProperties(CUfileDrvProps_t* props) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverGetProperties(props) + + +cdef CUfileError_t cuFileDriverSetPollMode(cpp_bool poll, size_t poll_threshold_size) except?CUFILE_LOADING_ERROR nogil: + return 
_cufile._cuFileDriverSetPollMode(poll, poll_threshold_size) + + +cdef CUfileError_t cuFileDriverSetMaxDirectIOSize(size_t max_direct_io_size) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverSetMaxDirectIOSize(max_direct_io_size) + + +cdef CUfileError_t cuFileDriverSetMaxCacheSize(size_t max_cache_size) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverSetMaxCacheSize(max_cache_size) + + +cdef CUfileError_t cuFileDriverSetMaxPinnedMemSize(size_t max_pinned_size) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileDriverSetMaxPinnedMemSize(max_pinned_size) + + +cdef CUfileError_t cuFileBatchIOSetUp(CUfileBatchHandle_t* batch_idp, unsigned nr) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileBatchIOSetUp(batch_idp, nr) + + +cdef CUfileError_t cuFileBatchIOSubmit(CUfileBatchHandle_t batch_idp, unsigned nr, CUfileIOParams_t* iocbp, unsigned int flags) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileBatchIOSubmit(batch_idp, nr, iocbp, flags) + + +cdef CUfileError_t cuFileBatchIOGetStatus(CUfileBatchHandle_t batch_idp, unsigned min_nr, unsigned* nr, CUfileIOEvents_t* iocbp, timespec* timeout) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileBatchIOGetStatus(batch_idp, min_nr, nr, iocbp, timeout) + + +cdef CUfileError_t cuFileBatchIOCancel(CUfileBatchHandle_t batch_idp) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileBatchIOCancel(batch_idp) + + +@cython.show_performance_hints(False) +cdef void cuFileBatchIODestroy(CUfileBatchHandle_t batch_idp) except* nogil: + _cufile._cuFileBatchIODestroy(batch_idp) + + +cdef CUfileError_t cuFileReadAsync(CUfileHandle_t fh, void* bufPtr_base, size_t* size_p, off_t* file_offset_p, off_t* bufPtr_offset_p, ssize_t* bytes_read_p, CUstream stream) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileReadAsync(fh, bufPtr_base, size_p, file_offset_p, bufPtr_offset_p, bytes_read_p, stream) + + +cdef CUfileError_t cuFileWriteAsync(CUfileHandle_t fh, void* 
bufPtr_base, size_t* size_p, off_t* file_offset_p, off_t* bufPtr_offset_p, ssize_t* bytes_written_p, CUstream stream) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileWriteAsync(fh, bufPtr_base, size_p, file_offset_p, bufPtr_offset_p, bytes_written_p, stream) + + +cdef CUfileError_t cuFileStreamRegister(CUstream stream, unsigned flags) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileStreamRegister(stream, flags) + + +cdef CUfileError_t cuFileStreamDeregister(CUstream stream) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileStreamDeregister(stream) + + +cdef CUfileError_t cuFileGetVersion(int* version) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetVersion(version) + + +cdef CUfileError_t cuFileGetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t* value) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetParameterSizeT(param, value) + + +cdef CUfileError_t cuFileGetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool* value) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetParameterBool(param, value) + + +cdef CUfileError_t cuFileGetParameterString(CUFileStringConfigParameter_t param, char* desc_str, int len) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetParameterString(param, desc_str, len) + + +cdef CUfileError_t cuFileGetParameterMinMaxValue(CUFileSizeTConfigParameter_t param, size_t* min_value, size_t* max_value) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetParameterMinMaxValue(param, min_value, max_value) + + +cdef CUfileError_t cuFileSetParameterSizeT(CUFileSizeTConfigParameter_t param, size_t value) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileSetParameterSizeT(param, value) + + +cdef CUfileError_t cuFileSetParameterBool(CUFileBoolConfigParameter_t param, cpp_bool value) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileSetParameterBool(param, value) + + +cdef CUfileError_t 
cuFileSetParameterString(CUFileStringConfigParameter_t param, const char* desc_str) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileSetParameterString(param, desc_str) + + +cdef CUfileError_t cuFileSetStatsLevel(int level) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileSetStatsLevel(level) + + +cdef CUfileError_t cuFileGetStatsLevel(int* level) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetStatsLevel(level) + + +cdef CUfileError_t cuFileStatsStart() except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileStatsStart() + + +cdef CUfileError_t cuFileStatsStop() except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileStatsStop() + + +cdef CUfileError_t cuFileStatsReset() except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileStatsReset() + + +cdef CUfileError_t cuFileGetStatsL1(CUfileStatsLevel1_t* stats) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetStatsL1(stats) + + +cdef CUfileError_t cuFileGetStatsL2(CUfileStatsLevel2_t* stats) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetStatsL2(stats) + + +cdef CUfileError_t cuFileGetStatsL3(CUfileStatsLevel3_t* stats) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetStatsL3(stats) + + +cdef CUfileError_t cuFileGetBARSizeInKB(int gpuIndex, size_t* barSize) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetBARSizeInKB(gpuIndex, barSize) + + +cdef CUfileError_t cuFileSetParameterPosixPoolSlabArray(const size_t* size_values, const size_t* count_values, int len) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileSetParameterPosixPoolSlabArray(size_values, count_values, len) + + +cdef CUfileError_t cuFileGetParameterPosixPoolSlabArray(size_t* size_values, size_t* count_values, int len) except?CUFILE_LOADING_ERROR nogil: + return _cufile._cuFileGetParameterPosixPoolSlabArray(size_values, count_values, len) From 343c4c731c6783bed0001625678216e6d995d4d0 Mon Sep 17 00:00:00 2001 From: Chloe Chia Date: Wed, 1 Oct 2025 
18:21:09 +0000 Subject: [PATCH 4/6] Remove overriding tests --- cuda_bindings/tests/test_cufile.py | 161 ++++++++++++++++------------- 1 file changed, 91 insertions(+), 70 deletions(-) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index da9b4894d..36386db6b 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -10,9 +10,8 @@ import tempfile from contextlib import suppress from functools import cache - -import cuda.bindings.driver as cuda import pytest +import cuda.bindings.driver as cuda # Configure logging to show INFO level and above logging.basicConfig( @@ -1444,17 +1443,14 @@ def test_batch_io_large_operations(): read_buffers = [] all_buffers = [] # Initialize all_buffers to avoid UnboundLocalError - print("=== CUDA Memory Allocation ===") for i in range(num_operations): err, buf = cuda.cuMemAlloc(buf_size) assert err == cuda.CUresult.CUDA_SUCCESS write_buffers.append(buf) - print(f"Write buffer {i}: {hex(int(buf))} (4K aligned: {int(buf) % 4096 == 0})") err, buf = cuda.cuMemAlloc(buf_size) assert err == cuda.CUresult.CUDA_SUCCESS read_buffers.append(buf) - print(f"Read buffer {i}: {hex(int(buf))} (4K aligned: {int(buf) % 4096 == 0})") # Allocate host memory for data verification host_buf = ctypes.create_string_buffer(buf_size) @@ -1797,8 +1793,8 @@ def test_batch_io_large_operations(): @pytest.mark.skipif( cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" ) -def test_get_parameter_bool(): - """Test setting and getting boolean parameters with cuFile validation.""" +def test_set_get_parameter_size_t(): + """Test setting and getting size_t parameters with cuFile validation.""" # Initialize CUDA (err,) = cuda.cuInit(0) @@ -1813,67 +1809,92 @@ def test_get_parameter_bool(): assert err == cuda.CUresult.CUDA_SUCCESS try: - # Test setting and getting various boolean parameters - - # Test poll mode - 
cufile.set_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_USE_POLL_MODE, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_USE_POLL_MODE) - assert retrieved_value is True, f"Poll mode mismatch: set True, got {retrieved_value}" - - # Test compatibility mode - cufile.set_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_COMPAT_MODE, False) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_COMPAT_MODE) - assert retrieved_value is False, f"Compatibility mode mismatch: set False, got {retrieved_value}" - - # Test force compatibility mode - cufile.set_parameter_bool(cufile.BoolConfigParameter.FORCE_COMPAT_MODE, False) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.FORCE_COMPAT_MODE) - assert retrieved_value is False, f"Force compatibility mode mismatch: set False, got {retrieved_value}" - - # Test aggressive API check - cufile.set_parameter_bool(cufile.BoolConfigParameter.FS_MISC_API_CHECK_AGGRESSIVE, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.FS_MISC_API_CHECK_AGGRESSIVE) - assert retrieved_value is True, f"Aggressive API check mismatch: set True, got {retrieved_value}" - - # Test parallel IO - cufile.set_parameter_bool(cufile.BoolConfigParameter.EXECUTION_PARALLEL_IO, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.EXECUTION_PARALLEL_IO) - assert retrieved_value is True, f"Parallel IO mismatch: set True, got {retrieved_value}" - - # Test NVTX profiling - cufile.set_parameter_bool(cufile.BoolConfigParameter.PROFILE_NVTX, False) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROFILE_NVTX) - assert retrieved_value is False, f"NVTX profiling mismatch: set False, got {retrieved_value}" - - # Test system memory allowance - cufile.set_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_SYSTEM_MEMORY, True) - retrieved_value = 
cufile.get_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_SYSTEM_MEMORY) - assert retrieved_value is True, f"System memory allowance mismatch: set True, got {retrieved_value}" - - # Test PCI P2P DMA - cufile.set_parameter_bool(cufile.BoolConfigParameter.USE_PCIP2PDMA, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.USE_PCIP2PDMA) - assert retrieved_value is True, f"PCI P2P DMA mismatch: set True, got {retrieved_value}" - - # Test IO uring preference - cufile.set_parameter_bool(cufile.BoolConfigParameter.PREFER_IO_URING, False) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PREFER_IO_URING) - assert retrieved_value is False, f"IO uring preference mismatch: set False, got {retrieved_value}" - - # Test force O_DIRECT mode - cufile.set_parameter_bool(cufile.BoolConfigParameter.FORCE_ODIRECT_MODE, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.FORCE_ODIRECT_MODE) - assert retrieved_value is True, f"Force O_DIRECT mode mismatch: set True, got {retrieved_value}" - - # Test topology detection skip - cufile.set_parameter_bool(cufile.BoolConfigParameter.SKIP_TOPOLOGY_DETECTION, False) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.SKIP_TOPOLOGY_DETECTION) - assert retrieved_value is False, f"Topology detection skip mismatch: set False, got {retrieved_value}" - - # Test stream memops bypass - cufile.set_parameter_bool(cufile.BoolConfigParameter.STREAM_MEMOPS_BYPASS, True) - retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.STREAM_MEMOPS_BYPASS) - assert retrieved_value is True, f"Stream memops bypass mismatch: set True, got {retrieved_value}" + # Test setting and getting various size_t parameters + + # Test poll threshold size (in KB) + poll_threshold_kb = 64 # 64KB threshold + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB, poll_threshold_kb) + retrieved_value = 
cufile.get_parameter_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB) + assert retrieved_value == poll_threshold_kb, ( + f"Poll threshold mismatch: set {poll_threshold_kb}, got {retrieved_value}" + ) + + # Test max direct IO size (in KB) + max_direct_io_kb = 1024 # 1MB max direct IO size + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB, max_direct_io_kb) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB) + assert retrieved_value == max_direct_io_kb, ( + f"Max direct IO size mismatch: set {max_direct_io_kb}, got {retrieved_value}" + ) + + # Test max device cache size (in KB) + max_cache_kb = 512 # 512KB max cache size + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB, max_cache_kb) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB) + assert retrieved_value == max_cache_kb, f"Max cache size mismatch: set {max_cache_kb}, got {retrieved_value}" + + # Test per buffer cache size (in KB) + per_buffer_cache_kb = 128 # 128KB per buffer cache + cufile.set_parameter_size_t( + cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB, per_buffer_cache_kb + ) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB) + assert retrieved_value == per_buffer_cache_kb, ( + f"Per buffer cache size mismatch: set {per_buffer_cache_kb}, got {retrieved_value}" + ) + + # Test max device pinned memory size (in KB) + max_pinned_kb = 2048 # 2MB max pinned memory + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB, max_pinned_kb) + retrieved_value = cufile.get_parameter_size_t( + cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB + ) + assert retrieved_value == max_pinned_kb, ( + f"Max pinned memory size mismatch: set {max_pinned_kb}, got {retrieved_value}" 
+ ) + + # Test IO batch size + batch_size = 16 # 16 operations per batch + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE, batch_size) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE) + assert retrieved_value == batch_size, f"IO batch size mismatch: set {batch_size}, got {retrieved_value}" + + # Test batch IO timeout (in milliseconds) + timeout_ms = 5000 # 5 second timeout + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS, timeout_ms) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS) + assert retrieved_value == timeout_ms, f"Batch IO timeout mismatch: set {timeout_ms}, got {retrieved_value}" + + # Test execution parameters + max_io_queue_depth = 32 # Max 32 operations in queue + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH, max_io_queue_depth) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH) + assert retrieved_value == max_io_queue_depth, ( + f"Max IO queue depth mismatch: set {max_io_queue_depth}, got {retrieved_value}" + ) + + max_io_threads = 8 # Max 8 IO threads + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS, max_io_threads) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS) + assert retrieved_value == max_io_threads, ( + f"Max IO threads mismatch: set {max_io_threads}, got {retrieved_value}" + ) + + min_io_threshold_kb = 4 # 4KB minimum IO threshold + cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB, min_io_threshold_kb) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB) + assert retrieved_value == min_io_threshold_kb, ( + f"Min IO threshold mismatch: set {min_io_threshold_kb}, got {retrieved_value}" 
+ ) + + max_request_parallelism = 4 # Max 4 parallel requests + cufile.set_parameter_size_t( + cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM, max_request_parallelism + ) + retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM) + assert retrieved_value == max_request_parallelism, ( + f"Max request parallelism mismatch: set {max_request_parallelism}, got {retrieved_value}" + ) finally: cuda.cuDevicePrimaryCtxRelease(device) @@ -1882,7 +1903,8 @@ def test_get_parameter_bool(): @pytest.mark.skipif( cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" ) -def test_get_parameter_string(): + +def test_set_get_parameter_string(): """Test setting and getting string parameters with cuFile validation.""" # Initialize CUDA @@ -1964,7 +1986,6 @@ def test_get_parameter_string(): finally: cuda.cuDevicePrimaryCtxRelease(device) - @pytest.mark.skipif( cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 13.0 or later" ) From d29340a7ec41d21967fdc3d5a410a771141ecdfc Mon Sep 17 00:00:00 2001 From: Chloe Chia Date: Wed, 1 Oct 2025 18:27:25 +0000 Subject: [PATCH 5/6] Add previously deleted test --- cuda_bindings/tests/test_cufile.py | 81 ++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index 36386db6b..19da937f3 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -1904,6 +1904,87 @@ def test_set_get_parameter_size_t(): cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" ) +def test_set_get_parameter_bool(): + """Test setting and getting boolean parameters with cuFile validation.""" + + # Initialize CUDA + (err,) = cuda.cuInit(0) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, device = cuda.cuDeviceGet(0) + assert err == 
cuda.CUresult.CUDA_SUCCESS + + err, ctx = cuda.cuDevicePrimaryCtxRetain(device) + assert err == cuda.CUresult.CUDA_SUCCESS + (err,) = cuda.cuCtxSetCurrent(ctx) + assert err == cuda.CUresult.CUDA_SUCCESS + + try: + # Test setting and getting various boolean parameters + + # Test poll mode + cufile.set_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_USE_POLL_MODE, True) + retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_USE_POLL_MODE) + assert retrieved_value is True, f"Poll mode mismatch: set True, got {retrieved_value}" + + # Test compatibility mode + cufile.set_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_COMPAT_MODE, False) + retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_COMPAT_MODE) + assert retrieved_value is False, f"Compatibility mode mismatch: set False, got {retrieved_value}" + + # Test force compatibility mode + cufile.set_parameter_bool(cufile.BoolConfigParameter.FORCE_COMPAT_MODE, False) + retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.FORCE_COMPAT_MODE) + assert retrieved_value is False, f"Force compatibility mode mismatch: set False, got {retrieved_value}" + + # Test aggressive API check + cufile.set_parameter_bool(cufile.BoolConfigParameter.FS_MISC_API_CHECK_AGGRESSIVE, True) + retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.FS_MISC_API_CHECK_AGGRESSIVE) + assert retrieved_value is True, f"Aggressive API check mismatch: set True, got {retrieved_value}" + + # Test parallel IO + cufile.set_parameter_bool(cufile.BoolConfigParameter.EXECUTION_PARALLEL_IO, True) + retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.EXECUTION_PARALLEL_IO) + assert retrieved_value is True, f"Parallel IO mismatch: set True, got {retrieved_value}" + + # Test NVTX profiling + cufile.set_parameter_bool(cufile.BoolConfigParameter.PROFILE_NVTX, False) + retrieved_value = 
cufile.get_parameter_bool(cufile.BoolConfigParameter.PROFILE_NVTX) + assert retrieved_value is False, f"NVTX profiling mismatch: set False, got {retrieved_value}" + + # Test system memory allowance + cufile.set_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_SYSTEM_MEMORY, True) + retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_SYSTEM_MEMORY) + assert retrieved_value is True, f"System memory allowance mismatch: set True, got {retrieved_value}" + + # Test PCI P2P DMA + cufile.set_parameter_bool(cufile.BoolConfigParameter.USE_PCIP2PDMA, True) + retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.USE_PCIP2PDMA) + assert retrieved_value is True, f"PCI P2P DMA mismatch: set True, got {retrieved_value}" + + # Test IO uring preference + cufile.set_parameter_bool(cufile.BoolConfigParameter.PREFER_IO_URING, False) + retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PREFER_IO_URING) + assert retrieved_value is False, f"IO uring preference mismatch: set False, got {retrieved_value}" + + # Test force O_DIRECT mode + cufile.set_parameter_bool(cufile.BoolConfigParameter.FORCE_ODIRECT_MODE, True) + retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.FORCE_ODIRECT_MODE) + assert retrieved_value is True, f"Force O_DIRECT mode mismatch: set True, got {retrieved_value}" + + # Test topology detection skip + cufile.set_parameter_bool(cufile.BoolConfigParameter.SKIP_TOPOLOGY_DETECTION, False) + retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.SKIP_TOPOLOGY_DETECTION) + assert retrieved_value is False, f"Topology detection skip mismatch: set False, got {retrieved_value}" + + # Test stream memops bypass + cufile.set_parameter_bool(cufile.BoolConfigParameter.STREAM_MEMOPS_BYPASS, True) + retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.STREAM_MEMOPS_BYPASS) + assert retrieved_value is True, f"Stream memops bypass mismatch: set True, got 
{retrieved_value}" + + finally: + cuda.cuDevicePrimaryCtxRelease(device) + def test_set_get_parameter_string(): """Test setting and getting string parameters with cuFile validation.""" From ef73d2e62fbb4b04627c894af9dd432fa9fc0afd Mon Sep 17 00:00:00 2001 From: Chloe Chia Date: Wed, 1 Oct 2025 18:47:18 +0000 Subject: [PATCH 6/6] Simplify test_batch_io_large_operations --- cuda_bindings/tests/test_cufile.py | 259 ++++++----------------------- 1 file changed, 47 insertions(+), 212 deletions(-) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index 19da937f3..8148f0552 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -1412,7 +1412,6 @@ def test_batch_io_cancel(): cufile.driver_close() cuda.cuDevicePrimaryCtxRelease(device) - @pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem") def test_batch_io_large_operations(): """Test batch IO with large buffer operations.""" @@ -1458,15 +1457,12 @@ def test_batch_io_large_operations(): try: # Create file with O_DIRECT fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600) + # Register all buffers with cuFile all_buffers = write_buffers + read_buffers - for i, buf in enumerate(all_buffers): + for buf in all_buffers: buf_int = int(buf) - try: - cufile.buf_register(buf_int, buf_size, 0) - except Exception as e: - print(f"*** Buffer {i} registration FAILED: {e} ***") - raise + cufile.buf_register(buf_int, buf_size, 0) # Create file descriptor descr = cufile.Descr() @@ -1474,16 +1470,15 @@ def test_batch_io_large_operations(): descr.handle.fd = fd descr.fs_ops = 0 - # Register file handle handle = cufile.handle_register(descr.ptr) # Set up batch IO - batch_handle = cufile.batch_io_set_up(num_operations * 2) # 2 writes + 2 reads + batch_handle = cufile.batch_io_set_up(num_operations) # Only for writes # Create IOParams array for batch operations - io_params = 
cufile.IOParams(num_operations * 2) - io_events = cufile.IOEvents(num_operations * 2) + io_params = cufile.IOParams(num_operations) + io_events = cufile.IOEvents(num_operations) # Prepare test data test_strings = [ @@ -1513,33 +1508,10 @@ def test_batch_io_large_operations(): io_params[i].u.batch.dev_ptr_offset = 0 io_params[i].u.batch.size_ = buf_size - # Set up read operations - for i in range(num_operations): - idx = i + num_operations - io_params[idx].mode = cufile.BatchMode.BATCH # Batch mode - io_params[idx].fh = handle - io_params[idx].opcode = cufile.Opcode.READ # Read opcode - io_params[idx].cookie = i + 100 - io_params[idx].u.batch.dev_ptr_base = int(read_buffers[i]) - io_params[idx].u.batch.file_offset = i * buf_size - io_params[idx].u.batch.dev_ptr_offset = 0 - io_params[idx].u.batch.size_ = buf_size - - - - - for i in range(num_operations): - print(f" Op {i}: cookie={io_params[i].cookie}, opcode={io_params[i].opcode}, offset={io_params[i].u.batch.file_offset}") - - for i in range(num_operations): - idx = i + num_operations - print(f" Op {idx}: cookie={io_params[idx].cookie}, opcode={io_params[idx].opcode}, offset={io_params[idx].u.batch.file_offset}") - - - # Submit writes first - cufile.batch_io_submit(batch_handle, num_operations, io_params.ptr, 0) # Only writes + # Submit writes + cufile.batch_io_submit(batch_handle, num_operations, io_params.ptr, 0) - + # Wait for writes to complete nr_completed_writes = ctypes.c_uint(num_operations) timeout = ctypes.c_int(10000) cufile.batch_io_get_status( @@ -1547,24 +1519,24 @@ def test_batch_io_large_operations(): io_events.ptr, ctypes.addressof(timeout) ) - # Verify writes succeeded for i in range(nr_completed_writes.value): - if io_events[i].status != cufile.Status.COMPLETE: - raise RuntimeError(f"Write {i} failed: {io_events[i].status}") - print(f"Write {io_events[i].cookie}: {io_events[i].ret} bytes") + assert io_events[i].status == cufile.Status.COMPLETE, ( + f"Write {i} failed with status 
{io_events[i].status}" + ) # Force file sync os.fsync(fd) - print("File sync after writes completed") + + # Clean up write batch + cufile.batch_io_destroy(batch_handle) # Now submit reads separately - print("Submitting reads...") read_batch_handle = cufile.batch_io_set_up(num_operations) read_io_params = cufile.IOParams(num_operations) read_io_events = cufile.IOEvents(num_operations) - # Set up read operations in separate array + # Set up read operations for i in range(num_operations): read_io_params[i].mode = cufile.BatchMode.BATCH read_io_params[i].fh = handle @@ -1579,156 +1551,44 @@ def test_batch_io_large_operations(): cufile.batch_io_submit(read_batch_handle, num_operations, read_io_params.ptr, 0) # Wait for reads - nr_completed_reads = ctypes.c_uint(num_operations) + nr_completed = ctypes.c_uint(num_operations) cufile.batch_io_get_status( - read_batch_handle, num_operations, ctypes.addressof(nr_completed_reads), + read_batch_handle, num_operations, ctypes.addressof(nr_completed), read_io_events.ptr, ctypes.addressof(timeout) ) - - # Check read results - for i in range(nr_completed_reads.value): - print(f"Read {read_io_events[i].cookie}: {read_io_events[i].ret} bytes") - - # Use read_io_events for verification instead of io_events - io_events = read_io_events # Replace for rest of test - nr_completed = nr_completed_reads - - # Clean up read batch - cufile.batch_io_destroy(read_batch_handle) - - # Enhanced operation analysis - print("=== Detailed Operation Results ===") - # Check each operation's detailed status - write_ops = [] - read_ops = [] - - for i in range(nr_completed.value): - event = io_events[i] - status_name = "UNKNOWN" - try: - status_name = cufile.Status(event.status).name - except: - pass - - print(f"Operation {i}:") - print(f" Cookie: {event.cookie}") - print(f" Status: {event.status} ({status_name})") - print(f" Result: {event.ret}") - - # Categorize operations by cookie - if event.cookie < 100: # Write operations (cookies 0, 1) - 
write_ops.append({ - 'index': i, - 'cookie': event.cookie, - 'result': event.ret, - 'status': event.status - }) - print(f" -> WRITE operation: {event.ret} bytes") - else: # Read operations (cookies 100, 101) - read_ops.append({ - 'index': i, - 'cookie': event.cookie, - 'result': event.ret, - 'status': event.status - }) - print(f" -> READ operation: {event.ret} bytes") - - # Check if operation failed - if event.status != cufile.Status.COMPLETE: - print(f" *** OPERATION {i} FAILED ***") - if event.status == cufile.Status.FAILED: - print(f" Error code: {event.ret}") - - print("=== Operation Analysis ===") - print(f"Write operations completed: {len(write_ops)}") - print(f"Read operations completed: {len(read_ops)}") - - # Check if all writes succeeded before analyzing reads - all_writes_success = all(op['result'] > 0 for op in write_ops) - print(f"All writes successful: {all_writes_success}") - - if all_writes_success: - print("Writes completed successfully, reads should now work") - else: - print("Some writes failed - this could explain read failures") - - # Show operation completion order - print("=== Operation Completion Order ===") - for i, event in enumerate([(io_events[j].cookie, io_events[j].ret) for j in range(nr_completed.value)]): - cookie, result = event - op_type = "WRITE" if cookie < 100 else "READ" - print(f"Position {i}: {op_type} (cookie {cookie}) -> {result} bytes") - - # Write completion check - print("=== Write Completion Check ===") - # Check if writes actually completed by reading file size - file_stat = os.fstat(fd) - print(f"File size after batch: {file_stat.st_size}") - - # Try a small direct read to verify data is in file - try: - test_buf_size = 1024 - err, test_buf = cuda.cuMemAlloc(test_buf_size) - cufile.buf_register(int(test_buf), test_buf_size, 0) - - # Try reading first 1KB directly - cufile.read(handle, int(test_buf), test_buf_size, 0, 0) - - # Copy back and check - test_host_buf = ctypes.create_string_buffer(test_buf_size) - 
cuda.cuMemcpyDtoH(test_host_buf, test_buf, test_buf_size) - test_data = test_host_buf.value - - print(f"Direct read test: {len(test_data)} bytes") - print(f"First 50 bytes: {test_data[:50]!r}") - - # Cleanup test buffer - cufile.buf_deregister(int(test_buf)) - cuda.cuMemFree(test_buf) - - except Exception as e: - print(f"Direct read test failed: {e}") - # Verify all operations completed successfully assert nr_completed.value == num_operations, ( - f"Expected {num_operations} read operations, got {nr_completed.value}" + f"Expected {num_operations} operations, got {nr_completed.value}" ) # Collect all returned cookies returned_cookies = set() for i in range(num_operations): - if io_events[i].status != cufile.Status.COMPLETE: - print(f"*** Operation {i} with cookie {io_events[i].cookie} failed with status {io_events[i].status} ***") - assert io_events[i].status == cufile.Status.COMPLETE, ( - f"Operation {i} failed with status {io_events[i].status}" + assert read_io_events[i].status == cufile.Status.COMPLETE, ( + f"Operation {i} failed with status {read_io_events[i].status}" ) - returned_cookies.add(io_events[i].cookie) + returned_cookies.add(read_io_events[i].cookie) # Verify all expected cookies are present - expected_cookies = set(range(100, 100 + num_operations)) # read cookies 100,101 + expected_cookies = set(range(100, 100 + num_operations)) assert returned_cookies == expected_cookies, ( f"Cookie mismatch. 
Expected {expected_cookies}, got {returned_cookies}" ) # Verify the read data matches the written data for i in range(num_operations): - # Copy read data back to host cuda.cuMemcpyDtoHAsync(host_buf, read_buffers[i], buf_size, 0) cuda.cuStreamSynchronize(0) read_data = host_buf.value - # Prepare expected data test_string = test_strings[i] test_string_len = len(test_string) repetitions = buf_size // test_string_len expected_data = (test_string * repetitions)[:buf_size] - - - if read_data != expected_data: n = 100 # Show first n bytes raise RuntimeError( @@ -1738,58 +1598,33 @@ def test_batch_io_large_operations(): f"expected {expected_data[:n]!r}" ) - print("=== Test Completed Successfully ===") - - finally: - # Cleanup - try: - if 'all_buffers' in locals(): - for buf in all_buffers: - cufile.buf_deregister(int(buf)) - cuda.cuMemFree(buf) - except Exception as e: - print(f"Cleanup error: {e}") - - try: - if 'handle' in locals(): - cufile.handle_deregister(handle) - except Exception as e: - print(f"Handle deregister error: {e}") - - try: - if 'batch_handle' in locals(): - cufile.batch_io_destroy(batch_handle) - except Exception as e: - print(f"Batch destroy error: {e}") - - try: - if 'read_batch_handle' in locals(): - cufile.batch_io_destroy(read_batch_handle) - except Exception as e: - print(f"Read batch destroy error: {e}") - - try: - if 'fd' in locals(): - os.close(fd) - except Exception as e: - print(f"File close error: {e}") + # Clean up batch IO + cufile.batch_io_destroy(read_batch_handle) - try: - if os.path.exists(file_path): - os.remove(file_path) - except Exception as e: - print(f"File remove error: {e}") + # Deregister file handle + cufile.handle_deregister(handle) - try: - cufile.driver_close() - except Exception as e: - print(f"Driver close error: {e}") + # Deregister buffers + for buf in all_buffers: + buf_int = int(buf) + cufile.buf_deregister(buf_int) + finally: + # Close file + os.close(fd) + # Free CUDA memory + for buf in all_buffers: + 
cuda.cuMemFree(buf) + # Clean up test file try: - cuda.cuDevicePrimaryCtxRelease(device) - except Exception as e: - print(f"Context release error: {e}") - + os.unlink(file_path) + except OSError as e: + if e.errno != errno.ENOENT: + raise + # Close cuFile driver + cufile.driver_close() + cuda.cuDevicePrimaryCtxRelease(device) + @pytest.mark.skipif( cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later" ) @@ -1984,7 +1819,7 @@ def test_set_get_parameter_bool(): finally: cuda.cuDevicePrimaryCtxRelease(device) - + def test_set_get_parameter_string(): """Test setting and getting string parameters with cuFile validation."""