From f01c309d7b005ea36dc4691b6bad7018fcd6f6b7 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 17 Nov 2025 14:13:37 -0500 Subject: [PATCH 01/15] Test the new AUTO_LOWPP_CLASS generation --- cuda_bindings/cuda/bindings/cufile.pyx | 1587 +++++++++++++--------- cuda_bindings/cuda/bindings/cycufile.pxd | 65 +- 2 files changed, 992 insertions(+), 660 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index a4e5c23994..95d6087f3a 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -8,9 +8,6 @@ cimport cython # NOQA from libc cimport errno from ._internal.utils cimport (get_buffer_pointer, get_nested_resource_ptr, nested_resource) -import numpy as _numpy -from cpython cimport buffer as _buffer -from cpython.memoryview cimport PyMemoryView_FromMemory from enum import IntEnum as _IntEnum cimport cpython @@ -18,6 +15,23 @@ import cython from cuda.bindings.driver import CUresult as pyCUresult +from libc.stdlib cimport calloc, free, malloc +cimport cpython +cimport cpython.buffer +cimport cpython.memoryview +from libc.string cimport memcmp, memcpy +import numpy as _numpy + + +cdef __from_data(data, dtype_name, expected_dtype, lowpp_type_from_ptr): + # _numpy.recarray is a subclass of _numpy.ndarray, so implicitly handled here. + if not isinstance(data, _numpy.ndarray): + raise TypeError("data argument must be a NumPy ndarray") + if data.size != 1: + raise ValueError("data array must have a size of 1") + if data.dtype != expected_dtype: + raise ValueError(f"data array must be of dtype {dtype_name}") + return lowpp_type_from_ptr(data.ctypes.data, not data.flags.writeable, data) ############################################################################### # POD @@ -39,13 +53,22 @@ cdef class _py_anon_pod1: .. 
seealso:: `_anon_pod1` """ cdef: - readonly object _data + _anon_pod1 *_ptr + object _owner + bint _readonly + dict _refs def __init__(self): - arr = _numpy.empty(1, dtype=_py_anon_pod1_dtype) - self._data = arr.view(_numpy.recarray) - assert self._data.itemsize == sizeof((NULL).handle), \ - f"itemsize {self._data.itemsize} mismatches union size {sizeof((NULL).handle)}" + self._ptr = <_anon_pod1 *>calloc(1, sizeof((NULL).handle)) + if self._ptr == NULL: + raise MemoryError("Error allocating _py_anon_pod1") + self._owner = None + self._readonly = False + self._refs = {} + + def __dealloc__(self): + if self._owner is None: + free(self._ptr) def __repr__(self): return f"<{__name__}._py_anon_pod1 object at {hex(id(self))}>" @@ -53,81 +76,86 @@ cdef class _py_anon_pod1: @property def ptr(self): """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) cdef intptr_t _get_ptr(self): - """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) def __int__(self): - return self._data.ctypes.data + return (self._ptr) def __eq__(self, other): + cdef _py_anon_pod1 other_ if not isinstance(other, _py_anon_pod1): return False - if self._data.size != other._data.size: - return False - if self._data.dtype != other._data.dtype: - return False - return bool((self._data == other._data).all()) + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof((NULL).handle)) == 0) + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = <_anon_pod1 *>malloc(sizeof((NULL).handle)) + if self._ptr == NULL: + raise MemoryError("Error allocating _py_anon_pod1") + memcpy(self._ptr, val.ctypes.data, sizeof((NULL).handle)) + self._owner = None + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) @property def fd(self): """int: """ - return int(self._data.fd[0]) + return self._ptr[0].fd @fd.setter def 
fd(self, val): - self._data.fd = val + if self._readonly: + raise ValueError("This _py_anon_pod1 instance is read-only") + self._ptr[0].fd = val @property def handle(self): """int: """ - return int(self._data.handle[0]) + return (self._ptr[0].handle) @handle.setter def handle(self, val): - self._data.handle = val - - def __setitem__(self, key, val): - self._data[key] = val + if self._readonly: + raise ValueError("This _py_anon_pod1 instance is read-only") + self._ptr[0].handle = val @staticmethod def from_data(data): """Create an _py_anon_pod1 instance wrapping the given NumPy array. Args: - data (_numpy.ndarray): a 1D array of dtype `_py_anon_pod1_dtype` holding the data. + data (_numpy.ndarray): a single-element array of dtype `_py_anon_pod1_dtype` holding the data. """ - cdef _py_anon_pod1 obj = _py_anon_pod1.__new__(_py_anon_pod1) - if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): - raise TypeError("data argument must be a NumPy ndarray") - if data.ndim != 1: - raise ValueError("data array must be 1D") - if data.dtype != _py_anon_pod1_dtype: - raise ValueError("data array must be of dtype _py_anon_pod1_dtype") - obj._data = data.view(_numpy.recarray) - - return obj + return __from_data(data, "_py_anon_pod1_dtype", _py_anon_pod1_dtype, _py_anon_pod1.from_ptr) @staticmethod - def from_ptr(intptr_t ptr, bint readonly=False): + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): """Create an _py_anon_pod1 instance wrapping the given pointer. Args: ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. readonly (bool): whether the data is read-only (to the user). default is `False`. 
""" if ptr == 0: raise ValueError("ptr must not be null (0)") cdef _py_anon_pod1 obj = _py_anon_pod1.__new__(_py_anon_pod1) - cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE - cdef object buf = PyMemoryView_FromMemory( - ptr, sizeof((NULL).handle), flag) - data = _numpy.ndarray((1,), buffer=buf, - dtype=_py_anon_pod1_dtype) - obj._data = data.view(_numpy.recarray) - + if owner is None: + obj._ptr = <_anon_pod1 *>malloc(sizeof((NULL).handle)) + if obj._ptr == NULL: + raise MemoryError("Error allocating _py_anon_pod1") + memcpy((obj._ptr), ptr, sizeof((NULL).handle)) + obj._owner = None + else: + obj._ptr = <_anon_pod1 *>ptr + obj._owner = owner + obj._readonly = readonly + obj._refs = {} return obj @@ -146,13 +174,22 @@ cdef class _py_anon_pod3: .. seealso:: `_anon_pod3` """ cdef: - readonly object _data + _anon_pod3 *_ptr + object _owner + bint _readonly + dict _refs def __init__(self): - arr = _numpy.empty(1, dtype=_py_anon_pod3_dtype) - self._data = arr.view(_numpy.recarray) - assert self._data.itemsize == sizeof((NULL).u.batch), \ - f"itemsize {self._data.itemsize} mismatches struct size {sizeof((NULL).u.batch)}" + self._ptr = <_anon_pod3 *>calloc(1, sizeof((NULL).u.batch)) + if self._ptr == NULL: + raise MemoryError("Error allocating _py_anon_pod3") + self._owner = None + self._readonly = False + self._refs = {} + + def __dealloc__(self): + if self._owner is None: + free(self._ptr) def __repr__(self): return f"<{__name__}._py_anon_pod3 object at {hex(id(self))}>" @@ -160,99 +197,108 @@ cdef class _py_anon_pod3: @property def ptr(self): """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) cdef intptr_t _get_ptr(self): - """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) def __int__(self): - return self._data.ctypes.data + return (self._ptr) def __eq__(self, other): + cdef _py_anon_pod3 other_ if not 
isinstance(other, _py_anon_pod3): return False - if self._data.size != other._data.size: - return False - if self._data.dtype != other._data.dtype: - return False - return bool((self._data == other._data).all()) + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof((NULL).u.batch)) == 0) + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = <_anon_pod3 *>malloc(sizeof((NULL).u.batch)) + if self._ptr == NULL: + raise MemoryError("Error allocating _py_anon_pod3") + memcpy(self._ptr, val.ctypes.data, sizeof((NULL).u.batch)) + self._owner = None + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) @property def dev_ptr_base(self): """int: """ - return int(self._data.dev_ptr_base[0]) + return (self._ptr[0].devPtr_base) @dev_ptr_base.setter def dev_ptr_base(self, val): - self._data.dev_ptr_base = val + if self._readonly: + raise ValueError("This _py_anon_pod3 instance is read-only") + self._ptr[0].devPtr_base = val @property def file_offset(self): """int: """ - return int(self._data.file_offset[0]) + return self._ptr[0].file_offset @file_offset.setter def file_offset(self, val): - self._data.file_offset = val + if self._readonly: + raise ValueError("This _py_anon_pod3 instance is read-only") + self._ptr[0].file_offset = val @property def dev_ptr_offset(self): """int: """ - return int(self._data.dev_ptr_offset[0]) + return self._ptr[0].devPtr_offset @dev_ptr_offset.setter def dev_ptr_offset(self, val): - self._data.dev_ptr_offset = val + if self._readonly: + raise ValueError("This _py_anon_pod3 instance is read-only") + self._ptr[0].devPtr_offset = val @property def size_(self): """int: """ - return int(self._data.size_[0]) + return self._ptr[0].size @size_.setter def size_(self, val): - self._data.size_ = val - - def __setitem__(self, key, val): - self._data[key] = val + if self._readonly: + raise ValueError("This _py_anon_pod3 instance is read-only") + self._ptr[0].size = val 
@staticmethod def from_data(data): """Create an _py_anon_pod3 instance wrapping the given NumPy array. Args: - data (_numpy.ndarray): a 1D array of dtype `_py_anon_pod3_dtype` holding the data. + data (_numpy.ndarray): a single-element array of dtype `_py_anon_pod3_dtype` holding the data. """ - cdef _py_anon_pod3 obj = _py_anon_pod3.__new__(_py_anon_pod3) - if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): - raise TypeError("data argument must be a NumPy ndarray") - if data.ndim != 1: - raise ValueError("data array must be 1D") - if data.dtype != _py_anon_pod3_dtype: - raise ValueError("data array must be of dtype _py_anon_pod3_dtype") - obj._data = data.view(_numpy.recarray) - - return obj + return __from_data(data, "_py_anon_pod3_dtype", _py_anon_pod3_dtype, _py_anon_pod3.from_ptr) @staticmethod - def from_ptr(intptr_t ptr, bint readonly=False): + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): """Create an _py_anon_pod3 instance wrapping the given pointer. Args: ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. readonly (bool): whether the data is read-only (to the user). default is `False`. 
""" if ptr == 0: raise ValueError("ptr must not be null (0)") cdef _py_anon_pod3 obj = _py_anon_pod3.__new__(_py_anon_pod3) - cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE - cdef object buf = PyMemoryView_FromMemory( - ptr, sizeof((NULL).u.batch), flag) - data = _numpy.ndarray((1,), buffer=buf, - dtype=_py_anon_pod3_dtype) - obj._data = data.view(_numpy.recarray) - + if owner is None: + obj._ptr = <_anon_pod3 *>malloc(sizeof((NULL).u.batch)) + if obj._ptr == NULL: + raise MemoryError("Error allocating _py_anon_pod3") + memcpy((obj._ptr), ptr, sizeof((NULL).u.batch)) + obj._owner = None + else: + obj._ptr = <_anon_pod3 *>ptr + obj._owner = owner + obj._readonly = readonly + obj._refs = {} return obj @@ -278,11 +324,13 @@ cdef class IOEvents: cdef: readonly object _data + + def __init__(self, size=1): arr = _numpy.empty(size, dtype=io_events_dtype) self._data = arr.view(_numpy.recarray) assert self._data.itemsize == sizeof(CUfileIOEvents_t), \ - f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfileIOEvents_t)}" + f"itemsize {self._data.itemsize} mismatches struct size { sizeof(CUfileIOEvents_t) }" def __repr__(self): if self._data.size > 1: @@ -296,7 +344,6 @@ cdef class IOEvents: return self._data.ctypes.data cdef intptr_t _get_ptr(self): - """Get the pointer address to the data as Python :class:`int`.""" return self._data.ctypes.data def __int__(self): @@ -309,13 +356,10 @@ cdef class IOEvents: return self._data.size def __eq__(self, other): - if not isinstance(other, IOEvents): - return False - if self._data.size != other._data.size: - return False - if self._data.dtype != other._data.dtype: + cdef object self_data = self._data + if (not isinstance(other, IOEvents)) or self_data.size != other._data.size or self_data.dtype != other._data.dtype: return False - return bool((self._data == other._data).all()) + return bool((self_data == other._data).all()) @property def cookie(self): @@ -351,13 +395,16 @@ cdef class IOEvents: 
self._data.ret = val def __getitem__(self, key): + cdef ssize_t key_ + cdef ssize_t size if isinstance(key, int): + key_ = key size = self._data.size - if key >= size or key <= -(size+1): + if key_ >= size or key_ <= -(size+1): raise IndexError("index is out of bounds") - if key < 0: - key += size - return IOEvents.from_data(self._data[key:key+1]) + if key_ < 0: + key_ += size + return IOEvents.from_data(self._data[key_:key_+1]) out = self._data[key] if isinstance(out, _numpy.recarray) and out.dtype == io_events_dtype: return IOEvents.from_data(out) @@ -374,7 +421,7 @@ cdef class IOEvents: data (_numpy.ndarray): a 1D array of dtype `io_events_dtype` holding the data. """ cdef IOEvents obj = IOEvents.__new__(IOEvents) - if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): + if not isinstance(data, _numpy.ndarray): raise TypeError("data argument must be a NumPy ndarray") if data.ndim != 1: raise ValueError("data array must be 1D") @@ -396,11 +443,10 @@ cdef class IOEvents: if ptr == 0: raise ValueError("ptr must not be null (0)") cdef IOEvents obj = IOEvents.__new__(IOEvents) - cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE - cdef object buf = PyMemoryView_FromMemory( + cdef flag = cpython.buffer.PyBUF_READ if readonly else cpython.buffer.PyBUF_WRITE + cdef object buf = cpython.memoryview.PyMemoryView_FromMemory( ptr, sizeof(CUfileIOEvents_t) * size, flag) - data = _numpy.ndarray((size,), buffer=buf, - dtype=io_events_dtype) + data = _numpy.ndarray(size, buffer=buf, dtype=io_events_dtype) obj._data = data.view(_numpy.recarray) return obj @@ -419,13 +465,20 @@ cdef class OpCounter: .. 
seealso:: `CUfileOpCounter_t` """ cdef: - readonly object _data + CUfileOpCounter_t *_ptr + object _owner + bint _readonly def __init__(self): - arr = _numpy.empty(1, dtype=op_counter_dtype) - self._data = arr.view(_numpy.recarray) - assert self._data.itemsize == sizeof(CUfileOpCounter_t), \ - f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfileOpCounter_t)}" + self._ptr = calloc(1, sizeof(CUfileOpCounter_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating OpCounter") + self._owner = None + self._readonly = False + + def __dealloc__(self): + if self._owner is None: + free(self._ptr) def __repr__(self): return f"<{__name__}.OpCounter object at {hex(id(self))}>" @@ -433,81 +486,85 @@ cdef class OpCounter: @property def ptr(self): """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) cdef intptr_t _get_ptr(self): - """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) def __int__(self): - return self._data.ctypes.data + return (self._ptr) def __eq__(self, other): + cdef OpCounter other_ if not isinstance(other, OpCounter): return False - if self._data.size != other._data.size: - return False - if self._data.dtype != other._data.dtype: - return False - return bool((self._data == other._data).all()) + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(CUfileOpCounter_t)) == 0) + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(CUfileOpCounter_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating OpCounter") + memcpy(self._ptr, val.ctypes.data, sizeof(CUfileOpCounter_t)) + self._owner = None + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) @property def ok(self): """int: """ - return int(self._data.ok[0]) + return self._ptr[0].ok @ok.setter def ok(self, val): - self._data.ok = val + if 
self._readonly: + raise ValueError("This OpCounter instance is read-only") + self._ptr[0].ok = val @property def err(self): """int: """ - return int(self._data.err[0]) + return self._ptr[0].err @err.setter def err(self, val): - self._data.err = val - - def __setitem__(self, key, val): - self._data[key] = val + if self._readonly: + raise ValueError("This OpCounter instance is read-only") + self._ptr[0].err = val @staticmethod def from_data(data): """Create an OpCounter instance wrapping the given NumPy array. Args: - data (_numpy.ndarray): a 1D array of dtype `op_counter_dtype` holding the data. + data (_numpy.ndarray): a single-element array of dtype `op_counter_dtype` holding the data. """ - cdef OpCounter obj = OpCounter.__new__(OpCounter) - if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): - raise TypeError("data argument must be a NumPy ndarray") - if data.ndim != 1: - raise ValueError("data array must be 1D") - if data.dtype != op_counter_dtype: - raise ValueError("data array must be of dtype op_counter_dtype") - obj._data = data.view(_numpy.recarray) - - return obj + return __from_data(data, "op_counter_dtype", op_counter_dtype, OpCounter.from_ptr) @staticmethod - def from_ptr(intptr_t ptr, bint readonly=False): + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): """Create an OpCounter instance wrapping the given pointer. Args: ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. readonly (bool): whether the data is read-only (to the user). default is `False`. 
""" if ptr == 0: raise ValueError("ptr must not be null (0)") cdef OpCounter obj = OpCounter.__new__(OpCounter) - cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE - cdef object buf = PyMemoryView_FromMemory( - ptr, sizeof(CUfileOpCounter_t), flag) - data = _numpy.ndarray((1,), buffer=buf, - dtype=op_counter_dtype) - obj._data = data.view(_numpy.recarray) - + if owner is None: + obj._ptr = malloc(sizeof(CUfileOpCounter_t)) + if obj._ptr == NULL: + raise MemoryError("Error allocating OpCounter") + memcpy((obj._ptr), ptr, sizeof(CUfileOpCounter_t)) + obj._owner = None + else: + obj._ptr = ptr + obj._owner = owner + obj._readonly = readonly return obj @@ -552,13 +609,20 @@ cdef class PerGpuStats: .. seealso:: `CUfilePerGpuStats_t` """ cdef: - readonly object _data + CUfilePerGpuStats_t *_ptr + object _owner + bint _readonly def __init__(self): - arr = _numpy.empty(1, dtype=per_gpu_stats_dtype) - self._data = arr.view(_numpy.recarray) - assert self._data.itemsize == sizeof(CUfilePerGpuStats_t), \ - f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfilePerGpuStats_t)}" + self._ptr = calloc(1, sizeof(CUfilePerGpuStats_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating PerGpuStats") + self._owner = None + self._readonly = False + + def __dealloc__(self): + if self._owner is None: + free(self._ptr) def __repr__(self): return f"<{__name__}.PerGpuStats object at {hex(id(self))}>" @@ -566,333 +630,397 @@ cdef class PerGpuStats: @property def ptr(self): """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) cdef intptr_t _get_ptr(self): - """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) def __int__(self): - return self._data.ctypes.data + return (self._ptr) def __eq__(self, other): + cdef PerGpuStats other_ if not isinstance(other, PerGpuStats): return False - if self._data.size != 
other._data.size: - return False - if self._data.dtype != other._data.dtype: - return False - return bool((self._data == other._data).all()) + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(CUfilePerGpuStats_t)) == 0) + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(CUfilePerGpuStats_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating PerGpuStats") + memcpy(self._ptr, val.ctypes.data, sizeof(CUfilePerGpuStats_t)) + self._owner = None + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) @property def uuid(self): """~_numpy.int8: (array of length 16).""" - return self._data.uuid + return cpython.PyUnicode_FromString(self._ptr[0].uuid) @uuid.setter def uuid(self, val): - self._data.uuid = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + cdef bytes buf = val.encode() + if len(buf) >= 16: + raise ValueError("String too long for field uuid, max length is 15") + cdef char *ptr = buf + memcpy((self._ptr[0].uuid), ptr, 16) @property def read_bytes(self): """int: """ - return int(self._data.read_bytes[0]) + return self._ptr[0].read_bytes @read_bytes.setter def read_bytes(self, val): - self._data.read_bytes = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].read_bytes = val @property def read_bw_bytes_per_sec(self): """int: """ - return int(self._data.read_bw_bytes_per_sec[0]) + return self._ptr[0].read_bw_bytes_per_sec @read_bw_bytes_per_sec.setter def read_bw_bytes_per_sec(self, val): - self._data.read_bw_bytes_per_sec = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].read_bw_bytes_per_sec = val @property def read_utilization(self): """int: """ - return int(self._data.read_utilization[0]) + return self._ptr[0].read_utilization @read_utilization.setter def read_utilization(self, val): - 
self._data.read_utilization = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].read_utilization = val @property def read_duration_us(self): """int: """ - return int(self._data.read_duration_us[0]) + return self._ptr[0].read_duration_us @read_duration_us.setter def read_duration_us(self, val): - self._data.read_duration_us = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].read_duration_us = val @property def n_total_reads(self): """int: """ - return int(self._data.n_total_reads[0]) + return self._ptr[0].n_total_reads @n_total_reads.setter def n_total_reads(self, val): - self._data.n_total_reads = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_total_reads = val @property def n_p2p_reads(self): """int: """ - return int(self._data.n_p2p_reads[0]) + return self._ptr[0].n_p2p_reads @n_p2p_reads.setter def n_p2p_reads(self, val): - self._data.n_p2p_reads = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_p2p_reads = val @property def n_nvfs_reads(self): """int: """ - return int(self._data.n_nvfs_reads[0]) + return self._ptr[0].n_nvfs_reads @n_nvfs_reads.setter def n_nvfs_reads(self, val): - self._data.n_nvfs_reads = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_nvfs_reads = val @property def n_posix_reads(self): """int: """ - return int(self._data.n_posix_reads[0]) + return self._ptr[0].n_posix_reads @n_posix_reads.setter def n_posix_reads(self, val): - self._data.n_posix_reads = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_posix_reads = val @property def n_unaligned_reads(self): """int: """ - return int(self._data.n_unaligned_reads[0]) + return self._ptr[0].n_unaligned_reads @n_unaligned_reads.setter def n_unaligned_reads(self, val): - 
self._data.n_unaligned_reads = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_unaligned_reads = val @property def n_dr_reads(self): """int: """ - return int(self._data.n_dr_reads[0]) + return self._ptr[0].n_dr_reads @n_dr_reads.setter def n_dr_reads(self, val): - self._data.n_dr_reads = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_dr_reads = val @property def n_sparse_regions(self): """int: """ - return int(self._data.n_sparse_regions[0]) + return self._ptr[0].n_sparse_regions @n_sparse_regions.setter def n_sparse_regions(self, val): - self._data.n_sparse_regions = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_sparse_regions = val @property def n_inline_regions(self): """int: """ - return int(self._data.n_inline_regions[0]) + return self._ptr[0].n_inline_regions @n_inline_regions.setter def n_inline_regions(self, val): - self._data.n_inline_regions = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_inline_regions = val @property def n_reads_err(self): """int: """ - return int(self._data.n_reads_err[0]) + return self._ptr[0].n_reads_err @n_reads_err.setter def n_reads_err(self, val): - self._data.n_reads_err = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_reads_err = val @property def writes_bytes(self): """int: """ - return int(self._data.writes_bytes[0]) + return self._ptr[0].writes_bytes @writes_bytes.setter def writes_bytes(self, val): - self._data.writes_bytes = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].writes_bytes = val @property def write_bw_bytes_per_sec(self): """int: """ - return int(self._data.write_bw_bytes_per_sec[0]) + return self._ptr[0].write_bw_bytes_per_sec @write_bw_bytes_per_sec.setter def 
write_bw_bytes_per_sec(self, val): - self._data.write_bw_bytes_per_sec = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].write_bw_bytes_per_sec = val @property def write_utilization(self): """int: """ - return int(self._data.write_utilization[0]) + return self._ptr[0].write_utilization @write_utilization.setter def write_utilization(self, val): - self._data.write_utilization = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].write_utilization = val @property def write_duration_us(self): """int: """ - return int(self._data.write_duration_us[0]) + return self._ptr[0].write_duration_us @write_duration_us.setter def write_duration_us(self, val): - self._data.write_duration_us = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].write_duration_us = val @property def n_total_writes(self): """int: """ - return int(self._data.n_total_writes[0]) + return self._ptr[0].n_total_writes @n_total_writes.setter def n_total_writes(self, val): - self._data.n_total_writes = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_total_writes = val @property def n_p2p_writes(self): """int: """ - return int(self._data.n_p2p_writes[0]) + return self._ptr[0].n_p2p_writes @n_p2p_writes.setter def n_p2p_writes(self, val): - self._data.n_p2p_writes = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_p2p_writes = val @property def n_nvfs_writes(self): """int: """ - return int(self._data.n_nvfs_writes[0]) + return self._ptr[0].n_nvfs_writes @n_nvfs_writes.setter def n_nvfs_writes(self, val): - self._data.n_nvfs_writes = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_nvfs_writes = val @property def n_posix_writes(self): """int: """ - return int(self._data.n_posix_writes[0]) + return 
self._ptr[0].n_posix_writes @n_posix_writes.setter def n_posix_writes(self, val): - self._data.n_posix_writes = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_posix_writes = val @property def n_unaligned_writes(self): """int: """ - return int(self._data.n_unaligned_writes[0]) + return self._ptr[0].n_unaligned_writes @n_unaligned_writes.setter def n_unaligned_writes(self, val): - self._data.n_unaligned_writes = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_unaligned_writes = val @property def n_dr_writes(self): """int: """ - return int(self._data.n_dr_writes[0]) + return self._ptr[0].n_dr_writes @n_dr_writes.setter def n_dr_writes(self, val): - self._data.n_dr_writes = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_dr_writes = val @property def n_writes_err(self): """int: """ - return int(self._data.n_writes_err[0]) + return self._ptr[0].n_writes_err @n_writes_err.setter def n_writes_err(self, val): - self._data.n_writes_err = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_writes_err = val @property def n_mmap(self): """int: """ - return int(self._data.n_mmap[0]) + return self._ptr[0].n_mmap @n_mmap.setter def n_mmap(self, val): - self._data.n_mmap = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_mmap = val @property def n_mmap_ok(self): """int: """ - return int(self._data.n_mmap_ok[0]) + return self._ptr[0].n_mmap_ok @n_mmap_ok.setter def n_mmap_ok(self, val): - self._data.n_mmap_ok = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_mmap_ok = val @property def n_mmap_err(self): """int: """ - return int(self._data.n_mmap_err[0]) + return self._ptr[0].n_mmap_err @n_mmap_err.setter def n_mmap_err(self, val): - self._data.n_mmap_err = val + 
if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_mmap_err = val @property def n_mmap_free(self): """int: """ - return int(self._data.n_mmap_free[0]) + return self._ptr[0].n_mmap_free @n_mmap_free.setter def n_mmap_free(self, val): - self._data.n_mmap_free = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].n_mmap_free = val @property def reg_bytes(self): """int: """ - return int(self._data.reg_bytes[0]) + return self._ptr[0].reg_bytes @reg_bytes.setter def reg_bytes(self, val): - self._data.reg_bytes = val - - def __setitem__(self, key, val): - self._data[key] = val + if self._readonly: + raise ValueError("This PerGpuStats instance is read-only") + self._ptr[0].reg_bytes = val @staticmethod def from_data(data): """Create an PerGpuStats instance wrapping the given NumPy array. Args: - data (_numpy.ndarray): a 1D array of dtype `per_gpu_stats_dtype` holding the data. + data (_numpy.ndarray): a single-element array of dtype `per_gpu_stats_dtype` holding the data. """ - cdef PerGpuStats obj = PerGpuStats.__new__(PerGpuStats) - if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): - raise TypeError("data argument must be a NumPy ndarray") - if data.ndim != 1: - raise ValueError("data array must be 1D") - if data.dtype != per_gpu_stats_dtype: - raise ValueError("data array must be of dtype per_gpu_stats_dtype") - obj._data = data.view(_numpy.recarray) - - return obj + return __from_data(data, "per_gpu_stats_dtype", per_gpu_stats_dtype, PerGpuStats.from_ptr) @staticmethod - def from_ptr(intptr_t ptr, bint readonly=False): + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): """Create an PerGpuStats instance wrapping the given pointer. Args: ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. 
readonly (bool): whether the data is read-only (to the user). default is `False`. """ if ptr == 0: raise ValueError("ptr must not be null (0)") cdef PerGpuStats obj = PerGpuStats.__new__(PerGpuStats) - cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE - cdef object buf = PyMemoryView_FromMemory( - ptr, sizeof(CUfilePerGpuStats_t), flag) - data = _numpy.ndarray((1,), buffer=buf, - dtype=per_gpu_stats_dtype) - obj._data = data.view(_numpy.recarray) - + if owner is None: + obj._ptr = malloc(sizeof(CUfilePerGpuStats_t)) + if obj._ptr == NULL: + raise MemoryError("Error allocating PerGpuStats") + memcpy((obj._ptr), ptr, sizeof(CUfilePerGpuStats_t)) + obj._owner = None + else: + obj._ptr = ptr + obj._owner = owner + obj._readonly = readonly return obj @@ -918,11 +1046,15 @@ cdef class Descr: cdef: readonly object _data + readonly tuple _handle + + + def __init__(self, size=1): arr = _numpy.empty(size, dtype=descr_dtype) self._data = arr.view(_numpy.recarray) assert self._data.itemsize == sizeof(CUfileDescr_t), \ - f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfileDescr_t)}" + f"itemsize {self._data.itemsize} mismatches struct size { sizeof(CUfileDescr_t) }" def __repr__(self): if self._data.size > 1: @@ -936,7 +1068,6 @@ cdef class Descr: return self._data.ctypes.data cdef intptr_t _get_ptr(self): - """Get the pointer address to the data as Python :class:`int`.""" return self._data.ctypes.data def __int__(self): @@ -949,13 +1080,17 @@ cdef class Descr: return self._data.size def __eq__(self, other): - if not isinstance(other, Descr): - return False - if self._data.size != other._data.size: + cdef object self_data = self._data + if (not isinstance(other, Descr)) or self_data.size != other._data.size or self_data.dtype != other._data.dtype: return False - if self._data.dtype != other._data.dtype: - return False - return bool((self._data == other._data).all()) + return bool((self_data == other._data).all()) + + @property + def 
handle(self): + """_py_anon_pod1: """ + if self._data.size == 1: + return self._handle[0] + return self._handle @property def type(self): @@ -968,15 +1103,6 @@ cdef class Descr: def type(self, val): self._data.type = val - @property - def handle(self): - """_py_anon_pod1_dtype: """ - return self._data.handle - - @handle.setter - def handle(self, val): - self._data.handle = val - @property def fs_ops(self): """Union[~_numpy.intp, int]: """ @@ -989,13 +1115,16 @@ cdef class Descr: self._data.fs_ops = val def __getitem__(self, key): + cdef ssize_t key_ + cdef ssize_t size if isinstance(key, int): + key_ = key size = self._data.size - if key >= size or key <= -(size+1): + if key_ >= size or key_ <= -(size+1): raise IndexError("index is out of bounds") - if key < 0: - key += size - return Descr.from_data(self._data[key:key+1]) + if key_ < 0: + key_ += size + return Descr.from_data(self._data[key_:key_+1]) out = self._data[key] if isinstance(out, _numpy.recarray) and out.dtype == descr_dtype: return Descr.from_data(out) @@ -1012,7 +1141,7 @@ cdef class Descr: data (_numpy.ndarray): a 1D array of dtype `descr_dtype` holding the data. 
""" cdef Descr obj = Descr.__new__(Descr) - if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): + if not isinstance(data, _numpy.ndarray): raise TypeError("data argument must be a NumPy ndarray") if data.ndim != 1: raise ValueError("data array must be 1D") @@ -1020,6 +1149,13 @@ cdef class Descr: raise ValueError("data array must be of dtype descr_dtype") obj._data = data.view(_numpy.recarray) + handle_list = list() + for i in range(obj._data.size): + addr = obj._data.handle[i].__array_interface__['data'][0] + _py_anon_pod1_obj = _py_anon_pod1.from_ptr(addr, owner=obj) + handle_list.append(_py_anon_pod1_obj) + + obj._handle = tuple(handle_list) return obj @staticmethod @@ -1034,13 +1170,19 @@ cdef class Descr: if ptr == 0: raise ValueError("ptr must not be null (0)") cdef Descr obj = Descr.__new__(Descr) - cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE - cdef object buf = PyMemoryView_FromMemory( + cdef flag = cpython.buffer.PyBUF_READ if readonly else cpython.buffer.PyBUF_WRITE + cdef object buf = cpython.memoryview.PyMemoryView_FromMemory( ptr, sizeof(CUfileDescr_t) * size, flag) - data = _numpy.ndarray((size,), buffer=buf, - dtype=descr_dtype) + data = _numpy.ndarray(size, buffer=buf, dtype=descr_dtype) obj._data = data.view(_numpy.recarray) + handle_list = list() + for i in range(obj._data.size): + addr = obj._data.handle[i].__array_interface__['data'][0] + _py_anon_pod1_obj = _py_anon_pod1.from_ptr(addr, owner=obj) + handle_list.append(_py_anon_pod1_obj) + + obj._handle = tuple(handle_list) return obj @@ -1059,15 +1201,20 @@ cdef class _py_anon_pod2: .. 
seealso:: `_anon_pod2` """ cdef: - readonly object _data - - readonly object _batch + _anon_pod2 *_ptr + object _owner + bint _readonly def __init__(self): - arr = _numpy.empty(1, dtype=_py_anon_pod2_dtype) - self._data = arr.view(_numpy.recarray) - assert self._data.itemsize == sizeof((NULL).u), \ - f"itemsize {self._data.itemsize} mismatches union size {sizeof((NULL).u)}" + self._ptr = <_anon_pod2 *>calloc(1, sizeof((NULL).u)) + if self._ptr == NULL: + raise MemoryError("Error allocating _py_anon_pod2") + self._owner = None + self._readonly = False + + def __dealloc__(self): + if self._owner is None: + free(self._ptr) def __repr__(self): return f"<{__name__}._py_anon_pod2 object at {hex(id(self))}>" @@ -1075,72 +1222,75 @@ cdef class _py_anon_pod2: @property def ptr(self): """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) cdef intptr_t _get_ptr(self): - """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) def __int__(self): - return self._data.ctypes.data + return (self._ptr) def __eq__(self, other): + cdef _py_anon_pod2 other_ if not isinstance(other, _py_anon_pod2): return False - if self._data.size != other._data.size: - return False - if self._data.dtype != other._data.dtype: - return False - return bool((self._data == other._data).all()) + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof((NULL).u)) == 0) + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = <_anon_pod2 *>malloc(sizeof((NULL).u)) + if self._ptr == NULL: + raise MemoryError("Error allocating _py_anon_pod2") + memcpy(self._ptr, val.ctypes.data, sizeof((NULL).u)) + self._owner = None + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) @property def batch(self): """_py_anon_pod3: """ - return self._batch + return _py_anon_pod3.from_ptr(&(self._ptr[0].batch), self._readonly, 
self) - def __setitem__(self, key, val): - self._data[key] = val + @batch.setter + def batch(self, val): + if self._readonly: + raise ValueError("This _py_anon_pod2 instance is read-only") + cdef _py_anon_pod3 val_ = val + memcpy(&(self._ptr[0].batch), (val_._ptr), sizeof(_anon_pod3)) @staticmethod def from_data(data): """Create an _py_anon_pod2 instance wrapping the given NumPy array. Args: - data (_numpy.ndarray): a 1D array of dtype `_py_anon_pod2_dtype` holding the data. + data (_numpy.ndarray): a single-element array of dtype `_py_anon_pod2_dtype` holding the data. """ - cdef _py_anon_pod2 obj = _py_anon_pod2.__new__(_py_anon_pod2) - if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): - raise TypeError("data argument must be a NumPy ndarray") - if data.ndim != 1: - raise ValueError("data array must be 1D") - if data.dtype != _py_anon_pod2_dtype: - raise ValueError("data array must be of dtype _py_anon_pod2_dtype") - obj._data = data.view(_numpy.recarray) - - batch_addr = obj._data.batch[0].__array_interface__['data'][0] - obj._batch = _py_anon_pod3.from_ptr(batch_addr) - return obj + return __from_data(data, "_py_anon_pod2_dtype", _py_anon_pod2_dtype, _py_anon_pod2.from_ptr) @staticmethod - def from_ptr(intptr_t ptr, bint readonly=False): + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): """Create an _py_anon_pod2 instance wrapping the given pointer. Args: ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. readonly (bool): whether the data is read-only (to the user). default is `False`. 
""" if ptr == 0: raise ValueError("ptr must not be null (0)") cdef _py_anon_pod2 obj = _py_anon_pod2.__new__(_py_anon_pod2) - cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE - cdef object buf = PyMemoryView_FromMemory( - ptr, sizeof((NULL).u), flag) - data = _numpy.ndarray((1,), buffer=buf, - dtype=_py_anon_pod2_dtype) - obj._data = data.view(_numpy.recarray) - - batch_addr = obj._data.batch[0].__array_interface__['data'][0] - obj._batch = _py_anon_pod3.from_ptr(batch_addr) + if owner is None: + obj._ptr = <_anon_pod2 *>malloc(sizeof((NULL).u)) + if obj._ptr == NULL: + raise MemoryError("Error allocating _py_anon_pod2") + memcpy((obj._ptr), ptr, sizeof((NULL).u)) + obj._owner = None + else: + obj._ptr = <_anon_pod2 *>ptr + obj._owner = owner + obj._readonly = readonly return obj @@ -1198,13 +1348,20 @@ cdef class StatsLevel1: .. seealso:: `CUfileStatsLevel1_t` """ cdef: - readonly object _data + CUfileStatsLevel1_t *_ptr + object _owner + bint _readonly def __init__(self): - arr = _numpy.empty(1, dtype=stats_level1_dtype) - self._data = arr.view(_numpy.recarray) - assert self._data.itemsize == sizeof(CUfileStatsLevel1_t), \ - f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfileStatsLevel1_t)}" + self._ptr = calloc(1, sizeof(CUfileStatsLevel1_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating StatsLevel1") + self._owner = None + self._readonly = False + + def __dealloc__(self): + if self._owner is None: + free(self._ptr) def __repr__(self): return f"<{__name__}.StatsLevel1 object at {hex(id(self))}>" @@ -1212,450 +1369,557 @@ cdef class StatsLevel1: @property def ptr(self): """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) cdef intptr_t _get_ptr(self): - """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) def __int__(self): - return self._data.ctypes.data + return (self._ptr) def 
__eq__(self, other): + cdef StatsLevel1 other_ if not isinstance(other, StatsLevel1): return False - if self._data.size != other._data.size: - return False - if self._data.dtype != other._data.dtype: - return False - return bool((self._data == other._data).all()) + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(CUfileStatsLevel1_t)) == 0) + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(CUfileStatsLevel1_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating StatsLevel1") + memcpy(self._ptr, val.ctypes.data, sizeof(CUfileStatsLevel1_t)) + self._owner = None + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) @property def read_ops(self): - """: """ - return self._data.read_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].read_ops), self._readonly, self) @read_ops.setter def read_ops(self, val): - self._data.read_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].read_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def write_ops(self): - """: """ - return self._data.write_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].write_ops), self._readonly, self) @write_ops.setter def write_ops(self, val): - self._data.write_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].write_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def hdl_register_ops(self): - """: """ - return self._data.hdl_register_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].hdl_register_ops), self._readonly, self) @hdl_register_ops.setter def hdl_register_ops(self, val): - self._data.hdl_register_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + 
memcpy(&(self._ptr[0].hdl_register_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def hdl_deregister_ops(self): - """: """ - return self._data.hdl_deregister_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].hdl_deregister_ops), self._readonly, self) @hdl_deregister_ops.setter def hdl_deregister_ops(self, val): - self._data.hdl_deregister_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].hdl_deregister_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def buf_register_ops(self): - """: """ - return self._data.buf_register_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].buf_register_ops), self._readonly, self) @buf_register_ops.setter def buf_register_ops(self, val): - self._data.buf_register_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].buf_register_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def buf_deregister_ops(self): - """: """ - return self._data.buf_deregister_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].buf_deregister_ops), self._readonly, self) @buf_deregister_ops.setter def buf_deregister_ops(self, val): - self._data.buf_deregister_ops = val - - @property - def read_bytes(self): - """int: """ - return int(self._data.read_bytes[0]) - - @read_bytes.setter - def read_bytes(self, val): - self._data.read_bytes = val - - @property - def write_bytes(self): - """int: """ - return int(self._data.write_bytes[0]) - - @write_bytes.setter - def write_bytes(self, val): - self._data.write_bytes = val - - @property - def read_bw_bytes_per_sec(self): - """int: """ - return int(self._data.read_bw_bytes_per_sec[0]) - - @read_bw_bytes_per_sec.setter - def read_bw_bytes_per_sec(self, val): - self._data.read_bw_bytes_per_sec = val - - @property - def write_bw_bytes_per_sec(self): - """int: """ - return 
int(self._data.write_bw_bytes_per_sec[0]) - - @write_bw_bytes_per_sec.setter - def write_bw_bytes_per_sec(self, val): - self._data.write_bw_bytes_per_sec = val - - @property - def read_lat_avg_us(self): - """int: """ - return int(self._data.read_lat_avg_us[0]) - - @read_lat_avg_us.setter - def read_lat_avg_us(self, val): - self._data.read_lat_avg_us = val - - @property - def write_lat_avg_us(self): - """int: """ - return int(self._data.write_lat_avg_us[0]) - - @write_lat_avg_us.setter - def write_lat_avg_us(self, val): - self._data.write_lat_avg_us = val - - @property - def read_ops_per_sec(self): - """int: """ - return int(self._data.read_ops_per_sec[0]) - - @read_ops_per_sec.setter - def read_ops_per_sec(self, val): - self._data.read_ops_per_sec = val - - @property - def write_ops_per_sec(self): - """int: """ - return int(self._data.write_ops_per_sec[0]) - - @write_ops_per_sec.setter - def write_ops_per_sec(self, val): - self._data.write_ops_per_sec = val - - @property - def read_lat_sum_us(self): - """int: """ - return int(self._data.read_lat_sum_us[0]) - - @read_lat_sum_us.setter - def read_lat_sum_us(self, val): - self._data.read_lat_sum_us = val - - @property - def write_lat_sum_us(self): - """int: """ - return int(self._data.write_lat_sum_us[0]) - - @write_lat_sum_us.setter - def write_lat_sum_us(self, val): - self._data.write_lat_sum_us = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].buf_deregister_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_submit_ops(self): - """: """ - return self._data.batch_submit_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_submit_ops), self._readonly, self) @batch_submit_ops.setter def batch_submit_ops(self, val): - self._data.batch_submit_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + 
memcpy(&(self._ptr[0].batch_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_complete_ops(self): - """: """ - return self._data.batch_complete_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_complete_ops), self._readonly, self) @batch_complete_ops.setter def batch_complete_ops(self, val): - self._data.batch_complete_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_complete_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_setup_ops(self): - """: """ - return self._data.batch_setup_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_setup_ops), self._readonly, self) @batch_setup_ops.setter def batch_setup_ops(self, val): - self._data.batch_setup_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_setup_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_cancel_ops(self): - """: """ - return self._data.batch_cancel_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_cancel_ops), self._readonly, self) @batch_cancel_ops.setter def batch_cancel_ops(self, val): - self._data.batch_cancel_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_cancel_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_destroy_ops(self): - """: """ - return self._data.batch_destroy_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_destroy_ops), self._readonly, self) @batch_destroy_ops.setter def batch_destroy_ops(self, val): - self._data.batch_destroy_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_destroy_ops), (val_._ptr), 
sizeof(CUfileOpCounter_t)) @property def batch_enqueued_ops(self): - """: """ - return self._data.batch_enqueued_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_enqueued_ops), self._readonly, self) @batch_enqueued_ops.setter def batch_enqueued_ops(self, val): - self._data.batch_enqueued_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_enqueued_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_posix_enqueued_ops(self): - """: """ - return self._data.batch_posix_enqueued_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_posix_enqueued_ops), self._readonly, self) @batch_posix_enqueued_ops.setter def batch_posix_enqueued_ops(self, val): - self._data.batch_posix_enqueued_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_posix_enqueued_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_processed_ops(self): - """: """ - return self._data.batch_processed_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_processed_ops), self._readonly, self) @batch_processed_ops.setter def batch_processed_ops(self, val): - self._data.batch_processed_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_processed_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_posix_processed_ops(self): - """: """ - return self._data.batch_posix_processed_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_posix_processed_ops), self._readonly, self) @batch_posix_processed_ops.setter def batch_posix_processed_ops(self, val): - self._data.batch_posix_processed_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + 
memcpy(&(self._ptr[0].batch_posix_processed_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_nvfs_submit_ops(self): - """: """ - return self._data.batch_nvfs_submit_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_nvfs_submit_ops), self._readonly, self) @batch_nvfs_submit_ops.setter def batch_nvfs_submit_ops(self, val): - self._data.batch_nvfs_submit_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_nvfs_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_p2p_submit_ops(self): - """: """ - return self._data.batch_p2p_submit_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_p2p_submit_ops), self._readonly, self) @batch_p2p_submit_ops.setter def batch_p2p_submit_ops(self, val): - self._data.batch_p2p_submit_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_p2p_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_aio_submit_ops(self): - """: """ - return self._data.batch_aio_submit_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_aio_submit_ops), self._readonly, self) @batch_aio_submit_ops.setter def batch_aio_submit_ops(self, val): - self._data.batch_aio_submit_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_aio_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_iouring_submit_ops(self): - """: """ - return self._data.batch_iouring_submit_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_iouring_submit_ops), self._readonly, self) @batch_iouring_submit_ops.setter def batch_iouring_submit_ops(self, val): - self._data.batch_iouring_submit_ops = val + if self._readonly: + raise ValueError("This 
StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_iouring_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_mixed_io_submit_ops(self): - """: """ - return self._data.batch_mixed_io_submit_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_mixed_io_submit_ops), self._readonly, self) @batch_mixed_io_submit_ops.setter def batch_mixed_io_submit_ops(self, val): - self._data.batch_mixed_io_submit_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_mixed_io_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) @property def batch_total_submit_ops(self): - """: """ - return self._data.batch_total_submit_ops + """OpCounter: """ + return OpCounter.from_ptr(&(self._ptr[0].batch_total_submit_ops), self._readonly, self) @batch_total_submit_ops.setter def batch_total_submit_ops(self, val): - self._data.batch_total_submit_ops = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + cdef OpCounter val_ = val + memcpy(&(self._ptr[0].batch_total_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + + @property + def read_bytes(self): + """int: """ + return self._ptr[0].read_bytes + + @read_bytes.setter + def read_bytes(self, val): + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].read_bytes = val + + @property + def write_bytes(self): + """int: """ + return self._ptr[0].write_bytes + + @write_bytes.setter + def write_bytes(self, val): + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].write_bytes = val + + @property + def read_bw_bytes_per_sec(self): + """int: """ + return self._ptr[0].read_bw_bytes_per_sec + + @read_bw_bytes_per_sec.setter + def read_bw_bytes_per_sec(self, val): + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + 
self._ptr[0].read_bw_bytes_per_sec = val + + @property + def write_bw_bytes_per_sec(self): + """int: """ + return self._ptr[0].write_bw_bytes_per_sec + + @write_bw_bytes_per_sec.setter + def write_bw_bytes_per_sec(self, val): + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].write_bw_bytes_per_sec = val + + @property + def read_lat_avg_us(self): + """int: """ + return self._ptr[0].read_lat_avg_us + + @read_lat_avg_us.setter + def read_lat_avg_us(self, val): + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].read_lat_avg_us = val + + @property + def write_lat_avg_us(self): + """int: """ + return self._ptr[0].write_lat_avg_us + + @write_lat_avg_us.setter + def write_lat_avg_us(self, val): + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].write_lat_avg_us = val + + @property + def read_ops_per_sec(self): + """int: """ + return self._ptr[0].read_ops_per_sec + + @read_ops_per_sec.setter + def read_ops_per_sec(self, val): + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].read_ops_per_sec = val + + @property + def write_ops_per_sec(self): + """int: """ + return self._ptr[0].write_ops_per_sec + + @write_ops_per_sec.setter + def write_ops_per_sec(self, val): + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].write_ops_per_sec = val + + @property + def read_lat_sum_us(self): + """int: """ + return self._ptr[0].read_lat_sum_us + + @read_lat_sum_us.setter + def read_lat_sum_us(self, val): + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].read_lat_sum_us = val + + @property + def write_lat_sum_us(self): + """int: """ + return self._ptr[0].write_lat_sum_us + + @write_lat_sum_us.setter + def write_lat_sum_us(self, val): + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + 
self._ptr[0].write_lat_sum_us = val @property def batch_read_bytes(self): """int: """ - return int(self._data.batch_read_bytes[0]) + return self._ptr[0].batch_read_bytes @batch_read_bytes.setter def batch_read_bytes(self, val): - self._data.batch_read_bytes = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].batch_read_bytes = val @property def batch_write_bytes(self): """int: """ - return int(self._data.batch_write_bytes[0]) + return self._ptr[0].batch_write_bytes @batch_write_bytes.setter def batch_write_bytes(self, val): - self._data.batch_write_bytes = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].batch_write_bytes = val @property def batch_read_bw_bytes(self): """int: """ - return int(self._data.batch_read_bw_bytes[0]) + return self._ptr[0].batch_read_bw_bytes @batch_read_bw_bytes.setter def batch_read_bw_bytes(self, val): - self._data.batch_read_bw_bytes = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].batch_read_bw_bytes = val @property def batch_write_bw_bytes(self): """int: """ - return int(self._data.batch_write_bw_bytes[0]) + return self._ptr[0].batch_write_bw_bytes @batch_write_bw_bytes.setter def batch_write_bw_bytes(self, val): - self._data.batch_write_bw_bytes = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].batch_write_bw_bytes = val @property def batch_submit_lat_avg_us(self): """int: """ - return int(self._data.batch_submit_lat_avg_us[0]) + return self._ptr[0].batch_submit_lat_avg_us @batch_submit_lat_avg_us.setter def batch_submit_lat_avg_us(self, val): - self._data.batch_submit_lat_avg_us = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].batch_submit_lat_avg_us = val @property def batch_completion_lat_avg_us(self): """int: """ - return int(self._data.batch_completion_lat_avg_us[0]) + 
return self._ptr[0].batch_completion_lat_avg_us @batch_completion_lat_avg_us.setter def batch_completion_lat_avg_us(self, val): - self._data.batch_completion_lat_avg_us = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].batch_completion_lat_avg_us = val @property def batch_submit_ops_per_sec(self): """int: """ - return int(self._data.batch_submit_ops_per_sec[0]) + return self._ptr[0].batch_submit_ops_per_sec @batch_submit_ops_per_sec.setter def batch_submit_ops_per_sec(self, val): - self._data.batch_submit_ops_per_sec = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].batch_submit_ops_per_sec = val @property def batch_complete_ops_per_sec(self): """int: """ - return int(self._data.batch_complete_ops_per_sec[0]) + return self._ptr[0].batch_complete_ops_per_sec @batch_complete_ops_per_sec.setter def batch_complete_ops_per_sec(self, val): - self._data.batch_complete_ops_per_sec = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].batch_complete_ops_per_sec = val @property def batch_submit_lat_sum_us(self): """int: """ - return int(self._data.batch_submit_lat_sum_us[0]) + return self._ptr[0].batch_submit_lat_sum_us @batch_submit_lat_sum_us.setter def batch_submit_lat_sum_us(self, val): - self._data.batch_submit_lat_sum_us = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].batch_submit_lat_sum_us = val @property def batch_completion_lat_sum_us(self): """int: """ - return int(self._data.batch_completion_lat_sum_us[0]) + return self._ptr[0].batch_completion_lat_sum_us @batch_completion_lat_sum_us.setter def batch_completion_lat_sum_us(self, val): - self._data.batch_completion_lat_sum_us = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].batch_completion_lat_sum_us = val @property def last_batch_read_bytes(self): 
"""int: """ - return int(self._data.last_batch_read_bytes[0]) + return self._ptr[0].last_batch_read_bytes @last_batch_read_bytes.setter def last_batch_read_bytes(self, val): - self._data.last_batch_read_bytes = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].last_batch_read_bytes = val @property def last_batch_write_bytes(self): """int: """ - return int(self._data.last_batch_write_bytes[0]) + return self._ptr[0].last_batch_write_bytes @last_batch_write_bytes.setter def last_batch_write_bytes(self, val): - self._data.last_batch_write_bytes = val - - def __setitem__(self, key, val): - self._data[key] = val + if self._readonly: + raise ValueError("This StatsLevel1 instance is read-only") + self._ptr[0].last_batch_write_bytes = val @staticmethod def from_data(data): """Create an StatsLevel1 instance wrapping the given NumPy array. Args: - data (_numpy.ndarray): a 1D array of dtype `stats_level1_dtype` holding the data. + data (_numpy.ndarray): a single-element array of dtype `stats_level1_dtype` holding the data. """ - cdef StatsLevel1 obj = StatsLevel1.__new__(StatsLevel1) - if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): - raise TypeError("data argument must be a NumPy ndarray") - if data.ndim != 1: - raise ValueError("data array must be 1D") - if data.dtype != stats_level1_dtype: - raise ValueError("data array must be of dtype stats_level1_dtype") - obj._data = data.view(_numpy.recarray) - - return obj + return __from_data(data, "stats_level1_dtype", stats_level1_dtype, StatsLevel1.from_ptr) @staticmethod - def from_ptr(intptr_t ptr, bint readonly=False): + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): """Create an StatsLevel1 instance wrapping the given pointer. Args: ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. 
readonly (bool): whether the data is read-only (to the user). default is `False`. """ if ptr == 0: raise ValueError("ptr must not be null (0)") cdef StatsLevel1 obj = StatsLevel1.__new__(StatsLevel1) - cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE - cdef object buf = PyMemoryView_FromMemory( - ptr, sizeof(CUfileStatsLevel1_t), flag) - data = _numpy.ndarray((1,), buffer=buf, - dtype=stats_level1_dtype) - obj._data = data.view(_numpy.recarray) - + if owner is None: + obj._ptr = malloc(sizeof(CUfileStatsLevel1_t)) + if obj._ptr == NULL: + raise MemoryError("Error allocating StatsLevel1") + memcpy((obj._ptr), ptr, sizeof(CUfileStatsLevel1_t)) + obj._owner = None + else: + obj._ptr = ptr + obj._owner = owner + obj._readonly = readonly return obj @@ -1683,11 +1947,15 @@ cdef class IOParams: cdef: readonly object _data + readonly tuple _u + + + def __init__(self, size=1): arr = _numpy.empty(size, dtype=io_params_dtype) self._data = arr.view(_numpy.recarray) assert self._data.itemsize == sizeof(CUfileIOParams_t), \ - f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfileIOParams_t)}" + f"itemsize {self._data.itemsize} mismatches struct size { sizeof(CUfileIOParams_t) }" def __repr__(self): if self._data.size > 1: @@ -1701,7 +1969,6 @@ cdef class IOParams: return self._data.ctypes.data cdef intptr_t _get_ptr(self): - """Get the pointer address to the data as Python :class:`int`.""" return self._data.ctypes.data def __int__(self): @@ -1714,13 +1981,17 @@ cdef class IOParams: return self._data.size def __eq__(self, other): - if not isinstance(other, IOParams): + cdef object self_data = self._data + if (not isinstance(other, IOParams)) or self_data.size != other._data.size or self_data.dtype != other._data.dtype: return False - if self._data.size != other._data.size: - return False - if self._data.dtype != other._data.dtype: - return False - return bool((self._data == other._data).all()) + return bool((self_data == other._data).all()) + + 
@property + def u(self): + """_py_anon_pod2: """ + if self._data.size == 1: + return self._u[0] + return self._u @property def mode(self): @@ -1733,15 +2004,6 @@ cdef class IOParams: def mode(self, val): self._data.mode = val - @property - def u(self): - """_py_anon_pod2_dtype: """ - return self._data.u - - @u.setter - def u(self, val): - self._data.u = val - @property def fh(self): """Union[~_numpy.intp, int]: """ @@ -1776,13 +2038,16 @@ cdef class IOParams: self._data.cookie = val def __getitem__(self, key): + cdef ssize_t key_ + cdef ssize_t size if isinstance(key, int): + key_ = key size = self._data.size - if key >= size or key <= -(size+1): + if key_ >= size or key_ <= -(size+1): raise IndexError("index is out of bounds") - if key < 0: - key += size - return IOParams.from_data(self._data[key:key+1]) + if key_ < 0: + key_ += size + return IOParams.from_data(self._data[key_:key_+1]) out = self._data[key] if isinstance(out, _numpy.recarray) and out.dtype == io_params_dtype: return IOParams.from_data(out) @@ -1799,7 +2064,7 @@ cdef class IOParams: data (_numpy.ndarray): a 1D array of dtype `io_params_dtype` holding the data. 
""" cdef IOParams obj = IOParams.__new__(IOParams) - if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): + if not isinstance(data, _numpy.ndarray): raise TypeError("data argument must be a NumPy ndarray") if data.ndim != 1: raise ValueError("data array must be 1D") @@ -1807,6 +2072,13 @@ cdef class IOParams: raise ValueError("data array must be of dtype io_params_dtype") obj._data = data.view(_numpy.recarray) + u_list = list() + for i in range(obj._data.size): + addr = obj._data.u[i].__array_interface__['data'][0] + _py_anon_pod2_obj = _py_anon_pod2.from_ptr(addr, owner=obj) + u_list.append(_py_anon_pod2_obj) + + obj._u = tuple(u_list) return obj @staticmethod @@ -1821,13 +2093,19 @@ cdef class IOParams: if ptr == 0: raise ValueError("ptr must not be null (0)") cdef IOParams obj = IOParams.__new__(IOParams) - cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE - cdef object buf = PyMemoryView_FromMemory( + cdef flag = cpython.buffer.PyBUF_READ if readonly else cpython.buffer.PyBUF_WRITE + cdef object buf = cpython.memoryview.PyMemoryView_FromMemory( ptr, sizeof(CUfileIOParams_t) * size, flag) - data = _numpy.ndarray((size,), buffer=buf, - dtype=io_params_dtype) + data = _numpy.ndarray(size, buffer=buf, dtype=io_params_dtype) obj._data = data.view(_numpy.recarray) + u_list = list() + for i in range(obj._data.size): + addr = obj._data.u[i].__array_interface__['data'][0] + _py_anon_pod2_obj = _py_anon_pod2.from_ptr(addr, owner=obj) + u_list.append(_py_anon_pod2_obj) + + obj._u = tuple(u_list) return obj @@ -1845,13 +2123,20 @@ cdef class StatsLevel2: .. 
seealso:: `CUfileStatsLevel2_t` """ cdef: - readonly object _data + CUfileStatsLevel2_t *_ptr + object _owner + bint _readonly def __init__(self): - arr = _numpy.empty(1, dtype=stats_level2_dtype) - self._data = arr.view(_numpy.recarray) - assert self._data.itemsize == sizeof(CUfileStatsLevel2_t), \ - f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfileStatsLevel2_t)}" + self._ptr = calloc(1, sizeof(CUfileStatsLevel2_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating StatsLevel2") + self._owner = None + self._readonly = False + + def __dealloc__(self): + if self._owner is None: + free(self._ptr) def __repr__(self): return f"<{__name__}.StatsLevel2 object at {hex(id(self))}>" @@ -1859,90 +2144,97 @@ cdef class StatsLevel2: @property def ptr(self): """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) cdef intptr_t _get_ptr(self): - """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) def __int__(self): - return self._data.ctypes.data + return (self._ptr) def __eq__(self, other): + cdef StatsLevel2 other_ if not isinstance(other, StatsLevel2): return False - if self._data.size != other._data.size: - return False - if self._data.dtype != other._data.dtype: - return False - return bool((self._data == other._data).all()) + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(CUfileStatsLevel2_t)) == 0) + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(CUfileStatsLevel2_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating StatsLevel2") + memcpy(self._ptr, val.ctypes.data, sizeof(CUfileStatsLevel2_t)) + self._owner = None + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) @property def basic(self): - """: """ - return self._data.basic + """StatsLevel1: """ + return 
StatsLevel1.from_ptr(&(self._ptr[0].basic), self._readonly, self) @basic.setter def basic(self, val): - self._data.basic = val + if self._readonly: + raise ValueError("This StatsLevel2 instance is read-only") + cdef StatsLevel1 val_ = val + memcpy(&(self._ptr[0].basic), (val_._ptr), sizeof(CUfileStatsLevel1_t)) @property def read_size_kb_hist(self): """~_numpy.uint64: (array of length 32).""" - return self._data.read_size_kb_hist + return self._ptr[0].read_size_kb_hist @read_size_kb_hist.setter def read_size_kb_hist(self, val): - self._data.read_size_kb_hist = val + if self._readonly: + raise ValueError("This StatsLevel2 instance is read-only") + self._ptr[0].read_size_kb_hist = val @property def write_size_kb_hist(self): """~_numpy.uint64: (array of length 32).""" - return self._data.write_size_kb_hist + return self._ptr[0].write_size_kb_hist @write_size_kb_hist.setter def write_size_kb_hist(self, val): - self._data.write_size_kb_hist = val - - def __setitem__(self, key, val): - self._data[key] = val + if self._readonly: + raise ValueError("This StatsLevel2 instance is read-only") + self._ptr[0].write_size_kb_hist = val @staticmethod def from_data(data): """Create an StatsLevel2 instance wrapping the given NumPy array. Args: - data (_numpy.ndarray): a 1D array of dtype `stats_level2_dtype` holding the data. + data (_numpy.ndarray): a single-element array of dtype `stats_level2_dtype` holding the data. 
""" - cdef StatsLevel2 obj = StatsLevel2.__new__(StatsLevel2) - if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): - raise TypeError("data argument must be a NumPy ndarray") - if data.ndim != 1: - raise ValueError("data array must be 1D") - if data.dtype != stats_level2_dtype: - raise ValueError("data array must be of dtype stats_level2_dtype") - obj._data = data.view(_numpy.recarray) - - return obj + return __from_data(data, "stats_level2_dtype", stats_level2_dtype, StatsLevel2.from_ptr) @staticmethod - def from_ptr(intptr_t ptr, bint readonly=False): + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): """Create an StatsLevel2 instance wrapping the given pointer. Args: ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. readonly (bool): whether the data is read-only (to the user). default is `False`. """ if ptr == 0: raise ValueError("ptr must not be null (0)") cdef StatsLevel2 obj = StatsLevel2.__new__(StatsLevel2) - cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE - cdef object buf = PyMemoryView_FromMemory( - ptr, sizeof(CUfileStatsLevel2_t), flag) - data = _numpy.ndarray((1,), buffer=buf, - dtype=stats_level2_dtype) - obj._data = data.view(_numpy.recarray) - + if owner is None: + obj._ptr = malloc(sizeof(CUfileStatsLevel2_t)) + if obj._ptr == NULL: + raise MemoryError("Error allocating StatsLevel2") + memcpy((obj._ptr), ptr, sizeof(CUfileStatsLevel2_t)) + obj._owner = None + else: + obj._ptr = ptr + obj._owner = owner + obj._readonly = readonly return obj @@ -1960,13 +2252,20 @@ cdef class StatsLevel3: .. 
seealso:: `CUfileStatsLevel3_t` """ cdef: - readonly object _data + CUfileStatsLevel3_t *_ptr + object _owner + bint _readonly def __init__(self): - arr = _numpy.empty(1, dtype=stats_level3_dtype) - self._data = arr.view(_numpy.recarray) - assert self._data.itemsize == sizeof(CUfileStatsLevel3_t), \ - f"itemsize {self._data.itemsize} mismatches struct size {sizeof(CUfileStatsLevel3_t)}" + self._ptr = calloc(1, sizeof(CUfileStatsLevel3_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating StatsLevel3") + self._owner = None + self._readonly = False + + def __dealloc__(self): + if self._owner is None: + free(self._ptr) def __repr__(self): return f"<{__name__}.StatsLevel3 object at {hex(id(self))}>" @@ -1974,90 +2273,98 @@ cdef class StatsLevel3: @property def ptr(self): """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) cdef intptr_t _get_ptr(self): - """Get the pointer address to the data as Python :class:`int`.""" - return self._data.ctypes.data + return (self._ptr) def __int__(self): - return self._data.ctypes.data + return (self._ptr) def __eq__(self, other): + cdef StatsLevel3 other_ if not isinstance(other, StatsLevel3): return False - if self._data.size != other._data.size: - return False - if self._data.dtype != other._data.dtype: - return False - return bool((self._data == other._data).all()) + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(CUfileStatsLevel3_t)) == 0) + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(CUfileStatsLevel3_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating StatsLevel3") + memcpy(self._ptr, val.ctypes.data, sizeof(CUfileStatsLevel3_t)) + self._owner = None + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) @property def detailed(self): - """: """ - return self._data.detailed + """StatsLevel2: """ + return 
StatsLevel2.from_ptr(&(self._ptr[0].detailed), self._readonly, self) @detailed.setter def detailed(self, val): - self._data.detailed = val - - @property - def num_gpus(self): - """int: """ - return int(self._data.num_gpus[0]) - - @num_gpus.setter - def num_gpus(self, val): - self._data.num_gpus = val + if self._readonly: + raise ValueError("This StatsLevel3 instance is read-only") + cdef StatsLevel2 val_ = val + memcpy(&(self._ptr[0].detailed), (val_._ptr), sizeof(CUfileStatsLevel2_t)) @property def per_gpu_stats(self): - """per_gpu_stats_dtype: (array of length 16).""" - return self._data.per_gpu_stats + """PerGpuStats: """ + return PerGpuStats.from_ptr(&(self._ptr[0].per_gpu_stats), self._readonly, self) @per_gpu_stats.setter def per_gpu_stats(self, val): - self._data.per_gpu_stats = val + if self._readonly: + raise ValueError("This StatsLevel3 instance is read-only") + cdef PerGpuStats val_ = val + memcpy(&(self._ptr[0].per_gpu_stats), (val_._ptr), sizeof(CUfilePerGpuStats_t)) - def __setitem__(self, key, val): - self._data[key] = val + @property + def num_gpus(self): + """int: """ + return self._ptr[0].num_gpus + + @num_gpus.setter + def num_gpus(self, val): + if self._readonly: + raise ValueError("This StatsLevel3 instance is read-only") + self._ptr[0].num_gpus = val @staticmethod def from_data(data): """Create an StatsLevel3 instance wrapping the given NumPy array. Args: - data (_numpy.ndarray): a 1D array of dtype `stats_level3_dtype` holding the data. + data (_numpy.ndarray): a single-element array of dtype `stats_level3_dtype` holding the data. 
""" - cdef StatsLevel3 obj = StatsLevel3.__new__(StatsLevel3) - if not isinstance(data, (_numpy.ndarray, _numpy.recarray)): - raise TypeError("data argument must be a NumPy ndarray") - if data.ndim != 1: - raise ValueError("data array must be 1D") - if data.dtype != stats_level3_dtype: - raise ValueError("data array must be of dtype stats_level3_dtype") - obj._data = data.view(_numpy.recarray) - - return obj + return __from_data(data, "stats_level3_dtype", stats_level3_dtype, StatsLevel3.from_ptr) @staticmethod - def from_ptr(intptr_t ptr, bint readonly=False): + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): """Create an StatsLevel3 instance wrapping the given pointer. Args: ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. readonly (bool): whether the data is read-only (to the user). default is `False`. """ if ptr == 0: raise ValueError("ptr must not be null (0)") cdef StatsLevel3 obj = StatsLevel3.__new__(StatsLevel3) - cdef flag = _buffer.PyBUF_READ if readonly else _buffer.PyBUF_WRITE - cdef object buf = PyMemoryView_FromMemory( - ptr, sizeof(CUfileStatsLevel3_t), flag) - data = _numpy.ndarray((1,), buffer=buf, - dtype=stats_level3_dtype) - obj._data = data.view(_numpy.recarray) - + if owner is None: + obj._ptr = malloc(sizeof(CUfileStatsLevel3_t)) + if obj._ptr == NULL: + raise MemoryError("Error allocating StatsLevel3") + memcpy((obj._ptr), ptr, sizeof(CUfileStatsLevel3_t)) + obj._owner = None + else: + obj._ptr = ptr + obj._owner = owner + obj._readonly = readonly return obj diff --git a/cuda_bindings/cuda/bindings/cycufile.pxd b/cuda_bindings/cuda/bindings/cycufile.pxd index c57b18b95d..b831913fd3 100644 --- a/cuda_bindings/cuda/bindings/cycufile.pxd +++ b/cuda_bindings/cuda/bindings/cycufile.pxd @@ -30,8 +30,10 @@ cdef extern from "": ctypedef sockaddr sockaddr_t -cdef extern from '': + + # enums +cdef extern from '': 
ctypedef enum CUfileOpError: CU_FILE_SUCCESS CU_FILE_DRIVER_NOT_INITIALIZED @@ -83,6 +85,7 @@ cdef extern from '': CU_FILE_BATCH_NOCOMPAT_ERROR CU_FILE_IO_MAX_ERROR +cdef extern from '': ctypedef enum CUfileDriverStatusFlags_t: CU_FILE_LUSTRE_SUPPORTED CU_FILE_WEKAFS_SUPPORTED @@ -97,25 +100,30 @@ cdef extern from '': CU_FILE_NVME_P2P_SUPPORTED CU_FILE_SCATEFS_SUPPORTED +cdef extern from '': ctypedef enum CUfileDriverControlFlags_t: CU_FILE_USE_POLL_MODE CU_FILE_ALLOW_COMPAT_MODE +cdef extern from '': ctypedef enum CUfileFeatureFlags_t: CU_FILE_DYN_ROUTING_SUPPORTED CU_FILE_BATCH_IO_SUPPORTED CU_FILE_STREAMS_SUPPORTED CU_FILE_PARALLEL_IO_SUPPORTED +cdef extern from '': ctypedef enum CUfileFileHandleType: CU_FILE_HANDLE_TYPE_OPAQUE_FD CU_FILE_HANDLE_TYPE_OPAQUE_WIN32 CU_FILE_HANDLE_TYPE_USERSPACE_FS +cdef extern from '': ctypedef enum CUfileOpcode_t: CUFILE_READ CUFILE_WRITE +cdef extern from '': ctypedef enum CUfileStatus_t: CUFILE_WAITING CUFILE_PENDING @@ -125,9 +133,11 @@ cdef extern from '': CUFILE_TIMEOUT CUFILE_FAILED +cdef extern from '': ctypedef enum CUfileBatchMode_t: CUFILE_BATCH +cdef extern from '': ctypedef enum CUFileSizeTConfigParameter_t: CUFILE_PARAM_PROFILE_STATS CUFILE_PARAM_EXECUTION_MAX_IO_QUEUE_DEPTH @@ -142,6 +152,7 @@ cdef extern from '': CUFILE_PARAM_POLLTHRESHOLD_SIZE_KB CUFILE_PARAM_PROPERTIES_BATCH_IO_TIMEOUT_MS +cdef extern from '': ctypedef enum CUFileBoolConfigParameter_t: CUFILE_PARAM_PROPERTIES_USE_POLL_MODE CUFILE_PARAM_PROPERTIES_ALLOW_COMPAT_MODE @@ -156,53 +167,61 @@ cdef extern from '': CUFILE_PARAM_SKIP_TOPOLOGY_DETECTION CUFILE_PARAM_STREAM_MEMOPS_BYPASS +cdef extern from '': ctypedef enum CUFileStringConfigParameter_t: CUFILE_PARAM_LOGGING_LEVEL CUFILE_PARAM_ENV_LOGFILE_PATH CUFILE_PARAM_LOG_DIR +cdef extern from '': ctypedef enum CUFileArrayConfigParameter_t: CUFILE_PARAM_POSIX_POOL_SLAB_SIZE_KB CUFILE_PARAM_POSIX_POOL_SLAB_COUNT # types - ctypedef void* CUfileHandle_t 'CUfileHandle_t' - ctypedef void* CUfileBatchHandle_t 
'CUfileBatchHandle_t' +ctypedef void* CUfileHandle_t 'CUfileHandle_t' +ctypedef void* CUfileBatchHandle_t 'CUfileBatchHandle_t' +cdef extern from '': ctypedef struct CUfileError_t 'CUfileError_t': CUfileOpError err CUresult cu_err - cdef struct _anon_pod0 '_anon_pod0': - unsigned int major_version - unsigned int minor_version - size_t poll_thresh_size - size_t max_direct_io_size - unsigned int dstatusflags - unsigned int dcontrolflags +cdef struct _anon_pod0 '_anon_pod0': + unsigned int major_version + unsigned int minor_version + size_t poll_thresh_size + size_t max_direct_io_size + unsigned int dstatusflags + unsigned int dcontrolflags +cdef extern from '': ctypedef struct cufileRDMAInfo_t 'cufileRDMAInfo_t': int version int desc_len char* desc_str +cdef extern from '': ctypedef struct CUfileFSOps_t 'CUfileFSOps_t': char* (*fs_type)(void*) int (*getRDMADeviceList)(void*, sockaddr_t**) int (*getRDMADevicePriority)(void*, char*, size_t, loff_t, sockaddr_t*) ssize_t (*read)(void*, char*, size_t, loff_t, cufileRDMAInfo_t*) ssize_t (*write)(void*, const char*, size_t, loff_t, cufileRDMAInfo_t*) - cdef union _anon_pod1 '_anon_pod1': - int fd - void* handle - cdef struct _anon_pod3 '_anon_pod3': - void* devPtr_base - off_t file_offset - off_t devPtr_offset - size_t size +cdef union _anon_pod1 '_anon_pod1': + int fd + void* handle +cdef struct _anon_pod3 '_anon_pod3': + void* devPtr_base + off_t file_offset + off_t devPtr_offset + size_t size +cdef extern from '': ctypedef struct CUfileIOEvents_t 'CUfileIOEvents_t': void* cookie CUfileStatus_t status size_t ret +cdef extern from '': ctypedef struct CUfileOpCounter_t 'CUfileOpCounter_t': uint64_t ok uint64_t err +cdef extern from '': ctypedef struct CUfilePerGpuStats_t 'CUfilePerGpuStats_t': char uuid[16] uint64_t read_bytes @@ -234,6 +253,7 @@ cdef extern from '': uint64_t n_mmap_err uint64_t n_mmap_free uint64_t reg_bytes +cdef extern from '': ctypedef struct CUfileDrvProps_t 'CUfileDrvProps_t': _anon_pod0 nvfs unsigned 
int fflags @@ -242,12 +262,14 @@ cdef extern from '': unsigned int max_device_pinned_mem_size unsigned int max_batch_io_size unsigned int max_batch_io_timeout_msecs +cdef extern from '': ctypedef struct CUfileDescr_t 'CUfileDescr_t': CUfileFileHandleType type _anon_pod1 handle CUfileFSOps_t* fs_ops - cdef union _anon_pod2 '_anon_pod2': - _anon_pod3 batch +cdef union _anon_pod2 '_anon_pod2': + _anon_pod3 batch +cdef extern from '': ctypedef struct CUfileStatsLevel1_t 'CUfileStatsLevel1_t': CUfileOpCounter_t read_ops CUfileOpCounter_t write_ops @@ -292,16 +314,19 @@ cdef extern from '': uint64_t batch_completion_lat_sum_us uint64_t last_batch_read_bytes uint64_t last_batch_write_bytes +cdef extern from '': ctypedef struct CUfileIOParams_t 'CUfileIOParams_t': CUfileBatchMode_t mode _anon_pod2 u CUfileHandle_t fh CUfileOpcode_t opcode void* cookie +cdef extern from '': ctypedef struct CUfileStatsLevel2_t 'CUfileStatsLevel2_t': CUfileStatsLevel1_t basic uint64_t read_size_kb_hist[32] uint64_t write_size_kb_hist[32] +cdef extern from '': ctypedef struct CUfileStatsLevel3_t 'CUfileStatsLevel3_t': CUfileStatsLevel2_t detailed uint32_t num_gpus From 42c603eb87d64039c2fbce3dbcb5153094d02129 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 21 Nov 2025 14:30:35 -0600 Subject: [PATCH 02/15] Updates for numeric array --- cuda_bindings/cuda/bindings/cufile.pyx | 18 +++++++++++++----- cuda_bindings/cuda/bindings/cycufile.pxd | 16 ++++++++++++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index 95d6087f3a..a521472f18 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -16,7 +16,7 @@ import cython from cuda.bindings.driver import CUresult as pyCUresult from libc.stdlib cimport calloc, free, malloc -cimport cpython +from cython cimport view cimport cpython.buffer cimport cpython.memoryview from libc.string cimport memcmp, 
memcpy @@ -2185,24 +2185,32 @@ cdef class StatsLevel2: @property def read_size_kb_hist(self): """~_numpy.uint64: (array of length 32).""" - return self._ptr[0].read_size_kb_hist + cdef view.array arr = view.array(shape=(32,), itemsize=sizeof(uint64_t), format="Q", mode="c", allocate_buffer=False) + arr.data = (&(self._ptr[0].read_size_kb_hist)) + return arr @read_size_kb_hist.setter def read_size_kb_hist(self, val): if self._readonly: raise ValueError("This StatsLevel2 instance is read-only") - self._ptr[0].read_size_kb_hist = val + cdef view.array arr = view.array(shape=(len(val),), itemsize=sizeof(uint64_t), format="Q", mode="c") + arr[:] = _numpy.asarray(val, dtype=_numpy.uint64) + memcpy((&(self._ptr[0].read_size_kb_hist)), (arr.data), sizeof(uint64_t) * len(val)) @property def write_size_kb_hist(self): """~_numpy.uint64: (array of length 32).""" - return self._ptr[0].write_size_kb_hist + cdef view.array arr = view.array(shape=(32,), itemsize=sizeof(uint64_t), format="Q", mode="c", allocate_buffer=False) + arr.data = (&(self._ptr[0].write_size_kb_hist)) + return arr @write_size_kb_hist.setter def write_size_kb_hist(self, val): if self._readonly: raise ValueError("This StatsLevel2 instance is read-only") - self._ptr[0].write_size_kb_hist = val + cdef view.array arr = view.array(shape=(len(val),), itemsize=sizeof(uint64_t), format="Q", mode="c") + arr[:] = _numpy.asarray(val, dtype=_numpy.uint64) + memcpy((&(self._ptr[0].write_size_kb_hist)), (arr.data), sizeof(uint64_t) * len(val)) @staticmethod def from_data(data): diff --git a/cuda_bindings/cuda/bindings/cycufile.pxd b/cuda_bindings/cuda/bindings/cycufile.pxd index b831913fd3..cc2ea7fe80 100644 --- a/cuda_bindings/cuda/bindings/cycufile.pxd +++ b/cuda_bindings/cuda/bindings/cycufile.pxd @@ -185,6 +185,7 @@ cdef extern from '': ctypedef struct CUfileError_t 'CUfileError_t': CUfileOpError err CUresult cu_err + cdef struct _anon_pod0 '_anon_pod0': unsigned int major_version unsigned int minor_version @@ -192,11 
+193,13 @@ cdef struct _anon_pod0 '_anon_pod0': size_t max_direct_io_size unsigned int dstatusflags unsigned int dcontrolflags + cdef extern from '': ctypedef struct cufileRDMAInfo_t 'cufileRDMAInfo_t': int version int desc_len char* desc_str + cdef extern from '': ctypedef struct CUfileFSOps_t 'CUfileFSOps_t': char* (*fs_type)(void*) @@ -204,23 +207,28 @@ cdef extern from '': int (*getRDMADevicePriority)(void*, char*, size_t, loff_t, sockaddr_t*) ssize_t (*read)(void*, char*, size_t, loff_t, cufileRDMAInfo_t*) ssize_t (*write)(void*, const char*, size_t, loff_t, cufileRDMAInfo_t*) + cdef union _anon_pod1 '_anon_pod1': int fd void* handle + cdef struct _anon_pod3 '_anon_pod3': void* devPtr_base off_t file_offset off_t devPtr_offset size_t size + cdef extern from '': ctypedef struct CUfileIOEvents_t 'CUfileIOEvents_t': void* cookie CUfileStatus_t status size_t ret + cdef extern from '': ctypedef struct CUfileOpCounter_t 'CUfileOpCounter_t': uint64_t ok uint64_t err + cdef extern from '': ctypedef struct CUfilePerGpuStats_t 'CUfilePerGpuStats_t': char uuid[16] @@ -253,6 +261,7 @@ cdef extern from '': uint64_t n_mmap_err uint64_t n_mmap_free uint64_t reg_bytes + cdef extern from '': ctypedef struct CUfileDrvProps_t 'CUfileDrvProps_t': _anon_pod0 nvfs @@ -262,13 +271,16 @@ cdef extern from '': unsigned int max_device_pinned_mem_size unsigned int max_batch_io_size unsigned int max_batch_io_timeout_msecs + cdef extern from '': ctypedef struct CUfileDescr_t 'CUfileDescr_t': CUfileFileHandleType type _anon_pod1 handle CUfileFSOps_t* fs_ops + cdef union _anon_pod2 '_anon_pod2': _anon_pod3 batch + cdef extern from '': ctypedef struct CUfileStatsLevel1_t 'CUfileStatsLevel1_t': CUfileOpCounter_t read_ops @@ -314,6 +326,7 @@ cdef extern from '': uint64_t batch_completion_lat_sum_us uint64_t last_batch_read_bytes uint64_t last_batch_write_bytes + cdef extern from '': ctypedef struct CUfileIOParams_t 'CUfileIOParams_t': CUfileBatchMode_t mode @@ -321,11 +334,13 @@ cdef extern 
from '': CUfileHandle_t fh CUfileOpcode_t opcode void* cookie + cdef extern from '': ctypedef struct CUfileStatsLevel2_t 'CUfileStatsLevel2_t': CUfileStatsLevel1_t basic uint64_t read_size_kb_hist[32] uint64_t write_size_kb_hist[32] + cdef extern from '': ctypedef struct CUfileStatsLevel3_t 'CUfileStatsLevel3_t': CUfileStatsLevel2_t detailed @@ -333,6 +348,7 @@ cdef extern from '': CUfilePerGpuStats_t per_gpu_stats[16] + cdef extern from *: """ // This is the missing piece we need to supply to help Cython & C++ compilers. From 92a564f58836ce9855e96aaec0f2187cf7dfbd2b Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 24 Nov 2025 10:25:07 -0500 Subject: [PATCH 03/15] More cufile changes --- cuda_bindings/cuda/bindings/cufile.pyx | 58 +++++++++++++------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index a521472f18..3cde8aa8fa 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -1258,7 +1258,7 @@ cdef class _py_anon_pod2: if self._readonly: raise ValueError("This _py_anon_pod2 instance is read-only") cdef _py_anon_pod3 val_ = val - memcpy(&(self._ptr[0].batch), (val_._ptr), sizeof(_anon_pod3)) + memcpy(&(self._ptr[0].batch), (val_._get_ptr()), sizeof(_anon_pod3) * 1) @staticmethod def from_data(data): @@ -1405,7 +1405,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].read_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].read_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def write_ops(self): @@ -1417,7 +1417,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].write_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].write_ops), 
(val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def hdl_register_ops(self): @@ -1429,7 +1429,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].hdl_register_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].hdl_register_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def hdl_deregister_ops(self): @@ -1441,7 +1441,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].hdl_deregister_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].hdl_deregister_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def buf_register_ops(self): @@ -1453,7 +1453,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].buf_register_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].buf_register_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def buf_deregister_ops(self): @@ -1465,7 +1465,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].buf_deregister_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].buf_deregister_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_submit_ops(self): @@ -1477,7 +1477,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_submit_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_complete_ops(self): @@ -1489,7 +1489,7 @@ cdef class StatsLevel1: if self._readonly: raise 
ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_complete_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_complete_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_setup_ops(self): @@ -1501,7 +1501,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_setup_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_setup_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_cancel_ops(self): @@ -1513,7 +1513,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_cancel_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_cancel_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_destroy_ops(self): @@ -1525,7 +1525,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_destroy_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_destroy_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_enqueued_ops(self): @@ -1537,7 +1537,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_enqueued_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_enqueued_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_posix_enqueued_ops(self): @@ -1549,7 +1549,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_posix_enqueued_ops), (val_._ptr), 
sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_posix_enqueued_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_processed_ops(self): @@ -1561,7 +1561,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_processed_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_processed_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_posix_processed_ops(self): @@ -1573,7 +1573,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_posix_processed_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_posix_processed_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_nvfs_submit_ops(self): @@ -1585,7 +1585,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_nvfs_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_nvfs_submit_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_p2p_submit_ops(self): @@ -1597,7 +1597,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_p2p_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_p2p_submit_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_aio_submit_ops(self): @@ -1609,7 +1609,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_aio_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_aio_submit_ops), (val_._get_ptr()), 
sizeof(CUfileOpCounter_t) * 1) @property def batch_iouring_submit_ops(self): @@ -1621,7 +1621,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_iouring_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_iouring_submit_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_mixed_io_submit_ops(self): @@ -1633,7 +1633,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_mixed_io_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_mixed_io_submit_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def batch_total_submit_ops(self): @@ -1645,7 +1645,7 @@ cdef class StatsLevel1: if self._readonly: raise ValueError("This StatsLevel1 instance is read-only") cdef OpCounter val_ = val - memcpy(&(self._ptr[0].batch_total_submit_ops), (val_._ptr), sizeof(CUfileOpCounter_t)) + memcpy(&(self._ptr[0].batch_total_submit_ops), (val_._get_ptr()), sizeof(CUfileOpCounter_t) * 1) @property def read_bytes(self): @@ -2180,7 +2180,7 @@ cdef class StatsLevel2: if self._readonly: raise ValueError("This StatsLevel2 instance is read-only") cdef StatsLevel1 val_ = val - memcpy(&(self._ptr[0].basic), (val_._ptr), sizeof(CUfileStatsLevel1_t)) + memcpy(&(self._ptr[0].basic), (val_._get_ptr()), sizeof(CUfileStatsLevel1_t) * 1) @property def read_size_kb_hist(self): @@ -2193,7 +2193,7 @@ cdef class StatsLevel2: def read_size_kb_hist(self, val): if self._readonly: raise ValueError("This StatsLevel2 instance is read-only") - cdef view.array arr = view.array(shape=(len(val),), itemsize=sizeof(uint64_t), format="Q", mode="c") + cdef view.array arr = view.array(shape=(32,), itemsize=sizeof(uint64_t), format="Q", mode="c") arr[:] = _numpy.asarray(val, dtype=_numpy.uint64) 
memcpy((&(self._ptr[0].read_size_kb_hist)), (arr.data), sizeof(uint64_t) * len(val)) @@ -2208,7 +2208,7 @@ cdef class StatsLevel2: def write_size_kb_hist(self, val): if self._readonly: raise ValueError("This StatsLevel2 instance is read-only") - cdef view.array arr = view.array(shape=(len(val),), itemsize=sizeof(uint64_t), format="Q", mode="c") + cdef view.array arr = view.array(shape=(32,), itemsize=sizeof(uint64_t), format="Q", mode="c") arr[:] = _numpy.asarray(val, dtype=_numpy.uint64) memcpy((&(self._ptr[0].write_size_kb_hist)), (arr.data), sizeof(uint64_t) * len(val)) @@ -2317,19 +2317,21 @@ cdef class StatsLevel3: if self._readonly: raise ValueError("This StatsLevel3 instance is read-only") cdef StatsLevel2 val_ = val - memcpy(&(self._ptr[0].detailed), (val_._ptr), sizeof(CUfileStatsLevel2_t)) + memcpy(&(self._ptr[0].detailed), (val_._get_ptr()), sizeof(CUfileStatsLevel2_t) * 1) @property def per_gpu_stats(self): """PerGpuStats: """ - return PerGpuStats.from_ptr(&(self._ptr[0].per_gpu_stats), self._readonly, self) + return PerGpuStats.from_ptr(&(self._ptr[0].per_gpu_stats), 16, self._readonly) @per_gpu_stats.setter def per_gpu_stats(self, val): if self._readonly: raise ValueError("This StatsLevel3 instance is read-only") cdef PerGpuStats val_ = val - memcpy(&(self._ptr[0].per_gpu_stats), (val_._ptr), sizeof(CUfilePerGpuStats_t)) + if len(val) != 16: + raise ValueError(f"Expected length 16 for field per_gpu_stats, got {len(val)}") + memcpy(&(self._ptr[0].per_gpu_stats), (val_._get_ptr()), sizeof(CUfilePerGpuStats_t) * 16) @property def num_gpus(self): From 3dc1fe422b5ff3fe03f46f728d93015f2d255ea8 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 24 Nov 2025 11:33:32 -0500 Subject: [PATCH 04/15] Fix externs --- cuda_bindings/cuda/bindings/cycufile.pxd | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cycufile.pxd b/cuda_bindings/cuda/bindings/cycufile.pxd index cc2ea7fe80..aa8ea93d48 100644 --- 
a/cuda_bindings/cuda/bindings/cycufile.pxd +++ b/cuda_bindings/cuda/bindings/cycufile.pxd @@ -179,8 +179,12 @@ cdef extern from '': CUFILE_PARAM_POSIX_POOL_SLAB_COUNT # types -ctypedef void* CUfileHandle_t 'CUfileHandle_t' -ctypedef void* CUfileBatchHandle_t 'CUfileBatchHandle_t' +cdef extern from '': + ctypedef void* CUfileHandle_t 'CUfileHandle_t' + +cdef extern from '': + ctypedef void* CUfileBatchHandle_t 'CUfileBatchHandle_t' + cdef extern from '': ctypedef struct CUfileError_t 'CUfileError_t': CUfileOpError err From 146021b349934783b1de3350c5b3f80ec9f9165f Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 1 Dec 2025 08:52:17 -0500 Subject: [PATCH 05/15] Update with bugfix and dtype fix --- cuda_bindings/cuda/bindings/cufile.pyx | 324 ++++++++++++++----------- 1 file changed, 188 insertions(+), 136 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index 3cde8aa8fa..c676c34801 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -56,7 +56,6 @@ cdef class _py_anon_pod1: _anon_pod1 *_ptr object _owner bint _readonly - dict _refs def __init__(self): self._ptr = <_anon_pod1 *>calloc(1, sizeof((NULL).handle)) @@ -64,7 +63,6 @@ cdef class _py_anon_pod1: raise MemoryError("Error allocating _py_anon_pod1") self._owner = None self._readonly = False - self._refs = {} def __dealloc__(self): if self._owner is None: @@ -155,17 +153,24 @@ cdef class _py_anon_pod1: obj._ptr = <_anon_pod1 *>ptr obj._owner = owner obj._readonly = readonly - obj._refs = {} return obj -_py_anon_pod3_dtype = _numpy.dtype([ - ("dev_ptr_base", _numpy.intp, ), - ("file_offset", _numpy.int64, ), - ("dev_ptr_offset", _numpy.int64, ), - ("size_", _numpy.uint64, ), - ], align=True) +cdef _get__py_anon_pod3_dtype_offsets(): + cdef _anon_pod3 pod = _anon_pod3() + return _numpy.dtype({ + 'names': ['dev_ptr_base', 'file_offset', 'dev_ptr_offset', 'size_'], + 'formats': [_numpy.intp, _numpy.int64, 
_numpy.int64, _numpy.uint64], + 'offsets': [ + (&(pod.devPtr_base)) - (&pod), + (&(pod.file_offset)) - (&pod), + (&(pod.devPtr_offset)) - (&pod), + (&(pod.size)) - (&pod), + ], + 'itemsize': sizeof((NULL).u.batch), + }) +_py_anon_pod3_dtype = _get__py_anon_pod3_dtype_offsets() cdef class _py_anon_pod3: """Empty-initialize an instance of `_anon_pod3`. @@ -177,7 +182,6 @@ cdef class _py_anon_pod3: _anon_pod3 *_ptr object _owner bint _readonly - dict _refs def __init__(self): self._ptr = <_anon_pod3 *>calloc(1, sizeof((NULL).u.batch)) @@ -185,7 +189,6 @@ cdef class _py_anon_pod3: raise MemoryError("Error allocating _py_anon_pod3") self._owner = None self._readonly = False - self._refs = {} def __dealloc__(self): if self._owner is None: @@ -298,16 +301,23 @@ cdef class _py_anon_pod3: obj._ptr = <_anon_pod3 *>ptr obj._owner = owner obj._readonly = readonly - obj._refs = {} return obj -io_events_dtype = _numpy.dtype([ - ("cookie", _numpy.intp, ), - ("status", _numpy.int32, ), - ("ret", _numpy.uint64, ), - ], align=True) +cdef _get_io_events_dtype_offsets(): + cdef CUfileIOEvents_t pod = CUfileIOEvents_t() + return _numpy.dtype({ + 'names': ['cookie', 'status', 'ret'], + 'formats': [_numpy.intp, _numpy.int32, _numpy.uint64], + 'offsets': [ + (&(pod.cookie)) - (&pod), + (&(pod.status)) - (&pod), + (&(pod.ret)) - (&pod), + ], + 'itemsize': sizeof(CUfileIOEvents_t), + }) +io_events_dtype = _get_io_events_dtype_offsets() cdef class IOEvents: """Empty-initialize an array of `CUfileIOEvents_t`. 
@@ -452,11 +462,19 @@ cdef class IOEvents: return obj -op_counter_dtype = _numpy.dtype([ - ("ok", _numpy.uint64, ), - ("err", _numpy.uint64, ), - ], align=True) +cdef _get_op_counter_dtype_offsets(): + cdef CUfileOpCounter_t pod = CUfileOpCounter_t() + return _numpy.dtype({ + 'names': ['ok', 'err'], + 'formats': [_numpy.uint64, _numpy.uint64], + 'offsets': [ + (&(pod.ok)) - (&pod), + (&(pod.err)) - (&pod), + ], + 'itemsize': sizeof(CUfileOpCounter_t), + }) +op_counter_dtype = _get_op_counter_dtype_offsets() cdef class OpCounter: """Empty-initialize an instance of `CUfileOpCounter_t`. @@ -568,39 +586,47 @@ cdef class OpCounter: return obj -per_gpu_stats_dtype = _numpy.dtype([ - ("uuid", _numpy.int8, (16,)), - ("read_bytes", _numpy.uint64, ), - ("read_bw_bytes_per_sec", _numpy.uint64, ), - ("read_utilization", _numpy.uint64, ), - ("read_duration_us", _numpy.uint64, ), - ("n_total_reads", _numpy.uint64, ), - ("n_p2p_reads", _numpy.uint64, ), - ("n_nvfs_reads", _numpy.uint64, ), - ("n_posix_reads", _numpy.uint64, ), - ("n_unaligned_reads", _numpy.uint64, ), - ("n_dr_reads", _numpy.uint64, ), - ("n_sparse_regions", _numpy.uint64, ), - ("n_inline_regions", _numpy.uint64, ), - ("n_reads_err", _numpy.uint64, ), - ("writes_bytes", _numpy.uint64, ), - ("write_bw_bytes_per_sec", _numpy.uint64, ), - ("write_utilization", _numpy.uint64, ), - ("write_duration_us", _numpy.uint64, ), - ("n_total_writes", _numpy.uint64, ), - ("n_p2p_writes", _numpy.uint64, ), - ("n_nvfs_writes", _numpy.uint64, ), - ("n_posix_writes", _numpy.uint64, ), - ("n_unaligned_writes", _numpy.uint64, ), - ("n_dr_writes", _numpy.uint64, ), - ("n_writes_err", _numpy.uint64, ), - ("n_mmap", _numpy.uint64, ), - ("n_mmap_ok", _numpy.uint64, ), - ("n_mmap_err", _numpy.uint64, ), - ("n_mmap_free", _numpy.uint64, ), - ("reg_bytes", _numpy.uint64, ), - ], align=True) - +cdef _get_per_gpu_stats_dtype_offsets(): + cdef CUfilePerGpuStats_t pod = CUfilePerGpuStats_t() + return _numpy.dtype({ + 'names': ['uuid', 
'read_bytes', 'read_bw_bytes_per_sec', 'read_utilization', 'read_duration_us', 'n_total_reads', 'n_p2p_reads', 'n_nvfs_reads', 'n_posix_reads', 'n_unaligned_reads', 'n_dr_reads', 'n_sparse_regions', 'n_inline_regions', 'n_reads_err', 'writes_bytes', 'write_bw_bytes_per_sec', 'write_utilization', 'write_duration_us', 'n_total_writes', 'n_p2p_writes', 'n_nvfs_writes', 'n_posix_writes', 'n_unaligned_writes', 'n_dr_writes', 'n_writes_err', 'n_mmap', 'n_mmap_ok', 'n_mmap_err', 'n_mmap_free', 'reg_bytes'], + 'formats': [_numpy.int8, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64], + 'offsets': [ + (&(pod.uuid)) - (&pod), + (&(pod.read_bytes)) - (&pod), + (&(pod.read_bw_bytes_per_sec)) - (&pod), + (&(pod.read_utilization)) - (&pod), + (&(pod.read_duration_us)) - (&pod), + (&(pod.n_total_reads)) - (&pod), + (&(pod.n_p2p_reads)) - (&pod), + (&(pod.n_nvfs_reads)) - (&pod), + (&(pod.n_posix_reads)) - (&pod), + (&(pod.n_unaligned_reads)) - (&pod), + (&(pod.n_dr_reads)) - (&pod), + (&(pod.n_sparse_regions)) - (&pod), + (&(pod.n_inline_regions)) - (&pod), + (&(pod.n_reads_err)) - (&pod), + (&(pod.writes_bytes)) - (&pod), + (&(pod.write_bw_bytes_per_sec)) - (&pod), + (&(pod.write_utilization)) - (&pod), + (&(pod.write_duration_us)) - (&pod), + (&(pod.n_total_writes)) - (&pod), + (&(pod.n_p2p_writes)) - (&pod), + (&(pod.n_nvfs_writes)) - (&pod), + (&(pod.n_posix_writes)) - (&pod), + (&(pod.n_unaligned_writes)) - (&pod), + (&(pod.n_dr_writes)) - (&pod), + (&(pod.n_writes_err)) - (&pod), + (&(pod.n_mmap)) - (&pod), + (&(pod.n_mmap_ok)) - (&pod), + (&(pod.n_mmap_err)) - (&pod), + (&(pod.n_mmap_free)) - 
(&pod), + (&(pod.reg_bytes)) - (&pod), + ], + 'itemsize': sizeof(CUfilePerGpuStats_t), + }) + +per_gpu_stats_dtype = _get_per_gpu_stats_dtype_offsets() cdef class PerGpuStats: """Empty-initialize an instance of `CUfilePerGpuStats_t`. @@ -1024,12 +1050,20 @@ cdef class PerGpuStats: return obj -descr_dtype = _numpy.dtype([ - ("type", _numpy.int32, ), - ("handle", _py_anon_pod1_dtype, ), - ("fs_ops", _numpy.intp, ), - ], align=True) +cdef _get_descr_dtype_offsets(): + cdef CUfileDescr_t pod = CUfileDescr_t() + return _numpy.dtype({ + 'names': ['type', 'handle', 'fs_ops'], + 'formats': [_numpy.int32, _py_anon_pod1_dtype, _numpy.intp], + 'offsets': [ + (&(pod.type)) - (&pod), + (&(pod.handle)) - (&pod), + (&(pod.fs_ops)) - (&pod), + ], + 'itemsize': sizeof(CUfileDescr_t), + }) +descr_dtype = _get_descr_dtype_offsets() cdef class Descr: """Empty-initialize an array of `CUfileDescr_t`. @@ -1294,52 +1328,60 @@ cdef class _py_anon_pod2: return obj -stats_level1_dtype = _numpy.dtype([ - ("read_ops", op_counter_dtype, ), - ("write_ops", op_counter_dtype, ), - ("hdl_register_ops", op_counter_dtype, ), - ("hdl_deregister_ops", op_counter_dtype, ), - ("buf_register_ops", op_counter_dtype, ), - ("buf_deregister_ops", op_counter_dtype, ), - ("read_bytes", _numpy.uint64, ), - ("write_bytes", _numpy.uint64, ), - ("read_bw_bytes_per_sec", _numpy.uint64, ), - ("write_bw_bytes_per_sec", _numpy.uint64, ), - ("read_lat_avg_us", _numpy.uint64, ), - ("write_lat_avg_us", _numpy.uint64, ), - ("read_ops_per_sec", _numpy.uint64, ), - ("write_ops_per_sec", _numpy.uint64, ), - ("read_lat_sum_us", _numpy.uint64, ), - ("write_lat_sum_us", _numpy.uint64, ), - ("batch_submit_ops", op_counter_dtype, ), - ("batch_complete_ops", op_counter_dtype, ), - ("batch_setup_ops", op_counter_dtype, ), - ("batch_cancel_ops", op_counter_dtype, ), - ("batch_destroy_ops", op_counter_dtype, ), - ("batch_enqueued_ops", op_counter_dtype, ), - ("batch_posix_enqueued_ops", op_counter_dtype, ), - ("batch_processed_ops", 
op_counter_dtype, ), - ("batch_posix_processed_ops", op_counter_dtype, ), - ("batch_nvfs_submit_ops", op_counter_dtype, ), - ("batch_p2p_submit_ops", op_counter_dtype, ), - ("batch_aio_submit_ops", op_counter_dtype, ), - ("batch_iouring_submit_ops", op_counter_dtype, ), - ("batch_mixed_io_submit_ops", op_counter_dtype, ), - ("batch_total_submit_ops", op_counter_dtype, ), - ("batch_read_bytes", _numpy.uint64, ), - ("batch_write_bytes", _numpy.uint64, ), - ("batch_read_bw_bytes", _numpy.uint64, ), - ("batch_write_bw_bytes", _numpy.uint64, ), - ("batch_submit_lat_avg_us", _numpy.uint64, ), - ("batch_completion_lat_avg_us", _numpy.uint64, ), - ("batch_submit_ops_per_sec", _numpy.uint64, ), - ("batch_complete_ops_per_sec", _numpy.uint64, ), - ("batch_submit_lat_sum_us", _numpy.uint64, ), - ("batch_completion_lat_sum_us", _numpy.uint64, ), - ("last_batch_read_bytes", _numpy.uint64, ), - ("last_batch_write_bytes", _numpy.uint64, ), - ], align=True) - +cdef _get_stats_level1_dtype_offsets(): + cdef CUfileStatsLevel1_t pod = CUfileStatsLevel1_t() + return _numpy.dtype({ + 'names': ['read_ops', 'write_ops', 'hdl_register_ops', 'hdl_deregister_ops', 'buf_register_ops', 'buf_deregister_ops', 'read_bytes', 'write_bytes', 'read_bw_bytes_per_sec', 'write_bw_bytes_per_sec', 'read_lat_avg_us', 'write_lat_avg_us', 'read_ops_per_sec', 'write_ops_per_sec', 'read_lat_sum_us', 'write_lat_sum_us', 'batch_submit_ops', 'batch_complete_ops', 'batch_setup_ops', 'batch_cancel_ops', 'batch_destroy_ops', 'batch_enqueued_ops', 'batch_posix_enqueued_ops', 'batch_processed_ops', 'batch_posix_processed_ops', 'batch_nvfs_submit_ops', 'batch_p2p_submit_ops', 'batch_aio_submit_ops', 'batch_iouring_submit_ops', 'batch_mixed_io_submit_ops', 'batch_total_submit_ops', 'batch_read_bytes', 'batch_write_bytes', 'batch_read_bw_bytes', 'batch_write_bw_bytes', 'batch_submit_lat_avg_us', 'batch_completion_lat_avg_us', 'batch_submit_ops_per_sec', 'batch_complete_ops_per_sec', 'batch_submit_lat_sum_us', 
'batch_completion_lat_sum_us', 'last_batch_read_bytes', 'last_batch_write_bytes'], + 'formats': [op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64], + 'offsets': [ + (&(pod.read_ops)) - (&pod), + (&(pod.write_ops)) - (&pod), + (&(pod.hdl_register_ops)) - (&pod), + (&(pod.hdl_deregister_ops)) - (&pod), + (&(pod.buf_register_ops)) - (&pod), + (&(pod.buf_deregister_ops)) - (&pod), + (&(pod.read_bytes)) - (&pod), + (&(pod.write_bytes)) - (&pod), + (&(pod.read_bw_bytes_per_sec)) - (&pod), + (&(pod.write_bw_bytes_per_sec)) - (&pod), + (&(pod.read_lat_avg_us)) - (&pod), + (&(pod.write_lat_avg_us)) - (&pod), + (&(pod.read_ops_per_sec)) - (&pod), + (&(pod.write_ops_per_sec)) - (&pod), + (&(pod.read_lat_sum_us)) - (&pod), + (&(pod.write_lat_sum_us)) - (&pod), + (&(pod.batch_submit_ops)) - (&pod), + (&(pod.batch_complete_ops)) - (&pod), + (&(pod.batch_setup_ops)) - (&pod), + (&(pod.batch_cancel_ops)) - (&pod), + (&(pod.batch_destroy_ops)) - (&pod), + (&(pod.batch_enqueued_ops)) - (&pod), + (&(pod.batch_posix_enqueued_ops)) - (&pod), + (&(pod.batch_processed_ops)) - (&pod), + (&(pod.batch_posix_processed_ops)) - (&pod), + (&(pod.batch_nvfs_submit_ops)) - (&pod), + (&(pod.batch_p2p_submit_ops)) - (&pod), + (&(pod.batch_aio_submit_ops)) - (&pod), + (&(pod.batch_iouring_submit_ops)) - (&pod), + (&(pod.batch_mixed_io_submit_ops)) - 
(&pod), + (&(pod.batch_total_submit_ops)) - (&pod), + (&(pod.batch_read_bytes)) - (&pod), + (&(pod.batch_write_bytes)) - (&pod), + (&(pod.batch_read_bw_bytes)) - (&pod), + (&(pod.batch_write_bw_bytes)) - (&pod), + (&(pod.batch_submit_lat_avg_us)) - (&pod), + (&(pod.batch_completion_lat_avg_us)) - (&pod), + (&(pod.batch_submit_ops_per_sec)) - (&pod), + (&(pod.batch_complete_ops_per_sec)) - (&pod), + (&(pod.batch_submit_lat_sum_us)) - (&pod), + (&(pod.batch_completion_lat_sum_us)) - (&pod), + (&(pod.last_batch_read_bytes)) - (&pod), + (&(pod.last_batch_write_bytes)) - (&pod), + ], + 'itemsize': sizeof(CUfileStatsLevel1_t), + }) + +stats_level1_dtype = _get_stats_level1_dtype_offsets() cdef class StatsLevel1: """Empty-initialize an instance of `CUfileStatsLevel1_t`. @@ -1923,14 +1965,22 @@ cdef class StatsLevel1: return obj -io_params_dtype = _numpy.dtype([ - ("mode", _numpy.int32, ), - ("u", _py_anon_pod2_dtype, ), - ("fh", _numpy.intp, ), - ("opcode", _numpy.int32, ), - ("cookie", _numpy.intp, ), - ], align=True) +cdef _get_io_params_dtype_offsets(): + cdef CUfileIOParams_t pod = CUfileIOParams_t() + return _numpy.dtype({ + 'names': ['mode', 'u', 'fh', 'opcode', 'cookie'], + 'formats': [_numpy.int32, _py_anon_pod2_dtype, _numpy.intp, _numpy.int32, _numpy.intp], + 'offsets': [ + (&(pod.mode)) - (&pod), + (&(pod.u)) - (&pod), + (&(pod.fh)) - (&pod), + (&(pod.opcode)) - (&pod), + (&(pod.cookie)) - (&pod), + ], + 'itemsize': sizeof(CUfileIOParams_t), + }) +io_params_dtype = _get_io_params_dtype_offsets() cdef class IOParams: """Empty-initialize an array of `CUfileIOParams_t`. 
@@ -2109,12 +2159,20 @@ cdef class IOParams: return obj -stats_level2_dtype = _numpy.dtype([ - ("basic", stats_level1_dtype, ), - ("read_size_kb_hist", _numpy.uint64, (32,)), - ("write_size_kb_hist", _numpy.uint64, (32,)), - ], align=True) +cdef _get_stats_level2_dtype_offsets(): + cdef CUfileStatsLevel2_t pod = CUfileStatsLevel2_t() + return _numpy.dtype({ + 'names': ['basic', 'read_size_kb_hist', 'write_size_kb_hist'], + 'formats': [stats_level1_dtype, _numpy.uint64, _numpy.uint64], + 'offsets': [ + (&(pod.basic)) - (&pod), + (&(pod.read_size_kb_hist)) - (&pod), + (&(pod.write_size_kb_hist)) - (&pod), + ], + 'itemsize': sizeof(CUfileStatsLevel2_t), + }) +stats_level2_dtype = _get_stats_level2_dtype_offsets() cdef class StatsLevel2: """Empty-initialize an instance of `CUfileStatsLevel2_t`. @@ -2246,12 +2304,20 @@ cdef class StatsLevel2: return obj -stats_level3_dtype = _numpy.dtype([ - ("detailed", stats_level2_dtype, ), - ("num_gpus", _numpy.uint32, ), - ("per_gpu_stats", per_gpu_stats_dtype, (16,)), - ], align=True) +cdef _get_stats_level3_dtype_offsets(): + cdef CUfileStatsLevel3_t pod = CUfileStatsLevel3_t() + return _numpy.dtype({ + 'names': ['detailed', 'num_gpus', 'per_gpu_stats'], + 'formats': [stats_level2_dtype, _numpy.uint32, per_gpu_stats_dtype], + 'offsets': [ + (&(pod.detailed)) - (&pod), + (&(pod.num_gpus)) - (&pod), + (&(pod.per_gpu_stats)) - (&pod), + ], + 'itemsize': sizeof(CUfileStatsLevel3_t), + }) +stats_level3_dtype = _get_stats_level3_dtype_offsets() cdef class StatsLevel3: """Empty-initialize an instance of `CUfileStatsLevel3_t`. @@ -2378,20 +2444,6 @@ cdef class StatsLevel3: return obj -# Hack: Overwrite the generated descr_dtype, which NumPy deduced the offset wrong. -descr_dtype = _numpy.dtype({ - "names": ['type', 'handle', 'fs_ops'], - "formats": [_numpy.int32, _py_anon_pod1_dtype, _numpy.intp], - "offsets": [0, 8, 16], -}, align=True) - -# Hack: Overwrite the generated io_params_dtype, which NumPy deduced the offset wrong. 
-io_params_dtype = _numpy.dtype({ - "names": ['mode', 'u', 'fh', 'opcode', 'cookie'], - "formats": [_numpy.int32, _py_anon_pod2_dtype, _numpy.intp, _numpy.int32, _numpy.intp], - "offsets": [0, 8, 40, 48, 56], -}, align=True) - ############################################################################### # Enum From ffbc5c995a2c0cb7ed966b810d6cc21253050137 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 1 Dec 2025 15:45:42 -0500 Subject: [PATCH 06/15] Free-threading fixes --- cuda_bindings/cuda/bindings/cufile.pyx | 96 +++++++++++++++++++++----- 1 file changed, 80 insertions(+), 16 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index c676c34801..8488572222 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -55,6 +55,7 @@ cdef class _py_anon_pod1: cdef: _anon_pod1 *_ptr object _owner + bint _owned bint _readonly def __init__(self): @@ -62,11 +63,15 @@ cdef class _py_anon_pod1: if self._ptr == NULL: raise MemoryError("Error allocating _py_anon_pod1") self._owner = None + self._owned = True self._readonly = False def __dealloc__(self): - if self._owner is None: - free(self._ptr) + cdef _anon_pod1 *ptr + if self._owner is None and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) def __repr__(self): return f"<{__name__}._py_anon_pod1 object at {hex(id(self))}>" @@ -96,6 +101,7 @@ cdef class _py_anon_pod1: raise MemoryError("Error allocating _py_anon_pod1") memcpy(self._ptr, val.ctypes.data, sizeof((NULL).handle)) self._owner = None + self._owned = True self._readonly = not val.flags.writeable else: setattr(self, key, val) @@ -149,9 +155,11 @@ cdef class _py_anon_pod1: raise MemoryError("Error allocating _py_anon_pod1") memcpy((obj._ptr), ptr, sizeof((NULL).handle)) obj._owner = None + obj._owned = True else: obj._ptr = <_anon_pod1 *>ptr obj._owner = owner + obj._owned = False obj._readonly = readonly return obj @@ -181,6 +189,7 @@ 
cdef class _py_anon_pod3: cdef: _anon_pod3 *_ptr object _owner + bint _owned bint _readonly def __init__(self): @@ -188,11 +197,15 @@ cdef class _py_anon_pod3: if self._ptr == NULL: raise MemoryError("Error allocating _py_anon_pod3") self._owner = None + self._owned = True self._readonly = False def __dealloc__(self): - if self._owner is None: - free(self._ptr) + cdef _anon_pod3 *ptr + if self._owner is None and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) def __repr__(self): return f"<{__name__}._py_anon_pod3 object at {hex(id(self))}>" @@ -222,6 +235,7 @@ cdef class _py_anon_pod3: raise MemoryError("Error allocating _py_anon_pod3") memcpy(self._ptr, val.ctypes.data, sizeof((NULL).u.batch)) self._owner = None + self._owned = True self._readonly = not val.flags.writeable else: setattr(self, key, val) @@ -297,9 +311,11 @@ cdef class _py_anon_pod3: raise MemoryError("Error allocating _py_anon_pod3") memcpy((obj._ptr), ptr, sizeof((NULL).u.batch)) obj._owner = None + obj._owned = True else: obj._ptr = <_anon_pod3 *>ptr obj._owner = owner + obj._owned = False obj._readonly = readonly return obj @@ -485,6 +501,7 @@ cdef class OpCounter: cdef: CUfileOpCounter_t *_ptr object _owner + bint _owned bint _readonly def __init__(self): @@ -492,11 +509,15 @@ cdef class OpCounter: if self._ptr == NULL: raise MemoryError("Error allocating OpCounter") self._owner = None + self._owned = True self._readonly = False def __dealloc__(self): - if self._owner is None: - free(self._ptr) + cdef CUfileOpCounter_t *ptr + if self._owner is None and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) def __repr__(self): return f"<{__name__}.OpCounter object at {hex(id(self))}>" @@ -526,6 +547,7 @@ cdef class OpCounter: raise MemoryError("Error allocating OpCounter") memcpy(self._ptr, val.ctypes.data, sizeof(CUfileOpCounter_t)) self._owner = None + self._owned = True self._readonly = not val.flags.writeable else: setattr(self, key, val) @@ -579,9 +601,11 @@ 
cdef class OpCounter: raise MemoryError("Error allocating OpCounter") memcpy((obj._ptr), ptr, sizeof(CUfileOpCounter_t)) obj._owner = None + obj._owned = True else: obj._ptr = ptr obj._owner = owner + obj._owned = False obj._readonly = readonly return obj @@ -637,6 +661,7 @@ cdef class PerGpuStats: cdef: CUfilePerGpuStats_t *_ptr object _owner + bint _owned bint _readonly def __init__(self): @@ -644,11 +669,15 @@ cdef class PerGpuStats: if self._ptr == NULL: raise MemoryError("Error allocating PerGpuStats") self._owner = None + self._owned = True self._readonly = False def __dealloc__(self): - if self._owner is None: - free(self._ptr) + cdef CUfilePerGpuStats_t *ptr + if self._owner is None and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) def __repr__(self): return f"<{__name__}.PerGpuStats object at {hex(id(self))}>" @@ -678,6 +707,7 @@ cdef class PerGpuStats: raise MemoryError("Error allocating PerGpuStats") memcpy(self._ptr, val.ctypes.data, sizeof(CUfilePerGpuStats_t)) self._owner = None + self._owned = True self._readonly = not val.flags.writeable else: setattr(self, key, val) @@ -1043,9 +1073,11 @@ cdef class PerGpuStats: raise MemoryError("Error allocating PerGpuStats") memcpy((obj._ptr), ptr, sizeof(CUfilePerGpuStats_t)) obj._owner = None + obj._owned = True else: obj._ptr = ptr obj._owner = owner + obj._owned = False obj._readonly = readonly return obj @@ -1237,6 +1269,7 @@ cdef class _py_anon_pod2: cdef: _anon_pod2 *_ptr object _owner + bint _owned bint _readonly def __init__(self): @@ -1244,11 +1277,15 @@ cdef class _py_anon_pod2: if self._ptr == NULL: raise MemoryError("Error allocating _py_anon_pod2") self._owner = None + self._owned = True self._readonly = False def __dealloc__(self): - if self._owner is None: - free(self._ptr) + cdef _anon_pod2 *ptr + if self._owner is None and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) def __repr__(self): return f"<{__name__}._py_anon_pod2 object at {hex(id(self))}>" 
@@ -1278,6 +1315,7 @@ cdef class _py_anon_pod2: raise MemoryError("Error allocating _py_anon_pod2") memcpy(self._ptr, val.ctypes.data, sizeof((NULL).u)) self._owner = None + self._owned = True self._readonly = not val.flags.writeable else: setattr(self, key, val) @@ -1321,9 +1359,11 @@ cdef class _py_anon_pod2: raise MemoryError("Error allocating _py_anon_pod2") memcpy((obj._ptr), ptr, sizeof((NULL).u)) obj._owner = None + obj._owned = True else: obj._ptr = <_anon_pod2 *>ptr obj._owner = owner + obj._owned = False obj._readonly = readonly return obj @@ -1392,6 +1432,7 @@ cdef class StatsLevel1: cdef: CUfileStatsLevel1_t *_ptr object _owner + bint _owned bint _readonly def __init__(self): @@ -1399,11 +1440,15 @@ cdef class StatsLevel1: if self._ptr == NULL: raise MemoryError("Error allocating StatsLevel1") self._owner = None + self._owned = True self._readonly = False def __dealloc__(self): - if self._owner is None: - free(self._ptr) + cdef CUfileStatsLevel1_t *ptr + if self._owner is None and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) def __repr__(self): return f"<{__name__}.StatsLevel1 object at {hex(id(self))}>" @@ -1433,6 +1478,7 @@ cdef class StatsLevel1: raise MemoryError("Error allocating StatsLevel1") memcpy(self._ptr, val.ctypes.data, sizeof(CUfileStatsLevel1_t)) self._owner = None + self._owned = True self._readonly = not val.flags.writeable else: setattr(self, key, val) @@ -1958,9 +2004,11 @@ cdef class StatsLevel1: raise MemoryError("Error allocating StatsLevel1") memcpy((obj._ptr), ptr, sizeof(CUfileStatsLevel1_t)) obj._owner = None + obj._owned = True else: obj._ptr = ptr obj._owner = owner + obj._owned = False obj._readonly = readonly return obj @@ -2183,6 +2231,7 @@ cdef class StatsLevel2: cdef: CUfileStatsLevel2_t *_ptr object _owner + bint _owned bint _readonly def __init__(self): @@ -2190,11 +2239,15 @@ cdef class StatsLevel2: if self._ptr == NULL: raise MemoryError("Error allocating StatsLevel2") self._owner = None + 
self._owned = True self._readonly = False def __dealloc__(self): - if self._owner is None: - free(self._ptr) + cdef CUfileStatsLevel2_t *ptr + if self._owner is None and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) def __repr__(self): return f"<{__name__}.StatsLevel2 object at {hex(id(self))}>" @@ -2224,6 +2277,7 @@ cdef class StatsLevel2: raise MemoryError("Error allocating StatsLevel2") memcpy(self._ptr, val.ctypes.data, sizeof(CUfileStatsLevel2_t)) self._owner = None + self._owned = True self._readonly = not val.flags.writeable else: setattr(self, key, val) @@ -2297,9 +2351,11 @@ cdef class StatsLevel2: raise MemoryError("Error allocating StatsLevel2") memcpy((obj._ptr), ptr, sizeof(CUfileStatsLevel2_t)) obj._owner = None + obj._owned = True else: obj._ptr = ptr obj._owner = owner + obj._owned = False obj._readonly = readonly return obj @@ -2328,6 +2384,7 @@ cdef class StatsLevel3: cdef: CUfileStatsLevel3_t *_ptr object _owner + bint _owned bint _readonly def __init__(self): @@ -2335,11 +2392,15 @@ cdef class StatsLevel3: if self._ptr == NULL: raise MemoryError("Error allocating StatsLevel3") self._owner = None + self._owned = True self._readonly = False def __dealloc__(self): - if self._owner is None: - free(self._ptr) + cdef CUfileStatsLevel3_t *ptr + if self._owner is None and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) def __repr__(self): return f"<{__name__}.StatsLevel3 object at {hex(id(self))}>" @@ -2369,6 +2430,7 @@ cdef class StatsLevel3: raise MemoryError("Error allocating StatsLevel3") memcpy(self._ptr, val.ctypes.data, sizeof(CUfileStatsLevel3_t)) self._owner = None + self._owned = True self._readonly = not val.flags.writeable else: setattr(self, key, val) @@ -2437,9 +2499,11 @@ cdef class StatsLevel3: raise MemoryError("Error allocating StatsLevel3") memcpy((obj._ptr), ptr, sizeof(CUfileStatsLevel3_t)) obj._owner = None + obj._owned = True else: obj._ptr = ptr obj._owner = owner + obj._owned = False 
obj._readonly = readonly return obj From 5f71c54e0d4a917854a4a5b465823f5ff580090c Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 1 Dec 2025 16:23:16 -0500 Subject: [PATCH 07/15] Fix free-threading --- cuda_bindings/cuda/bindings/cufile.pyx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index 8488572222..b9991ce3b7 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -68,7 +68,7 @@ cdef class _py_anon_pod1: def __dealloc__(self): cdef _anon_pod1 *ptr - if self._owner is None and self._ptr != NULL: + if self._owned and self._ptr != NULL: ptr = self._ptr self._ptr = NULL free(ptr) @@ -202,7 +202,7 @@ cdef class _py_anon_pod3: def __dealloc__(self): cdef _anon_pod3 *ptr - if self._owner is None and self._ptr != NULL: + if self._owned and self._ptr != NULL: ptr = self._ptr self._ptr = NULL free(ptr) @@ -514,7 +514,7 @@ cdef class OpCounter: def __dealloc__(self): cdef CUfileOpCounter_t *ptr - if self._owner is None and self._ptr != NULL: + if self._owned and self._ptr != NULL: ptr = self._ptr self._ptr = NULL free(ptr) @@ -674,7 +674,7 @@ cdef class PerGpuStats: def __dealloc__(self): cdef CUfilePerGpuStats_t *ptr - if self._owner is None and self._ptr != NULL: + if self._owned and self._ptr != NULL: ptr = self._ptr self._ptr = NULL free(ptr) @@ -1282,7 +1282,7 @@ cdef class _py_anon_pod2: def __dealloc__(self): cdef _anon_pod2 *ptr - if self._owner is None and self._ptr != NULL: + if self._owned and self._ptr != NULL: ptr = self._ptr self._ptr = NULL free(ptr) @@ -1445,7 +1445,7 @@ cdef class StatsLevel1: def __dealloc__(self): cdef CUfileStatsLevel1_t *ptr - if self._owner is None and self._ptr != NULL: + if self._owned and self._ptr != NULL: ptr = self._ptr self._ptr = NULL free(ptr) @@ -2244,7 +2244,7 @@ cdef class StatsLevel2: def __dealloc__(self): cdef CUfileStatsLevel2_t *ptr - if 
self._owner is None and self._ptr != NULL: + if self._owned and self._ptr != NULL: ptr = self._ptr self._ptr = NULL free(ptr) @@ -2397,7 +2397,7 @@ cdef class StatsLevel3: def __dealloc__(self): cdef CUfileStatsLevel3_t *ptr - if self._owner is None and self._ptr != NULL: + if self._owned and self._ptr != NULL: ptr = self._ptr self._ptr = NULL free(ptr) From 8ae081efce2bac2d57249f8c6496d85096e34b85 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 2 Dec 2025 16:00:22 -0500 Subject: [PATCH 08/15] Fix types --- cuda_bindings/cuda/bindings/cufile.pyx | 64 ++++++++------------------ 1 file changed, 18 insertions(+), 46 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index b9991ce3b7..3ff38fefef 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -1112,8 +1112,6 @@ cdef class Descr: cdef: readonly object _data - readonly tuple _handle - def __init__(self, size=1): @@ -1151,13 +1149,6 @@ cdef class Descr: return False return bool((self_data == other._data).all()) - @property - def handle(self): - """_py_anon_pod1: """ - if self._data.size == 1: - return self._handle[0] - return self._handle - @property def type(self): """Union[~_numpy.int32, int]: """ @@ -1169,6 +1160,15 @@ cdef class Descr: def type(self, val): self._data.type = val + @property + def handle(self): + """_py_anon_pod1_dtype: """ + return self._data.handle + + @handle.setter + def handle(self, val): + self._data.handle = val + @property def fs_ops(self): """Union[~_numpy.intp, int]: """ @@ -1215,13 +1215,6 @@ cdef class Descr: raise ValueError("data array must be of dtype descr_dtype") obj._data = data.view(_numpy.recarray) - handle_list = list() - for i in range(obj._data.size): - addr = obj._data.handle[i].__array_interface__['data'][0] - _py_anon_pod1_obj = _py_anon_pod1.from_ptr(addr, owner=obj) - handle_list.append(_py_anon_pod1_obj) - - obj._handle = tuple(handle_list) return obj 
@staticmethod @@ -1242,13 +1235,6 @@ cdef class Descr: data = _numpy.ndarray(size, buffer=buf, dtype=descr_dtype) obj._data = data.view(_numpy.recarray) - handle_list = list() - for i in range(obj._data.size): - addr = obj._data.handle[i].__array_interface__['data'][0] - _py_anon_pod1_obj = _py_anon_pod1.from_ptr(addr, owner=obj) - handle_list.append(_py_anon_pod1_obj) - - obj._handle = tuple(handle_list) return obj @@ -2045,8 +2031,6 @@ cdef class IOParams: cdef: readonly object _data - readonly tuple _u - def __init__(self, size=1): @@ -2084,13 +2068,6 @@ cdef class IOParams: return False return bool((self_data == other._data).all()) - @property - def u(self): - """_py_anon_pod2: """ - if self._data.size == 1: - return self._u[0] - return self._u - @property def mode(self): """Union[~_numpy.int32, int]: """ @@ -2102,6 +2079,15 @@ cdef class IOParams: def mode(self, val): self._data.mode = val + @property + def u(self): + """_py_anon_pod2_dtype: """ + return self._data.u + + @u.setter + def u(self, val): + self._data.u = val + @property def fh(self): """Union[~_numpy.intp, int]: """ @@ -2170,13 +2156,6 @@ cdef class IOParams: raise ValueError("data array must be of dtype io_params_dtype") obj._data = data.view(_numpy.recarray) - u_list = list() - for i in range(obj._data.size): - addr = obj._data.u[i].__array_interface__['data'][0] - _py_anon_pod2_obj = _py_anon_pod2.from_ptr(addr, owner=obj) - u_list.append(_py_anon_pod2_obj) - - obj._u = tuple(u_list) return obj @staticmethod @@ -2197,13 +2176,6 @@ cdef class IOParams: data = _numpy.ndarray(size, buffer=buf, dtype=io_params_dtype) obj._data = data.view(_numpy.recarray) - u_list = list() - for i in range(obj._data.size): - addr = obj._data.u[i].__array_interface__['data'][0] - _py_anon_pod2_obj = _py_anon_pod2.from_ptr(addr, owner=obj) - u_list.append(_py_anon_pod2_obj) - - obj._u = tuple(u_list) return obj From 57af93d306a85b94e666443f6e47327b95694e7a Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 
2 Dec 2025 16:22:22 -0500 Subject: [PATCH 09/15] More bugfixes --- cuda_bindings/cuda/bindings/cufile.pyx | 22 ++++++++++++---------- cuda_bindings/tests/test_cufile.py | 5 +++-- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index 3ff38fefef..dcae3db47e 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -23,15 +23,17 @@ from libc.string cimport memcmp, memcpy import numpy as _numpy -cdef __from_data(data, dtype_name, expected_dtype, lowpp_type_from_ptr): +cdef __from_data(data, dtype_name, expected_dtype, lowpp_type): # _numpy.recarray is a subclass of _numpy.ndarray, so implicitly handled here. + if isinstance(data, lowpp_type): + return data if not isinstance(data, _numpy.ndarray): raise TypeError("data argument must be a NumPy ndarray") if data.size != 1: raise ValueError("data array must have a size of 1") if data.dtype != expected_dtype: raise ValueError(f"data array must be of dtype {dtype_name}") - return lowpp_type_from_ptr(data.ctypes.data, not data.flags.writeable, data) + return lowpp_type.from_ptr(data.ctypes.data, not data.flags.writeable, data) ############################################################################### # POD @@ -135,7 +137,7 @@ cdef class _py_anon_pod1: Args: data (_numpy.ndarray): a single-element array of dtype `_py_anon_pod1_dtype` holding the data. """ - return __from_data(data, "_py_anon_pod1_dtype", _py_anon_pod1_dtype, _py_anon_pod1.from_ptr) + return __from_data(data, "_py_anon_pod1_dtype", _py_anon_pod1_dtype, _py_anon_pod1) @staticmethod def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): @@ -291,7 +293,7 @@ cdef class _py_anon_pod3: Args: data (_numpy.ndarray): a single-element array of dtype `_py_anon_pod3_dtype` holding the data. 
""" - return __from_data(data, "_py_anon_pod3_dtype", _py_anon_pod3_dtype, _py_anon_pod3.from_ptr) + return __from_data(data, "_py_anon_pod3_dtype", _py_anon_pod3_dtype, _py_anon_pod3) @staticmethod def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): @@ -581,7 +583,7 @@ cdef class OpCounter: Args: data (_numpy.ndarray): a single-element array of dtype `op_counter_dtype` holding the data. """ - return __from_data(data, "op_counter_dtype", op_counter_dtype, OpCounter.from_ptr) + return __from_data(data, "op_counter_dtype", op_counter_dtype, OpCounter) @staticmethod def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): @@ -1053,7 +1055,7 @@ cdef class PerGpuStats: Args: data (_numpy.ndarray): a single-element array of dtype `per_gpu_stats_dtype` holding the data. """ - return __from_data(data, "per_gpu_stats_dtype", per_gpu_stats_dtype, PerGpuStats.from_ptr) + return __from_data(data, "per_gpu_stats_dtype", per_gpu_stats_dtype, PerGpuStats) @staticmethod def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): @@ -1325,7 +1327,7 @@ cdef class _py_anon_pod2: Args: data (_numpy.ndarray): a single-element array of dtype `_py_anon_pod2_dtype` holding the data. """ - return __from_data(data, "_py_anon_pod2_dtype", _py_anon_pod2_dtype, _py_anon_pod2.from_ptr) + return __from_data(data, "_py_anon_pod2_dtype", _py_anon_pod2_dtype, _py_anon_pod2) @staticmethod def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): @@ -1970,7 +1972,7 @@ cdef class StatsLevel1: Args: data (_numpy.ndarray): a single-element array of dtype `stats_level1_dtype` holding the data. 
""" - return __from_data(data, "stats_level1_dtype", stats_level1_dtype, StatsLevel1.from_ptr) + return __from_data(data, "stats_level1_dtype", stats_level1_dtype, StatsLevel1) @staticmethod def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): @@ -2303,7 +2305,7 @@ cdef class StatsLevel2: Args: data (_numpy.ndarray): a single-element array of dtype `stats_level2_dtype` holding the data. """ - return __from_data(data, "stats_level2_dtype", stats_level2_dtype, StatsLevel2.from_ptr) + return __from_data(data, "stats_level2_dtype", stats_level2_dtype, StatsLevel2) @staticmethod def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): @@ -2451,7 +2453,7 @@ cdef class StatsLevel3: Args: data (_numpy.ndarray): a single-element array of dtype `stats_level3_dtype` holding the data. """ - return __from_data(data, "stats_level3_dtype", stats_level3_dtype, StatsLevel3.from_ptr) + return __from_data(data, "stats_level3_dtype", stats_level3_dtype, StatsLevel3) @staticmethod def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index 8ac12dfc7c..0afcee031e 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -12,6 +12,7 @@ from functools import cache import cuda.bindings.driver as cuda +import numpy as np import pytest # Configure logging to show INFO level and above @@ -2098,8 +2099,8 @@ def test_get_stats_l2(): # Verify L2 histogram fields contain data # Access numpy array fields: histograms are numpy arrays - read_hist_total = int(stats.read_size_kb_hist.sum()) - write_hist_total = int(stats.write_size_kb_hist.sum()) + read_hist_total = int(np.asarray(stats.read_size_kb_hist).sum()) + write_hist_total = int(np.asarray(stats.write_size_kb_hist).sum()) assert read_hist_total > 0 or write_hist_total > 0, "Expected L2 histogram data" # L2 also contains L1 basic stats - verify using OpCounter class From 
67615efc4d55be8cb61c0cec9cf68f0c5cc335b7 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 2 Dec 2025 16:34:56 -0500 Subject: [PATCH 10/15] Make pergpustats an array --- cuda_bindings/cuda/bindings/cufile.pyx | 422 +++++++++++++------------ cuda_bindings/tests/test_cufile.py | 1 - 2 files changed, 215 insertions(+), 208 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index dcae3db47e..4744aaea3a 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -655,432 +655,440 @@ cdef _get_per_gpu_stats_dtype_offsets(): per_gpu_stats_dtype = _get_per_gpu_stats_dtype_offsets() cdef class PerGpuStats: - """Empty-initialize an instance of `CUfilePerGpuStats_t`. + """Empty-initialize an array of `CUfilePerGpuStats_t`. + + The resulting object is of length `size` and of dtype `per_gpu_stats_dtype`. + If default-constructed, the instance represents a single struct. + + Args: + size (int): number of structs, default=1. .. 
seealso:: `CUfilePerGpuStats_t` """ cdef: - CUfilePerGpuStats_t *_ptr - object _owner - bint _owned - bint _readonly + readonly object _data - def __init__(self): - self._ptr = calloc(1, sizeof(CUfilePerGpuStats_t)) - if self._ptr == NULL: - raise MemoryError("Error allocating PerGpuStats") - self._owner = None - self._owned = True - self._readonly = False - def __dealloc__(self): - cdef CUfilePerGpuStats_t *ptr - if self._owned and self._ptr != NULL: - ptr = self._ptr - self._ptr = NULL - free(ptr) + + def __init__(self, size=1): + arr = _numpy.empty(size, dtype=per_gpu_stats_dtype) + self._data = arr.view(_numpy.recarray) + assert self._data.itemsize == sizeof(CUfilePerGpuStats_t), \ + f"itemsize {self._data.itemsize} mismatches struct size { sizeof(CUfilePerGpuStats_t) }" def __repr__(self): - return f"<{__name__}.PerGpuStats object at {hex(id(self))}>" + if self._data.size > 1: + return f"<{__name__}.PerGpuStats_Array_{self._data.size} object at {hex(id(self))}>" + else: + return f"<{__name__}.PerGpuStats object at {hex(id(self))}>" @property def ptr(self): """Get the pointer address to the data as Python :class:`int`.""" - return (self._ptr) + return self._data.ctypes.data cdef intptr_t _get_ptr(self): - return (self._ptr) + return self._data.ctypes.data def __int__(self): - return (self._ptr) + if self._data.size > 1: + raise TypeError("int() argument must be a bytes-like object of size 1. 
" + "To get the pointer address of an array, use .ptr") + return self._data.ctypes.data + + def __len__(self): + return self._data.size def __eq__(self, other): - cdef PerGpuStats other_ - if not isinstance(other, PerGpuStats): + cdef object self_data = self._data + if (not isinstance(other, PerGpuStats)) or self_data.size != other._data.size or self_data.dtype != other._data.dtype: return False - other_ = other - return (memcmp((self._ptr), (other_._ptr), sizeof(CUfilePerGpuStats_t)) == 0) - - def __setitem__(self, key, val): - if key == 0 and isinstance(val, _numpy.ndarray): - self._ptr = malloc(sizeof(CUfilePerGpuStats_t)) - if self._ptr == NULL: - raise MemoryError("Error allocating PerGpuStats") - memcpy(self._ptr, val.ctypes.data, sizeof(CUfilePerGpuStats_t)) - self._owner = None - self._owned = True - self._readonly = not val.flags.writeable - else: - setattr(self, key, val) + return bool((self_data == other._data).all()) @property def uuid(self): """~_numpy.int8: (array of length 16).""" - return cpython.PyUnicode_FromString(self._ptr[0].uuid) + return self._data.uuid @uuid.setter def uuid(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - cdef bytes buf = val.encode() - if len(buf) >= 16: - raise ValueError("String too long for field uuid, max length is 15") - cdef char *ptr = buf - memcpy((self._ptr[0].uuid), ptr, 16) + self._data.uuid = val @property def read_bytes(self): - """int: """ - return self._ptr[0].read_bytes + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.read_bytes[0]) + return self._data.read_bytes @read_bytes.setter def read_bytes(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].read_bytes = val + self._data.read_bytes = val @property def read_bw_bytes_per_sec(self): - """int: """ - return self._ptr[0].read_bw_bytes_per_sec + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return 
int(self._data.read_bw_bytes_per_sec[0]) + return self._data.read_bw_bytes_per_sec @read_bw_bytes_per_sec.setter def read_bw_bytes_per_sec(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].read_bw_bytes_per_sec = val + self._data.read_bw_bytes_per_sec = val @property def read_utilization(self): - """int: """ - return self._ptr[0].read_utilization + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.read_utilization[0]) + return self._data.read_utilization @read_utilization.setter def read_utilization(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].read_utilization = val + self._data.read_utilization = val @property def read_duration_us(self): - """int: """ - return self._ptr[0].read_duration_us + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.read_duration_us[0]) + return self._data.read_duration_us @read_duration_us.setter def read_duration_us(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].read_duration_us = val + self._data.read_duration_us = val @property def n_total_reads(self): - """int: """ - return self._ptr[0].n_total_reads + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_total_reads[0]) + return self._data.n_total_reads @n_total_reads.setter def n_total_reads(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_total_reads = val + self._data.n_total_reads = val @property def n_p2p_reads(self): - """int: """ - return self._ptr[0].n_p2p_reads + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_p2p_reads[0]) + return self._data.n_p2p_reads @n_p2p_reads.setter def n_p2p_reads(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - 
self._ptr[0].n_p2p_reads = val + self._data.n_p2p_reads = val @property def n_nvfs_reads(self): - """int: """ - return self._ptr[0].n_nvfs_reads + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_nvfs_reads[0]) + return self._data.n_nvfs_reads @n_nvfs_reads.setter def n_nvfs_reads(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_nvfs_reads = val + self._data.n_nvfs_reads = val @property def n_posix_reads(self): - """int: """ - return self._ptr[0].n_posix_reads + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_posix_reads[0]) + return self._data.n_posix_reads @n_posix_reads.setter def n_posix_reads(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_posix_reads = val + self._data.n_posix_reads = val @property def n_unaligned_reads(self): - """int: """ - return self._ptr[0].n_unaligned_reads + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_unaligned_reads[0]) + return self._data.n_unaligned_reads @n_unaligned_reads.setter def n_unaligned_reads(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_unaligned_reads = val + self._data.n_unaligned_reads = val @property def n_dr_reads(self): - """int: """ - return self._ptr[0].n_dr_reads + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_dr_reads[0]) + return self._data.n_dr_reads @n_dr_reads.setter def n_dr_reads(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_dr_reads = val + self._data.n_dr_reads = val @property def n_sparse_regions(self): - """int: """ - return self._ptr[0].n_sparse_regions + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_sparse_regions[0]) + return 
self._data.n_sparse_regions @n_sparse_regions.setter def n_sparse_regions(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_sparse_regions = val + self._data.n_sparse_regions = val @property def n_inline_regions(self): - """int: """ - return self._ptr[0].n_inline_regions + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_inline_regions[0]) + return self._data.n_inline_regions @n_inline_regions.setter def n_inline_regions(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_inline_regions = val + self._data.n_inline_regions = val @property def n_reads_err(self): - """int: """ - return self._ptr[0].n_reads_err + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_reads_err[0]) + return self._data.n_reads_err @n_reads_err.setter def n_reads_err(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_reads_err = val + self._data.n_reads_err = val @property def writes_bytes(self): - """int: """ - return self._ptr[0].writes_bytes + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.writes_bytes[0]) + return self._data.writes_bytes @writes_bytes.setter def writes_bytes(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].writes_bytes = val + self._data.writes_bytes = val @property def write_bw_bytes_per_sec(self): - """int: """ - return self._ptr[0].write_bw_bytes_per_sec + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.write_bw_bytes_per_sec[0]) + return self._data.write_bw_bytes_per_sec @write_bw_bytes_per_sec.setter def write_bw_bytes_per_sec(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].write_bw_bytes_per_sec = val + 
self._data.write_bw_bytes_per_sec = val @property def write_utilization(self): - """int: """ - return self._ptr[0].write_utilization + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.write_utilization[0]) + return self._data.write_utilization @write_utilization.setter def write_utilization(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].write_utilization = val + self._data.write_utilization = val @property def write_duration_us(self): - """int: """ - return self._ptr[0].write_duration_us + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.write_duration_us[0]) + return self._data.write_duration_us @write_duration_us.setter def write_duration_us(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].write_duration_us = val + self._data.write_duration_us = val @property def n_total_writes(self): - """int: """ - return self._ptr[0].n_total_writes + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_total_writes[0]) + return self._data.n_total_writes @n_total_writes.setter def n_total_writes(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_total_writes = val + self._data.n_total_writes = val @property def n_p2p_writes(self): - """int: """ - return self._ptr[0].n_p2p_writes + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_p2p_writes[0]) + return self._data.n_p2p_writes @n_p2p_writes.setter def n_p2p_writes(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_p2p_writes = val + self._data.n_p2p_writes = val @property def n_nvfs_writes(self): - """int: """ - return self._ptr[0].n_nvfs_writes + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return 
int(self._data.n_nvfs_writes[0]) + return self._data.n_nvfs_writes @n_nvfs_writes.setter def n_nvfs_writes(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_nvfs_writes = val + self._data.n_nvfs_writes = val @property def n_posix_writes(self): - """int: """ - return self._ptr[0].n_posix_writes + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_posix_writes[0]) + return self._data.n_posix_writes @n_posix_writes.setter def n_posix_writes(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_posix_writes = val + self._data.n_posix_writes = val @property def n_unaligned_writes(self): - """int: """ - return self._ptr[0].n_unaligned_writes + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_unaligned_writes[0]) + return self._data.n_unaligned_writes @n_unaligned_writes.setter def n_unaligned_writes(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_unaligned_writes = val + self._data.n_unaligned_writes = val @property def n_dr_writes(self): - """int: """ - return self._ptr[0].n_dr_writes + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_dr_writes[0]) + return self._data.n_dr_writes @n_dr_writes.setter def n_dr_writes(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_dr_writes = val + self._data.n_dr_writes = val @property def n_writes_err(self): - """int: """ - return self._ptr[0].n_writes_err + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_writes_err[0]) + return self._data.n_writes_err @n_writes_err.setter def n_writes_err(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_writes_err = val + self._data.n_writes_err = 
val @property def n_mmap(self): - """int: """ - return self._ptr[0].n_mmap + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_mmap[0]) + return self._data.n_mmap @n_mmap.setter def n_mmap(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_mmap = val + self._data.n_mmap = val @property def n_mmap_ok(self): - """int: """ - return self._ptr[0].n_mmap_ok + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_mmap_ok[0]) + return self._data.n_mmap_ok @n_mmap_ok.setter def n_mmap_ok(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_mmap_ok = val + self._data.n_mmap_ok = val @property def n_mmap_err(self): - """int: """ - return self._ptr[0].n_mmap_err + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_mmap_err[0]) + return self._data.n_mmap_err @n_mmap_err.setter def n_mmap_err(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_mmap_err = val + self._data.n_mmap_err = val @property def n_mmap_free(self): - """int: """ - return self._ptr[0].n_mmap_free + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.n_mmap_free[0]) + return self._data.n_mmap_free @n_mmap_free.setter def n_mmap_free(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].n_mmap_free = val + self._data.n_mmap_free = val @property def reg_bytes(self): - """int: """ - return self._ptr[0].reg_bytes + """Union[~_numpy.uint64, int]: """ + if self._data.size == 1: + return int(self._data.reg_bytes[0]) + return self._data.reg_bytes @reg_bytes.setter def reg_bytes(self, val): - if self._readonly: - raise ValueError("This PerGpuStats instance is read-only") - self._ptr[0].reg_bytes = val + self._data.reg_bytes = val + + def 
__getitem__(self, key): + cdef ssize_t key_ + cdef ssize_t size + if isinstance(key, int): + key_ = key + size = self._data.size + if key_ >= size or key_ <= -(size+1): + raise IndexError("index is out of bounds") + if key_ < 0: + key_ += size + return PerGpuStats.from_data(self._data[key_:key_+1]) + out = self._data[key] + if isinstance(out, _numpy.recarray) and out.dtype == per_gpu_stats_dtype: + return PerGpuStats.from_data(out) + return out + + def __setitem__(self, key, val): + self._data[key] = val @staticmethod def from_data(data): """Create an PerGpuStats instance wrapping the given NumPy array. Args: - data (_numpy.ndarray): a single-element array of dtype `per_gpu_stats_dtype` holding the data. + data (_numpy.ndarray): a 1D array of dtype `per_gpu_stats_dtype` holding the data. """ - return __from_data(data, "per_gpu_stats_dtype", per_gpu_stats_dtype, PerGpuStats) + cdef PerGpuStats obj = PerGpuStats.__new__(PerGpuStats) + if not isinstance(data, _numpy.ndarray): + raise TypeError("data argument must be a NumPy ndarray") + if data.ndim != 1: + raise ValueError("data array must be 1D") + if data.dtype != per_gpu_stats_dtype: + raise ValueError("data array must be of dtype per_gpu_stats_dtype") + obj._data = data.view(_numpy.recarray) + + return obj @staticmethod - def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): + def from_ptr(intptr_t ptr, size_t size=1, bint readonly=False): """Create an PerGpuStats instance wrapping the given pointer. Args: ptr (intptr_t): pointer address as Python :class:`int` to the data. - owner (object): The Python object that owns the pointer. If not provided, data will be copied. + size (int): number of structs, default=1. readonly (bool): whether the data is read-only (to the user). default is `False`. 
""" if ptr == 0: raise ValueError("ptr must not be null (0)") cdef PerGpuStats obj = PerGpuStats.__new__(PerGpuStats) - if owner is None: - obj._ptr = malloc(sizeof(CUfilePerGpuStats_t)) - if obj._ptr == NULL: - raise MemoryError("Error allocating PerGpuStats") - memcpy((obj._ptr), ptr, sizeof(CUfilePerGpuStats_t)) - obj._owner = None - obj._owned = True - else: - obj._ptr = ptr - obj._owner = owner - obj._owned = False - obj._readonly = readonly + cdef flag = cpython.buffer.PyBUF_READ if readonly else cpython.buffer.PyBUF_WRITE + cdef object buf = cpython.memoryview.PyMemoryView_FromMemory( + ptr, sizeof(CUfilePerGpuStats_t) * size, flag) + data = _numpy.ndarray(size, buffer=buf, dtype=per_gpu_stats_dtype) + obj._data = data.view(_numpy.recarray) + return obj diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index 0afcee031e..de8111221f 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -2098,7 +2098,6 @@ def test_get_stats_l2(): cufile.get_stats_l2(stats.ptr) # Verify L2 histogram fields contain data - # Access numpy array fields: histograms are numpy arrays read_hist_total = int(np.asarray(stats.read_size_kb_hist).sum()) write_hist_total = int(np.asarray(stats.write_size_kb_hist).sum()) assert read_hist_total > 0 or write_hist_total > 0, "Expected L2 histogram data" From 72b2a1f4bb88396af658f43ef4ba981a2a4adc8f Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 2 Dec 2025 16:37:16 -0500 Subject: [PATCH 11/15] More fixes --- cuda_bindings/tests/test_cufile.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index de8111221f..cf4ace73e7 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -2212,8 +2212,7 @@ def test_get_stats_l3(): # Access per-GPU stats using PerGpuStats class # stats.per_gpu_stats has shape (1, 16), we need to get [0] first to get 
the (16,) array # then slice [i:i+1] to get a 1-d array view (required by from_data) - per_gpu_array = stats.per_gpu_stats[0] # Get the (16,) array - gpu_stats = cufile.PerGpuStats.from_data(per_gpu_array[i : i + 1]) + gpu_stats = stats.per_gpu_stats[i] # Get the (16,) array if gpu_stats.n_total_reads > 0 or gpu_stats.read_bytes > 0: gpu_with_data = True break From 104a5bb7943a7d0fc7a2e59b2971eddba0f421bb Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 2 Dec 2025 16:47:40 -0500 Subject: [PATCH 12/15] Fix test --- cuda_bindings/tests/test_cufile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index cf4ace73e7..fe78244f13 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -2219,7 +2219,7 @@ def test_get_stats_l3(): # L3 also contains L2 detailed stats (which includes L1 basic stats) detailed_stats = cufile.StatsLevel2.from_data(stats.detailed) - read_hist_total = int(detailed_stats.read_size_kb_hist.sum()) + read_hist_total = int(np.asarray(detailed_stats.read_size_kb_hist).sum()) logging.info( f"L3 Stats: num_gpus={num_gpus}, gpu_with_data={gpu_with_data}, detailed_read_hist={read_hist_total}" From 62f4c44ad28979bb3f81b68d6c2b289671bd8f97 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 2 Dec 2025 17:05:42 -0500 Subject: [PATCH 13/15] Add backward-incompatibility note --- cuda_bindings/docs/source/release/13.X.Y-notes.rst | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cuda_bindings/docs/source/release/13.X.Y-notes.rst b/cuda_bindings/docs/source/release/13.X.Y-notes.rst index 34e8303cf7..6f4a071cb4 100644 --- a/cuda_bindings/docs/source/release/13.X.Y-notes.rst +++ b/cuda_bindings/docs/source/release/13.X.Y-notes.rst @@ -3,7 +3,7 @@ .. 
module:: cuda.bindings -``cuda-bindings`` 13.X.Y Release notes +````cuda-bindings```` 13.X.Y Release notes ====================================== @@ -15,10 +15,18 @@ Highlights Bug fixes --------- +Backward incompatible changes +----------------------------- +In ``cuda.bindings.cufile``, the following class members are no longer Numpy arrays, but are Python ``memoryview`` objects. To get Numpy array behavior, pass the result to ``numpy.asarray``: + +- ``StatsLevel2.read_size_kb_hist`` +- ``StatsLevel2.write_size_kb_hist`` + +Additionally, ``cufile.StatsLevel3.per_gpu_stats`` no longer returns a raw array, instead it returns a ``cufile.PerGpuStats`` array. Known issues ------------ -* Updating from older versions (v12.6.2.post1 and below) via ``pip install -U cuda-python`` might not work. Please do a clean re-installation by uninstalling ``pip uninstall -y cuda-python`` followed by installing ``pip install cuda-python``. -* The graphics APIs in ``cuda.bindings.runtime`` are inadvertently disabled in 13.0.2. Users needing these APIs should update to 13.0.3. +* Updating from older versions (v12.6.2.post1 and below) via ````pip install -U cuda-python```` might not work. Please do a clean re-installation by uninstalling ````pip uninstall -y cuda-python```` followed by installing ````pip install cuda-python````. +* The graphics APIs in ````cuda.bindings.runtime```` are inadvertently disabled in 13.0.2. Users needing these APIs should update to 13.0.3. 
From 1a26c2bded1ddb8d96800dede1b7e6bf02e703c3 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Wed, 3 Dec 2025 07:47:25 -0500 Subject: [PATCH 14/15] Improve backward compatibility --- cuda_bindings/cuda/bindings/cufile.pyx | 4 ++-- cuda_bindings/docs/source/release/13.X.Y-notes.rst | 13 ++++--------- cuda_bindings/tests/test_cufile.py | 7 +++---- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx index 4744aaea3a..338f2cb8a6 100644 --- a/cuda_bindings/cuda/bindings/cufile.pyx +++ b/cuda_bindings/cuda/bindings/cufile.pyx @@ -2281,7 +2281,7 @@ cdef class StatsLevel2: """~_numpy.uint64: (array of length 32).""" cdef view.array arr = view.array(shape=(32,), itemsize=sizeof(uint64_t), format="Q", mode="c", allocate_buffer=False) arr.data = (&(self._ptr[0].read_size_kb_hist)) - return arr + return _numpy.asarray(arr) @read_size_kb_hist.setter def read_size_kb_hist(self, val): @@ -2296,7 +2296,7 @@ cdef class StatsLevel2: """~_numpy.uint64: (array of length 32).""" cdef view.array arr = view.array(shape=(32,), itemsize=sizeof(uint64_t), format="Q", mode="c", allocate_buffer=False) arr.data = (&(self._ptr[0].write_size_kb_hist)) - return arr + return _numpy.asarray(arr) @write_size_kb_hist.setter def write_size_kb_hist(self, val): diff --git a/cuda_bindings/docs/source/release/13.X.Y-notes.rst b/cuda_bindings/docs/source/release/13.X.Y-notes.rst index 6f4a071cb4..00278667ec 100644 --- a/cuda_bindings/docs/source/release/13.X.Y-notes.rst +++ b/cuda_bindings/docs/source/release/13.X.Y-notes.rst @@ -3,7 +3,7 @@ .. module:: cuda.bindings -````cuda-bindings```` 13.X.Y Release notes +``cuda-bindings`` 13.X.Y Release notes ====================================== @@ -18,15 +18,10 @@ Bug fixes Backward incompatible changes ----------------------------- -In ``cuda.bindings.cufile``, the following class members are no longer Numpy arrays, but are Python ``memoryview`` objects. 
To get Numpy array behavior, pass the result to ``numpy.asarray``: - -- ``StatsLevel2.read_size_kb_hist`` -- ``StatsLevel2.write_size_kb_hist`` - -Additionally, ``cufile.StatsLevel3.per_gpu_stats`` no longer returns a raw array, instead it returns a ``cufile.PerGpuStats`` array. +``cufile.StatsLevel3.per_gpu_stats`` no longer returns a singleton that must be manually converted to a NumPy array; instead, it returns a ``cufile.PerGpuStats`` array that is directly addressable. Known issues ------------ -* Updating from older versions (v12.6.2.post1 and below) via ````pip install -U cuda-python```` might not work. Please do a clean re-installation by uninstalling ````pip uninstall -y cuda-python```` followed by installing ````pip install cuda-python````. -* The graphics APIs in ````cuda.bindings.runtime```` are inadvertently disabled in 13.0.2. Users needing these APIs should update to 13.0.3. +* Updating from older versions (v12.6.2.post1 and below) via ``pip install -U cuda-python`` might not work. Please do a clean re-installation by uninstalling ``pip uninstall -y cuda-python`` followed by installing ``pip install cuda-python``. +* The graphics APIs in ``cuda.bindings.runtime`` are inadvertently disabled in 13.0.2. Users needing these APIs should update to 13.0.3. 
diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index fe78244f13..8643d87afb 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -12,7 +12,6 @@ from functools import cache import cuda.bindings.driver as cuda -import numpy as np import pytest # Configure logging to show INFO level and above @@ -2098,8 +2097,8 @@ def test_get_stats_l2(): cufile.get_stats_l2(stats.ptr) # Verify L2 histogram fields contain data - read_hist_total = int(np.asarray(stats.read_size_kb_hist).sum()) - write_hist_total = int(np.asarray(stats.write_size_kb_hist).sum()) + read_hist_total = int(stats.read_size_kb_hist.sum()) + write_hist_total = int(stats.write_size_kb_hist.sum()) assert read_hist_total > 0 or write_hist_total > 0, "Expected L2 histogram data" # L2 also contains L1 basic stats - verify using OpCounter class @@ -2219,7 +2218,7 @@ def test_get_stats_l3(): # L3 also contains L2 detailed stats (which includes L1 basic stats) detailed_stats = cufile.StatsLevel2.from_data(stats.detailed) - read_hist_total = int(np.asarray(detailed_stats.read_size_kb_hist).sum()) + read_hist_total = int(detailed_stats.read_size_kb_hist.sum()) logging.info( f"L3 Stats: num_gpus={num_gpus}, gpu_with_data={gpu_with_data}, detailed_read_hist={read_hist_total}" From b6f6f7d27260f4b56e2fd6823df9d990a2526840 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Wed, 3 Dec 2025 07:48:31 -0500 Subject: [PATCH 15/15] Restore comment --- cuda_bindings/tests/test_cufile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_bindings/tests/test_cufile.py b/cuda_bindings/tests/test_cufile.py index 8643d87afb..58cce848aa 100644 --- a/cuda_bindings/tests/test_cufile.py +++ b/cuda_bindings/tests/test_cufile.py @@ -2097,6 +2097,7 @@ def test_get_stats_l2(): cufile.get_stats_l2(stats.ptr) # Verify L2 histogram fields contain data + # Access numpy array fields: histograms are numpy arrays read_hist_total = int(stats.read_size_kb_hist.sum()) 
write_hist_total = int(stats.write_size_kb_hist.sum()) assert read_hist_total > 0 or write_hist_total > 0, "Expected L2 histogram data"