From 974f2704a762670cd97071c9689f48812eef586a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabien=20P=C3=A9an?= Date: Wed, 1 Apr 2026 11:16:57 +0200 Subject: [PATCH] Add support for indexed arrays in structs and related kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fabien Péan --- CHANGELOG.md | 2 + warp/_src/codegen.py | 41 ++++++++ warp/_src/types.py | 35 ++++++- warp/native/array.h | 4 + warp/tests/test_indexedarray.py | 162 ++++++++++++++++++++++++++++++++ 5 files changed, 243 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36347cb432..299979e118 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ based on occupancy ([GH-1270](https://github.com/NVIDIA/warp/issues/1270)). - Add `module_options` dict parameter to `@wp.kernel` for inline module-level compilation options on `"unique"` modules ([GH-1250](https://github.com/NVIDIA/warp/issues/1250)). +- Add support for `wp.indexedarray` fields in `@wp.struct` (assignment, device transfer, and NumPy structured values) + ([GH-1327](https://github.com/NVIDIA/warp/issues/1327)). 
### Removed diff --git a/warp/_src/codegen.py b/warp/_src/codegen.py index ff2e09beb8..f0b1db9738 100644 --- a/warp/_src/codegen.py +++ b/warp/_src/codegen.py @@ -237,6 +237,11 @@ def to(self, device): if matches_array_class(var.type, array): # array_t setattr(dst, name, value.to(device)) + elif matches_array_class(var.type, indexedarray): + # indexedarray_t + # `.to` returns an array if on different device, force to identity indexedarray + cloned = value.to(device) + setattr(dst, name, cloned if isinstance(cloned, indexedarray) else indexedarray(cloned)) elif isinstance(var.type, Struct): # nested struct new_struct = var.type() @@ -264,6 +269,9 @@ def numpy_value(self): if matches_array_class(var.type, array): # array_t npvalue.append(value.numpy_value()) + elif matches_array_class(var.type, indexedarray): + # indexedarray_t + npvalue.append(value.numpy_value()) elif isinstance(var.type, Struct): # nested struct npvalue.append(value.numpy_value()) @@ -299,6 +307,8 @@ def _make_struct_field_constructor(field: str, var_type: type): return lambda ctype: var_type.instance_type(ctype=getattr(ctype, field)) elif matches_array_class(var_type, warp._src.types.array): return lambda ctype: None + elif matches_array_class(var_type, warp._src.types.indexedarray): + return lambda ctype: None elif _is_texture_type(var_type): return lambda ctype: None elif issubclass(var_type, ctypes.Array): @@ -329,6 +339,27 @@ def set_array_value(inst, value): cls.__setattr__(inst, field, value) + def set_indexedarray_value(inst, value): + if value is None: + setattr(inst._ctype, field, var_type.__ctype__()) + else: + assert isinstance(value, indexedarray) + assert types_equal(value.dtype, var_type.dtype), ( + f"assign to struct member variable {field} failed, expected type {type_repr(var_type.dtype)}, got type {type_repr(value.dtype)}" + ) + setattr(inst._ctype, field, value.__ctype__()) + + # workaround to prevent gradient buffers being garbage collected + # (indexedarray_t embeds an 
array_t) + grad_attr = "_" + field + "_grad" + if value is not None and value.data is not None and value.data.requires_grad: + cls.__setattr__(inst, grad_attr, value.data.grad) + else: + # clear any previous keepalive + cls.__setattr__(inst, grad_attr, None) + + cls.__setattr__(inst, field, value) + def set_struct_value(inst, value): getattr(inst, field).assign(value) @@ -388,6 +419,8 @@ def set_texture_value(inst, value): if matches_array_class(var_type, array): return set_array_value + elif matches_array_class(var_type, indexedarray): + return set_indexedarray_value elif isinstance(var_type, Struct): return set_struct_value elif _is_texture_type(var_type): @@ -418,6 +451,8 @@ def __init__(self, key: str, cls: type, module: warp._src.context.Module): for label, var in self.vars.items(): if matches_array_class(var.type, array): fields.append((label, array_t)) + elif matches_array_class(var.type, indexedarray): + fields.append((label, indexedarray_t)) elif isinstance(var.type, Struct): fields.append((label, var.type.ctype)) elif issubclass(var.type, ctypes.Array): @@ -533,6 +568,9 @@ def numpy_dtype(self): if matches_array_class(var.type, array): # array_t formats.append(array_t.numpy_dtype()) + elif matches_array_class(var.type, indexedarray): + # indexedarray_t + formats.append(indexedarray_t.numpy_dtype()) elif isinstance(var.type, Struct): # nested struct formats.append(var.type.numpy_dtype()) @@ -566,6 +604,9 @@ def from_ptr(self, ptr): # no easy way to make a backref. # Instead, we just create a stub annotation, which is not a fully usable array object. setattr(instance, name, array(dtype=var.type.dtype, ndim=var.type.ndim)) + elif matches_array_class(var.type, indexedarray): + # Same as regular arrays: return an annotation stub only. 
+ setattr(instance, name, indexedarray(dtype=var.type.dtype, ndim=var.type.ndim)) elif isinstance(var.type, Struct): # nested struct value = var.type.from_ptr(ptr + offset) diff --git a/warp/_src/types.py b/warp/_src/types.py index 918bd1a38c..56136d1559 100644 --- a/warp/_src/types.py +++ b/warp/_src/types.py @@ -2184,7 +2184,8 @@ class indexedarray_t(ctypes.Structure): def __init__(self, data, indices, shape): if data is None: - self.data = array().__ctype__() + ndim = len(shape) + self.data = array_t(data=0, grad=0, ndim=ndim, shape=(0,) * ndim, strides=(0,) * ndim) for i in range(ARRAY_MAX_DIMS): self.indices[i] = ctypes.c_void_p(None) self.shape[i] = 0 @@ -2197,6 +2198,38 @@ def __init__(self, data, indices, shape): self.indices[i] = ctypes.c_void_p(None) self.shape[i] = shape[i] + # structured type description used when indexedarray_t is packed in a struct and shared via numpy structured array. + @classmethod + def numpy_dtype(cls): + return cls._numpy_dtype_ + + # structured value used when indexedarray_t is packed in a struct and shared via a numpy structured array + def numpy_value(self): + # pointers are represented as unsigned 64-bit integers + indices = [] + for i in range(ARRAY_MAX_DIMS): + v = self.indices[i] + # v may be a ctypes.c_void_p instance + if isinstance(v, ctypes.c_void_p): + indices.append(0 if v.value is None else int(v.value)) + else: + indices.append(0 if v is None else int(v)) + + return (self.data.numpy_value(), indices, list(self.shape)) + + +# NOTE: must match indexedarray_t._fields_ +indexedarray_t._numpy_dtype_ = { + "names": ["data", "indices", "shape"], + "formats": [array_t.numpy_dtype(), f"{ARRAY_MAX_DIMS}u8", f"{ARRAY_MAX_DIMS}i4"], + "offsets": [ + indexedarray_t.data.offset, + indexedarray_t.indices.offset, + indexedarray_t.shape.offset, + ], + "itemsize": ctypes.sizeof(indexedarray_t), +} + class tuple_t: """Used during codegen to store multiple values into a single variable.""" diff --git a/warp/native/array.h 
b/warp/native/array.h index d6e38b090b..417a352c47 100644 --- a/warp/native/array.h +++ b/warp/native/array.h @@ -1275,6 +1275,10 @@ CUDA_CALLABLE inline void adj_where( // atomic add the whole struct onto an array (e.g.: during backwards pass) template <typename T> CUDA_CALLABLE inline void atomic_add(array_t<T>*, array_t<T>) { } +// stub for the case where we have an indexed array inside a struct and +// atomic add the whole struct onto an array (e.g.: during backwards pass) +template <typename T> CUDA_CALLABLE inline void atomic_add(indexedarray_t<T>*, indexedarray_t<T>) { } + // for float and vector types this is just an alias for an atomic add template <typename T> CUDA_CALLABLE inline void adj_atomic_add(T* buf, T value) { atomic_add(buf, value); } diff --git a/warp/tests/test_indexedarray.py b/warp/tests/test_indexedarray.py index 760064d07d..7b8b6a8784 100644 --- a/warp/tests/test_indexedarray.py +++ b/warp/tests/test_indexedarray.py @@ -42,6 +42,152 @@ def test_indexedarray_1d(test, device): wp.launch(kernel_1d, dim=iarr.size, inputs=[iarr, expected_arr], device=device) +@wp.struct +class IndexedArrayStruct: + iarr: wp.indexedarray(dtype=float) + + +@wp.struct +class NestedIndexedArrayStruct: + inner: IndexedArrayStruct + + +@wp.kernel +def kernel_indexedarray_in_struct(arg: IndexedArrayStruct, expected: wp.array(dtype=float)): + i = wp.tid() + + wp.expect_eq(arg.iarr[i], expected[i]) + + arg.iarr[i] = 2.0 * arg.iarr[i] + wp.atomic_add(arg.iarr, i, 1.0) + + wp.expect_eq(arg.iarr[i], 2.0 * expected[i] + 1.0) + + +@wp.kernel +def kernel_indexedarray_in_nested_struct(arg: NestedIndexedArrayStruct, expected: wp.array(dtype=float)): + i = wp.tid() + + wp.expect_eq(arg.inner.iarr[i], expected[i]) + + arg.inner.iarr[i] = 2.0 * arg.inner.iarr[i] + wp.atomic_add(arg.inner.iarr, i, 1.0) + + wp.expect_eq(arg.inner.iarr[i], 2.0 * expected[i] + 1.0) + + +@wp.kernel +def kernel_indexedarray_in_struct_array(args: wp.array(dtype=IndexedArrayStruct), expected: wp.array(dtype=float)): + i = wp.tid() + + s = args[0] + 
wp.expect_eq(s.iarr[i], expected[i]) + + s.iarr[i] = 2.0 * s.iarr[i] + wp.atomic_add(s.iarr, i, 1.0) + + wp.expect_eq(s.iarr[i], 2.0 * expected[i] + 1.0) + + +def test_indexedarray_in_struct(test, device): + values = np.arange(10, dtype=np.float32) + arr = wp.array(data=values, device=device) + + indices = wp.array([1, 3, 5, 7, 9], dtype=int, device=device) + iarr = wp.indexedarray1d(arr, [indices]) + + expected_arr = wp.array(data=[1, 3, 5, 7, 9], dtype=float, device=device) + + s = IndexedArrayStruct() + s.iarr = iarr + + wp.launch(kernel_indexedarray_in_struct, dim=iarr.size, inputs=[s, expected_arr], device=device) + wp.synchronize_device(device) + + +def test_indexedarray_in_nested_struct(test, device): + values = np.arange(10, dtype=np.float32) + arr = wp.array(data=values, device=device) + + indices = wp.array([1, 3, 5, 7, 9], dtype=int, device=device) + iarr = wp.indexedarray1d(arr, [indices]) + + expected_arr = wp.array(data=[1, 3, 5, 7, 9], dtype=float, device=device) + + inner = IndexedArrayStruct() + inner.iarr = iarr + + outer = NestedIndexedArrayStruct() + outer.inner = inner + + wp.launch(kernel_indexedarray_in_nested_struct, dim=iarr.size, inputs=[outer, expected_arr], device=device) + wp.synchronize_device(device) + + +def test_indexedarray_in_struct_array(test, device): + values = np.arange(10, dtype=np.float32) + arr = wp.array(data=values, device=device) + + indices = wp.array([1, 3, 5, 7, 9], dtype=int, device=device) + iarr = wp.indexedarray1d(arr, [indices]) + + expected_arr = wp.array(data=[1, 3, 5, 7, 9], dtype=float, device=device) + + s = IndexedArrayStruct() + s.iarr = iarr + struct_arr = wp.array([s], dtype=IndexedArrayStruct, device=device) + + wp.launch(kernel_indexedarray_in_struct_array, dim=iarr.size, inputs=[struct_arr, expected_arr], device=device) + wp.synchronize_device(device) + + +def test_indexedarray_in_struct_numpy(test, device): + values = np.arange(4, dtype=np.float32) + arr = wp.array(data=values, device=device) + + 
indices = wp.array([0, 2], dtype=int, device=device) + iarr = wp.indexedarray1d(arr, [indices]) + + s = IndexedArrayStruct() + s.iarr = iarr + + # Just ensure these are functional for structs embedding indexedarray_t + dtype = IndexedArrayStruct.numpy_dtype() + value = s.numpy_value() + + test.assertIsInstance(dtype, dict) + test.assertEqual(dtype["names"], ["iarr"]) + test.assertEqual(len(value), 1) + + +def test_indexedarray_in_struct_to_device_transfer(test, device): + # This test only applies to CUDA target devices. + if not wp.is_cuda_available() or not wp.get_device(device).is_cuda: + test.skipTest("Requires CUDA") + + # Create the indexedarray on CPU, then move the struct to CUDA. + values = np.arange(10, dtype=np.float32) + arr_cpu = wp.array(data=values, device="cpu") + indices_cpu = wp.array([1, 3, 5, 7, 9], dtype=int, device="cpu") + iarr_cpu = wp.indexedarray1d(arr_cpu, [indices_cpu]) + + s = IndexedArrayStruct() + s.iarr = iarr_cpu + + s_cuda = s.to(device) + test.assertIsInstance(s_cuda.iarr, wp.indexedarray) + test.assertTrue(all(x is None for x in s_cuda.iarr.indices)) + test.assertEqual(s_cuda.iarr.shape, iarr_cpu.shape) + + expected_values = np.array([1, 3, 5, 7, 9], dtype=np.float32) + expected_arr = wp.array(data=expected_values, dtype=float, device=device) + + wp.launch(kernel_indexedarray_in_struct, dim=s_cuda.iarr.size, inputs=[s_cuda, expected_arr], device=device) + # After the kernel: a[i] = 2*a[i] then atomic_add(a, i, 1) => 2*expected + 1 + result = s_cuda.iarr.numpy() + assert_np_equal(result, 2.0 * expected_values + 1.0) + + @wp.kernel def kernel_2d(a: wp.indexedarray2d(dtype=float), expected: wp.array2d(dtype=float)): i, j = wp.tid() @@ -1121,6 +1267,22 @@ class TestIndexedArray(unittest.TestCase): add_function_test(TestIndexedArray, "test_indexedarray_fill_vector", test_indexedarray_fill_vector, devices=devices) add_function_test(TestIndexedArray, "test_indexedarray_fill_matrix", test_indexedarray_fill_matrix, devices=devices) 
add_function_test(TestIndexedArray, "test_indexedarray_fill_struct", test_indexedarray_fill_struct, devices=devices) +add_function_test(TestIndexedArray, "test_indexedarray_in_struct", test_indexedarray_in_struct, devices=devices) +add_function_test( + TestIndexedArray, "test_indexedarray_in_nested_struct", test_indexedarray_in_nested_struct, devices=devices +) +add_function_test( + TestIndexedArray, "test_indexedarray_in_struct_array", test_indexedarray_in_struct_array, devices=devices +) +add_function_test( + TestIndexedArray, "test_indexedarray_in_struct_numpy", test_indexedarray_in_struct_numpy, devices=devices +) +add_function_test( + TestIndexedArray, + "test_indexedarray_in_struct_to_device_transfer", + test_indexedarray_in_struct_to_device_transfer, + devices=devices, +) if __name__ == "__main__":