From 974f2704a762670cd97071c9689f48812eef586a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabien=20P=C3=A9an?= <pean@virtonomy.io>
Date: Wed, 1 Apr 2026 11:16:57 +0200
Subject: [PATCH] Add support for indexed arrays in structs and related kernels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Fabien Péan <pean@virtonomy.io>
---
 CHANGELOG.md                    |   2 +
 warp/_src/codegen.py            |  41 ++++++++
 warp/_src/types.py              |  35 ++++++-
 warp/native/array.h             |   4 +
 warp/tests/test_indexedarray.py | 162 ++++++++++++++++++++++++++++++++
 5 files changed, 243 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 36347cb432..299979e118 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,6 +30,8 @@
   based on occupancy ([GH-1270](https://github.com/NVIDIA/warp/issues/1270)).
 - Add `module_options` dict parameter to `@wp.kernel` for inline module-level compilation options
   on `"unique"` modules ([GH-1250](https://github.com/NVIDIA/warp/issues/1250)).
+- Add support for `wp.indexedarray` fields in `@wp.struct` (assignment, device transfer, and NumPy structured values)
+  ([GH-1327](https://github.com/NVIDIA/warp/issues/1327)).
 
 ### Removed
 
diff --git a/warp/_src/codegen.py b/warp/_src/codegen.py
index ff2e09beb8..f0b1db9738 100644
--- a/warp/_src/codegen.py
+++ b/warp/_src/codegen.py
@@ -237,6 +237,11 @@ def to(self, device):
             if matches_array_class(var.type, array):
                 # array_t
                 setattr(dst, name, value.to(device))
+            elif matches_array_class(var.type, indexedarray):
+                # indexedarray_t
+                # `.to` returns an array if on different device, force to identity indexedarray
+                cloned = value.to(device)
+                setattr(dst, name, cloned if isinstance(cloned, indexedarray) else indexedarray(cloned))
             elif isinstance(var.type, Struct):
                 # nested struct
                 new_struct = var.type()
@@ -264,6 +269,9 @@ def numpy_value(self):
             if matches_array_class(var.type, array):
                 # array_t
                 npvalue.append(value.numpy_value())
+            elif matches_array_class(var.type, indexedarray):
+                # indexedarray_t
+                npvalue.append(value.numpy_value())
             elif isinstance(var.type, Struct):
                 # nested struct
                 npvalue.append(value.numpy_value())
@@ -299,6 +307,8 @@ def _make_struct_field_constructor(field: str, var_type: type):
         return lambda ctype: var_type.instance_type(ctype=getattr(ctype, field))
     elif matches_array_class(var_type, warp._src.types.array):
         return lambda ctype: None
+    elif matches_array_class(var_type, warp._src.types.indexedarray):
+        return lambda ctype: None
     elif _is_texture_type(var_type):
         return lambda ctype: None
     elif issubclass(var_type, ctypes.Array):
@@ -329,6 +339,27 @@ def set_array_value(inst, value):
 
         cls.__setattr__(inst, field, value)
 
+    def set_indexedarray_value(inst, value):
+        """Store an indexedarray (or None) in this struct field and its ctype."""
+        if value is not None:
+            assert isinstance(value, indexedarray)
+            assert types_equal(value.dtype, var_type.dtype), (
+                f"assign to struct member variable {field} failed, expected type {type_repr(var_type.dtype)}, got type {type_repr(value.dtype)}"
+            )
+        # None is written as the ctype of the field's own annotation (var_type)
+        native = var_type.__ctype__() if value is None else value.__ctype__()
+        setattr(inst._ctype, field, native)
+
+        # workaround to prevent gradient buffers being garbage collected
+        # (indexedarray_t embeds an array_t); when there is no gradient to hold,
+        # any previous keepalive is cleared instead
+        keepalive = None
+        if value is not None and value.data is not None and value.data.requires_grad:
+            keepalive = value.data.grad
+        cls.__setattr__(inst, "_" + field + "_grad", keepalive)
+
+        cls.__setattr__(inst, field, value)
+
     def set_struct_value(inst, value):
         getattr(inst, field).assign(value)
 
@@ -388,6 +419,8 @@ def set_texture_value(inst, value):
 
     if matches_array_class(var_type, array):
         return set_array_value
+    elif matches_array_class(var_type, indexedarray):
+        return set_indexedarray_value
     elif isinstance(var_type, Struct):
         return set_struct_value
     elif _is_texture_type(var_type):
@@ -418,6 +451,8 @@ def __init__(self, key: str, cls: type, module: warp._src.context.Module):
         for label, var in self.vars.items():
             if matches_array_class(var.type, array):
                 fields.append((label, array_t))
+            elif matches_array_class(var.type, indexedarray):
+                fields.append((label, indexedarray_t))
             elif isinstance(var.type, Struct):
                 fields.append((label, var.type.ctype))
             elif issubclass(var.type, ctypes.Array):
@@ -533,6 +568,9 @@ def numpy_dtype(self):
             if matches_array_class(var.type, array):
                 # array_t
                 formats.append(array_t.numpy_dtype())
+            elif matches_array_class(var.type, indexedarray):
+                # indexedarray_t
+                formats.append(indexedarray_t.numpy_dtype())
             elif isinstance(var.type, Struct):
                 # nested struct
                 formats.append(var.type.numpy_dtype())
@@ -566,6 +604,9 @@ def from_ptr(self, ptr):
                 # no easy way to make a backref.
                 # Instead, we just create a stub annotation, which is not a fully usable array object.
                 setattr(instance, name, array(dtype=var.type.dtype, ndim=var.type.ndim))
+            elif matches_array_class(var.type, indexedarray):
+                # Same as regular arrays: return an annotation stub only.
+                setattr(instance, name, indexedarray(dtype=var.type.dtype, ndim=var.type.ndim))
             elif isinstance(var.type, Struct):
                 # nested struct
                 value = var.type.from_ptr(ptr + offset)
diff --git a/warp/_src/types.py b/warp/_src/types.py
index 918bd1a38c..56136d1559 100644
--- a/warp/_src/types.py
+++ b/warp/_src/types.py
@@ -2184,7 +2184,8 @@ class indexedarray_t(ctypes.Structure):
 
     def __init__(self, data, indices, shape):
         if data is None:
-            self.data = array().__ctype__()
+            ndim = len(shape)
+            self.data = array_t(data=0, grad=0, ndim=ndim, shape=(0,) * ndim, strides=(0,) * ndim)
             for i in range(ARRAY_MAX_DIMS):
                 self.indices[i] = ctypes.c_void_p(None)
                 self.shape[i] = 0
@@ -2197,6 +2198,38 @@ def __init__(self, data, indices, shape):
                     self.indices[i] = ctypes.c_void_p(None)
                 self.shape[i] = shape[i]
 
+    # structured type description used when indexedarray_t is packed in a struct and shared via a NumPy structured array
+    @classmethod
+    def numpy_dtype(cls):
+        return cls._numpy_dtype_
+
+    # structured value used when indexedarray_t is packed in a struct and shared via a numpy structured array
+    def numpy_value(self):
+        """Return ``(data, indices, shape)`` as a NumPy structured value.
+
+        Index pointers are emitted as integers, with 0 standing for null.
+        """
+        indices = []
+        for slot in range(ARRAY_MAX_DIMS):
+            ptr = self.indices[slot]
+            # unwrap a ctypes.c_void_p instance to its raw address (or None)
+            address = ptr.value if isinstance(ptr, ctypes.c_void_p) else ptr
+            indices.append(0 if address is None else int(address))
+        return (self.data.numpy_value(), indices, list(self.shape))
+
+
+# NOTE: the names, formats and offsets below must match indexedarray_t._fields_
+indexedarray_t._numpy_dtype_ = {
+    "names": ["data", "indices", "shape"],
+    "formats": [array_t.numpy_dtype(), f"{ARRAY_MAX_DIMS}u8", f"{ARRAY_MAX_DIMS}i4"],
+    "offsets": [
+        indexedarray_t.data.offset,
+        indexedarray_t.indices.offset,
+        indexedarray_t.shape.offset,
+    ],
+    "itemsize": ctypes.sizeof(indexedarray_t),
+}
+
 
 class tuple_t:
     """Used during codegen to store multiple values into a single variable."""
diff --git a/warp/native/array.h b/warp/native/array.h
index d6e38b090b..417a352c47 100644
--- a/warp/native/array.h
+++ b/warp/native/array.h
@@ -1275,6 +1275,10 @@ CUDA_CALLABLE inline void adj_where(
 // atomic add the whole struct onto an array (e.g.: during backwards pass)
 template <typename T> CUDA_CALLABLE inline void atomic_add(array_t<T>*, array_t<T>) { }
 
+// no-op stub for the case where a struct containing an indexed array is
+// atomically added as a whole onto an array (e.g.: during backwards pass)
+template <typename T> CUDA_CALLABLE inline void atomic_add(indexedarray_t<T>*, indexedarray_t<T>) { }
+
 // for float and vector types this is just an alias for an atomic add
 template <typename T> CUDA_CALLABLE inline void adj_atomic_add(T* buf, T value) { atomic_add(buf, value); }
 
diff --git a/warp/tests/test_indexedarray.py b/warp/tests/test_indexedarray.py
index 760064d07d..7b8b6a8784 100644
--- a/warp/tests/test_indexedarray.py
+++ b/warp/tests/test_indexedarray.py
@@ -42,6 +42,152 @@ def test_indexedarray_1d(test, device):
     wp.launch(kernel_1d, dim=iarr.size, inputs=[iarr, expected_arr], device=device)
 
 
+@wp.struct
+class IndexedArrayStruct:
+    iarr: wp.indexedarray(dtype=float)  # indexed-array member with float elements
+
+
+@wp.struct
+class NestedIndexedArrayStruct:
+    inner: IndexedArrayStruct  # nested struct that carries the indexed array
+
+
+@wp.kernel
+def kernel_indexedarray_in_struct(arg: IndexedArrayStruct, expected: wp.array(dtype=float)):
+    i = wp.tid()
+    # read through the struct member: element i must equal the expected value
+    wp.expect_eq(arg.iarr[i], expected[i])
+    # write through the struct member: plain store, then an atomic add
+    arg.iarr[i] = 2.0 * arg.iarr[i]
+    wp.atomic_add(arg.iarr, i, 1.0)
+    # the element must now hold 2 * expected + 1
+    wp.expect_eq(arg.iarr[i], 2.0 * expected[i] + 1.0)
+
+
+@wp.kernel
+def kernel_indexedarray_in_nested_struct(arg: NestedIndexedArrayStruct, expected: wp.array(dtype=float)):
+    i = wp.tid()
+    # read through the nested struct member: element i must equal the expected value
+    wp.expect_eq(arg.inner.iarr[i], expected[i])
+    # write through the nested struct member: plain store, then an atomic add
+    arg.inner.iarr[i] = 2.0 * arg.inner.iarr[i]
+    wp.atomic_add(arg.inner.iarr, i, 1.0)
+    # the element must now hold 2 * expected + 1
+    wp.expect_eq(arg.inner.iarr[i], 2.0 * expected[i] + 1.0)
+
+
+@wp.kernel
+def kernel_indexedarray_in_struct_array(args: wp.array(dtype=IndexedArrayStruct), expected: wp.array(dtype=float)):
+    i = wp.tid()
+    # every thread uses the struct stored in the first array element
+    s = args[0]
+    wp.expect_eq(s.iarr[i], expected[i])
+    # write through the loaded struct: plain store, then an atomic add
+    s.iarr[i] = 2.0 * s.iarr[i]
+    wp.atomic_add(s.iarr, i, 1.0)
+    # the element must now hold 2 * expected + 1
+    wp.expect_eq(s.iarr[i], 2.0 * expected[i] + 1.0)
+
+
+def test_indexedarray_in_struct(test, device):
+    """Launch a kernel that reads and writes an indexed array held in a struct."""
+    source = wp.array(data=np.arange(10, dtype=np.float32), device=device)
+    picks = wp.array([1, 3, 5, 7, 9], dtype=int, device=device)
+    view = wp.indexedarray1d(source, [picks])
+
+    # values the kernel should observe through the indexed view
+    expected_arr = wp.array(data=[1, 3, 5, 7, 9], dtype=float, device=device)
+
+    holder = IndexedArrayStruct()
+    holder.iarr = view
+
+    wp.launch(kernel_indexedarray_in_struct, dim=view.size, inputs=[holder, expected_arr], device=device)
+    wp.synchronize_device(device)
+
+
+def test_indexedarray_in_nested_struct(test, device):
+    """Launch a kernel on an indexed array reached through a nested struct."""
+    source = wp.array(data=np.arange(10, dtype=np.float32), device=device)
+    picks = wp.array([1, 3, 5, 7, 9], dtype=int, device=device)
+    view = wp.indexedarray1d(source, [picks])
+
+    # values the kernel should observe through the indexed view
+    expected_arr = wp.array(data=[1, 3, 5, 7, 9], dtype=float, device=device)
+
+    # wrap the indexed array in the inner struct, then nest that struct
+    leaf = IndexedArrayStruct()
+    leaf.iarr = view
+    outer = NestedIndexedArrayStruct()
+    outer.inner = leaf
+
+    wp.launch(kernel_indexedarray_in_nested_struct, dim=view.size, inputs=[outer, expected_arr], device=device)
+    wp.synchronize_device(device)
+
+
+def test_indexedarray_in_struct_array(test, device):
+    """Launch a kernel on an indexed array stored in an array of structs."""
+    source = wp.array(data=np.arange(10, dtype=np.float32), device=device)
+    picks = wp.array([1, 3, 5, 7, 9], dtype=int, device=device)
+    view = wp.indexedarray1d(source, [picks])
+
+    expected_arr = wp.array(data=[1, 3, 5, 7, 9], dtype=float, device=device)
+
+    # a one-element struct array whose single entry carries the indexed array
+    holder = IndexedArrayStruct()
+    holder.iarr = view
+    struct_arr = wp.array([holder], dtype=IndexedArrayStruct, device=device)
+
+    wp.launch(kernel_indexedarray_in_struct_array, dim=view.size, inputs=[struct_arr, expected_arr], device=device)
+    wp.synchronize_device(device)
+
+
+def test_indexedarray_in_struct_numpy(test, device):
+    """Check the NumPy structured dtype/value of a struct with an indexed array."""
+    source = wp.array(data=np.arange(4, dtype=np.float32), device=device)
+    picks = wp.array([0, 2], dtype=int, device=device)
+
+    holder = IndexedArrayStruct()
+    holder.iarr = wp.indexedarray1d(source, [picks])
+
+    # Smoke test: both calls must be functional for structs embedding
+    # indexedarray_t; only the top-level layout is inspected here.
+    struct_dtype = IndexedArrayStruct.numpy_dtype()
+    struct_value = holder.numpy_value()
+
+    test.assertIsInstance(struct_dtype, dict)
+    test.assertEqual(struct_dtype["names"], ["iarr"])
+    # one entry per struct field
+    test.assertEqual(len(struct_value), 1)
+
+
+def test_indexedarray_in_struct_to_device_transfer(test, device):
+    """Move a struct holding a CPU indexed array to CUDA and run a kernel on it."""
+    if not (wp.is_cuda_available() and wp.get_device(device).is_cuda):
+        test.skipTest("Requires CUDA")
+
+    # Build the indexed array on the CPU first.
+    host_data = wp.array(data=np.arange(10, dtype=np.float32), device="cpu")
+    host_picks = wp.array([1, 3, 5, 7, 9], dtype=int, device="cpu")
+    host_view = wp.indexedarray1d(host_data, [host_picks])
+
+    host_struct = IndexedArrayStruct()
+    host_struct.iarr = host_view
+
+    # The transferred member must still be an indexedarray, carry no index
+    # arrays, and keep the shape of the CPU view.
+    moved = host_struct.to(device)
+    test.assertIsInstance(moved.iarr, wp.indexedarray)
+    test.assertTrue(all(x is None for x in moved.iarr.indices))
+    test.assertEqual(moved.iarr.shape, host_view.shape)
+
+    expected_values = np.array([1, 3, 5, 7, 9], dtype=np.float32)
+    expected_arr = wp.array(data=expected_values, dtype=float, device=device)
+
+    wp.launch(kernel_indexedarray_in_struct, dim=moved.iarr.size, inputs=[moved, expected_arr], device=device)
+    # After the kernel: a[i] = 2*a[i] then atomic_add(a, i, 1) => 2*expected + 1
+    assert_np_equal(moved.iarr.numpy(), 2.0 * expected_values + 1.0)
+
+
 @wp.kernel
 def kernel_2d(a: wp.indexedarray2d(dtype=float), expected: wp.array2d(dtype=float)):
     i, j = wp.tid()
@@ -1121,6 +1267,22 @@ class TestIndexedArray(unittest.TestCase):
 add_function_test(TestIndexedArray, "test_indexedarray_fill_vector", test_indexedarray_fill_vector, devices=devices)
 add_function_test(TestIndexedArray, "test_indexedarray_fill_matrix", test_indexedarray_fill_matrix, devices=devices)
 add_function_test(TestIndexedArray, "test_indexedarray_fill_struct", test_indexedarray_fill_struct, devices=devices)
+add_function_test(TestIndexedArray, "test_indexedarray_in_struct", test_indexedarray_in_struct, devices=devices)
+add_function_test(
+    TestIndexedArray, "test_indexedarray_in_nested_struct", test_indexedarray_in_nested_struct, devices=devices
+)
+add_function_test(
+    TestIndexedArray, "test_indexedarray_in_struct_array", test_indexedarray_in_struct_array, devices=devices
+)
+add_function_test(
+    TestIndexedArray, "test_indexedarray_in_struct_numpy", test_indexedarray_in_struct_numpy, devices=devices
+)
+add_function_test(
+    TestIndexedArray,
+    "test_indexedarray_in_struct_to_device_transfer",
+    test_indexedarray_in_struct_to_device_transfer,
+    devices=devices,
+)
 
 
 if __name__ == "__main__":