Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
based on occupancy ([GH-1270](https://github.com/NVIDIA/warp/issues/1270)).
- Add `module_options` dict parameter to `@wp.kernel` for inline module-level compilation options
on `"unique"` modules ([GH-1250](https://github.com/NVIDIA/warp/issues/1250)).
- Add support for `wp.indexedarray` fields in `@wp.struct` (assignment, device transfer, and NumPy structured values)
([GH-1327](https://github.com/NVIDIA/warp/issues/1327)).

### Removed

Expand Down
41 changes: 41 additions & 0 deletions warp/_src/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,11 @@ def to(self, device):
if matches_array_class(var.type, array):
# array_t
setattr(dst, name, value.to(device))
elif matches_array_class(var.type, indexedarray):
# indexedarray_t
# `.to` returns an array if on different device, force to identity indexedarray
cloned = value.to(device)
setattr(dst, name, cloned if isinstance(cloned, indexedarray) else indexedarray(cloned))
elif isinstance(var.type, Struct):
# nested struct
new_struct = var.type()
Expand Down Expand Up @@ -264,6 +269,9 @@ def numpy_value(self):
if matches_array_class(var.type, array):
# array_t
npvalue.append(value.numpy_value())
elif matches_array_class(var.type, indexedarray):
# indexedarray_t
npvalue.append(value.numpy_value())
elif isinstance(var.type, Struct):
# nested struct
npvalue.append(value.numpy_value())
Expand Down Expand Up @@ -299,6 +307,8 @@ def _make_struct_field_constructor(field: str, var_type: type):
return lambda ctype: var_type.instance_type(ctype=getattr(ctype, field))
elif matches_array_class(var_type, warp._src.types.array):
return lambda ctype: None
elif matches_array_class(var_type, warp._src.types.indexedarray):
return lambda ctype: None
elif _is_texture_type(var_type):
return lambda ctype: None
elif issubclass(var_type, ctypes.Array):
Expand Down Expand Up @@ -329,6 +339,27 @@ def set_array_value(inst, value):

cls.__setattr__(inst, field, value)

def set_indexedarray_value(inst, value):
    # Setter generated for a `wp.indexedarray` struct field: writes the
    # ctype representation into the struct's ctype and keeps the Python
    # object (and its gradient buffer) alive on the instance.
    if value is None:
        ctype_value = var_type.__ctype__()
    else:
        assert isinstance(value, indexedarray)
        assert types_equal(value.dtype, var_type.dtype), (
            f"assign to struct member variable {field} failed, expected type {type_repr(var_type.dtype)}, got type {type_repr(value.dtype)}"
        )
        ctype_value = value.__ctype__()
    setattr(inst._ctype, field, ctype_value)

    # workaround to prevent gradient buffers being garbage collected
    # (indexedarray_t embeds an array_t)
    keepalive = None
    if value is not None and value.data is not None and value.data.requires_grad:
        keepalive = value.data.grad
    cls.__setattr__(inst, "_" + field + "_grad", keepalive)

    cls.__setattr__(inst, field, value)

def set_struct_value(inst, value):
    # delegate to the nested struct's assign() rather than rebinding the attribute
    getattr(inst, field).assign(value)

Expand Down Expand Up @@ -388,6 +419,8 @@ def set_texture_value(inst, value):

if matches_array_class(var_type, array):
return set_array_value
elif matches_array_class(var_type, indexedarray):
return set_indexedarray_value
elif isinstance(var_type, Struct):
return set_struct_value
elif _is_texture_type(var_type):
Expand Down Expand Up @@ -418,6 +451,8 @@ def __init__(self, key: str, cls: type, module: warp._src.context.Module):
for label, var in self.vars.items():
if matches_array_class(var.type, array):
fields.append((label, array_t))
elif matches_array_class(var.type, indexedarray):
fields.append((label, indexedarray_t))
elif isinstance(var.type, Struct):
fields.append((label, var.type.ctype))
elif issubclass(var.type, ctypes.Array):
Expand Down Expand Up @@ -533,6 +568,9 @@ def numpy_dtype(self):
if matches_array_class(var.type, array):
# array_t
formats.append(array_t.numpy_dtype())
elif matches_array_class(var.type, indexedarray):
# indexedarray_t
formats.append(indexedarray_t.numpy_dtype())
elif isinstance(var.type, Struct):
# nested struct
formats.append(var.type.numpy_dtype())
Expand Down Expand Up @@ -566,6 +604,9 @@ def from_ptr(self, ptr):
# no easy way to make a backref.
# Instead, we just create a stub annotation, which is not a fully usable array object.
setattr(instance, name, array(dtype=var.type.dtype, ndim=var.type.ndim))
elif matches_array_class(var.type, indexedarray):
# Same as regular arrays: return an annotation stub only.
setattr(instance, name, indexedarray(dtype=var.type.dtype, ndim=var.type.ndim))
elif isinstance(var.type, Struct):
# nested struct
value = var.type.from_ptr(ptr + offset)
Expand Down
35 changes: 34 additions & 1 deletion warp/_src/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2184,7 +2184,8 @@ class indexedarray_t(ctypes.Structure):

def __init__(self, data, indices, shape):
if data is None:
self.data = array().__ctype__()
ndim = len(shape)
self.data = array_t(data=0, grad=0, ndim=ndim, shape=(0,) * ndim, strides=(0,) * ndim)
for i in range(ARRAY_MAX_DIMS):
self.indices[i] = ctypes.c_void_p(None)
self.shape[i] = 0
Expand All @@ -2197,6 +2198,38 @@ def __init__(self, data, indices, shape):
self.indices[i] = ctypes.c_void_p(None)
self.shape[i] = shape[i]

# structured type description used when indexedarray_t is packed in a struct and shared via numpy structured array.
@classmethod
def numpy_dtype(cls):
    # _numpy_dtype_ is attached to the class after its definition (see module level);
    # it must stay in sync with indexedarray_t._fields_
    return cls._numpy_dtype_

# structured value used when indexedarray_t is packed in a struct and shared via a numpy structured array
def numpy_value(self):
    """Return the (data, indices, shape) triple as plain Python values."""

    def _ptr_as_int(v):
        # pointers are represented as unsigned 64-bit integers;
        # entries may be ctypes.c_void_p instances or raw ints/None
        if isinstance(v, ctypes.c_void_p):
            v = v.value
        return 0 if v is None else int(v)

    indices = [_ptr_as_int(self.indices[i]) for i in range(ARRAY_MAX_DIMS)]
    return (self.data.numpy_value(), indices, list(self.shape))


# NOTE: must match indexedarray_t._fields_
# Structured numpy dtype for indexedarray_t when embedded in a @wp.struct:
# pointers are stored as uint64 ("u8"), shape entries as int32 ("i4").
indexedarray_t._numpy_dtype_ = {
    "names": ["data", "indices", "shape"],
    "formats": [array_t.numpy_dtype(), f"{ARRAY_MAX_DIMS}u8", f"{ARRAY_MAX_DIMS}i4"],
    "offsets": [
        indexedarray_t.data.offset,
        indexedarray_t.indices.offset,
        indexedarray_t.shape.offset,
    ],
    "itemsize": ctypes.sizeof(indexedarray_t),
}


class tuple_t:
"""Used during codegen to store multiple values into a single variable."""
Expand Down
4 changes: 4 additions & 0 deletions warp/native/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -1275,6 +1275,10 @@ CUDA_CALLABLE inline void adj_where(
// atomic add the whole struct onto an array (e.g.: during backwards pass)
template <typename T> CUDA_CALLABLE inline void atomic_add(array_t<T>*, array_t<T>) { }

// stub for the case where we have an indexed array inside a struct and
// atomic add the whole struct onto an array (e.g.: during backwards pass)
// intentionally a no-op, mirroring the array_t stub above
template <typename T> CUDA_CALLABLE inline void atomic_add(indexedarray_t<T>*, indexedarray_t<T>) { }

// for float and vector types this is just an alias for an atomic add
template <typename T> CUDA_CALLABLE inline void adj_atomic_add(T* buf, T value) { atomic_add(buf, value); }

Expand Down
162 changes: 162 additions & 0 deletions warp/tests/test_indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,152 @@ def test_indexedarray_1d(test, device):
wp.launch(kernel_1d, dim=iarr.size, inputs=[iarr, expected_arr], device=device)


@wp.struct
class IndexedArrayStruct:
    # single indexed-array field; exercises wp.indexedarray support in @wp.struct
    iarr: wp.indexedarray(dtype=float)


@wp.struct
class NestedIndexedArrayStruct:
    # struct-in-struct nesting; the inner struct holds the indexedarray field
    inner: IndexedArrayStruct


@wp.kernel
def kernel_indexedarray_in_struct(arg: IndexedArrayStruct, expected: wp.array(dtype=float)):
    # Verifies reads, writes, and atomic adds through a struct-held indexedarray.
    i = wp.tid()

    wp.expect_eq(arg.iarr[i], expected[i])

    # in-place update: a[i] = 2*a[i], then +1 via atomic add
    arg.iarr[i] = 2.0 * arg.iarr[i]
    wp.atomic_add(arg.iarr, i, 1.0)

    wp.expect_eq(arg.iarr[i], 2.0 * expected[i] + 1.0)


@wp.kernel
def kernel_indexedarray_in_nested_struct(arg: NestedIndexedArrayStruct, expected: wp.array(dtype=float)):
    # Same checks as kernel_indexedarray_in_struct, but through one level of struct nesting.
    i = wp.tid()

    wp.expect_eq(arg.inner.iarr[i], expected[i])

    # in-place update: a[i] = 2*a[i], then +1 via atomic add
    arg.inner.iarr[i] = 2.0 * arg.inner.iarr[i]
    wp.atomic_add(arg.inner.iarr, i, 1.0)

    wp.expect_eq(arg.inner.iarr[i], 2.0 * expected[i] + 1.0)


@wp.kernel
def kernel_indexedarray_in_struct_array(args: wp.array(dtype=IndexedArrayStruct), expected: wp.array(dtype=float)):
    # Struct containing an indexedarray, itself stored inside a wp.array of structs.
    i = wp.tid()

    # every thread loads the single struct element and touches its own index i
    s = args[0]
    wp.expect_eq(s.iarr[i], expected[i])

    s.iarr[i] = 2.0 * s.iarr[i]
    wp.atomic_add(s.iarr, i, 1.0)

    wp.expect_eq(s.iarr[i], 2.0 * expected[i] + 1.0)


def test_indexedarray_in_struct(test, device):
    """Check reads/writes through an indexedarray held by a @wp.struct field.

    The kernel asserts values on-device via wp.expect_eq; this test additionally
    verifies the kernel's writes on the host (previously the mutation went unchecked).
    """
    values = np.arange(10, dtype=np.float32)
    arr = wp.array(data=values, device=device)

    indices = wp.array([1, 3, 5, 7, 9], dtype=int, device=device)
    iarr = wp.indexedarray1d(arr, [indices])

    expected_values = np.array([1, 3, 5, 7, 9], dtype=np.float32)
    expected_arr = wp.array(data=expected_values, dtype=float, device=device)

    s = IndexedArrayStruct()
    s.iarr = iarr

    wp.launch(kernel_indexedarray_in_struct, dim=iarr.size, inputs=[s, expected_arr], device=device)
    wp.synchronize_device(device)

    # the kernel computes a[i] = 2*a[i] + 1; verify the result host-side as well
    assert_np_equal(iarr.numpy(), 2.0 * expected_values + 1.0)


def test_indexedarray_in_nested_struct(test, device):
    """Check reads/writes through an indexedarray held by a struct nested in another struct.

    The kernel asserts values on-device via wp.expect_eq; this test additionally
    verifies the kernel's writes on the host (previously the mutation went unchecked).
    """
    values = np.arange(10, dtype=np.float32)
    arr = wp.array(data=values, device=device)

    indices = wp.array([1, 3, 5, 7, 9], dtype=int, device=device)
    iarr = wp.indexedarray1d(arr, [indices])

    expected_values = np.array([1, 3, 5, 7, 9], dtype=np.float32)
    expected_arr = wp.array(data=expected_values, dtype=float, device=device)

    inner = IndexedArrayStruct()
    inner.iarr = iarr

    outer = NestedIndexedArrayStruct()
    outer.inner = inner

    wp.launch(kernel_indexedarray_in_nested_struct, dim=iarr.size, inputs=[outer, expected_arr], device=device)
    wp.synchronize_device(device)

    # the kernel computes a[i] = 2*a[i] + 1; verify the result host-side as well
    assert_np_equal(iarr.numpy(), 2.0 * expected_values + 1.0)


def test_indexedarray_in_struct_array(test, device):
    """Check an indexedarray-holding struct stored inside a wp.array of structs.

    The kernel asserts values on-device via wp.expect_eq; this test additionally
    verifies the kernel's writes on the host (previously the mutation went unchecked).
    """
    values = np.arange(10, dtype=np.float32)
    arr = wp.array(data=values, device=device)

    indices = wp.array([1, 3, 5, 7, 9], dtype=int, device=device)
    iarr = wp.indexedarray1d(arr, [indices])

    expected_values = np.array([1, 3, 5, 7, 9], dtype=np.float32)
    expected_arr = wp.array(data=expected_values, dtype=float, device=device)

    s = IndexedArrayStruct()
    s.iarr = iarr
    struct_arr = wp.array([s], dtype=IndexedArrayStruct, device=device)

    wp.launch(kernel_indexedarray_in_struct_array, dim=iarr.size, inputs=[struct_arr, expected_arr], device=device)
    wp.synchronize_device(device)

    # the struct's indexedarray references the same data buffer as `arr`,
    # so the kernel's writes (a[i] = 2*a[i] + 1) are visible through `iarr`
    assert_np_equal(iarr.numpy(), 2.0 * expected_values + 1.0)


def test_indexedarray_in_struct_numpy(test, device):
    """Smoke-test numpy_dtype()/numpy_value() on a struct with an indexedarray field."""
    source = wp.array(data=np.arange(4, dtype=np.float32), device=device)
    idx = wp.array([0, 2], dtype=int, device=device)

    s = IndexedArrayStruct()
    s.iarr = wp.indexedarray1d(source, [idx])

    # Just ensure these are functional for structs embedding indexedarray_t
    dtype = IndexedArrayStruct.numpy_dtype()
    value = s.numpy_value()

    test.assertIsInstance(dtype, dict)
    test.assertEqual(dtype["names"], ["iarr"])
    test.assertEqual(len(value), 1)


def test_indexedarray_in_struct_to_device_transfer(test, device):
    """Build the struct's indexedarray on CPU, transfer the struct to CUDA, and run a kernel on it."""
    # This test only applies to CUDA target devices.
    if not wp.is_cuda_available() or not wp.get_device(device).is_cuda:
        test.skipTest("Requires CUDA")

    # Create the indexedarray on CPU, then move the struct to CUDA.
    values = np.arange(10, dtype=np.float32)
    arr_cpu = wp.array(data=values, device="cpu")
    indices_cpu = wp.array([1, 3, 5, 7, 9], dtype=int, device="cpu")
    iarr_cpu = wp.indexedarray1d(arr_cpu, [indices_cpu])

    s = IndexedArrayStruct()
    s.iarr = iarr_cpu

    s_cuda = s.to(device)
    test.assertIsInstance(s_cuda.iarr, wp.indexedarray)
    # cross-device transfer yields an identity indexedarray (no index buffers),
    # hence all indices are expected to be None after the move
    test.assertTrue(all(x is None for x in s_cuda.iarr.indices))
    test.assertEqual(s_cuda.iarr.shape, iarr_cpu.shape)

    expected_values = np.array([1, 3, 5, 7, 9], dtype=np.float32)
    expected_arr = wp.array(data=expected_values, dtype=float, device=device)

    wp.launch(kernel_indexedarray_in_struct, dim=s_cuda.iarr.size, inputs=[s_cuda, expected_arr], device=device)
    # After the kernel: a[i] = 2*a[i] then atomic_add(a, i, 1) => 2*expected + 1
    result = s_cuda.iarr.numpy()
    assert_np_equal(result, 2.0 * expected_values + 1.0)


@wp.kernel
def kernel_2d(a: wp.indexedarray2d(dtype=float), expected: wp.array2d(dtype=float)):
i, j = wp.tid()
Expand Down Expand Up @@ -1121,6 +1267,22 @@ class TestIndexedArray(unittest.TestCase):
add_function_test(TestIndexedArray, "test_indexedarray_fill_vector", test_indexedarray_fill_vector, devices=devices)
add_function_test(TestIndexedArray, "test_indexedarray_fill_matrix", test_indexedarray_fill_matrix, devices=devices)
add_function_test(TestIndexedArray, "test_indexedarray_fill_struct", test_indexedarray_fill_struct, devices=devices)
# register the struct-embedding indexedarray tests (GH-1327) on all available devices
add_function_test(TestIndexedArray, "test_indexedarray_in_struct", test_indexedarray_in_struct, devices=devices)
add_function_test(
    TestIndexedArray, "test_indexedarray_in_nested_struct", test_indexedarray_in_nested_struct, devices=devices
)
add_function_test(
    TestIndexedArray, "test_indexedarray_in_struct_array", test_indexedarray_in_struct_array, devices=devices
)
add_function_test(
    TestIndexedArray, "test_indexedarray_in_struct_numpy", test_indexedarray_in_struct_numpy, devices=devices
)
add_function_test(
    TestIndexedArray,
    "test_indexedarray_in_struct_to_device_transfer",
    test_indexedarray_in_struct_to_device_transfer,
    devices=devices,
)


if __name__ == "__main__":
Expand Down