Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
based on occupancy ([GH-1270](https://github.com/NVIDIA/warp/issues/1270)).
- Add `module_options` dict parameter to `@wp.kernel` for inline module-level compilation options
on `"unique"` modules ([GH-1250](https://github.com/NVIDIA/warp/issues/1250)).
- Add support for `wp.indexedarray` fields in `@wp.struct` (assignment, device transfer, and NumPy structured values)
([GH-1327](https://github.com/NVIDIA/warp/issues/1327)).

### Removed

Expand Down
41 changes: 41 additions & 0 deletions warp/_src/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,11 @@ def to(self, device):
if matches_array_class(var.type, array):
# array_t
setattr(dst, name, value.to(device))
elif matches_array_class(var.type, indexedarray):
# indexedarray_t
# `.to` returns an array if on different device, force to identity indexedarray
cloned = value.to(device)
setattr(dst, name, cloned if isinstance(cloned, indexedarray) else indexedarray(cloned))
elif isinstance(var.type, Struct):
# nested struct
new_struct = var.type()
Expand Down Expand Up @@ -264,6 +269,9 @@ def numpy_value(self):
if matches_array_class(var.type, array):
# array_t
npvalue.append(value.numpy_value())
elif matches_array_class(var.type, indexedarray):
# indexedarray_t
npvalue.append(value.numpy_value())
elif isinstance(var.type, Struct):
# nested struct
npvalue.append(value.numpy_value())
Expand Down Expand Up @@ -299,6 +307,8 @@ def _make_struct_field_constructor(field: str, var_type: type):
return lambda ctype: var_type.instance_type(ctype=getattr(ctype, field))
elif matches_array_class(var_type, warp._src.types.array):
return lambda ctype: None
elif matches_array_class(var_type, warp._src.types.indexedarray):
return lambda ctype: None
elif _is_texture_type(var_type):
return lambda ctype: None
elif issubclass(var_type, ctypes.Array):
Expand Down Expand Up @@ -329,6 +339,27 @@ def set_array_value(inst, value):

cls.__setattr__(inst, field, value)

def set_indexedarray_value(inst, value):
    # Setter generated for a `wp.indexedarray` struct field: writes the
    # ctype representation into the struct's ctype and keeps the Python
    # object (and its gradient buffer) alive on the instance.
    if value is None:
        ctype_value = var_type.__ctype__()
    else:
        assert isinstance(value, indexedarray)
        assert types_equal(value.dtype, var_type.dtype), (
            f"assign to struct member variable {field} failed, expected type {type_repr(var_type.dtype)}, got type {type_repr(value.dtype)}"
        )
        ctype_value = value.__ctype__()
    setattr(inst._ctype, field, ctype_value)

    # workaround to prevent gradient buffers being garbage collected
    # (indexedarray_t embeds an array_t)
    keepalive = None
    if value is not None and value.data is not None and value.data.requires_grad:
        keepalive = value.data.grad
    cls.__setattr__(inst, "_" + field + "_grad", keepalive)

    cls.__setattr__(inst, field, value)

def set_struct_value(inst, value):
    # delegate to the nested struct's assign() rather than rebinding the attribute
    getattr(inst, field).assign(value)

Expand Down Expand Up @@ -388,6 +419,8 @@ def set_texture_value(inst, value):

if matches_array_class(var_type, array):
return set_array_value
elif matches_array_class(var_type, indexedarray):
return set_indexedarray_value
elif isinstance(var_type, Struct):
return set_struct_value
elif _is_texture_type(var_type):
Expand Down Expand Up @@ -418,6 +451,8 @@ def __init__(self, key: str, cls: type, module: warp._src.context.Module):
for label, var in self.vars.items():
if matches_array_class(var.type, array):
fields.append((label, array_t))
elif matches_array_class(var.type, indexedarray):
fields.append((label, indexedarray_t))
elif isinstance(var.type, Struct):
fields.append((label, var.type.ctype))
elif issubclass(var.type, ctypes.Array):
Expand Down Expand Up @@ -533,6 +568,9 @@ def numpy_dtype(self):
if matches_array_class(var.type, array):
# array_t
formats.append(array_t.numpy_dtype())
elif matches_array_class(var.type, indexedarray):
# indexedarray_t
formats.append(indexedarray_t.numpy_dtype())
elif isinstance(var.type, Struct):
# nested struct
formats.append(var.type.numpy_dtype())
Expand Down Expand Up @@ -566,6 +604,9 @@ def from_ptr(self, ptr):
# no easy way to make a backref.
# Instead, we just create a stub annotation, which is not a fully usable array object.
setattr(instance, name, array(dtype=var.type.dtype, ndim=var.type.ndim))
elif matches_array_class(var.type, indexedarray):
# Same as regular arrays: return an annotation stub only.
setattr(instance, name, indexedarray(dtype=var.type.dtype, ndim=var.type.ndim))
elif isinstance(var.type, Struct):
# nested struct
value = var.type.from_ptr(ptr + offset)
Expand Down
35 changes: 34 additions & 1 deletion warp/_src/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2184,7 +2184,8 @@ class indexedarray_t(ctypes.Structure):

def __init__(self, data, indices, shape):
if data is None:
self.data = array().__ctype__()
ndim = len(shape)
self.data = array_t(data=0, grad=0, ndim=ndim, shape=(0,) * ndim, strides=(0,) * ndim)
for i in range(ARRAY_MAX_DIMS):
self.indices[i] = ctypes.c_void_p(None)
self.shape[i] = 0
Expand All @@ -2197,6 +2198,38 @@ def __init__(self, data, indices, shape):
self.indices[i] = ctypes.c_void_p(None)
self.shape[i] = shape[i]

# structured type description used when indexedarray_t is packed in a struct and shared via numpy structured array.
@classmethod
def numpy_dtype(cls):
    # _numpy_dtype_ is attached to the class after its definition (see module level);
    # it must stay in sync with indexedarray_t._fields_
    return cls._numpy_dtype_

# structured value used when indexedarray_t is packed in a struct and shared via a numpy structured array
def numpy_value(self):
    """Return the (data, indices, shape) triple as plain Python values."""

    def _ptr_as_int(v):
        # pointers are represented as unsigned 64-bit integers;
        # entries may be ctypes.c_void_p instances or raw ints/None
        if isinstance(v, ctypes.c_void_p):
            v = v.value
        return 0 if v is None else int(v)

    indices = [_ptr_as_int(self.indices[i]) for i in range(ARRAY_MAX_DIMS)]
    return (self.data.numpy_value(), indices, list(self.shape))


# NOTE: must match indexedarray_t._fields_
# Structured numpy dtype for indexedarray_t when embedded in a @wp.struct:
# pointers are stored as uint64 ("u8"), shape entries as int32 ("i4").
indexedarray_t._numpy_dtype_ = {
    "names": ["data", "indices", "shape"],
    "formats": [array_t.numpy_dtype(), f"{ARRAY_MAX_DIMS}u8", f"{ARRAY_MAX_DIMS}i4"],
    "offsets": [
        indexedarray_t.data.offset,
        indexedarray_t.indices.offset,
        indexedarray_t.shape.offset,
    ],
    "itemsize": ctypes.sizeof(indexedarray_t),
}


class tuple_t:
"""Used during codegen to store multiple values into a single variable."""
Expand Down
4 changes: 4 additions & 0 deletions warp/native/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -1275,6 +1275,10 @@ CUDA_CALLABLE inline void adj_where(
// atomic add the whole struct onto an array (e.g.: during backwards pass)
template <typename T> CUDA_CALLABLE inline void atomic_add(array_t<T>*, array_t<T>) { }

// stub for the case where we have an indexed array inside a struct and
// atomic add the whole struct onto an array (e.g.: during backwards pass)
// intentionally a no-op, mirroring the array_t stub above
template <typename T> CUDA_CALLABLE inline void atomic_add(indexedarray_t<T>*, indexedarray_t<T>) { }

// for float and vector types this is just an alias for an atomic add
template <typename T> CUDA_CALLABLE inline void adj_atomic_add(T* buf, T value) { atomic_add(buf, value); }

Expand Down
162 changes: 162 additions & 0 deletions warp/tests/test_indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,152 @@ def test_indexedarray_1d(test, device):
wp.launch(kernel_1d, dim=iarr.size, inputs=[iarr, expected_arr], device=device)


@wp.struct
class IndexedArrayStruct:
    # single indexed-array field; exercises wp.indexedarray support in @wp.struct
    iarr: wp.indexedarray(dtype=float)


@wp.struct
class NestedIndexedArrayStruct:
    # struct-in-struct nesting; the inner struct holds the indexedarray field
    inner: IndexedArrayStruct


@wp.kernel
def kernel_indexedarray_in_struct(arg: IndexedArrayStruct, expected: wp.array(dtype=float)):
    # Verifies reads, writes, and atomic adds through a struct-held indexedarray.
    i = wp.tid()

    wp.expect_eq(arg.iarr[i], expected[i])

    # in-place update: a[i] = 2*a[i], then +1 via atomic add
    arg.iarr[i] = 2.0 * arg.iarr[i]
    wp.atomic_add(arg.iarr, i, 1.0)

    wp.expect_eq(arg.iarr[i], 2.0 * expected[i] + 1.0)


@wp.kernel
def kernel_indexedarray_in_nested_struct(arg: NestedIndexedArrayStruct, expected: wp.array(dtype=float)):
    # Same checks as kernel_indexedarray_in_struct, but through one level of struct nesting.
    i = wp.tid()

    wp.expect_eq(arg.inner.iarr[i], expected[i])

    # in-place update: a[i] = 2*a[i], then +1 via atomic add
    arg.inner.iarr[i] = 2.0 * arg.inner.iarr[i]
    wp.atomic_add(arg.inner.iarr, i, 1.0)

    wp.expect_eq(arg.inner.iarr[i], 2.0 * expected[i] + 1.0)


@wp.kernel
def kernel_indexedarray_in_struct_array(args: wp.array(dtype=IndexedArrayStruct), expected: wp.array(dtype=float)):
    # Struct containing an indexedarray, itself stored inside a wp.array of structs.
    i = wp.tid()

    # every thread loads the single struct element and touches its own index i
    s = args[0]
    wp.expect_eq(s.iarr[i], expected[i])

    s.iarr[i] = 2.0 * s.iarr[i]
    wp.atomic_add(s.iarr, i, 1.0)

    wp.expect_eq(s.iarr[i], 2.0 * expected[i] + 1.0)


def test_indexedarray_in_struct(test, device):
    """Check reads/writes through an indexedarray held by a @wp.struct field.

    The kernel asserts values on-device via wp.expect_eq; this test additionally
    verifies the kernel's writes on the host (previously the mutation went unchecked).
    """
    values = np.arange(10, dtype=np.float32)
    arr = wp.array(data=values, device=device)

    indices = wp.array([1, 3, 5, 7, 9], dtype=int, device=device)
    iarr = wp.indexedarray1d(arr, [indices])

    expected_values = np.array([1, 3, 5, 7, 9], dtype=np.float32)
    expected_arr = wp.array(data=expected_values, dtype=float, device=device)

    s = IndexedArrayStruct()
    s.iarr = iarr

    wp.launch(kernel_indexedarray_in_struct, dim=iarr.size, inputs=[s, expected_arr], device=device)
    wp.synchronize_device(device)

    # the kernel computes a[i] = 2*a[i] + 1; verify the result host-side as well
    assert_np_equal(iarr.numpy(), 2.0 * expected_values + 1.0)


def test_indexedarray_in_nested_struct(test, device):
    """Check reads/writes through an indexedarray held by a struct nested in another struct.

    The kernel asserts values on-device via wp.expect_eq; this test additionally
    verifies the kernel's writes on the host (previously the mutation went unchecked).
    """
    values = np.arange(10, dtype=np.float32)
    arr = wp.array(data=values, device=device)

    indices = wp.array([1, 3, 5, 7, 9], dtype=int, device=device)
    iarr = wp.indexedarray1d(arr, [indices])

    expected_values = np.array([1, 3, 5, 7, 9], dtype=np.float32)
    expected_arr = wp.array(data=expected_values, dtype=float, device=device)

    inner = IndexedArrayStruct()
    inner.iarr = iarr

    outer = NestedIndexedArrayStruct()
    outer.inner = inner

    wp.launch(kernel_indexedarray_in_nested_struct, dim=iarr.size, inputs=[outer, expected_arr], device=device)
    wp.synchronize_device(device)

    # the kernel computes a[i] = 2*a[i] + 1; verify the result host-side as well
    assert_np_equal(iarr.numpy(), 2.0 * expected_values + 1.0)


def test_indexedarray_in_struct_array(test, device):
    """Check an indexedarray-holding struct stored inside a wp.array of structs.

    The kernel asserts values on-device via wp.expect_eq; this test additionally
    verifies the kernel's writes on the host (previously the mutation went unchecked).
    """
    values = np.arange(10, dtype=np.float32)
    arr = wp.array(data=values, device=device)

    indices = wp.array([1, 3, 5, 7, 9], dtype=int, device=device)
    iarr = wp.indexedarray1d(arr, [indices])

    expected_values = np.array([1, 3, 5, 7, 9], dtype=np.float32)
    expected_arr = wp.array(data=expected_values, dtype=float, device=device)

    s = IndexedArrayStruct()
    s.iarr = iarr
    struct_arr = wp.array([s], dtype=IndexedArrayStruct, device=device)

    wp.launch(kernel_indexedarray_in_struct_array, dim=iarr.size, inputs=[struct_arr, expected_arr], device=device)
    wp.synchronize_device(device)

    # the struct's indexedarray references the same data buffer as `arr`,
    # so the kernel's writes (a[i] = 2*a[i] + 1) are visible through `iarr`
    assert_np_equal(iarr.numpy(), 2.0 * expected_values + 1.0)


def test_indexedarray_in_struct_numpy(test, device):
    """Smoke-test numpy_dtype()/numpy_value() on a struct with an indexedarray field."""
    source = wp.array(data=np.arange(4, dtype=np.float32), device=device)
    idx = wp.array([0, 2], dtype=int, device=device)

    s = IndexedArrayStruct()
    s.iarr = wp.indexedarray1d(source, [idx])

    # Just ensure these are functional for structs embedding indexedarray_t
    dtype = IndexedArrayStruct.numpy_dtype()
    value = s.numpy_value()

    test.assertIsInstance(dtype, dict)
    test.assertEqual(dtype["names"], ["iarr"])
    test.assertEqual(len(value), 1)


def test_indexedarray_in_struct_to_device_transfer(test, device):
    """Build the struct's indexedarray on CPU, transfer the struct to CUDA, and run a kernel on it."""
    # This test only applies to CUDA target devices.
    if not wp.is_cuda_available() or not wp.get_device(device).is_cuda:
        test.skipTest("Requires CUDA")

    # Create the indexedarray on CPU, then move the struct to CUDA.
    values = np.arange(10, dtype=np.float32)
    arr_cpu = wp.array(data=values, device="cpu")
    indices_cpu = wp.array([1, 3, 5, 7, 9], dtype=int, device="cpu")
    iarr_cpu = wp.indexedarray1d(arr_cpu, [indices_cpu])

    s = IndexedArrayStruct()
    s.iarr = iarr_cpu

    s_cuda = s.to(device)
    test.assertIsInstance(s_cuda.iarr, wp.indexedarray)
    # cross-device transfer yields an identity indexedarray (no index buffers),
    # hence all indices are expected to be None after the move
    test.assertTrue(all(x is None for x in s_cuda.iarr.indices))
    test.assertEqual(s_cuda.iarr.shape, iarr_cpu.shape)

    expected_values = np.array([1, 3, 5, 7, 9], dtype=np.float32)
    expected_arr = wp.array(data=expected_values, dtype=float, device=device)

    wp.launch(kernel_indexedarray_in_struct, dim=s_cuda.iarr.size, inputs=[s_cuda, expected_arr], device=device)
    # After the kernel: a[i] = 2*a[i] then atomic_add(a, i, 1) => 2*expected + 1
    result = s_cuda.iarr.numpy()
    assert_np_equal(result, 2.0 * expected_values + 1.0)


@wp.kernel
def kernel_2d(a: wp.indexedarray2d(dtype=float), expected: wp.array2d(dtype=float)):
i, j = wp.tid()
Expand Down Expand Up @@ -1121,6 +1267,22 @@ class TestIndexedArray(unittest.TestCase):
add_function_test(TestIndexedArray, "test_indexedarray_fill_vector", test_indexedarray_fill_vector, devices=devices)
add_function_test(TestIndexedArray, "test_indexedarray_fill_matrix", test_indexedarray_fill_matrix, devices=devices)
add_function_test(TestIndexedArray, "test_indexedarray_fill_struct", test_indexedarray_fill_struct, devices=devices)
# register the struct-embedding indexedarray tests (GH-1327) on all available devices
add_function_test(TestIndexedArray, "test_indexedarray_in_struct", test_indexedarray_in_struct, devices=devices)
add_function_test(
    TestIndexedArray, "test_indexedarray_in_nested_struct", test_indexedarray_in_nested_struct, devices=devices
)
add_function_test(
    TestIndexedArray, "test_indexedarray_in_struct_array", test_indexedarray_in_struct_array, devices=devices
)
add_function_test(
    TestIndexedArray, "test_indexedarray_in_struct_numpy", test_indexedarray_in_struct_numpy, devices=devices
)
add_function_test(
    TestIndexedArray,
    "test_indexedarray_in_struct_to_device_transfer",
    test_indexedarray_in_struct_to_device_transfer,
    devices=devices,
)


if __name__ == "__main__":
Expand Down