Skip to content

Commit 3271964

Browse files
authored
Merge branch 'main' into warn-fork-multiprocessing
2 parents fbdd56d + 5c42278 commit 3271964

File tree

4 files changed

+202
-80
lines changed

4 files changed

+202
-80
lines changed

cuda_core/tests/conftest.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
except ImportError:
1313
from cuda import cuda as driver
1414

15+
import cuda.core.experimental
1516
from cuda.core.experimental import Device, DeviceMemoryResource, DeviceMemoryResourceOptions, _device
1617
from cuda.core.experimental._utils.cuda_utils import handle_return
1718

@@ -28,7 +29,7 @@ def session_setup():
2829
@pytest.fixture(scope="function")
2930
def init_cuda():
3031
# TODO: rename this to e.g. init_context
31-
device = Device()
32+
device = Device(0)
3233
device.set_current()
3334

3435
# Set option to avoid spin-waiting on synchronization.
@@ -83,7 +84,7 @@ def pop_all_contexts():
8384
def ipc_device():
8485
"""Obtains a device suitable for IPC-enabled mempool tests, or skips."""
8586
# Check if IPC is supported on this platform/device
86-
device = Device()
87+
device = Device(0)
8788
device.set_current()
8889

8990
if not device.properties.memory_pools_supported:
@@ -113,7 +114,7 @@ def ipc_memory_resource(ipc_device):
113114
@pytest.fixture
114115
def mempool_device():
115116
"""Obtains a device suitable for mempool tests, or skips."""
116-
device = Device()
117+
device = Device(0)
117118
device.set_current()
118119

119120
if not device.properties.memory_pools_supported:
@@ -122,4 +123,34 @@ def mempool_device():
122123
return device
123124

124125

126+
def _mempool_device_impl(num):
127+
num_devices = len(cuda.core.experimental.system.devices)
128+
if num_devices < num:
129+
pytest.skip(f"Test requires at least {num} GPUs")
130+
131+
devs = [Device(i) for i in range(num)]
132+
for i in reversed(range(num)):
133+
devs[i].set_current() # ends with device 0 current
134+
135+
if not all(devs[i].can_access_peer(j) for i in range(num) for j in range(num)):
136+
pytest.skip("Test requires GPUs with peer access")
137+
138+
if not all(devs[i].properties.memory_pools_supported for i in range(num)):
139+
pytest.skip("Device does not support mempool operations")
140+
141+
return devs
142+
143+
144+
@pytest.fixture
145+
def mempool_device_x2():
146+
"""Fixture that provides two devices if available, otherwise skips test."""
147+
return _mempool_device_impl(2)
148+
149+
150+
@pytest.fixture
151+
def mempool_device_x3():
152+
"""Fixture that provides three devices if available, otherwise skips test."""
153+
return _mempool_device_impl(3)
154+
155+
125156
skipif_need_cuda_headers = pytest.mark.skipif(helpers.CUDA_INCLUDE_PATH is None, reason="need CUDA header")
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import multiprocessing as mp
5+
6+
import pytest
7+
from cuda.core.experimental import Device, DeviceMemoryResource, DeviceMemoryResourceOptions
8+
from cuda.core.experimental._utils.cuda_utils import CUDAError
9+
from helpers.buffers import PatternGen
10+
11+
CHILD_TIMEOUT_SEC = 20
12+
NBYTES = 64
13+
POOL_SIZE = 2097152
14+
15+
16+
class TestPeerAccessNotPreservedOnImport:
17+
"""
18+
Verify that peer access settings are not preserved when a memory resource
19+
is sent to another process via IPC, and that peer access can be set after import.
20+
"""
21+
22+
def test_main(self, mempool_device_x2):
23+
dev0, dev1 = mempool_device_x2
24+
25+
# Parent Process - Create and Configure MR
26+
dev1.set_current()
27+
options = DeviceMemoryResourceOptions(max_size=POOL_SIZE, ipc_enabled=True)
28+
mr = DeviceMemoryResource(dev1, options=options)
29+
mr.peer_accessible_by = [dev0]
30+
assert mr.peer_accessible_by == (0,)
31+
32+
# Spawn child process
33+
process = mp.Process(target=self.child_main, args=(mr,))
34+
process.start()
35+
process.join(timeout=CHILD_TIMEOUT_SEC)
36+
assert process.exitcode == 0
37+
38+
# Verify parent's MR still has peer access set (independent state)
39+
assert mr.peer_accessible_by == (0,)
40+
mr.close()
41+
42+
def child_main(self, mr):
43+
Device(1).set_current()
44+
assert mr.is_mapped is True
45+
assert mr.device_id == 1
46+
assert mr.peer_accessible_by == ()
47+
mr.peer_accessible_by = [0]
48+
assert mr.peer_accessible_by == (0,)
49+
mr.peer_accessible_by = []
50+
assert mr.peer_accessible_by == ()
51+
mr.close()
52+
53+
54+
class TestBufferPeerAccessAfterImport:
55+
"""
56+
Verify that buffers imported via IPC can be accessed from peer devices after
57+
setting peer access on the imported memory resource, and that access can be revoked.
58+
"""
59+
60+
@pytest.mark.parametrize("grant_access_in_parent", [True, False])
61+
def test_main(self, mempool_device_x2, grant_access_in_parent):
62+
dev0, dev1 = mempool_device_x2
63+
64+
# Parent Process - Create MR and Buffer
65+
dev1.set_current()
66+
options = DeviceMemoryResourceOptions(max_size=POOL_SIZE, ipc_enabled=True)
67+
mr = DeviceMemoryResource(dev1, options=options)
68+
if grant_access_in_parent:
69+
mr.peer_accessible_by = [dev0]
70+
assert mr.peer_accessible_by == (0,)
71+
else:
72+
assert mr.peer_accessible_by == ()
73+
buffer = mr.allocate(NBYTES)
74+
pgen = PatternGen(dev1, NBYTES)
75+
pgen.fill_buffer(buffer, seed=False)
76+
77+
# Spawn child process
78+
process = mp.Process(target=self.child_main, args=(mr, buffer))
79+
process.start()
80+
process.join(timeout=CHILD_TIMEOUT_SEC)
81+
assert process.exitcode == 0
82+
83+
buffer.close()
84+
mr.close()
85+
86+
def child_main(self, mr, buffer):
87+
# Verify MR and buffer are mapped
88+
Device(1).set_current()
89+
assert mr.is_mapped is True
90+
assert buffer.is_mapped is True
91+
assert mr.device_id == 1
92+
assert buffer.device_id == 1
93+
94+
# Test 1: Buffer accessible from resident device (dev1) - should always work
95+
dev1 = Device(1)
96+
dev1.set_current()
97+
PatternGen(dev1, NBYTES).verify_buffer(buffer, seed=False)
98+
99+
# Test 2: Buffer NOT accessible from dev0 initially (peer access not preserved)
100+
dev0 = Device(0)
101+
dev0.set_current()
102+
with pytest.raises(CUDAError, match="CUDA_ERROR_INVALID_VALUE"):
103+
PatternGen(dev0, NBYTES).verify_buffer(buffer, seed=False)
104+
105+
# Test 3: Set peer access and verify buffer becomes accessible
106+
dev1.set_current()
107+
mr.peer_accessible_by = [0]
108+
assert mr.peer_accessible_by == (0,)
109+
dev0.set_current()
110+
PatternGen(dev0, NBYTES).verify_buffer(buffer, seed=False)
111+
112+
# Test 4: Revoke peer access and verify buffer becomes inaccessible
113+
dev1.set_current()
114+
mr.peer_accessible_by = []
115+
assert mr.peer_accessible_by == ()
116+
dev0.set_current()
117+
with pytest.raises(CUDAError, match="CUDA_ERROR_INVALID_VALUE"):
118+
PatternGen(dev0, NBYTES).verify_buffer(buffer, seed=False)
119+
120+
buffer.close()
121+
mr.close()

cuda_core/tests/memory_ipc/test_serialize.py

Lines changed: 46 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def child_main(self, pipe, _):
122122
buffer.close()
123123

124124

125-
def test_object_passing(ipc_device, ipc_memory_resource):
125+
class TestObjectPassing:
126126
"""
127127
Test sending objects as arguments when starting a process.
128128
@@ -131,61 +131,61 @@ def test_object_passing(ipc_device, ipc_memory_resource):
131131
in multiprocessing (e.g., Queue) work.
132132
"""
133133

134-
# Define the objects.
135-
device = ipc_device
136-
mr = ipc_memory_resource
137-
alloc_handle = mr.get_allocation_handle()
138-
buffer = mr.allocate(NBYTES)
139-
buffer_desc = buffer.get_ipc_descriptor()
140-
141-
pgen = PatternGen(device, NBYTES)
142-
pgen.fill_buffer(buffer, seed=False)
134+
def test_main(self, ipc_device, ipc_memory_resource):
135+
# Define the objects.
136+
device = ipc_device
137+
mr = ipc_memory_resource
138+
alloc_handle = mr.get_allocation_handle()
139+
buffer = mr.allocate(NBYTES)
140+
buffer_desc = buffer.get_ipc_descriptor()
143141

144-
# Start the child process.
145-
process = mp.Process(target=child_main, args=(alloc_handle, mr, buffer_desc, buffer))
146-
process.start()
147-
process.join(timeout=CHILD_TIMEOUT_SEC)
148-
assert process.exitcode == 0
142+
pgen = PatternGen(device, NBYTES)
143+
pgen.fill_buffer(buffer, seed=False)
149144

150-
pgen.verify_buffer(buffer, seed=True)
151-
buffer.close()
145+
# Start the child process.
146+
process = mp.Process(target=self.child_main, args=(alloc_handle, mr, buffer_desc, buffer))
147+
process.start()
148+
process.join(timeout=CHILD_TIMEOUT_SEC)
149+
assert process.exitcode == 0
152150

151+
pgen.verify_buffer(buffer, seed=True)
152+
buffer.close()
153153

154-
def child_main(alloc_handle, mr1, buffer_desc, buffer1):
155-
device = Device()
156-
device.set_current()
157-
mr2 = DeviceMemoryResource.from_allocation_handle(device, alloc_handle)
158-
pgen = PatternGen(device, NBYTES)
154+
def child_main(self, alloc_handle, mr1, buffer_desc, buffer1):
155+
device = Device()
156+
device.set_current()
157+
mr2 = DeviceMemoryResource.from_allocation_handle(device, alloc_handle)
158+
pgen = PatternGen(device, NBYTES)
159159

160-
# OK to build the buffer from either mr and the descriptor.
161-
# All buffer* objects point to the same memory.
162-
buffer2 = Buffer.from_ipc_descriptor(mr1, buffer_desc)
163-
buffer3 = Buffer.from_ipc_descriptor(mr2, buffer_desc)
160+
# OK to build the buffer from either mr and the descriptor.
161+
# All buffer* objects point to the same memory.
162+
buffer2 = Buffer.from_ipc_descriptor(mr1, buffer_desc)
163+
buffer3 = Buffer.from_ipc_descriptor(mr2, buffer_desc)
164164

165-
pgen.verify_buffer(buffer1, seed=False)
166-
pgen.verify_buffer(buffer2, seed=False)
167-
pgen.verify_buffer(buffer3, seed=False)
165+
pgen.verify_buffer(buffer1, seed=False)
166+
pgen.verify_buffer(buffer2, seed=False)
167+
pgen.verify_buffer(buffer3, seed=False)
168168

169-
# Modify 1.
170-
pgen.fill_buffer(buffer1, seed=True)
169+
# Modify 1.
170+
pgen.fill_buffer(buffer1, seed=True)
171171

172-
pgen.verify_buffer(buffer1, seed=True)
173-
pgen.verify_buffer(buffer2, seed=True)
174-
pgen.verify_buffer(buffer3, seed=True)
172+
pgen.verify_buffer(buffer1, seed=True)
173+
pgen.verify_buffer(buffer2, seed=True)
174+
pgen.verify_buffer(buffer3, seed=True)
175175

176-
# Modify 2.
177-
pgen.fill_buffer(buffer2, seed=False)
176+
# Modify 2.
177+
pgen.fill_buffer(buffer2, seed=False)
178178

179-
pgen.verify_buffer(buffer1, seed=False)
180-
pgen.verify_buffer(buffer2, seed=False)
181-
pgen.verify_buffer(buffer3, seed=False)
179+
pgen.verify_buffer(buffer1, seed=False)
180+
pgen.verify_buffer(buffer2, seed=False)
181+
pgen.verify_buffer(buffer3, seed=False)
182182

183-
# Modify 3.
184-
pgen.fill_buffer(buffer3, seed=True)
183+
# Modify 3.
184+
pgen.fill_buffer(buffer3, seed=True)
185185

186-
pgen.verify_buffer(buffer1, seed=True)
187-
pgen.verify_buffer(buffer2, seed=True)
188-
pgen.verify_buffer(buffer3, seed=True)
186+
pgen.verify_buffer(buffer1, seed=True)
187+
pgen.verify_buffer(buffer2, seed=True)
188+
pgen.verify_buffer(buffer3, seed=True)
189189

190-
# Close any one buffer.
191-
buffer1.close()
190+
# Close any one buffer.
191+
buffer1.close()

cuda_core/tests/test_memory_peer_access.py

Lines changed: 1 addition & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,43 +3,13 @@
33

44
import cuda.core.experimental
55
import pytest
6-
from cuda.core.experimental import Device, DeviceMemoryResource
6+
from cuda.core.experimental import DeviceMemoryResource
77
from cuda.core.experimental._utils.cuda_utils import CUDAError
88
from helpers.buffers import PatternGen, compare_buffer_to_constant, make_scratch_buffer
99

1010
NBYTES = 1024
1111

1212

13-
def _mempool_device_impl(num):
14-
num_devices = len(cuda.core.experimental.system.devices)
15-
if num_devices < num:
16-
pytest.skip("Test requires at least {num} GPUs")
17-
18-
devs = [Device(i) for i in range(num)]
19-
for i in reversed(range(num)):
20-
devs[i].set_current()
21-
22-
if not all(devs[i].can_access_peer(j) for i in range(num) for j in range(num)):
23-
pytest.skip("Test requires GPUs with peer access")
24-
25-
if not all(devs[i].properties.memory_pools_supported for i in range(num)):
26-
pytest.skip("Device does not support mempool operations")
27-
28-
return devs
29-
30-
31-
@pytest.fixture
32-
def mempool_device_x2():
33-
"""Fixture that provides two devices if available, otherwise skips test."""
34-
return _mempool_device_impl(2)
35-
36-
37-
@pytest.fixture
38-
def mempool_device_x3():
39-
"""Fixture that provides three devices if available, otherwise skips test."""
40-
return _mempool_device_impl(3)
41-
42-
4313
def test_peer_access_basic(mempool_device_x2):
4414
"""Basic tests for dmr.peer_accessible_by."""
4515
dev0, dev1 = mempool_device_x2

0 commit comments

Comments
 (0)