@@ -226,7 +226,8 @@ struct Slab {
     // We attempt to place these next to each other.
     // TODO: We should coalesce these bits and use the result of `fetch_or` to
     //       search for free bits in parallel.
-    for (uint64_t mask = ~0ull; mask; mask = gpu::ballot(lane_mask, !result)) {
+    for (uint64_t mask = lane_mask; mask;
+         mask = gpu::ballot(lane_mask, !result)) {
       uint32_t id = impl::lane_count(uniform & mask);
       uint32_t index =
           (gpu::broadcast_value(lane_mask, impl::xorshift32(state)) + id) %
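
For context, a minimal CUDA sketch of the ballot-driven retry loop this hunk adjusts: the loop is seeded with the active lane mask (the patch replaces ~0ull with lane_mask for the same reason) and every iteration re-votes so only lanes whose claim failed keep searching. This is an illustration under assumptions, not the patched code: it uses 32-lane warps and plain CUDA intrinsics (__ballot_sync, __shfl_sync, __popc, atomicCAS) in place of the 64-bit masks, gpu::ballot, gpu::broadcast_value, impl::lane_count, and the slab bitfield; slots, try_claim, xorshift32, and claim_slots are hypothetical names.

#include <cstdint>
#include <cstdio>

__device__ uint32_t slots[32] = {};  // 0 = free, 1 = taken (illustrative)

__device__ bool try_claim(uint32_t index) {
  // Atomically flip a free slot to taken; true only for the winning lane.
  return atomicCAS(&slots[index], 0u, 1u) == 0u;
}

__device__ uint32_t xorshift32(uint32_t &state) {
  state ^= state << 13;
  state ^= state >> 17;
  state ^= state << 5;
  return state;
}

__global__ void claim_slots(uint32_t seed) {
  uint32_t lane = threadIdx.x % 32;
  uint32_t lane_mask = __activemask();
  uint32_t state = seed;
  bool result = false;
  uint32_t index = 0;
  // Seed with the active lanes, then re-ballot: lanes that already
  // succeeded drop out of the search mask but keep converging with the rest.
  for (uint32_t mask = lane_mask; mask;
       mask = __ballot_sync(lane_mask, !result)) {
    // Rank of this lane among the lanes still searching, so each one probes
    // a distinct slot relative to a random start broadcast from lane 0.
    uint32_t id = __popc(mask & ((1u << lane) - 1));
    uint32_t start = __shfl_sync(lane_mask, xorshift32(state), 0);
    uint32_t candidate = (start + id) % 32;
    if (!result && (result = try_claim(candidate)))
      index = candidate;
  }
  if (lane == 0)
    printf("lane 0 claimed slot %u\n", index);
}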
@@ -334,17 +335,17 @@ template <typename T> struct GuardPtr {
                                       cpp::MemoryOrder::RELAXED,
                                       cpp::MemoryOrder::RELAXED)) {
       count = cpp::numeric_limits<uint64_t>::max();
-      T *mem = reinterpret_cast<T *>(impl::rpc_allocate(sizeof(T)));
+      void *mem = impl::rpc_allocate(sizeof(T));
       if (!mem)
         return nullptr;
       new (mem) T(cpp::forward<Args>(args)...);

       cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
-      ptr.store(mem, cpp::MemoryOrder::RELAXED);
+      ptr.store(reinterpret_cast<T *>(mem), cpp::MemoryOrder::RELAXED);
       cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
       if (!ref.acquire(n, count))
         ref.reset(n, count);
-      return mem;
+      return reinterpret_cast<T *>(mem);
     }

     if (!expected || expected == reinterpret_cast<T *>(SENTINEL))
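
The second hunk keeps the allocation as a raw void * until the object has been constructed, and only casts to T * when the pointer is published and returned. A standalone C++ sketch of that pattern follows; it is an assumption-laden illustration, with std::malloc standing in for impl::rpc_allocate, std::atomic with a release store standing in for the cpp:: atomics and fences, and allocate_raw and construct_and_publish as hypothetical names.

#include <atomic>
#include <cstdlib>
#include <new>
#include <utility>

// Stand-in for impl::rpc_allocate: anything that returns raw, untyped
// storage of at least `size` bytes.
static void *allocate_raw(std::size_t size) { return std::malloc(size); }

template <typename T, typename... Args>
T *construct_and_publish(std::atomic<T *> &ptr, Args &&...args) {
  // Keep the allocation as void *: no T object exists at this address yet,
  // so there is nothing meaningful to cast to.
  void *mem = allocate_raw(sizeof(T));
  if (!mem)
    return nullptr;
  // Placement-new constructs the T in the raw storage and yields the T *.
  T *obj = new (mem) T(std::forward<Args>(args)...);
  // Publish the typed pointer only after construction has finished.
  ptr.store(obj, std::memory_order_release);
  return obj;
}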