You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
After using cuda::ptx::mapa, the compiler incorrectly infers that the pointer is to local shared memory and not remote shared memory.
How to Reproduce
#include <cstdint>
__cluster_dims__(2)
__global__ void kernel(int *out) {
__shared__ int x;
if (blockIdx.x == 0) {
int* remote_x = static_cast<int*>(__cluster_map_shared_rank(&x, 1));
// remote_x is correctly considered to be in remote shared memory
*remote_x = 1;
__syncthreads();
}
}
__cluster_dims__(2)
__global__ void kernel_mapa(int* out) {
__shared__ int x;
if (blockIdx.x == 0) {
uint32_t remote_shared_ptr;
asm ("mapa.shared::cluster.u32 %0, %1, 1;"
: "=r"(remote_shared_ptr)
: "r"(static_cast<uint32_t>(__cvta_generic_to_shared(&x)))
: );
int *remote_x = reinterpret_cast<int*>(__cvta_shared_to_generic(remote_shared_ptr));
// remote_x is considered to be in local shared memory here, which is wrong. We would
// need something like this to make it work:
// int *remote_x = reinterpret_cast<int*>(__cvta_sharedCluster_to_generic(remote_shared_ptr));
// st.shared.u32 to remote dsmem:
// Invalid __shared__ write of size 4 bytes (Address 0x1000400 is not located in executing CTA)
*remote_x = 2;
__syncthreads();
}
}
Expected behavior
The compiler should correctly recognize that the pointer returned by mapa points to remote shared memory.
Is this a duplicate?
Type of Bug
Runtime Error
Component
libcu++
Describe the bug
After using
cuda::ptx::mapa
, the compiler incorrectly infers that the pointer is to local shared memory and not remote shared memory.How to Reproduce
Expected behavior
The compiler should correctly recognize that the pointer returned by
mapa
points to remote shared memory.Reproduction link
https://godbolt.org/z/Tox8srxbY
Operating System
No response
nvidia-smi output
No response
NVCC version
No response
The text was updated successfully, but these errors were encountered: