You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
{{ message }}
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.
The program I am debugging core-dumps in the release build, but not in the debug build. The code that calls reduce is here
The call stack printed by gdb is as follows (in part):
It looks like an error occurred while allocating the intermediate temporary variable.
#0 0x00007fedb0b38d5d in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcupti.so.11.5 #1 0x00007fedb0885bcc in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcupti.so.11.5 #2 0x00007fedb0885dec in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcupti.so.11.5 #3 0x00007fedb083e487 in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcupti.so.11.5 #4 0x00007fedb084812c in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcupti.so.11.5 #5 0x00007ffe49e4afa6 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 #6 0x00007ffe49d10e11 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 #7 0x00007ffe49eab3a7 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1 #8 0x00007fffe73cd1ce in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.11.0 #9 0x00007fffe73a0d1b in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.11.0 #10 0x00007fffe73d9e23 in cudaMalloc () from /usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.11.0 #11 0x00007ffe59c76671 in void* thrust::cuda_cub::malloc<thrust::cuda_cub::execute_on_stream>(thrust::cuda_cub::execution_policy<thrust::cuda_cub::execute_on_stream>&, unsigned long) ()
from /usr/local/lib/python3.8/dist-packages/tensorflow_recommenders_addons/dynamic_embedding/core/_hkv_ops.so #12 0x00007ffe59c767c7 in thrust::detail::temporary_allocator<unsigned char, thrust::cuda_cub::execute_on_stream>::allocate(unsigned long) ()
from /usr/local/lib/python3.8/dist-packages/tensorflow_recommenders_addons/dynamic_embedding/core/_hkv_ops.so #13 0x00007ffe59c76b72 in int thrust::cuda_cub::reduce_n<thrust::cuda_cub::execute_on_stream, thrust::device_ptr, long, int, thrust::plus >(thrust::cuda_cub::execution_policy<thrust::cuda_cub::execute_on_stream>&, thrust::device_ptr, long, int, thrust::plus) () from /usr/local/lib/python3.8/dist-packages/tensorflow_recommenders_addons/dynamic_embedding/core/_hkv_ops.so #14 0x00007ffe59c84de6 in nv::merlin::HashTable<long, float, unsigned long, nv::merlin::Sm80>::insert_or_assign(unsigned long, long const*, float const*, unsigned long const*, CUstream_st*, bool)
() from /usr/local/lib/python3.8/dist-packages/tensorflow_recommenders_addons/dynamic_embedding/core/_hkv_ops.so
The text was updated successfully, but these errors were encountered:
The program I am debugging core-dumps in the release build, but not in the debug build. The code that calls reduce is here
The call stack printed by gdb is as follows (in part):
It looks like an error occurred while allocating the intermediate temporary variable.
#0 0x00007fedb0b38d5d in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcupti.so.11.5
#1 0x00007fedb0885bcc in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcupti.so.11.5
#2 0x00007fedb0885dec in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcupti.so.11.5
#3 0x00007fedb083e487 in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcupti.so.11.5
#4 0x00007fedb084812c in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcupti.so.11.5
#5 0x00007ffe49e4afa6 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#6 0x00007ffe49d10e11 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#7 0x00007ffe49eab3a7 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#8 0x00007fffe73cd1ce in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.11.0
#9 0x00007fffe73a0d1b in ?? () from /usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.11.0
#10 0x00007fffe73d9e23 in cudaMalloc () from /usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.11.0
#11 0x00007ffe59c76671 in void* thrust::cuda_cub::malloc<thrust::cuda_cub::execute_on_stream>(thrust::cuda_cub::execution_policy<thrust::cuda_cub::execute_on_stream>&, unsigned long) ()
from /usr/local/lib/python3.8/dist-packages/tensorflow_recommenders_addons/dynamic_embedding/core/_hkv_ops.so
#12 0x00007ffe59c767c7 in thrust::detail::temporary_allocator<unsigned char, thrust::cuda_cub::execute_on_stream>::allocate(unsigned long) ()
from /usr/local/lib/python3.8/dist-packages/tensorflow_recommenders_addons/dynamic_embedding/core/_hkv_ops.so
#13 0x00007ffe59c76b72 in int thrust::cuda_cub::reduce_n<thrust::cuda_cub::execute_on_stream, thrust::device_ptr, long, int, thrust::plus >(thrust::cuda_cub::execution_policy<thrust::cuda_cub::execute_on_stream>&, thrust::device_ptr, long, int, thrust::plus) () from /usr/local/lib/python3.8/dist-packages/tensorflow_recommenders_addons/dynamic_embedding/core/_hkv_ops.so
#14 0x00007ffe59c84de6 in nv::merlin::HashTable<long, float, unsigned long, nv::merlin::Sm80>::insert_or_assign(unsigned long, long const*, float const*, unsigned long const*, CUstream_st*, bool)
() from /usr/local/lib/python3.8/dist-packages/tensorflow_recommenders_addons/dynamic_embedding/core/_hkv_ops.so
The text was updated successfully, but these errors were encountered: