Skip to content

Commit

Permalink
Fixed inline assembly in thread_load.hpp
Browse files: browse the repository at this point in the history
  • Loading branch information
mfep committed Feb 1, 2022
1 parent f408deb commit 553855d
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -65,8 +65,10 @@ HIPCUB_DEVICE __forceinline__ T AsmThreadLoad(void * ptr)
HIPCUB_DEVICE __forceinline__ type AsmThreadLoad<cache_modifier, type>(void * ptr) \
{ \
interim_type retval; \
asm volatile(#asm_operator " %0, %1 " llvm_cache_modifier : "=" #output_modifier(retval) : "v"(ptr)); \
asm volatile("s_waitcnt " wait_cmd "(%0)" : : "I"(0x00)); \
asm volatile( \
#asm_operator " %0, %1 " llvm_cache_modifier "\n" \
"\ts_waitcnt " wait_cmd "(0)" : "=" #output_modifier(retval) : "v"(ptr) \
); \
return retval; \
}

Expand Down

0 comments on commit 553855d

Please sign in to comment.