diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index bbb0fd15ea4..3e11dda151a 100644 --- a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -24,21 +24,11 @@ jobs: restore-keys: | ccache-${{ github.workflow }}-${{ github.job }}-git- - name: Build & Install - # Have to have -Wno-deprecated-declarations due to deprecated atomicAddNoRet - # Have to have -Wno-gnu-zero-variadic-macro-arguments to avoid - # amrex/Src/Base/AMReX_GpuLaunchGlobal.H:15:5: error: must specify at least one argument for '...' parameter of variadic macro [-Werror,-Wgnu-zero-variadic-macro-arguments] - # __launch_bounds__(amrex_launch_bounds_max_threads) - # ^ - # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:178:71: note: expanded from macro '__launch_bounds__' - # select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__) - # ^ - # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:176:9: note: macro 'select_impl_' defined here - # #define select_impl_(_1, _2, impl_, ...) impl_ # Have to remove "-fno-operator-names to avoid # /opt/rocm-6.1.0/include/rocprim/device/detail/device_adjacent_difference.hpp:198:26: error: token is not a valid binary operator in a preprocessor subexpression # 198 | #if defined(__gfx1102__) or defined(__gfx1030__) # | ~~~~~~~~~~~~~~~~~~~~~^~ - env: {CXXFLAGS: "-Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} + env: {CXXFLAGS: "-Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"} run: | export CCACHE_COMPRESS=1 export CCACHE_COMPRESSLEVEL=10 @@ -92,21 +82,11 @@ jobs: restore-keys: | ccache-${{ github.workflow }}-${{ github.job }}-git- - name: Build & Install - # Have to have -Wno-deprecated-declarations due to deprecated atomicAddNoRet - # Have to have -Wno-gnu-zero-variadic-macro-arguments to avoid - # amrex/Src/Base/AMReX_GpuLaunchGlobal.H:15:5: error: must specify at least one argument for '...' parameter of variadic macro [-Werror,-Wgnu-zero-variadic-macro-arguments] - # __launch_bounds__(amrex_launch_bounds_max_threads) - # ^ - # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:178:71: note: expanded from macro '__launch_bounds__' - # select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__) - # ^ - # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:176:9: note: macro 'select_impl_' defined here - # #define select_impl_(_1, _2, impl_, ...) impl_ # Have to remove "-fno-operator-names to avoid # /opt/rocm-6.1.0/include/rocprim/device/detail/device_adjacent_difference.hpp:198:26: error: token is not a valid binary operator in a preprocessor subexpression # 198 | #if defined(__gfx1102__) or defined(__gfx1030__) # | ~~~~~~~~~~~~~~~~~~~~~^~ - env: {CXXFLAGS: "-Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} + env: {CXXFLAGS: "-Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"} run: | export CCACHE_COMPRESS=1 export CCACHE_COMPRESSLEVEL=10 diff --git a/Src/Base/AMReX_Scan.H b/Src/Base/AMReX_Scan.H index 4c94960cba6..e819b9e84ba 100644 --- a/Src/Base/AMReX_Scan.H +++ b/Src/Base/AMReX_Scan.H @@ -641,6 +641,10 @@ T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum = ret using ScanTileState = rocprim::detail::lookback_scan_state; using OrderedBlockId = rocprim::detail::ordered_block_id; +#if (defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)) || \ + (defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR == 6) && \ + defined(HIP_VERSION_MINOR) && (HIP_VERSION_MINOR == 0)) + std::size_t nbytes_tile_state = rocprim::detail::align_size (ScanTileState::get_storage_size(nblocks)); std::size_t nbytes_block_id = OrderedBlockId::get_storage_size(); @@ -648,6 +652,22 @@ T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum = ret auto dp = (char*)(The_Arena()->alloc(nbytes_tile_state+nbytes_block_id)); ScanTileState tile_state = ScanTileState::create(dp, nblocks); + +#else + + std::size_t nbytes_tile_state; + AMREX_HIP_SAFE_CALL(ScanTileState::get_storage_size(nblocks, stream, nbytes_tile_state)); + nbytes_tile_state = rocprim::detail::align_size(nbytes_tile_state); + + std::size_t nbytes_block_id = OrderedBlockId::get_storage_size(); + + auto dp = (char*)(The_Arena()->alloc(nbytes_tile_state+nbytes_block_id)); + + ScanTileState tile_state; + AMREX_HIP_SAFE_CALL(ScanTileState::create(tile_state, dp, nblocks, stream)); + +#endif + auto ordered_block_id = OrderedBlockId::create (reinterpret_cast(dp + nbytes_tile_state));