Skip to content

Commit

Permalink
[SYCL] Remove amrex::oneapi and update deprecated device descriptors (A…
Browse files Browse the repository at this point in the history
…MReX-Codes#2910)

* Remove amrex::oneapi in favour of standard features

* Change deprecated device descriptors
  • Loading branch information
nmnobre committed Aug 11, 2022
1 parent 1bda173 commit d295f22
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 21 deletions.
4 changes: 2 additions & 2 deletions Src/Base/AMReX_GpuDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -463,8 +463,8 @@ Device::initialize_gpu ()
device_prop.warpSize = warp_size;
auto sgss = d.get_info<sycl::info::device::sub_group_sizes>();
device_prop.maxMemAllocSize = d.get_info<sycl::info::device::max_mem_alloc_size>();
device_prop.managedMemory = d.get_info<sycl::info::device::host_unified_memory>();
device_prop.concurrentManagedAccess = d.get_info<sycl::info::device::usm_shared_allocations>();
device_prop.managedMemory = d.has(sycl::aspect::usm_host_allocations);
device_prop.concurrentManagedAccess = d.has(sycl::aspect::usm_shared_allocations);
device_prop.maxParameterSize = d.get_info<sycl::info::device::max_parameter_size>();
{
amrex::Print() << "Device Properties:\n"
Expand Down
4 changes: 0 additions & 4 deletions Src/Base/AMReX_GpuQualifiers.H
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@

# include <CL/sycl.hpp>

namespace amrex {
namespace oneapi = sycl::ext::oneapi;
}

# define AMREX_REQUIRE_SUBGROUP_SIZE(x) \
_Pragma("clang diagnostic push") \
_Pragma("clang diagnostic ignored \"-Wattributes\"") \
Expand Down
8 changes: 4 additions & 4 deletions Src/Base/AMReX_GpuReduce.H
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ template <int warpSize, typename T, typename F>
struct warpReduce
{
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
T operator() (T x, amrex::oneapi::sub_group const& sg) const noexcept
T operator() (T x, sycl::sub_group const& sg) const noexcept
{
for (int offset = warpSize/2; offset > 0; offset /= 2) {
T y = sg.shuffle_down(x, offset);
T y = sycl::shift_group_left(sg, x, offset);
x = F()(x,y);
}
return x;
Expand All @@ -71,7 +71,7 @@ T blockReduce (T x, WARPREDUCE && warp_reduce, T x0, Gpu::Handler const& h)
{
T* shared = (T*)h.local;
int tid = h.item->get_local_id(0);
amrex::oneapi::sub_group const& sg = h.item->get_sub_group();
sycl::sub_group const& sg = h.item->get_sub_group();
int lane = sg.get_local_id()[0];
int wid = sg.get_group_id()[0];
int numwarps = sg.get_group_range()[0];
Expand All @@ -94,7 +94,7 @@ AMREX_GPU_DEVICE AMREX_FORCE_INLINE
void blockReduce_partial (T* dest, T x, WARPREDUCE && warp_reduce, ATOMICOP && atomic_op,
Gpu::Handler const& handler)
{
amrex::oneapi::sub_group const& sg = handler.item->get_sub_group();
sycl::sub_group const& sg = handler.item->get_sub_group();
int wid = sg.get_group_id()[0];
if ((wid+1)*warpSize <= handler.numActiveThreads) {
x = warp_reduce(x, sg); // full warp
Expand Down
22 changes: 11 additions & 11 deletions Src/Base/AMReX_Scan.H
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum)
amrex::launch(nblocks, nthreads, sm, stream,
[=] AMREX_GPU_DEVICE (Gpu::Handler const& gh) noexcept
{
amrex::oneapi::sub_group const& sg = gh.item->get_sub_group();
sycl::sub_group const& sg = gh.item->get_sub_group();
int lane = sg.get_local_id()[0];
int warp = sg.get_group_id()[0];
int nwarps = sg.get_group_range()[0];
Expand Down Expand Up @@ -226,7 +226,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum)
T x = x0;
// Scan within a warp
for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) {
T s = sg.shuffle_up(x, i);
T s = sycl::shift_group_right(sg, x, i);
if (lane >= i) x += s;
}

Expand All @@ -244,7 +244,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum)
if (warp == 0) {
T y = (lane < nwarps) ? shared[lane] : 0;
for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) {
T s = sg.shuffle_up(y, i);
T s = sycl::shift_group_right(sg, y, i);
if (lane >= i) y += s;
}

Expand Down Expand Up @@ -277,7 +277,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum)
amrex::launch(1, nthreads, sm, stream,
[=] AMREX_GPU_DEVICE (Gpu::Handler const& gh) noexcept
{
amrex::oneapi::sub_group const& sg = gh.item->get_sub_group();
sycl::sub_group const& sg = gh.item->get_sub_group();
int lane = sg.get_local_id()[0];
int warp = sg.get_group_id()[0];
int nwarps = sg.get_group_range()[0];
Expand All @@ -293,7 +293,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum)
T x = (offset < nblocks) ? blocksum_p[offset] : 0;
// Scan within a warp
for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) {
T s = sg.shuffle_up(x, i);
T s = sycl::shift_group_right(sg, x, i);
if (lane >= i) x += s;
}

Expand All @@ -311,7 +311,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum)
if (warp == 0) {
T y = (lane < nwarps) ? shared[lane] : 0;
for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) {
T s = sg.shuffle_up(y, i);
T s = sycl::shift_group_right(sg, y, i);
if (lane >= i) y += s;
}

Expand Down Expand Up @@ -417,7 +417,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum
amrex::launch(nblocks, nthreads, sm, stream,
[=] AMREX_GPU_DEVICE (Gpu::Handler const& gh) noexcept
{
amrex::oneapi::sub_group const& sg = gh.item->get_sub_group();
sycl::sub_group const& sg = gh.item->get_sub_group();
int lane = sg.get_local_id()[0];
int warp = sg.get_group_id()[0];
int nwarps = sg.get_group_range()[0];
Expand Down Expand Up @@ -472,7 +472,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum
T x = x0;
// Scan within a warp
for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) {
T s = sg.shuffle_up(x, i);
T s = sycl::shift_group_right(sg, x, i);
if (lane >= i) x += s;
}

Expand All @@ -490,7 +490,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum
if (warp == 0) {
T y = (lane < nwarps) ? shared[lane] : 0;
for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) {
T s = sg.shuffle_up(y, i);
T s = sycl::shift_group_right(sg, y, i);
if (lane >= i) y += s;
}

Expand Down Expand Up @@ -543,7 +543,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum
// implement our own __ballot
unsigned status_bf = (stva.status == 'p') ? (0x1u << lane) : 0;
for (int i = 1; i < Gpu::Device::warp_size; i *= 2) {
status_bf |= sg.shuffle_xor(status_bf, i);
status_bf |= sycl::permute_group_by_xor(sg, status_bf, i);
}

bool stop_lookback = status_bf & 0x1u;
Expand All @@ -563,7 +563,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum
}

for (int i = Gpu::Device::warp_size/2; i > 0; i /= 2) {
x += sg.shuffle_down(x,i);
x += sycl::shift_group_left(sg, x,i);
}
}

Expand Down

0 comments on commit d295f22

Please sign in to comment.