Skip to content

Commit

Permalink
Working on fixing accelerator support in HPX.Compute, still non-funct…
Browse files Browse the repository at this point in the history
…ional, though

- adding more HPX_HOST_DEVICE annotations
- adding explicit HPX_HOST_DEVICE copy/move constructors
  • Loading branch information
hkaiser committed Jul 4, 2016
1 parent 57a960a commit 516da8e
Show file tree
Hide file tree
Showing 12 changed files with 143 additions and 64 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Expand Up @@ -1004,7 +1004,7 @@ if(HPX_WITH_CUDA AND NOT HPX_WITH_CUDA_CLANG)
else()
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib/x64)
set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG};-D_DEBUG;-O0;-g;-Xcompiler=-MDd,-Od,-Zi)
set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG};-D_DEBUG;-O0;-g;-G;-Xcompiler=-MDd;-Xcompiler=-Od;-Xcompiler=-Zi)
set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ${CUDA_NVCC_FLAGS_RELWITHDEBINFO};-DNDEBUG;-O2;-g;-Xcompiler=-MD,-O2,-Zi)
set(CUDA_NVCC_FLAGS_MINSIZEREL ${CUDA_NVCC_FLAGS_MINSIZEREL};-DNDEBUG;-O1;-Xcompiler=-MD,-O1)
set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE};-DNDEBUG;-O2;-Xcompiler=-MD,-Ox)
Expand Down
6 changes: 3 additions & 3 deletions hpx/compute/cuda/allocator.hpp
Expand Up @@ -170,7 +170,7 @@ namespace hpx { namespace compute { namespace cuda

detail::launch(
*target_, num_blocks, threads_per_block,
[] __device__ (T* p, std::size_t count, Args const&... args)
[] HPX_DEVICE (T* p, std::size_t count, Args const&... args)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < count)
Expand All @@ -189,7 +189,7 @@ namespace hpx { namespace compute { namespace cuda
{
detail::launch(
*target_, 1, 1,
[] __device__ (T* p, Args const&... args)
[] HPX_DEVICE (T* p, Args const&... args)
{
::new (p) T (std::forward<Args>(args)...);
},
Expand All @@ -206,7 +206,7 @@ namespace hpx { namespace compute { namespace cuda

detail::launch(
*target_, num_blocks, threads_per_block,
[] __device__ (T* p, std::size_t count)
[] HPX_DEVICE (T* p, std::size_t count)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < count)
Expand Down
4 changes: 1 addition & 3 deletions hpx/compute/cuda/default_executor.hpp
Expand Up @@ -105,11 +105,9 @@ namespace hpx { namespace compute { namespace cuda
shape_container_type shape_container(
boost::begin(shape), boost::end(shape), alloc_type(target_));

value_type const* p = &(*boost::begin(shape));
detail::launch(
target_, num_blocks, threads_per_block,
[] HPX_DEVICE (F f, value_type * p,
std::size_t count, Ts&... ts)
[] HPX_DEVICE (F f, value_type* p, std::size_t count, Ts const&... ts)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < count)
Expand Down
20 changes: 19 additions & 1 deletion hpx/compute/cuda/detail/launch.hpp
Expand Up @@ -23,6 +23,7 @@
#include <cstring>
#endif
#include <string>
#include <utility>

namespace hpx { namespace compute { namespace cuda { namespace detail
{
Expand All @@ -43,6 +44,24 @@ namespace hpx { namespace compute { namespace cuda { namespace detail
fun_type f_;
args_type args_;

HPX_HOST_DEVICE closure(fun_type && f, args_type && args)
: f_(std::move(f))
, args_(std::move(args))
{}

HPX_HOST_DEVICE closure(closure const& rhs)
: f_(rhs.f_)
, args_(rhs.args_)
{}

HPX_HOST_DEVICE closure(closure && rhs)
: f_(std::move(rhs.f_))
, args_(std::move(rhs.args_))
{}

HPX_DELETE_COPY_ASSIGN(closure);
HPX_DELETE_MOVE_ASSIGN(closure);

HPX_HOST_DEVICE void operator()()
{
// FIXME: is it possible to move the arguments?
Expand Down Expand Up @@ -100,7 +119,6 @@ namespace hpx { namespace compute { namespace cuda { namespace detail
}
};


// Launch any given function F with the given parameters. This function
// does not involve any device synchronization.
template <typename DimType, typename F, typename ...Ts>
Expand Down
30 changes: 16 additions & 14 deletions hpx/compute/cuda/target.hpp
Expand Up @@ -53,22 +53,24 @@ namespace hpx { namespace compute { namespace cuda

HPX_MOVABLE_ONLY(native_handle_type);

native_handle_type(int device = 0);
HPX_HOST_DEVICE native_handle_type(int device = 0);

~native_handle_type();
HPX_HOST_DEVICE ~native_handle_type();

native_handle_type(native_handle_type && rhs) HPX_NOEXCEPT;
HPX_HOST_DEVICE native_handle_type(
native_handle_type && rhs) HPX_NOEXCEPT;

native_handle_type& operator=(native_handle_type && rhs) HPX_NOEXCEPT;
HPX_HOST_DEVICE native_handle_type&
operator=(native_handle_type && rhs) HPX_NOEXCEPT;

cudaStream_t get_stream() const;
HPX_HOST_DEVICE cudaStream_t get_stream() const;

int get_device() const HPX_NOEXCEPT
HPX_HOST_DEVICE int get_device() const HPX_NOEXCEPT
{
return device_;
}

hpx::id_type const& get_locality() const HPX_NOEXCEPT
HPX_HOST_DEVICE hpx::id_type const& get_locality() const HPX_NOEXCEPT
{
return locality_;
}
Expand All @@ -83,31 +85,31 @@ namespace hpx { namespace compute { namespace cuda
};

// Constructs default target
target() HPX_NOEXCEPT {}
HPX_HOST_DEVICE target() HPX_NOEXCEPT {}

// Constructs target from a given device ID
explicit target(int device)
explicit HPX_HOST_DEVICE target(int device)
: handle_(device)
{}

target(target && rhs) HPX_NOEXCEPT
HPX_HOST_DEVICE target(target && rhs) HPX_NOEXCEPT
: handle_(std::move(rhs.handle_))
{}

target& operator=(target && rhs) HPX_NOEXCEPT
HPX_HOST_DEVICE target& operator=(target && rhs) HPX_NOEXCEPT
{
handle_ = std::move(rhs.handle_);
return *this;
}

native_handle_type const& native_handle() const
HPX_HOST_DEVICE native_handle_type const& native_handle() const
{
return handle_;
}

void synchronize() const;
HPX_HOST_DEVICE void synchronize() const;

hpx::future<void> get_future() const;
HPX_HOST_DEVICE hpx::future<void> get_future() const;

private:
#if !defined(__CUDA_ARCH__)
Expand Down
46 changes: 40 additions & 6 deletions hpx/compute/cuda/target_ptr.hpp
Expand Up @@ -39,7 +39,7 @@ namespace hpx { namespace compute { namespace cuda
#endif
typedef std::ptrdiff_t difference_type;

target_ptr()
HPX_HOST_DEVICE target_ptr()
: p_(nullptr)
, tgt_(nullptr)
{}
Expand All @@ -49,20 +49,37 @@ namespace hpx { namespace compute { namespace cuda
, tgt_(&tgt)
{}

HPX_HOST_DEVICE
target_ptr(target_ptr const& rhs)
: p_(rhs.p_)
, tgt_(rhs.tgt_)
{}

HPX_HOST_DEVICE
target_ptr& operator=(target_ptr const& rhs)
{
p_ = rhs.p_;
tgt_ = rhs.tgt_;
return *this;
}

HPX_HOST_DEVICE
target_ptr const& operator++()
{
HPX_ASSERT(p_);
++p_;
return *this;
}

HPX_HOST_DEVICE
target_ptr const& operator--()
{
HPX_ASSERT(p_);
--p_;
return *this;
}

HPX_HOST_DEVICE
target_ptr operator++(int)
{
target_ptr tmp(*this);
Expand All @@ -71,6 +88,7 @@ namespace hpx { namespace compute { namespace cuda
return tmp;
}

HPX_HOST_DEVICE
target_ptr operator--(int)
{
target_ptr tmp(*this);
Expand All @@ -79,85 +97,101 @@ namespace hpx { namespace compute { namespace cuda
return tmp;
}

HPX_HOST_DEVICE
explicit operator bool() const
{
return p_ != nullptr;
}

HPX_HOST_DEVICE
friend bool operator==(target_ptr const& lhs, std::nullptr_t)
{
return lhs.p_ == nullptr;
}

HPX_HOST_DEVICE
friend bool operator!=(target_ptr const& lhs, std::nullptr_t)
{
return lhs.p_ != nullptr;
}

HPX_HOST_DEVICE
friend bool operator==(std::nullptr_t, target_ptr const& rhs)
{
return nullptr == rhs.p_;
}

HPX_HOST_DEVICE
friend bool operator!=(std::nullptr_t, target_ptr const& rhs)
{
return nullptr != rhs.p_;
}

HPX_HOST_DEVICE
friend bool operator==(target_ptr const& lhs, target_ptr const& rhs)
{
return lhs.p_ == rhs.p_;
}

HPX_HOST_DEVICE
friend bool operator!=(target_ptr const& lhs, target_ptr const& rhs)
{
return lhs.p_ != rhs.p_;
}

HPX_HOST_DEVICE
friend bool operator<(target_ptr const& lhs, target_ptr const& rhs)
{
return lhs.p_ < rhs.p_;
}

HPX_HOST_DEVICE
friend bool operator>(target_ptr const& lhs, target_ptr const& rhs)
{
return lhs.p_ > rhs.p_;
}

HPX_HOST_DEVICE
friend bool operator<=(target_ptr const& lhs, target_ptr const& rhs)
{
return lhs.p_ <= rhs.p_;
}

HPX_HOST_DEVICE
friend bool operator>=(target_ptr const& lhs, target_ptr const& rhs)
{
return lhs.p_ >= rhs.p_;
}

HPX_HOST_DEVICE
target_ptr& operator+=(std::ptrdiff_t offset)
{
HPX_ASSERT(p_);
p_ += offset;
return *this;
}

HPX_HOST_DEVICE
target_ptr& operator-=(std::ptrdiff_t offset)
{
HPX_ASSERT(p_);
p_ -= offset;
return *this;
}

HPX_HOST_DEVICE
std::ptrdiff_t operator-(target_ptr const& other) const
{
return p_ - other.p_;
}

HPX_HOST_DEVICE
target_ptr operator-(std::ptrdiff_t offset) const
{
return target_ptr(p_ - offset, *tgt_);
}

HPX_HOST_DEVICE
target_ptr operator+(std::ptrdiff_t offset) const
{
return target_ptr(p_ + offset, *tgt_);
Expand All @@ -169,27 +203,27 @@ namespace hpx { namespace compute { namespace cuda
// return *p_;
// }

T& operator*()
HPX_DEVICE T& operator*()
{
return *p_;
}

T const& operator[](std::ptrdiff_t offset) const
HPX_DEVICE T const& operator[](std::ptrdiff_t offset) const
{
return *(p_ + offset);
}

T& operator[](std::ptrdiff_t offset)
HPX_DEVICE T& operator[](std::ptrdiff_t offset)
{
return *(p_ + offset);
}

operator T*() const
HPX_DEVICE operator T*() const
{
return p_;
}

T* operator->() const
HPX_DEVICE T* operator->() const
{
return p_;
}
Expand Down

0 comments on commit 516da8e

Please sign in to comment.