Skip to content
Permalink
Browse files

Update cublas demo with renamed copy functions

  • Loading branch information...
biddisco committed Oct 8, 2019
1 parent 8300df4 commit e331330b6a536019776078b68b8f2870f5dff718
Showing with 15 additions and 15 deletions.
  1. +3 −3 examples/compute/cuda/cublas_matmul.cpp
  2. +12 −12 examples/compute/cuda/cuda_future_helper.h
--- a/examples/compute/cuda/cublas_matmul.cpp
+++ b/examples/compute/cuda/cublas_matmul.cpp
@@ -251,10 +251,10 @@ void matrixMultiply(sMatrixSize &matrix_size, std::size_t device, std::size_t it

 // adding async copy operations into the stream before cublas calls puts
 // the copies in the queue before the matrix operations.
-cublas.copy_apply(
+cublas.memcpy_apply(
 d_A, h_A.data(), size_A*sizeof(T), cudaMemcpyHostToDevice);

-auto copy_future = cublas.copy_async(
+auto copy_future = cublas.memcpy_async(
 d_B, h_B.data(), size_B*sizeof(T), cudaMemcpyHostToDevice);

 // we can call get_future multiple times on the cublas helper.
@@ -312,7 +312,7 @@ void matrixMultiply(sMatrixSize &matrix_size, std::size_t device, std::size_t it

 #ifndef HPX_CUBLAS_DEMO_WITH_ALLOCATOR
 // when the matrix operations complete, copy the result to the host
-auto copy_finished = cublas.copy_async(
+auto copy_finished = cublas.memcpy_async(
 h_CUBLAS.data(), d_C, size_C*sizeof(T), cudaMemcpyDeviceToHost);

 #endif
--- a/examples/compute/cuda/cuda_future_helper.h
+++ b/examples/compute/cuda/cuda_future_helper.h
@@ -254,35 +254,35 @@ namespace hpx { namespace compute { namespace util
 }

 // -------------------------------------------------------------------------
-// utility function for setting memory on the GPU, async and apply versions
+// utility function for memory copies to/from the GPU, async and apply versions
 template <typename... Args>
-hpx::future<void> memset_async(Args&&... args) {
-return async(cudaMemsetAsync, std::forward<Args>(args)...);
+hpx::future<void> memcpy_async(Args&&... args) {
+return async(cudaMemcpyAsync, std::forward<Args>(args)...);
 }

 template <typename... Args>
-auto memset_apply(Args&&... args)
+auto memcpy_apply(Args&&... args)
 #if !defined(HPX_HAVE_CXX14_RETURN_TYPE_DEDUCTION)
--> decltype(apply(cudaMemsetAsync, std::forward<Args>(args)...))
+-> decltype(apply(cudaMemcpyAsync, std::forward<Args>(args)...))
 #endif
 {
-return apply(cudaMemsetAsync, std::forward<Args>(args)...);
+return apply(cudaMemcpyAsync, std::forward<Args>(args)...);
 }

 // -------------------------------------------------------------------------
-// utility function for memory copies to/from the GPU, async and apply versions
+// utility function for setting memory on the GPU, async and apply versions
 template <typename... Args>
-hpx::future<void> memcpy_async(Args&&... args) {
-return async(cudaMemcpyAsync, std::forward<Args>(args)...);
+hpx::future<void> memset_async(Args&&... args) {
+return async(cudaMemsetAsync, std::forward<Args>(args)...);
 }

 template <typename... Args>
-auto memcpy_apply(Args&&... args)
+auto memset_apply(Args&&... args)
 #if !defined(HPX_HAVE_CXX14_RETURN_TYPE_DEDUCTION)
--> decltype(apply(cudaMemcpyAsync, std::forward<Args>(args)...))
+-> decltype(apply(cudaMemsetAsync, std::forward<Args>(args)...))
 #endif
 {
-return apply(cudaMemcpyAsync, std::forward<Args>(args)...);
+return apply(cudaMemsetAsync, std::forward<Args>(args)...);
 }

 // -------------------------------------------------------------------------

0 comments on commit e331330

Please sign in to comment.
You can’t perform that action at this time.