Get things compiling / linking using driver API

Change-Id: I24d9d9510c8164dea36d83028b3c4bdbddbe2d85

wesm committed Aug 28, 2017
1 parent 5d686fe commit f3c724e
Showing 6 changed files with 68 additions and 51 deletions.
26 changes: 7 additions & 19 deletions cpp/src/arrow/gpu/CMakeLists.txt
@@ -79,28 +79,16 @@ set(ARROW_GPU_SRCS

set(ARROW_GPU_SHARED_LINK_LIBS
arrow_shared
${CUDA_LIBRARIES}
${CUDA_CUDA_LIBRARY}
)

add_library(arrow_gpu_objlib OBJECT
${ARROW_GPU_SRCS}
ADD_ARROW_LIB(arrow_gpu
SOURCES ${ARROW_GPU_SRCS}
SHARED_LINK_FLAGS ""
SHARED_LINK_LIBS ${ARROW_GPU_SHARED_LINK_LIBS}
STATIC_LINK_LIBS ""
)
set_property(TARGET arrow_gpu_objlib PROPERTY POSITION_INDEPENDENT_CODE 1)

if (ARROW_BUILD_SHARED)
cuda_add_library(arrow_gpu_shared SHARED $<TARGET_OBJECTS:arrow_gpu_objlib>)
install(TARGETS arrow_gpu_shared
RUNTIME DESTINATION bin
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()

if (ARROW_BUILD_STATIC)
add_library(arrow_gpu_static STATIC $<TARGET_OBJECTS:arrow_gpu_objlib>)
install(TARGETS arrow_gpu_static
RUNTIME DESTINATION bin
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()

install(FILES
cuda_common.h
4 changes: 2 additions & 2 deletions cpp/src/arrow/gpu/cuda-benchmark.cc
@@ -38,10 +38,10 @@ static void CudaBufferWriterBenchmark(benchmark::State& state, const int64_t tot
CudaDeviceManager* manager;
ABORT_NOT_OK(CudaDeviceManager::GetInstance(&manager));
std::shared_ptr<CudaContext> context;
ABORT_NOT_OK(manager->CreateContext(kGpuNumber, &context));
ABORT_NOT_OK(manager->GetContext(kGpuNumber, &context));

std::shared_ptr<CudaBuffer> device_buffer;
ABORT_NOT_OK(context->Allocate(total_bytes, &device_buffer));
ABORT_NOT_OK(AllocateCudaBuffer(total_bytes, context, &device_buffer));
CudaBufferWriter writer(device_buffer);

if (buffer_size > 0) {
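For context, here is a minimal sketch (not part of the commit) of the setup sequence the benchmark above now uses: GetInstance, GetContext, AllocateCudaBuffer, then writing through CudaBufferWriter. Header paths and the device number are assumptions, and error handling is reduced to RETURN_NOT_OK.

#include <cstdint>
#include <memory>

#include "arrow/gpu/cuda_context.h"   // assumed header locations
#include "arrow/gpu/cuda_memory.h"
#include "arrow/status.h"

namespace gpu = arrow::gpu;

// Allocate a device buffer on GPU 0 and write host bytes into it.
arrow::Status WriteToDevice(const uint8_t* host_data, int64_t nbytes) {
  gpu::CudaDeviceManager* manager = nullptr;
  RETURN_NOT_OK(gpu::CudaDeviceManager::GetInstance(&manager));

  std::shared_ptr<gpu::CudaContext> context;
  RETURN_NOT_OK(manager->GetContext(0, &context));  // device 0 assumed present

  std::shared_ptr<gpu::CudaBuffer> device_buffer;
  RETURN_NOT_OK(gpu::AllocateCudaBuffer(nbytes, context, &device_buffer));

  gpu::CudaBufferWriter writer(device_buffer);
  return writer.Write(host_data, nbytes);  // Write(data, nbytes) assumed from the writer's stream interface
}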
10 changes: 5 additions & 5 deletions cpp/src/arrow/gpu/cuda-test.cc
@@ -35,7 +35,7 @@ class TestCudaBufferBase : public ::testing::Test {
public:
void SetUp() {
ASSERT_OK(CudaDeviceManager::GetInstance(&manager_));
ASSERT_OK(manager_->CreateContext(kGpuNumber, &context_));
ASSERT_OK(manager_->GetContext(kGpuNumber, &context_));
}

protected:
@@ -53,7 +53,7 @@ class TestCudaBuffer : public TestCudaBufferBase {
TEST_F(TestCudaBuffer, Allocate) {
const int64_t kSize = 100;
std::shared_ptr<CudaBuffer> buffer;
ASSERT_OK(context_->Allocate(kSize, &buffer));
ASSERT_OK(AllocateCudaBuffer(kSize, context_, &buffer));
ASSERT_EQ(kSize, buffer->size());
}

@@ -68,7 +68,7 @@ void AssertCudaBufferEquals(const CudaBuffer& buffer, const uint8_t* host_data,
TEST_F(TestCudaBuffer, CopyFromHost) {
const int64_t kSize = 1000;
std::shared_ptr<CudaBuffer> device_buffer;
ASSERT_OK(context_->Allocate(kSize, &device_buffer));
ASSERT_OK(AllocateCudaBuffer(kSize, context_, &device_buffer));

std::shared_ptr<PoolBuffer> host_buffer;
ASSERT_OK(test::MakeRandomBytePoolBuffer(kSize, default_memory_pool(), &host_buffer));
@@ -86,7 +86,7 @@ class TestCudaBufferWriter : public TestCudaBufferBase {
}

void Allocate(const int64_t size) {
ASSERT_OK(context_->Allocate(size, &device_buffer_));
ASSERT_OK(AllocateCudaBuffer(size, context_, &device_buffer_));
writer_.reset(new CudaBufferWriter(device_buffer_));
}

@@ -198,7 +198,7 @@ TEST_F(TestCudaBufferReader, Basics) {
std::shared_ptr<CudaBuffer> device_buffer;

const int64_t size = 1000;
ASSERT_OK(context_->Allocate(size, &device_buffer));
ASSERT_OK(AllocateCudaBuffer(size, context_, &device_buffer));

std::shared_ptr<PoolBuffer> buffer;
ASSERT_OK(test::MakeRandomBytePoolBuffer(1000, default_memory_pool(), &buffer));
63 changes: 44 additions & 19 deletions cpp/src/arrow/gpu/cuda_context.cc
@@ -21,9 +21,14 @@
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include <cuda.h>

#include "arrow/gpu/cuda_common.h"
#include "arrow/gpu/cuda_memory.h"

namespace arrow {
namespace gpu {

@@ -39,7 +44,7 @@ class CudaContext::CudaContextImpl {

Status Init(const CudaDevice& device) {
device_ = device;
CU_RETURN_NOT_OK(cuCtxCreate(context_, 0, device_.handle));
CU_RETURN_NOT_OK(cuCtxCreate(&context_, 0, device_.handle));
is_open_ = true;
return Status::OK();
}
@@ -58,22 +63,23 @@ class CudaContext::CudaContextImpl {
CU_RETURN_NOT_OK(cuCtxSetCurrent(context_));

CUdeviceptr data;
CU_RETURN_NOT_OK(cuMemAlloc(&data, nbytes));
CU_RETURN_NOT_OK(cuMemAlloc(&data, static_cast<size_t>(nbytes)));
*out = reinterpret_cast<uint8_t*>(data);
return Status::OK();
}

Status CopyHostToDevice(uint8_t* dst, const uint8_t* src, int64_t nbytes) {
CU_RETURN_NOT_OK(cuCtxSetCurrent(context_));
CU_RETURN_NOT_OK(cuMemcpyDtoH(reinterpret_cast<CUdeviceptr>(dst),
src, nbytes));
return Statsu::OK();
CU_RETURN_NOT_OK(cuMemcpyHtoD(reinterpret_cast<CUdeviceptr>(dst),
reinterpret_cast<const void*>(src),
static_cast<size_t>(nbytes)));
return Status::OK();
}

Status CopyDeviceToHost(uint8_t* dst, const uint8_t* src, int64_t nbytes) {
CU_RETURN_NOT_OK(cuCtxSetCurrent(context_));
CU_RETURN_NOT_OK(cuMemcpyHtoD(src, reinterpret_cast<const CUdeviceptr>(src),
nbytes));
CU_RETURN_NOT_OK(cuMemcpyDtoH(dst, reinterpret_cast<const CUdeviceptr>(src),
static_cast<size_t>(nbytes)));
return Status::OK();
}

@@ -113,7 +119,8 @@ class CudaDeviceManager::CudaDeviceManagerImpl {

Status AllocateHost(int64_t nbytes, uint8_t** out) {
CU_RETURN_NOT_OK(cuMemHostAlloc(reinterpret_cast<void**>(out),
nbytes, CU_MEMHOSTALLOC_PORTABLE));
static_cast<size_t>(nbytes),
CU_MEMHOSTALLOC_PORTABLE));
host_bytes_allocated_ += nbytes;
return Status::OK();
}
@@ -134,9 +141,16 @@ class CudaDeviceManager::CudaDeviceManagerImpl {
return Status::OK();
}

Status CreateContext(int device_number, std::shared_ptr<CudaContext>* out) {
*out = std::shared_ptr<CudaContext>(new CudaContext());
return (*out)->impl_->Init(devices_[i]);
Status GetContext(int device_number, std::shared_ptr<CudaContext>* out) {
auto it = contexts_.find(device_number);
if (it == contexts_.end()) {
auto ctx = std::shared_ptr<CudaContext>(new CudaContext());
RETURN_NOT_OK(ctx->impl_->Init(devices_[device_number]));
contexts_[device_number] = *out = ctx;
} else {
*out = it->second;
}
return Status::OK();
}

int num_devices() const { return num_devices_; }
@@ -145,13 +159,18 @@ class CudaDeviceManager::CudaDeviceManagerImpl {
int num_devices_;
std::vector<CudaDevice> devices_;

// device_number -> CudaContext
std::unordered_map<int, std::shared_ptr<CudaContext>> contexts_;

int host_bytes_allocated_;
};

CudaDeviceManager::CudaDeviceManager() {
impl_.reset(new CudaDeviceManagerImpl());
}

std::unique_ptr<CudaDeviceManager> CudaDeviceManager::instance_ = nullptr;

Status CudaDeviceManager::GetInstance(CudaDeviceManager** manager) {
if (!instance_) {
instance_.reset(new CudaDeviceManager());
@@ -161,21 +180,21 @@ Status CudaDeviceManager::GetInstance(CudaDeviceManager** manager) {
return Status::OK();
}

Status CudaDeviceManager::Create(int device_number,
std::shared_ptr<CudaContext>* out) {
return impl_->Create(device_number, out);
Status CudaDeviceManager::GetContext(int device_number,
std::shared_ptr<CudaContext>* out) {
return impl_->GetContext(device_number, out);
}

Status CudaDeviceManager::AllocateHost(int64_t nbytes,
std::shared_ptr<CudaHostBuffer>* out) {
uint8_t* data;
uint8_t* data = nullptr;
RETURN_NOT_OK(impl_->AllocateHost(nbytes, &data));
*out = std::shared_ptr<CudaHostBuffer>(data, nbytes);
*out = std::make_shared<CudaHostBuffer>(data, nbytes);
return Status::OK();
}

Status CudaDeviceManager::FreeHost(uint8_t* data, int64_t nbytes) {
return impl_->FreeHost(data, nbytes));
return impl_->FreeHost(data, nbytes);
}

int CudaDeviceManager::num_devices() const {
@@ -185,8 +204,14 @@ int CudaDeviceManager::num_devices() const {
// ----------------------------------------------------------------------
// CudaContext public API

Status CudaContext::Allocate(int64_t nbytes, std::shared_ptr<CudaBuffer>* out) {
return impl_->AllocateHost(nbytes, out);
CudaContext::CudaContext() {
impl_.reset(new CudaContextImpl());
}

CudaContext::~CudaContext() {}

Status CudaContext::Allocate(int64_t nbytes, uint8_t** out) {
return impl_->Allocate(nbytes, out);
}

Status CudaContext::CopyHostToDevice(uint8_t* dst, const uint8_t* src, int64_t nbytes) {
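A small sketch (assumed usage, not from the commit) of what the new GetContext caching in CudaDeviceManagerImpl implies for callers: repeated lookups of the same device number return the same shared context.

#include <cassert>
#include <memory>

#include "arrow/gpu/cuda_context.h"  // assumed header location
#include "arrow/status.h"

// Illustrates the per-device memoization added above; device 0 assumed present.
arrow::Status CheckContextReuse() {
  arrow::gpu::CudaDeviceManager* manager = nullptr;
  RETURN_NOT_OK(arrow::gpu::CudaDeviceManager::GetInstance(&manager));

  std::shared_ptr<arrow::gpu::CudaContext> first, second;
  RETURN_NOT_OK(manager->GetContext(0, &first));
  RETURN_NOT_OK(manager->GetContext(0, &second));
  assert(first.get() == second.get());  // both point at the cached CudaContext
  return arrow::Status::OK();
}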
9 changes: 5 additions & 4 deletions cpp/src/arrow/gpu/cuda_context.h
@@ -37,16 +37,17 @@ class ARROW_EXPORT CudaDeviceManager {
public:
static Status GetInstance(CudaDeviceManager** manager);

/// \brief Create a CUDA driver context for a particular device
Status CreateContext(int gpu_number, std::shared_ptr<CudaContext>* ctx);
/// \brief Get the CUDA driver context for a particular device
Status GetContext(int gpu_number, std::shared_ptr<CudaContext>* ctx);

Status AllocateHost(int64_t nbytes, std::shared_ptr<CudaHostBuffer>* buffer);
Status FreeHost(uint8_t* data, int64_t nbytes);

int num_devices() const;

private:
std::unique_ptr<CudaDeviceManager> instance_;
CudaDeviceManager();
static std::unique_ptr<CudaDeviceManager> instance_;

class CudaDeviceManagerImpl;
std::unique_ptr<CudaDeviceManagerImpl> impl_;
@@ -67,7 +68,7 @@ class ARROW_EXPORT CudaContext {
Status CopyHostToDevice(uint8_t* dst, const uint8_t* src, int64_t nbytes);
Status CopyDeviceToHost(uint8_t* dst, const uint8_t* src, int64_t nbytes);

Status Allocate(int64_t nbytes, std::shared_ptr<CudaBuffer>* buffer);
Status Allocate(int64_t nbytes, uint8_t** out);
Status Free(uint8_t* device_ptr, int64_t nbytes);

int64_t bytes_allocated() const;
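For reference, a hedged sketch of the lower-level CudaContext API declared above (raw device pointers, explicit Free): a round trip through device memory, with error handling reduced to RETURN_NOT_OK.

#include <cstdint>
#include <memory>

#include "arrow/gpu/cuda_context.h"  // assumed header location
#include "arrow/status.h"

// Allocate device memory, copy host data in and back out, then free it.
arrow::Status RoundTrip(const std::shared_ptr<arrow::gpu::CudaContext>& context,
                        const uint8_t* host_in, uint8_t* host_out, int64_t nbytes) {
  uint8_t* device_ptr = nullptr;
  RETURN_NOT_OK(context->Allocate(nbytes, &device_ptr));
  RETURN_NOT_OK(context->CopyHostToDevice(device_ptr, host_in, nbytes));
  RETURN_NOT_OK(context->CopyDeviceToHost(host_out, device_ptr, nbytes));
  return context->Free(device_ptr, nbytes);  // caller frees explicitly
}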
7 changes: 5 additions & 2 deletions cpp/src/arrow/gpu/cuda_memory.cc
@@ -55,9 +55,12 @@ Status CudaBuffer::CopyFromHost(const int64_t position, const uint8_t* data,
}

Status AllocateCudaBuffer(const int64_t size,
std::shared_ptr<CudaContext>& context,
const std::shared_ptr<CudaContext>& context,
std::shared_ptr<CudaBuffer>* out) {
return context->Allocate(size, out);
uint8_t* data = nullptr;
RETURN_NOT_OK(context->Allocate(size, &data));
*out = std::make_shared<CudaBuffer>(data, size, context);
return Status::OK();
}

CudaHostBuffer::~CudaHostBuffer() {
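And a sketch of the revised AllocateCudaBuffer helper in use, assuming CopyFromHost's third argument is the byte count (as suggested by the hunk header above).

#include <cstdint>
#include <memory>

#include "arrow/gpu/cuda_context.h"  // assumed header locations
#include "arrow/gpu/cuda_memory.h"
#include "arrow/status.h"

// Allocate a CudaBuffer through the helper and fill it from host memory.
arrow::Status FillDeviceBuffer(const std::shared_ptr<arrow::gpu::CudaContext>& context,
                               const uint8_t* host_data, int64_t nbytes,
                               std::shared_ptr<arrow::gpu::CudaBuffer>* out) {
  RETURN_NOT_OK(arrow::gpu::AllocateCudaBuffer(nbytes, context, out));
  return (*out)->CopyFromHost(0, host_data, nbytes);  // copy to device offset 0
}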
