Skip to content

Commit

Permalink
vec/qf - initial valid/borrowed/owned split for data
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremylt committed Dec 7, 2021
1 parent 239c650 commit 99553ae
Show file tree
Hide file tree
Showing 9 changed files with 507 additions and 532 deletions.
149 changes: 85 additions & 64 deletions backends/cuda/ceed-cuda-qfunctioncontext.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ static inline size_t bytes(const CeedQFunctionContext ctx) {
//------------------------------------------------------------------------------
// Sync host to device
//------------------------------------------------------------------------------
static inline int CeedQFunctionContextSyncH2D_Cuda(
const CeedQFunctionContext ctx) {
static inline int CeedQFunctionContextSyncH2D_Cuda(const CeedQFunctionContext
ctx) {
int ierr;
Ceed ceed;
ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
Expand All @@ -43,14 +43,15 @@ static inline int CeedQFunctionContextSyncH2D_Cuda(

ierr = cudaMemcpy(impl->d_data, impl->h_data, bytes(ctx),
cudaMemcpyHostToDevice); CeedChk_Cu(ceed, ierr);

return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Sync device to host
//------------------------------------------------------------------------------
static inline int CeedQFunctionContextSyncD2H_Cuda(
const CeedQFunctionContext ctx) {
static inline int CeedQFunctionContextSyncD2H_Cuda(const CeedQFunctionContext
ctx) {
int ierr;
Ceed ceed;
ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
Expand All @@ -59,6 +60,7 @@ static inline int CeedQFunctionContextSyncD2H_Cuda(

ierr = cudaMemcpy(impl->h_data, impl->d_data, bytes(ctx),
cudaMemcpyDeviceToHost); CeedChk_Cu(ceed, ierr);

return CEED_ERROR_SUCCESS;
}

Expand All @@ -72,62 +74,63 @@ static int CeedQFunctionContextSetDataHost_Cuda(const CeedQFunctionContext ctx,
CeedQFunctionContext_Cuda *impl;
ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);

impl->d_data = NULL;
ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr);
switch (cmode) {
case CEED_COPY_VALUES: {
if(!impl->h_data) {
ierr = CeedMalloc(bytes(ctx), &impl->h_data_allocated); CeedChkBackend(ierr);
impl->h_data = impl->h_data_allocated;
}
ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); CeedChkBackend(ierr);
impl->h_data_borrowed = NULL;
impl->h_data = impl->h_data_owned;
memcpy(impl->h_data, data, bytes(ctx));
} break;
case CEED_OWN_POINTER:
ierr = CeedFree(&impl->h_data_allocated); CeedChkBackend(ierr);
impl->h_data_allocated = data;
impl->h_data_owned = data;
impl->h_data_borrowed = NULL;
impl->h_data = data;
break;
case CEED_USE_POINTER:
ierr = CeedFree(&impl->h_data_allocated); CeedChkBackend(ierr);
impl->h_data_borrowed = data;
impl->h_data = data;
break;
}
impl->memState = CEED_CUDA_HOST_SYNC;

return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Set data from device
//------------------------------------------------------------------------------
static int CeedQFunctionContextSetDataDevice_Cuda(const CeedQFunctionContext
ctx,
const CeedCopyMode cmode, CeedScalar *data) {
ctx, const CeedCopyMode cmode, CeedScalar *data) {
int ierr;
Ceed ceed;
ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
CeedQFunctionContext_Cuda *impl;
ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);

impl->h_data = NULL;
ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr);
switch (cmode) {
case CEED_COPY_VALUES:
if (!impl->d_data) {
ierr = cudaMalloc((void **)&impl->d_data_allocated, bytes(ctx));
CeedChk_Cu(ceed, ierr);
impl->d_data = impl->d_data_allocated;
}
ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx));
CeedChk_Cu(ceed, ierr);
impl->d_data_borrowed = NULL;
impl->d_data = impl->d_data_owned;
ierr = cudaMemcpy(impl->d_data, data, bytes(ctx),
cudaMemcpyDeviceToDevice); CeedChk_Cu(ceed, ierr);
break;
case CEED_OWN_POINTER:
ierr = cudaFree(impl->d_data_allocated); CeedChk_Cu(ceed, ierr);
impl->d_data_allocated = data;
impl->d_data_owned = data;
impl->d_data_borrowed = NULL;
impl->d_data = data;
break;
case CEED_USE_POINTER:
ierr = cudaFree(impl->d_data_allocated); CeedChk_Cu(ceed, ierr);
impl->d_data_allocated = NULL;
impl->d_data_owned = NULL;
impl->d_data_borrowed = data;
impl->d_data = data;
break;
}
impl->memState = CEED_CUDA_DEVICE_SYNC;

return CEED_ERROR_SUCCESS;
}

Expand All @@ -147,7 +150,8 @@ static int CeedQFunctionContextSetData_Cuda(const CeedQFunctionContext ctx,
case CEED_MEM_DEVICE:
return CeedQFunctionContextSetDataDevice_Cuda(ctx, cmode, data);
}
return 1;

return CEED_ERROR_UNSUPPORTED;
}

//------------------------------------------------------------------------------
Expand All @@ -160,42 +164,46 @@ static int CeedQFunctionContextTakeData_Cuda(const CeedQFunctionContext ctx,
ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
CeedQFunctionContext_Cuda *impl;
ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
if(impl->h_data == NULL && impl->d_data == NULL)

if (!impl->h_data && !impl->d_data)
// LCOV_EXCL_START
return CeedError(ceed, CEED_ERROR_BACKEND, "No context data set");
// LCOV_EXCL_STOP

// Sync array to requested memtype and update pointer
switch (mtype) {
case CEED_MEM_HOST:
if (impl->h_data == NULL) {
ierr = CeedMalloc(bytes(ctx), &impl->h_data_allocated);
CeedChkBackend(ierr);
impl->h_data = impl->h_data_allocated;
}
if (impl->memState == CEED_CUDA_DEVICE_SYNC) {
if (!impl->h_data_borrowed)
// LCOV_EXCL_START
return CeedError(ceed, CEED_ERROR_BACKEND,
"No host context data set with CeedQFunctionContextSetData and CEED_USE_POINTER");
// LCOV_EXCL_STOP

if (!impl->h_data && impl->d_data) {
impl->h_data = impl->h_data_borrowed;
ierr = CeedQFunctionContextSyncD2H_Cuda(ctx); CeedChkBackend(ierr);
}
impl->memState = CEED_CUDA_HOST_SYNC;
*(void **)data = impl->h_data;
*(void **)data = impl->h_data_borrowed;
impl->h_data_borrowed = NULL;
impl->h_data = NULL;
impl->h_data_allocated = NULL;
break;
case CEED_MEM_DEVICE:
if (impl->d_data == NULL) {
ierr = cudaMalloc((void **)&impl->d_data_allocated, bytes(ctx));
CeedChk_Cu(ceed, ierr);
impl->d_data = impl->d_data_allocated;
}
if (impl->memState == CEED_CUDA_HOST_SYNC) {
if (!impl->d_data_borrowed)
// LCOV_EXCL_START
return CeedError(ceed, CEED_ERROR_BACKEND,
"No device context data set with CeedQFunctionContextSetData and CEED_USE_POINTER");
// LCOV_EXCL_STOP

if (!impl->d_data && impl->h_data) {
impl->d_data = impl->d_data_borrowed;
ierr = CeedQFunctionContextSyncH2D_Cuda(ctx); CeedChkBackend(ierr);
}
impl->memState = CEED_CUDA_DEVICE_SYNC;
*(void **)data = impl->d_data;
*(void **)data = impl->d_data_borrowed;
impl->d_data_borrowed = NULL;
impl->d_data = NULL;
impl->d_data_allocated = NULL;
break;
}

return CEED_ERROR_SUCCESS;
}

Expand All @@ -209,38 +217,49 @@ static int CeedQFunctionContextGetData_Cuda(const CeedQFunctionContext ctx,
ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
CeedQFunctionContext_Cuda *impl;
ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
if(impl->h_data == NULL && impl->d_data == NULL)
if (!impl->h_data && !impl->d_data)
// LCOV_EXCL_START
return CeedError(ceed, CEED_ERROR_BACKEND, "No context data set");
// LCOV_EXCL_STOP

// Sync array to requested memtype and update pointer
switch (mtype) {
case CEED_MEM_HOST:
if (impl->h_data == NULL) {
ierr = CeedMalloc(bytes(ctx), &impl->h_data_allocated);
CeedChkBackend(ierr);
impl->h_data = impl->h_data_allocated;
if (!impl->h_data) {
if (impl->h_data_borrowed) {
impl->h_data = impl->h_data_borrowed;
} else if (impl->h_data_owned) {
impl->h_data = impl->h_data_owned;
} else {
ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned);
CeedChkBackend(ierr);
impl->h_data = impl->h_data_owned;
}
if (impl->d_data) {
ierr = CeedQFunctionContextSyncD2H_Cuda(ctx); CeedChkBackend(ierr);
}
}
if (impl->memState == CEED_CUDA_DEVICE_SYNC) {
ierr = CeedQFunctionContextSyncD2H_Cuda(ctx); CeedChkBackend(ierr);
}
impl->memState = CEED_CUDA_HOST_SYNC;
*(void **)data = impl->h_data;
break;
case CEED_MEM_DEVICE:
if (impl->d_data == NULL) {
ierr = cudaMalloc((void **)&impl->d_data_allocated, bytes(ctx));
CeedChk_Cu(ceed, ierr);
impl->d_data = impl->d_data_allocated;
}
if (impl->memState == CEED_CUDA_HOST_SYNC) {
ierr = CeedQFunctionContextSyncH2D_Cuda(ctx); CeedChkBackend(ierr);
if (!impl->d_data) {
if (impl->d_data_borrowed) {
impl->d_data = impl->d_data_borrowed;
} else if (impl->d_data_owned) {
impl->d_data = impl->d_data_owned;
} else {
ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx));
CeedChk_Cu(ceed, ierr);
impl->d_data = impl->d_data_owned;
}
if (impl->h_data) {
ierr = CeedQFunctionContextSyncH2D_Cuda(ctx); CeedChkBackend(ierr);
}
}
impl->memState = CEED_CUDA_DEVICE_SYNC;
*(void **)data = impl->d_data;
break;
}

return CEED_ERROR_SUCCESS;
}

Expand All @@ -262,9 +281,10 @@ static int CeedQFunctionContextDestroy_Cuda(const CeedQFunctionContext ctx) {
CeedQFunctionContext_Cuda *impl;
ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);

ierr = cudaFree(impl->d_data_allocated); CeedChk_Cu(ceed, ierr);
ierr = CeedFree(&impl->h_data_allocated); CeedChkBackend(ierr);
ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr);
ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr);
ierr = CeedFree(&impl); CeedChkBackend(ierr);

return CEED_ERROR_SUCCESS;
}

Expand All @@ -287,9 +307,10 @@ int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx) {
CeedQFunctionContextRestoreData_Cuda); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "Destroy",
CeedQFunctionContextDestroy_Cuda); CeedChkBackend(ierr);

ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
impl->memState = CEED_CUDA_NONE_SYNC;
ierr = CeedQFunctionContextSetBackendData(ctx, impl); CeedChkBackend(ierr);

return CEED_ERROR_SUCCESS;
}
//------------------------------------------------------------------------------
Loading

0 comments on commit 99553ae

Please sign in to comment.