diff --git a/backends/blocked/ceed-blocked-operator.c b/backends/blocked/ceed-blocked-operator.c index 17d2c11ab4..116d400f34 100644 --- a/backends/blocked/ceed-blocked-operator.c +++ b/backends/blocked/ceed-blocked-operator.c @@ -483,8 +483,9 @@ static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec, // Output Evecs for (CeedInt i=0; ie_vecs_full[i+impl->num_inputs], CEED_MEM_HOST, - &e_data_full[i + num_input_fields]); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(impl->e_vecs_full[i+impl->num_inputs], + CEED_MEM_HOST, &e_data_full[i + num_input_fields]); + CeedChkBackend(ierr); } // Loop through elements @@ -524,7 +525,8 @@ static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec, for (CeedInt i=0; ie_vecs_full[i+impl->num_inputs], - &e_data_full[i + num_input_fields]); CeedChkBackend(ierr); + &e_data_full[i + num_input_fields]); + CeedChkBackend(ierr); // Get output vector ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); CeedChkBackend(ierr); @@ -652,7 +654,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked( &l_vec); CeedChkBackend(ierr); impl->qf_l_vec = l_vec; } - ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); // Build objects if needed CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q}; diff --git a/backends/cuda-shared/ceed-cuda-shared-basis.c b/backends/cuda-shared/ceed-cuda-shared-basis.c index 955c9e2cfe..5fcc74d438 100644 --- a/backends/cuda-shared/ceed-cuda-shared-basis.c +++ b/backends/cuda-shared/ceed-cuda-shared-basis.c @@ -788,7 +788,7 @@ int CeedBasisApplyTensor_Cuda_shared(CeedBasis basis, const CeedInt nelem, if (emode != CEED_EVAL_WEIGHT) { ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); } - ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); // Clear v for transpose mode if (tmode == CEED_TRANSPOSE) { diff --git a/backends/cuda/ceed-cuda-basis.c b/backends/cuda/ceed-cuda-basis.c index 0799de380f..5ba0d1987b 100644 --- a/backends/cuda/ceed-cuda-basis.c +++ b/backends/cuda/ceed-cuda-basis.c @@ -349,7 +349,7 @@ int CeedBasisApply_Cuda(CeedBasis basis, const CeedInt nelem, if (emode != CEED_EVAL_WEIGHT) { ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); } - ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); // Clear v for transpose operation if (tmode == CEED_TRANSPOSE) { @@ -438,7 +438,7 @@ int CeedBasisApplyNonTensor_Cuda(CeedBasis basis, const CeedInt nelem, if (emode != CEED_EVAL_WEIGHT) { ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); } - ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); // Clear v for transpose operation if (tmode == CEED_TRANSPOSE) { diff --git a/backends/cuda/ceed-cuda-operator.c b/backends/cuda/ceed-cuda-operator.c index b75bc9d853..a89a321347 100644 --- a/backends/cuda/ceed-cuda-operator.c +++ b/backends/cuda/ceed-cuda-operator.c @@ -82,7 +82,7 @@ static int CeedOperatorDestroy_Cuda(CeedOperator op) { // Setup infields or outfields //------------------------------------------------------------------------------ static int CeedOperatorSetupFields_Cuda(CeedQFunction qf, CeedOperator op, - bool inOrOut, CeedVector *evecs, + bool isinput, CeedVector *evecs, CeedVector *qvecs, CeedInt starte, CeedInt numfields, CeedInt Q, CeedInt numelements) { @@ -97,15 +97,15 @@ static int CeedOperatorSetupFields_Cuda(CeedQFunction qf, CeedOperator op, bool strided; bool skiprestrict; - if (inOrOut) { - ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &opfields); + if (isinput) { + ierr = CeedOperatorGetFields(op, NULL, &opfields, NULL, NULL); CeedChkBackend(ierr); - ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qffields); + ierr = CeedQFunctionGetFields(qf, NULL, &qffields, NULL, NULL); CeedChkBackend(ierr); } else { - ierr = CeedOperatorGetFields(op, NULL, &opfields, NULL, NULL); + ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &opfields); CeedChkBackend(ierr); - ierr = CeedQFunctionGetFields(qf, NULL, &qffields, NULL, NULL); + ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qffields); CeedChkBackend(ierr); } @@ -125,7 +125,7 @@ static int CeedOperatorSetupFields_Cuda(CeedQFunction qf, CeedOperator op, // CEED_STRIDES_BACKEND. // First, check whether the field is input or output: - if (!inOrOut) { + if (isinput) { // Check for passive input: ierr = CeedOperatorFieldGetVector(opfields[i], &fieldvec); CeedChkBackend(ierr); if (fieldvec != CEED_VECTOR_ACTIVE) { @@ -225,13 +225,13 @@ static int CeedOperatorSetup_Cuda(CeedOperator op) { // Set up infield and outfield evecs and qvecs // Infields - ierr = CeedOperatorSetupFields_Cuda(qf, op, 0, + ierr = CeedOperatorSetupFields_Cuda(qf, op, true, impl->evecs, impl->qvecsin, 0, numinputfields, Q, numelements); CeedChkBackend(ierr); // Outfields - ierr = CeedOperatorSetupFields_Cuda(qf, op, 1, + ierr = CeedOperatorSetupFields_Cuda(qf, op, false, impl->evecs, impl->qvecsout, numinputfields, numoutputfields, Q, numelements); CeedChkBackend(ierr); @@ -435,8 +435,8 @@ static int CeedOperatorApplyAdd_Cuda(CeedOperator op, CeedVector invec, CeedChkBackend(ierr); if (emode == CEED_EVAL_NONE) { // Set the output Q-Vector to use the E-Vector data directly. - ierr = CeedVectorGetArray(impl->evecs[i + impl->numein], CEED_MEM_DEVICE, - &edata[i + numinputfields]); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(impl->evecs[i + impl->numein], CEED_MEM_DEVICE, + &edata[i + numinputfields]); CeedChkBackend(ierr); ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_DEVICE, CEED_USE_POINTER, edata[i + numinputfields]); CeedChkBackend(ierr); @@ -1148,10 +1148,11 @@ static inline int CeedOperatorAssembleDiagonalCore_Cuda(CeedOperator op, ierr = CeedVectorSetValue(elemdiag, 0.0); CeedChkBackend(ierr); // Assemble element operator diagonals - CeedScalar *elemdiagarray, *assembledqfarray; + CeedScalar *elemdiagarray; + const CeedScalar *assembledqfarray; ierr = CeedVectorGetArray(elemdiag, CEED_MEM_DEVICE, &elemdiagarray); CeedChkBackend(ierr); - ierr = CeedVectorGetArray(assembledqf, CEED_MEM_DEVICE, &assembledqfarray); + ierr = CeedVectorGetArrayRead(assembledqf, CEED_MEM_DEVICE, &assembledqfarray); CeedChkBackend(ierr); CeedInt nelem; ierr = CeedElemRestrictionGetNumElements(diagrstr, &nelem); @@ -1177,7 +1178,7 @@ static inline int CeedOperatorAssembleDiagonalCore_Cuda(CeedOperator op, // Restore arrays ierr = CeedVectorRestoreArray(elemdiag, &elemdiagarray); CeedChkBackend(ierr); - ierr = CeedVectorRestoreArray(assembledqf, &assembledqfarray); + ierr = CeedVectorRestoreArrayRead(assembledqf, &assembledqfarray); CeedChkBackend(ierr); // Assemble local operator diagonal diff --git a/backends/cuda/ceed-cuda-qfunction.c b/backends/cuda/ceed-cuda-qfunction.c index 292a88d22a..fb8cf06834 100644 --- a/backends/cuda/ceed-cuda-qfunction.c +++ b/backends/cuda/ceed-cuda-qfunction.c @@ -48,7 +48,7 @@ static int CeedQFunctionApply_Cuda(CeedQFunction qf, CeedInt Q, CeedChkBackend(ierr); } for (CeedInt i = 0; i < numoutputfields; i++) { - ierr = CeedVectorGetArray(V[i], CEED_MEM_DEVICE, &data->fields.outputs[i]); + ierr = CeedVectorGetArrayWrite(V[i], CEED_MEM_DEVICE, &data->fields.outputs[i]); CeedChkBackend(ierr); } diff --git a/backends/cuda/ceed-cuda-qfunctioncontext.c b/backends/cuda/ceed-cuda-qfunctioncontext.c index 61f130ef2f..3aa5ed71c7 100644 --- a/backends/cuda/ceed-cuda-qfunctioncontext.c +++ b/backends/cuda/ceed-cuda-qfunctioncontext.c @@ -41,8 +41,25 @@ static inline int CeedQFunctionContextSyncH2D_Cuda( CeedQFunctionContext_Cuda *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + if (!impl->h_data) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "No valid host data to sync to device"); + // LCOV_EXCL_STOP + + if (impl->d_data_borrowed) { + impl->d_data = impl->d_data_borrowed; + } else if (impl->d_data_owned) { + impl->d_data = impl->d_data_owned; + } else { + ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx)); + CeedChk_Cu(ceed, ierr); + impl->d_data = impl->d_data_owned; + } + ierr = cudaMemcpy(impl->d_data, impl->h_data, bytes(ctx), cudaMemcpyHostToDevice); CeedChk_Cu(ceed, ierr); + return CEED_ERROR_SUCCESS; } @@ -57,8 +74,112 @@ static inline int CeedQFunctionContextSyncD2H_Cuda( CeedQFunctionContext_Cuda *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + if (!impl->d_data) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "No valid device data to sync to host"); + // LCOV_EXCL_STOP + + if (impl->h_data_borrowed) { + impl->h_data = impl->h_data_borrowed; + } else if (impl->h_data_owned) { + impl->h_data = impl->h_data_owned; + } else { + ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); + CeedChkBackend(ierr); + impl->h_data = impl->h_data_owned; + } + ierr = cudaMemcpy(impl->h_data, impl->d_data, bytes(ctx), cudaMemcpyDeviceToHost); CeedChk_Cu(ceed, ierr); + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Sync data of type +//------------------------------------------------------------------------------ +static inline int CeedQFunctionContextSync_Cuda( + const CeedQFunctionContext ctx, CeedMemType mtype) { + switch (mtype) { + case CEED_MEM_HOST: return CeedQFunctionContextSyncD2H_Cuda(ctx); + case CEED_MEM_DEVICE: return CeedQFunctionContextSyncH2D_Cuda(ctx); + } + return CEED_ERROR_UNSUPPORTED; +} + +//------------------------------------------------------------------------------ +// Set all pointers as invalid +//------------------------------------------------------------------------------ +static inline int CeedQFunctionContextSetAllInvalid_Cuda( + const CeedQFunctionContext ctx) { + int ierr; + CeedQFunctionContext_Cuda *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + + impl->h_data = NULL; + impl->d_data = NULL; + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if ctx has valid data +//------------------------------------------------------------------------------ +static inline int CeedQFunctionContextHasValidData_Cuda( + const CeedQFunctionContext ctx, bool *has_valid_data) { + int ierr; + CeedQFunctionContext_Cuda *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + + *has_valid_data = !!impl->h_data || !!impl->d_data; + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if ctx has borrowed data +//------------------------------------------------------------------------------ +static inline int CeedQFunctionContextHasBorrowedDataOfType_Cuda( + const CeedQFunctionContext ctx, CeedMemType mtype, + bool *has_borrowed_data_of_type) { + int ierr; + CeedQFunctionContext_Cuda *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + + switch (mtype) { + case CEED_MEM_HOST: + *has_borrowed_data_of_type = !!impl->h_data_borrowed; + break; + case CEED_MEM_DEVICE: + *has_borrowed_data_of_type = !!impl->d_data_borrowed; + break; + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if data of given type needs sync +//------------------------------------------------------------------------------ +static inline int CeedQFunctionContextNeedSync_Cuda( + const CeedQFunctionContext ctx, CeedMemType mtype, bool *need_sync) { + int ierr; + CeedQFunctionContext_Cuda *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + + bool has_valid_data = true; + ierr = CeedQFunctionContextHasValidData(ctx, &has_valid_data); + CeedChkBackend(ierr); + switch (mtype) { + case CEED_MEM_HOST: + *need_sync = has_valid_data && !impl->h_data; + break; + case CEED_MEM_DEVICE: + *need_sync = has_valid_data && !impl->d_data; + break; + } + return CEED_ERROR_SUCCESS; } @@ -66,136 +187,124 @@ static inline int CeedQFunctionContextSyncD2H_Cuda( // Set data from host //------------------------------------------------------------------------------ static int CeedQFunctionContextSetDataHost_Cuda(const CeedQFunctionContext ctx, - const CeedCopyMode cmode, - CeedScalar *data) { + const CeedCopyMode cmode, void *data) { int ierr; CeedQFunctionContext_Cuda *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr); switch (cmode) { case CEED_COPY_VALUES: { - if(!impl->h_data) { - ierr = CeedMalloc(bytes(ctx), &impl->h_data_allocated); CeedChkBackend(ierr); - impl->h_data = impl->h_data_allocated; - } + ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); CeedChkBackend(ierr); + impl->h_data_borrowed = NULL; + impl->h_data = impl->h_data_owned; memcpy(impl->h_data, data, bytes(ctx)); } break; case CEED_OWN_POINTER: - ierr = CeedFree(&impl->h_data_allocated); CeedChkBackend(ierr); - impl->h_data_allocated = data; + impl->h_data_owned = data; + impl->h_data_borrowed = NULL; impl->h_data = data; break; case CEED_USE_POINTER: - ierr = CeedFree(&impl->h_data_allocated); CeedChkBackend(ierr); + impl->h_data_borrowed = data; impl->h_data = data; break; } - impl->memState = CEED_CUDA_HOST_SYNC; + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ // Set data from device //------------------------------------------------------------------------------ -static int CeedQFunctionContextSetDataDevice_Cuda(const CeedQFunctionContext - ctx, - const CeedCopyMode cmode, CeedScalar *data) { +static int CeedQFunctionContextSetDataDevice_Cuda( + const CeedQFunctionContext ctx, const CeedCopyMode cmode, void *data) { int ierr; Ceed ceed; ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); CeedQFunctionContext_Cuda *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr); + impl->d_data_owned = NULL; switch (cmode) { case CEED_COPY_VALUES: - if (!impl->d_data) { - ierr = cudaMalloc((void **)&impl->d_data_allocated, bytes(ctx)); - CeedChk_Cu(ceed, ierr); - impl->d_data = impl->d_data_allocated; - } + ierr = cudaMalloc((void **)&impl->d_data_owned, bytes(ctx)); + CeedChk_Cu(ceed, ierr); + impl->d_data_borrowed = NULL; + impl->d_data = impl->d_data_owned; ierr = cudaMemcpy(impl->d_data, data, bytes(ctx), cudaMemcpyDeviceToDevice); CeedChk_Cu(ceed, ierr); break; case CEED_OWN_POINTER: - ierr = cudaFree(impl->d_data_allocated); CeedChk_Cu(ceed, ierr); - impl->d_data_allocated = data; + impl->d_data_owned = data; + impl->d_data_borrowed = NULL; impl->d_data = data; break; case CEED_USE_POINTER: - ierr = cudaFree(impl->d_data_allocated); CeedChk_Cu(ceed, ierr); - impl->d_data_allocated = NULL; + impl->d_data_owned = NULL; + impl->d_data_borrowed = data; impl->d_data = data; break; } - impl->memState = CEED_CUDA_DEVICE_SYNC; + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ -// Set the array used by a user context, -// freeing any previously allocated array if applicable +// Set the data used by a user context, +// freeing any previously allocated data if applicable //------------------------------------------------------------------------------ static int CeedQFunctionContextSetData_Cuda(const CeedQFunctionContext ctx, - const CeedMemType mtype, const CeedCopyMode cmode, CeedScalar *data) { + const CeedMemType mtype, const CeedCopyMode cmode, void *data) { int ierr; Ceed ceed; ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr); switch (mtype) { case CEED_MEM_HOST: return CeedQFunctionContextSetDataHost_Cuda(ctx, cmode, data); case CEED_MEM_DEVICE: return CeedQFunctionContextSetDataDevice_Cuda(ctx, cmode, data); } - return 1; + + return CEED_ERROR_UNSUPPORTED; } //------------------------------------------------------------------------------ // Take data //------------------------------------------------------------------------------ static int CeedQFunctionContextTakeData_Cuda(const CeedQFunctionContext ctx, - const CeedMemType mtype, CeedScalar *data) { + const CeedMemType mtype, void *data) { int ierr; Ceed ceed; ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); CeedQFunctionContext_Cuda *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); - if(impl->h_data == NULL && impl->d_data == NULL) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "No context data set"); - // LCOV_EXCL_STOP - // Sync array to requested memtype and update pointer + // Sync data to requested memtype + bool need_sync = false; + ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mtype, &need_sync); + CeedChkBackend(ierr); + if (need_sync) { + ierr = CeedQFunctionContextSync_Cuda(ctx, mtype); CeedChkBackend(ierr); + } + + // Update pointer switch (mtype) { case CEED_MEM_HOST: - if (impl->h_data == NULL) { - ierr = CeedMalloc(bytes(ctx), &impl->h_data_allocated); - CeedChkBackend(ierr); - impl->h_data = impl->h_data_allocated; - } - if (impl->memState == CEED_CUDA_DEVICE_SYNC) { - ierr = CeedQFunctionContextSyncD2H_Cuda(ctx); CeedChkBackend(ierr); - } - impl->memState = CEED_CUDA_HOST_SYNC; - *(void **)data = impl->h_data; + *(void **)data = impl->h_data_borrowed; + impl->h_data_borrowed = NULL; impl->h_data = NULL; - impl->h_data_allocated = NULL; break; case CEED_MEM_DEVICE: - if (impl->d_data == NULL) { - ierr = cudaMalloc((void **)&impl->d_data_allocated, bytes(ctx)); - CeedChk_Cu(ceed, ierr); - impl->d_data = impl->d_data_allocated; - } - if (impl->memState == CEED_CUDA_HOST_SYNC) { - ierr = CeedQFunctionContextSyncH2D_Cuda(ctx); CeedChkBackend(ierr); - } - impl->memState = CEED_CUDA_DEVICE_SYNC; - *(void **)data = impl->d_data; + *(void **)data = impl->d_data_borrowed; + impl->d_data_borrowed = NULL; impl->d_data = NULL; - impl->d_data_allocated = NULL; break; } + return CEED_ERROR_SUCCESS; } @@ -203,52 +312,50 @@ static int CeedQFunctionContextTakeData_Cuda(const CeedQFunctionContext ctx, // Get data //------------------------------------------------------------------------------ static int CeedQFunctionContextGetData_Cuda(const CeedQFunctionContext ctx, - const CeedMemType mtype, CeedScalar *data) { + const CeedMemType mtype, void *data) { int ierr; Ceed ceed; ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); CeedQFunctionContext_Cuda *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); - if(impl->h_data == NULL && impl->d_data == NULL) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "No context data set"); - // LCOV_EXCL_STOP - // Sync array to requested memtype and update pointer + // Sync data to requested memtype + bool need_sync = false; + ierr = CeedQFunctionContextNeedSync_Cuda(ctx, mtype, &need_sync); + CeedChkBackend(ierr); + if (need_sync) { + ierr = CeedQFunctionContextSync_Cuda(ctx, mtype); CeedChkBackend(ierr); + } + + // Update pointer switch (mtype) { case CEED_MEM_HOST: - if (impl->h_data == NULL) { - ierr = CeedMalloc(bytes(ctx), &impl->h_data_allocated); - CeedChkBackend(ierr); - impl->h_data = impl->h_data_allocated; - } - if (impl->memState == CEED_CUDA_DEVICE_SYNC) { - ierr = CeedQFunctionContextSyncD2H_Cuda(ctx); CeedChkBackend(ierr); - } - impl->memState = CEED_CUDA_HOST_SYNC; *(void **)data = impl->h_data; break; case CEED_MEM_DEVICE: - if (impl->d_data == NULL) { - ierr = cudaMalloc((void **)&impl->d_data_allocated, bytes(ctx)); - CeedChk_Cu(ceed, ierr); - impl->d_data = impl->d_data_allocated; - } - if (impl->memState == CEED_CUDA_HOST_SYNC) { - ierr = CeedQFunctionContextSyncH2D_Cuda(ctx); CeedChkBackend(ierr); - } - impl->memState = CEED_CUDA_DEVICE_SYNC; *(void **)data = impl->d_data; break; } + + // Mark only pointer for requested memory as valid + ierr = CeedQFunctionContextSetAllInvalid_Cuda(ctx); CeedChkBackend(ierr); + switch (mtype) { + case CEED_MEM_HOST: + impl->h_data = *(void **)data; + break; + case CEED_MEM_DEVICE: + impl->d_data = *(void **)data; + break; + } + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ // Restore data obtained using CeedQFunctionContextGetData() //------------------------------------------------------------------------------ -static int CeedQFunctionContextRestoreData_Cuda(const CeedQFunctionContext - ctx) { +static int CeedQFunctionContextRestoreData_Cuda( + const CeedQFunctionContext ctx) { return CEED_ERROR_SUCCESS; } @@ -262,9 +369,10 @@ static int CeedQFunctionContextDestroy_Cuda(const CeedQFunctionContext ctx) { CeedQFunctionContext_Cuda *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); - ierr = cudaFree(impl->d_data_allocated); CeedChk_Cu(ceed, ierr); - ierr = CeedFree(&impl->h_data_allocated); CeedChkBackend(ierr); + ierr = cudaFree(impl->d_data_owned); CeedChk_Cu(ceed, ierr); + ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr); ierr = CeedFree(&impl); CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } @@ -277,6 +385,13 @@ int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx) { Ceed ceed; ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "HasValidData", + CeedQFunctionContextHasValidData_Cuda); + CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, + "HasBorrowedDataOfType", + CeedQFunctionContextHasBorrowedDataOfType_Cuda); + CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "SetData", CeedQFunctionContextSetData_Cuda); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "TakeData", @@ -287,9 +402,10 @@ int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx) { CeedQFunctionContextRestoreData_Cuda); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "Destroy", CeedQFunctionContextDestroy_Cuda); CeedChkBackend(ierr); + ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); - impl->memState = CEED_CUDA_NONE_SYNC; ierr = CeedQFunctionContextSetBackendData(ctx, impl); CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ diff --git a/backends/cuda/ceed-cuda-restriction.c b/backends/cuda/ceed-cuda-restriction.c index 7177510c79..a76fd9512a 100644 --- a/backends/cuda/ceed-cuda-restriction.c +++ b/backends/cuda/ceed-cuda-restriction.c @@ -145,7 +145,13 @@ static int CeedElemRestrictionApply_Cuda(CeedElemRestriction r, const CeedScalar *d_u; CeedScalar *d_v; ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); - ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); + if (tmode == CEED_TRANSPOSE) { + // Sum into for transpose mode, e-vec to l-vec + ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); + } else { + // Overwrite for notranspose mode, l-vec to e-vec + ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); + } // Restrict if (tmode == CEED_NOTRANSPOSE) { diff --git a/backends/cuda/ceed-cuda-vector.c b/backends/cuda/ceed-cuda-vector.c index b98efb0d31..b09207dda3 100644 --- a/backends/cuda/ceed-cuda-vector.c +++ b/backends/cuda/ceed-cuda-vector.c @@ -39,11 +39,28 @@ static inline int CeedVectorSyncH2D_Cuda(const CeedVector vec) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + if (!impl->h_array) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "No valid host data to sync to device"); + // LCOV_EXCL_STOP + + if (impl->d_array_borrowed) { + impl->d_array = impl->d_array_borrowed; + } else if (impl->d_array_owned) { + impl->d_array = impl->d_array_owned; + } else { + ierr = cudaMalloc((void **)&impl->d_array_owned, bytes(vec)); + CeedChk_Cu(ceed, ierr); + impl->d_array = impl->d_array_owned; + } - ierr = cudaMemcpy(data->d_array, data->h_array, bytes(vec), + ierr = cudaMemcpy(impl->d_array, impl->h_array, bytes(vec), cudaMemcpyHostToDevice); CeedChk_Cu(ceed, ierr); + return CEED_ERROR_SUCCESS; } @@ -54,11 +71,133 @@ static inline int CeedVectorSyncD2H_Cuda(const CeedVector vec) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + if (!impl->d_array) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "No valid device data to sync to host"); + // LCOV_EXCL_STOP + + if (impl->h_array_borrowed) { + impl->h_array = impl->h_array_borrowed; + } else if (impl->h_array_owned) { + impl->h_array = impl->h_array_owned; + } else { + CeedInt length; + ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); + ierr = CeedCalloc(length, &impl->h_array_owned); CeedChkBackend(ierr); + impl->h_array = impl->h_array_owned; + } - ierr = cudaMemcpy(data->h_array, data->d_array, bytes(vec), + ierr = cudaMemcpy(impl->h_array, impl->d_array, bytes(vec), cudaMemcpyDeviceToHost); CeedChk_Cu(ceed, ierr); + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Sync arrays +//------------------------------------------------------------------------------ +static inline int CeedVectorSync_Cuda(const CeedVector vec, CeedMemType mtype) { + switch (mtype) { + case CEED_MEM_HOST: return CeedVectorSyncD2H_Cuda(vec); + case CEED_MEM_DEVICE: return CeedVectorSyncH2D_Cuda(vec); + } + return CEED_ERROR_UNSUPPORTED; +} + +//------------------------------------------------------------------------------ +// Set all pointers as invalid +//------------------------------------------------------------------------------ +static inline int CeedVectorSetAllInvalid_Cuda(const CeedVector vec) { + int ierr; + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + impl->h_array = NULL; + impl->d_array = NULL; + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if CeedVector has any valid pointer +//------------------------------------------------------------------------------ +static inline int CeedVectorHasValidArray_Cuda(const CeedVector vec, + bool *has_valid_array) { + int ierr; + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + *has_valid_array = !!impl->h_array || !!impl->d_array; + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if has array of given type +//------------------------------------------------------------------------------ +static inline int CeedVectorHasArrayOfType_Cuda(const CeedVector vec, + CeedMemType mtype, bool *has_array_of_type) { + int ierr; + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + switch (mtype) { + case CEED_MEM_HOST: + *has_array_of_type = !!impl->h_array_borrowed || !!impl->h_array_owned; + break; + case CEED_MEM_DEVICE: + *has_array_of_type = !!impl->d_array_borrowed || !!impl->d_array_owned; + break; + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if has borrowed array of given type +//------------------------------------------------------------------------------ +static inline int CeedVectorHasBorrowedArrayOfType_Cuda(const CeedVector vec, + CeedMemType mtype, bool *has_borrowed_array_of_type) { + int ierr; + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + switch (mtype) { + case CEED_MEM_HOST: + *has_borrowed_array_of_type = !!impl->h_array_borrowed; + break; + case CEED_MEM_DEVICE: + *has_borrowed_array_of_type = !!impl->d_array_borrowed; + break; + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if is any array of given type +//------------------------------------------------------------------------------ +static inline int CeedVectorNeedSync_Cuda(const CeedVector vec, + CeedMemType mtype, bool *need_sync) { + int ierr; + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + bool has_valid_array = false; + ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChkBackend(ierr); + switch (mtype) { + case CEED_MEM_HOST: + *need_sync = has_valid_array && !impl->h_array; + break; + case CEED_MEM_DEVICE: + *need_sync = has_valid_array && !impl->d_array; + break; + } + return CEED_ERROR_SUCCESS; } @@ -66,34 +205,36 @@ static inline int CeedVectorSyncD2H_Cuda(const CeedVector vec) { // Set array from host //------------------------------------------------------------------------------ static int CeedVectorSetArrayHost_Cuda(const CeedVector vec, - const CeedCopyMode cmode, - CeedScalar *array) { + const CeedCopyMode cmode, CeedScalar *array) { int ierr; - CeedVector_Cuda *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); switch (cmode) { case CEED_COPY_VALUES: { CeedInt length; - if(!data->h_array) { + if (!impl->h_array_owned) { ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); - ierr = CeedMalloc(length, &data->h_array_allocated); CeedChkBackend(ierr); - data->h_array = data->h_array_allocated; + ierr = CeedMalloc(length, &impl->h_array_owned); CeedChkBackend(ierr); } + impl->h_array_borrowed = NULL; + impl->h_array = impl->h_array_owned; if (array) - memcpy(data->h_array, array, bytes(vec)); + memcpy(impl->h_array, array, bytes(vec)); } break; case CEED_OWN_POINTER: - ierr = CeedFree(&data->h_array_allocated); CeedChkBackend(ierr); - data->h_array_allocated = array; - data->h_array = array; + ierr = CeedFree(&impl->h_array_owned); CeedChkBackend(ierr); + impl->h_array_owned = array; + impl->h_array_borrowed = NULL; + impl->h_array = array; break; case CEED_USE_POINTER: - ierr = CeedFree(&data->h_array_allocated); CeedChkBackend(ierr); - data->h_array = array; + ierr = CeedFree(&impl->h_array_owned); CeedChkBackend(ierr); + impl->h_array_borrowed = array; + impl->h_array = array; break; } - data->memState = CEED_CUDA_HOST_SYNC; + return CEED_ERROR_SUCCESS; } @@ -105,33 +246,35 @@ static int CeedVectorSetArrayDevice_Cuda(const CeedVector vec, int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); switch (cmode) { case CEED_COPY_VALUES: - if (!data->d_array) { - ierr = cudaMalloc((void **)&data->d_array_allocated, bytes(vec)); + if (!impl->d_array_owned) { + ierr = cudaMalloc((void **)&impl->d_array_owned, bytes(vec)); CeedChk_Cu(ceed, ierr); - data->d_array = data->d_array_allocated; + impl->d_array = impl->d_array_owned; } if (array) { - ierr = cudaMemcpy(data->d_array, array, bytes(vec), + ierr = cudaMemcpy(impl->d_array, array, bytes(vec), cudaMemcpyDeviceToDevice); CeedChk_Cu(ceed, ierr); } break; case CEED_OWN_POINTER: - ierr = cudaFree(data->d_array_allocated); CeedChk_Cu(ceed, ierr); - data->d_array_allocated = array; - data->d_array = array; + ierr = cudaFree(impl->d_array_owned); CeedChk_Cu(ceed, ierr); + impl->d_array_owned = array; + impl->d_array_borrowed = NULL; + impl->d_array = array; break; case CEED_USE_POINTER: - ierr = cudaFree(data->d_array_allocated); CeedChk_Cu(ceed, ierr); - data->d_array_allocated = NULL; - data->d_array = array; + ierr = cudaFree(impl->d_array_owned); CeedChk_Cu(ceed, ierr); + impl->d_array_owned = NULL; + impl->d_array_borrowed = array; + impl->d_array = array; break; } - data->memState = CEED_CUDA_DEVICE_SYNC; + return CEED_ERROR_SUCCESS; } @@ -141,54 +284,22 @@ static int CeedVectorSetArrayDevice_Cuda(const CeedVector vec, //------------------------------------------------------------------------------ static int CeedVectorSetArray_Cuda(const CeedVector vec, const CeedMemType mtype, - const CeedCopyMode cmode, - CeedScalar *array) { + const CeedCopyMode cmode, CeedScalar *array) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + ierr = CeedVectorSetAllInvalid_Cuda(vec); CeedChkBackend(ierr); switch (mtype) { case CEED_MEM_HOST: return CeedVectorSetArrayHost_Cuda(vec, cmode, array); case CEED_MEM_DEVICE: return CeedVectorSetArrayDevice_Cuda(vec, cmode, array); } - return 1; -} - -//------------------------------------------------------------------------------ -// Vector Take Array -//------------------------------------------------------------------------------ -static int CeedVectorTakeArray_Cuda(CeedVector vec, CeedMemType mtype, - CeedScalar **array) { - int ierr; - CeedVector_Cuda *impl; - ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); - - switch(mtype) { - case CEED_MEM_HOST: - if (impl->memState == CEED_CUDA_DEVICE_SYNC) { - ierr = CeedVectorSyncD2H_Cuda(vec); CeedChkBackend(ierr); - } - (*array) = impl->h_array; - impl->h_array = NULL; - impl->h_array_allocated = NULL; - impl->memState = CEED_CUDA_HOST_SYNC; - break; - case CEED_MEM_DEVICE: - if (impl->memState == CEED_CUDA_HOST_SYNC) { - ierr = CeedVectorSyncH2D_Cuda(vec); CeedChkBackend(ierr); - } - (*array) = impl->d_array; - impl->d_array = NULL; - impl->d_array_allocated = NULL; - impl->memState = CEED_CUDA_DEVICE_SYNC; - break; - } - return CEED_ERROR_SUCCESS; + return CEED_ERROR_UNSUPPORTED; } //------------------------------------------------------------------------------ @@ -214,135 +325,175 @@ static int CeedVectorSetValue_Cuda(CeedVector vec, CeedScalar val) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); CeedInt length; ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); // Set value for synced device/host array - switch(data->memState) { - case CEED_CUDA_HOST_SYNC: - ierr = CeedHostSetValue_Cuda(data->h_array, length, val); CeedChkBackend(ierr); - break; - case CEED_CUDA_NONE_SYNC: - /* - Handles the case where SetValue is used without SetArray. - Default allocation then happens on the GPU. - */ - if (data->d_array == NULL) { - ierr = cudaMalloc((void **)&data->d_array_allocated, bytes(vec)); - CeedChk_Cu(ceed, ierr); - data->d_array = data->d_array_allocated; + if (!impl->d_array && !impl->h_array) { + if (impl->d_array_borrowed) { + impl->d_array = impl->d_array_borrowed; + } else if (impl->h_array_borrowed) { + impl->h_array = impl->h_array_borrowed; + } else if (impl->d_array_owned) { + impl->d_array = impl->d_array_owned; + } else if (impl->h_array_owned) { + impl->h_array = impl->h_array_owned; + } else { + ierr = CeedVectorSetArray(vec, CEED_MEM_DEVICE, CEED_COPY_VALUES, NULL); + CeedChkBackend(ierr); } - data->memState = CEED_CUDA_DEVICE_SYNC; - ierr = CeedDeviceSetValue_Cuda(data->d_array, length, val); - CeedChkBackend(ierr); - break; - case CEED_CUDA_DEVICE_SYNC: - ierr = CeedDeviceSetValue_Cuda(data->d_array, length, val); + } + if (impl->d_array) { + ierr = CeedDeviceSetValue_Cuda(impl->d_array, length, val); CeedChkBackend(ierr); + impl->h_array = NULL; + } + if (impl->h_array) { + ierr = CeedHostSetValue_Cuda(impl->h_array, length, val); CeedChkBackend(ierr); + impl->d_array = NULL; + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Vector Take Array +//------------------------------------------------------------------------------ +static int CeedVectorTakeArray_Cuda(CeedVector vec, CeedMemType mtype, + CeedScalar **array) { + int ierr; + Ceed ceed; + ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + // Sync array to requested memtype + bool need_sync = false; + ierr = CeedVectorNeedSync_Cuda(vec, mtype, &need_sync); CeedChkBackend(ierr); + if (need_sync) { + ierr = CeedVectorSync_Cuda(vec, mtype); CeedChkBackend(ierr); + } + + // Update pointer + switch (mtype) { + case CEED_MEM_HOST: + (*array) = impl->h_array_borrowed; + impl->h_array_borrowed = NULL; + impl->h_array = NULL; break; - case CEED_CUDA_BOTH_SYNC: - ierr = CeedHostSetValue_Cuda(data->h_array, length, val); CeedChkBackend(ierr); - ierr = CeedDeviceSetValue_Cuda(data->d_array, length, val); - CeedChkBackend(ierr); + case CEED_MEM_DEVICE: + (*array) = impl->d_array_borrowed; + impl->d_array_borrowed = NULL; + impl->d_array = NULL; break; } + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ -// Get read-only access to a vector via the specified mtype memory type -// on which to access the array. If the backend uses a different memory type, -// this will perform a copy (possibly cached). +// Core logic for array syncronization for GetArray. +// If a different memory type is most up to date, this will perform a copy //------------------------------------------------------------------------------ -static int CeedVectorGetArrayRead_Cuda(const CeedVector vec, - const CeedMemType mtype, - const CeedScalar **array) { +static int CeedVectorGetArrayCore_Cuda(const CeedVector vec, + const CeedMemType mtype, CeedScalar **array) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + bool need_sync = false, has_array_of_type = true; + ierr = CeedVectorNeedSync_Cuda(vec, mtype, &need_sync); CeedChkBackend(ierr); + ierr = CeedVectorHasArrayOfType_Cuda(vec, mtype, &has_array_of_type); + CeedChkBackend(ierr); + if (need_sync) { + // Sync array to requested memtype + ierr = CeedVectorSync_Cuda(vec, mtype); CeedChkBackend(ierr); + } - // Sync array to requested memtype and update pointer + // Update pointer switch (mtype) { case CEED_MEM_HOST: - if(data->h_array==NULL) { - CeedInt length; - ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); - ierr = CeedMalloc(length, &data->h_array_allocated); - CeedChkBackend(ierr); - data->h_array = data->h_array_allocated; - } - if(data->memState==CEED_CUDA_DEVICE_SYNC) { - ierr = CeedVectorSyncD2H_Cuda(vec); - CeedChkBackend(ierr); - data->memState = CEED_CUDA_BOTH_SYNC; - } - *array = data->h_array; + *array = impl->h_array; break; case CEED_MEM_DEVICE: - if (data->d_array==NULL) { - ierr = cudaMalloc((void **)&data->d_array_allocated, bytes(vec)); - CeedChk_Cu(ceed, ierr); - data->d_array = data->d_array_allocated; - } - if (data->memState==CEED_CUDA_HOST_SYNC) { - ierr = CeedVectorSyncH2D_Cuda(vec); - CeedChkBackend(ierr); - data->memState = CEED_CUDA_BOTH_SYNC; - } - *array = data->d_array; + *array = impl->d_array; break; } + return CEED_ERROR_SUCCESS; } +//------------------------------------------------------------------------------ +// Get read-only access to a vector via the specified mtype +//------------------------------------------------------------------------------ +static int CeedVectorGetArrayRead_Cuda(const CeedVector vec, + const CeedMemType mtype, const CeedScalar **array) { + return CeedVectorGetArrayCore_Cuda(vec, mtype, (CeedScalar **)array); +} //------------------------------------------------------------------------------ -// Get array +// Get read/write access to a vector via the specified memtype //------------------------------------------------------------------------------ static int CeedVectorGetArray_Cuda(const CeedVector vec, - const CeedMemType mtype, - CeedScalar **array) { + const CeedMemType mtype, CeedScalar **array) { int ierr; - Ceed ceed; - ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); - // Sync array to requested memtype and update pointer + ierr = CeedVectorGetArrayCore_Cuda(vec, mtype, array); CeedChkBackend(ierr); + + ierr = CeedVectorSetAllInvalid_Cuda(vec); CeedChkBackend(ierr); switch (mtype) { case CEED_MEM_HOST: - if(data->h_array==NULL) { - CeedInt length; - ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); - ierr = CeedMalloc(length, &data->h_array_allocated); - CeedChkBackend(ierr); - data->h_array = data->h_array_allocated; - } - if(data->memState==CEED_CUDA_DEVICE_SYNC) { - ierr = CeedVectorSyncD2H_Cuda(vec); CeedChkBackend(ierr); - } - data->memState = CEED_CUDA_HOST_SYNC; - *array = data->h_array; + impl->h_array = *array; break; case CEED_MEM_DEVICE: - if (data->d_array==NULL) { - ierr = cudaMalloc((void **)&data->d_array_allocated, bytes(vec)); - CeedChk_Cu(ceed, ierr); - data->d_array = data->d_array_allocated; - } - if (data->memState==CEED_CUDA_HOST_SYNC) { - ierr = CeedVectorSyncH2D_Cuda(vec); CeedChkBackend(ierr); - } - data->memState = CEED_CUDA_DEVICE_SYNC; - *array = data->d_array; + impl->d_array = *array; break; } + return CEED_ERROR_SUCCESS; } +//------------------------------------------------------------------------------ +// Get write access to a vector via the specified memtype +//------------------------------------------------------------------------------ +static int CeedVectorGetArrayWrite_Cuda(const CeedVector vec, + const CeedMemType mtype, CeedScalar **array) { + int ierr; + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + bool has_array_of_type = true; + ierr = CeedVectorHasArrayOfType_Cuda(vec, mtype, &has_array_of_type); + CeedChkBackend(ierr); + if (!has_array_of_type) { + // Allocate if array is not yet allocated + ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); + CeedChkBackend(ierr); + } else { + // Select dirty array + switch (mtype) { + case CEED_MEM_HOST: + if (impl->h_array_borrowed) + impl->h_array = impl->h_array_borrowed; + else + impl->h_array = impl->h_array_owned; + break; + case CEED_MEM_DEVICE: + if (impl->d_array_borrowed) + impl->d_array = impl->d_array_borrowed; + else + impl->d_array = impl->d_array_owned; + } + } + + return CeedVectorGetArray_Cuda(vec, mtype, array); +} + //------------------------------------------------------------------------------ // Restore an array obtained using CeedVectorGetArrayRead() //------------------------------------------------------------------------------ @@ -365,8 +516,8 @@ static int CeedVectorNorm_Cuda(CeedVector vec, CeedNormType type, int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); CeedInt length; ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); cublasHandle_t handle; @@ -404,7 +555,7 @@ static int CeedVectorNorm_Cuda(CeedVector vec, CeedNormType type, } CeedChk_Cublas(ceed, ierr); CeedScalar normNoAbs; - ierr = cudaMemcpy(&normNoAbs, data->d_array+indx-1, sizeof(CeedScalar), + ierr = cudaMemcpy(&normNoAbs, impl->d_array+indx-1, sizeof(CeedScalar), cudaMemcpyDeviceToHost); CeedChk_Cu(ceed, ierr); *norm = fabs(normNoAbs); break; @@ -437,28 +588,19 @@ static int CeedVectorReciprocal_Cuda(CeedVector vec) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); CeedInt length; ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); // Set value for synced device/host array - switch(data->memState) { - case CEED_CUDA_HOST_SYNC: - ierr = CeedHostReciprocal_Cuda(data->h_array, length); CeedChkBackend(ierr); - break; - case CEED_CUDA_DEVICE_SYNC: - ierr = CeedDeviceReciprocal_Cuda(data->d_array, length); CeedChkBackend(ierr); - break; - case CEED_CUDA_BOTH_SYNC: - ierr = CeedDeviceReciprocal_Cuda(data->d_array, length); CeedChkBackend(ierr); - data->memState = CEED_CUDA_DEVICE_SYNC; - break; - // LCOV_EXCL_START - case CEED_CUDA_NONE_SYNC: - break; // Not possible, but included for completness - // LCOV_EXCL_STOP + if (impl->d_array) { + ierr = CeedDeviceReciprocal_Cuda(impl->d_array, length); CeedChkBackend(ierr); } + if (impl->h_array) { + ierr = CeedHostReciprocal_Cuda(impl->h_array, length); CeedChkBackend(ierr); + } + return CEED_ERROR_SUCCESS; } @@ -485,31 +627,20 @@ static int CeedVectorScale_Cuda(CeedVector x, CeedScalar alpha) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(x, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *x_data; - ierr = CeedVectorGetData(x, &x_data); CeedChkBackend(ierr); + CeedVector_Cuda *x_impl; + ierr = CeedVectorGetData(x, &x_impl); CeedChkBackend(ierr); CeedInt length; ierr = CeedVectorGetLength(x, &length); CeedChkBackend(ierr); // Set value for synced device/host array - switch(x_data->memState) { - case CEED_CUDA_HOST_SYNC: - ierr = CeedHostScale_Cuda(x_data->h_array, alpha, length); - CeedChkBackend(ierr); - break; - case CEED_CUDA_DEVICE_SYNC: - ierr = CeedDeviceScale_Cuda(x_data->d_array, alpha, length); - CeedChkBackend(ierr); - break; - case CEED_CUDA_BOTH_SYNC: - ierr = CeedDeviceScale_Cuda(x_data->d_array, alpha, length); + if (x_impl->d_array) { + ierr = CeedDeviceScale_Cuda(x_impl->d_array, alpha, length); CeedChkBackend(ierr); - x_data->memState = CEED_CUDA_DEVICE_SYNC; - break; - // LCOV_EXCL_START - case CEED_CUDA_NONE_SYNC: - break; // Not possible, but included for completness - // LCOV_EXCL_STOP } + if (x_impl->h_array) { + ierr = CeedHostScale_Cuda(x_impl->h_array, alpha, length); CeedChkBackend(ierr); + } + return CEED_ERROR_SUCCESS; } @@ -536,35 +667,24 @@ static int CeedVectorAXPY_Cuda(CeedVector y, CeedScalar alpha, CeedVector x) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(y, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *y_data, *x_data; - ierr = CeedVectorGetData(y, &y_data); CeedChkBackend(ierr); - ierr = CeedVectorGetData(x, &x_data); CeedChkBackend(ierr); + CeedVector_Cuda *y_impl, *x_impl; + ierr = CeedVectorGetData(y, &y_impl); CeedChkBackend(ierr); + ierr = CeedVectorGetData(x, &x_impl); CeedChkBackend(ierr); CeedInt length; ierr = CeedVectorGetLength(y, &length); CeedChkBackend(ierr); // Set value for synced device/host array - switch(y_data->memState) { - case CEED_CUDA_HOST_SYNC: - ierr = CeedVectorSyncArray(x, CEED_MEM_HOST); CeedChkBackend(ierr); - ierr = CeedHostAXPY_Cuda(y_data->h_array, alpha, x_data->h_array, length); - CeedChkBackend(ierr); - break; - case CEED_CUDA_DEVICE_SYNC: + if (y_impl->d_array) { ierr = CeedVectorSyncArray(x, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedDeviceAXPY_Cuda(y_data->d_array, alpha, x_data->d_array, length); + ierr = CeedDeviceAXPY_Cuda(y_impl->d_array, alpha, x_impl->d_array, length); CeedChkBackend(ierr); - break; - case CEED_CUDA_BOTH_SYNC: - ierr = CeedVectorSyncArray(x, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedDeviceAXPY_Cuda(y_data->d_array, alpha, x_data->d_array, length); + } + if (y_impl->h_array) { + ierr = CeedVectorSyncArray(x, CEED_MEM_HOST); CeedChkBackend(ierr); + ierr = CeedHostAXPY_Cuda(y_impl->h_array, alpha, x_impl->h_array, length); CeedChkBackend(ierr); - y_data->memState = CEED_CUDA_DEVICE_SYNC; - break; - // LCOV_EXCL_START - case CEED_CUDA_NONE_SYNC: - break; // Not possible, but included for completness - // LCOV_EXCL_STOP } + return CEED_ERROR_SUCCESS; } @@ -592,46 +712,32 @@ static int CeedVectorPointwiseMult_Cuda(CeedVector w, CeedVector x, int ierr; Ceed ceed; ierr = CeedVectorGetCeed(w, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *w_data, *x_data, *y_data; - ierr = CeedVectorGetData(w, &w_data); CeedChkBackend(ierr); - ierr = CeedVectorGetData(x, &x_data); CeedChkBackend(ierr); - ierr = CeedVectorGetData(y, &y_data); CeedChkBackend(ierr); + CeedVector_Cuda *w_impl, *x_impl, *y_impl; + ierr = CeedVectorGetData(w, &w_impl); CeedChkBackend(ierr); + ierr = CeedVectorGetData(x, &x_impl); CeedChkBackend(ierr); + ierr = CeedVectorGetData(y, &y_impl); CeedChkBackend(ierr); CeedInt length; ierr = CeedVectorGetLength(w, &length); CeedChkBackend(ierr); // Set value for synced device/host array - switch(w_data->memState) { - case CEED_CUDA_HOST_SYNC: - ierr = CeedVectorSyncArray(x, CEED_MEM_HOST); CeedChkBackend(ierr); - ierr = CeedVectorSyncArray(y, CEED_MEM_HOST); CeedChkBackend(ierr); - ierr = CeedHostPointwiseMult_Cuda(w_data->h_array, x_data->h_array, - y_data->h_array, length); - CeedChkBackend(ierr); - break; - case CEED_CUDA_DEVICE_SYNC: - ierr = CeedVectorSyncArray(x, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedVectorSyncArray(y, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedDevicePointwiseMult_Cuda(w_data->d_array, x_data->d_array, - y_data->d_array, length); - CeedChkBackend(ierr); - break; - case CEED_CUDA_BOTH_SYNC: - ierr = CeedVectorSyncArray(x, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedVectorSyncArray(y, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedDevicePointwiseMult_Cuda(w_data->d_array, x_data->d_array, - y_data->d_array, length); - CeedChkBackend(ierr); - w_data->memState = CEED_CUDA_DEVICE_SYNC; - break; - case CEED_CUDA_NONE_SYNC: + if (!w_impl->d_array && !w_impl->h_array) { ierr = CeedVectorSetValue(w, 0.0); CeedChkBackend(ierr); + } + if (w_impl->d_array) { ierr = CeedVectorSyncArray(x, CEED_MEM_DEVICE); CeedChkBackend(ierr); ierr = CeedVectorSyncArray(y, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedDevicePointwiseMult_Cuda(w_data->d_array, x_data->d_array, - y_data->d_array, length); + ierr = CeedDevicePointwiseMult_Cuda(w_impl->d_array, x_impl->d_array, + y_impl->d_array, length); + CeedChkBackend(ierr); + } + if (w_impl->h_array) { + ierr = CeedVectorSyncArray(x, CEED_MEM_HOST); CeedChkBackend(ierr); + ierr = CeedVectorSyncArray(y, CEED_MEM_HOST); CeedChkBackend(ierr); + ierr = CeedHostPointwiseMult_Cuda(w_impl->h_array, x_impl->h_array, + y_impl->h_array, length); CeedChkBackend(ierr); - break; } + return CEED_ERROR_SUCCESS; } @@ -642,12 +748,13 @@ static int CeedVectorDestroy_Cuda(const CeedVector vec) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Cuda *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Cuda *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + ierr = cudaFree(impl->d_array_owned); CeedChk_Cu(ceed, ierr); + ierr = CeedFree(&impl->h_array_owned); CeedChkBackend(ierr); + ierr = CeedFree(&impl); CeedChkBackend(ierr); - ierr = cudaFree(data->d_array_allocated); CeedChk_Cu(ceed, ierr); - ierr = CeedFree(&data->h_array_allocated); CeedChkBackend(ierr); - ierr = CeedFree(&data); CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } @@ -655,21 +762,29 @@ static int CeedVectorDestroy_Cuda(const CeedVector vec) { // Create a vector of the specified length (does not allocate memory) //------------------------------------------------------------------------------ int CeedVectorCreate_Cuda(CeedInt n, CeedVector vec) { - CeedVector_Cuda *data; + CeedVector_Cuda *impl; int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Vector", vec, "HasValidArray", + CeedVectorHasValidArray_Cuda); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Vector", vec, "HasBorrowedArrayOfType", + CeedVectorHasBorrowedArrayOfType_Cuda); + CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Cuda); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "TakeArray", CeedVectorTakeArray_Cuda); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "SetValue", - (int (*)())(CeedVectorSetValue_Cuda)); CeedChkBackend(ierr); + (int (*)())(CeedVectorSetValue_Cuda)); + CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArray", CeedVectorGetArray_Cuda); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Cuda); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayWrite", + CeedVectorGetArrayWrite_Cuda); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "RestoreArray", CeedVectorRestoreArray_Cuda); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "RestoreArrayRead", @@ -687,8 +802,8 @@ int CeedVectorCreate_Cuda(CeedInt n, CeedVector vec) { ierr = CeedSetBackendFunction(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Cuda); CeedChkBackend(ierr); - ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); - ierr = CeedVectorSetData(vec, data); CeedChkBackend(ierr); - data->memState = CEED_CUDA_NONE_SYNC; + ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); + ierr = CeedVectorSetData(vec, impl); CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } diff --git a/backends/cuda/ceed-cuda.h b/backends/cuda/ceed-cuda.h index 2b17713ac1..417450c5eb 100644 --- a/backends/cuda/ceed-cuda.h +++ b/backends/cuda/ceed-cuda.h @@ -70,19 +70,13 @@ static const char *cublasGetErrorName(cublasStatus_t error) { } // LCOV_EXCL_STOP -typedef enum { - CEED_CUDA_HOST_SYNC, - CEED_CUDA_DEVICE_SYNC, - CEED_CUDA_BOTH_SYNC, - CEED_CUDA_NONE_SYNC -} CeedCudaSyncState; - typedef struct { CeedScalar *h_array; - CeedScalar *h_array_allocated; + CeedScalar *h_array_borrowed; + CeedScalar *h_array_owned; CeedScalar *d_array; - CeedScalar *d_array_allocated; - CeedCudaSyncState memState; + CeedScalar *d_array_borrowed; + CeedScalar *d_array_owned; } CeedVector_Cuda; typedef struct { @@ -118,11 +112,12 @@ typedef struct { } CeedQFunction_Cuda; typedef struct { - CeedScalar *h_data; - CeedScalar *h_data_allocated; - CeedScalar *d_data; - CeedScalar *d_data_allocated; - CeedCudaSyncState memState; + void *h_data; + void *h_data_borrowed; + void *h_data_owned; + void *d_data; + void *d_data_borrowed; + void *d_data_owned; } CeedQFunctionContext_Cuda; typedef struct { diff --git a/backends/hip-shared/ceed-hip-shared-basis.c b/backends/hip-shared/ceed-hip-shared-basis.c index a9798d6566..f6a18c5d64 100644 --- a/backends/hip-shared/ceed-hip-shared-basis.c +++ b/backends/hip-shared/ceed-hip-shared-basis.c @@ -874,7 +874,7 @@ int CeedBasisApplyTensor_Hip_shared(CeedBasis basis, const CeedInt nelem, if (emode != CEED_EVAL_WEIGHT) { ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); } - ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); // Clear v for transpose mode if (tmode == CEED_TRANSPOSE) { diff --git a/backends/hip/ceed-hip-basis.c b/backends/hip/ceed-hip-basis.c index 87b0b024df..98a75c7125 100644 --- a/backends/hip/ceed-hip-basis.c +++ b/backends/hip/ceed-hip-basis.c @@ -359,7 +359,7 @@ int CeedBasisApply_Hip(CeedBasis basis, const CeedInt nelem, if (emode != CEED_EVAL_WEIGHT) { ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); } - ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); // Clear v for transpose operation if (tmode == CEED_TRANSPOSE) { @@ -451,7 +451,7 @@ int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt nelem, if (emode != CEED_EVAL_WEIGHT) { ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); } - ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); // Clear v for transpose operation if (tmode == CEED_TRANSPOSE) { diff --git a/backends/hip/ceed-hip-operator.c b/backends/hip/ceed-hip-operator.c index 68e4c66af8..ca77d8bac7 100644 --- a/backends/hip/ceed-hip-operator.c +++ b/backends/hip/ceed-hip-operator.c @@ -82,7 +82,7 @@ static int CeedOperatorDestroy_Hip(CeedOperator op) { // Setup infields or outfields //------------------------------------------------------------------------------ static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, - bool inOrOut, CeedVector *evecs, + bool isinput, CeedVector *evecs, CeedVector *qvecs, CeedInt starte, CeedInt numfields, CeedInt Q, CeedInt numelements) { @@ -97,15 +97,15 @@ static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, bool strided; bool skiprestrict; - if (inOrOut) { - ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &opfields); + if (isinput) { + ierr = CeedOperatorGetFields(op, NULL, &opfields, NULL, NULL); CeedChkBackend(ierr); - ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qffields); + ierr = CeedQFunctionGetFields(qf, NULL, &qffields, NULL, NULL); CeedChkBackend(ierr); } else { - ierr = CeedOperatorGetFields(op, NULL, &opfields, NULL, NULL); + ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &opfields); CeedChkBackend(ierr); - ierr = CeedQFunctionGetFields(qf, NULL, &qffields, NULL, NULL); + ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qffields); CeedChkBackend(ierr); } @@ -125,7 +125,7 @@ static int CeedOperatorSetupFields_Hip(CeedQFunction qf, CeedOperator op, // CEED_STRIDES_BACKEND. // First, check whether the field is input or output: - if (!inOrOut) { + if (isinput) { // Check for passive input: ierr = CeedOperatorFieldGetVector(opfields[i], &fieldvec); CeedChkBackend(ierr); if (fieldvec != CEED_VECTOR_ACTIVE) { @@ -224,13 +224,13 @@ static int CeedOperatorSetup_Hip(CeedOperator op) { // Set up infield and outfield evecs and qvecs // Infields - ierr = CeedOperatorSetupFields_Hip(qf, op, 0, + ierr = CeedOperatorSetupFields_Hip(qf, op, true, impl->evecs, impl->qvecsin, 0, numinputfields, Q, numelements); CeedChkBackend(ierr); // Outfields - ierr = CeedOperatorSetupFields_Hip(qf, op, 1, + ierr = CeedOperatorSetupFields_Hip(qf, op, false, impl->evecs, impl->qvecsout, numinputfields, numoutputfields, Q, numelements); CeedChkBackend(ierr); @@ -433,8 +433,8 @@ static int CeedOperatorApplyAdd_Hip(CeedOperator op, CeedVector invec, CeedChkBackend(ierr); if (emode == CEED_EVAL_NONE) { // Set the output Q-Vector to use the E-Vector data directly. - ierr = CeedVectorGetArray(impl->evecs[i + impl->numein], CEED_MEM_DEVICE, - &edata[i + numinputfields]); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(impl->evecs[i + impl->numein], CEED_MEM_DEVICE, + &edata[i + numinputfields]); CeedChkBackend(ierr); ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_DEVICE, CEED_USE_POINTER, edata[i + numinputfields]); CeedChkBackend(ierr); @@ -1146,10 +1146,11 @@ static inline int CeedOperatorAssembleDiagonalCore_Hip(CeedOperator op, ierr = CeedVectorSetValue(elemdiag, 0.0); CeedChkBackend(ierr); // Assemble element operator diagonals - CeedScalar *elemdiagarray, *assembledqfarray; + CeedScalar *elemdiagarray; + const CeedScalar *assembledqfarray; ierr = CeedVectorGetArray(elemdiag, CEED_MEM_DEVICE, &elemdiagarray); CeedChkBackend(ierr); - ierr = CeedVectorGetArray(assembledqf, CEED_MEM_DEVICE, &assembledqfarray); + ierr = CeedVectorGetArrayRead(assembledqf, CEED_MEM_DEVICE, &assembledqfarray); CeedChkBackend(ierr); CeedInt nelem; ierr = CeedElemRestrictionGetNumElements(diagrstr, &nelem); @@ -1175,7 +1176,7 @@ static inline int CeedOperatorAssembleDiagonalCore_Hip(CeedOperator op, // Restore arrays ierr = CeedVectorRestoreArray(elemdiag, &elemdiagarray); CeedChkBackend(ierr); - ierr = CeedVectorRestoreArray(assembledqf, &assembledqfarray); + ierr = CeedVectorRestoreArrayRead(assembledqf, &assembledqfarray); CeedChkBackend(ierr); // Assemble local operator diagonal diff --git a/backends/hip/ceed-hip-qfunction.c b/backends/hip/ceed-hip-qfunction.c index 5d227741f4..7ccab054e6 100644 --- a/backends/hip/ceed-hip-qfunction.c +++ b/backends/hip/ceed-hip-qfunction.c @@ -50,7 +50,7 @@ static int CeedQFunctionApply_Hip(CeedQFunction qf, CeedInt Q, CeedChkBackend(ierr); } for (CeedInt i = 0; i < numoutputfields; i++) { - ierr = CeedVectorGetArray(V[i], CEED_MEM_DEVICE, &data->fields.outputs[i]); + ierr = CeedVectorGetArrayWrite(V[i], CEED_MEM_DEVICE, &data->fields.outputs[i]); CeedChkBackend(ierr); } diff --git a/backends/hip/ceed-hip-qfunctioncontext.c b/backends/hip/ceed-hip-qfunctioncontext.c index 3dbc133203..4c5d450776 100644 --- a/backends/hip/ceed-hip-qfunctioncontext.c +++ b/backends/hip/ceed-hip-qfunctioncontext.c @@ -41,8 +41,25 @@ static inline int CeedQFunctionContextSyncH2D_Hip( CeedQFunctionContext_Hip *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + if (!impl->h_data) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "No valid host data to sync to device"); + // LCOV_EXCL_STOP + + if (impl->d_data_borrowed) { + impl->d_data = impl->d_data_borrowed; + } else if (impl->d_data_owned) { + impl->d_data = impl->d_data_owned; + } else { + ierr = hipMalloc((void **)&impl->d_data_owned, bytes(ctx)); + CeedChk_Hip(ceed, ierr); + impl->d_data = impl->d_data_owned; + } + ierr = hipMemcpy(impl->d_data, impl->h_data, bytes(ctx), hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); + return CEED_ERROR_SUCCESS; } @@ -57,8 +74,112 @@ static inline int CeedQFunctionContextSyncD2H_Hip( CeedQFunctionContext_Hip *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + if (!impl->d_data) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "No valid device data to sync to host"); + // LCOV_EXCL_STOP + + if (impl->h_data_borrowed) { + impl->h_data = impl->h_data_borrowed; + } else if (impl->h_data_owned) { + impl->h_data = impl->h_data_owned; + } else { + ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); + CeedChkBackend(ierr); + impl->h_data = impl->h_data_owned; + } + ierr = hipMemcpy(impl->h_data, impl->d_data, bytes(ctx), hipMemcpyDeviceToHost); CeedChk_Hip(ceed, ierr); + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Sync data of type +//------------------------------------------------------------------------------ +static inline int CeedQFunctionContextSync_Hip(const CeedQFunctionContext ctx, + CeedMemType mtype) { + switch (mtype) { + case CEED_MEM_HOST: return CeedQFunctionContextSyncD2H_Hip(ctx); + case CEED_MEM_DEVICE: return CeedQFunctionContextSyncH2D_Hip(ctx); + } + return CEED_ERROR_UNSUPPORTED; +} + +//------------------------------------------------------------------------------ +// Set all pointers as invalid +//------------------------------------------------------------------------------ +static inline int CeedQFunctionContextSetAllInvalid_Hip( + const CeedQFunctionContext ctx) { + int ierr; + CeedQFunctionContext_Hip *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + + impl->h_data = NULL; + impl->d_data = NULL; + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check for valid data +//------------------------------------------------------------------------------ +static inline int CeedQFunctionContextHasValidData_Hip( + const CeedQFunctionContext ctx, bool *has_valid_data) { + int ierr; + CeedQFunctionContext_Hip *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + + *has_valid_data = !!impl->h_data || !!impl->d_data; + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if ctx has borrowed data +//------------------------------------------------------------------------------ +static inline int CeedQFunctionContextHasBorrowedDataOfType_Hip( + const CeedQFunctionContext ctx, CeedMemType mtype, + bool *has_borrowed_data_of_type) { + int ierr; + CeedQFunctionContext_Hip *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + + switch (mtype) { + case CEED_MEM_HOST: + *has_borrowed_data_of_type = !!impl->h_data_borrowed; + break; + case CEED_MEM_DEVICE: + *has_borrowed_data_of_type = !!impl->d_data_borrowed; + break; + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if data of given type needs sync +//------------------------------------------------------------------------------ +static inline int CeedQFunctionContextNeedSync_Hip( + const CeedQFunctionContext ctx, CeedMemType mtype, bool *need_sync) { + int ierr; + CeedQFunctionContext_Hip *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + + bool has_valid_data = true; + ierr = CeedQFunctionContextHasValidData_Hip(ctx, &has_valid_data); + CeedChkBackend(ierr); + switch (mtype) { + case CEED_MEM_HOST: + *need_sync = has_valid_data && !impl->h_data; + break; + case CEED_MEM_DEVICE: + *need_sync = has_valid_data && !impl->d_data; + break; + } + return CEED_ERROR_SUCCESS; } @@ -66,30 +187,30 @@ static inline int CeedQFunctionContextSyncD2H_Hip( // Set data from host //------------------------------------------------------------------------------ static int CeedQFunctionContextSetDataHost_Hip(const CeedQFunctionContext ctx, - const CeedCopyMode cmode, CeedScalar *data) { + const CeedCopyMode cmode, void *data) { int ierr; CeedQFunctionContext_Hip *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr); switch (cmode) { case CEED_COPY_VALUES: { - if(!impl->h_data) { - ierr = CeedMalloc(bytes(ctx), &impl->h_data_allocated); CeedChkBackend(ierr); - impl->h_data = impl->h_data_allocated; - } + ierr = CeedMalloc(bytes(ctx), &impl->h_data_owned); CeedChkBackend(ierr); + impl->h_data_borrowed = NULL; + impl->h_data = impl->h_data_owned; memcpy(impl->h_data, data, bytes(ctx)); } break; case CEED_OWN_POINTER: - ierr = CeedFree(&impl->h_data_allocated); CeedChkBackend(ierr); - impl->h_data_allocated = data; + impl->h_data_owned = data; + impl->h_data_borrowed = NULL; impl->h_data = data; break; case CEED_USE_POINTER: - ierr = CeedFree(&impl->h_data_allocated); CeedChkBackend(ierr); + impl->h_data_borrowed = data; impl->h_data = data; break; } - impl->memState = CEED_HIP_HOST_SYNC; + return CEED_ERROR_SUCCESS; } @@ -97,103 +218,93 @@ static int CeedQFunctionContextSetDataHost_Hip(const CeedQFunctionContext ctx, // Set data from device //------------------------------------------------------------------------------ static int CeedQFunctionContextSetDataDevice_Hip(const CeedQFunctionContext ctx, - const CeedCopyMode cmode, CeedScalar *data) { + const CeedCopyMode cmode, void *data) { int ierr; Ceed ceed; ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); CeedQFunctionContext_Hip *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); + ierr = hipFree(impl->d_data_owned); CeedChk_Hip(ceed, ierr); + impl->d_data_owned = NULL; switch (cmode) { case CEED_COPY_VALUES: - if (!impl->d_data) { - ierr = hipMalloc((void **)&impl->d_data_allocated, bytes(ctx)); - CeedChk_Hip(ceed, ierr); - impl->d_data = impl->d_data_allocated; - } + ierr = hipMalloc((void **)&impl->d_data_owned, bytes(ctx)); + CeedChk_Hip(ceed, ierr); + impl->d_data_borrowed = NULL; + impl->d_data = impl->d_data_owned; ierr = hipMemcpy(impl->d_data, data, bytes(ctx), hipMemcpyDeviceToDevice); CeedChk_Hip(ceed, ierr); break; case CEED_OWN_POINTER: - ierr = hipFree(impl->d_data_allocated); CeedChk_Hip(ceed, ierr); - impl->d_data_allocated = data; + impl->d_data_owned = data; + impl->d_data_borrowed = NULL; impl->d_data = data; break; case CEED_USE_POINTER: - ierr = hipFree(impl->d_data_allocated); CeedChk_Hip(ceed, ierr); - impl->d_data_allocated = NULL; + impl->d_data_owned = NULL; + impl->d_data_borrowed = data; impl->d_data = data; break; } - impl->memState = CEED_HIP_DEVICE_SYNC; + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ -// Set the array used by a user context, -// freeing any previously allocated array if applicable +// Set the data used by a user context, +// freeing any previously allocated data if applicable //------------------------------------------------------------------------------ static int CeedQFunctionContextSetData_Hip(const CeedQFunctionContext ctx, - const CeedMemType mtype, const CeedCopyMode cmode, CeedScalar *data) { + const CeedMemType mtype, const CeedCopyMode cmode, void *data) { int ierr; Ceed ceed; ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + ierr = CeedQFunctionContextSetAllInvalid_Hip(ctx); CeedChkBackend(ierr); switch (mtype) { case CEED_MEM_HOST: return CeedQFunctionContextSetDataHost_Hip(ctx, cmode, data); case CEED_MEM_DEVICE: return CeedQFunctionContextSetDataDevice_Hip(ctx, cmode, data); } - return 1; + + return CEED_ERROR_UNSUPPORTED; } //------------------------------------------------------------------------------ // Take data //------------------------------------------------------------------------------ static int CeedQFunctionContextTakeData_Hip(const CeedQFunctionContext ctx, - const CeedMemType mtype, CeedScalar *data) { + const CeedMemType mtype, void *data) { int ierr; Ceed ceed; ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); CeedQFunctionContext_Hip *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); - if(impl->h_data == NULL && impl->d_data == NULL) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "No context data set"); - // LCOV_EXCL_STOP - // Sync array to requested memtype and update pointer + // Sync data to requested memtype + bool need_sync = false; + ierr = CeedQFunctionContextNeedSync_Hip(ctx, mtype, &need_sync); + CeedChkBackend(ierr); + if (need_sync) { + ierr = CeedQFunctionContextSync_Hip(ctx, mtype); CeedChkBackend(ierr); + } + + // Update pointer switch (mtype) { case CEED_MEM_HOST: - if (impl->h_data == NULL) { - ierr = CeedMalloc(bytes(ctx), &impl->h_data_allocated); - CeedChkBackend(ierr); - impl->h_data = impl->h_data_allocated; - } - if (impl->memState == CEED_HIP_DEVICE_SYNC) { - ierr = CeedQFunctionContextSyncD2H_Hip(ctx); CeedChkBackend(ierr); - } - impl->memState = CEED_HIP_HOST_SYNC; - *(void **)data = impl->h_data; + *(void **)data = impl->h_data_borrowed; + impl->h_data_borrowed = NULL; impl->h_data = NULL; - impl->h_data_allocated = NULL; break; case CEED_MEM_DEVICE: - if (impl->d_data == NULL) { - ierr = hipMalloc((void **)&impl->d_data_allocated, bytes(ctx)); - CeedChk_Hip(ceed, ierr); - impl->d_data = impl->d_data_allocated; - } - if (impl->memState == CEED_HIP_HOST_SYNC) { - ierr = CeedQFunctionContextSyncH2D_Hip(ctx); CeedChkBackend(ierr); - } - impl->memState = CEED_HIP_DEVICE_SYNC; - *(void **)data = impl->d_data; + *(void **)data = impl->d_data_borrowed; + impl->d_data_borrowed = NULL; impl->d_data = NULL; - impl->d_data_allocated = NULL; break; } + return CEED_ERROR_SUCCESS; } @@ -201,44 +312,42 @@ static int CeedQFunctionContextTakeData_Hip(const CeedQFunctionContext ctx, // Get data //------------------------------------------------------------------------------ static int CeedQFunctionContextGetData_Hip(const CeedQFunctionContext ctx, - const CeedMemType mtype, CeedScalar *data) { + const CeedMemType mtype, void *data) { int ierr; Ceed ceed; ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); CeedQFunctionContext_Hip *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); - if(impl->h_data == NULL && impl->d_data == NULL) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "No context data set"); - // LCOV_EXCL_STOP - // Sync array to requested memtype and update pointer + // Sync data to requested memtype + bool need_sync = false; + ierr = CeedQFunctionContextNeedSync_Hip(ctx, mtype, &need_sync); + CeedChkBackend(ierr); + if (need_sync) { + ierr = CeedQFunctionContextSync_Hip(ctx, mtype); CeedChkBackend(ierr); + } + + // Sync data to requested memtype and update pointer switch (mtype) { case CEED_MEM_HOST: - if (impl->h_data == NULL) { - ierr = CeedMalloc(bytes(ctx), &impl->h_data_allocated); - CeedChkBackend(ierr); - impl->h_data = impl->h_data_allocated; - } - if (impl->memState == CEED_HIP_DEVICE_SYNC) { - ierr = CeedQFunctionContextSyncD2H_Hip(ctx); CeedChkBackend(ierr); - } - impl->memState = CEED_HIP_HOST_SYNC; *(void **)data = impl->h_data; break; case CEED_MEM_DEVICE: - if (impl->d_data == NULL) { - ierr = hipMalloc((void **)&impl->d_data_allocated, bytes(ctx)); - CeedChk_Hip(ceed, ierr); - impl->d_data = impl->d_data_allocated; - } - if (impl->memState == CEED_HIP_HOST_SYNC) { - ierr = CeedQFunctionContextSyncH2D_Hip(ctx); CeedChkBackend(ierr); - } - impl->memState = CEED_HIP_DEVICE_SYNC; *(void **)data = impl->d_data; break; } + + // Mark only pointer for requested memory as valid + ierr = CeedQFunctionContextSetAllInvalid_Hip(ctx); CeedChkBackend(ierr); + switch (mtype) { + case CEED_MEM_HOST: + impl->h_data = *(void **)data; + break; + case CEED_MEM_DEVICE: + impl->d_data = *(void **)data; + break; + } + return CEED_ERROR_SUCCESS; } @@ -259,9 +368,10 @@ static int CeedQFunctionContextDestroy_Hip(const CeedQFunctionContext ctx) { CeedQFunctionContext_Hip *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); - ierr = hipFree(impl->d_data_allocated); CeedChk_Hip(ceed, ierr); - ierr = CeedFree(&impl->h_data_allocated); CeedChkBackend(ierr); + ierr = hipFree(impl->d_data_owned); CeedChk_Hip(ceed, ierr); + ierr = CeedFree(&impl->h_data_owned); CeedChkBackend(ierr); ierr = CeedFree(&impl); CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } @@ -274,6 +384,13 @@ int CeedQFunctionContextCreate_Hip(CeedQFunctionContext ctx) { Ceed ceed; ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "HasValidData", + CeedQFunctionContextHasValidData_Hip); + CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, + "HasBorrowedDataOfType", + CeedQFunctionContextHasBorrowedDataOfType_Hip); + CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "SetData", CeedQFunctionContextSetData_Hip); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "TakeData", @@ -284,9 +401,10 @@ int CeedQFunctionContextCreate_Hip(CeedQFunctionContext ctx) { CeedQFunctionContextRestoreData_Hip); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "Destroy", CeedQFunctionContextDestroy_Hip); CeedChkBackend(ierr); + ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); - impl->memState = CEED_HIP_NONE_SYNC; ierr = CeedQFunctionContextSetBackendData(ctx, impl); CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ diff --git a/backends/hip/ceed-hip-restriction.c b/backends/hip/ceed-hip-restriction.c index c3f35ecd42..20976c8b8a 100644 --- a/backends/hip/ceed-hip-restriction.c +++ b/backends/hip/ceed-hip-restriction.c @@ -144,7 +144,13 @@ static int CeedElemRestrictionApply_Hip(CeedElemRestriction r, const CeedScalar *d_u; CeedScalar *d_v; ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); - ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); + if (tmode == CEED_TRANSPOSE) { + // Sum into for transpose mode, e-vec to l-vec + ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); + } else { + // Overwrite for notranspose mode, l-vec to e-vec + ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); + } // Restrict if (tmode == CEED_NOTRANSPOSE) { diff --git a/backends/hip/ceed-hip-vector.c b/backends/hip/ceed-hip-vector.c index 7741601911..b69e4b43fe 100644 --- a/backends/hip/ceed-hip-vector.c +++ b/backends/hip/ceed-hip-vector.c @@ -39,11 +39,28 @@ static inline int CeedVectorSyncH2D_Hip(const CeedVector vec) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + if (!impl->h_array) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "No valid host data to sync to device"); + // LCOV_EXCL_STOP + + if (impl->d_array_borrowed) { + impl->d_array = impl->d_array_borrowed; + } else if (impl->d_array_owned) { + impl->d_array = impl->d_array_owned; + } else { + ierr = hipMalloc((void **)&impl->d_array_owned, bytes(vec)); + CeedChk_Hip(ceed, ierr); + impl->d_array = impl->d_array_owned; + } - ierr = hipMemcpy(data->d_array, data->h_array, bytes(vec), + ierr = hipMemcpy(impl->d_array, impl->h_array, bytes(vec), hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); + return CEED_ERROR_SUCCESS; } @@ -54,11 +71,133 @@ static inline int CeedVectorSyncD2H_Hip(const CeedVector vec) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + if (!impl->d_array) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "No valid device data to sync to host"); + // LCOV_EXCL_STOP + + if (impl->h_array_borrowed) { + impl->h_array = impl->h_array_borrowed; + } else if (impl->h_array_owned) { + impl->h_array = impl->h_array_owned; + } else { + CeedInt length; + ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); + ierr = CeedCalloc(length, &impl->h_array_owned); CeedChkBackend(ierr); + impl->h_array = impl->h_array_owned; + } - ierr = hipMemcpy(data->h_array, data->d_array, bytes(vec), + ierr = hipMemcpy(impl->h_array, impl->d_array, bytes(vec), hipMemcpyDeviceToHost); CeedChk_Hip(ceed, ierr); + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Sync arrays +//------------------------------------------------------------------------------ +static inline int CeedVectorSync_Hip(const CeedVector vec, CeedMemType mtype) { + switch (mtype) { + case CEED_MEM_HOST: return CeedVectorSyncD2H_Hip(vec); + case CEED_MEM_DEVICE: return CeedVectorSyncH2D_Hip(vec); + } + return CEED_ERROR_UNSUPPORTED; +} + +//------------------------------------------------------------------------------ +// Set all pointers as invalid +//------------------------------------------------------------------------------ +static inline int CeedVectorSetAllInvalid_Hip(const CeedVector vec) { + int ierr; + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + impl->h_array = NULL; + impl->d_array = NULL; + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if CeedVector has any valid pointers +//------------------------------------------------------------------------------ +static inline int CeedVectorHasValidArray_Hip(const CeedVector vec, + bool *has_valid_array) { + int ierr; + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + *has_valid_array = !!impl->h_array || !!impl->d_array; + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if has any array of given type +//------------------------------------------------------------------------------ +static inline int CeedVectorHasArrayOfType_Hip(const CeedVector vec, + CeedMemType mtype, bool *has_array_of_type) { + int ierr; + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + switch (mtype) { + case CEED_MEM_HOST: + *has_array_of_type = !!impl->h_array_borrowed || !!impl->h_array_owned; + break; + case CEED_MEM_DEVICE: + *has_array_of_type = !!impl->d_array_borrowed || !!impl->d_array_owned; + break; + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if has borrowed array of given type +//------------------------------------------------------------------------------ +static inline int CeedVectorHasBorrowedArrayOfType_Hip(const CeedVector vec, + CeedMemType mtype, bool *has_borrowed_array_of_type) { + int ierr; + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + switch (mtype) { + case CEED_MEM_HOST: + *has_borrowed_array_of_type = !!impl->h_array_borrowed; + break; + case CEED_MEM_DEVICE: + *has_borrowed_array_of_type = !!impl->d_array_borrowed; + break; + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Sync array of given type +//------------------------------------------------------------------------------ +static inline int CeedVectorNeedSync_Hip(const CeedVector vec, + CeedMemType mtype, bool *need_sync) { + int ierr; + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + bool has_valid_array = false; + ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChkBackend(ierr); + switch (mtype) { + case CEED_MEM_HOST: + *need_sync = has_valid_array && !impl->h_array; + break; + case CEED_MEM_DEVICE: + *need_sync = has_valid_array && !impl->d_array; + break; + } + return CEED_ERROR_SUCCESS; } @@ -66,34 +205,36 @@ static inline int CeedVectorSyncD2H_Hip(const CeedVector vec) { // Set array from host //------------------------------------------------------------------------------ static int CeedVectorSetArrayHost_Hip(const CeedVector vec, - const CeedCopyMode cmode, - CeedScalar *array) { + const CeedCopyMode cmode, CeedScalar *array) { int ierr; - CeedVector_Hip *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); switch (cmode) { case CEED_COPY_VALUES: { CeedInt length; - if(!data->h_array) { + if (!impl->h_array_owned) { ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); - ierr = CeedMalloc(length, &data->h_array_allocated); CeedChkBackend(ierr); - data->h_array = data->h_array_allocated; + ierr = CeedMalloc(length, &impl->h_array_owned); CeedChkBackend(ierr); } + impl->h_array_borrowed = NULL; + impl->h_array = impl->h_array_owned; if (array) - memcpy(data->h_array, array, bytes(vec)); + memcpy(impl->h_array, array, bytes(vec)); } break; case CEED_OWN_POINTER: - ierr = CeedFree(&data->h_array_allocated); CeedChkBackend(ierr); - data->h_array_allocated = array; - data->h_array = array; + ierr = CeedFree(&impl->h_array_owned); CeedChkBackend(ierr); + impl->h_array_owned = array; + impl->h_array_borrowed = NULL; + impl->h_array = array; break; case CEED_USE_POINTER: - ierr = CeedFree(&data->h_array_allocated); CeedChkBackend(ierr); - data->h_array = array; + ierr = CeedFree(&impl->h_array_owned); CeedChkBackend(ierr); + impl->h_array_borrowed = array; + impl->h_array = array; break; } - data->memState = CEED_HIP_HOST_SYNC; + return CEED_ERROR_SUCCESS; } @@ -105,33 +246,36 @@ static int CeedVectorSetArrayDevice_Hip(const CeedVector vec, int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); switch (cmode) { case CEED_COPY_VALUES: - if (!data->d_array) { - ierr = hipMalloc((void **)&data->d_array_allocated, bytes(vec)); + if (!impl->d_array_owned) { + ierr = hipMalloc((void **)&impl->d_array_owned, bytes(vec)); CeedChk_Hip(ceed, ierr); - data->d_array = data->d_array_allocated; } + impl->d_array_borrowed = NULL; + impl->d_array = impl->d_array_owned; if (array) { - ierr = hipMemcpy(data->d_array, array, bytes(vec), + ierr = hipMemcpy(impl->d_array, array, bytes(vec), hipMemcpyDeviceToDevice); CeedChk_Hip(ceed, ierr); } break; case CEED_OWN_POINTER: - ierr = hipFree(data->d_array_allocated); CeedChk_Hip(ceed, ierr); - data->d_array_allocated = array; - data->d_array = array; + ierr = hipFree(impl->d_array_owned); CeedChk_Hip(ceed, ierr); + impl->d_array_owned = array; + impl->d_array_borrowed = NULL; + impl->d_array = array; break; case CEED_USE_POINTER: - ierr = hipFree(data->d_array_allocated); CeedChk_Hip(ceed, ierr); - data->d_array_allocated = NULL; - data->d_array = array; + ierr = hipFree(impl->d_array_owned); CeedChk_Hip(ceed, ierr); + impl->d_array_owned = NULL; + impl->d_array_borrowed = array; + impl->d_array = array; break; } - data->memState = CEED_HIP_DEVICE_SYNC; + return CEED_ERROR_SUCCESS; } @@ -139,56 +283,23 @@ static int CeedVectorSetArrayDevice_Hip(const CeedVector vec, // Set the array used by a vector, // freeing any previously allocated array if applicable //------------------------------------------------------------------------------ -static int CeedVectorSetArray_Hip(const CeedVector vec, - const CeedMemType mtype, - const CeedCopyMode cmode, - CeedScalar *array) { +static int CeedVectorSetArray_Hip(const CeedVector vec, const CeedMemType mtype, + const CeedCopyMode cmode, CeedScalar *array) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + ierr = CeedVectorSetAllInvalid_Hip(vec); CeedChkBackend(ierr); switch (mtype) { case CEED_MEM_HOST: return CeedVectorSetArrayHost_Hip(vec, cmode, array); case CEED_MEM_DEVICE: return CeedVectorSetArrayDevice_Hip(vec, cmode, array); } - return 1; -} - -//------------------------------------------------------------------------------ -// Vector Take Array -//------------------------------------------------------------------------------ -static int CeedVectorTakeArray_Hip(CeedVector vec, CeedMemType mtype, - CeedScalar **array) { - int ierr; - CeedVector_Hip *impl; - ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); - - switch(mtype) { - case CEED_MEM_HOST: - if (impl->memState == CEED_HIP_DEVICE_SYNC) { - ierr = CeedVectorSyncD2H_Hip(vec); CeedChkBackend(ierr); - } - (*array) = impl->h_array; - impl->h_array = NULL; - impl->h_array_allocated = NULL; - impl->memState = CEED_HIP_HOST_SYNC; - break; - case CEED_MEM_DEVICE: - if (impl->memState == CEED_HIP_HOST_SYNC) { - ierr = CeedVectorSyncH2D_Hip(vec); CeedChkBackend(ierr); - } - (*array) = impl->d_array; - impl->d_array = NULL; - impl->d_array_allocated = NULL; - impl->memState = CEED_HIP_DEVICE_SYNC; - break; - } - return CEED_ERROR_SUCCESS; + return CEED_ERROR_UNSUPPORTED; } //------------------------------------------------------------------------------ @@ -213,132 +324,172 @@ static int CeedVectorSetValue_Hip(CeedVector vec, CeedScalar val) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); CeedInt length; ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); // Set value for synced device/host array - switch(data->memState) { - case CEED_HIP_HOST_SYNC: - ierr = CeedHostSetValue_Hip(data->h_array, length, val); CeedChkBackend(ierr); - break; - case CEED_HIP_NONE_SYNC: - /* - Handles the case where SetValue is used without SetArray. - Default allocation then happens on the GPU. - */ - if (data->d_array == NULL) { - ierr = hipMalloc((void **)&data->d_array_allocated, bytes(vec)); - CeedChk_Hip(ceed, ierr); - data->d_array = data->d_array_allocated; + if (!impl->d_array && !impl->h_array) { + if (impl->d_array_borrowed) { + impl->d_array = impl->d_array_borrowed; + } else if (impl->h_array_borrowed) { + impl->h_array = impl->h_array_borrowed; + } else if (impl->d_array_owned) { + impl->d_array = impl->d_array_owned; + } else if (impl->h_array_owned) { + impl->h_array = impl->h_array_owned; + } else { + ierr = CeedVectorSetArray(vec, CEED_MEM_DEVICE, CEED_COPY_VALUES, NULL); + CeedChkBackend(ierr); } - data->memState = CEED_HIP_DEVICE_SYNC; - ierr = CeedDeviceSetValue_Hip(data->d_array, length, val); CeedChkBackend(ierr); - break; - case CEED_HIP_DEVICE_SYNC: - ierr = CeedDeviceSetValue_Hip(data->d_array, length, val); CeedChkBackend(ierr); + } + if (impl->d_array) { + ierr = CeedDeviceSetValue_Hip(impl->d_array, length, val); CeedChkBackend(ierr); + } + if (impl->h_array) { + ierr = CeedHostSetValue_Hip(impl->h_array, length, val); CeedChkBackend(ierr); + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Vector Take Array +//------------------------------------------------------------------------------ +static int CeedVectorTakeArray_Hip(CeedVector vec, CeedMemType mtype, + CeedScalar **array) { + int ierr; + Ceed ceed; + ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + // Sync array to requested memtype + bool need_sync = false; + ierr = CeedVectorNeedSync_Hip(vec, mtype, &need_sync); CeedChkBackend(ierr); + if (need_sync) { + ierr = CeedVectorSync_Hip(vec, mtype); CeedChkBackend(ierr); + } + + // Update pointer + switch (mtype) { + case CEED_MEM_HOST: + (*array) = impl->h_array_borrowed; + impl->h_array_borrowed = NULL; + impl->h_array = NULL; break; - case CEED_HIP_BOTH_SYNC: - ierr = CeedHostSetValue_Hip(data->h_array, length, val); CeedChkBackend(ierr); - ierr = CeedDeviceSetValue_Hip(data->d_array, length, val); CeedChkBackend(ierr); + case CEED_MEM_DEVICE: + (*array) = impl->d_array_borrowed; + impl->d_array_borrowed = NULL; + impl->d_array = NULL; break; } + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ -// Get read-only access to a vector via the specified mtype memory type -// on which to access the array. If the backend uses a different memory type, -// this will perform a copy (possibly cached). +// Core logic for array syncronization for GetArray. +// If a different memory type is most up to date, this will perform a copy //------------------------------------------------------------------------------ -static int CeedVectorGetArrayRead_Hip(const CeedVector vec, - const CeedMemType mtype, - const CeedScalar **array) { +static int CeedVectorGetArrayCore_Hip(const CeedVector vec, + const CeedMemType mtype, CeedScalar **array) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + bool need_sync = false; + ierr = CeedVectorNeedSync_Hip(vec, mtype, &need_sync); CeedChkBackend(ierr); + CeedChkBackend(ierr); + if (need_sync) { + // Sync array to requested memtype + ierr = CeedVectorSync_Hip(vec, mtype); CeedChkBackend(ierr); + } - // Sync array to requested memtype and update pointer + // Update pointer switch (mtype) { case CEED_MEM_HOST: - if(data->h_array==NULL) { - CeedInt length; - ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); - ierr = CeedMalloc(length, &data->h_array_allocated); - CeedChkBackend(ierr); - data->h_array = data->h_array_allocated; - } - if(data->memState==CEED_HIP_DEVICE_SYNC) { - ierr = CeedVectorSyncD2H_Hip(vec); - CeedChkBackend(ierr); - data->memState = CEED_HIP_BOTH_SYNC; - } - *array = data->h_array; + *array = impl->h_array; break; case CEED_MEM_DEVICE: - if (data->d_array==NULL) { - ierr = hipMalloc((void **)&data->d_array_allocated, bytes(vec)); - CeedChk_Hip(ceed, ierr); - data->d_array = data->d_array_allocated; - } - if (data->memState==CEED_HIP_HOST_SYNC) { - ierr = CeedVectorSyncH2D_Hip(vec); - CeedChkBackend(ierr); - data->memState = CEED_HIP_BOTH_SYNC; - } - *array = data->d_array; + *array = impl->d_array; break; } + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ -// Get array +// Get read-only access to a vector via the specified mtype +//------------------------------------------------------------------------------ +static int CeedVectorGetArrayRead_Hip(const CeedVector vec, + const CeedMemType mtype, const CeedScalar **array) { + return CeedVectorGetArrayCore_Hip(vec, mtype, (CeedScalar **)array); +} + +//------------------------------------------------------------------------------ +// Get read/write access to a vector via the specified mtype //------------------------------------------------------------------------------ -static int CeedVectorGetArray_Hip(const CeedVector vec, - const CeedMemType mtype, +static int CeedVectorGetArray_Hip(const CeedVector vec, const CeedMemType mtype, CeedScalar **array) { int ierr; - Ceed ceed; - ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); - // Sync array to requested memtype and update pointer + ierr = CeedVectorGetArrayCore_Hip(vec, mtype, array); CeedChkBackend(ierr); + + ierr = CeedVectorSetAllInvalid_Hip(vec); CeedChkBackend(ierr); switch (mtype) { case CEED_MEM_HOST: - if(data->h_array==NULL) { - CeedInt length; - ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); - ierr = CeedMalloc(length, &data->h_array_allocated); - CeedChkBackend(ierr); - data->h_array = data->h_array_allocated; - } - if(data->memState==CEED_HIP_DEVICE_SYNC) { - ierr = CeedVectorSyncD2H_Hip(vec); CeedChkBackend(ierr); - } - data->memState = CEED_HIP_HOST_SYNC; - *array = data->h_array; + impl->h_array = *array; break; case CEED_MEM_DEVICE: - if (data->d_array==NULL) { - ierr = hipMalloc((void **)&data->d_array_allocated, bytes(vec)); - CeedChk_Hip(ceed, ierr); - data->d_array = data->d_array_allocated; - } - if (data->memState==CEED_HIP_HOST_SYNC) { - ierr = CeedVectorSyncH2D_Hip(vec); CeedChkBackend(ierr); - } - data->memState = CEED_HIP_DEVICE_SYNC; - *array = data->d_array; + impl->d_array = *array; break; } + return CEED_ERROR_SUCCESS; } +//------------------------------------------------------------------------------ +// Get write access to a vector via the specified mtype +//------------------------------------------------------------------------------ +static int CeedVectorGetArrayWrite_Hip(const CeedVector vec, + const CeedMemType mtype, CeedScalar **array) { + int ierr; + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + bool has_array_of_type = true; + ierr = CeedVectorHasArrayOfType_Hip(vec, mtype, &has_array_of_type); + CeedChkBackend(ierr); + if (!has_array_of_type) { + // Allocate if array is not yet allocated + ierr = CeedVectorSetArray(vec, mtype, CEED_COPY_VALUES, NULL); + CeedChkBackend(ierr); + } else { + // Select dirty array + switch (mtype) { + case CEED_MEM_HOST: + if (impl->h_array_borrowed) + impl->h_array = impl->h_array_borrowed; + else + impl->h_array = impl->h_array_owned; + break; + case CEED_MEM_DEVICE: + if (impl->d_array_borrowed) + impl->d_array = impl->d_array_borrowed; + else + impl->d_array = impl->d_array_owned; + } + } + + return CeedVectorGetArray_Hip(vec, mtype, array); +} + //------------------------------------------------------------------------------ // Restore an array obtained using CeedVectorGetArrayRead() //------------------------------------------------------------------------------ @@ -361,8 +512,8 @@ static int CeedVectorNorm_Hip(CeedVector vec, CeedNormType type, int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); CeedInt length; ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); hipblasHandle_t handle; @@ -400,7 +551,7 @@ static int CeedVectorNorm_Hip(CeedVector vec, CeedNormType type, } CeedChk_Hipblas(ceed, ierr); CeedScalar normNoAbs; - ierr = hipMemcpy(&normNoAbs, data->d_array+indx-1, sizeof(CeedScalar), + ierr = hipMemcpy(&normNoAbs, impl->d_array+indx-1, sizeof(CeedScalar), hipMemcpyDeviceToHost); CeedChk_Hip(ceed, ierr); *norm = fabs(normNoAbs); break; @@ -433,28 +584,19 @@ static int CeedVectorReciprocal_Hip(CeedVector vec) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); CeedInt length; ierr = CeedVectorGetLength(vec, &length); CeedChkBackend(ierr); // Set value for synced device/host array - switch(data->memState) { - case CEED_HIP_HOST_SYNC: - ierr = CeedHostReciprocal_Hip(data->h_array, length); CeedChkBackend(ierr); - break; - case CEED_HIP_DEVICE_SYNC: - ierr = CeedDeviceReciprocal_Hip(data->d_array, length); CeedChkBackend(ierr); - break; - case CEED_HIP_BOTH_SYNC: - ierr = CeedDeviceReciprocal_Hip(data->d_array, length); CeedChkBackend(ierr); - data->memState = CEED_HIP_DEVICE_SYNC; - break; - // LCOV_EXCL_START - case CEED_HIP_NONE_SYNC: - break; // Not possible, but included for completness - // LCOV_EXCL_STOP + if (impl->d_array) { + ierr = CeedDeviceReciprocal_Hip(impl->d_array, length); CeedChkBackend(ierr); } + if (impl->h_array) { + ierr = CeedHostReciprocal_Hip(impl->h_array, length); CeedChkBackend(ierr); + } + return CEED_ERROR_SUCCESS; } @@ -481,31 +623,20 @@ static int CeedVectorScale_Hip(CeedVector x, CeedScalar alpha) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(x, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *x_data; - ierr = CeedVectorGetData(x, &x_data); CeedChkBackend(ierr); + CeedVector_Hip *x_impl; + ierr = CeedVectorGetData(x, &x_impl); CeedChkBackend(ierr); CeedInt length; ierr = CeedVectorGetLength(x, &length); CeedChkBackend(ierr); // Set value for synced device/host array - switch(x_data->memState) { - case CEED_HIP_HOST_SYNC: - ierr = CeedHostScale_Hip(x_data->h_array, alpha, length); - CeedChkBackend(ierr); - break; - case CEED_HIP_DEVICE_SYNC: - ierr = CeedDeviceScale_Hip(x_data->d_array, alpha, length); - CeedChkBackend(ierr); - break; - case CEED_HIP_BOTH_SYNC: - ierr = CeedDeviceScale_Hip(x_data->d_array, alpha, length); + if (x_impl->d_array) { + ierr = CeedDeviceScale_Hip(x_impl->d_array, alpha, length); CeedChkBackend(ierr); - x_data->memState = CEED_HIP_DEVICE_SYNC; - break; - // LCOV_EXCL_START - case CEED_HIP_NONE_SYNC: - break; // Not possible, but included for completness - // LCOV_EXCL_STOP } + if (x_impl->h_array) { + ierr = CeedHostScale_Hip(x_impl->h_array, alpha, length); CeedChkBackend(ierr); + } + return CEED_ERROR_SUCCESS; } @@ -532,35 +663,24 @@ static int CeedVectorAXPY_Hip(CeedVector y, CeedScalar alpha, CeedVector x) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(y, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *y_data, *x_data; - ierr = CeedVectorGetData(y, &y_data); CeedChkBackend(ierr); - ierr = CeedVectorGetData(x, &x_data); CeedChkBackend(ierr); + CeedVector_Hip *y_impl, *x_impl; + ierr = CeedVectorGetData(y, &y_impl); CeedChkBackend(ierr); + ierr = CeedVectorGetData(x, &x_impl); CeedChkBackend(ierr); CeedInt length; ierr = CeedVectorGetLength(y, &length); CeedChkBackend(ierr); // Set value for synced device/host array - switch(y_data->memState) { - case CEED_HIP_HOST_SYNC: - ierr = CeedVectorSyncArray(x, CEED_MEM_HOST); CeedChkBackend(ierr); - ierr = CeedHostAXPY_Hip(y_data->h_array, alpha, x_data->h_array, length); - CeedChkBackend(ierr); - break; - case CEED_HIP_DEVICE_SYNC: + if (y_impl->d_array) { ierr = CeedVectorSyncArray(x, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedDeviceAXPY_Hip(y_data->d_array, alpha, x_data->d_array, length); + ierr = CeedDeviceAXPY_Hip(y_impl->d_array, alpha, x_impl->d_array, length); CeedChkBackend(ierr); - break; - case CEED_HIP_BOTH_SYNC: - ierr = CeedVectorSyncArray(x, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedDeviceAXPY_Hip(y_data->d_array, alpha, x_data->d_array, length); + } + if (y_impl->h_array) { + ierr = CeedVectorSyncArray(x, CEED_MEM_HOST); CeedChkBackend(ierr); + ierr = CeedHostAXPY_Hip(y_impl->h_array, alpha, x_impl->h_array, length); CeedChkBackend(ierr); - y_data->memState = CEED_HIP_DEVICE_SYNC; - break; - // LCOV_EXCL_START - case CEED_HIP_NONE_SYNC: - break; // Not possible, but included for completness - // LCOV_EXCL_STOP } + return CEED_ERROR_SUCCESS; } @@ -588,46 +708,32 @@ static int CeedVectorPointwiseMult_Hip(CeedVector w, CeedVector x, int ierr; Ceed ceed; ierr = CeedVectorGetCeed(w, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *w_data, *x_data, *y_data; - ierr = CeedVectorGetData(w, &w_data); CeedChkBackend(ierr); - ierr = CeedVectorGetData(x, &x_data); CeedChkBackend(ierr); - ierr = CeedVectorGetData(y, &y_data); CeedChkBackend(ierr); + CeedVector_Hip *w_impl, *x_impl, *y_impl; + ierr = CeedVectorGetData(w, &w_impl); CeedChkBackend(ierr); + ierr = CeedVectorGetData(x, &x_impl); CeedChkBackend(ierr); + ierr = CeedVectorGetData(y, &y_impl); CeedChkBackend(ierr); CeedInt length; ierr = CeedVectorGetLength(w, &length); CeedChkBackend(ierr); // Set value for synced device/host array - switch(w_data->memState) { - case CEED_HIP_HOST_SYNC: - ierr = CeedVectorSyncArray(x, CEED_MEM_HOST); CeedChkBackend(ierr); - ierr = CeedVectorSyncArray(y, CEED_MEM_HOST); CeedChkBackend(ierr); - ierr = CeedHostPointwiseMult_Hip(w_data->h_array, x_data->h_array, - y_data->h_array, length); - CeedChkBackend(ierr); - break; - case CEED_HIP_DEVICE_SYNC: - ierr = CeedVectorSyncArray(x, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedVectorSyncArray(y, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedDevicePointwiseMult_Hip(w_data->d_array, x_data->d_array, - y_data->d_array, length); - CeedChkBackend(ierr); - break; - case CEED_HIP_BOTH_SYNC: - ierr = CeedVectorSyncArray(x, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedVectorSyncArray(y, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedDevicePointwiseMult_Hip(w_data->d_array, x_data->d_array, - y_data->d_array, length); - CeedChkBackend(ierr); - w_data->memState = CEED_HIP_DEVICE_SYNC; - break; - case CEED_HIP_NONE_SYNC: + if (!w_impl->d_array && !w_impl->h_array) { ierr = CeedVectorSetValue(w, 0.0); CeedChkBackend(ierr); + } + if (w_impl->d_array) { ierr = CeedVectorSyncArray(x, CEED_MEM_DEVICE); CeedChkBackend(ierr); ierr = CeedVectorSyncArray(y, CEED_MEM_DEVICE); CeedChkBackend(ierr); - ierr = CeedDevicePointwiseMult_Hip(w_data->d_array, x_data->d_array, - y_data->d_array, length); + ierr = CeedDevicePointwiseMult_Hip(w_impl->d_array, x_impl->d_array, + y_impl->d_array, length); + CeedChkBackend(ierr); + } + if (w_impl->h_array) { + ierr = CeedVectorSyncArray(x, CEED_MEM_HOST); CeedChkBackend(ierr); + ierr = CeedVectorSyncArray(y, CEED_MEM_HOST); CeedChkBackend(ierr); + ierr = CeedHostPointwiseMult_Hip(w_impl->h_array, x_impl->h_array, + y_impl->h_array, length); CeedChkBackend(ierr); - break; } + return CEED_ERROR_SUCCESS; } @@ -638,12 +744,13 @@ static int CeedVectorDestroy_Hip(const CeedVector vec) { int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - CeedVector_Hip *data; - ierr = CeedVectorGetData(vec, &data); CeedChkBackend(ierr); + CeedVector_Hip *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + ierr = hipFree(impl->d_array_owned); CeedChk_Hip(ceed, ierr); + ierr = CeedFree(&impl->h_array_owned); CeedChkBackend(ierr); + ierr = CeedFree(&impl); CeedChkBackend(ierr); - ierr = hipFree(data->d_array_allocated); CeedChk_Hip(ceed, ierr); - ierr = CeedFree(&data->h_array_allocated); CeedChkBackend(ierr); - ierr = CeedFree(&data); CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } @@ -651,11 +758,16 @@ static int CeedVectorDestroy_Hip(const CeedVector vec) { // Create a vector of the specified length (does not allocate memory) //------------------------------------------------------------------------------ int CeedVectorCreate_Hip(CeedInt n, CeedVector vec) { - CeedVector_Hip *data; + CeedVector_Hip *impl; int ierr; Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Vector", vec, "HasValidArray", + CeedVectorHasValidArray_Hip); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Vector", vec, "HasBorrowedArrayOfType", + CeedVectorHasBorrowedArrayOfType_Hip); + CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Hip); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "TakeArray", @@ -666,6 +778,8 @@ int CeedVectorCreate_Hip(CeedInt n, CeedVector vec) { CeedVectorGetArray_Hip); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Hip); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayWrite", + CeedVectorGetArrayWrite_Hip); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "RestoreArray", CeedVectorRestoreArray_Hip); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "RestoreArrayRead", @@ -683,8 +797,8 @@ int CeedVectorCreate_Hip(CeedInt n, CeedVector vec) { ierr = CeedSetBackendFunction(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Hip); CeedChkBackend(ierr); - ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); - ierr = CeedVectorSetData(vec, data); CeedChkBackend(ierr); - data->memState = CEED_HIP_NONE_SYNC; + ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); + ierr = CeedVectorSetData(vec, impl); CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } diff --git a/backends/hip/ceed-hip.h b/backends/hip/ceed-hip.h index 99705dc1e8..c06c07069e 100644 --- a/backends/hip/ceed-hip.h +++ b/backends/hip/ceed-hip.h @@ -61,19 +61,13 @@ CEED_UNUSED static const char *hipblasGetErrorName(hipblasStatus_t error) { } // LCOV_EXCL_STOP -typedef enum { - CEED_HIP_HOST_SYNC, - CEED_HIP_DEVICE_SYNC, - CEED_HIP_BOTH_SYNC, - CEED_HIP_NONE_SYNC -} CeedHipSyncState; - typedef struct { CeedScalar *h_array; - CeedScalar *h_array_allocated; + CeedScalar *h_array_borrowed; + CeedScalar *h_array_owned; CeedScalar *d_array; - CeedScalar *d_array_allocated; - CeedHipSyncState memState; + CeedScalar *d_array_borrowed; + CeedScalar *d_array_owned; } CeedVector_Hip; typedef struct { @@ -109,11 +103,12 @@ typedef struct { } CeedQFunction_Hip; typedef struct { - CeedScalar *h_data; - CeedScalar *h_data_allocated; - CeedScalar *d_data; - CeedScalar *d_data_allocated; - CeedHipSyncState memState; + void *h_data; + void *h_data_borrowed; + void *h_data_owned; + void *d_data; + void *d_data_borrowed; + void *d_data_owned; } CeedQFunctionContext_Hip; typedef struct { diff --git a/backends/magma/ceed-magma-basis.c b/backends/magma/ceed-magma-basis.c index 3f21150e69..746824a276 100644 --- a/backends/magma/ceed-magma-basis.c +++ b/backends/magma/ceed-magma-basis.c @@ -45,7 +45,7 @@ int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, "An input vector is required for this CeedEvalMode"); // LCOV_EXCL_STOP } - ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(V, CEED_MEM_DEVICE, &v); CeedChkBackend(ierr); CeedBasis_Magma *impl; ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); @@ -237,7 +237,7 @@ int CeedBasisApplyNonTensor_f64_Magma(CeedBasis basis, CeedInt nelem, "An input vector is required for this CeedEvalMode"); // LCOV_EXCL_STOP } - ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); CeedBasisNonTensor_Magma *impl; ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); @@ -362,7 +362,7 @@ int CeedBasisApplyNonTensor_f32_Magma(CeedBasis basis, CeedInt nelem, "An input vector is required for this CeedEvalMode"); // LCOV_EXCL_STOP } - ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); CeedBasisNonTensor_Magma *impl; ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); diff --git a/backends/magma/ceed-magma-restriction.c b/backends/magma/ceed-magma-restriction.c index 35f12f43ee..b130e3d124 100644 --- a/backends/magma/ceed-magma-restriction.c +++ b/backends/magma/ceed-magma-restriction.c @@ -45,7 +45,13 @@ static int CeedElemRestrictionApply_Magma(CeedElemRestriction r, CeedScalar *dv; ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &du); CeedChkBackend(ierr); - ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); + if (tmode == CEED_TRANSPOSE) { + // Sum into for transpose mode, e-vec to l-vec + ierr = CeedVectorGetArray(v, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); + } else { + // Overwrite for notranspose mode, l-vec to e-vec + ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); + } bool isStrided; ierr = CeedElemRestrictionIsStrided(r, &isStrided); CeedChkBackend(ierr); diff --git a/backends/memcheck/ceed-memcheck-qfunction.c b/backends/memcheck/ceed-memcheck-qfunction.c index 3171dbdb21..b76616b636 100644 --- a/backends/memcheck/ceed-memcheck-qfunction.c +++ b/backends/memcheck/ceed-memcheck-qfunction.c @@ -47,7 +47,7 @@ static int CeedQFunctionApply_Memcheck(CeedQFunction qf, CeedInt Q, CeedChkBackend(ierr); } for (int i = 0; ioutputs[i]); + ierr = CeedVectorGetArrayWrite(V[i], CEED_MEM_HOST, &impl->outputs[i]); CeedChkBackend(ierr); CeedInt len; ierr = CeedVectorGetLength(V[i], &len); CeedChkBackend(ierr); diff --git a/backends/opt/ceed-opt-operator.c b/backends/opt/ceed-opt-operator.c index 2f1c9434ae..688c284959 100644 --- a/backends/opt/ceed-opt-operator.c +++ b/backends/opt/ceed-opt-operator.c @@ -133,6 +133,10 @@ static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, case CEED_EVAL_CURL: break; // Not implemented } + if (is_input && !!e_vecs[i]) { + ierr = CeedVectorSetArray(e_vecs[i], CEED_MEM_HOST, + CEED_COPY_VALUES, NULL); CeedChkBackend(ierr); + } } return CEED_ERROR_SUCCESS; } @@ -248,21 +252,23 @@ static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields, CeedChkBackend(ierr); impl->input_states[i] = state; } + // Get evec + ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, + (const CeedScalar **) &e_data[i]); + CeedChkBackend(ierr); } else { // Set Qvec for CEED_EVAL_NONE if (eval_mode == CEED_EVAL_NONE) { - ierr = CeedVectorGetArray(impl->e_vecs_in[i], CEED_MEM_HOST, - &e_data[i]); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayRead(impl->e_vecs_in[i], CEED_MEM_HOST, + (const CeedScalar **)&e_data[i]); + CeedChkBackend(ierr); ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, CEED_USE_POINTER, e_data[i]); CeedChkBackend(ierr); - ierr = CeedVectorRestoreArray(impl->e_vecs_in[i], - &e_data[i]); CeedChkBackend(ierr); + ierr = CeedVectorRestoreArrayRead(impl->e_vecs_in[i], + (const CeedScalar **)&e_data[i]); + CeedChkBackend(ierr); } } - // Get evec - ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, - (const CeedScalar **) &e_data[i]); - CeedChkBackend(ierr); } } return CEED_ERROR_SUCCESS; @@ -440,13 +446,15 @@ static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields, CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, CeedScalar *e_data[2*CEED_FIELD_MAX], CeedOperator_Opt *impl) { CeedInt ierr; - CeedEvalMode eval_mode; for (CeedInt i=0; ie_vecs_full[i], (const CeedScalar **) &e_data[i]); CeedChkBackend(ierr); @@ -511,8 +519,8 @@ static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec, CeedChkBackend(ierr); if (eval_mode == CEED_EVAL_NONE) { // Set qvec to single block evec - ierr = CeedVectorGetArray(impl->e_vecs_out[i], CEED_MEM_HOST, - &e_data[i + num_input_fields]); + ierr = CeedVectorGetArrayWrite(impl->e_vecs_out[i], CEED_MEM_HOST, + &e_data[i + num_input_fields]); CeedChkBackend(ierr); ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, CEED_USE_POINTER, e_data[i + num_input_fields]); @@ -658,6 +666,7 @@ static inline int CeedOperatorLinearAssembleQFunctionCore_Opt(CeedOperator op, if (!l_vec) { ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out, &l_vec); CeedChkBackend(ierr); + ierr = CeedVectorSetValue(l_vec, 0.0); CeedChkBackend(ierr); impl->qf_l_vec = l_vec; } ierr = CeedVectorGetArray(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); diff --git a/backends/ref/ceed-ref-basis.c b/backends/ref/ceed-ref-basis.c index cbc7e8e19b..dcb40bd668 100644 --- a/backends/ref/ceed-ref-basis.c +++ b/backends/ref/ceed-ref-basis.c @@ -48,7 +48,7 @@ static int CeedBasisApply_Ref(CeedBasis basis, CeedInt num_elem, "An input vector is required for this CeedEvalMode"); // LCOV_EXCL_STOP } - ierr = CeedVectorGetArray(V, CEED_MEM_HOST, &v); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(V, CEED_MEM_HOST, &v); CeedChkBackend(ierr); // Clear v if operating in transpose if (t_mode == CEED_TRANSPOSE) { diff --git a/backends/ref/ceed-ref-operator.c b/backends/ref/ceed-ref-operator.c index b5e3bd4546..e5ce56e924 100644 --- a/backends/ref/ceed-ref-operator.c +++ b/backends/ref/ceed-ref-operator.c @@ -455,8 +455,9 @@ static int CeedOperatorApplyAdd_Ref(CeedOperator op, CeedVector in_vec, // Output Evecs for (CeedInt i=0; ie_vecs_full[i+impl->num_inputs], CEED_MEM_HOST, - &e_data_full[i + num_input_fields]); CeedChkBackend(ierr); + ierr = CeedVectorGetArrayWrite(impl->e_vecs_full[i+impl->num_inputs], + CEED_MEM_HOST, &e_data_full[i + num_input_fields]); + CeedChkBackend(ierr); } // Loop through elements diff --git a/backends/ref/ceed-ref-qfunction.c b/backends/ref/ceed-ref-qfunction.c index 641a1f0370..132fd6bf9d 100644 --- a/backends/ref/ceed-ref-qfunction.c +++ b/backends/ref/ceed-ref-qfunction.c @@ -47,7 +47,7 @@ static int CeedQFunctionApply_Ref(CeedQFunction qf, CeedInt Q, CeedChkBackend(ierr); } for (int i = 0; ioutputs[i]); + ierr = CeedVectorGetArrayWrite(V[i], CEED_MEM_HOST, &impl->outputs[i]); CeedChkBackend(ierr); } diff --git a/backends/ref/ceed-ref-qfunctioncontext.c b/backends/ref/ceed-ref-qfunctioncontext.c index e5818925b9..bbcbafe2d3 100644 --- a/backends/ref/ceed-ref-qfunctioncontext.c +++ b/backends/ref/ceed-ref-qfunctioncontext.c @@ -19,11 +19,54 @@ #include #include "ceed-ref.h" +//------------------------------------------------------------------------------ +// QFunctionContext has valid data +//------------------------------------------------------------------------------ +static int CeedQFunctionContextHasValidData_Ref(CeedQFunctionContext ctx, + bool *has_valid_data) { + int ierr; + CeedQFunctionContext_Ref *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, (void *)&impl); + CeedChkBackend(ierr); + + *has_valid_data = !!impl->data; + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// QFunctionContext has borrowed data +//------------------------------------------------------------------------------ +static int CeedQFunctionContextHasBorrowedDataOfType_Ref( + CeedQFunctionContext ctx, CeedMemType mem_type, + bool *has_borrowed_data_of_type) { + int ierr; + CeedQFunctionContext_Ref *impl; + ierr = CeedQFunctionContextGetBackendData(ctx, (void *)&impl); + CeedChkBackend(ierr); + Ceed ceed; + ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + + switch (mem_type) { + case CEED_MEM_HOST: + *has_borrowed_data_of_type = !!impl->data_borrowed; + break; + default: + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "Can only set HOST memory for this backend"); + // LCOV_EXCL_STOP + break; + } + + return CEED_ERROR_SUCCESS; +} + //------------------------------------------------------------------------------ // QFunctionContext Set Data //------------------------------------------------------------------------------ static int CeedQFunctionContextSetData_Ref(CeedQFunctionContext ctx, - CeedMemType mem_type, CeedCopyMode copy_mode, CeedScalar *data) { + CeedMemType mem_type, CeedCopyMode copy_mode, void *data) { int ierr; CeedQFunctionContext_Ref *impl; ierr = CeedQFunctionContextGetBackendData(ctx, (void *)&impl); @@ -35,21 +78,25 @@ static int CeedQFunctionContextSetData_Ref(CeedQFunctionContext ctx, if (mem_type != CEED_MEM_HOST) // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); + return CeedError(ceed, CEED_ERROR_BACKEND, + "Can only set HOST memory for this backend"); // LCOV_EXCL_STOP - ierr = CeedFree(&impl->data_allocated); CeedChkBackend(ierr); + + ierr = CeedFree(&impl->data_owned); CeedChkBackend(ierr); switch (copy_mode) { case CEED_COPY_VALUES: - ierr = CeedMallocArray(1, ctx_size, &impl->data_allocated); - CeedChkBackend(ierr); - impl->data = impl->data_allocated; + ierr = CeedMallocArray(1, ctx_size, &impl->data_owned); CeedChkBackend(ierr); + impl->data_borrowed = NULL; + impl->data = impl->data_owned; memcpy(impl->data, data, ctx_size); break; case CEED_OWN_POINTER: - impl->data_allocated = data; + impl->data_owned = data; + impl->data_borrowed = NULL; impl->data = data; break; case CEED_USE_POINTER: + impl->data_borrowed = data; impl->data = data; } return CEED_ERROR_SUCCESS; @@ -59,7 +106,7 @@ static int CeedQFunctionContextSetData_Ref(CeedQFunctionContext ctx, // QFunctionContext Take Data //------------------------------------------------------------------------------ static int CeedQFunctionContextTakeData_Ref(CeedQFunctionContext ctx, - CeedMemType mem_type, CeedScalar *data) { + CeedMemType mem_type, void *data) { int ierr; CeedQFunctionContext_Ref *impl; ierr = CeedQFunctionContextGetBackendData(ctx, (void *)&impl); @@ -69,15 +116,14 @@ static int CeedQFunctionContextTakeData_Ref(CeedQFunctionContext ctx, if (mem_type != CEED_MEM_HOST) // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); - // LCOV_EXCL_STOP - if (!impl->data) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "No context data set"); + return CeedError(ceed, CEED_ERROR_BACKEND, + "Can only provide HOST memory for this backend"); // LCOV_EXCL_STOP + *(void **)data = impl->data; + impl->data_borrowed = NULL; impl->data = NULL; - impl->data_allocated = NULL; + return CEED_ERROR_SUCCESS; } @@ -85,7 +131,7 @@ static int CeedQFunctionContextTakeData_Ref(CeedQFunctionContext ctx, // QFunctionContext Get Data //------------------------------------------------------------------------------ static int CeedQFunctionContextGetData_Ref(CeedQFunctionContext ctx, - CeedMemType mem_type, CeedScalar *data) { + CeedMemType mem_type, void *data) { int ierr; CeedQFunctionContext_Ref *impl; ierr = CeedQFunctionContextGetBackendData(ctx, (void *)&impl); @@ -95,13 +141,12 @@ static int CeedQFunctionContextGetData_Ref(CeedQFunctionContext ctx, if (mem_type != CEED_MEM_HOST) // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); - // LCOV_EXCL_STOP - if (!impl->data) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "No context data set"); + return CeedError(ceed, CEED_ERROR_BACKEND, + "Can only provide HOST memory for this backend"); // LCOV_EXCL_STOP + *(void **)data = impl->data; + return CEED_ERROR_SUCCESS; } @@ -120,7 +165,7 @@ static int CeedQFunctionContextDestroy_Ref(CeedQFunctionContext ctx) { CeedQFunctionContext_Ref *impl; ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr); - ierr = CeedFree(&impl->data_allocated); CeedChkBackend(ierr); + ierr = CeedFree(&impl->data_owned); CeedChkBackend(ierr); ierr = CeedFree(&impl); CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } @@ -134,6 +179,13 @@ int CeedQFunctionContextCreate_Ref(CeedQFunctionContext ctx) { Ceed ceed; ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "HasValidData", + CeedQFunctionContextHasValidData_Ref); + CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, + "HasBorrowedDataOfType", + CeedQFunctionContextHasBorrowedDataOfType_Ref); + CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "SetData", CeedQFunctionContextSetData_Ref); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "TakeData", @@ -144,8 +196,10 @@ int CeedQFunctionContextCreate_Ref(CeedQFunctionContext ctx) { CeedQFunctionContextRestoreData_Ref); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "Destroy", CeedQFunctionContextDestroy_Ref); CeedChkBackend(ierr); + ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); ierr = CeedQFunctionContextSetBackendData(ctx, impl); CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ diff --git a/backends/ref/ceed-ref-restriction.c b/backends/ref/ceed-ref-restriction.c index 2aff6c6de4..b125fc818a 100644 --- a/backends/ref/ceed-ref-restriction.c +++ b/backends/ref/ceed-ref-restriction.c @@ -38,7 +38,13 @@ static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, v_offset = start*blk_size*elem_size*num_comp; ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr); - ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); + if (t_mode == CEED_TRANSPOSE) { + // Sum into for transpose mode, e-vec to l-vec + ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); + } else { + // Overwrite for notranspose mode, l-vec to e-vec + ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); + } // Restriction from L-vector to E-vector // Perform: v = r * u if (t_mode == CEED_NOTRANSPOSE) { diff --git a/backends/ref/ceed-ref-vector.c b/backends/ref/ceed-ref-vector.c index 56597ad5fa..d7556d65a3 100644 --- a/backends/ref/ceed-ref-vector.c +++ b/backends/ref/ceed-ref-vector.c @@ -19,6 +19,45 @@ #include #include "ceed-ref.h" +//------------------------------------------------------------------------------ +// Has Valid Array +//------------------------------------------------------------------------------ +static int CeedVectorHasValidArray_Ref(CeedVector vec, bool *has_valid_array) { + int ierr; + CeedVector_Ref *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + + *has_valid_array = !!impl->array; + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Check if has borrowed array of given type +//------------------------------------------------------------------------------ +static inline int CeedVectorHasBorrowedArrayOfType_Ref(const CeedVector vec, + CeedMemType mem_type, bool *has_borrowed_array_of_type) { + int ierr; + CeedVector_Ref *impl; + ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); + Ceed ceed; + ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); + + switch (mem_type) { + case CEED_MEM_HOST: + *has_borrowed_array_of_type = !!impl->array_borrowed; + break; + default: + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "Can only set HOST memory for this backend"); + // LCOV_EXCL_STOP + break; + } + + return CEED_ERROR_SUCCESS; +} + //------------------------------------------------------------------------------ // Vector Set Array //------------------------------------------------------------------------------ @@ -34,20 +73,29 @@ static int CeedVectorSetArray_Ref(CeedVector vec, CeedMemType mem_type, if (mem_type != CEED_MEM_HOST) // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); + return CeedError(ceed, CEED_ERROR_BACKEND, + "Can only set HOST memory for this backend"); // LCOV_EXCL_STOP - ierr = CeedFree(&impl->array_allocated); CeedChkBackend(ierr); + switch (copy_mode) { case CEED_COPY_VALUES: - ierr = CeedMalloc(length, &impl->array_allocated); CeedChkBackend(ierr); - impl->array = impl->array_allocated; - if (array) memcpy(impl->array, array, length * sizeof(array[0])); + if (!impl->array_owned) { + ierr = CeedCalloc(length, &impl->array_owned); CeedChkBackend(ierr); + } + impl->array_borrowed = NULL; + impl->array = impl->array_owned; + if (array) + memcpy(impl->array, array, length * sizeof(array[0])); break; case CEED_OWN_POINTER: - impl->array_allocated = array; + ierr = CeedFree(&impl->array_owned); CeedChkBackend(ierr); + impl->array_owned = array; + impl->array_borrowed = NULL; impl->array = array; break; case CEED_USE_POINTER: + ierr = CeedFree(&impl->array_owned); CeedChkBackend(ierr); + impl->array_borrowed = array; impl->array = array; } return CEED_ERROR_SUCCESS; @@ -64,14 +112,9 @@ static int CeedVectorTakeArray_Ref(CeedVector vec, CeedMemType mem_type, Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - if (mem_type != CEED_MEM_HOST) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); - // LCOV_EXCL_STOP - - (*array) = impl->array; + (*array) = impl->array_borrowed; + impl->array_borrowed = NULL; impl->array = NULL; - impl->array_allocated = NULL; return CEED_ERROR_SUCCESS; } @@ -79,8 +122,8 @@ static int CeedVectorTakeArray_Ref(CeedVector vec, CeedMemType mem_type, //------------------------------------------------------------------------------ // Vector Get Array //------------------------------------------------------------------------------ -static int CeedVectorGetArray_Ref(CeedVector vec, CeedMemType mem_type, - CeedScalar **array) { +static int CeedVectorGetArrayCore_Ref(CeedVector vec, CeedMemType mem_type, + CeedScalar **array) { int ierr; CeedVector_Ref *impl; ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); @@ -89,13 +132,12 @@ static int CeedVectorGetArray_Ref(CeedVector vec, CeedMemType mem_type, if (mem_type != CEED_MEM_HOST) // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); + return CeedError(ceed, CEED_ERROR_BACKEND, + "Can only provide HOST memory for this backend"); // LCOV_EXCL_STOP - if (!impl->array) { // Allocate if array is not yet allocated - ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL); - CeedChkBackend(ierr); - } + *array = impl->array; + return CEED_ERROR_SUCCESS; } @@ -104,22 +146,41 @@ static int CeedVectorGetArray_Ref(CeedVector vec, CeedMemType mem_type, //------------------------------------------------------------------------------ static int CeedVectorGetArrayRead_Ref(CeedVector vec, CeedMemType mem_type, const CeedScalar **array) { + return CeedVectorGetArrayCore_Ref(vec, mem_type, (CeedScalar **)array); +} + +//------------------------------------------------------------------------------ +// Vector Get Array +//------------------------------------------------------------------------------ +static int CeedVectorGetArray_Ref(CeedVector vec, CeedMemType mem_type, + CeedScalar **array) { + return CeedVectorGetArrayCore_Ref(vec, mem_type, array); +} + +//------------------------------------------------------------------------------ +// Vector Get Array Write +//------------------------------------------------------------------------------ +static int CeedVectorGetArrayWrite_Ref(CeedVector vec, CeedMemType mem_type, + const CeedScalar **array) { int ierr; CeedVector_Ref *impl; ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); - Ceed ceed; - ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); - if (mem_type != CEED_MEM_HOST) - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); - // LCOV_EXCL_STOP - if (!impl->array) { // Allocate if array is not yet allocated - ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL); - CeedChkBackend(ierr); + if (!impl->array) { + if (!impl->array_owned && !impl->array_borrowed) { + // Allocate if array is not yet allocated + ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL); + CeedChkBackend(ierr); + } else { + // Select dirty array for GetArrayWrite + if (impl->array_borrowed) + impl->array = impl->array_borrowed; + else + impl->array = impl->array_owned; + } } - *array = impl->array; - return CEED_ERROR_SUCCESS; + + return CeedVectorGetArrayCore_Ref(vec, mem_type, (CeedScalar **)array); } //------------------------------------------------------------------------------ @@ -141,7 +202,7 @@ static int CeedVectorDestroy_Ref(CeedVector vec) { CeedVector_Ref *impl; ierr = CeedVectorGetData(vec, &impl); CeedChkBackend(ierr); - ierr = CeedFree(&impl->array_allocated); CeedChkBackend(ierr); + ierr = CeedFree(&impl->array_owned); CeedChkBackend(ierr); ierr = CeedFree(&impl); CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } @@ -155,6 +216,11 @@ int CeedVectorCreate_Ref(CeedInt n, CeedVector vec) { Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Vector", vec, "HasValidArray", + CeedVectorHasValidArray_Ref); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Vector", vec, "HasBorrowedArrayOfType", + CeedVectorHasBorrowedArrayOfType_Ref); + CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Ref); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "TakeArray", @@ -163,14 +229,18 @@ int CeedVectorCreate_Ref(CeedInt n, CeedVector vec) { CeedVectorGetArray_Ref); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Ref); CeedChkBackend(ierr); + ierr = CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayWrite", + CeedVectorGetArrayWrite_Ref); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "RestoreArray", CeedVectorRestoreArray_Ref); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "RestoreArrayRead", CeedVectorRestoreArrayRead_Ref); CeedChkBackend(ierr); ierr = CeedSetBackendFunction(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Ref); CeedChkBackend(ierr); - ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr); + + ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); ierr = CeedVectorSetData(vec, impl); CeedChkBackend(ierr); + return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ diff --git a/backends/ref/ceed-ref.h b/backends/ref/ceed-ref.h index 8da6fd3eac..ef76750e43 100644 --- a/backends/ref/ceed-ref.h +++ b/backends/ref/ceed-ref.h @@ -29,7 +29,8 @@ typedef struct { typedef struct { CeedScalar *array; - CeedScalar *array_allocated; + CeedScalar *array_borrowed; + CeedScalar *array_owned; } CeedVector_Ref; typedef struct { @@ -47,7 +48,8 @@ typedef struct { typedef struct { void *data; - void *data_allocated; + void *data_borrowed; + void *data_owned; } CeedQFunctionContext_Ref; typedef struct { diff --git a/doc/sphinx/source/libCEEDdev.md b/doc/sphinx/source/libCEEDdev.md index 6d1689d0f7..cf157a817c 100644 --- a/doc/sphinx/source/libCEEDdev.md +++ b/doc/sphinx/source/libCEEDdev.md @@ -88,10 +88,45 @@ and are purely implicit -- one just indexes the same array using the appropriate convention. -## `restrict` semantics +## `restrict` Semantics QFunction arguments can be assumed to have `restrict` semantics. That is, each input and output array must reside in distinct memory without overlap. +## CeedVector Array Access Semantics + +Backend implementations are expected to separately track 'owned' and 'borrowed' memory locations. +Backends are responsible for freeing 'owned' memory; 'borrowed' memory is set by the user and backends only have read/write access to 'borrowed' memory. +For any given precision and memory type, a backend should only have 'owned' or 'borrowed' memory, not both. + +Backends are responsible for tracking which memory locations contain valid data. +If the user calls {c:func}`CeedVectorTakeArray` on the only memory location that contains valid data, then the {ref}`CeedVector` is left in an *invalid state*. +To repair an *invalid state*, the user must set valid data by calling {c:func}`CeedVectorSetValue`, {c:func}`CeedVectorSetArray`, or {c:func}`CeedVectorGetArrayWrite`. + +Some checks for consistency and data validity with {ref}`CeedVector` array access are performed at the interface level. +All backends may assume that array access will conform to these guidelines: + +- Borrowed memory + + - {ref}`CeedVector` access to borrowed memory is set with {c:func}`CeedVectorSetArray` with `copy_mode = CEED_USE_POINTER` and revoked with {c:func}`CeedVectorTakeArray`. + The user must first call {c:func}`CeedVectorSetArray` with `copy_mode = CEED_USE_POINTER` for the appropriate precision and memory type before calling {c:func}`CeedVectorTakeArray`. + - {c:func}`CeedVectorTakeArray` cannot be called on a vector in a *invalid state*. + +- Owned memory + + - Owned memory can be allocated by calling {c:func}`CeedVectorSetValue` or by calling {c:func}`CeedVectorSetArray` with `copy_mode = CEED_COPY_VALUES`. + - Owned memory can be set by calling {c:func}`CeedVectorSetArray` with `copy_mode = CEED_OWN_POINTER`. + - Owned memory can also be allocated by calling {c:func}`CeedVectorGetArrayWrite`. + The user is responsible for manually setting the contents of the array in this case. + +- Data validity + + - Internal syncronization and user calls to {c:func}`CeedVectorSync` cannot be made on a vector in an *invalid state*. + - Calls to {c:func}`CeedVectorGetArray` and {c:func}`CeedVectorGetArrayRead` cannot be made on a vector in an *invalid state*. + - Calls to {c:func}`CeedVectorSetArray` and {c:func}`CeedVectorSetValue` can be made on a vector in an *invalid state*. + - Calls to {c:func}`CeedVectorGetArrayWrite` can be made on a vector in an *invalid* state. + Data syncronization is not required for the memory location returned by {c:func}`CeedVectorGetArrayWrite`. + The caller should assume that all data at the memory location returned by {c:func}`CeedVectorGetArrayWrite` is *invalid*. + ## Internal Layouts Ceed backends are free to use any **E-vector** and **Q-vector** data layout, to include never fully forming these vectors, so long as the backend passes the `t5**` series tests and all examples. diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index 1a392e6429..7a7bc47475 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -17,6 +17,8 @@ for each release of libCEED. - Warning added when compiling OCCA backend to alert users that this backend is experimental. - `ceed-backend.h`, `ceed-hash.h`, and `ceed-khash.h` removed. Users should use `ceed/backend.h`, `ceed/hash.h`, and `ceed/khash.h`. - Added {c:func}`CeedQFunctionGetKernelName`; refactored {c:func}`CeedQFunctionGetSourcePath` to exclude function kernel name. +- Clarify documentation for {c:func}`CeedVectorTakeArray`; this function will error if {c:func}`CeedVectorSetArray` with `copy_mode == CEED_USE_POINTER` was not previously called for the corresponding `CeedMemType`. +- Added {c:func}`CeedVectorGetArrayWrite` that allows access to uninitalized arrays; require initalized data for {c:func}`CeedVectorGetArray`. ### New features diff --git a/examples/ceed/ex1-volume.c b/examples/ceed/ex1-volume.c index 53123acabb..a498ae563a 100644 --- a/examples/ceed/ex1-volume.c +++ b/examples/ceed/ex1-volume.c @@ -374,7 +374,7 @@ int SetCartesianMeshCoords(int dim, int num_xyz[dim], int mesh_degree, scalar_size *= nd[d]; } CeedScalar *coords; - CeedVectorGetArray(mesh_coords, CEED_MEM_HOST, &coords); + CeedVectorGetArrayWrite(mesh_coords, CEED_MEM_HOST, &coords); CeedScalar *nodes = malloc(sizeof(CeedScalar) * p); // The H1 basis uses Lobatto quadrature points as nodes. CeedLobattoQuadrature(p, nodes, NULL); // nodes are in [-1,1] diff --git a/examples/ceed/ex2-surface.c b/examples/ceed/ex2-surface.c index 190925b46f..3274b3e40d 100644 --- a/examples/ceed/ex2-surface.c +++ b/examples/ceed/ex2-surface.c @@ -271,7 +271,7 @@ int main(int argc, const char *argv[]) { // Initialize 'u' with sum of coordinates, x+y+z. CeedScalar *u_array; const CeedScalar *x_array; - CeedVectorGetArray(u, CEED_MEM_HOST, &u_array); + CeedVectorGetArrayWrite(u, CEED_MEM_HOST, &u_array); CeedVectorGetArrayRead(mesh_coords, CEED_MEM_HOST, &x_array); for (CeedInt i = 0; i < sol_size; i++) { u_array[i] = 0; @@ -402,7 +402,7 @@ int SetCartesianMeshCoords(int dim, int num_xyz[3], int mesh_degree, scalar_size *= nd[d]; } CeedScalar *coords; - CeedVectorGetArray(mesh_coords, CEED_MEM_HOST, &coords); + CeedVectorGetArrayWrite(mesh_coords, CEED_MEM_HOST, &coords); CeedScalar *nodes = malloc(sizeof(CeedScalar) * p); // The H1 basis uses Lobatto quadrature points as nodes. CeedLobattoQuadrature(p, nodes, NULL); // nodes are in [-1,1] diff --git a/examples/petsc/area.c b/examples/petsc/area.c index 442802a280..9c8456cd9d 100644 --- a/examples/petsc/area.c +++ b/examples/petsc/area.c @@ -223,8 +223,8 @@ int main(int argc, char **argv) { // Setup libCEED's objects and apply setup operator ierr = PetscMalloc1(1, &ceed_data); CHKERRQ(ierr); ierr = SetupLibceedByDegree(dm, ceed, degree, topo_dim, q_extra, num_comp_x, - num_comp_u, - g_size, xl_size, problem_options[problem_choice], ceed_data, + num_comp_u, g_size, xl_size, + problem_options[problem_choice], ceed_data, false, (CeedVector)NULL, (CeedVector *)NULL); CHKERRQ(ierr); diff --git a/examples/rust/ex2-surface/src/main.rs b/examples/rust/ex2-surface/src/main.rs index 77a6fc01e7..1d144db304 100644 --- a/examples/rust/ex2-surface/src/main.rs +++ b/examples/rust/ex2-surface/src/main.rs @@ -325,6 +325,7 @@ fn example_2(options: opt::Opt) -> libceed::Result<()> { // Initialize u with sum of node coordinates let coords = mesh_coords.view()?; + u.set_value(0.0)?; u.view_mut()?.iter_mut().enumerate().for_each(|(i, u)| { *u = (0..dim).map(|d| coords[i + d * solution_size]).sum(); }); diff --git a/examples/rust/mesh/src/lib.rs b/examples/rust/mesh/src/lib.rs index ee44f35c37..02f7d38ab8 100644 --- a/examples/rust/mesh/src/lib.rs +++ b/examples/rust/mesh/src/lib.rs @@ -147,6 +147,7 @@ pub fn cartesian_mesh_coords( // Coordinates for mesh let mut mesh_coords = ceed.vector(mesh_size)?; + mesh_coords.set_value(0.0)?; { let mut coords = mesh_coords.view_mut()?; let nodes = nodes_full.view()?; diff --git a/include/ceed-impl.h b/include/ceed-impl.h index 36329a90dd..9a8518ebc4 100644 --- a/include/ceed-impl.h +++ b/include/ceed-impl.h @@ -129,12 +129,15 @@ struct Ceed_private { struct CeedVector_private { Ceed ceed; + int (*HasValidArray)(CeedVector, bool *); + int (*HasBorrowedArrayOfType)(CeedVector, CeedMemType, bool *); int (*SetArray)(CeedVector, CeedMemType, CeedCopyMode, CeedScalar *); int (*SetValue)(CeedVector, CeedScalar); int (*SyncArray)(CeedVector, CeedMemType); int (*TakeArray)(CeedVector, CeedMemType, CeedScalar **); int (*GetArray)(CeedVector, CeedMemType, CeedScalar **); int (*GetArrayRead)(CeedVector, CeedMemType, const CeedScalar **); + int (*GetArrayWrite)(CeedVector, CeedMemType, CeedScalar **); int (*RestoreArray)(CeedVector); int (*RestoreArrayRead)(CeedVector); int (*Norm)(CeedVector, CeedNormType, CeedScalar *); @@ -252,6 +255,8 @@ struct CeedQFunction_private { struct CeedQFunctionContext_private { Ceed ceed; int ref_count; + int (*HasValidData)(CeedQFunctionContext, bool *); + int (*HasBorrowedDataOfType)(CeedQFunctionContext, CeedMemType, bool *); int (*SetData)(CeedQFunctionContext, CeedMemType, CeedCopyMode, void *); int (*TakeData)(CeedQFunctionContext, CeedMemType, void *); int (*GetData)(CeedQFunctionContext, CeedMemType, void *); diff --git a/include/ceed/backend.h b/include/ceed/backend.h index 6d66d6da92..2786d8af3e 100644 --- a/include/ceed/backend.h +++ b/include/ceed/backend.h @@ -129,6 +129,10 @@ CEED_EXTERN int CeedGetData(Ceed ceed, void *data); CEED_EXTERN int CeedSetData(Ceed ceed, void *data); CEED_EXTERN int CeedReference(Ceed ceed); +CEED_EXTERN int CeedVectorHasValidArray(CeedVector vec, bool *has_valid_array); +CEED_EXTERN int CeedVectorHasBorrowedArrayOfType(CeedVector vec, CeedMemType mem_type, + bool *has_borrowed_array_of_type); +CEED_EXTERN int CeedVectorHasValidArray(CeedVector vec, bool *has_valid_array); CEED_EXTERN int CeedVectorGetState(CeedVector vec, uint64_t *state); CEED_EXTERN int CeedVectorAddReference(CeedVector vec); CEED_EXTERN int CeedVectorGetData(CeedVector vec, void *data); @@ -158,8 +162,9 @@ CEED_EXTERN int CeedElemRestrictionReference(CeedElemRestriction rstr); CEED_EXTERN int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *colo_grad_1d); CEED_EXTERN int CeedHouseholderApplyQ(CeedScalar *A, const CeedScalar *Q, - const CeedScalar *tau, CeedTransposeMode t_mode, CeedInt m, CeedInt n, - CeedInt k, CeedInt row, CeedInt col); + const CeedScalar *tau, CeedTransposeMode t_mode, + CeedInt m, CeedInt n, CeedInt k, + CeedInt row, CeedInt col); CEED_EXTERN int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor); CEED_EXTERN int CeedBasisGetData(CeedBasis basis, void *data); CEED_EXTERN int CeedBasisSetData(CeedBasis basis, void *data); @@ -212,8 +217,12 @@ CEED_EXTERN int CeedQFunctionGetData(CeedQFunction qf, void *data); CEED_EXTERN int CeedQFunctionSetData(CeedQFunction qf, void *data); CEED_EXTERN int CeedQFunctionReference(CeedQFunction qf); -CEED_EXTERN int CeedQFunctionContextGetCeed(CeedQFunctionContext cxt, +CEED_EXTERN int CeedQFunctionContextGetCeed(CeedQFunctionContext ctx, Ceed *ceed); +CEED_EXTERN int CeedQFunctionContextHasValidData(CeedQFunctionContext ctx, + bool *has_valid_data); +CEED_EXTERN int CeedQFunctionContextHasBorrowedDataOfType(CeedQFunctionContext ctx, + CeedMemType mem_type, bool *has_borrowed_data_of_type); CEED_EXTERN int CeedQFunctionContextGetState(CeedQFunctionContext ctx, uint64_t *state); CEED_EXTERN int CeedQFunctionContextGetBackendData(CeedQFunctionContext ctx, diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h index 6237089f8c..78ea506c2b 100644 --- a/include/ceed/ceed.h +++ b/include/ceed/ceed.h @@ -347,6 +347,8 @@ CEED_EXTERN int CeedVectorGetArray(CeedVector vec, CeedMemType mem_type, CeedScalar **array); CEED_EXTERN int CeedVectorGetArrayRead(CeedVector vec, CeedMemType mem_type, const CeedScalar **array); +CEED_EXTERN int CeedVectorGetArrayWrite(CeedVector vec, CeedMemType mem_type, + CeedScalar **array); CEED_EXTERN int CeedVectorRestoreArray(CeedVector vec, CeedScalar **array); CEED_EXTERN int CeedVectorRestoreArrayRead(CeedVector vec, const CeedScalar **array); diff --git a/interface/ceed-fortran.c b/interface/ceed-fortran.c index b6ff29407a..a377431bad 100644 --- a/interface/ceed-fortran.c +++ b/interface/ceed-fortran.c @@ -191,6 +191,16 @@ void fCeedVectorGetArrayRead(int *vec, int *memtype, CeedScalar *array, *offset = b - array; } +#define fCeedVectorGetArrayWrite \ + FORTRAN_NAME(ceedvectorgetarraywrite,CEEDVECTORGETARRAYWRITE) +void fCeedVectorGetArrayWrite(int *vec, int *memtype, CeedScalar *array, + int64_t *offset, int *err) { + CeedScalar *b; + CeedVector vec_ = CeedVector_dict[*vec]; + *err = CeedVectorGetArrayWrite(vec_, (CeedMemType)*memtype, &b); + *offset = b - array; +} + #define fCeedVectorRestoreArray \ FORTRAN_NAME(ceedvectorrestorearray,CEEDVECTORRESTOREARRAY) void fCeedVectorRestoreArray(int *vec, CeedScalar *array, diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index 974369661b..d6a37d05d9 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -311,11 +311,12 @@ static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, CeedChk(ierr); // Assemble element operator diagonals - CeedScalar *elem_diag_array, *assembled_qf_array; + CeedScalar *elem_diag_array; + const CeedScalar *assembled_qf_array; ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr); ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array); CeedChk(ierr); - ierr = CeedVectorGetArray(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); + ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array); CeedChk(ierr); CeedInt num_elem, num_nodes, num_qpts; ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr); @@ -386,7 +387,8 @@ static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op, } } ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr); - ierr = CeedVectorRestoreArray(assembled_qf, &assembled_qf_array); CeedChk(ierr); + ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array); + CeedChk(ierr); // Assemble local operator diagonal ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, @@ -473,7 +475,7 @@ static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedVector index_vec; ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr); CeedScalar *array; - ierr = CeedVectorGetArray(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); + ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr); for (CeedInt i = 0; i < num_nodes; ++i) { array[i] = i; } @@ -678,7 +680,7 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedScalar elem_mat[elem_size * elem_size]; int count = 0; CeedScalar *vals; - ierr = CeedVectorGetArray(values, CEED_MEM_HOST, &vals); CeedChk(ierr); + ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr); for (int e = 0; e < num_elem; ++e) { for (int comp_in = 0; comp_in < num_comp; ++comp_in) { for (int comp_out = 0; comp_out < num_comp; ++comp_out) { @@ -2000,7 +2002,8 @@ int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data); CeedChk(ierr); ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr); - ierr = CeedVectorGetArray(q_data, CEED_MEM_HOST, &q_data_array); CeedChk(ierr); + ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array); + CeedChk(ierr); for (CeedInt e=0; eHasValidData) + // LCOV_EXCL_START + return CeedError(ctx->ceed, CEED_ERROR_UNSUPPORTED, + "Backend does not support HasValidData"); + // LCOV_EXCL_STOP + + ierr = ctx->HasValidData(ctx, has_valid_data); CeedChk(ierr); + + return CEED_ERROR_SUCCESS; +} + +/** + @brief Check for borrowed data of a specific CeedMemType in a + CeedQFunctionContext + + @param ctx CeedQFunctionContext to check + @param mem_type Memory type to check + @param[out] has_borrowed_data_of_type Variable to store result + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedQFunctionContextHasBorrowedDataOfType(CeedQFunctionContext ctx, + CeedMemType mem_type, bool *has_borrowed_data_of_type) { + int ierr; + + if (!ctx->HasBorrowedDataOfType) + // LCOV_EXCL_START + return CeedError(ctx->ceed, CEED_ERROR_UNSUPPORTED, + "Backend does not support HasBorrowedDataOfType"); + // LCOV_EXCL_STOP + + ierr = ctx->HasBorrowedDataOfType(ctx, mem_type, has_borrowed_data_of_type); + CeedChk(ierr); + + return CEED_ERROR_SUCCESS; +} + /** @brief Get the state of a CeedQFunctionContext @@ -229,6 +282,14 @@ int CeedQFunctionContextTakeData(CeedQFunctionContext ctx, CeedMemType mem_type, void *data) { int ierr; + bool has_valid_data = true; + ierr = CeedQFunctionContextHasValidData(ctx, &has_valid_data); CeedChk(ierr); + if (!has_valid_data) + // LCOV_EXCL_START + return CeedError(ctx->ceed, CEED_ERROR_BACKEND, + "CeedQFunctionContext has no valid data to take, must set data"); + // LCOV_EXCL_STOP + if (!ctx->TakeData) // LCOV_EXCL_START return CeedError(ctx->ceed, CEED_ERROR_UNSUPPORTED, @@ -242,6 +303,17 @@ int CeedQFunctionContextTakeData(CeedQFunctionContext ctx, CeedMemType mem_type, "access lock is already in use"); // LCOV_EXCL_STOP + bool has_borrowed_data_of_type = true; + ierr = CeedQFunctionContextHasBorrowedDataOfType(ctx, mem_type, + &has_borrowed_data_of_type); CeedChk(ierr); + if (!has_borrowed_data_of_type) + // LCOV_EXCL_START + return CeedError(ctx->ceed, CEED_ERROR_BACKEND, + "CeedQFunctionContext has no borowed %s data, " + "must set data with CeedQFunctionContextSetData", + CeedMemTypes[mem_type]); + // LCOV_EXCL_STOP + void *temp_data = NULL; ierr = ctx->TakeData(ctx, mem_type, &temp_data); CeedChk(ierr); if (data) (*(void **)data) = temp_data; @@ -282,6 +354,14 @@ int CeedQFunctionContextGetData(CeedQFunctionContext ctx, CeedMemType mem_type, "access lock is already in use"); // LCOV_EXCL_STOP + bool has_valid_data = true; + ierr = CeedQFunctionContextHasValidData(ctx, &has_valid_data); CeedChk(ierr); + if (!has_valid_data) + // LCOV_EXCL_START + return CeedError(ctx->ceed, CEED_ERROR_BACKEND, + "CeedQFunctionContext has no valid data to get, must set data"); + // LCOV_EXCL_STOP + ierr = ctx->GetData(ctx, mem_type, data); CeedChk(ierr); ctx->state += 1; return CEED_ERROR_SUCCESS; diff --git a/interface/ceed-vector.c b/interface/ceed-vector.c index d0ca0e403b..cc71eeb88a 100644 --- a/interface/ceed-vector.c +++ b/interface/ceed-vector.c @@ -47,6 +47,57 @@ const CeedVector CEED_VECTOR_NONE = &ceed_vector_none; /// @addtogroup CeedVectorBackend /// @{ +/** + @brief Check for valid data in a CeedVector + + @param vec CeedVector to check validity + @param[out] has_valid_array Variable to store validity + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedVectorHasValidArray(CeedVector vec, bool *has_valid_array) { + int ierr; + + if (!vec->HasValidArray) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_UNSUPPORTED, + "Backend does not support HasValidArray"); + // LCOV_EXCL_STOP + + ierr = vec->HasValidArray(vec, has_valid_array); CeedChk(ierr); + + return CEED_ERROR_SUCCESS; +} + +/** + @brief Check for borrowed array of a specific CeedMemType in a CeedVector + + @param vec CeedVector to check + @param mem_type Memory type to check + @param[out] has_borrowed_array_of_type Variable to store result + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedVectorHasBorrowedArrayOfType(CeedVector vec, CeedMemType mem_type, + bool *has_borrowed_array_of_type) { + int ierr; + + if (!vec->HasBorrowedArrayOfType) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_UNSUPPORTED, + "Backend does not support HasBorrowedArrayOfType"); + // LCOV_EXCL_STOP + + ierr = vec->HasBorrowedArrayOfType(vec, mem_type, has_borrowed_array_of_type); + CeedChk(ierr); + + return CEED_ERROR_SUCCESS; +} + /** @brief Get the state of a CeedVector @@ -247,15 +298,24 @@ int CeedVectorSetValue(CeedVector vec, CeedScalar value) { int ierr; if (vec->state % 2 == 1) + // LCOV_EXCL_START return CeedError(vec->ceed, CEED_ERROR_ACCESS, "Cannot grant CeedVector array access, the " "access lock is already in use"); + // LCOV_EXCL_STOP + + if (vec->num_readers > 0) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_ACCESS, + "Cannot grant CeedVector array access, a " + "process has read access"); + // LCOV_EXCL_STOP if (vec->SetValue) { ierr = vec->SetValue(vec, value); CeedChk(ierr); } else { CeedScalar *array; - ierr = CeedVectorGetArray(vec, CEED_MEM_HOST, &array); CeedChk(ierr); + ierr = CeedVectorGetArrayWrite(vec, CEED_MEM_HOST, &array); CeedChk(ierr); for (int i=0; ilength; i++) array[i] = value; ierr = CeedVectorRestoreArray(vec, &array); CeedChk(ierr); } @@ -295,9 +355,11 @@ int CeedVectorSyncArray(CeedVector vec, CeedMemType mem_type) { } /** - @brief Take ownership of the CeedVector array and remove the array from the - CeedVector. The caller is responsible for managing and freeing - the array. + @brief Take ownership of the CeedVector array set by @ref CeedVectorSetArray() + with @ref CEED_USE_POINTER and remove the array from the CeedVector. + The caller is responsible for managing and freeing the array. + This function will error if @ref CeedVectorSetArray() was not previously + called with @ref CEED_USE_POINTER for the corresponding mem_type. @param vec CeedVector @param mem_type Memory type on which to take the array. If the backend @@ -326,6 +388,26 @@ int CeedVectorTakeArray(CeedVector vec, CeedMemType mem_type, "has read access"); // LCOV_EXCL_STOP + bool has_borrowed_array_of_type = true; + ierr = CeedVectorHasBorrowedArrayOfType(vec, mem_type, + &has_borrowed_array_of_type); + CeedChk(ierr); + if (!has_borrowed_array_of_type) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_BACKEND, + "CeedVector has no borrowed %s array, " + "must set array with CeedVectorSetArray", CeedMemTypes[mem_type]); + // LCOV_EXCL_STOP + + bool has_valid_array = true; + ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChk(ierr); + if (!has_valid_array) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_BACKEND, + "CeedVector has no valid data to take, " + "must set data with CeedVectorSetValue or CeedVectorSetArray"); + // LCOV_EXCL_STOP + CeedScalar *temp_array = NULL; ierr = vec->TakeArray(vec, mem_type, &temp_array); CeedChk(ierr); if (array) (*array) = temp_array; @@ -370,6 +452,15 @@ int CeedVectorGetArray(CeedVector vec, CeedMemType mem_type, "Cannot grant CeedVector array access, a " "process has read access"); + bool has_valid_array = true; + ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChk(ierr); + if (!has_valid_array) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_BACKEND, + "CeedVector has no valid data to read, " + "must set data with CeedVectorSetValue or CeedVectorSetArray"); + // LCOV_EXCL_STOP + ierr = vec->GetArray(vec, mem_type, array); CeedChk(ierr); vec->state += 1; return CEED_ERROR_SUCCESS; @@ -404,11 +495,62 @@ int CeedVectorGetArrayRead(CeedVector vec, CeedMemType mem_type, "Cannot grant CeedVector read-only array " "access, the access lock is already in use"); + bool has_valid_array = true; + ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChk(ierr); + if (!has_valid_array) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_BACKEND, + "CeedVector has no valid data to read, " + "must set data with CeedVectorSetValue or CeedVectorSetArray"); + // LCOV_EXCL_STOP + ierr = vec->GetArrayRead(vec, mem_type, array); CeedChk(ierr); vec->num_readers++; return CEED_ERROR_SUCCESS; } +/** + @brief Get write access to a CeedVector via the specified memory type. + Restore access with @ref CeedVectorRestoreArray(). All old + values should be assumed to be invalid. + + @param vec CeedVector to access + @param mem_type Memory type on which to access the array. + @param[out] array Array on memory type mem_type + + @return An error code: 0 - success, otherwise - failure + + @ref User +**/ +int CeedVectorGetArrayWrite(CeedVector vec, CeedMemType mem_type, + CeedScalar **array) { + int ierr; + + if (!vec->GetArrayWrite) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_UNSUPPORTED, + "Backend does not support GetArrayWrite"); + // LCOV_EXCL_STOP + + if (vec->state % 2 == 1) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_ACCESS, + "Cannot grant CeedVector array access, the " + "access lock is already in use"); + // LCOV_EXCL_STOP + + if (vec->num_readers > 0) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_ACCESS, + "Cannot grant CeedVector array access, a " + "process has read access"); + // LCOV_EXCL_STOP + + ierr = vec->GetArrayWrite(vec, mem_type, array); CeedChk(ierr); + vec->state += 1; + return CEED_ERROR_SUCCESS; +} + /** @brief Restore an array obtained using @ref CeedVectorGetArray() @@ -458,6 +600,13 @@ int CeedVectorRestoreArrayRead(CeedVector vec, const CeedScalar **array) { "Backend does not support RestoreArrayRead"); // LCOV_EXCL_STOP + if (vec->num_readers == 0) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_ACCESS, + "Cannot restore CeedVector array read access, " + "access was not granted"); + // LCOV_EXCL_STOP + ierr = vec->RestoreArrayRead(vec); CeedChk(ierr); *array = NULL; vec->num_readers--; @@ -482,6 +631,15 @@ int CeedVectorRestoreArrayRead(CeedVector vec, const CeedScalar **array) { int CeedVectorNorm(CeedVector vec, CeedNormType norm_type, CeedScalar *norm) { int ierr; + bool has_valid_array = true; + ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChk(ierr); + if (!has_valid_array) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_BACKEND, + "CeedVector has no valid data to compute norm, " + "must set data with CeedVectorSetValue or CeedVectorSetArray"); + // LCOV_EXCL_STOP + // Backend impl for GPU, if added if (vec->Norm) { ierr = vec->Norm(vec, norm_type, norm); CeedChk(ierr); @@ -531,6 +689,15 @@ int CeedVectorScale(CeedVector x, CeedScalar alpha) { CeedScalar *x_array; CeedInt n_x; + bool has_valid_array = true; + ierr = CeedVectorHasValidArray(x, &has_valid_array); CeedChk(ierr); + if (!has_valid_array) + // LCOV_EXCL_START + return CeedError(x->ceed, CEED_ERROR_BACKEND, + "CeedVector has no valid data to scale, " + "must set data with CeedVectorSetValue or CeedVectorSetArray"); + // LCOV_EXCL_STOP + ierr = CeedVectorGetLength(x, &n_x); CeedChk(ierr); // Backend implementation @@ -538,7 +705,7 @@ int CeedVectorScale(CeedVector x, CeedScalar alpha) { return x->Scale(x, alpha); // Default implementation - ierr = CeedVectorGetArray(x, CEED_MEM_HOST, &x_array); CeedChk(ierr); + ierr = CeedVectorGetArrayWrite(x, CEED_MEM_HOST, &x_array); CeedChk(ierr); for (CeedInt i=0; iceed, CEED_ERROR_BACKEND, + "CeedVector x has no valid data, " + "must set data with CeedVectorSetValue or CeedVectorSetArray"); + // LCOV_EXCL_STOP + ierr = CeedVectorHasValidArray(y, &has_valid_array_y); CeedChk(ierr); + if (!has_valid_array_y) + // LCOV_EXCL_START + return CeedError(y->ceed, CEED_ERROR_BACKEND, + "CeedVector y has no valid data, " + "must set data with CeedVectorSetValue or CeedVectorSetArray"); + // LCOV_EXCL_STOP + Ceed ceed_parent_x, ceed_parent_y; ierr = CeedGetParent(x->ceed, &ceed_parent_x); CeedChk(ierr); ierr = CeedGetParent(y->ceed, &ceed_parent_y); CeedChk(ierr); @@ -592,7 +775,7 @@ int CeedVectorAXPY(CeedVector y, CeedScalar alpha, CeedVector x) { } // Default implementation - ierr = CeedVectorGetArray(y, CEED_MEM_HOST, &y_array); CeedChk(ierr); + ierr = CeedVectorGetArrayWrite(y, CEED_MEM_HOST, &y_array); CeedChk(ierr); ierr = CeedVectorGetArrayRead(x, CEED_MEM_HOST, &x_array); CeedChk(ierr); for (CeedInt i=0; iceed, &ceed_parent_w); CeedChk(ierr); ierr = CeedGetParent(x->ceed, &ceed_parent_x); CeedChk(ierr); ierr = CeedGetParent(y->ceed, &ceed_parent_y); CeedChk(ierr); - if ((ceed_parent_w != ceed_parent_y) || + if ((ceed_parent_w != ceed_parent_x) || (ceed_parent_w != ceed_parent_y)) // LCOV_EXCL_START return CeedError(w->ceed, CEED_ERROR_INCOMPATIBLE, "Vectors w, x, and y must be created by the same Ceed context"); // LCOV_EXCL_STOP + bool has_valid_array_x = true, has_valid_array_y = true; + ierr = CeedVectorHasValidArray(x, &has_valid_array_x); CeedChk(ierr); + if (!has_valid_array_x) + // LCOV_EXCL_START + return CeedError(x->ceed, CEED_ERROR_BACKEND, + "CeedVector x has no valid data, " + "must set data with CeedVectorSetValue or CeedVectorSetArray"); + // LCOV_EXCL_STOP + ierr = CeedVectorHasValidArray(y, &has_valid_array_y); CeedChk(ierr); + if (!has_valid_array_y) + // LCOV_EXCL_START + return CeedError(y->ceed, CEED_ERROR_BACKEND, + "CeedVector y has no valid data, " + "must set data with CeedVectorSetValue or CeedVectorSetArray"); + // LCOV_EXCL_STOP + // Backend implementation if (w->PointwiseMult) { ierr = w->PointwiseMult(w, x, y); CeedChk(ierr); @@ -649,7 +848,7 @@ int CeedVectorPointwiseMult(CeedVector w, CeedVector x, CeedVector y) { } // Default implementation - ierr = CeedVectorGetArray(w, CEED_MEM_HOST, &w_array); CeedChk(ierr); + ierr = CeedVectorGetArrayWrite(w, CEED_MEM_HOST, &w_array); CeedChk(ierr); if (x != w) { ierr = CeedVectorGetArrayRead(x, CEED_MEM_HOST, &x_array); CeedChk(ierr); } else { @@ -688,6 +887,15 @@ int CeedVectorPointwiseMult(CeedVector w, CeedVector x, CeedVector y) { int CeedVectorReciprocal(CeedVector vec) { int ierr; + bool has_valid_array = true; + ierr = CeedVectorHasValidArray(vec, &has_valid_array); CeedChk(ierr); + if (!has_valid_array) + // LCOV_EXCL_START + return CeedError(vec->ceed, CEED_ERROR_BACKEND, + "CeedVector has no valid data to compute reciprocal, " + "must set data with CeedVectorSetValue or CeedVectorSetArray"); + // LCOV_EXCL_STOP + // Check if vector data set if (!vec->state) // LCOV_EXCL_START @@ -704,7 +912,7 @@ int CeedVectorReciprocal(CeedVector vec) { CeedInt len; ierr = CeedVectorGetLength(vec, &len); CeedChk(ierr); CeedScalar *array; - ierr = CeedVectorGetArray(vec, CEED_MEM_HOST, &array); CeedChk(ierr); + ierr = CeedVectorGetArrayWrite(vec, CEED_MEM_HOST, &array); CeedChk(ierr); for (CeedInt i=0; i CEED_EPSILON) array[i] = 1./array[i]; diff --git a/interface/ceed.c b/interface/ceed.c index 6e890691a2..b714d3cfe8 100644 --- a/interface/ceed.c +++ b/interface/ceed.c @@ -828,11 +828,14 @@ int CeedInit(const char *resource, Ceed *ceed) { CEED_FTABLE_ENTRY(Ceed, QFunctionContextCreate), CEED_FTABLE_ENTRY(Ceed, OperatorCreate), CEED_FTABLE_ENTRY(Ceed, CompositeOperatorCreate), + CEED_FTABLE_ENTRY(CeedVector, HasValidArray), + CEED_FTABLE_ENTRY(CeedVector, HasBorrowedArrayOfType), CEED_FTABLE_ENTRY(CeedVector, SetArray), CEED_FTABLE_ENTRY(CeedVector, TakeArray), CEED_FTABLE_ENTRY(CeedVector, SetValue), CEED_FTABLE_ENTRY(CeedVector, GetArray), CEED_FTABLE_ENTRY(CeedVector, GetArrayRead), + CEED_FTABLE_ENTRY(CeedVector, GetArrayWrite), CEED_FTABLE_ENTRY(CeedVector, RestoreArray), CEED_FTABLE_ENTRY(CeedVector, RestoreArrayRead), CEED_FTABLE_ENTRY(CeedVector, Norm), @@ -853,6 +856,8 @@ int CeedInit(const char *resource, Ceed *ceed) { CEED_FTABLE_ENTRY(CeedQFunction, SetCUDAUserFunction), CEED_FTABLE_ENTRY(CeedQFunction, SetHIPUserFunction), CEED_FTABLE_ENTRY(CeedQFunction, Destroy), + CEED_FTABLE_ENTRY(CeedQFunctionContext, HasValidData), + CEED_FTABLE_ENTRY(CeedQFunctionContext, HasBorrowedDataOfType), CEED_FTABLE_ENTRY(CeedQFunctionContext, SetData), CEED_FTABLE_ENTRY(CeedQFunctionContext, TakeData), CEED_FTABLE_ENTRY(CeedQFunctionContext, GetData), diff --git a/julia/LibCEED.jl/examples/ex2-surface.jl b/julia/LibCEED.jl/examples/ex2-surface.jl index 17e361b7b9..27ea44f2b8 100644 --- a/julia/LibCEED.jl/examples/ex2-surface.jl +++ b/julia/LibCEED.jl/examples/ex2-surface.jl @@ -143,6 +143,7 @@ function run_ex2(; ceed_spec, dim, mesh_order, sol_order, num_qpts, prob_size, g u = CeedVector(ceed, sol_size) v = CeedVector(ceed, sol_size) # Initialize 'u' with sum of coordinates, x+y+z. + u[] = 0.0 @witharray_read( x_host = mesh_coords, size = (mesh_size÷dim, dim), diff --git a/julia/LibCEED.jl/src/ElemRestriction.jl b/julia/LibCEED.jl/src/ElemRestriction.jl index 8c4ab94c3f..2b21d58222 100644 --- a/julia/LibCEED.jl/src/ElemRestriction.jl +++ b/julia/LibCEED.jl/src/ElemRestriction.jl @@ -168,8 +168,8 @@ function apply(r::ElemRestriction, u::AbstractVector; tmode=NOTRANSPOSE) ruv = create_evector(r) else ruv = create_lvector(r) - ruv[] = 0.0 end + ruv[] = 0.0 apply!(r, uv, ruv; tmode=tmode) Vector(ruv) end diff --git a/python/ceed_vector.py b/python/ceed_vector.py index 7c0420c802..9a31240a39 100644 --- a/python/ceed_vector.py +++ b/python/ceed_vector.py @@ -191,6 +191,52 @@ def get_array_read(self, memtype=MEM_HOST): # return read only Numba array return nbcuda.from_cuda_array_interface(desc) + # Get Vector's data array in write-only mode + def get_array_write(self, memtype=MEM_HOST): + """Get write-only access to a Vector via the specified memory type. + All old values should be considered invalid. + + Args: + **memtype: memory type of the array being passed, default CEED_MEM_HOST + + Returns: + *array: Numpy or Numba array""" + + # Retrieve the length of the array + length_pointer = ffi.new("CeedInt *") + err_code = lib.CeedVectorGetLength(self._pointer[0], length_pointer) + self._ceed._check_error(err_code) + + # Setup the pointer's pointer + array_pointer = ffi.new("CeedScalar **") + + # libCEED call + err_code = lib.CeedVectorGetArrayWrite( + self._pointer[0], memtype, array_pointer) + self._ceed._check_error(err_code) + + # Return array created from buffer + if memtype == MEM_HOST: + # Create buffer object from returned pointer + buff = ffi.buffer( + array_pointer[0], + ffi.sizeof("CeedScalar") * + length_pointer[0]) + # return Numpy array + return np.frombuffer(buff, dtype=scalar_types[lib.CEED_SCALAR_TYPE]) + else: + # CUDA array interface + # https://numba.pydata.org/numba-doc/latest/cuda/cuda_array_interface.html + import numba.cuda as nbcuda + desc = { + 'shape': (length_pointer[0]), + 'typestr': '>f8', + 'data': (int(ffi.cast("intptr_t", array_pointer[0])), False), + 'version': 2 + } + # return Numba array + return nbcuda.from_cuda_array_interface(desc) + # Restore the Vector's data array def restore_array(self): """Restore an array obtained using get_array().""" @@ -264,6 +310,32 @@ def array_read(self, *shape, memtype=MEM_HOST): yield x self.restore_array_read() + @contextlib.contextmanager + def array_write(self, *shape, memtype=MEM_HOST): + """Context manager for write-only array access. + All old values should be considered invalid. + + Args: + shape (tuple): shape of returned numpy.array + **memtype: memory type of the array being passed, default CEED_MEM_HOST + + Returns: + np.array: write-only view of vector + + Examples: + Viewing contents of a reshaped libceed.Vector view: + + >>> vec = ceed.Vector(6) + >>> vec.set_value(1.3) + >>> with vec.array_read(2, 3) as x: + >>> print(x) + """ + x = self.get_array_write(memtype=memtype) + if shape: + x = x.reshape(shape) + yield x + self.restore_array() + # Get the length of a Vector def get_length(self): """Get the length of a Vector. diff --git a/python/tests/test-0-ceed.py b/python/tests/test-0-ceed.py index efd4078e18..ba8a9c79db 100644 --- a/python/tests/test-0-ceed.py +++ b/python/tests/test-0-ceed.py @@ -71,6 +71,7 @@ def test_005(ceed_resource): ceed = libceed.Ceed(ceed_resource) vec = ceed.Vector(5) + vec.set_value(0.0) array1 = vec.get_array() exception_raised = False diff --git a/python/tests/test-1-vector.py b/python/tests/test-1-vector.py index cc7e0464bc..60fc4c563a 100644 --- a/python/tests/test-1-vector.py +++ b/python/tests/test-1-vector.py @@ -88,6 +88,7 @@ def test_102(ceed_resource): n = 10 x = ceed.Vector(n) + x.set_value(0) # Two read accesses should not generate an error a = x.get_array_read() @@ -311,6 +312,26 @@ def test_123(ceed_resource, capsys): with x.array() as b: assert np.allclose(-.5 * a, b) +# ------------------------------------------------------------------------------- +# Test getArrayWrite to modify array +# ------------------------------------------------------------------------------- + + +def test_124(ceed_resource): + ceed = libceed.Ceed(ceed_resource) + + n = 10 + + x = ceed.Vector(n) + + with x.array_write() as a: + for i in range(len(a)): + a[i] = 3 * i + + with x.array_read() as a: + for i in range(len(a)): + assert a[i] == 3 * i + # ------------------------------------------------------------------------------- # Test modification of reshaped array # ------------------------------------------------------------------------------- @@ -321,6 +342,7 @@ def test_199(ceed_resource): ceed = libceed.Ceed(ceed_resource) vec = ceed.Vector(12) + vec.set_value(0.0) with vec.array(4, 3) as x: x[...] = np.eye(4, 3) diff --git a/python/tests/test-5-operator.py b/python/tests/test-5-operator.py index ab372c98e2..4ad1bbb2f3 100644 --- a/python/tests/test-5-operator.py +++ b/python/tests/test-5-operator.py @@ -300,6 +300,7 @@ def test_502(ceed_resource): op_setup.apply(x, qdata) # Apply mass matrix + u.set_value(0.0) with u.array() as u_array: for i in range(nu): u_array[2 * i] = 1. diff --git a/rust/libceed/src/operator.rs b/rust/libceed/src/operator.rs index 1b3d601bbb..7ddc43611f 100644 --- a/rust/libceed/src/operator.rs +++ b/rust/libceed/src/operator.rs @@ -1064,6 +1064,7 @@ impl<'a> Operator<'a> { /// /// // Manual diagonal computation /// let mut true_diag = ceed.vector(ndofs)?; + /// true_diag.set_value(0.0)?; /// for i in 0..ndofs { /// u.set_value(0.0); /// { @@ -1074,7 +1075,7 @@ impl<'a> Operator<'a> { /// op_mass.apply(&u, &mut v)?; /// /// { - /// let v_array = v.view_mut()?; + /// let v_array = v.view()?; /// let mut true_array = true_diag.view_mut()?; /// true_array[i] = v_array[i]; /// } @@ -1170,6 +1171,7 @@ impl<'a> Operator<'a> { /// /// // Manual diagonal computation /// let mut true_diag = ceed.vector(ndofs)?; + /// true_diag.set_value(0.0)?; /// for i in 0..ndofs { /// u.set_value(0.0); /// { @@ -1180,7 +1182,7 @@ impl<'a> Operator<'a> { /// op_mass.apply(&u, &mut v)?; /// /// { - /// let v_array = v.view_mut()?; + /// let v_array = v.view()?; /// let mut true_array = true_diag.view_mut()?; /// true_array[i] = v_array[i] + 1.0; /// } @@ -1301,6 +1303,7 @@ impl<'a> Operator<'a> { /// /// // Manual diagonal computation /// let mut true_diag = ceed.vector(ncomp * ncomp * ndofs)?; + /// true_diag.set_value(0.0)?; /// for i in 0..ndofs { /// for j in 0..ncomp { /// u.set_value(0.0); @@ -1312,7 +1315,7 @@ impl<'a> Operator<'a> { /// op_mass.apply(&u, &mut v)?; /// /// { - /// let v_array = v.view_mut()?; + /// let v_array = v.view()?; /// let mut true_array = true_diag.view_mut()?; /// for k in 0..ncomp { /// true_array[i * ncomp * ncomp + k * ncomp + j] = v_array[i + k * ndofs]; @@ -1439,6 +1442,7 @@ impl<'a> Operator<'a> { /// /// // Manual diagonal computation /// let mut true_diag = ceed.vector(ncomp * ncomp * ndofs)?; + /// true_diag.set_value(0.0)?; /// for i in 0..ndofs { /// for j in 0..ncomp { /// u.set_value(0.0); @@ -1450,7 +1454,7 @@ impl<'a> Operator<'a> { /// op_mass.apply(&u, &mut v)?; /// /// { - /// let v_array = v.view_mut()?; + /// let v_array = v.view()?; /// let mut true_array = true_diag.view_mut()?; /// for k in 0..ncomp { /// true_array[i * ncomp * ncomp + k * ncomp + j] = v_array[i + k * ndofs]; diff --git a/tests/junit.py b/tests/junit.py index 8e34ee0411..13899c8e7b 100755 --- a/tests/junit.py +++ b/tests/junit.py @@ -107,7 +107,7 @@ def run(test, backends): case.add_skipped_info('occa mode not supported {} {}'.format(test, ceed_resource)) elif 'Backend does not implement' in proc.stderr: case.add_skipped_info('not implemented {} {}'.format(test, ceed_resource)) - elif 'Can only provide to HOST memory' in proc.stderr: + elif 'Can only provide HOST memory for this backend' in proc.stderr: case.add_skipped_info('device memory not supported {} {}'.format(test, ceed_resource)) elif 'Test not implemented in single precision' in proc.stderr: case.add_skipped_info('not implemented {} {}'.format(test, ceed_resource)) diff --git a/tests/t102-vector-f.f90 b/tests/t102-vector-f.f90 index 785521b6f4..1cad0bf7f4 100644 --- a/tests/t102-vector-f.f90 +++ b/tests/t102-vector-f.f90 @@ -17,6 +17,7 @@ program test n=10 call ceedvectorcreate(ceed,n,x,err) + call ceedvectorsetvalue(x,0.0,err) call ceedvectorgetarrayread(x,ceed_mem_host,a,aoffset,err) call ceedvectorgetarrayread(x,ceed_mem_host,b,boffset,err) diff --git a/tests/t102-vector.c b/tests/t102-vector.c index ec093c93d1..3b31587923 100644 --- a/tests/t102-vector.c +++ b/tests/t102-vector.c @@ -13,6 +13,7 @@ int main(int argc, char **argv) { n = 10; CeedVectorCreate(ceed, n, &x); + CeedVectorSetValue(x, 0.0); // Two read accesses should not generate an error CeedVectorGetArrayRead(x, CEED_MEM_HOST, &a); diff --git a/tests/t109-vector-f.f90 b/tests/t109-vector-f.f90 index 30a0e4b252..529a01ad86 100644 --- a/tests/t109-vector-f.f90 +++ b/tests/t109-vector-f.f90 @@ -38,7 +38,7 @@ program test endif ! Getting array should not modify a - call ceedvectorgetarray(x,ceed_mem_host,b,boffset,err) + call ceedvectorgetarraywrite(x,ceed_mem_host,b,boffset,err) b(boffset+5) = -3.14 call ceedvectorrestorearray(x,b,boffset,err) diff=a(5)+3.14 diff --git a/tests/t109-vector.c b/tests/t109-vector.c index b06b547566..de812cb6d0 100644 --- a/tests/t109-vector.c +++ b/tests/t109-vector.c @@ -27,7 +27,7 @@ int main(int argc, char **argv) { // LCOV_EXCL_STOP // Getting array should not modify a - CeedVectorGetArray(x, CEED_MEM_HOST, &b); + CeedVectorGetArrayWrite(x, CEED_MEM_HOST, &b); b[5] = -3.14; CeedVectorRestoreArray(x, &b); @@ -37,7 +37,6 @@ int main(int argc, char **argv) { // LCOV_EXCL_STOP // Note: We do not need to free c because c == a was stack allocated. -// If libCEED allocated c, then free() would be required. CeedVectorDestroy(&x); CeedDestroy(&ceed); return 0; diff --git a/tests/t110-vector.c b/tests/t110-vector.c index 7f7a982afd..afa06f2744 100644 --- a/tests/t110-vector.c +++ b/tests/t110-vector.c @@ -13,6 +13,7 @@ int main(int argc, char **argv) { n = 10; CeedVectorCreate(ceed, n, &x); + CeedVectorSetValue(x, 0.0); // Two write accesses should generate an error CeedVectorGetArray(x, CEED_MEM_HOST, &a); diff --git a/tests/t111-vector.c b/tests/t111-vector.c index 092519c3e8..f92fe7a4c0 100644 --- a/tests/t111-vector.c +++ b/tests/t111-vector.c @@ -13,6 +13,7 @@ int main(int argc, char **argv) { n = 10; CeedVectorCreate(ceed, n, &x); + CeedVectorSetValue(x, 0.0); // Two write accesses should generate an error CeedVectorGetArray(x, CEED_MEM_HOST, &a); diff --git a/tests/t112-vector.c b/tests/t112-vector.c index a31840a99c..361fd3083a 100644 --- a/tests/t112-vector.c +++ b/tests/t112-vector.c @@ -13,6 +13,7 @@ int main(int argc, char **argv) { n = 10; CeedVectorCreate(ceed, n, &x); + CeedVectorSetValue(x, 0.0); // Write access followed by set value should generate an error CeedVectorGetArray(x, CEED_MEM_HOST, &a); diff --git a/tests/t113-vector.c b/tests/t113-vector.c index 6bf950db0b..399bc561c8 100644 --- a/tests/t113-vector.c +++ b/tests/t113-vector.c @@ -14,6 +14,7 @@ int main(int argc, char **argv) { n = 10; CeedVectorCreate(ceed, n, &x); + CeedVectorSetValue(x, 0.0); CeedVectorGetArrayRead(x, CEED_MEM_HOST, &a); // Write access with read access generate an error diff --git a/tests/t115-vector.c b/tests/t115-vector.c index 767d945e23..61dd9dd620 100644 --- a/tests/t115-vector.c +++ b/tests/t115-vector.c @@ -14,6 +14,7 @@ int main(int argc, char **argv) { n = 10; CeedVectorCreate(ceed, n, &x); + CeedVectorSetValue(x, 0.0); // Write access followed by read access should generate an error CeedVectorGetArray(x, CEED_MEM_HOST, &a); diff --git a/tests/t116-vector.c b/tests/t116-vector.c index edb270fd2d..8a7f39aa83 100644 --- a/tests/t116-vector.c +++ b/tests/t116-vector.c @@ -13,6 +13,7 @@ int main(int argc, char **argv) { n = 10; CeedVectorCreate(ceed, n, &x); + CeedVectorSetValue(x, 0.0); CeedVectorGetArray(x, CEED_MEM_HOST, &a); // Write access not restored should generate an error diff --git a/tests/t117-vector.c b/tests/t117-vector.c index 0db92b8d64..3fc7ca8939 100644 --- a/tests/t117-vector.c +++ b/tests/t117-vector.c @@ -12,6 +12,7 @@ int main(int argc, char **argv) { CeedInit(argv[1], &ceed); CeedVectorCreate(ceed, n, &x); + CeedVectorSetValue(x, 0.0); // Should error because no GetArray was not called CeedVectorRestoreArray(x, &a); diff --git a/tests/t118-vector.c b/tests/t118-vector.c index 50b59f4ce9..275dca58ed 100644 --- a/tests/t118-vector.c +++ b/tests/t118-vector.c @@ -13,6 +13,7 @@ int main(int argc, char **argv) { n = 10; CeedVectorCreate(ceed, n, &x); + CeedVectorSetValue(x, 0.0); // Write access followed by sync array should generate an error CeedVectorGetArray(x, CEED_MEM_HOST, &a); diff --git a/tests/t122-vector.c b/tests/t122-vector.c index 28f8c9c07c..09b1b2283d 100644 --- a/tests/t122-vector.c +++ b/tests/t122-vector.c @@ -23,12 +23,6 @@ int main(int argc, char **argv) { CeedVectorSetArray(y, CEED_MEM_HOST, CEED_COPY_VALUES, a); // Test multiplying two vectors into third - { - // Sync memtype to device for GPU backends - CeedMemType type = CEED_MEM_HOST; - CeedGetPreferredMemType(ceed, &type); - CeedVectorSyncArray(w, type); - } CeedVectorPointwiseMult(w, x, y); CeedVectorGetArrayRead(w, CEED_MEM_HOST, &b); for (CeedInt i=0; i1.0D-15) then +! LCOV_EXCL_START + write(*,*) 'Error writing array a(',i,')=',a(i+aoffset) +! LCOV_EXCL_STOP + endif + enddo + call ceedvectorrestorearrayread(x,a,aoffset,err) + + call ceedvectordestroy(x,err) + call ceeddestroy(ceed,err) + + end +!----------------------------------------------------------------------- diff --git a/tests/t124-vector.c b/tests/t124-vector.c new file mode 100644 index 0000000000..25fdeb95ec --- /dev/null +++ b/tests/t124-vector.c @@ -0,0 +1,32 @@ +/// @file +/// Test CeedVectorGetArrayWrite to modify array +/// \test Test CeedVectorGetArrayWrite to modify array +#include + +int main(int argc, char **argv) { + Ceed ceed; + CeedVector x; + const CeedInt n = 10; + CeedScalar *a; + + CeedInit(argv[1], &ceed); + + CeedVectorCreate(ceed, n, &x); + + CeedVectorGetArrayWrite(x, CEED_MEM_HOST, &a); + for (CeedInt i = 0; i < n; i++) + a[i] = 3*i; + CeedVectorRestoreArray(x, &a); + + CeedVectorGetArrayRead(x, CEED_MEM_HOST, (const CeedScalar **)&a); + for (CeedInt i = 0; i < n; i++) + if (a[i] != (CeedScalar)(3*i)) + // LCOV_EXCL_START + printf("Error writing array a[%d] = %f", i, a[i]); + // LCOV_EXCL_STOP + CeedVectorRestoreArrayRead(x, (const CeedScalar **)&a); + + CeedVectorDestroy(&x); + CeedDestroy(&ceed); + return 0; +} diff --git a/tests/t325-basis.c b/tests/t325-basis.c index 6d8691b24b..9c8a07772e 100644 --- a/tests/t325-basis.c +++ b/tests/t325-basis.c @@ -30,7 +30,7 @@ int main(int argc, char **argv) { q_weight, &b); CeedVectorCreate(ceed, Q*dim*num_comp, &In); - CeedVectorGetArray(In, CEED_MEM_HOST, &in); + CeedVectorGetArrayWrite(In, CEED_MEM_HOST, &in); for (int d=0; d