From b435c5a60b1833cef613d6281360673b9d9fef16 Mon Sep 17 00:00:00 2001 From: rezgarshakeri Date: Thu, 3 Feb 2022 19:53:08 -0700 Subject: [PATCH] Added CeedElemRestrictionIsOriented function --- backends/ref/ceed-ref-restriction.c | 52 ++++++++++++++++++++--------- include/ceed-impl.h | 1 + include/ceed/backend.h | 2 ++ interface/ceed-elemrestriction.c | 20 +++++++++++ 4 files changed, 60 insertions(+), 15 deletions(-) diff --git a/backends/ref/ceed-ref-restriction.c b/backends/ref/ceed-ref-restriction.c index 23e82e2a15..e9e70645ab 100644 --- a/backends/ref/ceed-ref-restriction.c +++ b/backends/ref/ceed-ref-restriction.c @@ -37,6 +37,8 @@ static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); v_offset = start*blk_size*elem_size*num_comp; + bool is_oriented; + ierr = CeedElemRestrictionIsOriented(r, &is_oriented); CeedChkBackend(ierr); ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr); if (t_mode == CEED_TRANSPOSE) { // Sum into for transpose mode, e-vec to l-vec @@ -85,14 +87,24 @@ static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, // Offsets provided, standard or blocked restriction // vv has shape [elem_size, num_comp, num_elem], row-major // uu has shape [nnodes, num_comp] - for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) - CeedPragmaSIMD - for (CeedInt k = 0; k < num_comp; k++) + if (!is_oriented) { + for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) + CeedPragmaSIMD + for (CeedInt k = 0; k < num_comp; k++) + CeedPragmaSIMD + for (CeedInt i = 0; i < elem_size*blk_size; i++) + vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] + = uu[impl->offsets[i+elem_size*e] + k*comp_stride]; + } else { + for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) CeedPragmaSIMD - for (CeedInt i = 0; i < elem_size*blk_size; i++) - vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] - = uu[impl->offsets[i+elem_size*e] + k*comp_stride] * - (impl->orient && impl->orient[i+elem_size*e] ? -1. : 1.); + for (CeedInt k = 0; k < num_comp; k++) + CeedPragmaSIMD + for (CeedInt i = 0; i < elem_size*blk_size; i++) + vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] + = uu[impl->offsets[i+elem_size*e] + k*comp_stride] * + (impl->orient && impl->orient[i+elem_size*e] ? -1. : 1.); + } } } else { // Restriction from E-vector to L-vector @@ -132,14 +144,24 @@ static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, // Offsets provided, standard or blocked restriction // uu has shape [elem_size, num_comp, num_elem] // vv has shape [nnodes, num_comp] - for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) - for (CeedInt k = 0; k < num_comp; k++) - for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) - // Iteration bound set to discard padding elements - for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) - vv[impl->offsets[j+e*elem_size] + k*comp_stride] - += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] * - (impl->orient && impl->orient[j+e*elem_size] ? -1. : 1.); + if (!is_oriented) { + for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) + for (CeedInt k = 0; k < num_comp; k++) + for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) + // Iteration bound set to discard padding elements + for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) + vv[impl->offsets[j+e*elem_size] + k*comp_stride] + += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset]; + } else { + for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) + for (CeedInt k = 0; k < num_comp; k++) + for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) + // Iteration bound set to discard padding elements + for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) + vv[impl->offsets[j+e*elem_size] + k*comp_stride] + += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] * + (impl->orient && impl->orient[j+e*elem_size] ? -1. : 1.); + } } } ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr); diff --git a/include/ceed-impl.h b/include/ceed-impl.h index 2b8ccefba0..510e523ec8 100644 --- a/include/ceed-impl.h +++ b/include/ceed-impl.h @@ -180,6 +180,7 @@ struct CeedElemRestriction_private { CeedInt *strides; /* strides between [nodes, components, elements] */ CeedInt layout[3]; /* E-vector layout [nodes, components, elements] */ uint64_t num_readers; /* number of instances of offset read only access */ + bool is_oriented; /* flag for oriented restriction */ void *data; /* place for the backend to store any data */ }; diff --git a/include/ceed/backend.h b/include/ceed/backend.h index 73f5db242d..78f15ae9c4 100644 --- a/include/ceed/backend.h +++ b/include/ceed/backend.h @@ -148,6 +148,8 @@ CEED_EXTERN int CeedElemRestrictionRestoreOffsets(CeedElemRestriction rstr, const CeedInt **offsets); CEED_EXTERN int CeedElemRestrictionIsStrided(CeedElemRestriction rstr, bool *is_strided); +CEED_EXTERN int CeedElemRestrictionIsOriented(CeedElemRestriction rstr, + bool *is_oriented); CEED_EXTERN int CeedElemRestrictionHasBackendStrides(CeedElemRestriction rstr, bool *has_backend_strides); CEED_EXTERN int CeedElemRestrictionGetELayout(CeedElemRestriction rstr, diff --git a/interface/ceed-elemrestriction.c b/interface/ceed-elemrestriction.c index 65d5a8431d..a18221faf4 100644 --- a/interface/ceed-elemrestriction.c +++ b/interface/ceed-elemrestriction.c @@ -152,6 +152,21 @@ int CeedElemRestrictionIsStrided(CeedElemRestriction rstr, bool *is_strided) { return CEED_ERROR_SUCCESS; } +/** + @brief Get oriented status of a CeedElemRestriction + + @param rstr CeedElemRestriction + @param[out] is_oriented Variable to store oriented status, 1 if oriented else 0 + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedElemRestrictionIsOriented(CeedElemRestriction rstr, bool *is_oriented) { + *is_oriented = rstr->is_oriented; + return CEED_ERROR_SUCCESS; +} + /** @brief Get the backend stride status of a CeedElemRestriction @@ -352,6 +367,7 @@ int CeedElemRestrictionCreate(Ceed ceed, CeedInt num_elem, CeedInt elem_size, (*rstr)->l_size = l_size; (*rstr)->num_blk = num_elem; (*rstr)->blk_size = 1; + (*rstr)->is_oriented = 0; ierr = ceed->ElemRestrictionCreate(mem_type, copy_mode, offsets, *rstr); CeedChk(ierr); return CEED_ERROR_SUCCESS; @@ -423,6 +439,7 @@ int CeedElemRestrictionCreateOriented(Ceed ceed, CeedInt num_elem, (*rstr)->l_size = l_size; (*rstr)->num_blk = num_elem; (*rstr)->blk_size = 1; + (*rstr)->is_oriented = 1; ierr = ceed->ElemRestrictionCreateOriented(mem_type, copy_mode, offsets, orient, *rstr); CeedChk(ierr); return CEED_ERROR_SUCCESS; @@ -485,6 +502,7 @@ int CeedElemRestrictionCreateStrided(Ceed ceed, CeedInt num_elem, (*rstr)->l_size = l_size; (*rstr)->num_blk = num_elem; (*rstr)->blk_size = 1; + (*rstr)->is_oriented = 0; ierr = CeedMalloc(3, &(*rstr)->strides); CeedChk(ierr); for (int i=0; i<3; i++) (*rstr)->strides[i] = strides[i]; @@ -571,6 +589,7 @@ int CeedElemRestrictionCreateBlocked(Ceed ceed, CeedInt num_elem, (*rstr)->l_size = l_size; (*rstr)->num_blk = num_blk; (*rstr)->blk_size = blk_size; + (*rstr)->is_oriented = 0; ierr = ceed->ElemRestrictionCreateBlocked(CEED_MEM_HOST, CEED_OWN_POINTER, (const CeedInt *) blk_offsets, *rstr); CeedChk(ierr); if (copy_mode == CEED_OWN_POINTER) { @@ -636,6 +655,7 @@ int CeedElemRestrictionCreateBlockedStrided(Ceed ceed, CeedInt num_elem, (*rstr)->l_size = l_size; (*rstr)->num_blk = num_blk; (*rstr)->blk_size = blk_size; + (*rstr)->is_oriented = 0; ierr = CeedMalloc(3, &(*rstr)->strides); CeedChk(ierr); for (int i=0; i<3; i++) (*rstr)->strides[i] = strides[i];