-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,8 @@ static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, | |
ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); | ||
v_offset = start*blk_size*elem_size*num_comp; | ||
|
||
bool is_oriented; | ||
ierr = CeedElemRestrictionIsOriented(r, &is_oriented); CeedChkBackend(ierr); | ||
ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr); | ||
if (t_mode == CEED_TRANSPOSE) { | ||
// Sum into for transpose mode, e-vec to l-vec | ||
|
@@ -85,14 +87,24 @@ static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, | |
// Offsets provided, standard or blocked restriction | ||
// vv has shape [elem_size, num_comp, num_elem], row-major | ||
// uu has shape [nnodes, num_comp] | ||
for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) | ||
CeedPragmaSIMD | ||
for (CeedInt k = 0; k < num_comp; k++) | ||
if (!is_oriented) { | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
rezgarshakeri
Author
Collaborator
|
||
for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) | ||
CeedPragmaSIMD | ||
for (CeedInt k = 0; k < num_comp; k++) | ||
CeedPragmaSIMD | ||
for (CeedInt i = 0; i < elem_size*blk_size; i++) | ||
vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] | ||
= uu[impl->offsets[i+elem_size*e] + k*comp_stride]; | ||
} else { | ||
for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) | ||
CeedPragmaSIMD | ||
for (CeedInt i = 0; i < elem_size*blk_size; i++) | ||
vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] | ||
= uu[impl->offsets[i+elem_size*e] + k*comp_stride] * | ||
(impl->orient && impl->orient[i+elem_size*e] ? -1. : 1.); | ||
for (CeedInt k = 0; k < num_comp; k++) | ||
CeedPragmaSIMD | ||
for (CeedInt i = 0; i < elem_size*blk_size; i++) | ||
vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] | ||
= uu[impl->offsets[i+elem_size*e] + k*comp_stride] * | ||
(impl->orient && impl->orient[i+elem_size*e] ? -1. : 1.); | ||
} | ||
} | ||
} else { | ||
// Restriction from E-vector to L-vector | ||
|
@@ -132,14 +144,24 @@ static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, | |
// Offsets provided, standard or blocked restriction | ||
// uu has shape [elem_size, num_comp, num_elem] | ||
// vv has shape [nnodes, num_comp] | ||
for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) | ||
for (CeedInt k = 0; k < num_comp; k++) | ||
for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) | ||
// Iteration bound set to discard padding elements | ||
for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) | ||
vv[impl->offsets[j+e*elem_size] + k*comp_stride] | ||
+= uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] * | ||
(impl->orient && impl->orient[j+e*elem_size] ? -1. : 1.); | ||
if (!is_oriented) { | ||
for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) | ||
for (CeedInt k = 0; k < num_comp; k++) | ||
for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) | ||
// Iteration bound set to discard padding elements | ||
for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) | ||
vv[impl->offsets[j+e*elem_size] + k*comp_stride] | ||
+= uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset]; | ||
} else { | ||
for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) | ||
for (CeedInt k = 0; k < num_comp; k++) | ||
for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) | ||
// Iteration bound set to discard padding elements | ||
for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) | ||
vv[impl->offsets[j+e*elem_size] + k*comp_stride] | ||
+= uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] * | ||
(impl->orient && impl->orient[j+e*elem_size] ? -1. : 1.); | ||
} | ||
} | ||
} | ||
ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr); | ||
|
Oh, I would do this outside of this function so you don't need so much repetition. I'd imagine that the compiler would have a fast
is oriented == false
path and a slower oriented path.