Skip to content

Commit

Permalink
Added CeedElemRestrictionIsOriented function
Browse files Browse the repository at this point in the history
  • Loading branch information
rezgarshakeri committed Feb 4, 2022
1 parent c774505 commit b435c5a
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 15 deletions.
52 changes: 37 additions & 15 deletions backends/ref/ceed-ref-restriction.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
v_offset = start*blk_size*elem_size*num_comp;

bool is_oriented;
ierr = CeedElemRestrictionIsOriented(r, &is_oriented); CeedChkBackend(ierr);
ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr);
if (t_mode == CEED_TRANSPOSE) {
// Sum into for transpose mode, e-vec to l-vec
Expand Down Expand Up @@ -85,14 +87,24 @@ static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
// Offsets provided, standard or blocked restriction
// vv has shape [elem_size, num_comp, num_elem], row-major
// uu has shape [nnodes, num_comp]
for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
CeedPragmaSIMD
for (CeedInt k = 0; k < num_comp; k++)
if (!is_oriented) {

This comment has been minimized.

Copy link
@jeremylt

jeremylt Feb 4, 2022

Member

Oh, I would do this outside of this function so you don't need so much repetition. I'd imagine that the compiler would have a fast is oriented == false path and a slower oriented path.

This comment has been minimized.

Copy link
@rezgarshakeri

rezgarshakeri Feb 4, 2022

Author Collaborator

I was going to ask if this is a good place! where exactly?!

This comment has been minimized.

Copy link
@jeremylt

jeremylt Feb 4, 2022

Member

I was thinking of checking down around line 285?

This comment has been minimized.

Copy link
@jedbrown

jedbrown Feb 4, 2022

Member

@rezgarshakeri What optimization level were you at?

I hope we can do this without blowing up the variants. What if you use (is_oriented && impl->orient[i + elem_size * e] ? -1. : 1.)? The strict aliasing rules should be able to make these equivalent, but not at all optimization levels and it's not great to rely on strict aliasing analysis. I think with the bool hoisted out, the compiler should generate good code both ways.

for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
CeedPragmaSIMD
for (CeedInt k = 0; k < num_comp; k++)
CeedPragmaSIMD
for (CeedInt i = 0; i < elem_size*blk_size; i++)
vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset]
= uu[impl->offsets[i+elem_size*e] + k*comp_stride];
} else {
for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
CeedPragmaSIMD
for (CeedInt i = 0; i < elem_size*blk_size; i++)
vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset]
= uu[impl->offsets[i+elem_size*e] + k*comp_stride] *
(impl->orient && impl->orient[i+elem_size*e] ? -1. : 1.);
for (CeedInt k = 0; k < num_comp; k++)
CeedPragmaSIMD
for (CeedInt i = 0; i < elem_size*blk_size; i++)
vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset]
= uu[impl->offsets[i+elem_size*e] + k*comp_stride] *
(impl->orient && impl->orient[i+elem_size*e] ? -1. : 1.);
}
}
} else {
// Restriction from E-vector to L-vector
Expand Down Expand Up @@ -132,14 +144,24 @@ static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
// Offsets provided, standard or blocked restriction
// uu has shape [elem_size, num_comp, num_elem]
// vv has shape [nnodes, num_comp]
for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
for (CeedInt k = 0; k < num_comp; k++)
for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size)
// Iteration bound set to discard padding elements
for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++)
vv[impl->offsets[j+e*elem_size] + k*comp_stride]
+= uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] *
(impl->orient && impl->orient[j+e*elem_size] ? -1. : 1.);
if (!is_oriented) {
for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
for (CeedInt k = 0; k < num_comp; k++)
for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size)
// Iteration bound set to discard padding elements
for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++)
vv[impl->offsets[j+e*elem_size] + k*comp_stride]
+= uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset];
} else {
for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
for (CeedInt k = 0; k < num_comp; k++)
for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size)
// Iteration bound set to discard padding elements
for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++)
vv[impl->offsets[j+e*elem_size] + k*comp_stride]
+= uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] *
(impl->orient && impl->orient[j+e*elem_size] ? -1. : 1.);
}
}
}
ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr);
Expand Down
1 change: 1 addition & 0 deletions include/ceed-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ struct CeedElemRestriction_private {
CeedInt *strides; /* strides between [nodes, components, elements] */
CeedInt layout[3]; /* E-vector layout [nodes, components, elements] */
uint64_t num_readers; /* number of instances of offset read only access */
bool is_oriented; /* flag for oriented restriction */
void *data; /* place for the backend to store any data */
};

Expand Down
2 changes: 2 additions & 0 deletions include/ceed/backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ CEED_EXTERN int CeedElemRestrictionRestoreOffsets(CeedElemRestriction rstr,
const CeedInt **offsets);
CEED_EXTERN int CeedElemRestrictionIsStrided(CeedElemRestriction rstr,
bool *is_strided);
CEED_EXTERN int CeedElemRestrictionIsOriented(CeedElemRestriction rstr,
bool *is_oriented);
CEED_EXTERN int CeedElemRestrictionHasBackendStrides(CeedElemRestriction rstr,
bool *has_backend_strides);
CEED_EXTERN int CeedElemRestrictionGetELayout(CeedElemRestriction rstr,
Expand Down
20 changes: 20 additions & 0 deletions interface/ceed-elemrestriction.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,21 @@ int CeedElemRestrictionIsStrided(CeedElemRestriction rstr, bool *is_strided) {
return CEED_ERROR_SUCCESS;
}

/**
@brief Get oriented status of a CeedElemRestriction
@param rstr CeedElemRestriction
@param[out] is_oriented Variable to store oriented status, 1 if oriented else 0
@return An error code: 0 - success, otherwise - failure
@ref Backend
**/
int CeedElemRestrictionIsOriented(CeedElemRestriction rstr, bool *is_oriented) {
*is_oriented = rstr->is_oriented;
return CEED_ERROR_SUCCESS;
}

/**
@brief Get the backend stride status of a CeedElemRestriction
Expand Down Expand Up @@ -352,6 +367,7 @@ int CeedElemRestrictionCreate(Ceed ceed, CeedInt num_elem, CeedInt elem_size,
(*rstr)->l_size = l_size;
(*rstr)->num_blk = num_elem;
(*rstr)->blk_size = 1;
(*rstr)->is_oriented = 0;
ierr = ceed->ElemRestrictionCreate(mem_type, copy_mode, offsets, *rstr);
CeedChk(ierr);
return CEED_ERROR_SUCCESS;
Expand Down Expand Up @@ -423,6 +439,7 @@ int CeedElemRestrictionCreateOriented(Ceed ceed, CeedInt num_elem,
(*rstr)->l_size = l_size;
(*rstr)->num_blk = num_elem;
(*rstr)->blk_size = 1;
(*rstr)->is_oriented = 1;
ierr = ceed->ElemRestrictionCreateOriented(mem_type, copy_mode,
offsets, orient, *rstr); CeedChk(ierr);
return CEED_ERROR_SUCCESS;
Expand Down Expand Up @@ -485,6 +502,7 @@ int CeedElemRestrictionCreateStrided(Ceed ceed, CeedInt num_elem,
(*rstr)->l_size = l_size;
(*rstr)->num_blk = num_elem;
(*rstr)->blk_size = 1;
(*rstr)->is_oriented = 0;
ierr = CeedMalloc(3, &(*rstr)->strides); CeedChk(ierr);
for (int i=0; i<3; i++)
(*rstr)->strides[i] = strides[i];
Expand Down Expand Up @@ -571,6 +589,7 @@ int CeedElemRestrictionCreateBlocked(Ceed ceed, CeedInt num_elem,
(*rstr)->l_size = l_size;
(*rstr)->num_blk = num_blk;
(*rstr)->blk_size = blk_size;
(*rstr)->is_oriented = 0;
ierr = ceed->ElemRestrictionCreateBlocked(CEED_MEM_HOST, CEED_OWN_POINTER,
(const CeedInt *) blk_offsets, *rstr); CeedChk(ierr);
if (copy_mode == CEED_OWN_POINTER) {
Expand Down Expand Up @@ -636,6 +655,7 @@ int CeedElemRestrictionCreateBlockedStrided(Ceed ceed, CeedInt num_elem,
(*rstr)->l_size = l_size;
(*rstr)->num_blk = num_blk;
(*rstr)->blk_size = blk_size;
(*rstr)->is_oriented = 0;
ierr = CeedMalloc(3, &(*rstr)->strides); CeedChk(ierr);
for (int i=0; i<3; i++)
(*rstr)->strides[i] = strides[i];
Expand Down

0 comments on commit b435c5a

Please sign in to comment.