Skip to content

Commit

Permalink
Replace explicit passing of work arrays with VLA.
Browse files Browse the repository at this point in the history
Start with plumbing for element mappings.  The first implementation will
just use nodal coordinates.

Also, replace some hairy indexing with VLA pointer types.  External
tests show that GCC produces better inner-loop code for

  double (*A)[N];      /* A[i][j] in inner loop */

than

  double *A;           /* A[i*N+j] in inner loop */

In fact, the assembly produced at -O3 with the VLA-pointer seems roughly
equivalent to hoisting

  double *restrict a = &A[i*N];        /* a[j] in inner loop */

out of the innermost loop.  GCC -Wcast-qual produces a warning for

  const double *a;
  const double (*b)[2] = (const double (*)[2])a;

claiming that `cast discards qualifiers from pointer target type' but it
is not clear to me that this is actually the case.

Signed-off-by: Jed Brown <jed@59A2.org>
  • Loading branch information
jedbrown committed Nov 16, 2008
1 parent cc88196 commit 5ea8123
Show file tree
Hide file tree
Showing 9 changed files with 238 additions and 248 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ if (Dohp_USE_DEBUG)
endif (Dohp_USE_DEBUG)

if (PEDANTIC_WARNINGS)
set (DEFAULT_PEDANTIC_FLAGS "-pedantic -Wall -Wextra -Wundef -Wshadow -Wpointer-arith -Wbad-function-cast -Wcast-qual -Wcast-align -Wwrite-strings -Wconversion -Wlogical-op -Wsign-compare -Waggregate-return -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wmissing-format-attribute -Wmissing-noreturn -Wpacked -Wdisabled-optimization -Wmultichar -Wformat-nonliteral -Wformat-security -Wformat-y2k -Wendif-labels -Wdeclaration-after-statement -Wold-style-definition -Winvalid-pch -Wmissing-field-initializers -Wvariadic-macros -Wunsafe-loop-optimizations -Wvolatile-register-var -Wvla -Wstrict-aliasing -funit-at-a-time -Wno-sign-conversion")
set (DEFAULT_PEDANTIC_FLAGS "-pedantic -Wall -Wextra -Wundef -Wshadow -Wpointer-arith -Wbad-function-cast -Wcast-qual -Wcast-align -Wwrite-strings -Wconversion -Wlogical-op -Wsign-compare -Waggregate-return -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wmissing-format-attribute -Wmissing-noreturn -Wpacked -Wdisabled-optimization -Wmultichar -Wformat-nonliteral -Wformat-security -Wformat-y2k -Wendif-labels -Wdeclaration-after-statement -Wold-style-definition -Winvalid-pch -Wmissing-field-initializers -Wvariadic-macros -Wunsafe-loop-optimizations -Wvolatile-register-var -Wstrict-aliasing -funit-at-a-time -Wno-sign-conversion")
#set (DEFAULT_PEDANTIC_FLAGS "-Wunreachable-code -Wfloat-equal -Wc++-compat")
#set (DEFAULT_PEDANTIC_FLAGS "-pedantic -Wall -Wextra -Winline -Wshadow -Wconversion -Wlogical-op -Wmissing-prototypes -Wvla")
#set (DEFAULT_PEDANTIC_FLAGS "${DEFAULT_PEDANTIC_FLAGS} -Wno-sign-conversion -Wwrite-strings -Wstrict-aliasing -Wcast-align -fstrict-aliasing")
Expand Down
16 changes: 13 additions & 3 deletions include/dohpjacobi.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,23 @@ typedef struct s_dRule {
* Indicates whether or not to apply the transpose of an interpolation/derivative matrix.
*
*/
/* Transpose selector; both enumerators carry explicit, non-consecutive values. */
typedef enum {
  dTRANSPOSE_NO  = 113634,
  dTRANSPOSE_YES = 853467
} dTransposeMode;
/* Transpose selector.  Values are spelled out explicitly: the numbering
 * starts at 30001 and dTRANSPOSE_YES is the next integer. */
typedef enum {
  dTRANSPOSE_NO  = 30001,
  dTRANSPOSE_YES = 30002
} dTransposeMode;

/**
* Indicates what type of basis operation to do, see dEFSApply().
*
*/
/* Basis operation selector.  Only the first value is given explicitly;
 * the remaining enumerators count up from it (40002, 40003, 40004). */
typedef enum {
  dAPPLY_INTERP = 40001,
  dAPPLY_INTERP_TRANSPOSE,
  dAPPLY_GRAD,
  dAPPLY_GRAD_TRANSPOSE
} dApplyMode;
/* Basis operation selector.  Values are consecutive starting at 40001 and
 * are written out explicitly; each operation is immediately followed by
 * its _TRANSPOSE counterpart. */
typedef enum {
  dAPPLY_INTERP            = 40001,
  dAPPLY_INTERP_TRANSPOSE  = 40002,
  dAPPLY_GRAD              = 40003,
  dAPPLY_GRAD_TRANSPOSE    = 40004,
  dAPPLY_SYMGRAD           = 40005,
  dAPPLY_SYMGRAD_TRANSPOSE = 40006
} dApplyMode;

/**
* Handle for setting up #dRule and #dEFS contexts.
Expand Down Expand Up @@ -147,7 +157,7 @@ EXTERN dErr dEFSView(dEFS efs,PetscViewer viewer);
EXTERN dErr dEFSGetSizes(dEFS efs,dInt*,dInt *inodes,dInt *total);
EXTERN dErr dEFSGetTensorNodes(dEFS,dInt*,dInt*,dReal**);
EXTERN dErr dEFSGetRule(dEFS efs,dRule *rule);
EXTERN dErr dEFSApply(dEFS efs,dInt dofs,dInt *wlen,dScalar **work,const dScalar *in,dScalar *out,dApplyMode amode,InsertMode imode);
EXTERN dErr dEFSApply(dEFS,const dReal[],dInt,const dScalar[],dScalar[restrict],dApplyMode,InsertMode);
EXTERN dErr dJacobiPropogateDown(dJacobi,const struct dMeshAdjacency*,dInt[]);
EXTERN dErr dJacobiGetNodeCount(dJacobi,dInt,const dEntTopology[],const dInt[],dInt[],dInt[]);

Expand Down
34 changes: 33 additions & 1 deletion include/dohptype.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,8 @@ static inline dScalar dSqr(dScalar a) { return a * a; }
#define dNAME_LEN 256
#define dSTR_LEN 256

#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
/* defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__) */
#if 1
# define dUNUSED __attribute__((unused))
# define dLIKELY(x) __builtin_expect(!!(x),1)
# define dUNLIKELY(x) __builtin_expect(!!(x),0)
Expand All @@ -152,6 +153,37 @@ static inline dScalar dSqr(dScalar a) { return a * a; }
# define dUNLIKELY(x) (x)
#endif

#define dCACHE_LINE 64L /* my cache lines are 64 bytes long */
/* Number of dReal / dScalar values that fit in one cache line.  The
 * expansions are parenthesized so each behaves as a single operand; without
 * the parentheses `n % dRPCL' would parse as `(n % dCACHE_LINE)/sizeof(dReal)'. */
#define dRPCL (dCACHE_LINE/sizeof(dReal))
#define dSPCL (dCACHE_LINE/sizeof(dScalar))

#define dDEFAULT_ALIGN 16L /* SSE instructions require 16 byte alignment */

/* Round the pointer p up to the next cache-line / default-aligned address.
 * Both forward to dNextAlignedAddr() and therefore yield a void*. */
#define dNextCacheAligned(p) dNextAlignedAddr(dCACHE_LINE,(p))
#define dNextAligned(p) dNextAlignedAddr(dDEFAULT_ALIGN,(p))

/** Returns the next address satisfying the given alignment.
*
* The address is rounded up: if @a ptr is already a multiple of @a alignment
* it is returned unchanged, otherwise the next higher multiple is returned.
* This function cannot fail, but it performs no checking: the result is only
* meaningful when @a alignment is a power of 2, because the rounding is done
* by masking with (alignment-1).
*
* @param alignment must be a power of 2
* @param ptr The pointer
*
* @return aligned address
*/
static inline void *dNextAlignedAddr(size_t alignment,void *ptr)
{
  const uintptr_t base = (uintptr_t)ptr;
  const uintptr_t mask = (uintptr_t)alignment-1; /* low bits that must be zero in the result */
  return (void*)((base + mask) & ~mask);         /* bump past the boundary, then clear the low bits */
}

/* Needs to be a macro because of pointer arithmetic.
 *
 * Sets p to the first dDEFAULT_ALIGN-aligned address at or after mem, then
 * advances mem to the first aligned address at or after (p)+(n).  The offset
 * n is applied to p, so it is scaled by the type p points to.  Note that mem
 * and p are each evaluated more than once. */
#define dMemClaim(mem,n,p) do {                 \
    (p) = dNextAligned(mem);                    \
    (mem) = dNextAligned((p) + (n));            \
  } while (0)

#define dFunctionBegin \
{\
if (petscstack && (petscstack->currentsize < PETSCSTACKSIZE)) { \
Expand Down
2 changes: 1 addition & 1 deletion include/private/jacimpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ struct _dEFSOps {
dErr (*view)(dEFS,PetscViewer);
dErr (*getSizes)(dEFS,dInt*,dInt*,dInt*); /**< topological dimension, number of interior nodes, total number of nodes */
dErr (*getTensorNodes)(dEFS,dInt*,dInt*,dReal**);
dErr (*apply)(dEFS,dInt,dInt*,dScalar**restrict,const dScalar[],dScalar[],dApplyMode,InsertMode);
dErr (*apply)(dEFS,const dReal[],dInt,const dScalar[],dScalar[],dApplyMode,InsertMode);
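/**< dofs/node, work length, work, modal values, nodal values
 *   NOTE(review): this list was written for the signature that took explicit work arrays (dInt*, dScalar**);
 *   the signature that takes a leading const dReal[] has no work-length/work arguments -- update this
 *   description once the meaning of the const dReal[] argument is confirmed. */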
dErr (*scatterInt)(dEFS,dInt,dInt,const dScalar[],dScalar[],InsertMode,ScatterMode); /**< dofs/node, offset of interior dofs, array, local array */
/**
Expand Down

0 comments on commit 5ea8123

Please sign in to comment.