Skip to content

Commit

Permalink
Use VLA-pointers in TensorMult_Hex: slightly faster, more readable.
Browse files Browse the repository at this point in the history
Log FLOPS in TensorMult_Hex, need to register an event for this.

Lower-dimensional entities now set more outgoing stuff, should slightly
simplify dimension-independent programming.

Signed-off-by: Jed Brown <jed@59A2.org>
  • Loading branch information
jedbrown committed Nov 17, 2008
1 parent 5ea8123 commit 373f802
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 36 deletions.
6 changes: 3 additions & 3 deletions src/fs/interface/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,9 +326,9 @@ dErr dFSMatSetValuesExpanded(dFS fs,Mat A,dInt m,const dInt idxm[],dInt n,const
dFunctionBegin;
dValidHeader(fs,dFS_COOKIE,1);
dValidHeader(A,MAT_COOKIE,2);
dValidPointer(idxm,4);dCHK(err);
dValidPointer(idxn,6);dCHK(err);
dValidPointer(v,7);dCHK(err);
dValidPointer(idxm,4);
dValidPointer(idxn,6);
dValidPointer(v,7);
C = (fs->assemblefull) ? fs->C : fs->Cp;
err = MatGetRowIJ(C,0,dFALSE,dFALSE,&cn,&ci,&cj,&done);dCHK(err);
if (!done) dERROR(1,"Could not get indices");
Expand Down
2 changes: 0 additions & 2 deletions src/fs/tests/ex1.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ static dErr useFS(dFS fs)
{
Vec x,y,g;
dScalar *xx;
dReal norm;
dInt nx=-1;
dErr err;

Expand Down Expand Up @@ -178,7 +177,6 @@ static dErr useFS(dFS fs)
* the expanded sum is larger than the global sum by 32. */
if (dAbs(ysum-gsum-32) > 1e-14) dERROR(1,"Unexpected expanded sum %f != %f + 32",ysum,gsum);
}
if (norm != 0) dERROR(1,"Norm does not agree.");
err = VecDestroy(x);dCHK(err);
err = VecDestroy(y);dCHK(err);
err = VecDestroy(g);dCHK(err);
Expand Down
65 changes: 49 additions & 16 deletions src/jacobi/impls/tensor/efstopo.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,16 @@ static dErr dEFSGetTensorNodes_Tensor_Line(dEFS efs,dInt *dim,dInt tsize[restric

dFunctionBegin;
if (dim) *dim = 1;
if (tsize) tsize[0] = b[0]->P;
x[0] = b[0]->node;
if (tsize) {
tsize[0] = b[0]->P;
tsize[1] = 1;
tsize[2] = 1;
}
if (x) {
x[0] = b[0]->node;
x[1] = NULL;
x[2] = NULL;
}
dFunctionReturn(0);
}

Expand All @@ -203,9 +211,15 @@ static dErr dEFSGetTensorNodes_Tensor_Quad(dEFS efs,dInt *dim,dInt tsize[restric

dFunctionBegin;
if (dim) *dim = 2;
for (dInt i=0; i<2; i++) {
if (tsize) tsize[i] = b[i]->P;
if (x) x[i] = b[i]->node;
if (tsize) {
tsize[0] = b[0]->P;
tsize[1] = b[1]->P;
tsize[2] = 1;
}
if (x) {
x[0] = b[0]->node;
x[1] = b[1]->node;
x[2] = NULL;
}
dFunctionReturn(0);
}
Expand All @@ -216,9 +230,15 @@ static dErr dEFSGetTensorNodes_Tensor_Hex(dEFS efs,dInt *dim,dInt tsize[restrict

dFunctionBegin;
if (dim) *dim = 3;
for (dInt i=0; i<3; i++) {
if (tsize) tsize[i] = b[i]->P;
if (x) x[i] = b[i]->node;
if (tsize) {
tsize[0] = b[0]->P;
tsize[1] = b[1]->P;
tsize[2] = b[2]->P;
}
if (x) {
x[0] = b[0]->node;
x[1] = b[1]->node;
x[2] = b[2]->node;
}
dFunctionReturn(0);
}
Expand Down Expand Up @@ -417,18 +437,27 @@ static dErr TensorMult_Quad(dInt D,const dInt P[2],const dInt Q[2],dReal *A[2],c
*
* @return err
*/
static dErr TensorMult_Hex(dInt D,const dInt P[3],const dInt Q[3],dReal *A[3],const dScalar f[],dScalar g[restrict],InsertMode imode)
static dErr TensorMult_Hex(dInt D,const dInt P[3],const dInt Q[3],dReal *A[3],const dScalar in[],dScalar out[restrict],InsertMode imode)
{
dScalar a[Q[0]*P[1]*P[2]*D],b[Q[0]*Q[1]*P[2]*D];
dScalar amem[Q[0]*P[1]*P[2]*D],bmem[Q[0]*Q[1]*P[2]*D];
dScalar (*restrict a)[P[1]][P[2]][D] = (dScalar (*restrict)[P[1]][P[2]][D])amem;
dScalar (*restrict b)[Q[1]][P[2]][D] = (dScalar (*restrict)[Q[1]][P[2]][D])bmem;
dScalar (*restrict g)[Q[1]][Q[2]][D] = (dScalar (*restrict)[Q[1]][Q[2]][D])out;
const dScalar (*restrict f)[P[1]][P[2]][D] = (const dScalar (*)[P[1]][P[2]][D])in;
const dReal (*Ax)[P[0]] = (const dReal (*)[P[0]])A[0];
const dReal (*Ay)[P[1]] = (const dReal (*)[P[1]])A[1];
const dReal (*Az)[P[2]] = (const dReal (*)[P[2]])A[2];
dInt i,j,k,l,d;
dErr err;

dFunctionBegin;
err = dMemzero(a,D*Q[0]*P[1]*P[2]*sizeof(a[0]));dCHK(err);
err = dMemzero(b,D*Q[0]*Q[1]*P[2]*sizeof(b[0]));dCHK(err);
if (D*Q[0]*P[1]*P[2]*sizeof(amem[0]) != Q[0]*sizeof(a[0])
|| D*Q[0]*Q[1]*Q[2]*sizeof(out[0]) != Q[0]*sizeof(g[0])) dERROR(1,"sizeof not working as expected");
err = dMemzero(a,Q[0]*sizeof(a[0]));dCHK(err);
err = dMemzero(b,Q[0]*sizeof(b[0]));dCHK(err);
switch (imode) {
case INSERT_VALUES:
err = dMemzero(g,D*Q[0]*Q[1]*Q[2]*sizeof(g[0]));dCHK(err);
err = dMemzero(g,Q[0]*sizeof(g[0]));dCHK(err);
break;
case ADD_VALUES:
break;
Expand All @@ -441,7 +470,8 @@ static dErr TensorMult_Hex(dInt D,const dInt P[3],const dInt Q[3],dReal *A[3],co
for (j=0; j<P[1]; j++) {
for (k=0; k<P[2]; k++) {
for (d=0; d<D; d++) {
a[((l*P[1]+j)*P[2]+k)*D+d] += A[0][l*P[0]+i] * f[((i*P[1]+j)*P[2]+k)*D+d];
//amem[((l*P[1]+j)*P[2]+k)*D+d] += A[0][l*P[0]+i] * in[((i*P[1]+j)*P[2]+k)*D+d];
a[l][j][k][d] += Ax[l][i] * f[i][j][k][d];
}
}
}
Expand All @@ -452,7 +482,8 @@ static dErr TensorMult_Hex(dInt D,const dInt P[3],const dInt Q[3],dReal *A[3],co
for (j=0; j<P[1]; j++) {
for (k=0; k<P[2]; k++) {
for (d=0; d<D; d++) {
b[((i*Q[1]+l)*P[2]+k)*D+d] += A[1][l*P[1]+j] * a[((i*P[1]+j)*P[2]+k)*D+d];
//bmem[((i*Q[1]+l)*P[2]+k)*D+d] += A[1][l*P[1]+j] * amem[((i*P[1]+j)*P[2]+k)*D+d];
b[i][l][k][d] += Ay[l][j] * a[i][j][k][d];
}
}
}
Expand All @@ -463,12 +494,14 @@ static dErr TensorMult_Hex(dInt D,const dInt P[3],const dInt Q[3],dReal *A[3],co
for (l=0; l<Q[2]; l++) {
for (k=0; k<P[2]; k++) {
for (d=0; d<D; d++) {
g[((i*Q[1]+j)*Q[2]+l)*D+d] += A[2][l*P[2]+k] * b[((i*Q[1]+j)*P[2]+k)*D+d];
//out[((i*Q[1]+j)*Q[2]+l)*D+d] += A[2][l*P[2]+k] * bmem[((i*Q[1]+j)*P[2]+k)*D+d];
g[i][j][l][d] += Az[l][k] * b[i][j][k][d];
}
}
}
}
}
PetscLogFlops((Q[0]*P[0]*P[1]*P[2] + Q[0]*Q[1]*P[1]*P[2] + Q[0]*Q[1]*Q[2]*P[2])*D*2);
dFunctionReturn(0);
}

Expand Down
21 changes: 18 additions & 3 deletions src/jacobi/impls/tensor/ruletopo.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,21 @@ static dErr dRuleGetTensorNodeWeight_Tensor_Line(dRule rule,dInt *dim,dInt nnode
TensorRule *r = ((dRule_Tensor*)rule)->trule;
dFunctionBegin;
if (dim) *dim = 1;
if (nnodes) nnodes[0] = r[0]->size;
if (coord) coord[0] = r[0]->coord;
if (weight) weight[0] = r[0]->weight;
if (nnodes) {
nnodes[0] = r[0]->size;
nnodes[1] = 0;
nnodes[2] = 0;
}
if (coord) {
coord[0] = r[0]->coord;
coord[1] = NULL;
coord[2] = NULL;
}
if (weight) {
weight[0] = r[0]->weight;
weight[1] = NULL;
weight[2] = NULL;
}
dFunctionReturn(0);
}

Expand All @@ -156,14 +168,17 @@ static dErr dRuleGetTensorNodeWeight_Tensor_Quad(dRule rule,dInt *dim,dInt nnode
if (nnodes) {
nnodes[0] = r[0]->size;
nnodes[1] = r[1]->size;
nnodes[2] = 0;
}
if (coord) {
coord[0] = r[0]->coord;
coord[1] = r[1]->coord;
coord[2] = NULL;
}
if (weight) {
weight[0] = r[0]->weight;
weight[1] = r[1]->weight;
weight[2] = NULL;
}
dFunctionReturn(0);
}
Expand Down
2 changes: 1 addition & 1 deletion src/jacobi/impls/tensor/tensor.c
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ static dErr dJacobiAddConstraints_Tensor(dJacobi dUNUSED jac,dInt nx,const dInt
for (j=start[incd[0]]; j!=end[0]; j+=inc[0]) {
for (k=start[incd[1]]; k!=end[1]; k+=inc[1]) {
const dInt faceDim[2] = {d[incd[0]],d[incd[1]]};
dInt facejk[2],faceIndex;
dInt facejk[2],faceIndex = -1;
nrow = ncol = 0;
irow[nrow++] = (xs[elem] + (start[0]*d1+start[1])*d2+start[2]
+ (j-start[incd[0]])*scan[incd[0]] + (k-start[incd[1]])*scan[incd[1]]);
Expand Down
18 changes: 7 additions & 11 deletions src/jacobi/tests/ex1.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,6 @@ static dErr checkInterp(dInt N,s_dEFS efs[],Vec u)
ind = 0;
for (dInt i=0; i<N; i++) {
err = dEFSGetTensorNodes(&efs[i],&dim,P,x);dCHK(err);
switch (dim) {
case 1: P[1] = 1; P[2] = 1; break;
case 2: P[2] = 1; break;
case 3: break;
default: dERROR(1,"dim %d out of range",dim);
}
for (dInt j=0; j<P[0]; j++) {
for (dInt k=0; k<P[1]; k++) {
for (dInt l=0; l<P[2]; l++) {
Expand Down Expand Up @@ -170,10 +164,12 @@ static dErr checkInterp(dInt N,s_dEFS efs[],Vec u)
}

err = dMemzero(g,gsize*sizeof g[0]);dCHK(err);
for (dInt j=0; j<gsize; j++) { g[j] = NAN; }
for (dInt j=0; j<gsize*dim; j++) { ((dScalar*)dg)[j] = NAN; }
err = dEFSApply(&efs[i],NULL,dim,f+ind,g,dAPPLY_INTERP,INSERT_VALUES);dCHK(err);
err = dEFSApply(&efs[i],NULL,dim,f+ind,&dg[0][0],dAPPLY_GRAD,INSERT_VALUES);dCHK(err);
for (dInt count=0; count<10; count++) {
for (dInt j=0; j<gsize; j++) { g[j] = NAN; }
for (dInt j=0; j<gsize*dim; j++) { ((dScalar*)dg)[j] = NAN; }
err = dEFSApply(&efs[i],NULL,dim,f+ind,g,dAPPLY_INTERP,INSERT_VALUES);dCHK(err);
err = dEFSApply(&efs[i],NULL,dim,f+ind,&dg[0][0],dAPPLY_GRAD,INSERT_VALUES);dCHK(err);
}

/* compare to exact solution */
switch (dim) {
Expand Down Expand Up @@ -246,7 +242,7 @@ static dErr checkRulesAndEFS(dJacobi jac)
const dInt dim = 3;
PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
MPI_Comm comm = ((PetscObject)jac)->comm;
Vec u;
Vec u = NULL;
dInt N,minrdeg,maxrdeg,minbdeg,maxbdeg,*rdeg,*bdeg;
dEntTopology *topo;
dTruth showrules,showefs;
Expand Down

0 comments on commit 373f802

Please sign in to comment.