Partial implementation of geometric maps.

* We still need plumbing for the geometric maps. The map can either be computed in advance or every time we visit elements. With Q1 element maps, 24=8*3 doubles must be stored. To store it instead, we need 10q (9 values for inverse Jacobian, 1 for determinant) where q is the number of quadrature points. For linear elements, q=8 is natural. For a scalar problem (worst case), 8 new (in cache) values are used so an extra 80 for the stored element map seems disproportionate. This motivates recomputing the map every element, a cost of around 234q. It's not clear yet whether this reduced stress on memory actually pays off. Of course affine elements are a big win, but they are pretty rare in a fully unstructured hexahedral mesh. * Remove -Wcast-qual from pedantic warnings. The problem is that there is no way to specify a const array type, for instance when casting const double a[]; const double (*b)[3] = (const double(*)[3])a; which complains since the const means that the elements are const. There is no way to indicate that the (double[3]) objects are const, only that the elements are const. GCC is technically correct to complain about this cast, but I believe that the qualifier (const) should be promoted from the elements to the object (double[3]) so that the warning would not be thrown in this case. Signed-off-by: Jed Brown <jed@59A2.org>
jedbrown · Nov 17, 2008 · 4411834 · 4411834
1 parent 373f802
commit 4411834
Show file tree

Hide file tree

Showing 5 changed files with 173 additions and 28 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -31,7 +31,7 @@ if (Dohp_USE_DEBUG)
 endif (Dohp_USE_DEBUG)
 
 if (PEDANTIC_WARNINGS)
-  set (DEFAULT_PEDANTIC_FLAGS "-pedantic -Wall -Wextra -Wundef -Wshadow -Wpointer-arith -Wbad-function-cast -Wcast-qual -Wcast-align -Wwrite-strings -Wconversion -Wlogical-op -Wsign-compare -Waggregate-return -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wmissing-format-attribute -Wmissing-noreturn -Wpacked -Wdisabled-optimization -Wmultichar -Wformat-nonliteral -Wformat-security -Wformat-y2k -Wendif-labels -Wdeclaration-after-statement -Wold-style-definition -Winvalid-pch -Wmissing-field-initializers -Wvariadic-macros -Wunsafe-loop-optimizations -Wvolatile-register-var -Wstrict-aliasing -funit-at-a-time -Wno-sign-conversion")
+  set (DEFAULT_PEDANTIC_FLAGS "-pedantic -Wall -Wextra -Wundef -Wshadow -Wpointer-arith -Wbad-function-cast -Wcast-align -Wwrite-strings -Wconversion -Wlogical-op -Wsign-compare -Waggregate-return -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wmissing-format-attribute -Wmissing-noreturn -Wpacked -Wdisabled-optimization -Wmultichar -Wformat-nonliteral -Wformat-security -Wformat-y2k -Wendif-labels -Wdeclaration-after-statement -Wold-style-definition -Winvalid-pch -Wmissing-field-initializers -Wvariadic-macros -Wunsafe-loop-optimizations -Wvolatile-register-var -Wstrict-aliasing -funit-at-a-time -Wno-sign-conversion")
   #set (DEFAULT_PEDANTIC_FLAGS "-Wunreachable-code -Wfloat-equal -Wc++-compat")
   #set (DEFAULT_PEDANTIC_FLAGS "-pedantic -Wall -Wextra -Winline -Wshadow -Wconversion -Wlogical-op -Wmissing-prototypes -Wvla")
   #set (DEFAULT_PEDANTIC_FLAGS "${DEFAULT_PEDANTIC_FLAGS} -Wno-sign-conversion -Wwrite-strings -Wstrict-aliasing -Wcast-align -fstrict-aliasing")

diff --git a/include/dohpgeom.h b/include/dohpgeom.h
@@ -45,12 +45,34 @@ INLINE void dGeomQuadFaceNormal(const dReal a[],dReal b[]) {
 INLINE dBool dGeomQuadParallel(const dReal a[],const dReal b[]) /* return true if Quad a \dot b > 0 */
 {dReal f[3]; dGeomQuadFaceNormal(a,f); return (dGeomDotProd(b,f) > 0);}
 
-INLINE void dGeomConvexComb_2_4(dReal x,dReal y,const dReal v[],const dInt p[],dReal f[])
+INLINE dErr dGeomConvexComb_2_4(dReal x,dReal y,const dReal (*v)[3],const dInt p[],dReal f[])
 {
   dInt i;
+  if (!(-1 <= x && x <= 1 && -1 <= y && y <= 1)) dERROR(1,"Point out of bounds");
   for (i=0; i<3; i++) {
-    f[i] = 0.25*((-1-x)*((-1-y)*v[p[0]*3+i] + (1+y)*v[p[3]*3+i]) + (1+x)*((-1-y)*v[p[1]*3+i] + (1+y)*v[p[2]*3+i]));
+    f[i] = 0.25*((1-x)*((1-y)*v[p[0]][i] + (1+y)*v[p[3]][i]) + (1+x)*((1-y)*v[p[1]][i] + (1+y)*v[p[2]][i]));
   }
+  return 0;
+}
+
+INLINE dErr dGeomInvert3(const dReal a[restrict static 9],dReal b[restrict static 9],dReal det[restrict static 1])
+{
+  const dReal b0 =  (a[1*3+1]*a[2*3+2] - a[2*3+1]*a[1*3+2]);
+  const dReal b3 = -(a[1*3+0]*a[2*3+2] - a[2*3+0]*a[1*3+2]);
+  const dReal b6 =  (a[1*3+0]*a[2*3+1] - a[2*3+0]*a[1*3+1]);
+  const dReal ldet = a[0]*b0 + a[1]*b3 + a[2]*b6;
+  const dReal idet = 1.0 / ldet;
+  b[0] =  idet*b0;
+  b[1] = -idet*(a[0*3+1]*a[2*3+2] - a[2*3+1]*a[0*3+2]);
+  b[2] =  idet*(a[0*3+1]*a[1*3+2] - a[1*3+1]*a[0*3+2]);
+  b[3] =  idet*b3;
+  b[4] =  idet*(a[0*3+0]*a[2*3+2] - a[2*3+0]*a[0*3+2]);
+  b[5] = -idet*(a[0*3+0]*a[1*3+2] - a[1*3+0]*a[0*3+2]);
+  b[6] =  idet*b6;
+  b[7] = -idet*(a[0*3+0]*a[2*3+1] - a[2*3+0]*a[0*3+1]);
+  b[8] =  idet*(a[0*3+0]*a[1*3+1] - a[1*3+0]*a[0*3+1]);
+  det[0] =  ldet;
+  return 0;
 }
 
 /**

diff --git a/include/private/fsimpl.h b/include/private/fsimpl.h
@@ -43,8 +43,8 @@ struct _p_dFS {
   dInt                rstart;   /**< global offset of first owned dof */
   dInt                m;        /**< Number of expanded dofs */
   dInt                D;        /**< Number of dofs per (non-boundary) node */
-  dRule              *rule;     /**< Integration rule */
-  dEFS               *efs;      /**< Element function space, defined for all entities */
+  s_dRule            *rule;     /**< Integration rule */
+  s_dEFS             *efs;      /**< Element function space, defined for all entities */
   Mat                 C;        /**< full-order constraint matrix (element dofs to local numbering) */
   Mat                 Cp;       /**< preconditioning constraint matrix (element dofs to local numbering, as sparse as possible) */
   Vec                 weight;   /**< Vector in global space, used to compensate for overcounting after local to global */

diff --git a/src/jacobi/impls/tensor/efstopo.c b/src/jacobi/impls/tensor/efstopo.c
@@ -21,7 +21,7 @@ _F(dEFSGetTensorNodes_Tensor_Line);
 _F(dEFSGetTensorNodes_Tensor_Quad);
 _F(dEFSGetTensorNodes_Tensor_Hex);
 #undef _F
-#define _F(f) static dErr f(dEFS,const dReal[],dInt,const dScalar[],dScalar[restrict],dApplyMode,InsertMode) /* dEFSApply */
+#define _F(f) static dErr f(dEFS,const dReal[restrict],dInt,const dScalar[restrict],dScalar[restrict],dApplyMode,InsertMode) /* dEFSApply */
 _F(dEFSApply_Tensor_Line);
 _F(dEFSApply_Tensor_Quad);
 _F(dEFSApply_Tensor_Hex);
@@ -50,6 +50,9 @@ _F(dRuleMappingApply_Tensor_Quad);
 _F(dRuleMappingApply_Tensor_Hex);
 #undef _F
 
+static dErr TensorRuleMapping(dInt Q,const dReal jinv_flat[restrict],dInt D,const dScalar in[restrict],dScalar out[restrict],InsertMode imode);
+static dErr TensorRuleMappingTranspose(dInt Q,const dReal jinv_flat[restrict],dInt D,const dScalar in[restrict],dScalar out[restrict],InsertMode imode);
+
 /**
 * Set up the EFS ops table for each topology.  This is the only exported function in this file.
 *
@@ -317,7 +320,7 @@ static dErr dEFSApply_Tensor_Quad(dEFS efs,const dReal mapdata[],dInt D,const dS
   dFunctionReturn(0);
 }
 
-static dErr dEFSApply_Tensor_Hex(dEFS efs,const dReal mapdata[],dInt D,const dScalar in[],dScalar out[],dApplyMode amode,InsertMode imode)
+static dErr dEFSApply_Tensor_Hex(dEFS efs,const dReal jinv[restrict],dInt D,const dScalar in[],dScalar out[],dApplyMode amode,InsertMode imode)
 {
   TensorBasis *b = ((dEFS_Tensor*)efs)->basis;
   dReal *A[3];
@@ -350,9 +353,23 @@ static dErr dEFSApply_Tensor_Hex(dEFS efs,const dReal mapdata[],dInt D,const dSc
       err = TensorMult_Hex(D,P,Q,A,in,df[1],INSERT_VALUES);dCHK(err);
       A[0] = b[0]->interp; A[1] = b[1]->interp; A[2] = b[2]->deriv;
       err = TensorMult_Hex(D,P,Q,A,in,df[2],INSERT_VALUES);dCHK(err);
-      err = dRuleMappingApply_Tensor_Hex((dRule_Tensor*)efs->rule,mapdata,D,&df[0][0],out,imode);dCHK(err);
+      err = dRuleMappingApply_Tensor_Hex((dRule_Tensor*)efs->rule,jinv,D,&df[0][0],out,imode);dCHK(err);
+    } break;
+    case dAPPLY_GRAD_TRANSPOSE: {
+      dScalar df[3][Q[0]*Q[1]*Q[2]*D];
+      err = TensorRuleMappingTranspose(Q[0]*Q[1]*Q[2],jinv,D,in,&df[0][0],imode);dCHK(err);
+      switch (imode) {
+        case INSERT_VALUES: err = dMemzero(out,P[0]*P[1]*P[2]*D*sizeof(out[0]));dCHK(err); break;
+        case ADD_VALUES: break;
+        default: dERROR(1,"InsertMode %d invalid or unimplemented",imode);
+      }
+      A[0] = b[0]->derivTranspose; A[1] = b[1]->interpTranspose; A[2] = b[2]->interpTranspose;
+      err = TensorMult_Hex(D,Q,P,A,df[0],out,ADD_VALUES);dCHK(err);
+      A[0] = b[0]->interpTranspose; A[1] = b[1]->derivTranspose; A[2] = b[2]->interpTranspose;
+      err = TensorMult_Hex(D,Q,P,A,df[1],out,ADD_VALUES);dCHK(err);
+      A[0] = b[0]->interpTranspose; A[1] = b[1]->interpTranspose; A[2] = b[2]->derivTranspose;
+      err = TensorMult_Hex(D,Q,P,A,df[2],out,ADD_VALUES);dCHK(err);
     } break;
-    case dAPPLY_GRAD_TRANSPOSE:
     default:
       dERROR(1,"invalid/unimplemented dApplyMode %d specified",amode);
   }
@@ -506,8 +523,10 @@ static dErr TensorMult_Hex(dInt D,const dInt P[3],const dInt Q[3],dReal *A[3],co
 }
 
 
-static dErr TensorRuleNoMapping(dInt Q,dInt D,const dScalar u[3][Q][D],dScalar v[Q][D][3],InsertMode imode)
+static dErr TensorRuleNoMapping(dInt Q,dInt D,const dScalar in[restrict],dScalar out[restrict],InsertMode imode)
 {
+  const dScalar (*restrict u)[Q][D] = (const dScalar(*)[Q][D])in;
+  dScalar (*restrict v)[D][3] = (dScalar(*)[D][3])out;
 
   dFunctionBegin;
   switch (imode) {
@@ -534,54 +553,122 @@ static dErr TensorRuleNoMapping(dInt Q,dInt D,const dScalar u[3][Q][D],dScalar v
   dFunctionReturn(0);
 }
 
+/** Push gradients wrt reference space forward to gradients wrt physical space
+*
+* @param Q number of quadrature points
+* @param jinv_flat inverse jacobian at quadrature points, logically of type 'const dReal[Q][3][3]'
+* @param D number of dofs per node
+* @param in gradients in reference space, logically 'const dReal[3][Q][D]'
+* @param out gradients in physical space, logically 'const dReal[Q][D][3]'
+* @param imode INSERT_VALUES or ADD_VALUES
+*/
+static dErr TensorRuleMapping(dInt Q,const dReal jinv_flat[restrict],dInt D,const dScalar in[restrict],dScalar out[restrict],InsertMode imode)
+{
+  const dReal (*restrict jinv)[3][3] = (const dReal(*)[3][3])jinv_flat;
+  const dScalar (*restrict u)[Q][D] = (const dScalar(*)[Q][D])in;
+  dScalar (*restrict v)[D][3] = (dScalar(*)[D][3])out;
+
+  dFunctionBegin;
+  switch (imode) {
+    case INSERT_VALUES: {
+      for (dInt i=0; i<Q; i++) {
+        for (dInt d=0; d<D; d++) {
+          for (dInt e=0; e<3; e++) {
+            v[i][d][e] = u[0][i][d] * jinv[i][0][e] + u[1][i][d] * jinv[i][1][e] + u[2][i][d] * jinv[i][2][e];
+          }
+        }
+      }
+    } break;
+    case ADD_VALUES: {
+      for (dInt i=0; i<Q; i++) {
+        for (dInt d=0; d<D; d++) {
+          for (dInt e=0; e<3; e++) {
+            v[i][d][e] += u[0][i][d] * jinv[i][0][e] + u[1][i][d] * jinv[i][1][e] + u[2][i][d] * jinv[i][2][e];
+          }
+        }
+      }
+    } break;
+    default: dERROR(1,"InsertMode %d invalid or not implemented",imode);
+  }
+  dFunctionReturn(0);
+}
+
+static dErr TensorRuleMappingTranspose(dInt Q,const dReal jinv_flat[restrict],dInt D,const dScalar in[restrict],dScalar out[restrict],InsertMode imode)
+{
+  const dReal (*restrict jinv)[3][3] = (const dReal(*)[3][3])jinv_flat;
+  const dScalar (*restrict u)[D][3] = (const dScalar(*)[D][3])in;
+  dScalar (*restrict v)[Q][D] = (dScalar(*)[Q][D])out;
+
+  dFunctionBegin;
+  switch (imode) {
+    case INSERT_VALUES: {
+      for (dInt i=0; i<Q; i++) {
+        for (dInt d=0; d<D; d++) {
+          for (dInt e=0; e<3; e++) {
+            v[e][i][d] = u[i][d][0] * jinv[i][e][0] + u[i][d][1] * jinv[i][e][1] + u[i][d][2] * jinv[i][e][2];
+          }
+        }
+      }
+    } break;
+    case ADD_VALUES: {
+      for (dInt i=0; i<Q; i++) {
+        for (dInt d=0; d<D; d++) {
+          for (dInt e=0; e<3; e++) {
+            v[e][i][d] += u[i][d][0] * jinv[i][e][0] + u[i][d][1] * jinv[i][e][1] + u[i][d][2] * jinv[i][e][2];
+          }
+        }
+      }
+    } break;
+    default: dERROR(1,"InsertMode %d invalid or not implemented",imode);
+  }
+  dFunctionReturn(0);
+}
+
+
 /**
 * The following functions are defined in this file so that they can be static.  Perhaps they should become virtual and
 * be put in ruletopo.c
 **/
-static dErr dRuleMappingApply_Tensor_Line(dRule_Tensor *rule,const dReal mapdata[],dInt D,const dScalar in[],dScalar out[],InsertMode imode)
+static dErr dRuleMappingApply_Tensor_Line(dRule_Tensor *rule,const dReal jinv[],dInt D,const dScalar in[],dScalar out[],InsertMode imode)
 {
   const dInt Q = rule->trule[0]->size;
-  const dScalar (*u)[Q][D] = (const dScalar (*)[Q][D])in;
-  dScalar (*v)[D][3] = (dScalar (*)[D][3])out;
   dErr err;
 
   dFunctionBegin;
-  if (mapdata) {
+  if (jinv) {
     dERROR(1,"Not implemented");
   } else {
-    err = TensorRuleNoMapping(Q,D,u,v,imode);dCHK(err);
+    err = TensorRuleNoMapping(Q,D,in,out,imode);dCHK(err);
   }
   dFunctionReturn(0);
 }
 
-static dErr dRuleMappingApply_Tensor_Quad(dRule_Tensor *rule,const dReal mapdata[],dInt D,const dScalar in[],dScalar out[],InsertMode imode)
+static dErr dRuleMappingApply_Tensor_Quad(dRule_Tensor *rule,const dReal jinv[],dInt D,const dScalar in[],dScalar out[],InsertMode imode)
 {
-  const dInt Q[2] = {rule->trule[0]->size,rule->trule[1]->size},QQ = Q[0]*Q[1];
-  const dScalar (*u)[QQ][D] = (const dScalar (*)[QQ][D])in;
-  dScalar (*v)[D][3] = (dScalar (*)[D][3])out;
+  const dInt Q = rule->trule[0]->size * rule->trule[1]->size;
   dErr err;
 
   dFunctionBegin;
-  if (mapdata) {
+  if (jinv) {
     dERROR(1,"Not implemented");
   } else {
-    err = TensorRuleNoMapping(QQ,D,u,v,imode);dCHK(err);
+    err = TensorRuleNoMapping(Q,D,in,out,imode);dCHK(err);
   }
   dFunctionReturn(0);
 }
 
-static dErr dRuleMappingApply_Tensor_Hex(dRule_Tensor *rule,const dReal mapdata[],dInt D,const dScalar in[],dScalar out[],InsertMode imode)
+static dErr dRuleMappingApply_Tensor_Hex(dRule_Tensor *rule,const dReal jinv[],dInt D,const dScalar in[],dScalar out[],InsertMode imode)
 {
-  const dInt Q[3] = {rule->trule[0]->size,rule->trule[1]->size,rule->trule[2]->size},QQ = Q[0]*Q[1]*Q[2];
-  const dScalar (*u)[QQ][D] = (const dScalar (*)[QQ][D])in;
-  dScalar (*restrict v)[D][3] = (dScalar(*)[D][3])out;
+  const TensorRule *r = rule->trule;
+  const dInt Q = r[0]->size * r[1]->size * r[2]->size;
   dErr err;
 
   dFunctionBegin;
-  if (mapdata) {
-    dERROR(1,"Not implemented");
+  if (jinv) {
+    if (imode != INSERT_VALUES) dERROR(1,"not implemented");
+    err = TensorRuleMapping(Q,jinv,D,in,out,imode);dCHK(err);
   } else {
-    err = TensorRuleNoMapping(QQ,D,u,v,imode);dCHK(err);
+    err = TensorRuleNoMapping(Q,D,in,out,imode);dCHK(err);
   }
   dFunctionReturn(0);
 }
diff --git a/src/jacobi/impls/tensor/ruletopo.c b/src/jacobi/impls/tensor/ruletopo.c
@@ -1,3 +1,4 @@
+#include "dohpgeom.h"
 #include "tensor.h"
 
 static dErr dRuleView_Tensor_Private(const char*,dInt,TensorRule*,PetscViewer);
@@ -205,3 +206,38 @@ static dErr dRuleGetTensorNodeWeight_Tensor_Hex(dRule rule,dInt *dim,dInt nnodes
   }
   dFunctionReturn(0);
 }
+
+static dErr dRuleComputeMapping_Tensor_Hex(dRule rule,const dReal vtx[],dReal jinv[restrict][3][3],dReal jdet[restrict])
+{
+  const TensorRule *r = ((dRule_Tensor*)rule)->trule;
+  const dInt Q[3] = {r[0]->size,r[1]->size,r[2]->size},QQ = Q[0]*Q[1]*Q[2];
+  const dReal *qx[3] = {r[0]->coord,r[1]->coord,r[2]->coord};
+  const dReal (*restrict x)[3] = (const dReal(*)[3])vtx;
+  dErr err;
+
+  dFunctionBegin;
+  for (dInt i=0; i<Q[0]; i++) {
+    for (dInt j=0; j<Q[1]; j++) {
+      for (dInt k=0; k<Q[2]; k++) {
+        const dInt p = (i*Q[1]+j)*Q[2]+k;                /* Index of quadrature point */
+        const dReal q[3] = {qx[0][i],qx[1][j],qx[2][k]}; /* location of quadrature point in reference coordinates */
+        dReal J[3][3],f[6][3];
+        err = dGeomConvexComb_2_4(q[0],q[2],x,dMeshConnectHexQuad[0],f[0]);dCHK(err);
+        err = dGeomConvexComb_2_4(q[1],q[2],x,dMeshConnectHexQuad[1],f[1]);dCHK(err);
+        err = dGeomConvexComb_2_4(-q[0],q[2],x,dMeshConnectHexQuad[2],f[2]);dCHK(err);
+        err = dGeomConvexComb_2_4(-q[1],q[2],x,dMeshConnectHexQuad[3],f[3]);dCHK(err);
+        err = dGeomConvexComb_2_4(q[0],q[1],x,dMeshConnectHexQuad[4],f[4]);dCHK(err);
+        err = dGeomConvexComb_2_4(q[0],q[1],x,dMeshConnectHexQuad[5],f[5]);dCHK(err);
+        for (dInt l; l<3; l++) {
+          J[l][0] = 0.5 * (f[1][l] - f[3][l]);
+          J[l][1] = 0.5 * (f[2][l] - f[0][l]);
+          J[l][2] = 0.5 * (f[5][l] - f[4][l]);
+        }
+        err = dGeomInvert3(&J[0][0],&jinv[p][0][0],&jdet[p]);dCHK(err);
+      }
+    }
+  }
+  /* Assume the optimizer has eliminated common subexpressions, this estimate is about right */
+  PetscLogFlops(QQ*(6+6*30+6+42);
+  dFunctionReturn(0);
+}