Fix Tensor Solver BC (AMReX-Codes#2930)

This fixes some bugs in the physical domain BC of tensor linear solver. At the corner of two no-slip walls (e.g., (0,0)), we have u(-1,0) = -u(0,0) and u(0,-1) = -u(0,0). It's incorrect to fill the corner ghost cell with u(-1,-1) = u(-1,0) + u(0,-1) - u(0,0), because it will result in u(-1,-1) = -3 * u(0,0). In the old approach, to avoid branches in computing transverse derivatives on cell faces, we fill the ghost cells first. For example, to compute du/dy at the lo-x boundary, we use the data in i = -1 and 0, just like we compute du/dy(i) using u(i-1) and u(i) for interior faces. The problem is the normal velocity in the ghost cells outside a wall is filled with extrapolation of the Dirichlet value (which is zero) and more than 1 interior cells. Because of the high-order extrapolation, u(-1) != -u(0). This is the desired approach for computing du/dx on the wall. However, this produces incorrect results in dudy. In the new approach, we explicitly handle the boundaries in the derivative stencil. For example, to compute transverse derivatives on an inflow face, we use the boundary values directly. Co-authored-by: cgilet <cgilet@gmail.com>
Thierry992 · Oct 3, 2022 · de7b7f4 · de7b7f4
1 parent 13aa4df
commit de7b7f4
Show file tree

Hide file tree

Showing 12 changed files with 3,487 additions and 1,359 deletions.
diff --git a/Src/Base/AMReX_Orientation.H b/Src/Base/AMReX_Orientation.H
@@ -75,7 +75,7 @@ public:
     * according to the above ordering.
     */
     AMREX_GPU_HOST_DEVICE
-    operator int () const noexcept { return val; }
+    constexpr operator int () const noexcept { return val; }
     //! Return opposite orientation.
     AMREX_GPU_HOST_DEVICE
     Orientation flip () const noexcept
@@ -97,6 +97,30 @@ public:
     //! Read from an istream.
     friend std::istream& operator>> (std::istream& os, Orientation& o);
 
+    //! Int value of the x-lo-face
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    static constexpr int xlo () noexcept { return 0; }
+
+    //! Int value of the x-hi-face
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    static constexpr int xhi () noexcept { return AMREX_SPACEDIM; }
+
+    //! Int value of the y-lo-face
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    static constexpr int ylo () noexcept { return 1; }
+
+    //! Int value of the y-hi-face
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    static constexpr int yhi () noexcept { return 1+AMREX_SPACEDIM; }
+
+    //! Int value of the z-lo-face
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    static constexpr int zlo () noexcept { return 2; }
+
+    //! Int value of the z-hi-face
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    static constexpr int zhi () noexcept { return 2+AMREX_SPACEDIM; }
+
 private:
     //! Used internally.
     AMREX_GPU_HOST_DEVICE

diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H
@@ -105,7 +105,8 @@ public: // for cuda
 
     void applyBCTensor (int amrlev, int mglev, MultiFab& vel,
                         BCMode bc_mode, StateMode s_mode, const MLMGBndry* bndry) const;
-    void compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const;
+    void compCrossTerms(int amrlev, int mglev, MultiFab const& mf,
+                        const MLMGBndry* bndry) const;
 };
 
 }

diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp
@@ -13,11 +13,12 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
     const auto& bcondloc = *m_bcondloc[amrlev][mglev];
     const auto& maskvals = m_maskvals[amrlev][mglev];
 
-    FArrayBox foofab(Box::TheUnitBox(),3);
-    const auto& foo = foofab.array();
+    Array4<Real const> foo;
 
     const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray();
     const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1);
+    const auto dlo = amrex::lbound(domain);
+    const auto dhi = amrex::ubound(domain);
 
     auto factory = dynamic_cast<EBFArrayBoxFactory const*>(m_factory[amrlev][mglev].get());
     const FabArray<EBCellFlagFab>* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr;
@@ -39,14 +40,13 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
             const auto & bdlv = bcondloc.bndryLocs(mfi);
             const auto & bdcv = bcondloc.bndryConds(mfi);
 
-            GpuArray<BoundCond,2*AMREX_SPACEDIM*AMREX_SPACEDIM> bct;
-            GpuArray<Real,2*AMREX_SPACEDIM*AMREX_SPACEDIM> bcl;
-            for (OrientationIter face; face; ++face) {
-                Orientation ori = face();
-                const int iface = ori;
-                for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) {
-                    bct[iface*AMREX_SPACEDIM+icomp] = bdcv[icomp][ori];
-                    bcl[iface*AMREX_SPACEDIM+icomp] = bdlv[icomp][ori];
+            Array2D<BoundCond,0,2*AMREX_SPACEDIM,0,AMREX_SPACEDIM> bct;
+            Array2D<Real,0,2*AMREX_SPACEDIM,0,AMREX_SPACEDIM> bcl;
+            for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) {
+                for (OrientationIter face; face; ++face) {
+                    Orientation ori = face();
+                    bct(ori,icomp) = bdcv[icomp][ori];
+                    bcl(ori,icomp) = bdlv[icomp][ori];
                 }
             }
 
@@ -72,7 +72,7 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
                                       mxlo, mylo, mxhi, myhi,
                                       bvxlo, bvylo, bvxhi, bvyhi,
                                       bct, bcl, inhomog, imaxorder,
-                                      dxinv, domain);
+                                      dxinv, dlo, dhi);
             });
 #else
             const auto& mzlo = maskvals[Orientation(2,Orientation::low )].array(mfi);
@@ -83,28 +83,50 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
             const auto& bvzhi = (bndry != nullptr) ?
                 (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo;
 
-            AMREX_HOST_DEVICE_FOR_1D ( 12, iedge,
+#ifdef AMREX_USE_GPU
+            if (Gpu::inLaunchRegion()) {
+                amrex::launch(12, 64, Gpu::gpuStream(),
+#ifdef AMREX_USE_DPCPP
+                [=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item)
+                {
+                    int bid = item.get_group_linear_id();
+                    int tid = item.get_local_linear_id();
+                    int bdim = item.get_local_range(0);
+#else
+                [=] AMREX_GPU_DEVICE ()
+                {
+                    int bid = blockIdx.x;
+                    int tid = threadIdx.x;
+                    int bdim = blockDim.x;
+#endif
+                    mltensor_fill_edges(bid, tid, bdim, vbx, velfab,
+                                        mxlo, mylo, mzlo, mxhi, myhi, mzhi,
+                                        bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
+                                        bct, bcl, inhomog, imaxorder,
+                                        dxinv, dlo, dhi);
+                });
+            } else
+#endif
             {
-                mltensor_fill_edges(iedge, vbx, velfab,
+                mltensor_fill_edges(vbx, velfab,
                                     mxlo, mylo, mzlo, mxhi, myhi, mzhi,
                                     bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
                                     bct, bcl, inhomog, imaxorder,
-                                    dxinv, domain);
-            });
+                                    dxinv, dlo, dhi);
+            }
 
             AMREX_HOST_DEVICE_FOR_1D ( 8, icorner,
             {
                 mltensor_fill_corners(icorner, vbx, velfab,
                                       mxlo, mylo, mzlo, mxhi, myhi, mzhi,
                                       bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
                                       bct, bcl, inhomog, imaxorder,
-                                      dxinv, domain);
+                                      dxinv, dlo, dhi);
             });
+
 #endif
         }
     }
-
-    // Notet that it is incorrect to call EnforcePeriodicity on vel.
 }
 
 }
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H
@@ -6,10 +6,95 @@
 
 namespace amrex {
 
-namespace {
-    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-    Real mlebtensor_weight (int d) {
-        return (d==2) ? 0.5 : ((d==1) ? 1.0 : 0.0);
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlebtensor_cross_terms_fx (Box const& box, Array4<Real> const& fx,
+                                Array4<Real const> const& vel,
+                                Array4<Real const> const& etax,
+                                Array4<Real const> const& kapx,
+                                Array4<Real const> const& apx,
+                                Array4<EBCellFlag const> const& flag,
+                                GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
+{
+    const Real dyi = dxinv[1];
+    const auto lo = amrex::lbound(box);
+    const auto hi = amrex::ubound(box);
+    constexpr Real twoThirds = 2./3.;
+
+    int k = 0;
+    for     (int j = lo.y; j <= hi.y; ++j) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            if (apx(i,j,0) == 0.0)
+            {
+                fx(i,j,0,0) = 0.0;
+                fx(i,j,0,1) = 0.0;
+            }
+            else
+            {
+                int jhip = j + flag(i  ,j,0).isConnected(0, 1,0);
+                int jhim = j - flag(i  ,j,0).isConnected(0,-1,0);
+                int jlop = j + flag(i-1,j,0).isConnected(0, 1,0);
+                int jlom = j - flag(i-1,j,0).isConnected(0,-1,0);
+                Real whi = mlebtensor_weight(jhip-jhim);
+                Real wlo = mlebtensor_weight(jlop-jlom);
+                Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi,
+                                                   whi,wlo,jhip,jhim,jlop,jlom);
+                Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi,
+                                                   whi,wlo,jhip,jhim,jlop,jlom);
+                Real divu = dvdy;
+                Real xif = kapx(i,j,0);
+                Real mun = Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta
+                Real mut =             etax(i,j,0,1);
+                fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu;
+                fx(i,j,0,1) = -mut*dudy;
+            }
+        }
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlebtensor_cross_terms_fy (Box const& box, Array4<Real> const& fy,
+                                Array4<Real const> const& vel,
+                                Array4<Real const> const& etay,
+                                Array4<Real const> const& kapy,
+                                Array4<Real const> const& apy,
+                                Array4<EBCellFlag const> const& flag,
+                                GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
+{
+    const Real dxi = dxinv[0];
+    const auto lo = amrex::lbound(box);
+    const auto hi = amrex::ubound(box);
+    constexpr Real twoThirds = 2./3.;
+
+    int k = 0;
+    for     (int j = lo.y; j <= hi.y; ++j) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            if (apy(i,j,0) == 0.0)
+            {
+                fy(i,j,0,0) = 0.0;
+                fy(i,j,0,1) = 0.0;
+            }
+            else
+            {
+                int ihip = i + flag(i,j  ,0).isConnected( 1,0,0);
+                int ihim = i - flag(i,j  ,0).isConnected(-1,0,0);
+                int ilop = i + flag(i,j-1,0).isConnected( 1,0,0);
+                int ilom = i - flag(i,j-1,0).isConnected(-1,0,0);
+                Real whi = mlebtensor_weight(ihip-ihim);
+                Real wlo = mlebtensor_weight(ilop-ilom);
+                Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi,
+                                                   whi,wlo,ihip,ihim,ilop,ilom);
+                Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi,
+                                                   whi,wlo,ihip,ihim,ilop,ilom);
+                Real divu = dudx;
+                Real xif = kapy(i,j,0);
+                Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta
+                Real mut =             etay(i,j,0,0);
+                fy(i,j,0,0) = -mut*dvdx;
+                fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu;
+            }
+        }
     }
 }
 
@@ -20,13 +105,20 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4<Real> const& fx,
                                 Array4<Real const> const& kapx,
                                 Array4<Real const> const& apx,
                                 Array4<EBCellFlag const> const& flag,
-                                GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
+                                GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
+                                Array4<Real const> const& bvxlo,
+                                Array4<Real const> const& bvxhi,
+                                Array2D<BoundCond,
+                                        0,2*AMREX_SPACEDIM,
+                                        0,AMREX_SPACEDIM> const& bct,
+                                Dim3 const& dlo, Dim3 const& dhi) noexcept
 {
     const Real dyi = dxinv[1];
     const auto lo = amrex::lbound(box);
     const auto hi = amrex::ubound(box);
     constexpr Real twoThirds = 2./3.;
 
+    int k = 0;
     for     (int j = lo.y; j <= hi.y; ++j) {
         AMREX_PRAGMA_SIMD
         for (int i = lo.x; i <= hi.x; ++i) {
@@ -43,13 +135,15 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4<Real> const& fx,
                 int jlom = j - flag(i-1,j,0).isConnected(0,-1,0);
                 Real whi = mlebtensor_weight(jhip-jhim);
                 Real wlo = mlebtensor_weight(jlop-jlom);
-                Real dudy = (0.5*dyi) * ((vel(i  ,jhip,0,0)-vel(i  ,jhim,0,0))*whi
-                                        +(vel(i-1,jlop,0,0)-vel(i-1,jlom,0,0))*wlo);
-                Real dvdy = (0.5*dyi) * ((vel(i  ,jhip,0,1)-vel(i  ,jhim,0,1))*whi
-                                        +(vel(i-1,jlop,0,1)-vel(i-1,jlom,0,1))*wlo);
+                Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi,
+                                                   bvxlo,bvxhi,bct,dlo,dhi,
+                                                   whi,wlo,jhip,jhim,jlop,jlom);
+                Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi,
+                                                   bvxlo,bvxhi,bct,dlo,dhi,
+                                                   whi,wlo,jhip,jhim,jlop,jlom);
                 Real divu = dvdy;
                 Real xif = kapx(i,j,0);
-                Real mun = 0.75*(etax(i,j,0,0)-xif);  // restore the original eta
+                Real mun = Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta
                 Real mut =       etax(i,j,0,1);
                 fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu;
                 fx(i,j,0,1) = -mut*dudy;
@@ -65,13 +159,20 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4<Real> const& fy,
                                 Array4<Real const> const& kapy,
                                 Array4<Real const> const& apy,
                                 Array4<EBCellFlag const> const& flag,
-                                GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
+                                GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
+                                Array4<Real const> const& bvylo,
+                                Array4<Real const> const& bvyhi,
+                                Array2D<BoundCond,
+                                        0,2*AMREX_SPACEDIM,
+                                        0,AMREX_SPACEDIM> const& bct,
+                                Dim3 const& dlo, Dim3 const& dhi) noexcept
 {
     const Real dxi = dxinv[0];
     const auto lo = amrex::lbound(box);
     const auto hi = amrex::ubound(box);
     constexpr Real twoThirds = 2./3.;
 
+    int k = 0;
     for     (int j = lo.y; j <= hi.y; ++j) {
         AMREX_PRAGMA_SIMD
         for (int i = lo.x; i <= hi.x; ++i) {
@@ -88,15 +189,16 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4<Real> const& fy,
                 int ilom = i - flag(i,j-1,0).isConnected(-1,0,0);
                 Real whi = mlebtensor_weight(ihip-ihim);
                 Real wlo = mlebtensor_weight(ilop-ilom);
-                Real dudx = (0.5*dxi) * ((vel(ihip,j  ,0,0)-vel(ihim,j  ,0,0))*whi
-                                        +(vel(ilop,j-1,0,0)-vel(ilom,j-1,0,0))*wlo);
-                Real dvdx = (0.5*dxi) * ((vel(ihip,j  ,0,1)-vel(ihim,j  ,0,1))*whi
-                                        +(vel(ilop,j-1,0,1)-vel(ilom,j-1,0,1))*wlo);
-
+                Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi,
+                                                   bvylo,bvyhi,bct,dlo,dhi,
+                                                   whi,wlo,ihip,ihim,ilop,ilom);
+                Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi,
+                                                   bvylo,bvyhi,bct,dlo,dhi,
+                                                   whi,wlo,ihip,ihim,ilop,ilom);
                 Real divu = dudx;
                 Real xif = kapy(i,j,0);
-                Real mun = 0.75*(etay(i,j,0,1)-xif);  // restore the original eta
-                Real mut =       etay(i,j,0,0);
+                Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta
+                Real mut =             etay(i,j,0,0);
                 fy(i,j,0,0) = -mut*dvdx;
                 fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu;
             }