From 2d931f63cb4d611d0d23d694726889647f8a482d Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Wed, 22 Jun 2022 15:03:50 -0500 Subject: [PATCH 001/111] Maintain the high end of the 'roundoff domain' in both float and double precision (#2839) * Maintain the high end of the 'roundoff domain' in both float and double precision * fix shadowing * fix warning * fix float conversion warning * fix logic * Update Src/Base/AMReX_Geometry.H * Update Src/Base/AMReX_Geometry.H --- Src/Base/AMReX_Geometry.H | 53 ++++++++++++++++++------- Src/Base/AMReX_Geometry.cpp | 48 ++++++++++------------ Src/Particle/AMReX_ParticleContainerI.H | 19 ++++----- Src/Particle/AMReX_ParticleUtil.H | 6 +-- 4 files changed, 74 insertions(+), 52 deletions(-) diff --git a/Src/Base/AMReX_Geometry.H b/Src/Base/AMReX_Geometry.H index 54a8b8630d3..4238793861d 100644 --- a/Src/Base/AMReX_Geometry.H +++ b/Src/Base/AMReX_Geometry.H @@ -67,6 +67,32 @@ public: int coord; }; + namespace detail { + template + T bisect_prob_hi (amrex::Real plo, amrex::Real phi, amrex::Real idx, int ilo, int ihi, amrex::Real tol) { + T hi = static_cast(phi - tol); + bool safe; + { + int i = int(Math::floor((hi - plo)*idx)) + ilo; + safe = i >= ilo && i <= ihi; + } + if (safe) { + return hi; + } else { + // bisect the point at which the cell no longer maps to inside the domain + T lo = static_cast(phi - 0.5_rt/idx); + T mid = bisect(lo, hi, + [=] AMREX_GPU_HOST_DEVICE (T x) -> T + { + int i = int(Math::floor((x - plo)*idx)) + ilo; + bool inside = i >= ilo && i <= ihi; + return static_cast(inside) - T(0.5); + }, static_cast(tol)); + return mid - static_cast(tol); + } + } + } + class Geometry : public CoordSys @@ -168,8 +194,6 @@ public: //! Returns the problem domain. const RealBox& ProbDomain () const noexcept { return prob_domain; } - //! Returns the roundoff domain. - const RealBox& RoundoffDomain () const noexcept { return roundoff_domain; } //! Sets the problem domain. 
void ProbDomain (const RealBox& rb) noexcept { @@ -193,12 +217,12 @@ public: return {{AMREX_D_DECL(prob_domain.hi(0),prob_domain.hi(1),prob_domain.hi(2))}}; } - GpuArray RoundoffLoArray () const noexcept { - return {{AMREX_D_DECL(roundoff_domain.lo(0),roundoff_domain.lo(1),roundoff_domain.lo(2))}}; - } - - GpuArray RoundoffHiArray () const noexcept { - return {{AMREX_D_DECL(roundoff_domain.hi(0),roundoff_domain.hi(1),roundoff_domain.hi(2))}}; + GpuArray ProbHiArrayInParticleReal () const noexcept { +#ifdef AMREX_SINGLE_PRECISION_PARTICLES + return roundoff_hi_f; +#else + return roundoff_hi_d; +#endif } //! Returns the overall size of the domain by multiplying the ProbLength's together @@ -406,7 +430,7 @@ public: * are sure to be mapped to cells inside the Domain() box. Note that * the same need not be true for all points inside ProbDomain(). */ - bool outsideRoundoffDomain (AMREX_D_DECL(Real x, Real y, Real z)) const; + bool outsideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const; /** * \brief Returns true if a point is inside the roundoff domain. @@ -414,7 +438,7 @@ public: * are sure to be mapped to cells inside the Domain() box. Note that * the same need not be true for all points inside ProbDomain(). */ - bool insideRoundoffDomain (AMREX_D_DECL(Real x, Real y, Real z)) const; + bool insideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const; /** * \brief Compute the roundoff domain. Public because it contains an @@ -430,10 +454,11 @@ private: RealBox prob_domain; // Due to round-off errors, not all floating point numbers for which plo >= x < phi - // will map to a cell that is inside "domain". "roundoff_domain" stores a phi - // that is very close to that in prob_domain, and for which all floating point numbers - // inside it according to a naive inequality check will map to a cell inside domain. - RealBox roundoff_domain; + // will map to a cell that is inside "domain". 
"roundoff_hi_d" and "roundoff_hi_f" each store + // a phi that is very close to that in prob_domain, and for which all doubles and floats less than + // it will map to a cell inside domain. + GpuArray roundoff_hi_d; + GpuArray roundoff_hi_f; // Box domain; diff --git a/Src/Base/AMReX_Geometry.cpp b/Src/Base/AMReX_Geometry.cpp index 395f17e352b..1457db6b8d1 100644 --- a/Src/Base/AMReX_Geometry.cpp +++ b/Src/Base/AMReX_Geometry.cpp @@ -506,7 +506,6 @@ Geometry::computeRoundoffDomain () inv_dx[k] = 1.0_rt/dx[k]; } - roundoff_domain = prob_domain; for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { int ilo = Domain().smallEnd(idim); @@ -516,40 +515,37 @@ Geometry::computeRoundoffDomain () Real idx = InvCellSize(idim); Real deltax = CellSize(idim); -#ifdef AMREX_SINGLE_PRECISION_PARTICLES - Real tolerance = std::max(1.e-4_rt*deltax, 2.e-7_rt*phi); -#else - Real tolerance = std::max(1.e-8_rt*deltax, 1.e-14_rt*phi); -#endif - // bisect the point at which the cell no longer maps to inside the domain - Real lo = static_cast(phi) - Real(0.5)*static_cast(deltax); - Real hi = static_cast(phi) + Real(0.5)*static_cast(deltax); - - Real mid = bisect(lo, hi, - [=] AMREX_GPU_HOST_DEVICE (Real x) -> Real - { - int i = int(Math::floor((x - plo)*idx)) + ilo; - bool inside = i >= ilo && i <= ihi; - return static_cast(inside) - Real(0.5); - }, tolerance); - roundoff_domain.setHi(idim, mid - tolerance); + Real ftol = std::max(1.e-4_rt*deltax, 2.e-7_rt*phi); + Real dtol = std::max(1.e-8_rt*deltax, 1.e-14_rt*phi); + + roundoff_hi_f[idim] = detail::bisect_prob_hi (plo, phi, idx, ilo, ihi, ftol); + roundoff_hi_d[idim] = detail::bisect_prob_hi(plo, phi, idx, ilo, ihi, dtol); } } bool -Geometry::outsideRoundoffDomain (AMREX_D_DECL(Real x, Real y, Real z)) const +Geometry::outsideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const { - bool outside = AMREX_D_TERM(x < roundoff_domain.lo(0) - || x >= roundoff_domain.hi(0), - || y < roundoff_domain.lo(1) - || y >= 
roundoff_domain.hi(1), - || z < roundoff_domain.lo(2) - || z >= roundoff_domain.hi(2)); +#ifdef AMREX_SINGLE_PRECISION_PARTICLES + bool outside = AMREX_D_TERM(x < prob_domain.lo(0) + || x >= roundoff_hi_f[0], + || y < prob_domain.lo(1) + || y >= roundoff_hi_f[1], + || z < prob_domain.lo(2) + || z >= roundoff_hi_f[2]); +#else + bool outside = AMREX_D_TERM(x < prob_domain.lo(0) + || x >= roundoff_hi_d[0], + || y < prob_domain.lo(1) + || y >= roundoff_hi_d[1], + || z < prob_domain.lo(2) + || z >= roundoff_hi_d[2]); +#endif return outside; } bool -Geometry::insideRoundoffDomain (AMREX_D_DECL(Real x, Real y, Real z)) const +Geometry::insideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const { return !outsideRoundoffDomain(AMREX_D_DECL(x, y, z)); } diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index f4ababb3a82..f6f51c572cf 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -239,7 +239,7 @@ ParticleContainer const auto& geom = Geom(0); const auto plo = geom.ProbLoArray(); const auto phi = geom.ProbHiArray(); - const auto rhi = geom.RoundoffHiArray(); + const auto rhi = geom.ProbHiArrayInParticleReal(); const auto is_per = geom.isPeriodicArray(); return enforcePeriodic(p, plo, phi, rhi, is_per); @@ -314,20 +314,21 @@ ParticleContainer::lo if (! 
outside) { - if (Geom(0).outsideRoundoffDomain(AMREX_D_DECL(Real(p.pos(0)), Real(p.pos(1)), Real(p.pos(2))))) + if (Geom(0).outsideRoundoffDomain(AMREX_D_DECL(p.pos(0), p.pos(1), p.pos(2)))) { - RealBox roundoff_domain = Geom(0).RoundoffDomain(); + RealBox prob_domain = Geom(0).ProbDomain(); + GpuArray phi = Geom(0).ProbHiArrayInParticleReal(); for (int idim=0; idim < AMREX_SPACEDIM; ++idim) { - if (p.pos(idim) <= roundoff_domain.lo(idim)) { - p.pos(idim) = std::nextafter((ParticleReal) roundoff_domain.lo(idim), (ParticleReal) roundoff_domain.hi(idim)); + if (p.pos(idim) <= prob_domain.lo(idim)) { + p.pos(idim) = std::nextafter((ParticleReal) prob_domain.lo(idim), phi[idim]); } - if (p.pos(idim) >= roundoff_domain.hi(idim)) { - p.pos(idim) = std::nextafter((ParticleReal) roundoff_domain.hi(idim), (ParticleReal) roundoff_domain.lo(idim)); + if (p.pos(idim) >= phi[idim]) { + p.pos(idim) = std::nextafter(phi[idim], (ParticleReal) prob_domain.lo(idim)); } } - AMREX_ASSERT(! Geom(0).outsideRoundoffDomain(AMREX_D_DECL(Real(p.pos(0)), Real(p.pos(1)), Real(p.pos(2))))); + AMREX_ASSERT(! 
Geom(0).outsideRoundoffDomain(AMREX_D_DECL(p.pos(0), p.pos(1), p.pos(2)))); } } @@ -1233,7 +1234,7 @@ ParticleContainer Vector > new_sizes(num_levels); const auto plo = Geom(0).ProbLoArray(); const auto phi = Geom(0).ProbHiArray(); - const auto rhi = Geom(0).RoundoffHiArray(); + const auto rhi = Geom(0).ProbHiArrayInParticleReal(); const auto is_per = Geom(0).isPeriodicArray(); for (int lev = lev_min; lev <= finest_lev_particles; ++lev) { diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 6732a271810..6623f353749 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -517,7 +517,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool enforcePeriodic (P& p, amrex::GpuArray const& plo, amrex::GpuArray const& phi, - amrex::GpuArray const& rhi, + amrex::GpuArray const& rhi, amrex::GpuArray const& is_per) noexcept { bool shifted = false; @@ -537,7 +537,7 @@ bool enforcePeriodic (P& p, p.pos(idim) += static_cast(phi[idim] - plo[idim]); } // clamp to avoid precision issues; - if (p.pos(idim) >= rhi[idim]) { + if (p.pos(idim) > rhi[idim]) { p.pos(idim) = static_cast(rhi[idim]); } shifted = true; @@ -555,7 +555,7 @@ int partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBufferMap& pmap, const GpuArray& plo, const GpuArray& phi, - const GpuArray& rhi, + const GpuArray& rhi, const GpuArray& is_per, int lev, int gid, int /*tid*/, int lev_min, int lev_max, int nGrow, bool remove_negative) From 5188a6a28e64dc627c3333d13bebeb0d7250b506 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 23 Jun 2022 11:09:15 -0700 Subject: [PATCH 002/111] Explicitly invoke python3 (#2850) According to PEP 394, a python distributor may choose to not provide the python command. In fact, that's what recent versions of macOS do. 
--- Tools/Backtrace/parse_bt.py | 2 +- Tools/C_scripts/describe_sources.py | 6 +----- Tools/C_scripts/gatherbuildtime.py | 6 +----- Tools/CompileTesting/compiletesting.py | 4 +--- Tools/F_scripts/dep.py | 13 +------------ Tools/F_scripts/fcheck.py | 8 +------- Tools/F_scripts/find_files_vpath.py | 4 +--- Tools/F_scripts/findparams.py | 4 +--- Tools/F_scripts/makebuildinfo.py | 2 +- Tools/F_scripts/write_probin.py | 4 +--- Tools/GNUMake/Make.defs | 5 ----- Tools/Postprocessing/python/column_depth.py | 2 +- Tools/Postprocessing/python/conv_slopes.py | 2 +- Tools/Postprocessing/python/dumpparthistory.py | 7 +------ Tools/Postprocessing/python/test_helmeos.py | 2 +- Tools/Postprocessing/python/test_parseparticles.py | 3 +-- Tools/Py_util/plotsinglevar.py | 4 +--- Tools/Release/ppCleanup.py | 2 +- Tools/Release/ppCleanupDir.py | 2 +- Tools/Release/release.py | 2 +- Tools/libamrex/configure.py | 8 +------- Tools/libamrex/mkconfig.py | 8 +------- Tools/libamrex/mkpkgconfig.py | 8 +------- Tools/libamrex/mkversionheader.py | 8 +------- Tools/typechecker/typechecker.py | 4 +--- 25 files changed, 24 insertions(+), 96 deletions(-) diff --git a/Tools/Backtrace/parse_bt.py b/Tools/Backtrace/parse_bt.py index ce4a6684911..dd0234f9120 100755 --- a/Tools/Backtrace/parse_bt.py +++ b/Tools/Backtrace/parse_bt.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys import re diff --git a/Tools/C_scripts/describe_sources.py b/Tools/C_scripts/describe_sources.py index c49d16694a9..97cfe5e1e1c 100755 --- a/Tools/C_scripts/describe_sources.py +++ b/Tools/C_scripts/describe_sources.py @@ -1,10 +1,6 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys - -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for dep.py") - import argparse import os import subprocess diff --git a/Tools/C_scripts/gatherbuildtime.py b/Tools/C_scripts/gatherbuildtime.py index 082ec766c28..b0b1740847f 100755 --- a/Tools/C_scripts/gatherbuildtime.py +++ 
b/Tools/C_scripts/gatherbuildtime.py @@ -1,11 +1,7 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -from __future__ import print_function import sys, os, glob, operator, time -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for dep.py") - if __name__ == "__main__": dt = float(sys.argv[3])-float(sys.argv[2]) hours, rem = divmod(dt, 3600) diff --git a/Tools/CompileTesting/compiletesting.py b/Tools/CompileTesting/compiletesting.py index 129e83ca960..9cb5f59bac5 100755 --- a/Tools/CompileTesting/compiletesting.py +++ b/Tools/CompileTesting/compiletesting.py @@ -1,6 +1,5 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -from __future__ import print_function import sys import os import shlex @@ -148,4 +147,3 @@ def run(command, outfile=None): if __name__ == "__main__": compiletesting(sys.argv[1:]) - diff --git a/Tools/F_scripts/dep.py b/Tools/F_scripts/dep.py index 894dcdb65e6..24bd8318fb8 100755 --- a/Tools/F_scripts/dep.py +++ b/Tools/F_scripts/dep.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # automatically generate Makefile dependencies for Fortran 90 source. # @@ -20,18 +20,7 @@ # (e.g. iso_c_binding). 
Add any system-provided modules to the # `IGNORES` list below -from __future__ import print_function - import sys - -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for dep.py") - -if sys.version[0] == "2": - reload(sys) - sys.setdefaultencoding('latin-1') - - import io import re import os diff --git a/Tools/F_scripts/fcheck.py b/Tools/F_scripts/fcheck.py index 20033f85ac9..f5be4efd726 100755 --- a/Tools/F_scripts/fcheck.py +++ b/Tools/F_scripts/fcheck.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # a simple routine to parse Fortran files and make sure that things are # declared double precision, and constants are of the form 1.0_dp_t or @@ -122,9 +122,3 @@ def visit(argFiles, dirname, files): if (badFile == 1): print " " - - - - - - diff --git a/Tools/F_scripts/find_files_vpath.py b/Tools/F_scripts/find_files_vpath.py index c9dd5485930..a52d0f28f3d 100755 --- a/Tools/F_scripts/find_files_vpath.py +++ b/Tools/F_scripts/find_files_vpath.py @@ -1,12 +1,10 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Take a vpath and a list of files and find where in the first vpath the first occurrence of the file. """ -from __future__ import print_function - import sys import os import argparse diff --git a/Tools/F_scripts/findparams.py b/Tools/F_scripts/findparams.py index 70280b134de..79d698ade8d 100755 --- a/Tools/F_scripts/findparams.py +++ b/Tools/F_scripts/findparams.py @@ -1,6 +1,4 @@ -#!/usr/bin/env python - -from __future__ import print_function +#!/usr/bin/env python3 import sys import os diff --git a/Tools/F_scripts/makebuildinfo.py b/Tools/F_scripts/makebuildinfo.py index e5f206339b2..4d08a571145 100755 --- a/Tools/F_scripts/makebuildinfo.py +++ b/Tools/F_scripts/makebuildinfo.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # a simple script that writes the build_info.f90 file that is used # to store information for the job_info file that we store in plotfiles. 
diff --git a/Tools/F_scripts/write_probin.py b/Tools/F_scripts/write_probin.py index 10ec4489066..54729eb5f5e 100755 --- a/Tools/F_scripts/write_probin.py +++ b/Tools/F_scripts/write_probin.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """This routine parses plain-text parameter files that list runtime parameters for use in our codes. The general format of a parameter @@ -24,8 +24,6 @@ """ -from __future__ import print_function - import os import sys import argparse diff --git a/Tools/GNUMake/Make.defs b/Tools/GNUMake/Make.defs index db1ce350e54..90a399e98af 100644 --- a/Tools/GNUMake/Make.defs +++ b/Tools/GNUMake/Make.defs @@ -1,8 +1,3 @@ -# Check python version -my_python_version := $(word 2, $(shell python --version 2>&1)) -ifneq ($(firstword $(sort 2.7 $(my_python_version))), 2.7) - $(error Python >= 2.7 required! Your version is $(my_python_version)) -endif ifneq (,$(findstring ~,$(AMREX_HOME))) $(warning *** AMREX_HOME string constains ~ and make will not like it. So it is replaced.) 
diff --git a/Tools/Postprocessing/python/column_depth.py b/Tools/Postprocessing/python/column_depth.py index 3aff2ac4705..be17d6bb663 100755 --- a/Tools/Postprocessing/python/column_depth.py +++ b/Tools/Postprocessing/python/column_depth.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys import numpy diff --git a/Tools/Postprocessing/python/conv_slopes.py b/Tools/Postprocessing/python/conv_slopes.py index f2fe5404aae..9f1a22e3960 100755 --- a/Tools/Postprocessing/python/conv_slopes.py +++ b/Tools/Postprocessing/python/conv_slopes.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys import os import commands diff --git a/Tools/Postprocessing/python/dumpparthistory.py b/Tools/Postprocessing/python/dumpparthistory.py index 092f924423b..23f6d22d1a8 100755 --- a/Tools/Postprocessing/python/dumpparthistory.py +++ b/Tools/Postprocessing/python/dumpparthistory.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # a simple routine to parse particle files and dump out the particle # histories into separate files (1 file per particle) so that they can @@ -96,8 +96,3 @@ def main(files): sys.exit(2) main(sys.argv[1:]) - - - - - diff --git a/Tools/Postprocessing/python/test_helmeos.py b/Tools/Postprocessing/python/test_helmeos.py index 890a66aef77..824f369cf60 100755 --- a/Tools/Postprocessing/python/test_helmeos.py +++ b/Tools/Postprocessing/python/test_helmeos.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # a script showing how to use the helmeos module # it reads T, rho, X data from a sample data file, calculates abar and zbar diff --git a/Tools/Postprocessing/python/test_parseparticles.py b/Tools/Postprocessing/python/test_parseparticles.py index b9181af4d8a..8a85fe2faf6 100755 --- a/Tools/Postprocessing/python/test_parseparticles.py +++ b/Tools/Postprocessing/python/test_parseparticles.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # simple script showing how to make 
plots of particles using the parseparticles # module @@ -92,4 +92,3 @@ def main(fileList): # this is for profiling # cProfile.run("main(sys.argv[1:])","profile.tmp2") - diff --git a/Tools/Py_util/plotsinglevar.py b/Tools/Py_util/plotsinglevar.py index 616c516c805..bb1c2abacaa 100755 --- a/Tools/Py_util/plotsinglevar.py +++ b/Tools/Py_util/plotsinglevar.py @@ -1,11 +1,9 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # a simple script to plot 2-d or 3-d BoxLib data using the matplotlib # library # -from __future__ import print_function - import matplotlib matplotlib.use('agg') diff --git a/Tools/Release/ppCleanup.py b/Tools/Release/ppCleanup.py index 109444daff3..2935d0c1983 100755 --- a/Tools/Release/ppCleanup.py +++ b/Tools/Release/ppCleanup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import os import shutil diff --git a/Tools/Release/ppCleanupDir.py b/Tools/Release/ppCleanupDir.py index befebc15f2d..2d8a598291d 100755 --- a/Tools/Release/ppCleanupDir.py +++ b/Tools/Release/ppCleanupDir.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import os import shutil diff --git a/Tools/Release/release.py b/Tools/Release/release.py index 87de82e5a30..8f2b4d9d5dc 100755 --- a/Tools/Release/release.py +++ b/Tools/Release/release.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import os import shutil diff --git a/Tools/libamrex/configure.py b/Tools/libamrex/configure.py index ac4b399a471..ebb3cd369f4 100755 --- a/Tools/libamrex/configure.py +++ b/Tools/libamrex/configure.py @@ -1,12 +1,6 @@ -#!/usr/bin/env python - -from __future__ import print_function +#!/usr/bin/env python3 import sys - -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for configure.py") - import argparse def configure(argv): diff --git a/Tools/libamrex/mkconfig.py b/Tools/libamrex/mkconfig.py index 30c54f285a2..21f66348891 100755 --- a/Tools/libamrex/mkconfig.py +++ b/Tools/libamrex/mkconfig.py @@ -1,12 +1,6 @@ 
-#!/usr/bin/env python - -from __future__ import print_function +#!/usr/bin/env python3 import sys, re - -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for mkconfig.py") - import argparse def doit(defines, undefines, comp, allow_diff_comp): diff --git a/Tools/libamrex/mkpkgconfig.py b/Tools/libamrex/mkpkgconfig.py index be91e8736a8..c8a626901da 100755 --- a/Tools/libamrex/mkpkgconfig.py +++ b/Tools/libamrex/mkpkgconfig.py @@ -1,12 +1,6 @@ -#!/usr/bin/env python - -from __future__ import print_function +#!/usr/bin/env python3 import sys - -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for mkpkgconfig.py") - import argparse def doit(prefix, version, cflags, libs, libpriv, fflags): diff --git a/Tools/libamrex/mkversionheader.py b/Tools/libamrex/mkversionheader.py index f2f6f8865f9..b1dbf0eb2ad 100755 --- a/Tools/libamrex/mkversionheader.py +++ b/Tools/libamrex/mkversionheader.py @@ -1,12 +1,6 @@ -#!/usr/bin/env python - -from __future__ import print_function +#!/usr/bin/env python3 import sys, re - -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for mkversionheader.py") - import argparse def doit(code, defines): diff --git a/Tools/typechecker/typechecker.py b/Tools/typechecker/typechecker.py index 2086b22d1b5..6035b7a6c15 100755 --- a/Tools/typechecker/typechecker.py +++ b/Tools/typechecker/typechecker.py @@ -1,6 +1,4 @@ -#!/usr/bin/env python - -from __future__ import print_function +#!/usr/bin/env python3 import os import sys From fc0d6469f4ad590d576a7109d8719b018838dd86 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 23 Jun 2022 12:23:55 -0700 Subject: [PATCH 003/111] Remove f90doc (#2851) We no longer use it. 
--- Tools/F_scripts/f90doc/README | 36 -- Tools/F_scripts/f90doc/expr_parse.pl | 793 ------------------------ Tools/F_scripts/f90doc/expr_parse.y | 234 ------- Tools/F_scripts/f90doc/f90doc | 160 ----- Tools/F_scripts/f90doc/htmling.pl | 376 ----------- Tools/F_scripts/f90doc/stmts.pl | 891 --------------------------- Tools/F_scripts/f90doc/typing.pl | 516 ---------------- Tools/F_scripts/f90doc/utils.pl | 87 --- 8 files changed, 3093 deletions(-) delete mode 100644 Tools/F_scripts/f90doc/README delete mode 100644 Tools/F_scripts/f90doc/expr_parse.pl delete mode 100644 Tools/F_scripts/f90doc/expr_parse.y delete mode 100755 Tools/F_scripts/f90doc/f90doc delete mode 100644 Tools/F_scripts/f90doc/htmling.pl delete mode 100644 Tools/F_scripts/f90doc/stmts.pl delete mode 100644 Tools/F_scripts/f90doc/typing.pl delete mode 100644 Tools/F_scripts/f90doc/utils.pl diff --git a/Tools/F_scripts/f90doc/README b/Tools/F_scripts/f90doc/README deleted file mode 100644 index 6edb2de011f..00000000000 --- a/Tools/F_scripts/f90doc/README +++ /dev/null @@ -1,36 +0,0 @@ -This is f90doc version 0.3.4, a documentation tool for Fortran 90. For -more information (e.g., documentation), see - - http://theory.lcs.mit.edu/~edemaine/f90doc - -or contact Erik Demaine (edemaine@mit.edu). Comments, suggestions, -criticisms, and bug reports go to this e-mail address. If you modify f90doc or -use it in a serious way, please contact me (I'd be interested). - -COPYRIGHT - -f90doc is freeware. If you use it in a research or commercial project, you -must acknowledge the software and its author. I would also appreciate it if -you contact me -- I'd like to know how f90doc is used. If you base code on -f90doc, you must acknowledge this. Again, please let me know if you think your -changes would be at all useful to the rest of the world (even if you are not -willing to share it, the ideas may be useful). - -This information must accompany any copy of f90doc. 
- -INSTALLATION - -You shouldn't have to compile anything. You can put the file f90doc in -a more accessible place, but the .pl files have to be in the same directory. -Alternatively, you can create a symlink to the real f90doc, where the .pl -files are held. For example, - - ln -s /usr/local/lib/f90doc-0.3.4/f90doc /usr/local/bin/f90doc - -If you don't have a command /usr/bin/env, you'll need to replace the first line -of f90doc with - - #!/path/to/perl5/bin/perl -w - -Otherwise, Perl version 5.003 or higher must be the first program called "perl" -in your path. diff --git a/Tools/F_scripts/f90doc/expr_parse.pl b/Tools/F_scripts/f90doc/expr_parse.pl deleted file mode 100644 index 3e831337041..00000000000 --- a/Tools/F_scripts/f90doc/expr_parse.pl +++ /dev/null @@ -1,793 +0,0 @@ -$yysccsid = "@(#)yaccpar 1.8 (Berkeley) 01/20/91 (Perl 2.0 12/31/92)"; -#define YYBYACC 1 -#line 2 "expr_parse.y" -package expr_parse; - -;# On failure, print out this as the line we were working on. -$expr_parse::line = ""; - -;# Portion of line left to parse -$expr_parse::left = ""; -#line 12 "y.tab.pl" -$COMMA=257; -$LPAREN=258; -$RPAREN=259; -$NOT=260; -$OR=261; -$AND=262; -$EQV=263; -$NEQV=264; -$COMPARISON=265; -$DBLSLASH=266; -$PERCENT=267; -$PLUS=268; -$MINUS=269; -$UPLUS=270; -$UMINUS=271; -$ASTERIK=272; -$SLASH=273; -$DBLASTERIK=274; -$CONST=275; -$NAME=276; -$COLON=277; -$LARRAY=278; -$RARRAY=279; -$EQUALS=280; -$YYERRCODE=256; -@yylhs = ( -1, - 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 5, 5, 5, 5, 5, 4, 4, 7, 6, - 6, 3, 3, 3, 8, 8, 9, 9, 10, 10, - 10, 12, 11, 11, 11, 11, -); -@yylen = ( 2, - 1, 2, 1, 1, 1, 3, 2, 2, 2, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 1, 3, 1, 3, 3, 3, 1, 1, 5, - 7, 1, 3, 4, 0, 1, 3, 1, 1, 1, - 1, 3, 1, 2, 2, 3, -); -@yydefred = ( 0, - 0, 0, 0, 0, 3, 32, 0, 0, 0, 4, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 28, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 10, 0, 6, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 38, 40, 41, 33, - 
23, 0, 26, 25, 27, 0, 0, 0, 34, 0, - 0, 0, 0, 37, 0, 0, 0, 0, 0, -); -@yydgoto = ( 8, - 19, 10, 11, 20, 15, 63, 21, 55, 56, 57, - 58, 59, -); -@yysindex = ( -212, - -157, -212, -212, -212, 0, 0, -212, 0, -137, 0, - -246, -241, -29, -234, -235, -19, -223, -223, -29, -257, - 0, 0, -212, -212, -212, -212, -212, -212, -212, -212, - -212, -212, -212, -216, -229, -267, -222, 0, -212, 0, - -255, -19, 227, 227, 236, -164, -223, -223, -233, -233, - -233, -205, -212, -76, -174, -162, 0, 0, 0, 0, - 0, -180, 0, 0, 0, -212, -29, -212, 0, -216, - -212, -29, -29, 0, -118, -212, -95, -212, -29, -); -@yyrindex = ( 0, - 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, - 1, -59, 0, -43, 0, 163, 77, 96, -242, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, -152, 0, 0, 0, 0, 0, 0, - 191, 172, 199, 208, 182, 153, 115, 134, 20, 39, - 58, -175, -219, -214, 0, -146, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, -192, -188, 0, 0, - 0, -183, -178, 0, 0, 0, -145, 0, -143, -); -@yygindex = ( 0, - 2, 116, 0, 0, 0, 85, 84, 0, 0, 60, - 0, 0, -); -$YYTABLESIZE=510; -@yytable = ( 39, - 5, 9, 13, 16, 17, 18, 24, 61, 62, 27, - 28, 34, 29, 30, 29, 36, 31, 32, 33, 12, - 35, 40, 37, 38, 41, 42, 43, 44, 45, 46, - 47, 48, 49, 50, 51, 54, 29, 43, 13, 43, - 33, 1, 39, 2, 39, 1, 60, 2, 31, 32, - 33, 3, 4, 62, 67, 3, 4, 11, 5, 52, - 53, 7, 5, 6, 45, 7, 45, 72, 44, 73, - 44, 54, 75, 42, 66, 42, 7, 77, 46, 79, - 46, 32, 32, 32, 69, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 70, 8, 32, 32, 32, 71, - 1, 32, 2, 29, 30, 1, 35, 31, 32, 33, - 3, 4, 36, 30, 14, 31, 14, 12, 6, 22, - 7, 64, 65, 23, 24, 25, 26, 27, 28, 74, - 29, 30, 0, 15, 31, 32, 33, 0, 76, 0, - 0, 0, 23, 24, 25, 26, 27, 28, 0, 29, - 30, 0, 16, 31, 32, 33, 0, 0, 0, 0, - 0, 78, 9, 0, 0, 23, 24, 25, 26, 27, - 28, 18, 29, 30, 0, 0, 31, 32, 33, 0, - 0, 17, 0, 0, 23, 24, 25, 26, 27, 28, - 19, 29, 30, 0, 0, 31, 32, 33, 20, 22, - 68, 3, 3, 3, 3, 3, 3, 21, 3, 3, - 0, 0, 3, 3, 3, 24, 0, 4, 4, 4, - 4, 4, 4, 0, 4, 4, 0, 0, 4, 4, - 4, 23, 24, 25, 26, 27, 28, 0, 29, 30, - 0, 0, 31, 32, 33, 27, 28, 0, 
29, 30, - 0, 0, 31, 32, 33, 0, 0, 5, 0, 5, - 0, 5, 5, 5, 5, 5, 5, 0, 5, 5, - 0, 0, 5, 5, 5, 0, 12, 5, 12, 5, - 12, 12, 12, 12, 12, 12, 0, 12, 12, 0, - 0, 12, 12, 0, 0, 13, 12, 13, 12, 13, - 13, 13, 13, 13, 13, 0, 13, 13, 0, 0, - 13, 13, 0, 0, 11, 13, 11, 13, 11, 11, - 11, 11, 11, 11, 0, 11, 11, 0, 0, 11, - 11, 0, 0, 7, 11, 7, 11, 7, 7, 7, - 7, 7, 7, 0, 7, 7, 0, 0, 0, 0, - 0, 0, 8, 7, 8, 7, 8, 8, 8, 8, - 8, 8, 0, 8, 8, 0, 0, 0, 0, 0, - 0, 14, 8, 14, 8, 14, 14, 14, 14, 14, - 14, 0, 14, 14, 0, 0, 0, 0, 0, 0, - 15, 14, 15, 14, 15, 15, 15, 15, 15, 15, - 0, 15, 15, 0, 0, 0, 0, 0, 0, 16, - 15, 16, 15, 16, 16, 16, 16, 16, 16, 9, - 0, 9, 0, 9, 9, 9, 9, 0, 18, 16, - 18, 16, 18, 18, 18, 18, 0, 0, 17, 9, - 17, 9, 17, 17, 17, 17, 0, 19, 18, 19, - 18, 19, 0, 19, 19, 20, 0, 20, 17, 0, - 17, 20, 20, 0, 21, 0, 21, 19, 0, 19, - 21, 21, 0, 0, 0, 20, 0, 20, 0, 0, - 0, 0, 0, 0, 21, 0, 21, 23, 24, 0, - 0, 27, 28, 0, 29, 30, 0, 0, 31, 32, - 33, 28, 0, 29, 30, 0, 0, 31, 32, 33, -); -@yycheck = ( 257, - 0, 0, 1, 2, 3, 4, 262, 275, 276, 265, - 266, 258, 268, 269, 257, 257, 272, 273, 274, 0, - 267, 279, 257, 259, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 279, 257, 0, 259, - 274, 258, 257, 260, 259, 258, 276, 260, 272, 273, - 274, 268, 269, 276, 53, 268, 269, 0, 275, 276, - 277, 278, 275, 276, 257, 278, 259, 66, 257, 68, - 259, 70, 71, 257, 280, 259, 0, 76, 257, 78, - 259, 257, 258, 259, 259, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 257, 0, 272, 273, 274, 280, - 258, 277, 260, 268, 269, 0, 259, 272, 273, 274, - 268, 269, 259, 259, 0, 259, 1, 275, 276, 257, - 278, 37, 39, 261, 262, 263, 264, 265, 266, 70, - 268, 269, -1, 0, 272, 273, 274, -1, 257, -1, - -1, -1, 261, 262, 263, 264, 265, 266, -1, 268, - 269, -1, 0, 272, 273, 274, -1, -1, -1, -1, - -1, 257, 0, -1, -1, 261, 262, 263, 264, 265, - 266, 0, 268, 269, -1, -1, 272, 273, 274, -1, - -1, 0, -1, -1, 261, 262, 263, 264, 265, 266, - 0, 268, 269, -1, -1, 272, 273, 274, 0, 259, - 277, 261, 262, 263, 264, 265, 266, 0, 268, 269, - 
-1, -1, 272, 273, 274, 259, -1, 261, 262, 263, - 264, 265, 266, -1, 268, 269, -1, -1, 272, 273, - 274, 261, 262, 263, 264, 265, 266, -1, 268, 269, - -1, -1, 272, 273, 274, 265, 266, -1, 268, 269, - -1, -1, 272, 273, 274, -1, -1, 257, -1, 259, - -1, 261, 262, 263, 264, 265, 266, -1, 268, 269, - -1, -1, 272, 273, 274, -1, 257, 277, 259, 279, - 261, 262, 263, 264, 265, 266, -1, 268, 269, -1, - -1, 272, 273, -1, -1, 257, 277, 259, 279, 261, - 262, 263, 264, 265, 266, -1, 268, 269, -1, -1, - 272, 273, -1, -1, 257, 277, 259, 279, 261, 262, - 263, 264, 265, 266, -1, 268, 269, -1, -1, 272, - 273, -1, -1, 257, 277, 259, 279, 261, 262, 263, - 264, 265, 266, -1, 268, 269, -1, -1, -1, -1, - -1, -1, 257, 277, 259, 279, 261, 262, 263, 264, - 265, 266, -1, 268, 269, -1, -1, -1, -1, -1, - -1, 257, 277, 259, 279, 261, 262, 263, 264, 265, - 266, -1, 268, 269, -1, -1, -1, -1, -1, -1, - 257, 277, 259, 279, 261, 262, 263, 264, 265, 266, - -1, 268, 269, -1, -1, -1, -1, -1, -1, 257, - 277, 259, 279, 261, 262, 263, 264, 265, 266, 257, - -1, 259, -1, 261, 262, 263, 264, -1, 257, 277, - 259, 279, 261, 262, 263, 264, -1, -1, 257, 277, - 259, 279, 261, 262, 263, 264, -1, 257, 277, 259, - 279, 261, -1, 263, 264, 257, -1, 259, 277, -1, - 279, 263, 264, -1, 257, -1, 259, 277, -1, 279, - 263, 264, -1, -1, -1, 277, -1, 279, -1, -1, - -1, -1, -1, -1, 277, -1, 279, 261, 262, -1, - -1, 265, 266, -1, 268, 269, -1, -1, 272, 273, - 274, 266, -1, 268, 269, -1, -1, 272, 273, 274, -); -$YYFINAL=8; -#ifndef YYDEBUG -#define YYDEBUG 0 -#endif -$YYMAXTOKEN=280; -#if YYDEBUG -@yyname = ( -"end-of-file",'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', 
-'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','',"COMMA","LPAREN","RPAREN","NOT", -"OR","AND","EQV","NEQV","COMPARISON","DBLSLASH","PERCENT","PLUS","MINUS", -"UPLUS","UMINUS","ASTERIK","SLASH","DBLASTERIK","CONST","NAME","COLON","LARRAY", -"RARRAY","EQUALS", -); -@yyrule = ( -"\$accept : expr_with_abort", -"expr_with_abort : expr", -"expr_with_abort : expr COMMA", -"expr : CONST", -"expr : expr_without_const", -"expr_without_const : chain", -"expr_without_const : LARRAY array RARRAY", -"expr_without_const : PLUS expr", -"expr_without_const : MINUS expr", -"expr_without_const : NOT expr", -"expr_without_const : LPAREN potential_complex_or_implied_do RPAREN", -"expr_without_const : expr DBLASTERIK expr", -"expr_without_const : expr ASTERIK expr", -"expr_without_const : expr SLASH expr", -"expr_without_const : expr PLUS expr", -"expr_without_const : expr MINUS expr", -"expr_without_const : expr DBLSLASH expr", -"expr_without_const : expr COMPARISON expr", -"expr_without_const : expr AND expr", -"expr_without_const : expr OR expr", -"expr_without_const : expr EQV expr", -"expr_without_const : expr NEQV expr", -"potential_complex_or_implied_do : CONST", -"potential_complex_or_implied_do : CONST COMMA CONST", -"potential_complex_or_implied_do : expr_without_const", -"potential_complex_or_implied_do : expr_without_const COMMA do_args", -"potential_complex_or_implied_do : CONST COMMA do_args", -"array : array COMMA array_piece", -"array : array_piece", -"array_piece : expr", -"do_args : NAME EQUALS expr COMMA expr", -"do_args : NAME EQUALS expr COMMA expr COMMA expr", -"chain : NAME", -"chain 
: chain PERCENT NAME", -"chain : chain LPAREN exprlist RPAREN", -"exprlist :", -"exprlist : exprlist_ne", -"exprlist_ne : exprlist_ne COMMA argument", -"exprlist_ne : argument", -"argument : expr", -"argument : colonexpr", -"argument : namedargument", -"namedargument : NAME EQUALS expr", -"colonexpr : COLON", -"colonexpr : expr COLON", -"colonexpr : COLON expr", -"colonexpr : expr COLON expr", -); -#endif -sub yyclearin { $yychar = -1; } -sub yyerrok { $yyerrflag = 0; } -$YYSTACKSIZE = $YYSTACKSIZE || $YYMAXDEPTH || 500; -$YYMAXDEPTH = $YYMAXDEPTH || $YYSTACKSIZE || 500; -$yyss[$YYSTACKSIZE] = 0; -$yyvs[$YYSTACKSIZE] = 0; -sub YYERROR { ++$yynerrs; &yy_err_recover; } -sub yy_err_recover -{ - if ($yyerrflag < 3) - { - $yyerrflag = 3; - while (1) - { - if (($yyn = $yysindex[$yyss[$yyssp]]) && - ($yyn += $YYERRCODE) >= 0 && - $yycheck[$yyn] == $YYERRCODE) - { -#if YYDEBUG - print "yydebug: state $yyss[$yyssp], error recovery shifting", - " to state $yytable[$yyn]\n" if $yydebug; -#endif - $yyss[++$yyssp] = $yystate = $yytable[$yyn]; - $yyvs[++$yyvsp] = $yylval; - next yyloop; - } - else - { -#if YYDEBUG - print "yydebug: error recovery discarding state ", - $yyss[$yyssp], "\n" if $yydebug; -#endif - return(1) if $yyssp <= 0; - --$yyssp; - --$yyvsp; - } - } - } - else - { - return (1) if $yychar == 0; -#if YYDEBUG - if ($yydebug) - { - $yys = ''; - if ($yychar <= $YYMAXTOKEN) { $yys = $yyname[$yychar]; } - if (!$yys) { $yys = 'illegal-symbol'; } - print "yydebug: state $yystate, error recovery discards ", - "token $yychar ($yys)\n"; - } -#endif - $yychar = -1; - next yyloop; - } -0; -} # yy_err_recover - -sub yyparse -{ -#ifdef YYDEBUG - if ($yys = $ENV{'YYDEBUG'}) - { - $yydebug = int($1) if $yys =~ /^(\d)/; - } -#endif - - $yynerrs = 0; - $yyerrflag = 0; - $yychar = (-1); - - $yyssp = 0; - $yyvsp = 0; - $yyss[$yyssp] = $yystate = 0; - -yyloop: while(1) - { - yyreduce: { - last yyreduce if ($yyn = $yydefred[$yystate]); - if ($yychar < 0) - { - if (($yychar = &yylex) < 
0) { $yychar = 0; } -#if YYDEBUG - if ($yydebug) - { - $yys = ''; - if ($yychar <= $#yyname) { $yys = $yyname[$yychar]; } - if (!$yys) { $yys = 'illegal-symbol'; }; - print "yydebug: state $yystate, reading $yychar ($yys)\n"; - } -#endif - } - if (($yyn = $yysindex[$yystate]) && ($yyn += $yychar) >= 0 && - $yycheck[$yyn] == $yychar) - { -#if YYDEBUG - print "yydebug: state $yystate, shifting to state ", - $yytable[$yyn], "\n" if $yydebug; -#endif - $yyss[++$yyssp] = $yystate = $yytable[$yyn]; - $yyvs[++$yyvsp] = $yylval; - $yychar = (-1); - --$yyerrflag if $yyerrflag > 0; - next yyloop; - } - if (($yyn = $yyrindex[$yystate]) && ($yyn += $yychar) >= 0 && - $yycheck[$yyn] == $yychar) - { - $yyn = $yytable[$yyn]; - last yyreduce; - } - if (! $yyerrflag) { - &yyerror('syntax error'); - ++$yynerrs; - } - return(1) if &yy_err_recover; - } # yyreduce -#if YYDEBUG - print "yydebug: state $yystate, reducing by rule ", - "$yyn ($yyrule[$yyn])\n" if $yydebug; -#endif - $yym = $yylen[$yyn]; - $yyval = $yyvs[$yyvsp+1-$yym]; - switch: - { -if ($yyn == 1) { -#line 29 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; return 1; -last switch; -} } -if ($yyn == 2) { -#line 30 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-1]; return "s,"; -last switch; -} } -if ($yyn == 3) { -#line 33 "expr_parse.y" -{ $yyval = [ "%const", @{$yyvs[$yyvsp-0]} ]; -last switch; -} } -if ($yyn == 4) { -#line 34 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 5) { -#line 37 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 6) { -#line 38 "expr_parse.y" -{ $yyval = [ "%array", @{$yyvs[$yyvsp-1]} ]; -last switch; -} } -if ($yyn == 7) { -#line 39 "expr_parse.y" -{ $yyval = [ "u+", $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 8) { -#line 40 "expr_parse.y" -{ $yyval = [ "u-", $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 9) { -#line 41 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 10) { -#line 43 
"expr_parse.y" -{ $yyval = $yyvs[$yyvsp-1]; -last switch; -} } -if ($yyn == 11) { -#line 44 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 12) { -#line 45 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 13) { -#line 46 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 14) { -#line 47 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 15) { -#line 48 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 16) { -#line 49 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 17) { -#line 50 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 18) { -#line 51 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 19) { -#line 52 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 20) { -#line 53 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 21) { -#line 54 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 22) { -#line 57 "expr_parse.y" -{ $yyval = [ "%const", @{$yyvs[$yyvsp-0]} ]; -last switch; -} } -if ($yyn == 23) { -#line 59 "expr_parse.y" -{ my ($type1, $val1) = @{$yyvs[$yyvsp-2]}; - my ($type2, $val2) = @{$yyvs[$yyvsp-0]}; - $yyval = ["%const", typing::make_complex_type ($type1, $type2), - [$val1, $val2]]; - -last switch; -} } -if ($yyn == 24) { -#line 64 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 25) { -#line 66 "expr_parse.y" -{ $yyval = [ "%do", 
$yyvs[$yyvsp-2], @{$yyvs[$yyvsp-0]} ]; -last switch; -} } -if ($yyn == 26) { -#line 68 "expr_parse.y" -{ $yyval = [ "%do", [ "%const", @{$yyvs[$yyvsp-2]} ], @{$yyvs[$yyvsp-0]} ]; - -last switch; -} } -if ($yyn == 27) { -#line 72 "expr_parse.y" -{ $yyval = [ @{$yyvs[$yyvsp-2]}, $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 28) { -#line 73 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 29) { -#line 76 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 30) { -#line 80 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-4], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 31) { -#line 82 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-6], $yyvs[$yyvsp-4], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 32) { -#line 85 "expr_parse.y" -{ $yyval = [ "%var", $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 33) { -#line 86 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 34) { -#line 87 "expr_parse.y" -{ $yyval = [ "%call", $yyvs[$yyvsp-3], @{$yyvs[$yyvsp-1]} ]; -last switch; -} } -if ($yyn == 35) { -#line 90 "expr_parse.y" -{ $yyval = []; -last switch; -} } -if ($yyn == 36) { -#line 91 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 37) { -#line 94 "expr_parse.y" -{ $yyval = [ @{$yyvs[$yyvsp-2]}, $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 38) { -#line 95 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 39) { -#line 98 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 40) { -#line 99 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 41) { -#line 100 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 42) { -#line 103 "expr_parse.y" -{ $yyval = [ "%namedarg", $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 43) { -#line 106 "expr_parse.y" -{ $yyval = [ "%colon", "", 
"" ]; -last switch; -} } -if ($yyn == 44) { -#line 107 "expr_parse.y" -{ $yyval = [ "%colon", $yyvs[$yyvsp-1], "" ]; -last switch; -} } -if ($yyn == 45) { -#line 108 "expr_parse.y" -{ $yyval = [ "%colon", "", $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 46) { -#line 109 "expr_parse.y" -{ $yyval = [ "%colon", $yyvs[$yyvsp-2], $yyvs[$yyvsp-1] ]; -last switch; -} } -#line 624 "y.tab.pl" - } # switch - $yyssp -= $yym; - $yystate = $yyss[$yyssp]; - $yyvsp -= $yym; - $yym = $yylhs[$yyn]; - if ($yystate == 0 && $yym == 0) - { -#if YYDEBUG - print "yydebug: after reduction, shifting from state 0 ", - "to state $YYFINAL\n" if $yydebug; -#endif - $yystate = $YYFINAL; - $yyss[++$yyssp] = $YYFINAL; - $yyvs[++$yyvsp] = $yyval; - if ($yychar < 0) - { - if (($yychar = &yylex) < 0) { $yychar = 0; } -#if YYDEBUG - if ($yydebug) - { - $yys = ''; - if ($yychar <= $#yyname) { $yys = $yyname[$yychar]; } - if (!$yys) { $yys = 'illegal-symbol'; } - print "yydebug: state $YYFINAL, reading $yychar ($yys)\n"; - } -#endif - } - return(0) if $yychar == 0; - next yyloop; - } - if (($yyn = $yygindex[$yym]) && ($yyn += $yystate) >= 0 && - $yyn <= $#yycheck && $yycheck[$yyn] == $yystate) - { - $yystate = $yytable[$yyn]; - } else { - $yystate = $yydgoto[$yym]; - } -#if YYDEBUG - print "yydebug: after reduction, shifting from state ", - "$yyss[$yyssp] to state $yystate\n" if $yydebug; -#endif - $yyss[++$yyssp] = $yystate; - $yyvs[++$yyvsp] = $yyval; - } # yyloop -} # yyparse -#line 112 "expr_parse.y" - -sub yylex { - $expr_parse::left =~ s/^\s*//; - return 0 if $expr_parse::left eq ""; - my ($ncharsread, $token, $value) = expr_parse::good_yylex ($expr_parse::left); - # print "yylex: token eof\n" unless $ncharsread; - return 0 unless $ncharsread; - # print "yylex: token $token (" . substr ($expr_parse::left, 0, $ncharsread) . ") with value $value\n"; - # print join (";", @$value) . 
"\n"; - $expr_parse::left = substr ($expr_parse::left, $ncharsread); - $yylval = $value; - return $token; -} - -# returns (ncharsread, token, value) -sub good_yylex { - my ($s) = @_; - my ($c) = substr ($s, 0, 1); - - if ($c eq "") { - return 0; - } elsif ($s =~ /^(\d+(?:\.\d*)?|\.\d+)D[+-]?\d+/i) { - return (length ($&), $CONST, [$typing::double_precision, $&]); - } elsif ($s =~ /^(\d+E[+-]?\d+|(?:\d+\.\d*|\.\d+)(?:E[+-]?\d+)?)(_\w+)?/i) { - if (defined $2) { - return (length ($&), $CONST, [typing::make_type ('real', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'real'}, $1]); - } - } elsif ($s =~ /^(\d+)(_\w+)?/) { - if ($2) { - return (length ($&), $CONST, [typing::make_type ('integer', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'integer'}, $1]); - } - } elsif ($s =~ /^(\.true\.|\.false\.)(_\w+)?/i) { - if (defined $2) { - return (length ($&), $CONST, [typing::make_type ('logical', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'logical'}, $1]); - } - } elsif ($s =~ /^'(\d+)'(_\w+)?/) { - # Interior of string is digits because it has been grabbed already. 
- my ($str) = stmts::get_string ($1); - if (defined $2) { - return (length ($&), $CONST, [typing::make_character_type (substr ($2, 1), length ($str)), $str]); - } else { - return (length ($&), $CONST, [typing::make_character_type ($typing::default_character_kind, length ($str)), $str]); - } - } elsif ($s =~ /^\w+/) { - return (length ($&), $NAME, $&); - } else { - switch: { - $s =~ /^==/ && return (2, $COMPARISON, "=="); - $s =~ /^<=/ && return (2, $COMPARISON, "<="); - $s =~ /^>=/ && return (2, $COMPARISON, ">="); - $s =~ /^/ && return (1, $COMPARISON, ">"); - $s =~ /^\/=/ && return (2, $COMPARISON, "/="); - $s =~ /^=/ && return (1, $EQUALS, "="); - $s =~ /^\.eq\./i && return (4, $COMPARISON, "=="); - $s =~ /^\.le\./i && return (4, $COMPARISON, "<="); - $s =~ /^\.ge\./i && return (4, $COMPARISON, ">="); - $s =~ /^\.lt\./i && return (4, $COMPARISON, "<"); - $s =~ /^\.gt\./i && return (4, $COMPARISON, ">"); - $s =~ /^\.ne\./i && return (4, $COMPARISON, "/="); - $s =~ /^\.neqv\./i && return (6, $NEQV, ".neqv."); - $s =~ /^\.eqv\./i && return (5, $EQV, ".eqv."); - $s =~ /^\.and\./i && return (5, $AND, ".and."); - $s =~ /^\.or\./i && return (4, $OR, ".or."); - $s =~ /^\.not\./i && return (5, $NOT, ".not."); - $s =~ /^\*\*/ && return (2, $DBLASTERIK, "**"); - $s =~ /^\/\// && return (2, $DBLSLASH, "//"); - $s =~ /^\(\// && return (2, $LARRAY, "(/"); - $s =~ /^\/\)/ && return (2, $RARRAY, "/)"); - $c eq "," && return (1, $COMMA, ","); - $c eq "+" && return (1, $PLUS, "+"); - $c eq "-" && return (1, $MINUS, "-"); - $c eq "*" && return (1, $ASTERIK, "*"); - $c eq "/" && return (1, $SLASH, "/"); - $c eq "(" && return (1, $LPAREN, "("); - $c eq ")" && return (1, $RPAREN, ")"); - $c eq "%" && return (1, $PERCENT, "%"); - $c eq ":" && return (1, $COLON, ":"); - } - die "Lexer failed on `$s'"; - } -} - -##### -# Takes a string that consists entirely of an expression, and returns a -# reference to the parse tree it defines. 
-##### -sub parse_expr { - my ($s) = @_; - # print "parsing string: $s.\n"; - $expr_parse::left = $expr_parse::line = $s; - die "Expression `$expr_parse::line' has trailing garbage `$1$expr_parse::left'" - if yyparse () =~ /^s(.*)$/; - return $yyval; -} - -##### -# Takes a string that consists partly of an expression. (The first part -# is an expression.) Returns (parse tree ref, rest string, separator string). -##### -sub parse_part_as_expr { - my ($s) = @_; - # print "parsing part of string: $s.\n"; - $expr_parse::left = $expr_parse::line = $s; - if (yyparse () =~ /^s(.*)$/) { - return ($yyval, $expr_parse::left, $1); - } else { - return ($yyval); - } -} - -sub yyerror { - my ($s) = @_; - die "yyerror: $s during parsing of F90 code `$expr_parse::line'"; -} - -1; -#line 794 "y.tab.pl" diff --git a/Tools/F_scripts/f90doc/expr_parse.y b/Tools/F_scripts/f90doc/expr_parse.y deleted file mode 100644 index 94070cfc768..00000000000 --- a/Tools/F_scripts/f90doc/expr_parse.y +++ /dev/null @@ -1,234 +0,0 @@ -%{ -package expr_parse; - -# On failure, print out this as the line we were working on. 
-$expr_parse::line = ""; - -# Portion of line left to parse -$expr_parse::left = ""; -%} - -%token COMMA LPAREN RPAREN NOT OR AND EQV NEQV COMPARISON DBLSLASH PERCENT -%token PLUS MINUS UPLUS UMINUS ASTERIK SLASH DBLASTERIK CONST NAME COLON -%token LARRAY RARRAY EQUALS - -%left EQV NEQV -%left OR -%left AND -%nonassoc NOT -%nonassoc COMPARISON -%left DBLSLASH -%left PLUS MINUS -%nonassoc UPLUS UMINUS -%left ASTERIK SLASH -%right DBLASTERIK -%left PERCENT - -%% - -expr_with_abort: expr { $$ = $1; return 1; } - | expr COMMA { $$ = $1; return "s,"; } - -expr: - CONST { $$ = [ "%const", @{$1} ]; } - | expr_without_const { $$ = $1; } - -expr_without_const: - chain { $$ = $1; } - | LARRAY array RARRAY { $$ = [ "%array", @{$2} ]; } - | PLUS expr %prec UPLUS { $$ = [ "u+", $2 ]; } - | MINUS expr %prec UMINUS { $$ = [ "u-", $2 ]; } - | NOT expr { $$ = [ $1, $2 ]; } - | LPAREN potential_complex_or_implied_do RPAREN - { $$ = $2; } - | expr DBLASTERIK expr { $$ = [ $2, $1, $3 ]; } - | expr ASTERIK expr { $$ = [ $2, $1, $3 ]; } - | expr SLASH expr { $$ = [ $2, $1, $3 ]; } - | expr PLUS expr { $$ = [ $2, $1, $3 ]; } - | expr MINUS expr { $$ = [ $2, $1, $3 ]; } - | expr DBLSLASH expr { $$ = [ $2, $1, $3 ]; } - | expr COMPARISON expr { $$ = [ $2, $1, $3 ]; } - | expr AND expr { $$ = [ $2, $1, $3 ]; } - | expr OR expr { $$ = [ $2, $1, $3 ]; } - | expr EQV expr { $$ = [ $2, $1, $3 ]; } - | expr NEQV expr { $$ = [ $2, $1, $3 ]; } - -potential_complex_or_implied_do: - CONST { $$ = [ "%const", @{$1} ]; } - | CONST COMMA CONST - { my ($type1, $val1) = @{$1}; - my ($type2, $val2) = @{$3}; - $$ = ["%const", typing::make_complex_type ($type1, $type2), - [$val1, $val2]]; - } - | expr_without_const { $$ = $1; } - | expr_without_const COMMA do_args - { $$ = [ "%do", $1, @{$3} ]; } - | CONST COMMA do_args - { $$ = [ "%do", [ "%const", @{$1} ], @{$3} ]; - } - -array: - array COMMA array_piece { $$ = [ @{$1}, $3 ]; } - | array_piece { $$ = [ $1 ]; } - -array_piece: - expr { $$ = $1; } -# | 
implied_do is handled within expr - -do_args: - NAME EQUALS expr COMMA expr { $$ = [ $1, $3, $5 ]; } - | NAME EQUALS expr COMMA expr COMMA expr - { $$ = [ $1, $3, $5, $7 ]; } - -chain: - NAME { $$ = [ "%var", $1 ]; } - | chain PERCENT NAME { $$ = [ $2, $1, $3 ]; } - | chain LPAREN exprlist RPAREN { $$ = [ "%call", $1, @{$3} ]; } - -exprlist: - { $$ = []; } - | exprlist_ne { $$ = $1; } - -exprlist_ne: - exprlist_ne COMMA argument { $$ = [ @{$1}, $3 ]; } - | argument { $$ = [ $1 ]; } - -argument: - expr { $$ = $1; } - | colonexpr { $$ = $1; } - | namedargument { $$ = $1; } - -namedargument: - NAME EQUALS expr { $$ = [ "%namedarg", $1, $3 ]; } - -colonexpr: - COLON { $$ = [ "%colon", "", "" ]; } - | expr COLON { $$ = [ "%colon", $1, "" ]; } - | COLON expr { $$ = [ "%colon", "", $2 ]; } - | expr COLON expr { $$ = [ "%colon", $1, $2 ]; } - -%% - -sub yylex { - $expr_parse::left =~ s/^\s*//; - return 0 if $expr_parse::left eq ""; - my ($ncharsread, $token, $value) = expr_parse::good_yylex ($expr_parse::left); - # print "yylex: token eof\n" unless $ncharsread; - return 0 unless $ncharsread; - # print "yylex: token $token (" . substr ($expr_parse::left, 0, $ncharsread) . ") with value $value\n"; - # print join (";", @$value) . 
"\n"; - $expr_parse::left = substr ($expr_parse::left, $ncharsread); - $yylval = $value; - return $token; -} - -# returns (ncharsread, token, value) -sub good_yylex { - my ($s) = @_; - my ($c) = substr ($s, 0, 1); - - if ($c eq "") { - return 0; - } elsif ($s =~ /^(\d+(?:\.\d*)?|\.\d+)D[+-]?\d+/i) { - return (length ($&), $CONST, [$typing::double_precision, $&]); - } elsif ($s =~ /^(\d+E[+-]?\d+|(?:\d+\.\d*|\.\d+)(?:E[+-]?\d+)?)(_\w+)?/i) { - if (defined $2) { - return (length ($&), $CONST, [typing::make_type ('real', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'real'}, $1]); - } - } elsif ($s =~ /^(\d+)(_\w+)?/) { - if ($2) { - return (length ($&), $CONST, [typing::make_type ('integer', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'integer'}, $1]); - } - } elsif ($s =~ /^(\.true\.|\.false\.)(_\w+)?/i) { - if (defined $2) { - return (length ($&), $CONST, [typing::make_type ('logical', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'logical'}, $1]); - } - } elsif ($s =~ /^'(\d+)'(_\w+)?/) { - # Interior of string is digits because it has been grabbed already. 
- my ($str) = stmts::get_string ($1); - if (defined $2) { - return (length ($&), $CONST, [typing::make_character_type (substr ($2, 1), length ($str)), $str]); - } else { - return (length ($&), $CONST, [typing::make_character_type ($typing::default_character_kind, length ($str)), $str]); - } - } elsif ($s =~ /^\w+/) { - return (length ($&), $NAME, $&); - } else { - switch: { - $s =~ /^==/ && return (2, $COMPARISON, "=="); - $s =~ /^<=/ && return (2, $COMPARISON, "<="); - $s =~ /^>=/ && return (2, $COMPARISON, ">="); - $s =~ /^/ && return (1, $COMPARISON, ">"); - $s =~ /^\/=/ && return (2, $COMPARISON, "/="); - $s =~ /^=/ && return (1, $EQUALS, "="); - $s =~ /^\.eq\./i && return (4, $COMPARISON, "=="); - $s =~ /^\.le\./i && return (4, $COMPARISON, "<="); - $s =~ /^\.ge\./i && return (4, $COMPARISON, ">="); - $s =~ /^\.lt\./i && return (4, $COMPARISON, "<"); - $s =~ /^\.gt\./i && return (4, $COMPARISON, ">"); - $s =~ /^\.ne\./i && return (4, $COMPARISON, "/="); - $s =~ /^\.neqv\./i && return (6, $NEQV, ".neqv."); - $s =~ /^\.eqv\./i && return (5, $EQV, ".eqv."); - $s =~ /^\.and\./i && return (5, $AND, ".and."); - $s =~ /^\.or\./i && return (4, $OR, ".or."); - $s =~ /^\.not\./i && return (5, $NOT, ".not."); - $s =~ /^\*\*/ && return (2, $DBLASTERIK, "**"); - $s =~ /^\/\// && return (2, $DBLSLASH, "//"); - $s =~ /^\(\// && return (2, $LARRAY, "(/"); - $s =~ /^\/\)/ && return (2, $RARRAY, "/)"); - $c eq "," && return (1, $COMMA, ","); - $c eq "+" && return (1, $PLUS, "+"); - $c eq "-" && return (1, $MINUS, "-"); - $c eq "*" && return (1, $ASTERIK, "*"); - $c eq "/" && return (1, $SLASH, "/"); - $c eq "(" && return (1, $LPAREN, "("); - $c eq ")" && return (1, $RPAREN, ")"); - $c eq "%" && return (1, $PERCENT, "%"); - $c eq ":" && return (1, $COLON, ":"); - } - die "Lexer failed on `$s'"; - } -} - -##### -# Takes a string that consists entirely of an expression, and returns a -# reference to the parse tree it defines. 
-##### -sub parse_expr { - my ($s) = @_; - # print "parsing string: $s.\n"; - $expr_parse::left = $expr_parse::line = $s; - die "Expression `$expr_parse::line' has trailing garbage `$1$expr_parse::left'" - if yyparse () =~ /^s(.*)$/; - return $yyval; -} - -##### -# Takes a string that consists partly of an expression. (The first part -# is an expression.) Returns (parse tree ref, rest string, separator string). -##### -sub parse_part_as_expr { - my ($s) = @_; - # print "parsing part of string: $s.\n"; - $expr_parse::left = $expr_parse::line = $s; - if (yyparse () =~ /^s(.*)$/) { - return ($yyval, $expr_parse::left, $1); - } else { - return ($yyval); - } -} - -sub yyerror { - my ($s) = @_; - die "yyerror: $s during parsing of F90 code `$expr_parse::line'"; -} - -1; diff --git a/Tools/F_scripts/f90doc/f90doc b/Tools/F_scripts/f90doc/f90doc deleted file mode 100755 index 0afe6dafe73..00000000000 --- a/Tools/F_scripts/f90doc/f90doc +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env perl -eval 'exec perl $0 ${1+"$@"}' - if 0; -warn ("Perl 5 not detected, likely a big problem") if $] < 5.0; -warn "Less than Perl 5.003. You may witness mysterious segmentation faults." - if $] < 5.003; - -use strict; - -BEGIN { - my $zero = $0; - while (-l $zero) { - my $nextzero = readlink $zero; - if (substr ($nextzero, 0, 1) eq "/") { - $zero = $nextzero; - } elsif ($zero =~ m#^(.*)/#) { - $zero = "$1/$nextzero"; - } else { - $zero = $nextzero; - } - } - if ($zero =~ m#(.*)/\w+#) { - push @INC, "$1/../common/", $1; - } else { - push @INC, "../common/", "."; - } -} - -require "htmling.pl"; -require "stmts.pl"; -require "utils.pl"; -#require "expr_parse.pl"; -#require "typing.pl"; - -#################### - -if (! @ARGV) { - print <$part in module $1"); - } else { - push (@::see_list, "module $1"); - } - } elsif ($macro =~ /^author\s+/i) { - push (@::authors, $'); - } elsif ($macro =~ /^version\s+/i) { - die "Two versions in a single !! 
block" if $::version_num; - $::version_num = $'; - } else { - die "Unrecognized macro $macro"; - } -} diff --git a/Tools/F_scripts/f90doc/htmling.pl b/Tools/F_scripts/f90doc/htmling.pl deleted file mode 100644 index 956513244d9..00000000000 --- a/Tools/F_scripts/f90doc/htmling.pl +++ /dev/null @@ -1,376 +0,0 @@ -package htmling; - -use strict; - -### CONSTANTS -$htmling::dblspace = " "; -$htmling::indentspace = $htmling::dblspace x 2; -$htmling::headerspace = $htmling::indentspace; -$htmling::comment_indent = $htmling::indentspace x 2; - -### PUBLIC GLOBALS -$htmling::comments_type = "smart"; -$htmling::suppress_calls = 0; -$htmling::calls_make_links = 0; -$htmling::html_filenames_original_case = 0; - -### GLOBALS -$htmling::htmlfile = ""; -$htmling::indent = 0; - -# Return the name of the HTML file for the specified PROGRAM or MODULE -sub html_filename { - my ($name) = @_; - $name = lc $name unless $htmling::html_filenames_original_case; - return $name . ".html"; -} - -# This is the main calling point from f90doc. -# Takes all top-level objects: programs, subroutines, functions, and modules. -# Warns if given something else. -sub do_toplevel { - my ($top, $outfile) = @_; - - my $type = $top->{'type'}; - unless ($type eq 'module' || $type eq 'subroutine' || $type eq 'function' || - $type eq 'program') { - warn "Warning: Unrecognized top-level object $type will not be documented.\n"; - return; - } - - # A positive-length name. Necessary because programs may not have names. - if (defined $outfile) { - $htmling::htmlfile = $outfile; - } else { - $htmling::htmlfile = html_filename ( - ($top->{'name'} eq '' ? $type : $top->{'name'})); - } - print "Generating $htmling::htmlfile...\n"; - open OUT, ">$htmling::htmlfile"; - - print OUT "\n"; - print OUT "\n"; - print OUT " $type $top->{'name'} (generated by f90doc) \n"; - print OUT "\n"; - print OUT "

", ucfirst ($type), " $top->{'name'}

\n"; - print OUT "
$type $top->{'name'}\n";
-
-  list_uses (@{$top->{'uses'}});
-  list_calls (1, keys %{$top->{'calls'}}) if exists $top->{'calls'};
-  list_html ("Types", map (($_->{'type'} eq "type" ? ($_) : ()), @{$top->{'ocontains'}}));
-  list_html ("Variables", map (($_->{'type'} eq "var" ? ($_) : ()), @{$top->{'ocontains'}}));
-  list_html ("Interfaces", map (($_->{'type'} eq "interface" ? ($_) : ()), @{$top->{'ocontains'}}));
-  list_html ("Subroutines and functions", map (($_->{'type'} eq "subroutine" || $_->{'type'} eq "function" ? ($_) : ()), @{$top->{'ocontains'}}));
-
-  print OUT "\nend $type $top->{'name'}\n";
-  do_comments ($top->{'comments'}, 1);
-
-  my @list;
-  @list = map (($_->{'type'} eq "type" ? ($_) : ()), @{$top->{'ocontains'}});
-  print OUT "\n

Description of Types

\n" if @list; - do_html (@list); - @list = map (($_->{'type'} eq "var" ? ($_) : ()), @{$top->{'ocontains'}}); - print OUT "\n

Description of Variables

\n" if @list; - do_html (@list); - @list = map (($_->{'type'} eq "interface" ? ($_) : ()), @{$top->{'ocontains'}}); - print OUT "\n

Description of Interfaces

\n" if @list; - do_html (@list); - @list = map (($_->{'type'} eq "subroutine" || $_->{'type'} eq "function" ? ($_) : ()), @{$top->{'ocontains'}}); - print OUT "\n

Description of Subroutines and Functions

\n" if @list; - do_html (@list); - - print OUT "\n"; - close OUT; -} - -sub list_uses { - if (@_) { - print OUT "\n${htmling::indentspace}${htmling::headerspace}! Uses\n"; - my ($use); - foreach $use (@_) { - my ($module, $extra) = @$use; - $extra = defined $extra ? ", $extra" : ""; - print OUT "${htmling::indentspace}", - "use $module$extra\n"; - } - } -} - -sub list_calls { - return if $htmling::suppress_calls; - my ($big, @calls) = (@_); - if (@calls) { - @calls = sort @calls; - @calls = map { "$_" } @calls - if $htmling::calls_make_links; - if ($big) { - print OUT join ("\n", - "\n${htmling::indentspace}${htmling::headerspace}! Calls", - (map { "${htmling::indentspace}call $_" } @calls), ""); - } else { - print OUT "${htmling::indentspace}! Calls: ", join (", ", @calls), "\n"; - } - } -} - -sub list_html { - my ($title) = shift; - - if (@_) { - print OUT "\n${htmling::indentspace}${htmling::headerspace}! $title\n"; - my ($struct); - foreach $struct (@_) { - my ($name, $type) = (txt2html ($struct->{'name'}), $struct->{'type'}); - my ($href) = "$name"; - print OUT $htmling::indentspace; - if ($type eq "var") { - print OUT var2str ($struct, $href) . "\n"; - } elsif ($type eq "subroutine" || - $type eq "function") { - print OUT join (" ", attriblist ($struct), ""); - print OUT typing::type_to_f90 ($struct->{'rtype'}) . " " - if exists $struct->{'rtype'}; - my $flag; - for $flag ('recursive', 'elemental', 'pure') { - print OUT "$flag " if $struct->{$flag}; - } - print OUT "$type $href"; - print OUT " (" . join (", ", @{$struct->{'parms'}}) . ")"; - print OUT " result ($struct->{'result'})" - if exists $struct->{'result'} && !exists $struct->{'rtype'}; - print OUT "\n"; - } else { - print OUT join (" ", attriblist ($struct), ""); - print OUT "$type $href\n"; - } - } - } -} - -sub do_html { - if (@_) { - my ($struct); - - foreach $struct (@_) { - my ($name, $type) = (txt2html ($struct->{'name'}), $struct->{'type'}); - if (! $htmling::indent) { - print OUT "

$name

\n"; - print OUT "
";
-         }
-
-         print OUT $htmling::indentspace x $htmling::indent;
-         if ($type eq "var") {
-             print OUT var2str ($struct) . "\n";
-         } elsif ($type eq "mprocedure") {
-             die "do_html: bare module procedure $struct->{'name'} (no enclosing module)"
-                 unless exists $struct->{'bind'};
-             print OUT
-                 "module procedure {'bind'}->{'type'}_" .
-                 lc ($struct->{'name'}) . "\">$name\n";
-         } elsif ($type eq "subroutine" || $type eq "function") {
-             print OUT join (" ", attriblist ($struct), "");
-             print OUT typing::type_to_f90 ($struct->{'rtype'}) . " "
-                 if exists $struct->{'rtype'} && !exists $struct->{'result'};
-             my $flag;
-             for $flag ('recursive', 'elemental', 'pure') {
-               print OUT "$flag " if $struct->{$flag};
-             }
-             print OUT "$type $name";
-             print OUT " (" . join (", ", @{$struct->{'parms'}}) . ")";
-             print OUT " result ($struct->{'result'})"
-               if exists $struct->{'result'};
-             print OUT "\n";
-         } else {
-             print OUT join (" ", attriblist ($struct), "");
-             print OUT "$type $name\n";
-         }
-
-         $htmling::indent++;
-
-         if ($type eq "var" || $type eq "mprocedure") {
-         } elsif ($type eq "type") {
-           print OUT $htmling::indentspace x $htmling::indent, "private\n"
-             if exists $struct->{'privatetype'};
-           print OUT $htmling::indentspace x $htmling::indent, "sequence\n"
-             if exists $struct->{'sequencetype'};
-           do_html (@{$struct->{'ocontains'}});
-         } elsif ($type eq "interface") {
-           do_html (@{$struct->{'ocontains'}});
-         } elsif ($type eq "subroutine" || $type eq "function") {
-           my @interest = @{$struct->{'parms'}};
-           push @interest, $struct->{'result'} if exists $struct->{'result'};
-           push @interest, $name
-             if $type eq "function" && !exists $struct->{'result'} &&
-               !exists $struct->{'rtype'};
-           my $arg;
-           foreach $arg (@interest) {
-             my (@things) = values %{$struct->{'contains'}->{lc $arg}};
-             die "Confused by/no declaration for parameter $arg of $type $name"
-               if scalar @things != 1;
-             do_html ($things[0]);
-           }
-         } else {
-           die "do: I don't know what a $type is";
-         }
-
-         list_calls (0, keys %{$struct->{'calls'}}) if exists $struct->{'calls'};
-
-         $htmling::indent--;
-
-         if ($type ne "var" && $type ne "mprocedure") {
-            print OUT $htmling::indentspace x $htmling::indent . "end $type $name\n";
-         }
-
-         do_comments ($struct->{'comments'}, ! $htmling::indent);
-      }
-   }
-}
-
-# Pass comments and a flag saying if you want to end the current 
 block.
-sub do_comments {
-   my ($comments, $endpre) = @_;
-   if ($comments eq "") {
-      print OUT "
\n" if $endpre; - return; - } - - #print OUT "\n" unless $htmling::indent; - - if ($htmling::comments_type eq "preformatted") { - my ($s) = $htmling::indentspace x $htmling::indent . $htmling::comment_indent; - $comments =~ s/^/$s/m if $htmling::indent; - $comments =~ s/^\n*//s; - $comments =~ s/\n*$//s; - print OUT $comments, "\n"; - print OUT "
\n" if $endpre; - } else { - print OUT "
\n"; - print OUT "
\n" if $htmling::indent; - if ($htmling::comments_type eq "html") { - } elsif ($htmling::comments_type eq "smart") { - my @newcomments = (); - my $verbmode = 0; - my @listmode = (); - my $line; - foreach $line (split ("\n", $comments)) { - if ($verbmode) { - if ($line =~ /^>/) { - warn "`$line' found while already in verbatim mode"; - substr ($line, 0, 1) = " "; - push @newcomments, $line; - } elsif ($line =~ /^"; - } elsif ($line =~ /^v/) { - warn "`$line' found while already in verbatim mode"; - substr ($line, 0, 1) = " "; - push @newcomments, $line; - } else { - push @newcomments, $line; - } - next; - } - - # _italic_ and *bold* - while ($line =~ /(\A|\W)_(\w|\w.*?\w)_(\Z|\W)/) { - my ($left, $mid, $right) = ("$`$1", $2, "$3$'"); - $mid =~ s/_/ /g; - $line = $left . $mid . $right; - } - while ($line =~ /(\A|\W)\*(\w|\w.*?\w)\*(\Z|\W)/) { - my ($left, $mid, $right) = ("$`$1", $2, "$3$'"); - $mid =~ s/\*/ /g; - $line = $left . $mid . $right; - } - - # Lists - if ($line =~ /^( *)-/) { - if (! @listmode || length ($1) > $listmode[$#listmode]) { - push @listmode, length $1; - push @newcomments, $1 . "
    "; - } else { - while ($listmode[$#listmode] != length ($1)) { - push @newcomments, " " x $listmode[$#listmode] . "
"; - pop @listmode; - die "Unindented to invalid position in `$line'" - unless @listmode; - } - } - push @newcomments, $1 . "
  • " . substr ($line, length ($&)); - } elsif ($line =~ /^>/) { - #warn "Verbatim mode started in list mode" if @listmode; - $verbmode = 1; - substr ($line, 0, 1) = " "; - push @newcomments, "
    " . $line;
    -            # Ignore $line =~ /^$line
    "; - } elsif ($line =~ /^\s*$/) { - push @newcomments, "

    "; - } elsif (@listmode) { - $line =~ /^( *)(\t?)/; - warn "Tabs have strange effects on indentation detection" - if length ($2) > 0; - while (@listmode && $listmode[$#listmode] > length ($1)) { - push @newcomments, " " x $listmode[$#listmode] . ""; - pop @listmode; - } - push @newcomments, $line; - } else { - push @newcomments, $line; - } - } - my $list; - foreach $list (@listmode) { - push @newcomments, " " x $list . ""; - } - $comments = join ("\n", @newcomments); - } else { - die "Unsupported comments type `$htmling::comments_type'"; - } - $comments =~ s/

    \n(

    \n)+/

    \n/g; - $comments =~ s/

    \n$//; - $comments =~ s/^

    \n//; - $comments =~ s/

    /

    /g if $htmling::indent; - print OUT $comments . "\n"; - print OUT "
  • \n" if $htmling::indent; - print OUT "
    " unless $endpre;
    -   }
    -}
    -
    -sub var2str {
    -    my ($var, $href) = @_;
    -
    -    my ($typestr) = typing::type_to_f90 ($var->{'vartype'});
    -    my ($initial) = (!exists $var->{'initial'} ? ""
    -          : " $var->{'initop'} " . typing::expr_to_f90 ($var->{'initial'}));
    -    $href = txt2html ($var->{'name'}) unless $href;
    -    return $typestr . join (", ", "", attriblist ($var)) . " :: $href$initial";
    -}
    -
    -sub txt2html {
    -    my ($txt) = @_;
    -    $txt =~ s//>/g;
    -    return $txt;
    -}
    -
    -sub attriblist {
    -    my ($struct) = @_;
    -    my @attribs = ();
    -
    -    push @attribs, $struct->{'vis'} if exists $struct->{'vis'};
    -    push @attribs, "optional" if exists $struct->{'optional'};
    -    push @attribs, @{$struct->{'tempattribs'}}
    -        if exists $struct->{'tempattribs'};
    -
    -    return @attribs;
    -}
    -
    -1;
    diff --git a/Tools/F_scripts/f90doc/stmts.pl b/Tools/F_scripts/f90doc/stmts.pl
    deleted file mode 100644
    index 83d20a300af..00000000000
    --- a/Tools/F_scripts/f90doc/stmts.pl
    +++ /dev/null
    @@ -1,891 +0,0 @@
    -package stmts;
    -
    -use strict;
    -
    -require "expr_parse.pl";
    -require "typing.pl";
    -require "utils.pl";
    -
    -#########################################################################
    -# PUBLIC GLOBALS
    -
    -# Set to a reference to a routine to take !! comments if !! comments are
    -# to be caught.
    -$stmts::bangbang = "";
    -
    -# Set to a reference to a routine to return accumulated comments if !! comments
    -# are caught.  You should reset them after each time you call read_line or
    -# read_stmt.
    -$stmts::comments = "";
    -
    -# Set this to disable warnings.  Don't use this for a compiler!  Suitable for
    -# something like f90doc though.  This shouldn't be used once stmts supports
    -# all Fortran 90 statements and attributes; until then, it's pretty much
    -# needed; after then, it should be removed.
    -$stmts::diable_warns = 0;
    -
    -# Set this to use fixed-form Fortran, like good old Fortran 77.
    -$stmts::fixed_form = 0;
    -
    -#########################################################################
    -# PRIVATE GLOBALS
    -
    -# A "left-over" piece of a statement is stored here when semi-colons are
    -# encountered.
    -$stmts::leftover = "";
    -
    -# Number of opened files.
    -$stmts::nfile = 0;
    -
    -# List of string's values.
    -@stmts::strings = ();
    -
    -# List of structure pointers that we're currently nested in.
    -# topnest stores the top of the stack.
    -@stmts::nesting = ();
    -$stmts::topnest = undef;
    -
    -# List of structure pointers that we're currently nested in, but for a
    -# specified type.
    -%stmts::nesting_by = ();
    -
    -#########################################################################
    -# ROUTINES
    -
    -#####
    -# Reads an entire file, and returns all the top-level structures found.
    -# If specified, a given function will be called after every statement
    -# (usually this is for resetting !! comments and such).
    -#####
    -sub read_file {
    -  my ($filename, $every_stmt) = @_;
    -  stmts::open_file ($filename);
    -
    -  my ($stmt, $struct, @rval);
    -  my @toplevel = ();
    -  while ((@rval = stmts::read_stmt ()) [0]) {
    -    push @toplevel, $rval[1] if !defined $stmts::topnest && ref $rval[1];
    -    &$every_stmt () if defined $every_stmt;
    -  }
    -
    -  return @toplevel;
    -}
    -
    -#####
    -# Starts reading the specified filename.
    -#####
    -sub open_file {
    -   my ($filename) = @_;
    -   $stmts::FILE = "";
    -
    -   open IN, $filename
    -     or die "Couldn't open $filename";
    -   $stmts::{'FILE' . $stmts::nfile} = $stmts::{'IN'};
    -}
    -
    -#####
    -# Cleans up from reading the current file.
    -# This is automatically called by read_line, so most don't have to worry
    -# about it.
    -# Returns false if there are no files left.
    -#####
    -sub close_file {
    -   close IN;
    -   $stmts::nfile--;
    -   if ($stmts::nfile > 0) {
    -      # CHECK--does this still do the desired thing, in light of open_file?
    -      $stmts::{'IN'} = $stmts::{'FILE' . $stmts::nfile};
    -      return 1;
    -   } else {
    -      # Clean up strings.
    -      @stmts::strings = ();
    -      return 0;
    -   }
    -}
    -
    -#####
    -# Reads a line of Fortran 90 doing whatever it takes.  This may involve
    -# reading multiple lines from the current file, walking into files, etc.
    -# INCLUDE is parsed at this level.
    -# Note that the returned string may have various cases (lc isn't called).
    -#####
    -sub read_line {
    -
    -ALLOVERAGAIN:
    -  my $line;
    -  if ($stmts::leftover ne '') {
    -    $line = $stmts::leftover;
    -    $stmts::leftover = '';
    -  } else {
    -    $line = ;
    -    until (defined $line) {
    -      return "" unless close_file ();
    -      $line = ;
    -    }
    -    chomp $line;
    -
    -    substr ($line, 0, 1) = '!' if $stmts::fixed_form && $line =~ /^\S/;
    -  }
    -
    -  # This is used for fixed-form continuations.
    -  my $lastlen = length $line;
    -
    -  my $continue = 0;
    -
    -  while (1) {
    -    # Grab doubled comments (!!) if requested.
    -    if ($stmts::bangbang && $line =~ /^([^"'!]|('[^']*')|("[^"]*"))*(!!.*)$/) {
    -      $line = substr ($line, 0, length ($line) - length ($4));
    -      &$stmts::bangbang ($4);
    -    }
    -
    -    # Delete comments.
    -    elsif ($line =~ /^([^"'!]|(\'[^']*')|("[^"]*"))*(!.*)$/) {
    -      $line = substr ($line, 0, length ($line) - length ($4));
    -    }
    -
    -    # Fixed-form continuations.
    -    if ($stmts::fixed_form) {
    -
    -      # Check next line for continuation mark.
    -      $stmts::leftover = ;
    -      $stmts::leftover = '' unless defined $stmts::leftover;
    -      chomp $stmts::leftover;
    -      substr ($stmts::leftover, 0, 1) = '!' if $stmts::leftover =~ /^\S/;
    -      if ($stmts::leftover =~ /^\s....\S/) {
    -
    -        # Pad previous line with spaces if it had less than 72 characters.
    -        $line .= ' ' x (72-$lastlen) if $lastlen < 72;
    -
    -        # Add next (continuation) line to the line.
    -        $line .= substr ($stmts::leftover, 6);
    -        $lastlen = length $stmts::leftover;
    -        
    -        # Continue on to check the next line.
    -        $stmts::leftover = '';
    -        next;
    -      }
    -      
    -    # Free-form continuations.
    -    } elsif ($continue || $line =~ /&\s*$/) {
    -      $line = $` if $line =~ /&\s*$/;
    -      my $rest = ;
    -      chomp $rest;
    -      $rest = $' if $rest =~ /^\s*&/;
    -      $line = "$line$rest";
    -      # Blank lines don't stop the continuation.
    -      $continue = ($rest =~ /^\s*(?:!.*)?$/);
    -      next;
    -    }
    -
    -    last;
    -  }
    -
    -  # Semicolons.
    -  if ($line =~ /^([^;]*);(.*)$/) {
    -    $line = $1;
    -    if ($stmts::leftover eq '') {
    -      $stmts::leftover = $2;
    -    } else {
    -      $stmts::leftover .= ";$2";
    -    }
    -  }
    -
    -  # Replace strings to avoid confusion.
    -  my @quotes;
    -  while ($line =~ / " ([^"]|"")* " | ' ([^']|'')* ' /xg) {
    -    push @quotes, [length $`, length $&, $&];
    -  }
    -  for my $quote (reverse @quotes) {
    -    ## Process in reverse order so that $start is preserved despite replacement
    -    my ($start, $length, $string) = @$quote;
    -    push @stmts::strings, $string;
    -    substr ($line, $start, $length) = "\'" . $#stmts::strings . "\'";
    -  }
    -
    -  # Get rid of spaces on either end.
    -  $line = utils::trim ($line);
    -
    -  goto ALLOVERAGAIN if $line eq '';
    -
    -  #print "read line `$line'\n";
    -
    -  return $line;
    -}
    -
    -#####
    -# Returns the physical value for the given string number.
    -#####
    -sub get_string {
    -   my ($n) = @_;
    -   return $stmts::strings[$n];
    -}
    -
    -#####
    -# Reads a Fortran 90 statement from the current input.
    -# Checks for proper nesting, etc., and keeps tracks of what's in what.
    -# Possible results:
    -#    ('?', $the_line)
    -#    ('program', \%structure)
    -#    ('endprogram', \%structure)
    -#    ('module', \%structure)
    -#    ('endmodule', \%structure)
    -#    ('subroutine', \%structure)
    -#    ('endsubroutine', \%structure)
    -#    ('function', \%structure)
    -#    ('endfunction', \%structure)
    -#    ('program', \%structure)
    -#    ('endprogram', \%structure)
    -#    ('type', \%structure)
    -#    ('endtype', \%structure)
    -#    ('interface', \%structure)
    -#    ('endinterface', \%structure)
    -#    ('var', \%struct1, \%struct2, ...)
    -#    ('contains', \%parent)
    -#    ('public', $name1, $name2, ...)          empty means global default
    -#    ('private', $name1, $name2, ...)         empty means global default
    -#    ('optional', $name1, $name2, ...)
    -#    ('call', $arg1, $arg2, ...)              currently args are unparsed
    -#####
    -sub read_stmt {
    -   my ($line) = read_line ();
    -   if (! $line) {
    -      die "File ended while still nested" if @stmts::nesting;
    -      return ("", "");
    -   }
    -
    -   # MODULE PROCEDURE (must be before module)
    -   if ($line =~ /^module\s+procedure\s+(\w.*)$/i) {
    -      die "module procedure outside of interface block" unless defined $stmts::topnest && $stmts::topnest->{'type'} eq "interface" && $stmts::topnest->{'name'} ne "";
    -      my (@list) = split (/\s*,\s*/, utils::trim ($1));
    -      my ($p);
    -      foreach $p (@list) {
    -         die "Invalid module procedure `$p'" unless $p =~ /^\w+$/;
    -         new_struct ({
    -            'type'   => "mprocedure",
    -            'name'   => $p,
    -            hashed_comments ()
    -         });
    -      }
    -      return ("mprocedure", @list);
    -   }
    -
    -   # MODULE/PROGRAM
    -   elsif ($line =~ /^(module|program)(?:\s+(\w+))?$/i) {
    -      die "$1 begun not at top level" if defined $stmts::topnest;
    -      return new_nest ({
    -         'type' => lc $1,
    -         'name' => (defined $2 ? $2 : ''),
    -         hashed_comments ()
    -      });
    -   }
    -
    -   # END MODULE/SUBROUTINE/FUNCTION/PROGRAM/TYPE/INTERFACE, or general END
    -   elsif ($line =~ /^end\s*(?:(module|subroutine|function|program|type|interface)(?:\s+(\w+))?)?$/i) {
    -      die "END statement outside of any nesting" unless defined $stmts::topnest;
    -      my $top = $stmts::topnest;
    -
    -      # We do some special "fixing up" for modules, which resolves named
    -      # references (module procedures) and computes publicity.
    -      #
    -      # Note that end_nest will ensure that the type of thing ended matches
    -      # the thing the user says it is ending, so we don't have to worry about
    -      # that.
    -      if ($top->{'type'} eq "module") {
    -
    -        # Set publicity (visibility) of objects within the module.
    -
    -        # First, the explicitly set ones.
    -        my $name;
    -        foreach $name (@{$top->{'publiclist'}}) {
    -          do_attrib ($name, "vis", 'public', "visibility");
    -        }
    -        foreach $name (@{$top->{'privatelist'}}) {
    -          do_attrib ($name, "vis", 'private', "visibility");
    -        }
    -
    -        # Second, the globally set ones (those obeying the default).
    -        my $obj;
    -        $top->{'defaultvis'} = "public" unless exists $top->{'defaultvis'};
    -        foreach $obj (@{$top->{'ocontains'}}) {
    -          $obj->{'vis'} = $top->{'defaultvis'} unless exists $obj->{'vis'};
    -        }
    -
    -        # Traverse (arbitrarily deeply) nested structures.
    -        sub traverse {
    -          my ($node) = @_;
    -          my $top = $stmts::topnest;   # HAVE NO IDEA WHY THIS IS NEEDED
    -          
    -          # Graduate nested MODULE PROCEDURE (mprocedure) to point to the
    -          # appropriate thing (either a function or a subroutine with that
    -          # name).
    -          if ($node->{'type'} eq "mprocedure") {
    -            die "Couldn't find module procedure $node->{'name'} (nothing with that name in module $top->{'name'})"
    -              unless exists $top->{'contains'}->{lc $node->{'name'}};
    -            
    -            my ($possibles) =
    -              $top->{'contains'}->{lc $node->{'name'}};
    -            die "Couldn't find module procedure $node->{'name'} in module $top->{'name'} (wrong type)"
    -              if !exists $possibles->{'subroutine'}
    -              && !exists $possibles->{'function'};
    -            die "Found both a subroutine and function to match module procedure $node->{'name'} in module $top->{'name'}"
    -              if exists $possibles->{'subroutine'}
    -              && exists $possibles->{'function'};
    -            
    -            if (exists $possibles->{'subroutine'}) {
    -              $node->{'bind'} = $possibles->{'subroutine'};
    -            } else {
    -              $node->{'bind'} = $possibles->{'function'};
    -            }
    -          }
    -
    -          # Recurse.
    -          map { traverse ($_) } @{$node->{'ocontains'}}
    -          if exists $node->{'ocontains'};
    -        }
    -        map { traverse ($_) } @{$top->{'ocontains'}};
    -      }
    -
    -      my @return_val = end_nest ($1, $2);
    -
    -      # Subroutines and functions in interface blocks must be noted at the
    -      # top level.  We do this with "interface" structures with the names
    -      # of the actual contained routines (unless this is already the
    -      # case).  Make sense?
    -      if ($top->{'type'} eq "interface" && $top->{'name'} eq "") {
    -          my $sub;
    -          foreach $sub (@{$top->{'ocontains'}}) {
    -              next if $sub->{'name'} eq $top->{'name'} ||
    -                      $sub->{'type'} eq "mprocedure";
    -
    -              my %copy = %$top;
    -              $copy{'name'} = $sub->{'name'};
    -              new_nest (\%copy);
    -              my $old_within = $sub->{'within'};
    -              new_struct ($sub);
    -              $sub->{'within'} = $old_within;
    -              end_nest ('interface', $sub->{'name'});
    -          }
    -      }
    -
    -      return @return_val;
    -   }
    -
    -   # SUBROUTINE/FUNCTION
    -   elsif ($line =~ /^(?:(.+?)\s+)?(subroutine|function)\s+(\w+)\s*(\([^()]*\))?(?:\s*result\s*\(\s*(\w+)\s*\))?$/i) {
    -      my ($type, $name, $parmstr, $rtype, $result) =
    -         (lc $2, $3,    $4,       $1,     $5);
    -
    -      die "Start of $type $name before `contains' section of $stmts::topnest->{'type'} $stmts::topnest->{'name'}"
    -          if defined $stmts::topnest && ! $stmts::topnest->{'incontains'} &&
    -             $stmts::topnest->{'type'} ne "interface";
    -      if (exists $stmts::nesting_by{'subroutine'} ||
    -          exists $stmts::nesting_by{'function'}) {
    -         my $n = 0;
    -         $n += scalar @{$stmts::nesting_by{'subroutine'}}
    -            if exists $stmts::nesting_by{'subroutine'};
    -         $n += scalar @{$stmts::nesting_by{'function'}}
    -            if exists $stmts::nesting_by{'function'};
    -#FIXME  #die "Routine nested in routine nested in routine" if $n > 1;
    -      }
    -
    -      $parmstr = "()" unless defined $parmstr;
    -      $parmstr = utils::trim (substr ($parmstr, 1, length ($parmstr) - 2));
    -      my (@parms);
    -      if ($parmstr) {
    -         @parms = split (/\s*,\s*/, $parmstr);
    -         my ($parm);
    -         foreach $parm (@parms) {
    -            die "Parameter `$parm' is not just a word or *"
    -              unless $parm =~ /^\w+|\*$/;
    -            ## * as a final argument allows the calling to specify a statement
    -            ## to jump as an alternative return address.  (Legacy Fortran!)
    -            ## Thanks to Art Olin for this info.
    -         }
    -      } else {
    -         @parms = ();
    -      }
    -
    -      my $struct = {
    -         'type'      => $type,
    -         'name'      => $name,
    -         'parms'     => \@parms,
    -         hashed_comments ()
    -      };
    -      new_nest ($struct);
    -
    -      $struct->{'result'} = $result if defined $result;
    -
    -      $rtype = "" unless defined $rtype;
    -      while ($rtype =~ /(?:^|\s+)(recursive|pure|elemental)$/i ||
    -             $rtype =~ /^(recursive|pure|elemental)(?:\s+|$)/i) {
    -        $rtype = $` . $'; # actually whichever is not blank
    -        $struct->{lc $1} = 1;
    -      }
    -      if ($rtype ne '') {
    -        $struct->{'rtype'} = parse_type ($rtype);
    -        new_struct ({
    -          'type'        => 'var',
    -          'name'        => (defined $result ? $result : $name),
    -          'vartype'     => $struct->{'rtype'},
    -          'comments'    => ''
    -        });
    -      }
    -
    -      return ($type, $struct);
    -   }
    -
    -   # TYPE definition (must go before variable declarations)
    -   elsif ($line =~ /^type(?:\s+|\s*(,.*)?::\s*)(\w+)$/i) {
    -     my $struct = new_nest ({
    -       'type' => 'type',
    -       'name' => $2,
    -       hashed_comments ()
    -     });
    -     if (defined $1) {
    -       my $attrib = utils::trim (substr ($1, 1));
    -       if ($attrib =~ /^(public|private)$/i) {
    -         $struct->{'vis'} = lc $attrib;
    -       } elsif ($attrib) {
    -         warn "Invalid attribute `$attrib' for derived-type declaration--should be just public or private";
    -       }
    -     }
    -     return $struct;
    -   }
    -
    -   # INTERFACE block (for overloading) or statement (for definition of external)
    -   elsif ($line =~ /^interface(?:\s+(\S.+))?$/i) {
    -       return new_nest ({
    -           'type' => 'interface',
    -           'name' => (defined $1 ? $1 : ""),
    -           hashed_comments ()
    -       });
    -   }
    -
    -   # CONTAINS
    -   elsif ($line =~ /^contains$/i) {
    -      die "`contains' found at top level" unless defined $stmts::topnest;
    -      die "`contains' found in $stmts::topnest->{'type'} $stmts::topnest->{'name'}" unless exists $stmts::topnest->{'incontains'};
    -      die "Multiple `contains' found in same scope"
    -         if $stmts::topnest->{'incontains'};
    -      die "`contains' found in interface definition"
    -         if $stmts::topnest->{'interface'};
    -      $stmts::topnest->{'incontains'} = 1;
    -      return ("contains", $stmts::topnest);
    -   }
    -
    -   # PUBLIC/PRIVATE/SEQUENCE
    -   elsif ($line =~ /^(public|private|sequence)(?=\s+[^=(]|::|$)(\s*::\s*)?/i) {
    -     my ($what, $rest) = (lc $1, $');
    -
    -     if (defined $stmts::topnest && $stmts::topnest->{'type'} eq "type") {
    -       die "public statement not allowed in a type declaration"
    -         if $what eq 'public';
    -       die "$1 cannot be qualified inside type declaration" if $rest;
    -       $stmts::topnest->{$what . 'type'} = 1;
    -       return ($what);
    -     } else {
    -       die "sequence statement only allowed immediately inside type declaration"
    -         if $1 eq 'sequence';
    -
    -       die "$1 statement not immediately inside a module or type declaration"
    -         unless defined $stmts::topnest && $stmts::topnest->{'type'} eq "module";
    -       if ($rest eq "") {  # Unqualified
    -         die "Unqualified $what in addition to unqualified " .
    -           $stmts::topnest->{'defaultvis'}
    -         if exists $stmts::topnest->{'defaultvis'};
    -         $stmts::topnest->{'defaultvis'} = $what;
    -         return ($what);
    -         
    -       } else {  # Qualified
    -         my @namelist = map {
    -           die "Invalid name `$_' specified in $what statement"
    -             unless /^\s*(\w+)(?:\s*(\([^()]+\)))?\s*$/i;
    -           $1 . (defined $2 ? $2 : "");
    -         } (split ',', $rest);
    -         push @{$stmts::topnest->{"${what}list"}}, @namelist;
    -         return ($what, @namelist);
    -       }
    -     }
    -   }
    -
    -    # OPTIONAL
    -    elsif ($line =~ /^optional(\s+|\s*::\s*)((\w|\s|,)+)$/i) {
    -        my $name;
    -        my @namelist = split (/\s*,\s*/, utils::trim ($2));
    -        foreach $name (@namelist) {
    -            do_attrib ($name, "optional", 1, "optional attribute");
    -        }
    -        return ('optional', @namelist);
    -    }
    -
    -   # Variable declarations
    -   elsif ($line =~ /^(integer|real|double\s*precision|character|complex|logical|type)\s*(\(|\s\w|[:,*])/i) {
    -      my ($vartype, $rest) = parse_part_as_type ($line);
    -      my (@attribs, @right);
    -      if ($rest =~ /^(.*)\:\:(.*)/) {
    -         my ($a, $b) = ($1, $2);
    -         @attribs = map (( utils::trim ($_) ), utils::balsplit (",", $a));
    -         @right = map (( utils::trim ($_) ), utils::balsplit (",", $b));
    -      } else {
    -         @attribs = ();
    -         @right = map (( &utils::trim ($_) ), utils::balsplit (",", $rest));
    -      }
    -      my ($r, @structs);
    -      foreach $r (@right) {
    -          my ($rl, $rassign) = &utils::balsplit ("=", $r);
    -          my ($rll, $starpart) = &utils::balsplit ("*", $rl);
    -          if (defined $starpart) {
    -            die "Sorry, I don't support 'character var*kind' yet; use 'character*kind var' instead";
    -          }
    -          $rll =~ /^ (\w+) (\s* \(.*\))? \s* $/x
    -              or die "Invalid variable declaration `$rll'";
    -          my ($name, $dimension) = ($1, $2);
    -          my ($initop, $initial);
    -          if (defined $rassign) {
    -            # implicit lead =
    -            $rassign =~ /^ (>?) \s* (.*) $/x
    -              or die "Invalid variable initialization `= $rassign'";
    -            ($initop, $initial) = ("=" . $1, $2);
    -          }
    -
    -          my $struct;
    -          $struct = {
    -              'type'        => 'var',
    -              'name'        => $name,
    -              'vartype'     => $vartype,
    -              hashed_comments ()
    -          };
    -          if (defined $initial) {
    -            $struct->{'initop'} = $initop;
    -            $struct->{'initial'} = expr_parse::parse_expr ($initial);
    -          }
    -          new_struct ($struct);
    -          push @structs, $struct;
    -
    -          my @attribs_copy = @attribs;
    -          push @attribs_copy, "dimension $dimension" if defined $dimension;
    -
    -          my ($attrib, @tempattribs);
    -          foreach $attrib (@attribs_copy) {
    -              if ($attrib =~ /^(public|private)$/i) {
    -                  $attrib = lc $attrib;
    -                  $struct->{'vis'} = $attrib;
    -              } elsif ($attrib =~ /^optional$/i) {
    -                  $attrib = lc $attrib;
    -                  $struct->{$attrib} = 1;
    -              } elsif ($attrib) {
    -                  warn "Unrecognized attribute `$attrib'"
    -                      unless $stmts::disable_warns;
    -                  push @tempattribs, $attrib;
    -              }
    -          }
    -
    -          $struct->{'tempattribs'} = \@tempattribs;
    -      }
    -
    -      return ('var', @structs);
    -   }
    -
    -   # USE
    -   elsif ($line =~ /^use\s+(\w+)($|,\s*)/i) {
    -      die "`use' found at top level" unless defined $stmts::topnest;
    -      die "`use' found in $stmts::topnest->{'type'} $stmts::topnest->{'name'}" unless exists $stmts::topnest->{'uses'};
    -      my $extra = length $' ? $' : undef;
    -      push @{$stmts::topnest->{'uses'}}, [$1, $extra];
    -
    -      return ('use', $1, $extra);
    -   }
    -   
    -   # CALL or IF (...) CALL [hack--xxx]
    -   elsif ($line =~ /^(?:if\s*\(.*\)\s*)?call\s+(\w+)\s*(?:\(\s*(.*?)\s*\))?$/i) {
    -      die "`call' found at top level" unless defined $stmts::topnest;
    -      die "`call' found in $stmts::topnest->{'type'} $stmts::topnest->{'name'}" unless exists $stmts::topnest->{'calls'};
    -      $stmts::topnest->{'calls'}->{$1} = 1;
    -      my @args = ();
    -      @args = split /\s*,\s*/, $2 if defined $2;
    -      return ('call', @args);
    -   }
    -   
    -   # Unrecognized statement
    -   else {
    -      if ($line =~ /^\w+/) {
    -         warn "Unrecognized statement beginning with word $&" unless $stmts::disable_warns;
    -      } else {
    -         warn "Unrecognized statement" unless $stmts::disable_warns;
    -      }
    -      return ('?', $line);
    -   }
    -}
    -
    -#####
    -# Returns a list that would fit right into a hash table you're making.  If
    -# there are no comments, returns the empty list.  The entry is called
    -# 'comments'.
    -#####
    -sub hashed_comments {
    -   if ($stmts::comments) {
    -      return ( 'comments', &$stmts::comments () );
    -   } else {
    -      return ();
    -   }
    -}
    -
    -#####
    -# Makes note of a new structure.  Called by new_nest, for example.
    -#####
    -sub new_struct {
    -   my ($struct) = @_;
    -   my $type = $struct->{'type'};
    -
    -   die "Basic structure must be found at a nesting level"
    -     unless defined $stmts::topnest;
    -
    -   if (exists ($stmts::topnest->{'contains'}->{lc $struct->{'name'}})) {
    -      die "Redefinition of $type $struct->{'name'} in $stmts::topnest->{'type'} $stmts::topnest->{'name'}"
    -         if exists ($stmts::topnest->{'contains'}->{lc $struct->{'name'}}->{$type});
    -      $stmts::topnest->{'contains'}->{lc $struct->{'name'}}->{$type} = $struct;
    -   } else {
    -      $stmts::topnest->{'contains'}->{lc $struct->{'name'}} =
    -         { $type => $struct };
    -   }
    -   push @{$stmts::topnest->{'ocontains'}}, $struct;
    -   $struct->{'within'} = $stmts::topnest;
    -}
    -
    -#####
    -# Starts a new nesting level represented by the given structure.  The
    -# structure must define the 'type' and 'name' entries.  You should not
    -# define the 'contains' or 'defaultvis' entry.
    -#####
    -sub new_nest {
    -   my ($struct) = @_;
    -   my ($type) = $struct->{'type'};
    -
    -   $struct->{'contains'} = { };
    -   $struct->{'ocontains'} = [ ];
    -
    -   # Program unit
    -   if ($type eq "subroutine" || $type eq "function" || $type eq "module" || $type eq "program") {
    -     $struct->{'incontains'} = 0;
    -     $struct->{'uses'} = [ ];
    -     $struct->{'interface'} = 0 if $type eq "subroutine" || $type eq "function";
    -   }
    -
    -   # Program unit with code
    -   if ($type eq "subroutine" || $type eq "function" || $type eq "program") {
    -     $struct->{'calls'} = { };
    -   }
    -
    -   if (defined $stmts::topnest) {
    -      my ($toptype) = $stmts::topnest->{'type'};
    -      if ($toptype eq "interface" && ($struct->{'type'} eq "subroutine" || $struct->{'type'} eq "function")) {
    -         $struct->{'interface'} = 1;
    -      } else {
    -         die "Nesting in $toptype not allowed" unless $toptype eq "subroutine" || $toptype eq "function" || $toptype eq "module" || $toptype eq "program";
    -      }
    -      new_struct ($struct) unless $struct->{'name'} eq "";
    -   }
    -   push @stmts::nesting, $struct;
    -   if (exists ($stmts::nesting_by{$type})) {
    -      push @{$stmts::nesting_by{$type}}, $struct;
    -   } else {
    -      $stmts::nesting_by{$type} = [ $struct ];
    -   }
    -   $stmts::topnest = $struct;
    -   return ( $type, $struct );
    -}
    -
    -#####
    -# Ends the current nesting level.  Optionally, you can pass the 'type' that
    -# it's supposed to be as the first argument.  Optionally, you can pass the
    -# 'name' it should have after that (as the second argument).
    -#####
    -sub end_nest {
    -  my ($type, $name) = @_;
    -  $type = lc $type if defined $type;
    -  unless (defined $stmts::topnest) {
    -    if (defined $name && defined $type) {
    -      die "Ended $type $name at top level";
    -    } elsif (defined $type) {
    -      die "Ended unnamed $type at top level";
    -    } else {
    -      die "END statement at top level";
    -    }
    -  }
    -  my ($struct) = pop @stmts::nesting;
    -  die "Ended $type while in $struct->{'type'} $struct->{'name'}"
    -    if defined $type && $type ne $struct->{'type'};
    -  die "Ended $name while in $struct->{'type'} $struct->{'name'}"
    -    if defined $name && $name !~ /^\Q$struct->{'name'}\E$/i;
    -  if (@stmts::nesting) {
    -    $stmts::topnest = $stmts::nesting[$#stmts::nesting];
    -  } else {
    -    $stmts::topnest = undef;
    -  }
    -  pop @{$stmts::nesting_by{$struct->{'type'}}};
    -  return ( "end" . (defined $type ? $type : ''), $struct );
    -}
    -
    -#####
    -# Parses the basic type that prefixes the given string.
    -# Returns (parsed type, string portion remaining).
    -#####
    -sub parse_part_as_type {
    -  my ($str) = @_;
    -
    -  $str =~ /^integer|real|double\s*precision|character|complex|logical|type/i
    -    or die "parse_part_as_type: Invalid input `$str'";
    -  my ($base, $rest) = ($&, $');
    -
    -  my $level = 0;
    -  ## Wait till we are outside of all parens and see a letter, colon, or comma.
    -  while ($rest =~ /[()a-zA-Z_:,]/g) {
    -    if ($& eq '(') {
    -      $level++;
    -    } elsif ($& eq ')') {
    -      $level--;
    -      die "Unbalanced parens (too many )'s)" if $level < 0;
    -    } elsif ($level == 0) {
    -      return (parse_type ($base . $`), $& . $');
    -    }
    -  }
    -  
    -  die "Couldn't split into type and rest for `$str'";
    -
    -# Some old, presumably less-efficient code:
    -#  my ($level, $len) = (0, length ($str));
    -#  my ($i, $c);
    -#  for ($i = length ($&); $i < $len; $i++) {
    -#    $c = substr ($str, $i, 1);
    -#    if ($c eq "(") {
    -#      $level++;
    -#    } elsif ($c eq ")") {
    -#      $level--;
    -#      die "Unbalanced parens (too many )'s)" if $level < 0;
    -#    } elsif ($level == 0 && $c =~ /^\w|:|,$/) {
    -#      last;
    -#    }
    -#  }
    -#  return (parse_type (substr ($str, 0, $i)), substr ($str, $i));
    -}
    -
    -#####
    -# Parses a basic type, creating a type structure for it:
    -#     integer [( [kind=] kind_val )]
    -#     real [( [kind=] kind_val )]
    -#     double precision                  (no kind is allowed)
    -#     complex [( [kind=] kind_val )]
    -#     character [( char_stuff )]
    -#     logical [( [kind=] kind_val )]
    -#     type (type_name)
    -#
    -# integer*number, real*number, complex*number, and logical*number are also
    -# supported as nonstandard Fortran extensions for kind specification.
    -# "number" can either be a direct integer or an expression in parentheses.
    -# 
    -# char_stuff is empty or (stuff), where stuff is one of:
    -#     len_val [, [kind=] kind_val]
    -#     kind=kind_val [, [len=] len_val]
    -#     len=len_val [, kind=kind_val]
    -# kind_val and len_val are expressions; len_val can also be just `*'.
    -# 
    -# The length can also be specified using the nonstandard Fortran extension
    -# character*number.  If number is `*', it must be in parentheses (indeed,
    -# any expression other than a number must be in parentheses).
    -#####
    -sub parse_type {
    -  my ($str) = @_;
    -
    -  # print "Parsing type: $str\n";
    -
    -  $str = utils::trim ($str);
    -  $str =~ /^(integer|real|double\s*precision|complex|character|logical|type)
    -    \s* (?: \( (.*) \) | \* \s* (\d+ | \(.*\)) )?$/ix
    -    or die "Invalid type `$str'";
    -  my $base = lc $1;
    -
    -  if ($base =~ /^double\s*precision$/) {
    -    die "double precision cannot have kind specification"
    -      if defined $2 || defined $3;
    -    return $typing::double_precision;
    -  }
    -
    -  if (defined $2 || defined $3) {
    -    my $star = defined $3;
    -    my $args = utils::trim ($star ? $3 : $2);
    -
    -    if ($base eq 'type') {
    -      die "type$args invalid--use type($args)" if $star;
    -      die "type(w) for non-word w" unless $args =~ /^\w+$/;
    -      return typing::make_type ($base, $args);
    -    } elsif ($base eq 'character') {
    -      my ($kind, $len, $rest);
    -      if ($star) {
    -        if ($args =~ /^\(\s*\*\s*\)$/) {
    -          $len = '*';
    -        } else {
    -          $len = expr_parse::parse_expr ($args);
    -        }
    -      } elsif ($args =~ /^kind\s*=\s*/i) {
    -        $args = substr ($args, length ($&));
    -        ($kind, $rest) = expr_parse::parse_part_as_expr ($args);
    -        if (defined $rest) {
    -          $rest = utils::trim ($rest);
    -          $rest =~ s/^len\s*=\s*//i;
    -          $len = ($rest eq '*' ? '*' : expr_parse::parse_expr ($rest));
    -        }
    -      } elsif ($args =~ /^len\s*=\s*/i) {
    -        $args = substr ($args, length ($&));
    -        if (substr ($args, 0, 1) eq '*') {
    -          $len = '*';
    -          $rest = $args;
    -          $rest =~ s/^\*\s*,// or $rest = undef;
    -        } else {
    -          ($len, $rest) = expr_parse::parse_part_as_expr ($args);
    -        }
    -        if (defined $rest) {
    -          $rest = utils::trim ($rest);
    -          $rest =~ /^kind\s*=\s*/
    -            or die "kind= specifier needed when len= specifier is given";
    -          $rest = substr ($rest, length ($&));
    -          $kind = expr_parse::parse_expr ($rest);
    -        }
    -      } else {  # len
    -        if (substr ($args, 0, 1) eq '*') {
    -          $len = "*";
    -          $rest = $args;
    -          $rest =~ s/^\*\s*,// or $rest = undef;
    -        } else {
    -          ($len, $rest) = expr_parse::parse_part_as_expr ($args);
    -        }
    -        if (defined $rest) {
    -          $rest = utils::trim ($rest);
    -          $rest = substr ($rest, length ($&)) if $rest =~ /^kind\s*=\s*/i;
    -          $kind = expr_parse::parse_expr ($rest);
    -        }
    -      }
    -      return typing::make_character_type ($kind, $len);
    -    } else {
    -      $args =~ s/^kind\s*=\s*//i unless $star;
    -      return typing::make_type ($base, expr_parse::parse_expr ($args));
    -    }
    -  } else {
    -    die "type without (type-name) after it" if $base eq 'type';
    -    die "No default type for `$base'"
    -      unless exists $typing::default_type{$base};
    -    return $typing::default_type{$base};
    -  }
    -}
    -
    -sub do_attrib {
    -    my ($name, $attrib, $val, $attribname) = @_;
    -    my ($struct);
    -    foreach $struct (values %{$stmts::topnest->{'contains'}->{lc $name}}) {
    -        die "Redefining $attribname of $struct->{'type'} $name from " .
    -            "$struct->{$attrib} to $val" if exists $struct->{$attrib};
    -        $struct->{$attrib} = $val;
    -    }
    -}
    -
    -1;
    diff --git a/Tools/F_scripts/f90doc/typing.pl b/Tools/F_scripts/f90doc/typing.pl
    deleted file mode 100644
    index 9347b8bbb16..00000000000
    --- a/Tools/F_scripts/f90doc/typing.pl
    +++ /dev/null
    @@ -1,516 +0,0 @@
    -package typing;
    -
    -use strict;
    -
    -# Stores the type of each variable.
    -$typing::typeof = "";
    -# Stack: one typeof per scope.
    -@typing::typeofs = ();
    -
    -# Stores the definition of each type.
    -$typing::typedef = "";
    -# Stack: one typedef per scope.
    -@typing::typedefs = ();
    -
    -# Stores the definition of each function/operator.
    -$typing::code = "";
    -# Stack: one code per scope.
    -@typing::codes = ();
    -
    -
    -# DOUBLE PRECISION type.
    -$typing::double_precision = typing::make_type ('real', 8, "double precision");
    -
    -# Default character kind.
    -$typing::default_character_kind = 1;
    -
    -# Default types.
    -%typing::default_type = (
    -  'complex' => typing::make_type ('complex', 8, "complex"),
    -  'integer' => typing::make_type ('integer', 4, "integer"),
    -  'logical' => typing::make_type ('logical', 1, "logical"),
    -  'real'    => typing::make_type ('real', 4, "real"),
    -);
    -$typing::default_type{'character'} = typing::make_character_type ();
    -
    -# Types with wild sub and any other info (just a base defined).
    -$typing::wild_type = {
    -   'complex'   => typing::make_type ('complex'),
    -   'real'      => typing::make_type ('real'),
    -   'integer'   => typing::make_type ('integer'),
    -   'logical'   => typing::make_type ('logical'),
    -   'character' => typing::make_type ('character')
    -};
    -
    -
    -# Precedence of operations; based on that which is in expr_parse.y.
    -# Higher precedence indicated by larger number.
    -$typing::precedence = {
    -  '.eqv.'  => 1,
    -  '.neqv.' => 1,
    -  '.or.'   => 2,
    -  '.and.'  => 3,
    -  '.not.'  => 4,
    -  '<'      => 5,
    -  '>'      => 5,
    -  '<='     => 5,
    -  '>='     => 5,
    -  '=='     => 5,
    -  '/='     => 5,
    -  '//'     => 6,
    -  '+'      => 7,
    -  '-'      => 7,
    -  'u+'     => 8,
    -  'u-'     => 8,
    -  '*'      => 9,
    -  '/'      => 9,
    -  '**'     => 10,
    -  '%'      => 11,
    -  '%call'  => 11,
    -  '%colon' => 30, # this is a guess
    -  '%namedarg' => 30, # this is a guess
    -  '%array' => 40,    # as in "forty days and forty nights," which means
    -  '%const' => 40,    #    "a long time," here we use 40 as an approx. to infty.
    -  '%var'   => 40,
    -  '%do'    => 40,
    -};
    -
    -#####
    -# Starts a new scope.  If this is a top-level scope, initializes the codes
    -# to intrinsics and the like.
    -#####
    -sub new_scope {
    -   my ($newtypeof, $newtypedef, $newcode);
    -
    -   if (@typing::typeofs) {
    -      $typing::typeof = utils::copy_hash ($typing::typeof);
    -      $typing::typedef = utils::copy_hash ($typing::typedef);
    -      $typing::code = utils::copy_hash ($typing::code);
    -   } else {
    -      $typing::typeof = {};
    -      $typing::typedef = {};
    -      $typing::code = {};
    -      $typing::code{"//"} = [ {
    -         'parms' => [ $typing::wild_type{'character'},
    -                      $typing::wild_type{'character'} ],
    -         'return' => $typing::wild_type{'character'}
    -      } ];
    -      my ($int, $real, $logical, $char) = ( $typing::wild_type{'integer'},
    -         $typing::wild_type{'real'}, $typing::wild_type{'logical'},
    -         $typing::wild_type{'character'} );
    -      my ($op);
    -      foreach $op ("+", "-", "*", "/") {
    -         $typing::code->{$op} = [
    -            { 'parms' => [ $int, $int ], 'return' => $int },
    -            { 'parms' => [ $real, $int ], 'return' => $real },
    -            { 'parms' => [ $int, $real ], 'return' => $real },
    -            { 'parms' => [ $real, $real ], 'return' => $real }
    -         ];
    -      }
    -      $typing::code->{"**"} = [
    -         { 'parms' => [ $int, $int ], 'return' => $int },
    -         { 'parms' => [ $real, $int ], 'return' => $real },
    -         { 'parms' => [ $int, $real ], 'return' => $real },
    -         { 'parms' => [ $real, $real ], 'return' => $real },
    -      ];
    -      foreach $op ("u+", "u-") {
    -         $typing::code->{$op} = [
    -            { 'parms' => [ $int ], 'return' => $int },
    -            { 'parms' => [ $real ], 'return' => $real }
    -         ];
    -      }
    -      foreach $op ("<", "<=", "==", "/=", ">", ">=") {
    -         $typing::code->{$op} = [
    -            { 'parms' => [ $int, $int ], 'return' => $logical },
    -            { 'parms' => [ $real, $int ], 'return' => $logical },
    -            { 'parms' => [ $int, $real ], 'return' => $logical },
    -            { 'parms' => [ $real, $real ], 'return' => $logical },
    -            { 'parms' => [ $char, $char ], 'return' => $logical }
    -         ];
    -      }
    -      foreach $op (".or.", ".and.", ".eqv.", ".neqv.") {
    -         $typing::code->{$op} = [
    -            { 'parms' => [ $logical, $logical ], 'return' => $logical }
    -         ];
    -      }
    -      $typing::code->{".not."} = [
    -         { 'parms' => [ $logical ], 'return' => $logical }
    -      ];
    -      $typing::code->{"//"} = [
    -         { 'parms' => [ $char, $char ], 'return' => $char }
    -      ];
    -   }
    -
    -   push @typing::typeofs, $typing::typeof;
    -   push @typing::typedefs, $typing::typedef;
    -   push @typing::codes, $typing::code;
    -}
    -
    -#####
    -# Ends an old scope.
    -#####
    -sub end_scope {
    -   pop @typing::typeofs;
    -   pop @typing::typedefs;
    -   pop @typing::codes;
    -
    -   if ($typing::typeofs) {
    -      $typing::typeof = $typing::typeofs[$#typing::typeofs];
    -      $typing::typedef = $typing::typedefs[$#typing::typedefs];
    -      $typing::code = $typing::codes[$#typing::codes];
    -   }
    -}
    -
    -#####
    -# Creates a new type with specified base and sub.
    -# Note that sub corresponds to kind for built-in types.
    -# sub can be left out for a wild type.
    -# A third argument, print, can specify how the type should print.  Used for
    -# default types, double precision, etc.
    -#####
    -sub make_type {
    -  my ($base, $sub, $print) = @_;
    -  my $type = { 'base' => $base };
    -  $type->{'sub'} = $sub if $sub;
    -  $type->{'print'} = $print;
    -  return $type;
    -}
    -
    -#####
    -# Creates a new complex type with specified types of "sides."
    -#####
    -sub make_complex_type {
    -  my ($type1, $type2) = @_;
    -  my ($base1, $base2) = ($type1->{'base'}, $type2->{'base'});
    -  die "Complex constant must have real and/or integer parts, but I found types $base1 and $base2"
    -    unless ($base1 eq 'integer' || $base1 eq 'real') &&
    -           ($base2 eq 'integer' || $base2 eq 'real');
    -  my $which;
    -  # From Metcalf and Reed's Fortran 90 Explained, if one of the types is an
    -  # integer then the kind of the complex is the kind of the other type.
    -  if ($base1 eq 'integer') {
    -    $which = $type2;
    -  } elsif ($base2 eq 'integer') {
    -    $which = $type1;
    -  } else {
    -    if ($type1->{'sub'} > $type2->{'sub'}) {
    -      $which = $type1;
    -    } else {
    -      $which = $type2;
    -    }
    -  }
    -  return {
    -    'base'    => 'complex',
    -    'sub'     => $which
    -  };
    -}
    -
    -#####
    -# Creates a new character type with specified sub (kind) and len.
    -#####
    -sub make_character_type {
    -  my ($sub, $len) = @_;
    -  $sub = $typing::default_character_kind unless defined $sub;
    -  $sub = [ "%const", $typing::default_type{'integer'}, $sub ] unless ref $sub;
    -  $len = "1" unless defined $len;
    -  $len = [ "%const", $typing::default_type{'integer'}, $len ]
    -    unless ref $len || $len eq "*";
    -  return {
    -    'base' => 'character',
    -    'sub'  => $sub,
    -    'len'  => $len
    -  };
    -}
    -
    -#####
    -# Returns true iff the given type was created to be the default of its kind.
    -# This has no meaning for compound types (hence it returns false).  For
    -# characters, there's a slight bug in that it will say that the type was
    -# created default even if you specify the default explicitly.  No biggie.
    -# Note that the defaultness is only for the KIND, not the LENGTH.
    -# 
    -# I could fix the above-mentioned problem by storing a 'default' entry just for
    -# the default types.  Then is_default_kind just translates to an exists test.
    -# This is much simpler and avoids the weird checks for double precision numbers
    -# (0.0d0 ==> don't show a kind.  This is really "default").  This would be
    -# kinda nice but 'default' is probably the wrong word.
    -#####
    -sub is_default_kind {
    -   my ($type) = @_;
    -
    -   if ($type->{'base'} eq "character") {
    -     my ($top, @rest) = @{$type->{'sub'}};
    -     return ($top eq "%const" && $rest[0] eq $typing::default_type{'integer'}
    -          && $rest[1] == $typing::default_character_kind);
    -   } else {
    -      return (exists $typing::default_type{$type->{'base'}} && $typing::default_type{$type->{'base'}} eq $type);
    -   }
    -}
    -
    -#####
    -# Converts the given type to a string, written in Fortran 90 code.
    -# Only displays the kind if it was specified explicitly.  Slight bug:
    -# if you say character (kind=1) :: c, then it will print character :: c.
    -# (This is only for characters with default kind.  For other types with
    -# default kind explicitly specified, it is printed.)
    -#####
    -sub type_to_f90 {
    -  my ($type) = @_;
    -
    -  # This covers the case where the kind is the default, except for characters.
    -  return $type->{'print'} if defined $type->{'print'};
    -
    -  my $mods = "";
    -  if ($type->{'base'} eq "character") {
    -    if ($type->{'len'} eq "*") {
    -      $mods = "len=*";
    -    } elsif ($type->{'len'}->[0] ne "%const" ||
    -             $type->{'len'}->[1] != $typing::default_type{'integer'} ||
    -             $type->{'len'}->[2] ne "1") {
    -      $mods = "len=" . expr_to_f90 ($type->{'len'});
    -    }
    -    unless (is_default_kind ($type)) {
    -      $mods .= ", " unless $mods eq '';
    -      $mods .= "kind=" . expr_to_f90 ($type->{'sub'});
    -    }
    -  } elsif ($type->{'base'} eq "type") {
    -    $mods = "$type->{'sub'}";
    -  } else {
    -    $mods = "kind=" . expr_to_f90 ($type->{'sub'});
    -  }
    -  $mods = " ($mods)" unless $mods eq '';
    -  return $type->{'base'} . $mods;
    -}
    -
    -#####
    -# Converts an expression right back to a string, doing "no" conversion (i.e.,
    -# output is in Fortran 90).  Optionally returns the precedence of the outmost
    -# operation in the expression (see $typing::precedence).
    -#####
    -sub expr_to_f90 {
    -  my ($exprptr) = @_;
    -  my ($op, @children) = @$exprptr;
    -
    -  die "Unrecognized operation $op",%$op," (has no precedence?)"
    -    unless exists $typing::precedence->{$op};
    -  my $prec = $typing::precedence->{$op};
    -
    -  my $answer;
    -  if ($op eq "%") {
    -    my ($struct, $elem) = @children;
    -    my ($s, $sprec) = expr_to_f90 ($struct);
    -    $s = "($s)" if $prec > $sprec;
    -    $answer = "$s%$elem";
    -  } elsif ($op eq "%var") {
    -    $answer = $children[0];
    -  } elsif ($op eq "%const") {
    -    my ($type, $val) = @children;
    -    if ($type->{'base'} eq 'complex') {
    -      if (!is_default_kind ($type->{'sub'})) {
    -        my ($k1, $k2) = ("", "");
    -        $k1 = "_$type->{'sub'}->{'sub'}" unless $val->[0] =~ /D[+-]?\d+$/i;
    -        $k2 = "_$type->{'sub'}->{'sub'}" unless $val->[1] =~ /D[+-]?\d+$/i;
    -        $answer = "($val->[0]$k1, $val->[1]$k2)";
    -      } else {
    -        $answer = "($val->[0], $val->[1])";
    -      }
    -    } elsif (is_default_kind ($type) || $val =~ /D[+-]?\d+$/i) {
    -      $answer = $val;
    -    } else {
    -      $answer = "${val}_$type->{'sub'}";
    -    }
    -  } elsif ($op eq "%array") {
    -    $answer = "(/ " . join (", ", map { (expr_to_f90 ($_))[0] } @children)
    -            . " /)";
    -  } elsif ($op eq "%colon") {
    -    my ($left, $right) = @children;
    -    $left = (expr_to_f90 ($left))[0] if $left ne '';
    -    $right = (expr_to_f90 ($right))[0] if $right ne '';
    -    $answer = $left . ":" . $right;  # : has ultimately low precedence
    -  } elsif ($op eq "%namedarg") {
    -    my ($left, $right) = @children;
    -    $answer = $left . " = " .
    -              (expr_to_f90 ($right))[0];  # = has ultimately low precedence
    -  } elsif ($op eq "%do") {
    -    my ($child, $var, @args) = @children;
    -    $answer = "(" . expr_to_f90 ($child) . ", " . $var . " = " .
    -              join (", ", map { (expr_to_f90 ($_))[0] } @args) . ")";
    -  } elsif ($op eq "%call") {
    -    ($op, @children) = @children;
    -    my ($s, $sprec) = expr_to_f90 ($op);
    -    $s = "($s)" if $prec > $sprec;
    -    $answer = "$s (" . join (", ", map ((expr_to_f90 ($_))[0], @children))
    -      . ")";
    -  } elsif (scalar @children == 1) {
    -    $op = substr ($op, 1) if substr ($op, 0, 1) eq 'u';
    -    my ($s, $sprec) = expr_to_f90 ($children[0]);
    -    $s = "($s)" if $prec > $sprec;
    -    $answer = "$op$s";
    -  } elsif (scalar @children == 2) {
    -    my ($s1, $sprec1) = expr_to_f90 ($children[0]);
    -    $s1 = "($s1)" if $prec > $sprec1;
    -    my ($s2, $sprec2) = expr_to_f90 ($children[1]);
    -    $s2 = "($s2)" if $prec > $sprec2;
    -    $answer = "$s1 $op $s2";
    -  } else {
    -    die "expr_to_f90: Unrecognized operation $op with " . (scalar @children) .
    -      " children";
    -  }
    -
    -  if (wantarray) {
    -    return ($answer, $prec);
    -  } else {
    -    return $answer;
    -  }
    -}
    -
    -#####
    -# Computes the type of the given expression (which is passed by reference).
    -# Returns a reference to the actual type.
    -#####
    -sub expr_type {
    -   my ($exprptr) = @_;
    -   my ($op, @children) = @$exprptr;
    -
    -   if ($op eq "%") {
    -      my ($struct, $elem) = @children;
    -      my ($type) = expr_type ($struct);
    -      die "expr_type: \%$elem failed: left part is not a compound type" unless $type->{'base'} eq "type";
    -      my ($typedef) = $typing::typedef->{$type->{'sub'}};
    -      my ($elemtype) = $typedef->{$elem};
    -      die "expr_type: \%$elem failed: left part does not include $elem" unless $elemtype;
    -      return $elemtype;
    -   } elsif ($op eq "%var") {
    -      my ($var) = @children;
    -      my ($vartype) = $typing::typeof->{$var};
    -      die "expr_type: Variable $var undefined" unless $vartype;
    -      return $vartype;
    -   } elsif ($op eq "%const") {
    -      my ($type, $val) = @children;
    -      return $type;
    -   } elsif ($op eq "%array") {
    -      # HERE
    -   } elsif ($op eq "%colon") {
    -      my ($string, $left, $right) = @children;
    -      my ($stringtype) = expr_type ($string);
    -      die "expr_type: colon notation for non-character string" if $stringtype->{'base'} ne "character";
    -      die "expr_type: colon notation for character array" if $stringtype->{'dimension'};
    -      return typing::make_character_type ($stringtype->{'sub'}, "*");
    -   } elsif ($op eq "%call") {
    -      ($op, @children) = @children;
    -      my ($subop, @subchildren) = @$op;
    -      if ($subop eq "%var") {
    -         ($op) = @subchildren;
    -         # Fall through: we allow overloaded function name in this special case.
    -      } else {
    -         # Function call without overloading or an array reference.
    -         my ($optype) = expr_type ($op);
    -
    -         if ($optype->{'dimension'}) {  # array reference
    -            return make_type ($optype->{'base'}, $optype->{'sub'});
    -         } else {
    -            die "expr_type: Array/function call for something that is neither" unless $optype->{'base'} eq "interface";
    -            # HERE function call without overloading.
    -         }
    -      }
    -   }
    -
    -   my ($opcodes) = $typing::code->{$op};
    -   die "Operation/function $op undefined" unless $opcodes;
    -   my (@childtypes) = ();
    -   my ($child);
    -   foreach $child (@children) {
    -      print "childtypes was: @childtypes\n";
    -      print "type of $child is ", expr_type ($child), "\n";
    -      push @childtypes, expr_type ($child);
    -      print "childtypes is now: @childtypes\n";
    -   }
    -   my ($opcode);
    -   foreach $opcode (@$opcodes) {
    -      print "children: @children\n";
    -      print "childtypes: @childtypes\n";
    -      if (typing::subtypes_list (\@childtypes, $opcode->{'parms'})) {
    -         my ($parm);
    -         my ($ret) = $opcode->{'return'};
    -         if ($ret->{'base'} eq "character" && ! $ret->{'len'}) {
    -            $ret->{'len'} = 0;
    -find_len:
    -            foreach $parm (@$opcode->{'parms'}) {
    -               if ($parm->{'base'} eq $ret->{'base'}) {
    -                  if ($parm->{'len'} eq "*") {
    -                     $ret->{'len'} = "*";
    -                     last find_len;
    -                  } else {
    -                     $ret->{'len'} += $parm->{'len'};
    -                  }
    -               }
    -            }
    -         }
    -         if ($ret->{'sub'}) {
    -            return $ret;
    -         } else {
    -            # Make intrinsic type's kind: look for all parameters with the same
    -            # base type, and use the maximum kind out of those.
    -            my ($maxkind) = -1;
    -            foreach $parm (@$opcode->{'parms'}) {
    -               if ($parm->{'base'} eq $ret->{'base'}) {
    -                  $maxkind = $parm->{'sub'} if $maxkind < $parm->{'sub'};
    -               }
    -            }
    -            die "expr_type: Internal error caused by new_scope" if $maxkind < 0;
    -            return { %$ret, 'sub' => $maxkind };
    -         }
    -      }
    -   }
    -   die "Operation/function $op defined but not for this (these) type(s)";
    -}
    -
    -#####
    -# Returns if first type is a subtype of the second type.
    -# This currently only supports intrinsic types (integer*4 subtypes integer*?).
    -#####
    -sub subtypes {
    -   my ($t1, $t2) = @_;
    -   return 0 if $t1->{'base'} ne $t2->{'base'};
    -   if ($t1->{'base'} eq "type") {
    -      return 0 if $t1->{'sub'} eq $t2->{'sub'};
    -   } else {
    -      if ($t1->{'base'} eq "character") {
    -         if ($t1->{'len'}) {
    -            return 0 unless $t1->{'len'};
    -            return 0 if $t2->{'len'} != $t1->{'len'};
    -         }
    -      }
    -      if ($t1->{'base'} eq "interface") {
    -         # HERE fill this in when I do function types ("interface").
    -      }
    -      if ($t1->{'sub'}) {
    -         return 0 unless $t1->{'sub'};
    -         return 0 if $t2->{'sub'} ne $t1->{'sub'};
    -      }
    -   }
    -   return 1;
    -}
    -
    -#####
    -# Returns if first type is a subtype of the second type, where the first
    -# and second type are (conceptually) tuples.  That is, the lengths must be
    -# equal, and each element must subtype the corresponding element.
    -# The lists are passed as references.
    -#####
    -sub subtypes_list {
    -   my ($l1ptr, $l2ptr) = @_;
    -   my (@l1) = @$l1ptr;
    -   my (@l2) = @$l2ptr;
    -   return 0 if $#l1 != $#l2;
    -
    -   print "l1 is: @l1\n";
    -   print "l2 is: @l2\n";
    -
    -   my ($i);
    -   for ($i = 0; $i <= $#l1; $i++) {
    -      print "calling subtypes with $l1[$i] and $l2[$i]\n";
    -      return 0 unless typing::subtypes ($l1[$i], $l2[$i]);
    -   }
    -   return 1;
    -}
    diff --git a/Tools/F_scripts/f90doc/utils.pl b/Tools/F_scripts/f90doc/utils.pl
    deleted file mode 100644
    index 8e409f0db1c..00000000000
    --- a/Tools/F_scripts/f90doc/utils.pl
    +++ /dev/null
    @@ -1,87 +0,0 @@
    -package utils;
    -
    -use strict;
    -
    -sub copy_list {
    -   my ($listref) = @_;
    -   my @list;
    -   @list = @$listref;
    -   \@list;
    -}
    -
    -sub copy_hash {
    -   my ($hashref) = @_;
    -   my %hash;
    -   %hash = %$hashref;
    -   \%hash;
    -}
    -
    -sub hash2str {
    -   my ($hash) = @_;
    -   my ($key, $s);
    -   $s = "{\n";
    -   foreach $key (keys %$hash) {
    -      $s .= "   $key => $hash->{$key}\n";
    -   }
    -   $s .= "}";
    -}
    -
    -sub trim {
    -   my ($s) = @_;
    -   $s =~ s/^\s*//;
    -   $s =~ s/\s*$//;
    -   $s;
    -}
    -
    -# balsplit (sep, string) splits string into pieces divided by sep when
    -# sep is "outside" ()s.  Returns a list just like split.
    -sub balsplit {
    -   my ($sep, $str) = @_;
    -   my ($i, $c);
    -   my ($len, $level, $left) = (length ($str), 0, 0);
    -   my (@list) = ();
    -
    -   for ($i = 0; $i < $len; $i++) {
    -      $c = substr ($str, $i, 1);
    -      if ($c eq "(") {
    -         $level++;
    -      } elsif ($c eq ")") {
    -         $level--;
    -         die "balsplit: Unbalanced parens (too many )'s)" if $level < 0;
    -      } elsif ($c eq $sep && $level == 0) {
    -         push (@list, substr ($str, $left, $i-$left));
    -         $left = $i + 1;
    -      }
    -   }
    -
    -   push (@list, substr ($str, $left));
    -   return @list;
    -}
    -
    -# Takes the first word of each element of the list.
    -sub leftword {
    -   my ($listref) = @_;
    -   my @out = ();
    -   my ($x);
    -   foreach $x (@$listref) {
    -      $x =~ s/^\s*//;
    -      $x =~ /^\w*/;
    -      push (@out, $&);
    -   }
    -   @out;
    -}
    -
    -sub remove_blanks {
    -   my ($listref) = @_;
    -   my @out = ();
    -   my ($x);
    -   foreach $x (@$listref) {
    -      push (@out, $x) unless $x =~ /^\s*$/;
    -   }
    -   @out;
    -}
    -
    -sub do_nothing {
    -}
    -
    -1;
    
    From 487267625412e4f8a4fa1ab2492cb578955c4239 Mon Sep 17 00:00:00 2001
    From: Weiqun Zhang 
    Date: Thu, 23 Jun 2022 12:41:20 -0700
    Subject: [PATCH 004/111] GNU Make: No need to query mpif90 if Fortran is not
     used. (#2852)
    
    This minimizes potential issues.
    ---
     Tools/GNUMake/sites/Make.unknown | 7 ++++++-
     1 file changed, 6 insertions(+), 1 deletion(-)
    
    diff --git a/Tools/GNUMake/sites/Make.unknown b/Tools/GNUMake/sites/Make.unknown
    index 332a7a558de..2ecf6a50ddb 100644
    --- a/Tools/GNUMake/sites/Make.unknown
    +++ b/Tools/GNUMake/sites/Make.unknown
    @@ -29,6 +29,8 @@ ifeq ($(USE_MPI),TRUE)
     
       ifeq ($(LINK_WITH_FORTRAN_COMPILER),TRUE)
         MPI_OTHER_COMP := mpicxx
    +  else ifeq ($(BL_NO_FORT),TRUE)
    +    MPI_OTHER_COMP := mpicxx
       else
         MPI_OTHER_COMP := mpif90
       endif
    @@ -55,7 +57,10 @@ ifeq ($(USE_MPI),TRUE)
          mpi_link_flags := $(filter-out $(mpi_filter), $(mpi_link_flags))
       endif
     
    -  LIBRARIES += $(mpi_link_flags) $(mpicxx_link_libs)
    +  LIBRARIES += $(mpi_link_flags)
    +  ifneq ($(MPI_OTHER_COMP),mpicxx)
    +    LIBRARIES += $(mpicxx_link_libs)
    +  endif
     
       # OpenMPI specific flag
       # Uncomment if statement if flag causes issue with another compiler.
    
    From 3d3ad213ca4b60421c9a80328e1316b23435958f Mon Sep 17 00:00:00 2001
    From: kngott 
    Date: Thu, 23 Jun 2022 13:39:59 -0700
    Subject: [PATCH 005/111] NERSC Programming Environment prototype (#2848)
    
    ---
     Tools/GNUMake/sites/Make.nersc | 25 ++++++++++++++++++++++---
     1 file changed, 22 insertions(+), 3 deletions(-)
    
    diff --git a/Tools/GNUMake/sites/Make.nersc b/Tools/GNUMake/sites/Make.nersc
    index c8c938a627a..f894dcd7d7c 100644
    --- a/Tools/GNUMake/sites/Make.nersc
    +++ b/Tools/GNUMake/sites/Make.nersc
    @@ -25,11 +25,16 @@ ifeq ($(which_computer),$(filter $(which_computer),perlmutter))
       endif
     
       ifeq ($(USE_CUDA),TRUE)
    +    ifdef NPE_VERSION
    +      CFLAGS += -Xcompiler="$(filter-out -Wl%, $(wordlist 2,1024,$(shell mpicc -show 2> /dev/null)))"
    +      CXXFLAGS += -Xcompiler="$(filter-out -Wl%, $(wordlist 2,1024,$(shell mpicxx -show 2> /dev/null)))"
    +    else
           CFLAGS += -Xcompiler="$(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))"
           CXXFLAGS += -Xcompiler="$(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))"
    +    endif
       else ifeq ($(USE_MPI),FALSE)
    -      CFLAGS += $(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))
    -      CXXFLAGS += $(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))
    +    CFLAGS += $(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))
    +    CXXFLAGS += $(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))
       endif
     
       ifeq ($(USE_MPI),TRUE)
    @@ -41,7 +46,9 @@ ifeq ($(which_computer),$(filter $(which_computer),perlmutter))
           LIBRARIES += -lmpichf90
         endif
     
    -    includes += $(shell CC --cray-print-opts=cflags)
    +    ifndef NPE_VERSION
    +      includes += $(shell CC --cray-print-opts=cflags)
    +    endif
       endif
     
       ifeq ($(USE_CUDA),TRUE)
    @@ -51,11 +58,23 @@ ifeq ($(which_computer),$(filter $(which_computer),perlmutter))
           includes += $(CRAY_CUDATOOLKIT_INCLUDE_OPTS)
         endif
     
    +    ifdef NPE_VERSION
    +      includes += $(CRAY_CUDATOOLKIT_INCLUDE_OPTS)
    +    endif
    +
         comm := ,
         ifneq ($(BL_NO_FORT),TRUE)
    +      ifdef NPE_VERSION
    +        LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(wordlist 2,1024,$(shell mpifort -show)))
    +      else
             LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(shell ftn --cray-print-opts=libs))
    +      endif
         else
    +      ifdef NPE_VERSION
    +        LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(wordlist 2,1024,$(shell mpicxx -show)))
    +      else
             LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(shell CC --cray-print-opts=libs))
    +      endif
         endif
     
         ifneq ($(CUDA_ROOT),)
    
    From 027f2ff77fed33a191cfc735d8adaabb42d21743 Mon Sep 17 00:00:00 2001
    From: Weiqun Zhang 
    Date: Thu, 23 Jun 2022 16:15:57 -0700
    Subject: [PATCH 006/111] Fix make help (#2854)
    
    This reverts the change in #2845, which fixed an issue with `make print-%`, but broke
    `make help`.  This is now fixed in a different way.  Both `make print-%` and `make help`
    should work now.
    ---
     Tools/GNUMake/Make.rules       | 9 +++++++--
     Tools/GNUMake/sites/Make.nersc | 8 ++++----
     2 files changed, 11 insertions(+), 6 deletions(-)
    
    diff --git a/Tools/GNUMake/Make.rules b/Tools/GNUMake/Make.rules
    index 8b014678500..5d6caa60e06 100644
    --- a/Tools/GNUMake/Make.rules
    +++ b/Tools/GNUMake/Make.rules
    @@ -512,9 +512,14 @@ endif
     # e.g. libraries, simply do "make print-libraries".  This will
     # print out the value.
     print-%:
    -	@echo $* is '$($*)'
    +	@echo $* is "$($*)"
     	@echo '    origin = $(origin $*)'
    -	@echo '     value = $(value  $*)'
    +	@echo '     value = $(subst ','"'"',$(value  $*))'
    +# We need to use subst on the result of $(value) because it contains single
    +# quotes.  Shell command echo does not like things like 'x'$(filter-out)'y',
    +# because what it sees is 'x', $(filter-out), and 'y'.  With the substitution, it
    +# will see 'x', "'", '$(filter-out)', "'", and 'y', with $(filter-out) inside a
    +# pair of single quotes.
     
     .PHONY: help
     help:
    diff --git a/Tools/GNUMake/sites/Make.nersc b/Tools/GNUMake/sites/Make.nersc
    index f894dcd7d7c..426b9525887 100644
    --- a/Tools/GNUMake/sites/Make.nersc
    +++ b/Tools/GNUMake/sites/Make.nersc
    @@ -26,11 +26,11 @@ ifeq ($(which_computer),$(filter $(which_computer),perlmutter))
     
       ifeq ($(USE_CUDA),TRUE)
         ifdef NPE_VERSION
    -      CFLAGS += -Xcompiler="$(filter-out -Wl%, $(wordlist 2,1024,$(shell mpicc -show 2> /dev/null)))"
    -      CXXFLAGS += -Xcompiler="$(filter-out -Wl%, $(wordlist 2,1024,$(shell mpicxx -show 2> /dev/null)))"
    +      CFLAGS += -Xcompiler='$(filter-out -Wl%, $(wordlist 2,1024,$(shell mpicc -show 2> /dev/null)))'
    +      CXXFLAGS += -Xcompiler='$(filter-out -Wl%, $(wordlist 2,1024,$(shell mpicxx -show 2> /dev/null)))'
         else
    -      CFLAGS += -Xcompiler="$(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))"
    -      CXXFLAGS += -Xcompiler="$(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))"
    +      CFLAGS += -Xcompiler='$(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))'
    +      CXXFLAGS += -Xcompiler='$(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))'
         endif
       else ifeq ($(USE_MPI),FALSE)
         CFLAGS += $(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))
    
    From 21fe4b3016a796b99c409760cfad7ae00a7475ba Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Tue, 28 Jun 2022 19:53:09 +0200
    Subject: [PATCH 007/111] CMake: FindDependency CUDAToolkit (#2849)
    
    If we install AMReX with CUDA support using a modern
    CMake, we need to repopulate targets such as `CUDA::curand`
    from `find_dependency` for downstream.
    Downstream users find us via `find_package` and that target
    link dependency showed up to be unpopulated in MFIX.
    ---
     Tools/CMake/AMReXConfig.cmake.in | 10 ++++++----
     1 file changed, 6 insertions(+), 4 deletions(-)
    
    diff --git a/Tools/CMake/AMReXConfig.cmake.in b/Tools/CMake/AMReXConfig.cmake.in
    index 6b0cdd3fd74..64a112da181 100644
    --- a/Tools/CMake/AMReXConfig.cmake.in
    +++ b/Tools/CMake/AMReXConfig.cmake.in
    @@ -223,10 +223,12 @@ endif ()
     # CUDA
     #
     # AMReX 21.06+ supports CUDA_ARCHITECTURES
    -if(CMAKE_VERSION VERSION_LESS 3.20)
    -   if (@AMReX_CUDA@)
    -      include(AMReX_SetupCUDA)
    -   endif ()
    +if (@AMReX_CUDA@)
    +    if (CMAKE_VERSION VERSION_LESS 3.20)
    +        include(AMReX_SetupCUDA)
    +    else ()
    +        find_dependency(CUDAToolkit REQUIRED)
    +    endif ()
     endif ()
     
     include( "${CMAKE_CURRENT_LIST_DIR}/AMReXTargets.cmake" )
    
    From d2cb54668b5e49fd35a60164f40ad6f36720f806 Mon Sep 17 00:00:00 2001
    From: Jon Rood 
    Date: Tue, 28 Jun 2022 13:27:02 -0600
    Subject: [PATCH 008/111] Fix gnu make on Crusher for mpi_gtl_hsa (#2857)
    
    Update environment variable at OLCF for mpi_gtl_hsa.
    ---
     Tools/GNUMake/sites/Make.olcf | 4 ++--
     1 file changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/Tools/GNUMake/sites/Make.olcf b/Tools/GNUMake/sites/Make.olcf
    index 651971c6c95..fcccfc8de08 100644
    --- a/Tools/GNUMake/sites/Make.olcf
    +++ b/Tools/GNUMake/sites/Make.olcf
    @@ -60,7 +60,7 @@ ifeq ($(which_computer),spock)
         endif
         # for gpu aware mpi
         ifeq ($(USE_HIP),TRUE)
    -      LIBRARIES += $(PE_MPICH_GTL_DIR_gfx908) -lmpi_gtl_hsa
    +      LIBRARIES += $(PE_MPICH_GTL_DIR_amd_gfx908) -lmpi_gtl_hsa
         endif
       endif
     endif
    @@ -80,7 +80,7 @@ ifeq ($(which_computer),crusher)
         endif
         # for gpu aware mpi
         ifeq ($(USE_HIP),TRUE)
    -      LIBRARIES += -lmpi_gtl_hsa
    +      LIBRARIES += $(PE_MPICH_GTL_DIR_amd_gfx90a) -lmpi_gtl_hsa
         endif
       endif
     endif
    
    From 2c5f475d451aede47fe2cad2bbd8681c9ca1f456 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Tue, 28 Jun 2022 12:51:19 -0700
    Subject: [PATCH 009/111] Write runtime attribs to checkpoints on GPUs (#2856)
    
    ---
     Src/Particle/AMReX_WriteBinaryParticleData.H | 14 ++++++++++++++
     1 file changed, 14 insertions(+)
    
    diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H
    index 5a71d015763..70fcbfda237 100644
    --- a/Src/Particle/AMReX_WriteBinaryParticleData.H
    +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H
    @@ -231,12 +231,26 @@ packIOData (Vector& idata, Vector& rdata, const PC& pc, int l
                         }
                     }
     
    +                for (int j = 0; j < ptd.m_num_runtime_int; j++) {
    +                    if (write_int_comp_d_ptr[PC::SuperParticleType::NInt + j]) {
    +                        idata_d_ptr[iout_index] = ptd.m_runtime_idata[j][pindex];
    +                        iout_index++;
    +                    }
    +                }
    +
                     for (int j = 0; j < PC::SuperParticleType::NReal; j++) {
                         if (write_real_comp_d_ptr[j]) {
                             rdata_d_ptr[rout_index] = p.rdata(j);
                             rout_index++;
                         }
                     }
    +
    +                for (int j = 0; j < ptd.m_num_runtime_real; j++) {
    +                    if (write_real_comp_d_ptr[PC::SuperParticleType::NReal + j]) {
    +                        rdata_d_ptr[rout_index] = ptd.m_runtime_rdata[j][pindex];
    +                        rout_index++;
    +                    }
    +                }
                 }
             });
     
    
    From b2b9150ada12af878a07e0628be03668a9d17270 Mon Sep 17 00:00:00 2001
    From: Burlen Loring 
    Date: Tue, 28 Jun 2022 13:42:41 -0700
    Subject: [PATCH 010/111] update the SENSEI in situ coupling for SENSEI v4.0.0
     (#2785)
    
    In this release, an install of VTK is no longer required.
    To compile AMReX w/ SENSEI use:
    
    ```cmake
    -DAMReX_SENSEI=ON -DSENSEI_DIR=<path to install>/<lib dir>/cmake
    ```
    
    Note: <lib dir> may be `lib` or `lib64` or something else depending on
    your OS and is determined by CMake at configure time. See the CMake
    GNUInstallDirs documentation for more information.
    ---
     .github/workflows/sensei.yml                  |   6 +-
     .../source/Visualization.rst                  |  10 +-
     Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H      |   8 +-
     Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp    | 118 +++++++--------
     Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp   |   2 +-
     Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H  |   8 +-
     .../SENSEI/AMReX_AmrMeshDataAdaptor.cpp       | 116 +++++++-------
     .../SENSEI/AMReX_AmrMeshInSituBridge.cpp      |   2 +-
     .../SENSEI/AMReX_AmrMeshParticleDataAdaptor.H |   8 +-
     .../AMReX_AmrMeshParticleDataAdaptorI.H       |   8 +-
     .../SENSEI/AMReX_AmrParticleDataAdaptor.H     |   8 +-
     .../SENSEI/AMReX_AmrParticleDataAdaptorI.H    |   8 +-
     Src/Extern/SENSEI/AMReX_InSituUtils.H         |  20 +--
     Src/Extern/SENSEI/AMReX_InSituUtils.cpp       |   8 +-
     Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H |  24 +--
     .../SENSEI/AMReX_ParticleDataAdaptorI.H       | 142 +++++++++---------
     Tools/CMake/AMReXThirdPartyLibraries.cmake    |   2 +-
     17 files changed, 250 insertions(+), 248 deletions(-)
    
    diff --git a/.github/workflows/sensei.yml b/.github/workflows/sensei.yml
    index 19121889d6b..f551f46bec0 100644
    --- a/.github/workflows/sensei.yml
    +++ b/.github/workflows/sensei.yml
    @@ -17,9 +17,9 @@ jobs:
           CC: clang
           CXXFLAGS: "-Werror -Wshadow -Woverloaded-virtual -Wunreachable-code -fno-operator-names"
           CMAKE_GENERATOR: Ninja
    -      CMAKE_PREFIX_PATH: /root/install/sensei/develop/lib/cmake
    +      CMAKE_PREFIX_PATH: /root/install/sensei/v4.0.0/lib64/cmake
         container:
    -      image: ryankrattiger/sensei:fedora33-vtk-mpi-20210616
    +      image: senseiinsitu/ci:fedora35-amrex-20220613
         steps:
         - uses: actions/checkout@v2
         - name: Setup
    @@ -27,7 +27,7 @@ jobs:
         - name: Configure
           run: |
             cd build
    -        cmake ..                  \
    +        cmake ..                     \
                 -DCMAKE_BUILD_TYPE=Debug \
                 -DAMReX_ENABLE_TESTS=ON  \
                 -DAMReX_FORTRAN=OFF      \
    diff --git a/Docs/sphinx_documentation/source/Visualization.rst b/Docs/sphinx_documentation/source/Visualization.rst
    index ea8b4ab8c0b..59f95f76090 100644
    --- a/Docs/sphinx_documentation/source/Visualization.rst
    +++ b/Docs/sphinx_documentation/source/Visualization.rst
    @@ -873,9 +873,12 @@ and point to the CMake configuration installed with SENSEI.
     
     .. code-block:: bash
     
    -   cmake -DAMReX_SENSEI=ON -DSENSEI_DIR=<path to install>/lib/cmake ..
    +   cmake -DAMReX_SENSEI=ON -DSENSEI_DIR=<path to install>/<lib dir>/cmake ..
     
    -When CMake generates the make files proceed as usual.
    +When CMake generates the make files proceed as usual. Note: <lib dir> may be
    +`lib` or `lib64` or something else depending on what CMake decided to use for
    +your particular OS. See the CMake GNUInstallDirs documentation for more
    +information.
     
     .. code-block:: bash
     
    @@ -952,8 +955,7 @@ dataset.
     
     Obtaining SENSEI
     -----------------
    -SENSEI is hosted on Kitware's Gitlab site at https://gitlab.kitware.com/sensei/sensei
    -It's best to checkout the latest release rather than working on the master branch.
    +SENSEI is hosted on github at https://github.com/SENSEI-insitu/SENSEI.git
     
     To ease the burden of wrangling back end installs SENSEI provides two platforms
     with all dependencies pre-installed, a VirtualBox VM, and a NERSC Cori
    diff --git a/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H b/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H
    index 061ff14c301..602a6298126 100644
    --- a/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H
    +++ b/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H
    @@ -30,10 +30,10 @@ public:
       int GetNumberOfArrays(const std::string &meshName, int association, unsigned int &numberOfArrays) override;
       int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override;
     #endif
    -  int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override;
    -  int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override;
    -  int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override;
    -  int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override;
    +  int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override;
    +  int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override;
    +  int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override;
    +  int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override;
       int ReleaseData() override;
     
     protected:
    diff --git a/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp b/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp
    index 135c21ef0e2..aa801eb0993 100644
    --- a/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp
    +++ b/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp
    @@ -1,22 +1,22 @@
     #include "AMReX_AmrDataAdaptor.H"
     
    +#include "senseiConfig.h"
     #include "MPIUtils.h"
     #include "STLUtils.h"
    -#include "VTKUtils.h"
    +#include "SVTKUtils.h"
     #include "Profiler.h"
     #include "Error.h"
     
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
     
     #include 
     #include 
    @@ -62,15 +62,15 @@ int DescriptorMap::Initialize(const DescriptorList &descriptors)
     
                 if (itype.cellCentered())
                 {
    -                this->Map[vtkDataObject::CELL][arrayName] = std::make_pair(i,j);
    +                this->Map[svtkDataObject::CELL][arrayName] = std::make_pair(i,j);
                 }
                 else if (itype.nodeCentered())
                 {
    -                this->Map[vtkDataObject::POINT][arrayName] = std::make_pair(i,j);
    +                this->Map[svtkDataObject::POINT][arrayName] = std::make_pair(i,j);
                 }
                 else
                 {
    -                this->Map[vtkDataObject::FIELD][arrayName] = std::make_pair(i,j);
    +                this->Map[svtkDataObject::FIELD][arrayName] = std::make_pair(i,j);
                 }
             }
         }
    @@ -156,7 +156,7 @@ struct AmrDataAdaptor::InternalsType
         int PinMesh;
         amrex::InSituUtils::DescriptorMap SimMetadata;
     #if SENSEI_VERSION_MAJOR < 3
    -    std::vector ManagedObjects;
    +    std::vector ManagedObjects;
     #endif
         std::vector> Masks;
     };
    @@ -225,11 +225,11 @@ int AmrDataAdaptor::GetMeshMetadata(unsigned int id,
         metadata->GlobalView = true;
     
         metadata->MeshName = "mesh";
    -    metadata->MeshType = VTK_OVERLAPPING_AMR;
    -    metadata->BlockType = VTK_UNIFORM_GRID;
    +    metadata->MeshType = SVTK_OVERLAPPING_AMR;
    +    metadata->BlockType = SVTK_UNIFORM_GRID;
         metadata->NumBlocks = 0;
         metadata->NumBlocksLocal = {-1};
    -    metadata->CoordinateType = InSituUtils::amrex_tt::vtk_type_enum();
    +    metadata->CoordinateType = InSituUtils::amrex_tt::svtk_type_enum();
         metadata->StaticMesh = 0;
     
         // TODO
    @@ -318,14 +318,14 @@ int AmrDataAdaptor::GetMeshMetadata(unsigned int id,
                 std::string arrayName = desc.name(j);
                 metadata->ArrayName.push_back(arrayName);
                 metadata->ArrayComponents.push_back(1);
    -            metadata->ArrayType.push_back(InSituUtils::amrex_tt::vtk_type_enum());
    +            metadata->ArrayType.push_back(InSituUtils::amrex_tt::svtk_type_enum());
     
                 if (itype.cellCentered())
    -                metadata->ArrayCentering.push_back(vtkDataObject::CELL);
    +                metadata->ArrayCentering.push_back(svtkDataObject::CELL);
                 else if (itype.nodeCentered())
    -                metadata->ArrayCentering.push_back(vtkDataObject::POINT);
    +                metadata->ArrayCentering.push_back(svtkDataObject::POINT);
                 else
    -                metadata->ArrayCentering.push_back(vtkDataObject::FIELD);
    +                metadata->ArrayCentering.push_back(svtkDataObject::FIELD);
             }
     
         }
    @@ -557,8 +557,8 @@ int AmrDataAdaptor::GetNumberOfArrays(const std::string &meshName,
             return -1;
         }
     
    -    if ((association != vtkDataObject::POINT) &&
    -        (association != vtkDataObject::CELL))
    +    if ((association != svtkDataObject::POINT) &&
    +        (association != svtkDataObject::CELL))
         {
             SENSEI_ERROR("Invalid association " << association)
             return -1;
    @@ -590,7 +590,7 @@ int AmrDataAdaptor::GetArrayName(const std::string &meshName,
         if (this->Internals->SimMetadata.GetName(association, index, arrayName))
         {
             SENSEI_ERROR("No array named \"" << arrayName << "\" in "
    -            << sensei::VTKUtils::GetAttributesName(association)
    +            << sensei::SVTKUtils::GetAttributesName(association)
                 << " data")
             return -1;
         }
    @@ -603,7 +603,7 @@ int AmrDataAdaptor::GetArrayName(const std::string &meshName,
     
     //-----------------------------------------------------------------------------
     int AmrDataAdaptor::GetMesh(const std::string &meshName,
    -    bool structureOnly, vtkDataObject *&mesh)
    +    bool structureOnly, svtkDataObject *&mesh)
     {
         amrex::ignore_unused(structureOnly);
     
    @@ -626,8 +626,8 @@ int AmrDataAdaptor::GetMesh(const std::string &meshName,
     
         unsigned int nLevels = InSituUtils::NumActiveLevels(levels);
     
    -    // initialize new vtk datasets
    -    vtkOverlappingAMR *amrMesh = vtkOverlappingAMR::New();
    +    // initialize new svtk datasets
    +    svtkOverlappingAMR *amrMesh = svtkOverlappingAMR::New();
     #if SENSEI_VERSION_MAJOR < 3
         Internals->ManagedObjects.push_back(amrMesh);
     #endif
    @@ -685,12 +685,12 @@ int AmrDataAdaptor::GetMesh(const std::string &meshName,
                 int cboxLo[3] = {AMREX_ARLIM(cbox.loVect())};
                 int cboxHi[3] = {AMREX_ARLIM(cbox.hiVect())};
     
    -            // vtk's representation of box metadata
    -            vtkAMRBox block(cboxLo, cboxHi);
    +            // svtk's representation of box metadata
    +            svtkAMRBox block(cboxLo, cboxHi);
                 amrMesh->SetAMRBox(i, j, block);
                 amrMesh->SetAMRBlockSourceIndex(i, j, gid++);
     
    -            // skip building a vtk amrMesh for the non local boxes
    +            // skip building a svtk amrMesh for the non local boxes
                 if (dmap[j] != rank)
                     continue;
     
    @@ -705,14 +705,14 @@ int AmrDataAdaptor::GetMesh(const std::string &meshName,
                 int nboxLo[3] = {AMREX_ARLIM(nbox.loVect())};
                 int nboxHi[3] = {AMREX_ARLIM(nbox.hiVect())};
     
    -            // new vtk uniform amrMesh, node centered
    -            vtkUniformGrid *ug = vtkUniformGrid::New();
    +            // new svtk uniform amrMesh, node centered
    +            svtkUniformGrid *ug = svtkUniformGrid::New();
                 ug->SetOrigin(origin);
                 ug->SetSpacing(spacing);
                 ug->SetExtent(nboxLo[0], nboxHi[0],
                     nboxLo[1], nboxHi[1], nboxLo[2], nboxHi[2]);
     
    -            // pass the block into vtk
    +            // pass the block into svtk
                 amrMesh->SetDataSet(i, j, ug);
                 ug->Delete();
             }
    @@ -722,7 +722,7 @@ int AmrDataAdaptor::GetMesh(const std::string &meshName,
     }
     
     //-----------------------------------------------------------------------------
    -int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh,
    +int AmrDataAdaptor::AddGhostCellsArray(svtkDataObject* mesh,
         const std::string &meshName)
     {
         sensei::TimeEvent<64> event("AmrDataAdaptor::AddGhostCellsArray");
    @@ -733,7 +733,7 @@ int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh,
             return -1;
         }
     
    -    vtkOverlappingAMR *amrMesh = dynamic_cast(mesh);
    +    svtkOverlappingAMR *amrMesh = dynamic_cast(mesh);
         if (!amrMesh)
         {
             SENSEI_ERROR("Invalid mesh type "
    @@ -780,7 +780,7 @@ int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh,
                 if (dMap[j] != rank)
                     continue;
     
    -            vtkUniformGrid *blockMesh = amrMesh->GetDataSet(i, j);
    +            svtkUniformGrid *blockMesh = amrMesh->GetDataSet(i, j);
     
                 if (!blockMesh)
                 {
    @@ -790,24 +790,24 @@ int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh,
     
                 long nCells = blockMesh->GetNumberOfCells();
     
    -            // transfer mask array into vtk
    -            vtkUnsignedCharArray *ga = vtkUnsignedCharArray::New();
    -            ga->SetName("vtkGhostType");
    +            // transfer mask array into svtk
    +            svtkUnsignedCharArray *ga = svtkUnsignedCharArray::New();
    +            ga->SetName("svtkGhostType");
                 ga->SetArray(mask[j], nCells, 0);
                 blockMesh->GetCellData()->AddArray(ga);
                 ga->Delete();
     
                 // for debug can visualize the ghost cells
                 // FIXME -- a bug in Catalyst ignores internal ghost zones
    -            // when using the VTK writrer. Until that bug gets fixed, one
    +            // when using the SVTK writer. Until that bug gets fixed, one
                 // can manually inject this copy using a PV Python filter
    -            ga = vtkUnsignedCharArray::New();
    +            ga = svtkUnsignedCharArray::New();
                 ga->SetName("GhostType");
                 ga->SetArray(mask[j], nCells, 1);
                 blockMesh->GetCellData()->AddArray(ga);
                 ga->Delete();
     
    -            // because VTK takes ownership
    +            // because SVTK takes ownership
                 mask[j] = nullptr;
             }
         }
    @@ -816,7 +816,7 @@ int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh,
     }
     
     //-----------------------------------------------------------------------------
    -int AmrDataAdaptor::AddGhostNodesArray(vtkDataObject *mesh,
    +int AmrDataAdaptor::AddGhostNodesArray(svtkDataObject *mesh,
         const std::string &meshName)
     {
         amrex::ignore_unused(mesh);
    @@ -834,7 +834,7 @@ int AmrDataAdaptor::AddGhostNodesArray(vtkDataObject *mesh,
     }
     
     //-----------------------------------------------------------------------------
    -int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName,
    +int AmrDataAdaptor::AddArray(svtkDataObject* mesh, const std::string &meshName,
         int association, const std::string &arrayName)
     {
         sensei::TimeEvent<64> event("AmrDataAdaptor::AddArray");
    @@ -848,7 +848,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName,
             return -1;
         }
     
    -    vtkOverlappingAMR *amrMesh = dynamic_cast(mesh);
    +    svtkOverlappingAMR *amrMesh = dynamic_cast(mesh);
         if (!amrMesh)
         {
             SENSEI_ERROR("Invalid mesh type "
    @@ -861,8 +861,8 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName,
             return -1;
         }
     
    -    if ((association != vtkDataObject::CELL) &&
    -        (association != vtkDataObject::POINT))
    +    if ((association != svtkDataObject::CELL) &&
    +        (association != svtkDataObject::POINT))
         {
             SENSEI_ERROR("Invalid association " << association)
             return -1;
    @@ -878,7 +878,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName,
         if (this->Internals->SimMetadata.GetIndex(arrayName, association, fab, comp))
         {
             SENSEI_ERROR("Failed to locate descriptor for "
    -            << sensei::VTKUtils::GetAttributesName(association)
    +            << sensei::SVTKUtils::GetAttributesName(association)
                 << " data array \"" << arrayName << "\"")
             return -1;
         }
    @@ -894,8 +894,8 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName,
             amrex::MultiFab& state = levels[i]->get_new_data(fab);
             unsigned int ng = state.nGrow();
     
    -        if (!((association == vtkDataObject::CELL) && state.is_cell_centered()) &&
    -            !((association == vtkDataObject::POINT) && state.is_nodal()))
    +        if (!((association == svtkDataObject::CELL) && state.is_cell_centered()) &&
    +            !((association == svtkDataObject::POINT) && state.is_nodal()))
             {
                 SENSEI_ERROR("association does not match MultiFAB centering")
                 return -1;
    @@ -926,7 +926,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName,
                 int cboxLo[3] = {AMREX_ARLIM(cbox.loVect())};
                 int cboxHi[3] = {AMREX_ARLIM(cbox.hiVect())};
     
    -            // skip building a vtk mesh for the non local boxes
    +            // skip building a svtk mesh for the non local boxes
                 if (dmap[j] != rank)
                     continue;
     
    @@ -938,7 +938,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName,
                 int nboxHi[3] = {AMREX_ARLIM(nbox.hiVect())};
     
                 // get the block mesh
    -            vtkUniformGrid *ug = amrMesh->GetDataSet(i, j);
    +            svtkUniformGrid *ug = amrMesh->GetDataSet(i, j);
     
                 // node centered size
                 long nlen = 1;
    @@ -953,9 +953,9 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName,
                 // pointer to the data
                 amrex_real *pcd = state[j].dataPtr(comp);
     
    -            // allocate vtk array
    -            InSituUtils::amrex_tt::vtk_type *da =
    -                InSituUtils::amrex_tt::vtk_type::New();
    +            // allocate svtk array
    +            InSituUtils::amrex_tt::svtk_type *da =
    +                InSituUtils::amrex_tt::svtk_type::New();
     
                 // set component name
                 da->SetName(arrayName.c_str());
    @@ -981,7 +981,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName,
     
     #if defined(SENSEI_DEBUG)
                 // mark level id
    -            vtkFloatArray *la = vtkFloatArray::New();
    +            svtkFloatArray *la = svtkFloatArray::New();
                 la->SetName("amrex_level_id");
                 la->SetNumberOfTuples(clen);
                 la->Fill(i);
    @@ -989,7 +989,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName,
                 la->Delete();
     
                 // mark mpi rank
    -            vtkFloatArray *ra = vtkFloatArray::New();
    +            svtkFloatArray *ra = svtkFloatArray::New();
                 ra->SetName("amrex_mpi_rank");
                 ra->SetNumberOfTuples(clen);
                 ra->Fill(rank);
    diff --git a/Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp b/Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp
    index 26f63d2a101..018669a4bfc 100644
    --- a/Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp
    +++ b/Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp
    @@ -29,7 +29,7 @@ AmrInSituBridge::update(Amr *dataSource)
             data_adaptor->SetDataSource(dataSource);
             data_adaptor->SetDataTime(dataSource->cumTime());
             data_adaptor->SetDataTimeStep(dataSource->levelSteps(0));
    -        ret = analysis_adaptor->Execute(data_adaptor) ? 0 : -1;
    +        ret = analysis_adaptor->Execute(data_adaptor, nullptr) ? 0 : -1;
             data_adaptor->ReleaseData();
             data_adaptor->Delete();
     
    diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H b/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H
    index 5a8a88552af..54277505bd4 100644
    --- a/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H
    +++ b/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H
    @@ -34,10 +34,10 @@ public:
       int GetNumberOfArrays(const std::string &meshName, int association, unsigned int &numberOfArrays) override;
       int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override;
     #endif
    -  int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override;
    -  int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override;
    -  int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override;
    -  int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override;
    +  int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override;
    +  int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override;
    +  int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override;
    +  int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override;
       int ReleaseData() override;
     
     protected:
    diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.cpp b/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.cpp
    index 2e4968cc8b2..34b92c1d25d 100644
    --- a/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.cpp
    +++ b/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.cpp
    @@ -2,18 +2,18 @@
     
     #include "Profiler.h"
     #include "Error.h"
    -#include "VTKUtils.h"
    -
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    +#include "SVTKUtils.h"
    +
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
     
     #include 
     #include 
    @@ -58,11 +58,11 @@ int MeshStateMap::Initialize(
     
                 if (state.is_cell_centered())
                 {
    -                this->Map[vtkDataObject::CELL][arrayName] = std::make_pair(i,j);
    +                this->Map[svtkDataObject::CELL][arrayName] = std::make_pair(i,j);
                 }
                 else if (state.is_nodal())
                 {
    -                this->Map[vtkDataObject::POINT][arrayName] = std::make_pair(i,j);
    +                this->Map[svtkDataObject::POINT][arrayName] = std::make_pair(i,j);
                 }
             }
         }
    @@ -83,7 +83,7 @@ struct AmrMeshDataAdaptor::InternalsType
         std::vector> Names;
         amrex::InSituUtils::MeshStateMap StateMetadata;
     #if SENSEI_VERSION_MAJOR < 3
    -    std::vector ManagedObjects;
    +    std::vector ManagedObjects;
     #endif
     };
     
    @@ -149,13 +149,13 @@ int AmrMeshDataAdaptor::GetMeshMetadata(unsigned int id,
         metadata->GlobalView = true;
     
         metadata->MeshName = "mesh";
    -    metadata->MeshType = VTK_OVERLAPPING_AMR;
    -    metadata->BlockType = VTK_UNIFORM_GRID;
    +    metadata->MeshType = SVTK_OVERLAPPING_AMR;
    +    metadata->BlockType = SVTK_UNIFORM_GRID;
         metadata->NumBlocks = 0;
         metadata->NumCells = 0;
         metadata->NumPoints = 0;
         metadata->NumBlocksLocal = {-1};
    -    metadata->CoordinateType = InSituUtils::amrex_tt::vtk_type_enum();
    +    metadata->CoordinateType = InSituUtils::amrex_tt::svtk_type_enum();
         metadata->StaticMesh = 0;
     
         // num levels
    @@ -224,7 +224,7 @@ int AmrMeshDataAdaptor::GetMeshMetadata(unsigned int id,
                 {pdLo[0], pdHi[0], pdLo[1], pdHi[1], pdLo[2], pdHi[2]});
         }
     
    -    // global extent (note: VTK uses point centered indexing)
    +    // global extent (note: SVTK uses point centered indexing)
         const amrex::Box& cdom = this->Internals->Mesh->Geom(0).Domain();
         amrex::Box ndom = surroundingNodes(cdom);
     
    @@ -261,19 +261,19 @@ int AmrMeshDataAdaptor::GetMeshMetadata(unsigned int id,
             // scalar, vector, tensor
             metadata->ArrayComponents[j] = 1;
             // POD type
    -        metadata->ArrayType[j] = InSituUtils::amrex_tt::vtk_type_enum();
    +        metadata->ArrayType[j] = InSituUtils::amrex_tt::svtk_type_enum();
             // mesh centering
             if (state0.is_cell_centered())
             {
    -            metadata->ArrayCentering[j] = vtkDataObject::CELL;
    +            metadata->ArrayCentering[j] = svtkDataObject::CELL;
             }
             else if (state0.is_nodal())
             {
    -            metadata->ArrayCentering[j] = vtkDataObject::POINT;
    +            metadata->ArrayCentering[j] = svtkDataObject::POINT;
             }
             else
             {
    -            metadata->ArrayCentering[j] = vtkDataObject::FIELD;
    +            metadata->ArrayCentering[j] = svtkDataObject::FIELD;
             }
         }
     
    @@ -396,8 +396,8 @@ int AmrMeshDataAdaptor::GetNumberOfArrays(const std::string &meshName,
             return -1;
         }
     
    -    if ((association != vtkDataObject::POINT) &&
    -        (association != vtkDataObject::CELL))
    +    if ((association != svtkDataObject::POINT) &&
    +        (association != svtkDataObject::CELL))
         {
             SENSEI_ERROR("Invalid association " << association)
             return -1;
    @@ -427,7 +427,7 @@ int AmrMeshDataAdaptor::GetArrayName(const std::string &meshName,
         if (this->Internals->StateMetadata.GetName(association, index, arrayName))
         {
             SENSEI_ERROR("No array named \"" << arrayName << "\" in "
    -            << sensei::VTKUtils::GetAttributesName(association)
    +            << sensei::SVTKUtils::GetAttributesName(association)
                 << " data")
             return -1;
         }
    @@ -475,7 +475,7 @@ int AmrMeshDataAdaptor::GetMeshHasGhostCells(const std::string &meshName, int &n
     
     //-----------------------------------------------------------------------------
     int AmrMeshDataAdaptor::GetMesh(const std::string &meshName,
    -    bool structureOnly, vtkDataObject *&mesh)
    +    bool structureOnly, svtkDataObject *&mesh)
     {
         amrex::ignore_unused(structureOnly);
     
    @@ -498,8 +498,8 @@ int AmrMeshDataAdaptor::GetMesh(const std::string &meshName,
     
         int nLevels = this->Internals->Mesh->finestLevel() + 1;
     
    -    // initialize new vtk datasets
    -    vtkOverlappingAMR *amrMesh = vtkOverlappingAMR::New();
    +    // initialize new svtk datasets
    +    svtkOverlappingAMR *amrMesh = svtkOverlappingAMR::New();
     #if SENSEI_VERSION_MAJOR < 3
         Internals->ManagedObjects.push_back(amrMesh);
     #endif
    @@ -560,12 +560,12 @@ int AmrMeshDataAdaptor::GetMesh(const std::string &meshName,
                 int cboxLo[3] = {AMREX_ARLIM(cbox.loVect())};
                 int cboxHi[3] = {AMREX_ARLIM(cbox.hiVect())};
     
    -            // vtk's representation of box metadata
    -            vtkAMRBox block(cboxLo, cboxHi);
    +            // svtk's representation of box metadata
    +            svtkAMRBox block(cboxLo, cboxHi);
                 amrMesh->SetAMRBox(i, j, block);
                 amrMesh->SetAMRBlockSourceIndex(i, j, gid++);
     
    -            // skip building a vtk amrMesh for the non local boxes
    +            // skip building a svtk amrMesh for the non local boxes
                 if (dmap[j] != rank)
                     continue;
     
    @@ -580,14 +580,14 @@ int AmrMeshDataAdaptor::GetMesh(const std::string &meshName,
                 int nboxLo[3] = {AMREX_ARLIM(nbox.loVect())};
                 int nboxHi[3] = {AMREX_ARLIM(nbox.hiVect())};
     
    -            // new vtk uniform amrMesh, node centered
    -            vtkUniformGrid *ug = vtkUniformGrid::New();
    +            // new svtk uniform amrMesh, node centered
    +            svtkUniformGrid *ug = svtkUniformGrid::New();
                 ug->SetOrigin(origin);
                 ug->SetSpacing(spacing);
                 ug->SetExtent(nboxLo[0], nboxHi[0],
                     nboxLo[1], nboxHi[1], nboxLo[2], nboxHi[2]);
     
    -            // pass the block into vtk
    +            // pass the block into svtk
                 amrMesh->SetDataSet(i, j, ug);
                 ug->Delete();
             }
    @@ -597,7 +597,7 @@ int AmrMeshDataAdaptor::GetMesh(const std::string &meshName,
     }
     
     //-----------------------------------------------------------------------------
    -int AmrMeshDataAdaptor::AddGhostNodesArray(vtkDataObject *mesh,
    +int AmrMeshDataAdaptor::AddGhostNodesArray(svtkDataObject *mesh,
         const std::string &meshName)
     {
         amrex::ignore_unused(mesh);
    @@ -613,7 +613,7 @@ int AmrMeshDataAdaptor::AddGhostNodesArray(vtkDataObject *mesh,
     }
     
     //-----------------------------------------------------------------------------
    -int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh,
    +int AmrMeshDataAdaptor::AddGhostCellsArray(svtkDataObject* mesh,
         const std::string &meshName)
     {
         if (meshName != "mesh")
    @@ -622,7 +622,7 @@ int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh,
             return -1;
         }
     
    -    vtkOverlappingAMR *amrMesh = dynamic_cast<vtkOverlappingAMR*>(mesh);
    +    svtkOverlappingAMR *amrMesh = dynamic_cast<svtkOverlappingAMR*>(mesh);
         if (!amrMesh)
         {
             SENSEI_ERROR("Invalid mesh type "
    @@ -701,7 +701,7 @@ int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh,
                 if (dmap[j] != rank)
                     continue;
     
    -            vtkUniformGrid *blockMesh = amrMesh->GetDataSet(i, j);
    +            svtkUniformGrid *blockMesh = amrMesh->GetDataSet(i, j);
     
                 if (!blockMesh)
                 {
    @@ -711,18 +711,18 @@ int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh,
     
                 long nCells = blockMesh->GetNumberOfCells();
     
    -            // transfer mask array into vtk
    -            vtkUnsignedCharArray *ga = vtkUnsignedCharArray::New();
    -            ga->SetName("vtkGhostType");
    +            // transfer mask array into svtk
    +            svtkUnsignedCharArray *ga = svtkUnsignedCharArray::New();
    +            ga->SetName("svtkGhostType");
                 ga->SetArray(mask[j], nCells, 0);
                 blockMesh->GetCellData()->AddArray(ga);
                 ga->Delete();
     
                 // for debug can visualize the ghost cells
                 // FIXME -- a bug in Catalyst ignores internal ghost zones
    -            // when using the VTK writrer. Until that bug gets fixed, one
    +            // when using the SVTK writer. Until that bug gets fixed, one
                 // can manually inject this copy using a PV Python filter
    -            ga = vtkUnsignedCharArray::New();
    +            ga = svtkUnsignedCharArray::New();
                 ga->SetName("GhostType");
                 ga->SetArray(mask[j], nCells, 1);
                 blockMesh->GetCellData()->AddArray(ga);
    @@ -734,7 +734,7 @@ int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh,
     }
     
     //-----------------------------------------------------------------------------
    -int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh,
    +int AmrMeshDataAdaptor::AddArray(svtkDataObject* mesh,
         const std::string &meshName, int association,
         const std::string &arrayName)
     {
    @@ -747,7 +747,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh,
             return -1;
         }
     
    -    vtkOverlappingAMR *amrMesh = dynamic_cast<vtkOverlappingAMR*>(mesh);
    +    svtkOverlappingAMR *amrMesh = dynamic_cast<svtkOverlappingAMR*>(mesh);
         if (!amrMesh)
         {
             SENSEI_ERROR("Invalid mesh type "
    @@ -760,8 +760,8 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh,
             return -1;
         }
     
    -    if ((association != vtkDataObject::CELL) &&
    -        (association != vtkDataObject::CELL))
    +    if ((association != svtkDataObject::POINT) &&
    +        (association != svtkDataObject::CELL))
         {
             SENSEI_ERROR("Invalid association " << association)
             return -1;
    @@ -774,7 +774,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh,
         if (this->Internals->StateMetadata.GetIndex(arrayName, association, fab, comp))
         {
             SENSEI_ERROR("Failed to locate descriptor for "
    -            << sensei::VTKUtils::GetAttributesName(association)
    +            << sensei::SVTKUtils::GetAttributesName(association)
                 << " data array \"" << arrayName << "\"")
             return -1;
         }
    @@ -792,8 +792,8 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh,
             unsigned int ng = state.nGrow();
     
             // check centering
    -        if (!((association == vtkDataObject::CELL) && state.is_cell_centered()) &&
    -            !((association == vtkDataObject::POINT) && state.is_nodal()))
    +        if (!((association == svtkDataObject::CELL) && state.is_cell_centered()) &&
    +            !((association == svtkDataObject::POINT) && state.is_nodal()))
             {
                 SENSEI_ERROR("association does not match MultiFab centering")
                 return -1;
    @@ -824,7 +824,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh,
                 int cboxLo[3] = {AMREX_ARLIM(cbox.loVect())};
                 int cboxHi[3] = {AMREX_ARLIM(cbox.hiVect())};
     
    -            // skip building a vtk mesh for the non local boxes
    +            // skip building a svtk mesh for the non local boxes
                 if (dmap[j] != rank)
                     continue;
     
    @@ -836,7 +836,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh,
                 int nboxHi[3] = {AMREX_ARLIM(nbox.hiVect())};
     
                 // get the block mesh
    -            vtkUniformGrid *ug = amrMesh->GetDataSet(i, j);
    +            svtkUniformGrid *ug = amrMesh->GetDataSet(i, j);
     
                 // node centered size
                 long nlen = 1;
    @@ -851,9 +851,9 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh,
                 // pointer to the data
                 amrex_real *pcd = state[j].dataPtr(comp);
     
    -            // allocate vtk array
    -            InSituUtils::amrex_tt::vtk_type *da =
    -                InSituUtils::amrex_tt::vtk_type::New();
    +            // allocate svtk array
    +            InSituUtils::amrex_tt::svtk_type *da =
    +                InSituUtils::amrex_tt::svtk_type::New();
     
                 // set component name
                 da->SetName(arrayName.c_str());
    @@ -879,7 +879,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh,
     
     #if defined(SENSEI_DEBUG)
                 // mark level id
    -            vtkFloatArray *la = vtkFloatArray::New();
    +            svtkFloatArray *la = svtkFloatArray::New();
                 la->SetName("amrex_level_id");
                 la->SetNumberOfTuples(clen);
                 la->Fill(i);
    @@ -887,7 +887,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh,
                 la->Delete();
     
                 // mark mpi rank
    -            vtkFloatArray *ra = vtkFloatArray::New();
    +            svtkFloatArray *ra = svtkFloatArray::New();
                 ra->SetName("amrex_mpi_rank");
                 ra->SetNumberOfTuples(clen);
                 ra->Fill(rank);
    diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshInSituBridge.cpp b/Src/Extern/SENSEI/AMReX_AmrMeshInSituBridge.cpp
    index 55adb1b5c59..cd6b6794171 100644
    --- a/Src/Extern/SENSEI/AMReX_AmrMeshInSituBridge.cpp
    +++ b/Src/Extern/SENSEI/AMReX_AmrMeshInSituBridge.cpp
    @@ -35,7 +35,7 @@ AmrMeshInSituBridge::update(unsigned int step, double time,
             data_adaptor->SetDataSource(mesh, states, names);
             data_adaptor->SetDataTime(time);
             data_adaptor->SetDataTimeStep(step);
    -        ret = analysis_adaptor->Execute(data_adaptor) ? 0 : -1;
    +        ret = analysis_adaptor->Execute(data_adaptor, nullptr) ? 0 : -1;
             data_adaptor->ReleaseData();
             data_adaptor->Delete();
     
    diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptor.H b/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptor.H
    index 61e4d510745..fbd5227824f 100644
    --- a/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptor.H
    +++ b/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptor.H
    @@ -45,10 +45,10 @@ public:
       int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override;
     #endif
       int GetNumberOfMeshes(unsigned int &numMeshes) override;
    -  int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override;
    -  int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override;
    -  int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override;
    -  int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override;
    +  int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override;
    +  int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override;
    +  int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override;
    +  int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override;
       int ReleaseData() override;
     
     protected:
    diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptorI.H b/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptorI.H
    index a93357d5043..4cbb53203b6 100644
    --- a/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptorI.H
    +++ b/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptorI.H
    @@ -148,7 +148,7 @@ template
     int AmrMeshParticleDataAdaptor::GetMesh(
       const std::string &meshName,
       bool structureOnly,
    -  vtkDataObject *&mesh)
    +  svtkDataObject *&mesh)
     {
       if(meshName == m_meshName)
       {
    @@ -164,7 +164,7 @@ int AmrMeshParticleDataAdaptor::
     
     template
     int AmrMeshParticleDataAdaptor::AddGhostNodesArray(
    -  vtkDataObject* mesh,
    +  svtkDataObject* mesh,
       const std::string &meshName)
     {
       if(meshName == m_meshName)
    @@ -181,7 +181,7 @@ int AmrMeshParticleDataAdaptor::
     
     template
     int AmrMeshParticleDataAdaptor::AddGhostCellsArray(
    -  vtkDataObject* mesh,
    +  svtkDataObject* mesh,
       const std::string &meshName)
     {
       if(meshName == m_meshName)
    @@ -198,7 +198,7 @@ int AmrMeshParticleDataAdaptor::
     
     template
     int AmrMeshParticleDataAdaptor::AddArray(
    -  vtkDataObject* mesh,
    +  svtkDataObject* mesh,
       const std::string &meshName,
       int association,
       const std::string &arrayName)
    diff --git a/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptor.H b/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptor.H
    index 886a7df6d18..3f7a945e019 100644
    --- a/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptor.H
    +++ b/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptor.H
    @@ -43,10 +43,10 @@ public:
       int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override;
     #endif
       int GetNumberOfMeshes(unsigned int &numMeshes) override;
    -  int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override;
    -  int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override;
    -  int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override;
    -  int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override;
    +  int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override;
    +  int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override;
    +  int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override;
    +  int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override;
       int ReleaseData() override;
     
     protected:
    diff --git a/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptorI.H b/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptorI.H
    index 813466fc0f8..9035cd0c39c 100644
    --- a/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptorI.H
    +++ b/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptorI.H
    @@ -146,7 +146,7 @@ template
     int AmrParticleDataAdaptor::GetMesh(
       const std::string &meshName,
       bool structureOnly,
    -  vtkDataObject *&mesh)
    +  svtkDataObject *&mesh)
     {
       if(meshName == m_meshName)
       {
    @@ -162,7 +162,7 @@ int AmrParticleDataAdaptor::GetM
     
     template
     int AmrParticleDataAdaptor::AddGhostNodesArray(
    -  vtkDataObject* mesh,
    +  svtkDataObject* mesh,
       const std::string &meshName)
     {
       if(meshName == m_meshName)
    @@ -179,7 +179,7 @@ int AmrParticleDataAdaptor::AddG
     
     template
     int AmrParticleDataAdaptor::AddGhostCellsArray(
    -  vtkDataObject* mesh,
    +  svtkDataObject* mesh,
       const std::string &meshName)
     {
       if(meshName == m_meshName)
    @@ -196,7 +196,7 @@ int AmrParticleDataAdaptor::AddG
     
     template
     int AmrParticleDataAdaptor::AddArray(
    -  vtkDataObject* mesh,
    +  svtkDataObject* mesh,
       const std::string &meshName,
       int association,
       const std::string &arrayName)
    diff --git a/Src/Extern/SENSEI/AMReX_InSituUtils.H b/Src/Extern/SENSEI/AMReX_InSituUtils.H
    index e7c212d7b4e..2799e21b367 100644
    --- a/Src/Extern/SENSEI/AMReX_InSituUtils.H
    +++ b/Src/Extern/SENSEI/AMReX_InSituUtils.H
    @@ -2,10 +2,10 @@
     #define AMReX_InSituUtils_H
     #include 
     
    -#include 
    -#include 
    -#include 
    -#include 
    +#include 
    +#include 
    +#include 
    +#include 
     
     #include 
     #include 
    @@ -47,21 +47,21 @@ enum PointGhostTypes
     
     
     
    -// traits helper for mapping between amrex_real and vtkDataArray
    +// traits helper for mapping between amrex_real and svtkDataArray
     template  struct amrex_tt {};
     
    -#define amrex_tt_specialize(cpp_t, vtk_t, vtk_t_e)      \
    +#define amrex_tt_specialize(cpp_t, svtk_t, svtk_t_e)    \
     template <>                                             \
     struct amrex_tt                                  \
     {                                                       \
    -    using vtk_type = vtk_t;                             \
    +    using svtk_type = svtk_t;                           \
                                                             \
         static                                              \
    -    constexpr int vtk_type_enum() { return vtk_t_e; }   \
    +    constexpr int svtk_type_enum() { return svtk_t_e; } \
     };
     
    -amrex_tt_specialize(float, vtkFloatArray, VTK_FLOAT)
    -amrex_tt_specialize(double, vtkDoubleArray, VTK_DOUBLE)
    +amrex_tt_specialize(float, svtkFloatArray, SVTK_FLOAT)
    +amrex_tt_specialize(double, svtkDoubleArray, SVTK_DOUBLE)
     
     
     // helpers to modify values
    diff --git a/Src/Extern/SENSEI/AMReX_InSituUtils.cpp b/Src/Extern/SENSEI/AMReX_InSituUtils.cpp
    index 64c429e8bb6..d13c8738aeb 100644
    --- a/Src/Extern/SENSEI/AMReX_InSituUtils.cpp
    +++ b/Src/Extern/SENSEI/AMReX_InSituUtils.cpp
    @@ -1,7 +1,7 @@
     #include "AMReX_InSituUtils.H"
     
     #include "Error.h"
    -#include "VTKUtils.h"
    +#include "SVTKUtils.h"
     
     namespace amrex {
     namespace InSituUtils {
    @@ -14,7 +14,7 @@ int StateMap::GetIndex(const std::string &name, int centering,
     
         if (cit == this->Map.end())
         {
    -        SENSEI_ERROR("No " << sensei::VTKUtils::GetAttributesName(centering)
    +        SENSEI_ERROR("No " << sensei::SVTKUtils::GetAttributesName(centering)
               << " arrays")
             return -1;
         }
    @@ -23,7 +23,7 @@ int StateMap::GetIndex(const std::string &name, int centering,
         if (nit == cit->second.end())
         {
             SENSEI_ERROR("No array named \"" << name  << "\" in "
    -            << sensei::VTKUtils::GetAttributesName(centering)
    +            << sensei::SVTKUtils::GetAttributesName(centering)
                 << " centered data")
             return -1;
         }
    @@ -41,7 +41,7 @@ int StateMap::GetName(int centering, int id, std::string &name)
     
         if (cit == this->Map.end())
         {
    -        SENSEI_ERROR("No " << sensei::VTKUtils::GetAttributesName(centering)
    +        SENSEI_ERROR("No " << sensei::SVTKUtils::GetAttributesName(centering)
               << " arrays")
             return -1;
         }
    diff --git a/Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H b/Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H
    index 73ca142ec0b..f284b15831b 100644
    --- a/Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H
    +++ b/Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H
    @@ -8,7 +8,7 @@
     #include 
     
     #include 
    -class vtkPolyData;
    +class svtkPolyData;
     
     namespace amrex
     {
    @@ -40,22 +40,22 @@ public:
       void SetPinMesh(int val);
     
       // get particle id numbers
    -  int AddParticlesIDArray(vtkDataObject* mesh);
    +  int AddParticlesIDArray(svtkDataObject* mesh);
     
       // get particle cpu numbers (process each particle was generated on)
    -  int AddParticlesCPUArray(vtkDataObject* mesh);
    +  int AddParticlesCPUArray(svtkDataObject* mesh);
     
       // get particle integer arrays in Structs of Arrays format
    -  int AddParticlesSOAIntArray(const std::string &arrayName, vtkDataObject* mesh);
    +  int AddParticlesSOAIntArray(const std::string &arrayName, svtkDataObject* mesh);
     
       // get particle real arrays in Structs of Arrays format
    -  int AddParticlesSOARealArray(const std::string &arrayName, vtkDataObject* mesh);
    +  int AddParticlesSOARealArray(const std::string &arrayName, svtkDataObject* mesh);
     
       // get particle integer arrays in Array Of Structs format
    -  int AddParticlesAOSIntArray(const std::string &arrayName, vtkDataObject* mesh);
    +  int AddParticlesAOSIntArray(const std::string &arrayName, svtkDataObject* mesh);
     
       // get particle real arrays in Array Of Structs format
    -  int AddParticlesAOSRealArray(const std::string &arrayName, vtkDataObject* mesh);
    +  int AddParticlesAOSRealArray(const std::string &arrayName, svtkDataObject* mesh);
     
       // SENSEI API
     #if SENSEI_VERSION_MAJOR >= 3
    @@ -68,10 +68,10 @@ public:
       int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override;
     #endif
       int GetNumberOfMeshes(unsigned int &numMeshes) override;
    -  int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override;
    -  int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override;
    -  int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override;
    -  int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override;
    +  int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override;
    +  int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override;
    +  int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override;
    +  int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override;
       int ReleaseData() override;
     
     protected:
    @@ -79,7 +79,7 @@ protected:
       ~ParticleDataAdaptor() = default;
     
     private:
    -  vtkPolyData* BuildParticles();
    +  svtkPolyData* BuildParticles();
     
       const std::string m_particlesName = "particles";
     
    diff --git a/Src/Extern/SENSEI/AMReX_ParticleDataAdaptorI.H b/Src/Extern/SENSEI/AMReX_ParticleDataAdaptorI.H
    index 26174f83e1b..8a2d15562d3 100644
    --- a/Src/Extern/SENSEI/AMReX_ParticleDataAdaptorI.H
    +++ b/Src/Extern/SENSEI/AMReX_ParticleDataAdaptorI.H
    @@ -1,13 +1,13 @@
     #include "Profiler.h"
     #include "Error.h"
    -#include "VTKUtils.h"
    +#include "SVTKUtils.h"
     #include "MeshMetadata.h"
    -// vtk includes
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    +// svtk includes
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
     
     
     
    @@ -194,7 +194,7 @@ int ParticleDataAdaptor::GetNumb
       unsigned int &numberOfArrays)
     {
       numberOfArrays = 0;
    -  if(association == vtkDataObject::POINT)
    +  if(association == svtkDataObject::POINT)
       {
         numberOfArrays = m_realStructs.size()
                        + m_intStructs.size()
    @@ -213,7 +213,7 @@ int ParticleDataAdaptor::GetArra
       unsigned int index,
       std::string &arrayName)
     {
    -  if(association == vtkDataObject::POINT)
    +  if(association == svtkDataObject::POINT)
       {
         if(index < m_realStructs.size())
         {
    @@ -253,7 +253,7 @@ template 
     int ParticleDataAdaptor::GetMesh(
       const std::string &meshName,
       bool structureOnly,
    -  vtkDataObject *&mesh)
    +  svtkDataObject *&mesh)
     {
       mesh = nullptr;
       int nprocs = 1;
    @@ -266,7 +266,7 @@ int ParticleDataAdaptor::GetMesh
         SENSEI_ERROR("No mesh named \"" << meshName << "\"")
         return -1;
       }
    -  vtkMultiBlockDataSet* mb = vtkMultiBlockDataSet::New();
    +  svtkMultiBlockDataSet* mb = svtkMultiBlockDataSet::New();
     
       if (structureOnly)
       {
    @@ -275,7 +275,7 @@ int ParticleDataAdaptor::GetMesh
       }
     
       mb->SetNumberOfBlocks(nprocs);
    -  vtkPolyData *pd = BuildParticles();
    +  svtkPolyData *pd = BuildParticles();
       mb->SetBlock(rank, pd);
       pd->Delete();
       mesh = mb;
    @@ -286,7 +286,7 @@ int ParticleDataAdaptor::GetMesh
     //-----------------------------------------------------------------------------
     template 
     int ParticleDataAdaptor::AddGhostNodesArray(
    -  vtkDataObject*,
    +  svtkDataObject*,
       const std::string &meshName)
     {
       if (meshName != m_particlesName)
    @@ -300,7 +300,7 @@ int ParticleDataAdaptor::AddGhos
     //-----------------------------------------------------------------------------
     template 
     int ParticleDataAdaptor::AddGhostCellsArray(
    -  vtkDataObject*,
    +  svtkDataObject*,
       const std::string &meshName)
     {
       if (meshName != m_particlesName)
    @@ -314,7 +314,7 @@ int ParticleDataAdaptor::AddGhos
     //-----------------------------------------------------------------------------
     template 
     int ParticleDataAdaptor::AddArray(
    -  vtkDataObject* mesh,
    +  svtkDataObject* mesh,
       const std::string &meshName,
       int association,
       const std::string &arrayName)
    @@ -325,7 +325,7 @@ int ParticleDataAdaptor::AddArra
         return -1;
       }
     
    -  if (association != vtkDataObject::POINT)
    +  if (association != svtkDataObject::POINT)
       {
         SENSEI_ERROR("Invalid association " << association);
         return -1;
    @@ -393,10 +393,10 @@ int ParticleDataAdaptor::GetMesh
       metadata->MeshName = m_particlesName;
     
       // container mesh type (all)
    -  metadata->MeshType = VTK_MULTIBLOCK_DATA_SET;
    +  metadata->MeshType = SVTK_MULTIBLOCK_DATA_SET;
     
       // block mesh type (all)
    -  metadata->BlockType = VTK_POLY_DATA;
    +  metadata->BlockType = SVTK_POLY_DATA;
     
       // global number of blocks (all)
       metadata->NumBlocks = nprocs;
    @@ -412,9 +412,9 @@ int ParticleDataAdaptor::GetMesh
     
       // type enum of point data (unstructured, optional)
     #ifdef AMREX_SINGLE_PRECISION_PARTICLES
    -  metadata->CoordinateType = VTK_FLOAT;
    +  metadata->CoordinateType = SVTK_FLOAT;
     #else
    -  metadata->CoordinateType = VTK_DOUBLE;
    +  metadata->CoordinateType = SVTK_DOUBLE;
     #endif
     
       // total number of points in all blocks (all, optional)
    @@ -467,19 +467,19 @@ int ParticleDataAdaptor::GetMesh
       metadata->ArrayCentering = {};
       for(auto s : m_realStructs)
       {
    -    metadata->ArrayCentering.push_back(vtkDataObject::POINT);
    +    metadata->ArrayCentering.push_back(svtkDataObject::POINT);
       }
       for(auto s : m_intStructs)
       {
    -    metadata->ArrayCentering.push_back(vtkDataObject::POINT);
    +    metadata->ArrayCentering.push_back(svtkDataObject::POINT);
       }
       for(auto s : m_realArrays)
       {
    -    metadata->ArrayCentering.push_back(vtkDataObject::POINT);
    +    metadata->ArrayCentering.push_back(svtkDataObject::POINT);
       }
       for(auto s : m_intArrays)
       {
    -    metadata->ArrayCentering.push_back(vtkDataObject::POINT);
    +    metadata->ArrayCentering.push_back(svtkDataObject::POINT);
       }
     
       // number of components of each array (all)
    @@ -506,26 +506,26 @@ int ParticleDataAdaptor::GetMesh
       for(auto s : m_realStructs)
       {
     #ifdef AMREX_SINGLE_PRECISION_PARTICLES
    -    metadata->ArrayType.push_back(VTK_FLOAT);
    +    metadata->ArrayType.push_back(SVTK_FLOAT);
     #else
    -    metadata->ArrayType.push_back(VTK_DOUBLE);
    +    metadata->ArrayType.push_back(SVTK_DOUBLE);
     #endif
       }
       for(auto s : m_intStructs)
       {
    -    metadata->ArrayType.push_back(VTK_INT);
    +    metadata->ArrayType.push_back(SVTK_INT);
       }
       for(auto s : m_realArrays)
       {
     #ifdef AMREX_SINGLE_PRECISION_PARTICLES
    -    metadata->ArrayType.push_back(VTK_FLOAT);
    +    metadata->ArrayType.push_back(SVTK_FLOAT);
     #else
    -    metadata->ArrayType.push_back(VTK_DOUBLE);
    +    metadata->ArrayType.push_back(SVTK_DOUBLE);
     #endif
       }
       for(auto s : m_intArrays)
       {
    -    metadata->ArrayType.push_back(VTK_INT);
    +    metadata->ArrayType.push_back(SVTK_INT);
       }
     
       // global min,max of each array (all, optional)
    @@ -646,19 +646,19 @@ int ParticleDataAdaptor::GetMesh
     
     //-----------------------------------------------------------------------------
     template 
    -vtkPolyData* ParticleDataAdaptor::BuildParticles()
    +svtkPolyData* ParticleDataAdaptor::BuildParticles()
     {
       // return particle data pd
    -  vtkPolyData* pd  = vtkPolyData::New();
    +  svtkPolyData* pd  = svtkPolyData::New();
     
       const auto& particles = this->m_particles->GetParticles();
       long long numParticles = this->m_particles->TotalNumberOfParticles(true, true);
     
       // allocate vertex storage for particles
     #ifdef AMREX_SINGLE_PRECISION_PARTICLES
    -  vtkNew coords;
    +  svtkNew coords;
     #else
    -  vtkNew coords;
    +  svtkNew coords;
     #endif
       coords->SetName("coords");
       coords->SetNumberOfComponents(3);
    @@ -669,12 +669,12 @@ vtkPolyData* ParticleDataAdaptor
       double *pCoords = coords->GetPointer(0);
     #endif
     
    -  // use this to index into the VTK array as we copy level by level and tile by
    +  // use this to index into the SVTK array as we copy level by level and tile by
       // tile
       long long ptId = 0;
     
       // allocate connectivity array for particles
    -  vtkNew vertex;
    +  svtkNew vertex;
       vertex->AllocateExact(numParticles, 1);
     
       // points->SetNumberOfPoints(numParticles);
    @@ -717,8 +717,8 @@ vtkPolyData* ParticleDataAdaptor
         }
       }
     
    -  // pass the particle coordinates into VTK's point data structure.
    -  vtkNew points;
    +  // pass the particle coordinates into SVTK's point data structure.
    +  svtkNew points;
       points->SetData(coords);
     
       // add point and vertex data to output mesh
    @@ -731,14 +731,14 @@ vtkPolyData* ParticleDataAdaptor
     //-----------------------------------------------------------------------------
     template 
     int ParticleDataAdaptor::AddParticlesIDArray(
    -  vtkDataObject* mesh)
    +  svtkDataObject* mesh)
     {
    -  auto vtk_particles = dynamic_cast(mesh);
    +  auto svtk_particles = dynamic_cast(mesh);
       const auto& particles = this->m_particles->GetParticles();
       auto nptsOnProc = this->m_particles->TotalNumberOfParticles(true, true);
     
    - // allocate a VTK array for the data
    -  vtkNew idArray;
    + // allocate a SVTK array for the data
    +  svtkNew idArray;
       idArray->SetName("id");
       idArray->SetNumberOfComponents(1);
       idArray->SetNumberOfValues(nptsOnProc);
    @@ -767,8 +767,8 @@ int ParticleDataAdaptor::AddPart
         }
       }
     
    -  // the association for this array is vtkDataObject::POINT
    -  vtk_particles->GetPointData()->AddArray(idArray);
    +  // the association for this array is svtkDataObject::POINT
    +  svtk_particles->GetPointData()->AddArray(idArray);
     
       return 0;
     }
    @@ -776,14 +776,14 @@ int ParticleDataAdaptor::AddPart
     //-----------------------------------------------------------------------------
     template 
     int ParticleDataAdaptor::AddParticlesCPUArray(
    -  vtkDataObject* mesh)
    +  svtkDataObject* mesh)
     {
    -  auto vtk_particles = dynamic_cast(mesh);
    +  auto svtk_particles = dynamic_cast(mesh);
       const auto& particles = this->m_particles->GetParticles();
       auto nptsOnProc = this->m_particles->TotalNumberOfParticles(true, true);
     
    -  // allocate a VTK array for the data
    -  vtkNew cpuArray;
    +  // allocate a SVTK array for the data
    +  svtkNew cpuArray;
       cpuArray->SetName("cpu");
       cpuArray->SetNumberOfComponents(1);
       cpuArray->SetNumberOfValues(nptsOnProc);
    @@ -811,8 +811,8 @@ int ParticleDataAdaptor::AddPart
         }
       }
     
    -  // the association for this array is vtkDataObject::POINT
    -  vtk_particles->GetPointData()->AddArray(cpuArray);
    +  // the association for this array is svtkDataObject::POINT
    +  svtk_particles->GetPointData()->AddArray(cpuArray);
     
       return 0;
     }
    @@ -821,7 +821,7 @@ int ParticleDataAdaptor::AddPart
     template 
     int ParticleDataAdaptor::AddParticlesSOARealArray(
       const std::string &arrayName,
    -  vtkDataObject* mesh)
    +  svtkDataObject* mesh)
     {
       const long nParticles = this->m_particles->TotalNumberOfParticles(true, true);
     
    @@ -847,11 +847,11 @@ int ParticleDataAdaptor::AddPart
         }
       }
     
    -  // allocate the vtkArray
    +  // allocate the svtkArray
     #ifdef AMREX_SINGLE_PRECISION_PARTICLES
    -  vtkNew data;
    +  svtkNew data;
     #else
    -  vtkNew data;
    +  svtkNew data;
     #endif
       data->SetName(arrayName.c_str());
       data->SetNumberOfComponents(nComps);
    @@ -896,9 +896,9 @@ int ParticleDataAdaptor::AddPart
       int rank = 0;
       MPI_Comm_rank(this->GetCommunicator(), &rank);
     
    -  auto blocks = dynamic_cast(mesh);
    +  auto blocks = dynamic_cast(mesh);
     
    -  auto block = dynamic_cast(blocks->GetBlock(rank));
    +  auto block = dynamic_cast(blocks->GetBlock(rank));
       block->GetPointData()->AddArray(data);
     
       return 0;
    @@ -908,7 +908,7 @@ int ParticleDataAdaptor::AddPart
     template 
     int ParticleDataAdaptor::AddParticlesSOAIntArray(
       const std::string &arrayName,
    -  vtkDataObject* mesh)
    +  svtkDataObject* mesh)
     {
       // get the particles from the particle container
       auto nptsOnProc = this->m_particles->TotalNumberOfParticles(true, true);
    @@ -931,7 +931,7 @@ int ParticleDataAdaptor::AddPart
         return -1;
       }
     
    -  vtkNew data;
    +  svtkNew data;
       data->SetName(arrayName.c_str());
       data->SetNumberOfComponents(1);
       data->SetNumberOfValues(nptsOnProc);
    @@ -967,9 +967,9 @@ int ParticleDataAdaptor::AddPart
       int rank = 0;
       MPI_Comm_rank(this->GetCommunicator(), &rank);
     
    -  auto blocks = dynamic_cast(mesh);
    +  auto blocks = dynamic_cast(mesh);
     
    -  auto block = dynamic_cast(blocks->GetBlock(rank));
    +  auto block = dynamic_cast(blocks->GetBlock(rank));
       block->GetPointData()->AddArray(data);
     
       return 0;
    @@ -979,7 +979,7 @@ int ParticleDataAdaptor::AddPart
     template 
     int ParticleDataAdaptor::AddParticlesAOSRealArray(
       const std::string &arrayName,
    -  vtkDataObject* mesh)
    +  svtkDataObject* mesh)
     {
       // get the particles from the particle container
       const auto& particles = this->m_particles->GetParticles();
    @@ -1007,11 +1007,11 @@ int ParticleDataAdaptor::AddPart
         }
       }
     
    -  // allocate the vtk array
    +  // allocate the svtk array
     #ifdef AMREX_SINGLE_PRECISION_PARTICLES
    -  vtkNew data;
    +  svtkNew data;
     #else
    -  vtkNew data;
    +  svtkNew data;
     #endif
     
       data->SetName(arrayName.c_str());
    @@ -1053,9 +1053,9 @@ int ParticleDataAdaptor::AddPart
       int rank = 0;
       MPI_Comm_rank(this->GetCommunicator(), &rank);
     
    -  auto blocks = dynamic_cast(mesh);
    +  auto blocks = dynamic_cast(mesh);
     
    -  auto block = dynamic_cast(blocks->GetBlock(rank));
    +  auto block = dynamic_cast(blocks->GetBlock(rank));
       block->GetPointData()->AddArray(data);
     
       return 0;
    @@ -1065,7 +1065,7 @@ int ParticleDataAdaptor::AddPart
     template 
     int ParticleDataAdaptor::AddParticlesAOSIntArray(
       const std::string &arrayName,
    -  vtkDataObject* mesh)
    +  svtkDataObject* mesh)
     {
       // get the particles from the particle container
       const auto& particles = this->m_particles->GetParticles();
    @@ -1090,8 +1090,8 @@ int ParticleDataAdaptor::AddPart
         return -1;
       }
     
    -  // allocate vtkArray
    -  vtkNew data;
    +  // allocate svtkArray
    +  svtkNew data;
       data->SetName(arrayName.c_str());
       data->SetNumberOfComponents(1);
       data->SetNumberOfValues(nptsOnProc);
    @@ -1121,9 +1121,9 @@ int ParticleDataAdaptor::AddPart
       int rank = 0;
       MPI_Comm_rank(this->GetCommunicator(), &rank);
     
    -  auto blocks = dynamic_cast(mesh);
    +  auto blocks = dynamic_cast(mesh);
     
    -  auto block = dynamic_cast(blocks->GetBlock(rank));
    +  auto block = dynamic_cast(blocks->GetBlock(rank));
       block->GetPointData()->AddArray(data);
     
     
    diff --git a/Tools/CMake/AMReXThirdPartyLibraries.cmake b/Tools/CMake/AMReXThirdPartyLibraries.cmake
    index 1afbcac4ee2..2b0a90febe1 100644
    --- a/Tools/CMake/AMReXThirdPartyLibraries.cmake
    +++ b/Tools/CMake/AMReXThirdPartyLibraries.cmake
    @@ -45,7 +45,7 @@ endif ()
     # Sensei
     #
     if (AMReX_SENSEI)
    -    find_package(SENSEI REQUIRED)
    +    find_package( SENSEI 4.0.0 REQUIRED )
         target_link_libraries( amrex PUBLIC sensei )
     endif ()
     
    
    From e4c83cfddc8afb1bd091c45a6ad3040d23f019bc Mon Sep 17 00:00:00 2001
    From: Jon Rood 
    Date: Wed, 29 Jun 2022 11:08:42 -0600
    Subject: [PATCH 011/111] Add lib64 library location for ZFP since it may exist
     there instead of lib. (#2860)
    
    ---
     Tools/GNUMake/packages/Make.hdf5 | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/Tools/GNUMake/packages/Make.hdf5 b/Tools/GNUMake/packages/Make.hdf5
    index d09fe43a082..35e2ff3e404 100644
    --- a/Tools/GNUMake/packages/Make.hdf5
    +++ b/Tools/GNUMake/packages/Make.hdf5
    @@ -27,7 +27,7 @@ ifeq ($(USE_HDF5_ZFP),TRUE)
           ZFP_ABSPATH = $(abspath $(ZFP_HOME))
           H5Z_ABSPATH = $(abspath $(H5Z_HOME))
           INCLUDE_LOCATIONS += $(ZFP_ABSPATH)/include $(H5Z_ABSPATH)/include
    -      LIBRARY_LOCATIONS += $(ZFP_ABSPATH)/lib $(H5Z_ABSPATH)/lib
    +      LIBRARY_LOCATIONS += $(ZFP_ABSPATH)/lib $(ZFP_ABSPATH)/lib64 $(H5Z_ABSPATH)/lib
           LDFLAGS += -Xlinker -rpath -Xlinker $(ZFP_ABSPATH)/lib
         endif
       endif
    
    From 6f9a46c7e834046970d46d684927a078671355bc Mon Sep 17 00:00:00 2001
    From: PaulMullowney <60452402+PaulMullowney@users.noreply.github.com>
    Date: Wed, 29 Jun 2022 11:09:57 -0600
    Subject: [PATCH 012/111] Adding control APIs and namespacing for core
     algorithm paths like SpGEMM, SpMV, and SpTrans. (#2859)
    
    Co-authored-by: Paul Mullowney 
    ---
     Src/Base/AMReX.cpp | 16 ++++++++++++++--
     1 file changed, 14 insertions(+), 2 deletions(-)
    
    diff --git a/Src/Base/AMReX.cpp b/Src/Base/AMReX.cpp
    index f06806babcd..2c454166126 100644
    --- a/Src/Base/AMReX.cpp
    +++ b/Src/Base/AMReX.cpp
    @@ -123,6 +123,11 @@ namespace {
     #ifdef AMREX_USE_HYPRE
     namespace {
         int init_hypre = 1;
    +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
    +    int hypre_spgemm_use_vendor = 0;
    +    int hypre_spmv_use_vendor = 0;
    +    int hypre_sptrans_use_vendor = 0;
    +#endif
     }
     #endif
     
    @@ -489,6 +494,11 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse,
     
     #ifdef AMREX_USE_HYPRE
             pp.queryAdd("init_hypre", init_hypre);
    +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
    +        pp.queryAdd("hypre_spgemm_use_vendor", hypre_spgemm_use_vendor);
    +        pp.queryAdd("hypre_spmv_use_vendor", hypre_spmv_use_vendor);
    +        pp.queryAdd("hypre_sptrans_use_vendor", hypre_sptrans_use_vendor);
    +#endif
     #endif
         }
     
    @@ -526,7 +536,7 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse,
     #ifdef AMREX_USE_HYPRE
         if (init_hypre) {
             HYPRE_Init();
    -#ifdef HYPRE_USING_CUDA
    +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
     
     #if defined(HYPRE_RELEASE_NUMBER) && (HYPRE_RELEASE_NUMBER >= 22400)
     
    @@ -543,7 +553,9 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse,
     #endif
             /* This API below used to be HYPRE_SetSpGemmUseCusparse(). This was changed in commit
                Hypre master commit dfdd1cd12f */
    -        HYPRE_SetSpGemmUseVendor(false);
    +        HYPRE_SetSpGemmUseVendor(hypre_spgemm_use_vendor);
    +        HYPRE_SetSpMVUseVendor(hypre_spmv_use_vendor);
    +        HYPRE_SetSpTransUseVendor(hypre_sptrans_use_vendor);
             HYPRE_SetMemoryLocation(HYPRE_MEMORY_DEVICE);
             HYPRE_SetExecutionPolicy(HYPRE_EXEC_DEVICE);
             HYPRE_SetUseGpuRand(true);
    
    From 8fb23ec17a58284af6bdafbcda3eea0d86d8ce69 Mon Sep 17 00:00:00 2001
    From: Jon Rood 
    Date: Wed, 29 Jun 2022 16:52:35 -0600
    Subject: [PATCH 013/111] Refactor Make.nrel to use MPT for MPI with the Intel
     compiler on Eagle. (#2861)
    
    ---
     Tools/GNUMake/sites/Make.nrel | 43 +++++++++++++++++++----------------
     1 file changed, 24 insertions(+), 19 deletions(-)
    
    diff --git a/Tools/GNUMake/sites/Make.nrel b/Tools/GNUMake/sites/Make.nrel
    index 68ac8e5116f..ca705698ea1 100644
    --- a/Tools/GNUMake/sites/Make.nrel
    +++ b/Tools/GNUMake/sites/Make.nrel
    @@ -40,27 +40,32 @@ else ifeq ($(which_computer), rhodes)
       endif
     endif
     
    -# Account for Intel-MPI, MPICH, OpenMPI, and HPE MPT
     ifeq ($(USE_MPI),TRUE)
    +  CXX := mpicxx
    +  CC  := mpicc
    +  FC  := mpif90
    +  F90 := mpif90
       ifeq ($(COMP), intel)
    -    CXX := mpiicpc
    -    CC  := mpiicc
    -    FC  := mpiifort
    -    F90 := mpiifort
    -  else
    -    CXX := mpicxx
    -    CC  := mpicc
    -    FC  := mpif90
    -    F90 := mpif90
    -    ifneq ($(findstring mpich, $(shell $(F90) -show 2>&1)),)
    -      mpif90_link_flags := $(shell $(F90) -link_info)
    -      LIBRARIES += $(wordlist 2,1024,$(mpif90_link_flags))
    -    else ifneq ($(findstring Open MPI, $(shell $(F90) -showme:version 2>&1)),)
    -      mpif90_link_flags := $(shell $(F90) -showme:link)
    -      LIBRARIES += $(mpif90_link_flags)
    -    else
    -      # MPT case (no option available to query link flags)
    -      LIBRARIES += -lmpi
    +    ifeq ($(which_computer), eagle)
    +        # Always assume MPT on Eagle
    +        export MPICXX_CXX := icpc
    +        export MPICC_CC   := icc
    +        export MPIF90_F90 := ifort
    +    else ifeq ($(which_computer), rhodes)
    +        CXX := mpiicpc
    +        CC  := mpiicc
    +        FC  := mpiifort
    +        F90 := mpiifort
         endif
       endif
    +  ifneq ($(findstring mpich, $(shell $(F90) -show 2>&1)),)
    +    mpif90_link_flags := $(shell $(F90) -link_info)
    +    LIBRARIES += $(wordlist 2,1024,$(mpif90_link_flags))
    +  else ifneq ($(findstring Open MPI, $(shell $(F90) -showme:version 2>&1)),)
    +    mpif90_link_flags := $(shell $(F90) -showme:link)
    +    LIBRARIES += $(mpif90_link_flags)
    +  else
    +    # MPT case (no option available to query link flags)
    +    LIBRARIES += -lmpi
    +  endif
     endif
    
    From be813d024e6b314e41c727734b8e53481898e08e Mon Sep 17 00:00:00 2001
    From: Weiqun Zhang 
    Date: Fri, 1 Jul 2022 10:29:13 -0700
    Subject: [PATCH 014/111] Hypre: add version check (#2865)
    
    These HYPRE_SetSp* are only available in hypre >= 22500.
    ---
     Src/Base/AMReX.cpp | 6 ++++--
     1 file changed, 4 insertions(+), 2 deletions(-)
    
    diff --git a/Src/Base/AMReX.cpp b/Src/Base/AMReX.cpp
    index 2c454166126..76488bf81e0 100644
    --- a/Src/Base/AMReX.cpp
    +++ b/Src/Base/AMReX.cpp
    @@ -551,11 +551,13 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse,
             HYPRE_SetGPUMemoryPoolSize( mempool_bin_growth, mempool_min_bin,
                                         mempool_max_bin, mempool_max_cached_bytes );
     #endif
    -        /* This API below used to be HYPRE_SetSpGemmUseCusparse(). This was changed in commit
    -           Hypre master commit dfdd1cd12f */
    +#if (HYPRE_RELEASE_NUMBER >= 22500)
             HYPRE_SetSpGemmUseVendor(hypre_spgemm_use_vendor);
             HYPRE_SetSpMVUseVendor(hypre_spmv_use_vendor);
             HYPRE_SetSpTransUseVendor(hypre_sptrans_use_vendor);
    +#elif (HYPRE_USING_CUDA)
    +        HYPRE_SetSpGemmUseCusparse(hypre_spgemm_use_vendor);
    +#endif
             HYPRE_SetMemoryLocation(HYPRE_MEMORY_DEVICE);
             HYPRE_SetExecutionPolicy(HYPRE_EXEC_DEVICE);
             HYPRE_SetUseGpuRand(true);
    
    From d736ef299b724b96b34d41103dfc5318d0ecdee4 Mon Sep 17 00:00:00 2001
    From: Weiqun Zhang 
    Date: Fri, 1 Jul 2022 11:00:15 -0700
    Subject: [PATCH 015/111] Update CHANGES for 22.07 (#2866)
    
    ---
     CHANGES | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
     1 file changed, 76 insertions(+)
    
    diff --git a/CHANGES b/CHANGES
    index 8104566abe2..7982e1ddcb8 100644
    --- a/CHANGES
    +++ b/CHANGES
    @@ -1,3 +1,79 @@
    +# 22.07
    +
    +  -- Adding control APIs and namespacing for core algorithm paths like SpGEMM, SpMV, and SpTrans. (#2859)
    +
    +  -- update the SENSEI in situ coupling for SENSEI v4.0.0 (#2785)
    +
    +  -- Write runtime attribs to checkpoints on GPUs (#2856)
    +
    +  -- Fix gnu make on Crusher for mpi_gtl_hsa (#2857)
    +
    +  -- CMake: FindDependency CUDAToolkit (#2849)
    +
    +  -- NERSC Programming Environment prototype (#2848)
    +
    +  -- GNU Make: No need to query mpif90 if Fortran is not used. (#2852)
    +
    +  -- Remove f90doc (#2851)
    +
    +  -- Explicitly invoke python3 (#2850)
    +
    +  -- Maintain the high end of the 'roundoff domain' in both float and double precision (#2839)
    +
    +  -- add Ok to coordsys (#2844)
    +
    +  -- ParamParse: Add Files at Runtime (#2842)
    +
    +  -- Fix a pathological case for 2d EB (#2840)
    +
    +  -- add fvolumesum to GNUmakefile (#2836)
    +
    +  -- Clamp particles shifted from plo boundary against rhi, rather than back to plo (#2814)
    +
    +  -- Fix: CMake NVTX not only Hypre (#2837)
    +
    +  -- Update sensei CI container for sensei v4.0 integration (#2834)
    +
    +  -- HIP Memory Advise : Set managed memory to coarse grain (#2835)
    +
    +  -- CMake: Fix `export` with `AMReX_INSTALL=OFF` (#2838)
    +
    +  -- make PODVector work with PolymorphicArenaAllocator (#2829)
    +
    +  -- Re-implement FaceLinear::interp() for InterpFromCoarseLevel (#2831)
    +
    +  -- Make regrid method of Amr class public (#2833)
    +
    +  -- amrex::Any (#2827)
    +
    +  -- Fix line integral computation (#2830)
    +
    +  -- Fix a bug in multigrid grids (#2823)
    +
    +  -- Add html, additional sections to README.md (#2775)
    +
    +  -- Allow StateDataPhysBCFunct to operate on face-centered data (#2819)
    +
    +  -- Fix Parser ODR (#2820)
    +
    +  -- CMake: Cleanup old nvToolsExt (#2817)
    +
    +  -- Handle the case where we don't have enough device memory for the snd_buffer (#2705)
    +
    +  -- CMake: 3.17+ (#2813)
    +
    +  -- Landon/fix bug ghost particles (#2812)
    +
    +  -- Follow-on to 2809; update selectActualNeighbors as well. (#2810)
    +
    +  -- Generalize the type of callables that can be passed into the neighbor list build function (#2809)
    +
    +  -- Add AVX2 instructions flag. (#2803)
    +
    +  -- Avoid M_PI because it's not in the C++ standard (#2807)
    +
    +  -- In the array version of FillPatchTwoLevels, allow specifying an (#2800)
    +
     # 22.06
     
       -- Fix solvability issue in the nodal solver RAP approach (#2783, #2801)
    
    From 19c70685cdb0c3322712e9f442092b1140cfe7ec Mon Sep 17 00:00:00 2001
    From: Erik 
    Date: Fri, 1 Jul 2022 18:24:24 -0400
    Subject: [PATCH 016/111] Carry over fix for ngbxy.smallEnd typo (#2868)
    
    This is a typo that was corrected in other places but didn't get fixed here.
    ---
     Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp b/Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp
    index 995393e05f8..4dc1076dec8 100644
    --- a/Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp
    +++ b/Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp
    @@ -39,7 +39,7 @@ AmrCoreAdv::DefineVelocityAtLevel (int lev, Real time)
                                                                           facevel[lev][2].array(mfi)) };
     
                 const Box& psibox = Box(IntVect(AMREX_D_DECL(std::min(ngbxx.smallEnd(0)-1, ngbxy.smallEnd(0)-1),
    -                                                         std::min(ngbxx.smallEnd(1)-1, ngbxy.smallEnd(0)-1),
    +                                                         std::min(ngbxx.smallEnd(1)-1, ngbxy.smallEnd(1)-1),
                                                              0)),
                                         IntVect(AMREX_D_DECL(std::max(ngbxx.bigEnd(0),   ngbxy.bigEnd(0)+1),
                                                              std::max(ngbxx.bigEnd(1)+1, ngbxy.bigEnd(1)),
    
    From 2b42fb56a96e752d301916ca23160098c5369386 Mon Sep 17 00:00:00 2001
    From: drangara <69211175+drangara@users.noreply.github.com>
    Date: Fri, 1 Jul 2022 18:44:35 -0400
    Subject: [PATCH 017/111] Remove some hard checks in check_mvmc for 3D (#2864)
    
    Removing some hard checks in 3D coarsening logic as it appears that those are not necessarily bad states, and a soft failure to coarsen should suffice.
    ---
     Src/EB/AMReX_EB2_3D_C.H | 18 ------------------
     1 file changed, 18 deletions(-)
    
    diff --git a/Src/EB/AMReX_EB2_3D_C.H b/Src/EB/AMReX_EB2_3D_C.H
    index 14543f81d25..3ea77f149fe 100644
    --- a/Src/EB/AMReX_EB2_3D_C.H
    +++ b/Src/EB/AMReX_EB2_3D_C.H
    @@ -200,11 +200,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine)
             nxm = 0;
         } else if (n == 2) {
             nxm = 1;
    -    } else if (n == 4) {
    -        ierr = 1;
         } else {
             ierr = 1;
    -        amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on xlo-face");
         }
     
         int nxp = -1;
    @@ -213,11 +210,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine)
             nxp = 0;
         } else if (n == 2) {
             nxp = 1;
    -    } else if (n == 4) {
    -        ierr = 1;
         } else {
             ierr = 1;
    -        amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on xhi-face");
         }
     
         // y-faces
    @@ -227,11 +221,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine)
             nym = 0;
         } else if (n == 2) {
             nym = 1;
    -    } else if (n == 4) {
    -        ierr = 1;
         } else {
             ierr = 1;
    -        amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on ylo-face");
         }
     
         int nyp = -1;
    @@ -240,11 +231,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine)
             nyp = 0;
         } else if (n == 2) {
             nyp = 1;
    -    } else if (n == 4) {
    -        ierr = 1;
         } else {
             ierr = 1;
    -        amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on yhi-face");
         }
     
         // z-faces
    @@ -254,11 +242,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine)
             nzm = 0;
         } else if (n == 2) {
             nzm = 1;
    -    } else if (n == 4) {
    -        ierr = 1;
         } else {
             ierr = 1;
    -        amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on zlo-face");
         }
     
         int nzp = -1;
    @@ -267,11 +252,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine)
             nzp = 0;
         } else if (n == 2) {
             nzp = 1;
    -    } else if (n == 4) {
    -        ierr = 1;
         } else {
             ierr = 1;
    -        amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on zhi-face");
         }
     
         if (nxm == 1 && nym == 1 && nzm == 1 && nxp == 1 && nyp == 1 && nzp == 1) {
    
    From dc8b734b6a70583602150cfbee1b7d51f8dacdeb Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Fri, 1 Jul 2022 17:19:20 -0700
    Subject: [PATCH 018/111] Cache the neighbor comm tags for the CPU
     implementation of fillNeighbors. (#2862)
    
    * Cache the neighbor comm tags for the CPU implementation of fillNeighbors.
    
    * fix areMasksValid function
    ---
     Src/Particle/AMReX_NeighborParticles.H        |  5 ++
     Src/Particle/AMReX_NeighborParticlesCPUImpl.H |  6 +-
     Src/Particle/AMReX_NeighborParticlesI.H       | 55 +++++++++++++------
     3 files changed, 46 insertions(+), 20 deletions(-)
    
    diff --git a/Src/Particle/AMReX_NeighborParticles.H b/Src/Particle/AMReX_NeighborParticles.H
    index 36d2c5351d7..344d39f778e 100644
    --- a/Src/Particle/AMReX_NeighborParticles.H
    +++ b/Src/Particle/AMReX_NeighborParticles.H
    @@ -348,6 +348,11 @@ protected:
         ///
         void BuildMasks ();
     
    +    ///
    +    /// Are the masks computed by the above function still valid?
    +    ///
    +    bool areMasksValid ();
    +
         void GetNeighborCommTags ();
     
         void GetCommTagsBox (Vector& tags, const int lev, const Box& in_box);
    diff --git a/Src/Particle/AMReX_NeighborParticlesCPUImpl.H b/Src/Particle/AMReX_NeighborParticlesCPUImpl.H
    index d5fb9fc40ee..4d5ecb4fcc8 100644
    --- a/Src/Particle/AMReX_NeighborParticlesCPUImpl.H
    +++ b/Src/Particle/AMReX_NeighborParticlesCPUImpl.H
    @@ -7,8 +7,10 @@ void
     NeighborParticleContainer
     ::fillNeighborsCPU () {
         BL_PROFILE("NeighborParticleContainer::fillNeighborsCPU");
    -    BuildMasks();
    -    GetNeighborCommTags();
    +    if (!areMasksValid()) {
    +        BuildMasks();
    +        GetNeighborCommTags();
    +    }
         cacheNeighborInfo();
         updateNeighborsCPU(false);
     }
    diff --git a/Src/Particle/AMReX_NeighborParticlesI.H b/Src/Particle/AMReX_NeighborParticlesI.H
    index a07cfab92c4..fc5788145b7 100644
    --- a/Src/Particle/AMReX_NeighborParticlesI.H
    +++ b/Src/Particle/AMReX_NeighborParticlesI.H
    @@ -119,6 +119,30 @@ NeighborParticleContainer
         this->Redistribute();
     }
     
    +template 
    +bool
    +NeighborParticleContainer
    +::areMasksValid () {
    +
    +    BL_PROFILE("NeighborParticleContainer::areMasksValid");
    +
    +    resizeContainers(this->numLevels());
    +
    +    for (int lev = 0; lev < this->numLevels(); ++lev)
    +    {
    +        BoxArray ba = this->ParticleBoxArray(lev);
    +        const DistributionMapping& dmap = this->ParticleDistributionMap(lev);
    +
    +        if (mask_ptr[lev] == nullptr ||
    +            ! BoxArray::SameRefs(mask_ptr[lev]->boxArray(), ba) ||
    +            ! DistributionMapping::SameRefs(mask_ptr[lev]->DistributionMap(), dmap))
    +        {
    +            return false;
    +        }
    +    }
    +    return true;
    +}
    +
     template 
     void
     NeighborParticleContainer
    @@ -136,30 +160,25 @@ NeighborParticleContainer
             BoxArray ba = this->ParticleBoxArray(lev);
             const DistributionMapping& dmap = this->ParticleDistributionMap(lev);
     
    -        if (mask_ptr[lev] == nullptr ||
    -            ! BoxArray::SameRefs(mask_ptr[lev]->boxArray(), ba) ||
    -            ! DistributionMapping::SameRefs(mask_ptr[lev]->DistributionMap(), dmap))
    -        {
    -            const Geometry& geom = this->Geom(lev);
    +        const Geometry& geom = this->Geom(lev);
     
    -            mask_ptr[lev] = std::make_unique(ba, dmap, int(num_mask_comps), m_num_neighbor_cells);
    -            mask_ptr[lev]->setVal(-1, m_num_neighbor_cells);
    +        mask_ptr[lev] = std::make_unique(ba, dmap, int(num_mask_comps), m_num_neighbor_cells);
    +        mask_ptr[lev]->setVal(-1, m_num_neighbor_cells);
     
     #ifdef AMREX_USE_OMP
     #pragma omp parallel
     #endif
    -            for (MFIter mfi(*mask_ptr[lev],this->do_tiling ? this->tile_size : IntVect::TheZeroVector());
    -                 mfi.isValid(); ++mfi) {
    -                const Box& box = mfi.tilebox();
    -                const int grid_id = mfi.index();
    -                const int tile_id = mfi.LocalTileIndex();
    -                (*mask_ptr[lev])[mfi].template setVal(grid_id, box, MaskComps::grid,  1);
    -                (*mask_ptr[lev])[mfi].template setVal(tile_id, box, MaskComps::tile,  1);
    -                (*mask_ptr[lev])[mfi].template setVal(lev    , box, MaskComps::level, 1);
    -            }
    -
    -            mask_ptr[lev]->FillBoundary(geom.periodicity());
    +        for (MFIter mfi(*mask_ptr[lev],this->do_tiling ? this->tile_size : IntVect::TheZeroVector());
    +             mfi.isValid(); ++mfi) {
    +            const Box& box = mfi.tilebox();
    +            const int grid_id = mfi.index();
    +            const int tile_id = mfi.LocalTileIndex();
    +            (*mask_ptr[lev])[mfi].template setVal(grid_id, box, MaskComps::grid,  1);
    +            (*mask_ptr[lev])[mfi].template setVal(tile_id, box, MaskComps::tile,  1);
    +            (*mask_ptr[lev])[mfi].template setVal(lev    , box, MaskComps::level, 1);
             }
    +
    +        mask_ptr[lev]->FillBoundary(geom.periodicity());
         }
     }
     
    
    From cbdc6580ee3d78cccdd37172e4ba077ee181f483 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Tue, 5 Jul 2022 01:41:03 +0200
    Subject: [PATCH 019/111] SENSEI 4.0: Fix Build for Particles (#2869)
    
    ## Summary
    
    This part causes a compile error now in WarpX.
    
    cc  @burlen @kwryankrattiger
    
    ## Additional background
    
    X-ref: Blocks WarpX 22.07 release https://github.com/ECP-WarpX/WarpX/pull/3211
    
    Follow-up to:
    - #2785
    - #2834
    ---
     Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H b/Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H
    index bede5908cdc..9208c8a753b 100644
    --- a/Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H
    +++ b/Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H
    @@ -86,7 +86,7 @@ int AmrMeshParticleInSituBridge::update(
     
         data_adaptor->SetDataTime(time);
         data_adaptor->SetDataTimeStep(step);
    -    ret = analysis_adaptor->Execute(data_adaptor) ? 0 : -1;
    +    ret = analysis_adaptor->Execute(data_adaptor, nullptr) ? 0 : -1;
         data_adaptor->ReleaseData();
         data_adaptor->Delete();
     
    
    From 557aae84902f63a84edc8b49831ee66af7d1a46a Mon Sep 17 00:00:00 2001
    From: Erik 
    Date: Wed, 6 Jul 2022 08:54:24 -0700
    Subject: [PATCH 020/111] point to new location of AMReX images, AMReX website
     repo (#2867)
    
    ---
     README.md | 4 ++--
     1 file changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/README.md b/README.md
    index 72c182470e1..da3a1abcbd2 100644
    --- a/README.md
    +++ b/README.md
    @@ -1,5 +1,5 @@
     
    -AMReX Logo +AMReX Logo

    @@ -71,7 +71,7 @@ in a wide variety of other scientific simulations, some of which, can be seen in our application [gallery](https://amrex-codes.github.io/amrex/gallery.html).

    ## Get Help From c849dd1994388cebd78a6a1624e80bc3ab640970 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 8 Jul 2022 08:06:37 -0700 Subject: [PATCH 021/111] New EB optimization parameter: eb2.num_coarsen_opt (#2872) At the beginning of EB generation, we chop the entire finest domain into boxes and find out the type of the boxes. We then collect the completely covered boxes and cut boxes into two BoxArrays. This process can be costly because of the number of calls to the implicit functions. In this commit, we have introduced a new ParmParse parameter, eb2.num_coarsen_opt, with a default value of zero. If, for instance, it is set to 3, we start the box type categorization at a resolution that is coarsened by a factor of 2^3. For the provisional cut boxes, we refine them by a factor of 2. Then we chop them into small boxes and categorize the new boxes. This process is performed recursively until we are at the original finest resolution. Users should be aware that, if eb2.num_coarsen_opt is too big, this could produce erroneous results because evaluating the implicit function on coarse boxes could miss fine structures in the EB. Thanks to Robert Marskar for sharing this algorithm.
--- Src/Base/AMReX_Box.cpp | 4 +- Src/EB/AMReX_EB2.H | 13 ++- Src/EB/AMReX_EB2.cpp | 34 +++++-- Src/EB/AMReX_EB2_IndexSpaceI.H | 8 +- Src/EB/AMReX_EB2_IndexSpace_STL.H | 2 +- Src/EB/AMReX_EB2_IndexSpace_STL.cpp | 6 +- Src/EB/AMReX_EB2_Level.H | 114 ++++++++++++++--------- Src/EB/AMReX_EB2_Level_STL.H | 2 +- Src/EB/AMReX_EB2_Level_STL.cpp | 4 +- Tests/LinearSolvers/CellEB2/inputs.rt.2d | 1 + Tests/LinearSolvers/CellEB2/inputs.rt.3d | 1 + 11 files changed, 121 insertions(+), 68 deletions(-) diff --git a/Src/Base/AMReX_Box.cpp b/Src/Base/AMReX_Box.cpp index f93818e784d..e61942c2a48 100644 --- a/Src/Base/AMReX_Box.cpp +++ b/Src/Base/AMReX_Box.cpp @@ -126,7 +126,7 @@ AllGatherBoxes (Vector& bxs, int n_extra_reserve) if (count_tot == 0) return; if (count_tot > static_cast(std::numeric_limits::max())) { - amrex::Abort("AllGatherBoxes: not many boxes"); + amrex::Abort("AllGatherBoxes: too many boxes"); } Vector recv_buffer; @@ -161,7 +161,7 @@ AllGatherBoxes (Vector& bxs, int n_extra_reserve) if (count_tot == 0) return; if (count_tot > static_cast(std::numeric_limits::max())) { - amrex::Abort("AllGatherBoxes: not many boxes"); + amrex::Abort("AllGatherBoxes: too many boxes"); } Vector recv_buffer; diff --git a/Src/EB/AMReX_EB2.H b/Src/EB/AMReX_EB2.H index ad56d532520..ff897276510 100644 --- a/Src/EB/AMReX_EB2.H +++ b/Src/EB/AMReX_EB2.H @@ -66,7 +66,7 @@ public: IndexSpaceImp (const G& gshop, const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, - bool extend_domain_face); + bool extend_domain_face, int num_coarsen_opt); IndexSpaceImp (IndexSpaceImp const&) = delete; IndexSpaceImp (IndexSpaceImp &&) = delete; @@ -95,27 +95,32 @@ private: #include bool ExtendDomainFace (); +int NumCoarsenOpt (); template void Build (const G& gshop, const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow = 4, bool build_coarse_level_by_coarsening = true, - bool extend_domain_face = 
ExtendDomainFace()) + bool extend_domain_face = ExtendDomainFace(), + int num_coarsen_opt = NumCoarsenOpt()) { BL_PROFILE("EB2::Initialize()"); IndexSpace::push(new IndexSpaceImp(gshop, geom, required_coarsening_level, max_coarsening_level, ngrow, build_coarse_level_by_coarsening, - extend_domain_face)); + extend_domain_face, + num_coarsen_opt)); } void Build (const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow = 4, - bool build_coarse_level_by_coarsening = true); + bool build_coarse_level_by_coarsening = true, + bool extend_domain_face = ExtendDomainFace(), + int num_coarsen_opt = NumCoarsenOpt()); int maxCoarseningLevel (const Geometry& geom); int maxCoarseningLevel (IndexSpace const* ebis, const Geometry& geom); diff --git a/Src/EB/AMReX_EB2.cpp b/Src/EB/AMReX_EB2.cpp index 3bdf44ee4e9..16f683cb019 100644 --- a/Src/EB/AMReX_EB2.cpp +++ b/Src/EB/AMReX_EB2.cpp @@ -21,12 +21,14 @@ AMREX_EXPORT Vector > IndexSpace::m_instance; AMREX_EXPORT int max_grid_size = 64; AMREX_EXPORT bool extend_domain_face = true; +AMREX_EXPORT int num_coarsen_opt = 0; void Initialize () { ParmParse pp("eb2"); pp.queryAdd("max_grid_size", max_grid_size); pp.queryAdd("extend_domain_face", extend_domain_face); + pp.queryAdd("num_coarsen_opt", num_coarsen_opt); amrex::ExecOnFinalize(Finalize); } @@ -41,6 +43,11 @@ bool ExtendDomainFace () return extend_domain_face; } +int NumCoarsenOpt () +{ + return num_coarsen_opt; +} + void IndexSpace::push (IndexSpace* ispace) { @@ -74,7 +81,8 @@ const IndexSpace* TopIndexSpaceIfPresent() noexcept { void Build (const Geometry& geom, int required_coarsening_level, - int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening) + int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, + bool a_extend_domain_face, int a_num_coarsen_opt) { ParmParse pp("eb2"); std::string geom_type; @@ -85,7 +93,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::AllRegularIF rif; 
EB2::GeometryShop gshop(rif); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "box") { @@ -102,7 +111,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(bf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "cylinder") { @@ -127,7 +137,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(cf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "plane") { @@ -141,7 +152,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(pf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "sphere") { @@ -158,7 +170,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(sf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "torus") { @@ -177,7 +190,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(sf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); 
+ max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "parser") { @@ -188,7 +202,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::ParserIF pif(parser.compile<3>()); EB2::GeometryShop gshop(pif,parser); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "stl") { @@ -206,7 +221,8 @@ Build (const Geometry& geom, int required_coarsening_level, geom, required_coarsening_level, max_coarsening_level, ngrow, build_coarse_level_by_coarsening, - extend_domain_face)); + a_extend_domain_face, + a_num_coarsen_opt)); } else { diff --git a/Src/EB/AMReX_EB2_IndexSpaceI.H b/Src/EB/AMReX_EB2_IndexSpaceI.H index 192df9f43a0..cdad6b31f6f 100644 --- a/Src/EB/AMReX_EB2_IndexSpaceI.H +++ b/Src/EB/AMReX_EB2_IndexSpaceI.H @@ -4,7 +4,7 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, - bool extend_domain_face) + bool extend_domain_face, int num_coarsen_opt) { // build finest level (i.e., level 0) first AMREX_ALWAYS_ASSERT(required_coarsening_level >= 0 && required_coarsening_level <= 30); @@ -20,7 +20,8 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, m_domain.push_back(geom.Domain()); m_ngrow.push_back(ngrow_finest); m_gslevel.reserve(max_coarsening_level+1); - m_gslevel.emplace_back(this, gshop, geom, EB2::max_grid_size, ngrow_finest, extend_domain_face); + m_gslevel.emplace_back(this, gshop, geom, EB2::max_grid_size, ngrow_finest, extend_domain_face, + num_coarsen_opt); for (int ilev = 1; ilev <= max_coarsening_level; ++ilev) { @@ -44,7 +45,8 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, if 
(build_coarse_level_by_coarsening) { amrex::Abort("Failed to build required coarse EB level "+std::to_string(ilev)); } else { - m_gslevel.emplace_back(this, gshop, cgeom, EB2::max_grid_size, ng, extend_domain_face); + m_gslevel.emplace_back(this, gshop, cgeom, EB2::max_grid_size, ng, extend_domain_face, + num_coarsen_opt-ilev); } } else { break; diff --git a/Src/EB/AMReX_EB2_IndexSpace_STL.H b/Src/EB/AMReX_EB2_IndexSpace_STL.H index 83edab1f9e4..4c4bb61ca35 100644 --- a/Src/EB/AMReX_EB2_IndexSpace_STL.H +++ b/Src/EB/AMReX_EB2_IndexSpace_STL.H @@ -19,7 +19,7 @@ public: const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, - bool extend_domain_face); + bool extend_domain_face, int num_coarsen_opt); IndexSpaceSTL (IndexSpaceSTL const&) = delete; IndexSpaceSTL (IndexSpaceSTL &&) = delete; diff --git a/Src/EB/AMReX_EB2_IndexSpace_STL.cpp b/Src/EB/AMReX_EB2_IndexSpace_STL.cpp index 00fc12a7879..2c2c77e4c0e 100644 --- a/Src/EB/AMReX_EB2_IndexSpace_STL.cpp +++ b/Src/EB/AMReX_EB2_IndexSpace_STL.cpp @@ -7,7 +7,7 @@ IndexSpaceSTL::IndexSpaceSTL (const std::string& stl_file, Real stl_scale, const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, - bool extend_domain_face) + bool extend_domain_face, int num_coarsen_opt) { Gpu::LaunchSafeGuard lsg(true); // Always use GPU @@ -29,7 +29,7 @@ IndexSpaceSTL::IndexSpaceSTL (const std::string& stl_file, Real stl_scale, m_ngrow.push_back(ngrow_finest); m_stllevel.reserve(max_coarsening_level+1); m_stllevel.emplace_back(this, stl_tools, geom, EB2::max_grid_size, ngrow_finest, - extend_domain_face); + extend_domain_face, num_coarsen_opt); for (int ilev = 1; ilev <= max_coarsening_level; ++ilev) { @@ -54,7 +54,7 @@ IndexSpaceSTL::IndexSpaceSTL (const std::string& stl_file, Real stl_scale, amrex::Abort("Failed to build required coarse EB level "+std::to_string(ilev)); } else { 
m_stllevel.emplace_back(this, stl_tools, cgeom, EB2::max_grid_size, ng, - extend_domain_face); + extend_domain_face, num_coarsen_opt-ilev); } } else { break; diff --git a/Src/EB/AMReX_EB2_Level.H b/Src/EB/AMReX_EB2_Level.H index d47917328c5..c42ff2ad5bc 100644 --- a/Src/EB/AMReX_EB2_Level.H +++ b/Src/EB/AMReX_EB2_Level.H @@ -98,12 +98,13 @@ class GShopLevel : public Level { public: - GShopLevel (IndexSpace const* is, G const& gshop, const Geometry& geom, int max_grid_size, int ngrow, bool extend_domain_face); + GShopLevel (IndexSpace const* is, G const& gshop, const Geometry& geom, int max_grid_size, + int ngrow, bool extend_domain_face, int num_crse_opt); GShopLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, const Geometry& geom, GShopLevel& fineLevel); GShopLevel (IndexSpace const* is, const Geometry& geom); void define_fine (G const& gshop, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face); + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt); }; template @@ -113,7 +114,7 @@ GShopLevel::GShopLevel (IndexSpace const* is, const Geometry& geom) template GShopLevel::GShopLevel (IndexSpace const* is, G const& gshop, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face) + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt) : Level(is, geom) { if (std::is_same::value) { @@ -122,13 +123,13 @@ GShopLevel::GShopLevel (IndexSpace const* is, G const& gshop, const Geometry& return; } - define_fine(gshop, geom, max_grid_size, ngrow, extend_domain_face); + define_fine(gshop, geom, max_grid_size, ngrow, extend_domain_face, num_crse_opt); } template void GShopLevel::define_fine (G const& gshop, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face) + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt) { if (amrex::Verbose() > 0 && extend_domain_face == false) { amrex::Print() << "AMReX WARNING: extend_domain_face=false 
is not recommended!\n"; @@ -166,57 +167,84 @@ GShopLevel::define_fine (G const& gshop, const Geometry& geom, Box bounding_box = (extend_domain_face) ? domain : domain_grown; bounding_box.surroundingNodes(); - BoxList bl(domain); - bl.maxSize(max_grid_size); - if (m_ngrow != 0) { - const IntVect& domlo = domain.smallEnd(); - const IntVect& domhi = domain.bigEnd(); - for (auto& b : bl) { - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - if (m_ngrow[idim] != 0) { - if (b.smallEnd(idim) == domlo[idim]) { - b.growLo(idim,m_ngrow[idim]); - } - if (b.bigEnd(idim) == domhi[idim]) { - b.growHi(idim,m_ngrow[idim]); - } + BoxList cut_boxes; + BoxList covered_boxes; + + const int nprocs = ParallelDescriptor::NProcs(); + const int iproc = ParallelDescriptor::MyProc(); + + num_crse_opt = std::max(0,std::min(8,num_crse_opt)); + for (int clev = num_crse_opt; clev >= 0; --clev) { + IntVect crse_ratio(1 << clev); + if (domain.coarsenable(crse_ratio)) { + Box const& crse_bounding_box = amrex::coarsen(bounding_box, crse_ratio); + Geometry const& crse_geom = amrex::coarsen(geom, crse_ratio); + BoxList test_boxes; + if (cut_boxes.isEmpty()) { + covered_boxes.clear(); + test_boxes = BoxList(crse_geom.Domain()); + test_boxes.maxSize(max_grid_size); + } else { + test_boxes.swap(cut_boxes); + test_boxes.coarsen(crse_ratio); + test_boxes.maxSize(max_grid_size); + } + + const Long nboxes = test_boxes.size(); + const auto& boxes = test_boxes.data(); + for (Long i = iproc; i < nboxes; i += nprocs) { + const Box& vbx = boxes[i]; + const Box& gbx = amrex::surroundingNodes(amrex::grow(vbx,1)); + auto box_type = gshop.getBoxType(gbx&crse_bounding_box,crse_geom,RunOn::Gpu); + if (box_type == gshop.allcovered) { + covered_boxes.push_back(amrex::refine(vbx, crse_ratio)); + } else if (box_type == gshop.mixedcells) { + cut_boxes.push_back(amrex::refine(vbx, crse_ratio)); } } + + amrex::AllGatherBoxes(cut_boxes.data()); } } - m_grids.define(std::move(bl)); - m_dmap.define(m_grids); - - Vector 
cut_boxes; - Vector covered_boxes; + amrex::AllGatherBoxes(covered_boxes.data()); - for (MFIter mfi(m_grids, m_dmap); mfi.isValid(); ++mfi) - { - const Box& vbx = mfi.validbox(); - const Box& gbx = amrex::surroundingNodes(amrex::grow(vbx,1)); - int box_type = gshop.getBoxType(gbx & bounding_box, geom, RunOn::Gpu); - if (box_type == gshop.allcovered) { - covered_boxes.push_back(vbx); - } else if (box_type == gshop.mixedcells) { - cut_boxes.push_back(vbx); - } + if (m_ngrow != 0) { + auto grow_at_domain_boundary = [&] (BoxList& bl) + { + const IntVect& domlo = domain.smallEnd(); + const IntVect& domhi = domain.bigEnd(); + for (auto& b : bl) { + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if (m_ngrow[idim] != 0) { + if (b.smallEnd(idim) == domlo[idim]) { + b.growLo(idim,m_ngrow[idim]); + } + if (b.bigEnd(idim) == domhi[idim]) { + b.growHi(idim,m_ngrow[idim]); + } + } + } + } + }; + grow_at_domain_boundary(covered_boxes); + grow_at_domain_boundary(cut_boxes); } - amrex::AllGatherBoxes(cut_boxes); - amrex::AllGatherBoxes(covered_boxes); - - if ( cut_boxes.empty() && - !covered_boxes.empty()) + if ( cut_boxes.isEmpty() && + !covered_boxes.isEmpty()) { amrex::Abort("AMReX_EB2_Level.H: Domain is completely covered"); } - if (!covered_boxes.empty()) { - m_covered_grids = BoxArray(BoxList(std::move(covered_boxes))); + if (!covered_boxes.isEmpty()) { + if (num_crse_opt > 2) { // don't want the box too big + covered_boxes.maxSize(max_grid_size*4); + } + m_covered_grids = BoxArray(std::move(covered_boxes)); } - if (cut_boxes.empty()) { + if (cut_boxes.isEmpty()) { m_grids = BoxArray(); m_dmap = DistributionMapping(); m_allregular = true; @@ -224,7 +252,7 @@ GShopLevel::define_fine (G const& gshop, const Geometry& geom, return; } - m_grids = BoxArray(BoxList(std::move(cut_boxes))); + m_grids = BoxArray(std::move(cut_boxes)); m_dmap = DistributionMapping(m_grids); m_mgf.define(m_grids, m_dmap); diff --git a/Src/EB/AMReX_EB2_Level_STL.H b/Src/EB/AMReX_EB2_Level_STL.H 
index f29460d7a92..19cb31ef93b 100644 --- a/Src/EB/AMReX_EB2_Level_STL.H +++ b/Src/EB/AMReX_EB2_Level_STL.H @@ -13,7 +13,7 @@ class STLLevel public: STLLevel (IndexSpace const* is, STLtools const& stl_tools, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face); + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt); STLLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, const Geometry& geom, STLLevel& fineLevel); diff --git a/Src/EB/AMReX_EB2_Level_STL.cpp b/Src/EB/AMReX_EB2_Level_STL.cpp index 00f29958714..53243cd754a 100644 --- a/Src/EB/AMReX_EB2_Level_STL.cpp +++ b/Src/EB/AMReX_EB2_Level_STL.cpp @@ -3,12 +3,12 @@ namespace amrex { namespace EB2 { STLLevel::STLLevel (IndexSpace const* is, STLtools const& stl_tools, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face) + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt) : GShopLevel(is, geom) { BL_PROFILE("EB2::STLLevel()-fine"); - define_fine(stl_tools, geom, max_grid_size, ngrow, extend_domain_face); + define_fine(stl_tools, geom, max_grid_size, ngrow, extend_domain_face, num_crse_opt); } STLLevel::STLLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, diff --git a/Tests/LinearSolvers/CellEB2/inputs.rt.2d b/Tests/LinearSolvers/CellEB2/inputs.rt.2d index 8dfd8a7bb3f..4afdf526259 100644 --- a/Tests/LinearSolvers/CellEB2/inputs.rt.2d +++ b/Tests/LinearSolvers/CellEB2/inputs.rt.2d @@ -11,6 +11,7 @@ max_level = 1 n_cell = 128 max_grid_size = 64 eb2.max_grid_size = 32 +eb2.num_coarsen_opt=3 eb2.geom_type = sphere eb2.sphere_center = 0.5 0.5 0.5 diff --git a/Tests/LinearSolvers/CellEB2/inputs.rt.3d b/Tests/LinearSolvers/CellEB2/inputs.rt.3d index 9a8037a68c0..64fcef6281b 100644 --- a/Tests/LinearSolvers/CellEB2/inputs.rt.3d +++ b/Tests/LinearSolvers/CellEB2/inputs.rt.3d @@ -11,6 +11,7 @@ max_level = 1 n_cell = 128 max_grid_size = 64 eb2.max_grid_size = 32 +eb2.num_coarsen_opt=3 eb2.geom_type = 
sphere eb2.sphere_center = 0.5 0.5 0.5 From 7660c885d46779367344adf88af75e630a0bc77a Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 8 Jul 2022 08:48:14 -0700 Subject: [PATCH 022/111] Allow zero components MultiFab and BaseFab (#2873) This is useful for particle I/O that does not have any mesh data. yt needs a header file associated with a MultiFab. --- Src/Base/AMReX_BaseFab.H | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H index 3a9f5eea018..52b3b16a615 100644 --- a/Src/Base/AMReX_BaseFab.H +++ b/Src/Base/AMReX_BaseFab.H @@ -350,10 +350,22 @@ public: * order, with the component index coming last. In other words, * dataPtr returns a pointer to all the Nth components. */ - T* dataPtr (int n = 0) noexcept { AMREX_ASSERT(!(this->dptr == 0)); return &(this->dptr[n*this->domain.numPts()]); } + T* dataPtr (int n = 0) noexcept { + if (this->dptr) { + return &(this->dptr[n*this->domain.numPts()]); + } else { + return nullptr; + } + } //! Same as above except works on const FABs. 
- const T* dataPtr (int n = 0) const noexcept { AMREX_ASSERT(!(this->dptr == 0)); return &(this->dptr[n*this->domain.numPts()]); } + const T* dataPtr (int n = 0) const noexcept { + if (this->dptr) { + return &(this->dptr[n*this->domain.numPts()]); + } else { + return nullptr; + } + } T* dataPtr (const IntVect& iv, int n = 0) noexcept; @@ -1882,9 +1894,9 @@ BaseFab::define () { AMREX_ASSERT(this->dptr == 0); AMREX_ASSERT(this->domain.numPts() > 0); - AMREX_ASSERT(std::numeric_limits::max()/this->nvar > this->domain.numPts()); AMREX_ASSERT(this->nvar >= 0); if (this->nvar == 0) return; + AMREX_ASSERT(std::numeric_limits::max()/this->nvar > this->domain.numPts()); this->truesize = this->nvar*this->domain.numPts(); this->ptr_owner = true; From a633d2bff1db1a3335efd077a34b6a8dcfb4e793 Mon Sep 17 00:00:00 2001 From: Luca Fedeli Date: Fri, 8 Jul 2022 20:34:18 +0200 Subject: [PATCH 023/111] Workaround to bypass issue observed at very large scale with Fujitsu MPI (#2874) We have observed some MPI issues at very large scale when WarpX is compiled using Fujitsu MPI (i.e., with the Fujitsu compiler). These issues seem to be related to the use of MPI Gatherv with MPI_Datatype. This PR implements a possible workaround, initially proposed by @WeiqunZhang . The idea is that, when WarpX is compiled with the Fujitsu compiler, simpler integer arrays instead of MPI_Datatype are used in the routine where the issue was observed. --- Src/AmrCore/AMReX_TagBox.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Src/AmrCore/AMReX_TagBox.cpp b/Src/AmrCore/AMReX_TagBox.cpp index 6a989ffbbf1..668f1d18530 100644 --- a/Src/AmrCore/AMReX_TagBox.cpp +++ b/Src/AmrCore/AMReX_TagBox.cpp @@ -649,7 +649,24 @@ TagBoxArray::collate (Gpu::PinnedVector& TheGlobalCollateSpace) const // const IntVect* psend = (count > 0) ? 
TheLocalCollateSpace.data() : nullptr; IntVect* precv = TheGlobalCollateSpace.data(); + + //Issues have been observed with the following call at very large scale when using + //FujitsuMPI. The issue seems to be related to the use of MPI_Datatype. We can + //bypass the issue by exchanging simpler integer arrays. +#ifndef __FUJITSU ParallelDescriptor::Gatherv(psend, count, precv, countvec, offset, IOProcNumber); +#else + const int* psend_int = psend->begin(); + int* precv_int = precv->begin(); + Long count_int = count * AMREX_SPACEDIM; + auto countvec_int = std::vector(countvec.size()); + auto offset_int = std::vector(offset.size()); + const auto mul_funct = [](const auto el){return el*AMREX_SPACEDIM;}; + std::transform(countvec.begin(), countvec.end(), countvec_int.begin(), mul_funct); + std::transform(offset.begin(), offset.end(), offset_int.begin(), mul_funct); + ParallelDescriptor::Gatherv( + psend_int, count_int, precv_int, countvec_int, offset_int, IOProcNumber); +#endif #else TheGlobalCollateSpace = std::move(TheLocalCollateSpace); From 40b3d2176b17785191050482a2ead5539993fac6 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 13 Jul 2022 13:24:15 -0700 Subject: [PATCH 024/111] Add extra braces in initialization of GpuArray (#2876) It should not be needed since C++14. But some compilers seem to need the double braces. 
--- Src/Base/AMReX_TableData.H | 12 ++++++------ Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Src/Base/AMReX_TableData.H b/Src/Base/AMReX_TableData.H index e44758bde6d..f44157160a7 100644 --- a/Src/Base/AMReX_TableData.H +++ b/Src/Base/AMReX_TableData.H @@ -77,8 +77,8 @@ struct Table2D { T* AMREX_RESTRICT p = nullptr; Long jstride = 0; - GpuArray begin{1,1}; - GpuArray end{0,0}; + GpuArray begin{{1,1}}; + GpuArray end{{0,0}}; AMREX_GPU_HOST_DEVICE constexpr Table2D () noexcept {} @@ -142,8 +142,8 @@ struct Table3D T* AMREX_RESTRICT p = nullptr; Long jstride = 0; Long kstride = 0; - GpuArray begin{1,1,1}; - GpuArray end{0,0,0}; + GpuArray begin{{1,1,1}}; + GpuArray end{{0,0,0}}; AMREX_GPU_HOST_DEVICE constexpr Table3D () noexcept {} @@ -213,8 +213,8 @@ struct Table4D Long jstride = 0; Long kstride = 0; Long nstride = 0; - GpuArray begin{1,1,1,1}; - GpuArray end{0,0,0,0}; + GpuArray begin{{1,1,1,1}}; + GpuArray end{{0,0,0,0}}; AMREX_GPU_HOST_DEVICE constexpr Table4D () noexcept {} diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H index 1215eda1f6c..b63aaa8d001 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H @@ -119,7 +119,7 @@ public: #endif private: - GpuArray m_sigma{AMREX_D_DECL(1_rt,1_rt,1_rt)}; + GpuArray m_sigma{{AMREX_D_DECL(1_rt,1_rt,1_rt)}}; Real m_s_phi_eb = std::numeric_limits::lowest(); Vector m_phi_eb; int m_rz = false; From 73dbf2f909bdc6c497eb5245b4e707b4814e699f Mon Sep 17 00:00:00 2001 From: hengjiew <86926839+hengjiew@users.noreply.github.com> Date: Mon, 18 Jul 2022 12:53:35 -0400 Subject: [PATCH 025/111] Fix the segmentation fault in selecting actual neighbor particles. 
(#2877) --- Src/Particle/AMReX_NeighborParticlesGPUImpl.H | 2 +- Src/Particle/AMReX_NeighborParticlesI.H | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Src/Particle/AMReX_NeighborParticlesGPUImpl.H b/Src/Particle/AMReX_NeighborParticlesGPUImpl.H index 6e112318757..81bef1302e2 100644 --- a/Src/Particle/AMReX_NeighborParticlesGPUImpl.H +++ b/Src/Particle/AMReX_NeighborParticlesGPUImpl.H @@ -121,7 +121,7 @@ buildNeighborCopyOp (bool use_boundary_neighbor) { BL_PROFILE("NeighborParticleContainer::buildNeighborCopyOp()"); - AMREX_ASSERT(hasNeighbors() == false); + AMREX_ASSERT(!hasNeighbors() || use_boundary_neighbor); const int lev = 0; const auto& geom = this->Geom(lev); diff --git a/Src/Particle/AMReX_NeighborParticlesI.H b/Src/Particle/AMReX_NeighborParticlesI.H index fc5788145b7..e0c4c066a75 100644 --- a/Src/Particle/AMReX_NeighborParticlesI.H +++ b/Src/Particle/AMReX_NeighborParticlesI.H @@ -857,8 +857,8 @@ selectActualNeighbors (CheckPair&& check_pair, int num_cells) auto pperm = bins.permutationPtr(); auto poffset = bins.offsetsPtr(); - unsigned int np_boundary = 0; - unsigned int* p_np_boundary = &np_boundary; + Gpu::Buffer np_boundary({0}); + unsigned int* p_np_boundary = np_boundary.data(); constexpr unsigned int max_unsigned_int = std::numeric_limits::max(); AMREX_FOR_1D ( np_real, i, @@ -899,9 +899,9 @@ selectActualNeighbors (CheckPair&& check_pair, int num_cells) } } });// end amrex_for_1d - Gpu::streamSynchronize(); - m_boundary_particle_ids[lev][index].resize(np_boundary); + unsigned int* p_np_boundary_h = np_boundary.copyToHost(); + m_boundary_particle_ids[lev][index].resize(*p_np_boundary_h); }// end mypariter }// end lev From b673d81723c5585a1290126233d38f50833939d7 Mon Sep 17 00:00:00 2001 From: Max Katz Date: Mon, 18 Jul 2022 15:14:19 -0400 Subject: [PATCH 026/111] Add option to derefine to AMRErrorTag (#2875) This allows a refinement field to specify *derefinement* (by setting a zone's tagging value to the clear value). 
--- Src/AmrCore/AMReX_ErrorList.H | 5 +++++ Src/AmrCore/AMReX_ErrorList.cpp | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/Src/AmrCore/AMReX_ErrorList.H b/Src/AmrCore/AMReX_ErrorList.H index 90f49b02749..8cf67ea5567 100644 --- a/Src/AmrCore/AMReX_ErrorList.H +++ b/Src/AmrCore/AMReX_ErrorList.H @@ -383,6 +383,7 @@ std::ostream& operator << (std::ostream& os, const ErrorList& elst); Real m_min_time = std::numeric_limits::lowest(); Real m_max_time = std::numeric_limits::max(); int m_volume_weighting = 0; + int m_derefine = 0; RealBox m_realbox; AMRErrorTagInfo& SetMaxLevel (int max_level) noexcept { @@ -405,6 +406,10 @@ std::ostream& operator << (std::ostream& os, const ErrorList& elst); m_volume_weighting = volume_weighting; return *this; } + AMRErrorTagInfo& SetDerefine (int derefine) noexcept { + m_derefine = derefine; + return *this; + } }; class AMRErrorTag diff --git a/Src/AmrCore/AMReX_ErrorList.cpp b/Src/AmrCore/AMReX_ErrorList.cpp index 1594ba740a9..7f37324123a 100644 --- a/Src/AmrCore/AMReX_ErrorList.cpp +++ b/Src/AmrCore/AMReX_ErrorList.cpp @@ -293,6 +293,11 @@ AMRErrorTag::operator() (TagBoxArray& tba, auto threshold = m_value[level]; auto const volume_weighting = m_info.m_volume_weighting; auto geomdata = geom.data(); + auto tag_update = tagval; + if (m_info.m_derefine) { + tag_update = clearval; + } + if (m_test == GRAD) { ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept @@ -301,7 +306,7 @@ AMRErrorTag::operator() (TagBoxArray& tba, auto ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); #if AMREX_SPACEDIM == 1 - if (ax >= threshold) { tagma[bi](i,j,k) = tagval;} + if (ax >= threshold) { tagma[bi](i,j,k) = tag_update;} #else auto ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); @@ -310,7 +315,7 @@ AMRErrorTag::operator() (TagBoxArray& tba, az = 
amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); #endif if (amrex::max(AMREX_D_DECL(ax,ay,az)) >= threshold) { - tagma[bi](i,j,k) = tagval; + tagma[bi](i,j,k) = tag_update; } #endif }); @@ -323,7 +328,7 @@ AMRErrorTag::operator() (TagBoxArray& tba, auto ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); #if AMREX_SPACEDIM == 1 - if (ax >= threshold * amrex::Math::abs(dat(i,j,k))) { tagma[bi](i,j,k) = tagval;} + if (ax >= threshold * amrex::Math::abs(dat(i,j,k))) { tagma[bi](i,j,k) = tag_update;} #else auto ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); @@ -333,7 +338,7 @@ AMRErrorTag::operator() (TagBoxArray& tba, #endif if (amrex::max(AMREX_D_DECL(ax,ay,az)) >= threshold * amrex::Math::abs(dat(i,j,k))) { - tagma[bi](i,j,k) = tagval; + tagma[bi](i,j,k) = tag_update; } #endif }); @@ -344,7 +349,7 @@ AMRErrorTag::operator() (TagBoxArray& tba, { Real vol = volume_weighting ? Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; if (datma[bi](i,j,k) * vol <= threshold) { - tagma[bi](i,j,k) = tagval; + tagma[bi](i,j,k) = tag_update; } }); } @@ -354,7 +359,7 @@ AMRErrorTag::operator() (TagBoxArray& tba, { Real vol = volume_weighting ? 
Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; if (datma[bi](i,j,k) * vol >= threshold) { - tagma[bi](i,j,k) = tagval; + tagma[bi](i,j,k) = tag_update; } }); } @@ -364,7 +369,7 @@ AMRErrorTag::operator() (TagBoxArray& tba, ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept { if (datma[bi](i,j,k) >= fac) { - tagma[bi](i,j,k) = tagval; + tagma[bi](i,j,k) = tag_update; } }); } From 8e40952af9ab0600174f491c81100132f9b24c6e Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 20 Jul 2022 12:10:26 -0700 Subject: [PATCH 027/111] Add Frontier to GNU Make (#2879) --- Tools/GNUMake/Make.machines | 5 +++++ Tools/GNUMake/sites/Make.olcf | 22 +++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/Tools/GNUMake/Make.machines b/Tools/GNUMake/Make.machines index 6903ba05125..d68e594d80f 100644 --- a/Tools/GNUMake/Make.machines +++ b/Tools/GNUMake/Make.machines @@ -60,6 +60,11 @@ ifdef OLCF_ROCM_ROOT which_site := olcf which_computer := crusher endif + + ifeq ($(findstring frontier, $(host_name)), frontier) + which_site := olcf + which_computer := frontier + endif endif ifeq ($(findstring theta, $(host_name)), theta) diff --git a/Tools/GNUMake/sites/Make.olcf b/Tools/GNUMake/sites/Make.olcf index fcccfc8de08..69f557786df 100644 --- a/Tools/GNUMake/sites/Make.olcf +++ b/Tools/GNUMake/sites/Make.olcf @@ -2,7 +2,7 @@ # For Summit et al. 
at OLCF # -OLCF_MACHINES := summit ascent spock crusher +OLCF_MACHINES := summit ascent spock crusher frontier ifneq ($(which_computer), $(findstring $(which_computer), $(OLCF_MACHINES))) $(error Unknown OLCF computer, $(which_computer)) @@ -84,3 +84,23 @@ ifeq ($(which_computer),crusher) endif endif endif + +ifeq ($(which_computer),frontier) + ifeq ($(USE_HIP),TRUE) + # MI250X + AMD_ARCH=gfx90a + endif + + ifeq ($(USE_MPI),TRUE) + includes += $(shell CC --cray-print-opts=cflags) + ifneq ($(BL_NO_FORT),TRUE) + LIBRARIES += $(shell ftn --cray-print-opts=libs) + else + LIBRARIES += $(shell CC --cray-print-opts=libs) + endif + # for gpu aware mpi + ifeq ($(USE_HIP),TRUE) + LIBRARIES += $(PE_MPICH_GTL_DIR_amd_gfx90a) -lmpi_gtl_hsa + endif + endif +endif From 4b7e20057a3dff84beae21812d826d24e19f2109 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 21 Jul 2022 10:25:57 -0700 Subject: [PATCH 028/111] HIP: Remove the call to hipDeviceSetSharedMemConfig (#2884) AMD devices do not support shared cache banking. Thanks @afanfa for reporting this. (#2883) --- Src/Base/AMReX_GpuDevice.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp index 8d42363f0a7..c0e9b3e6785 100644 --- a/Src/Base/AMReX_GpuDevice.cpp +++ b/Src/Base/AMReX_GpuDevice.cpp @@ -397,11 +397,7 @@ Device::initialize_gpu () // check compute capability - if (sizeof(Real) == 8) { - AMREX_HIP_SAFE_CALL(hipDeviceSetSharedMemConfig(hipSharedMemBankSizeEightByte)); - } else if (sizeof(Real) == 4) { - AMREX_HIP_SAFE_CALL(hipDeviceSetSharedMemConfig(hipSharedMemBankSizeFourByte)); - } + // AMD devices do not support shared cache banking. 
AMREX_HIP_SAFE_CALL(hipStreamCreate(&gpu_default_stream)); for (int i = 0; i < max_gpu_streams; ++i) { From 7cf77dc60e149ebe822f6b5428556f9208e150fa Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 26 Jul 2022 11:01:21 -0700 Subject: [PATCH 029/111] MinLoc and MaxLoc Support (#2885) Add struct ValLocPair that can be used by ReduceOps/ReduceData and ParReduce to find the location of the min/max value. Add warp shuffle down function for more general types. This is needed for MinLoc/MaxLoc with CUDA < 11, because we don't use CUB for earlier versions of CUDA. The Intel GPU support is not done yet. We need to allocate enough shared local memory when the size of ValLocPair is larger than the size of unsigned long long. --- Src/Base/AMReX_GpuReduce.H | 44 ++++++++++++++++++++++++++++++++++++-- Src/Base/AMReX_Reduce.H | 40 ++++++++++++++++++++++++++++++++-- 2 files changed, 80 insertions(+), 4 deletions(-) diff --git a/Src/Base/AMReX_GpuReduce.H b/Src/Base/AMReX_GpuReduce.H index 9b48138940c..3907ca385f6 100644 --- a/Src/Base/AMReX_GpuReduce.H +++ b/Src/Base/AMReX_GpuReduce.H @@ -8,6 +8,7 @@ #include #include #include +#include #if !defined(AMREX_USE_CUB) && defined(AMREX_USE_CUDA) && defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 11) #define AMREX_USE_CUB 1 @@ -249,15 +250,54 @@ void deviceReduceLogicalOr (int * dest, int source, Gpu::Handler const& h) noexc #elif defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP) +namespace detail { + +template +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +T shuffle_down (T x, int offset) noexcept +{ + return AMREX_HIP_OR_CUDA(__shfl_down(x, offset), + __shfl_down_sync(0xffffffff, x, offset)); +} + +// If other sizeof is needed, we can implement it later. 
+template = 0> +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +T multi_shuffle_down (T x, int offset) noexcept +{ + constexpr int nwords = (sizeof(T) + sizeof(unsigned int) - 1) / sizeof(unsigned int); + T y; + auto py = reinterpret_cast(&y); + auto px = reinterpret_cast(&x); + for (int i = 0; i < nwords; ++i) { + py[i] = shuffle_down(px[i],offset); + } + return y; +} + +} + template struct warpReduce { + // Not all arithmetic types can be taken by shuffle_down, but it's good enough. + template ::value,int> = 0> + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + T operator() (T x) const noexcept + { + for (int offset = warpSize/2; offset > 0; offset /= 2) { + T y = detail::shuffle_down(x, offset); + x = F()(x,y); + } + return x; + } + + template ::value,int> = 0> AMREX_GPU_DEVICE AMREX_FORCE_INLINE T operator() (T x) const noexcept { for (int offset = warpSize/2; offset > 0; offset /= 2) { - AMREX_HIP_OR_CUDA(T y = __shfl_down(x, offset);, - T y = __shfl_down_sync(0xffffffff, x, offset); ) + T y = detail::multi_shuffle_down(x, offset); x = F()(x,y); } return x; diff --git a/Src/Base/AMReX_Reduce.H b/Src/Base/AMReX_Reduce.H index 9c07b7b4a2a..05b56b97fa9 100644 --- a/Src/Base/AMReX_Reduce.H +++ b/Src/Base/AMReX_Reduce.H @@ -9,9 +9,35 @@ #include #include +#include namespace amrex { +template +struct ValLocPair +{ + TV value; + TI index; + + static constexpr ValLocPair max () { + return ValLocPair{std::numeric_limits::max(), TI()}; + } + + static constexpr ValLocPair lowest () { + return ValLocPair{std::numeric_limits::lowest(), TI()}; + } + + friend constexpr bool operator< (ValLocPair const& a, ValLocPair const& b) + { + return a.value < b.value; + } + + friend constexpr bool operator> (ValLocPair const& a, ValLocPair const& b) + { + return a.value > b.value; + } +}; + namespace Reduce { namespace detail { #ifdef AMREX_USE_GPU @@ -133,7 +159,12 @@ struct ReduceOpMin void local_update (T& d, T const& s) const noexcept { d = amrex::min(d,s); } template - constexpr void init (T& t) const 
noexcept { t = std::numeric_limits::max(); } + constexpr std::enable_if_t::is_specialized> + init (T& t) const noexcept { t = std::numeric_limits::max(); } + + template + constexpr std::enable_if_t::is_specialized> + init (T& t) const noexcept { t = T::max(); } }; struct ReduceOpMax @@ -161,7 +192,12 @@ struct ReduceOpMax void local_update (T& d, T const& s) const noexcept { d = amrex::max(d,s); } template - constexpr void init (T& t) const noexcept { t = std::numeric_limits::lowest(); } + constexpr std::enable_if_t::is_specialized> + init (T& t) const noexcept { t = std::numeric_limits::lowest(); } + + template + constexpr std::enable_if_t::is_specialized> + init (T& t) const noexcept { t = T::lowest(); } }; struct ReduceOpLogicalAnd From 06753e60aca7d063b28be93379c948e92afb8c5e Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Tue, 26 Jul 2022 12:54:35 -0700 Subject: [PATCH 030/111] `TagBoxArray::collate`: Fujitsu Clang (#2889) `mpiFCC -Nclang` only defines `__CLANG_FUJITSU`, not `__FUJITSU` as in the classic compiler mode. --- Src/AmrCore/AMReX_TagBox.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Src/AmrCore/AMReX_TagBox.cpp b/Src/AmrCore/AMReX_TagBox.cpp index 668f1d18530..9654e1cac7a 100644 --- a/Src/AmrCore/AMReX_TagBox.cpp +++ b/Src/AmrCore/AMReX_TagBox.cpp @@ -650,10 +650,10 @@ TagBoxArray::collate (Gpu::PinnedVector& TheGlobalCollateSpace) const const IntVect* psend = (count > 0) ? TheLocalCollateSpace.data() : nullptr; IntVect* precv = TheGlobalCollateSpace.data(); - //Issues have been observed with the following call at very large scale when using - //FujitsuMPI. The issue seems to be related to the use of MPI_Datatype. We can - //bypasses the issue by exchanging simpler integer arrays. -#ifndef __FUJITSU + // Issues have been observed with the following call at very large scale when using + // FujitsuMPI. The issue seems to be related to the use of MPI_Datatype. 
We can + // bypasses the issue by exchanging simpler integer arrays. +#if !(defined(__FUJITSU) || defined(__CLANG_FUJITSU)) ParallelDescriptor::Gatherv(psend, count, precv, countvec, offset, IOProcNumber); #else const int* psend_int = psend->begin(); From ce0fb7412dff3ceeec00941ba525e7ecf5ce8015 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Tue, 26 Jul 2022 16:20:38 -0700 Subject: [PATCH 031/111] Fix host / device sync bug in PODVector (#2890) --- Src/Base/AMReX_PODVector.H | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Src/Base/AMReX_PODVector.H b/Src/Base/AMReX_PODVector.H index 7217b4e814e..bfae2c01627 100644 --- a/Src/Base/AMReX_PODVector.H +++ b/Src/Base/AMReX_PODVector.H @@ -608,7 +608,10 @@ namespace amrex void AllocateBuffer (size_type a_capacity) noexcept { pointer new_data = allocate(a_capacity); - if (m_data) detail::memCopyImpl(new_data, m_data, size() * sizeof(T), *this); + if (m_data) { + detail::memCopyImpl(new_data, m_data, size() * sizeof(T), *this); + amrex::Gpu::streamSynchronize(); + } deallocate(m_data, capacity()); m_data = new_data; m_capacity = a_capacity; @@ -621,9 +624,10 @@ namespace amrex pointer new_data = allocate(a_capacity); if (m_data) { - memCopyImpl(new_data, m_data, a_index * sizeof(T), *this); + memCopyImpl(new_data, m_data, a_index * sizeof(T), *this); memCopyImpl(new_data + a_index + a_count, m_data + a_index, (size() - a_index)*sizeof(T), *this); + amrex::Gpu::streamSynchronize(); } deallocate(m_data, capacity()); m_data = new_data; From 51542c85ac18642a2cfb69ea3df3cf544d3d6f42 Mon Sep 17 00:00:00 2001 From: philip-blakely <46958218+philip-blakely@users.noreply.github.com> Date: Wed, 27 Jul 2022 17:29:26 +0100 Subject: [PATCH 032/111] Multi-materials and derived variable output (#2888) ## Summary Output small plots if only derived variables are specified. Also, make DeriveFuncFab a std::function<> instead of plain function-pointer. 
## Additional background We have been implementing small-plots for outputting variables at gauges (e.g. pressure at specific gauge locations). We may want to output the derived variable pressure only, and not all state-variables. The if-condition was incorrect in this case. Further, multi-material simulations require a material index in order to compute derived variables, in addition to existing parameters. Making DeriveFuncFab a std::function is sufficient for our purposes. --- Src/Amr/AMReX_Amr.cpp | 2 +- Src/Amr/AMReX_Derive.H | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Src/Amr/AMReX_Amr.cpp b/Src/Amr/AMReX_Amr.cpp index 66ec4664c5a..02f0452eac9 100644 --- a/Src/Amr/AMReX_Amr.cpp +++ b/Src/Amr/AMReX_Amr.cpp @@ -910,7 +910,7 @@ Amr::writeSmallPlotFile () // Don't continue if we have no variables to plot. - if (stateSmallPlotVars().size() == 0) { + if (stateSmallPlotVars().size() == 0 && deriveSmallPlotVars().size() == 0) { return; } diff --git a/Src/Amr/AMReX_Derive.H b/Src/Amr/AMReX_Derive.H index 2a7c2e26713..7d5b32d7aa6 100644 --- a/Src/Amr/AMReX_Derive.H +++ b/Src/Amr/AMReX_Derive.H @@ -84,9 +84,9 @@ extern "C" const int* level, const int* grid_no) ; } -typedef void (*DeriveFuncFab) (const amrex::Box& bx, amrex::FArrayBox& derfab, int dcomp, int ncomp, - const amrex::FArrayBox& datafab, const amrex::Geometry& geomdata, - amrex::Real time, const int* bcrec, int level); + typedef std::function DeriveFuncFab; class DescriptorList; From 6a47d89fd12cb06d48e3e0d85eea415274e84a69 Mon Sep 17 00:00:00 2001 From: kngott Date: Wed, 27 Jul 2022 17:03:04 -0700 Subject: [PATCH 033/111] Add Comm Sync to Redistribute (#2891) --- Src/Base/AMReX_FabArrayCommI.H | 4 ++-- Src/Particle/AMReX_ParticleContainerI.H | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Src/Base/AMReX_FabArrayCommI.H b/Src/Base/AMReX_FabArrayCommI.H index c894fe0b2c7..3d3fe1743a2 100644 --- a/Src/Base/AMReX_FabArrayCommI.H +++
b/Src/Base/AMReX_FabArrayCommI.H @@ -10,7 +10,7 @@ FabArray::FBEP_nowait (int scomp, int ncomp, const IntVect& nghost, bool enforce_periodicity_only, bool override_sync) { - BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms"); + BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: FB"); BL_PROFILE("FillBoundary_nowait()"); AMREX_ASSERT_WITH_MESSAGE(!fbd, "FillBoundary_nowait() called when comm operation already in progress."); @@ -316,7 +316,7 @@ FabArray::ParallelCopy_nowait (const FabArray& src, const FabArrayBase::CPC * a_cpc, bool to_ghost_cells_only) { - BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms"); + BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: PC"); BL_PROFILE("FabArray::ParallelCopy_nowait()"); AMREX_ASSERT_WITH_MESSAGE(!pcd, "ParallelCopy_nowait() called when comm operation already in progress."); diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index f6f51c572cf..be7763486ab 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1073,6 +1073,8 @@ void ParticleContainer ::Redistribute (int lev_min, int lev_max, int nGrow, int local, bool remove_negative) { + BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: Redist"); + #ifdef AMREX_USE_GPU if ( Gpu::inLaunchRegion() ) { @@ -1085,6 +1087,8 @@ ParticleContainer #else RedistributeCPU(lev_min, lev_max, nGrow, local, remove_negative); #endif + + BL_PROFILE_SYNC_STOP(); } template Date: Thu, 28 Jul 2022 14:14:19 -0400 Subject: [PATCH 034/111] Let `selectActualNeighbors` return right after starting if there are (#2886) no particles for communication. 
--- Src/Particle/AMReX_NeighborParticlesI.H | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Src/Particle/AMReX_NeighborParticlesI.H b/Src/Particle/AMReX_NeighborParticlesI.H index e0c4c066a75..bcdaeebdbbf 100644 --- a/Src/Particle/AMReX_NeighborParticlesI.H +++ b/Src/Particle/AMReX_NeighborParticlesI.H @@ -813,6 +813,11 @@ NeighborParticleContainer:: selectActualNeighbors (CheckPair&& check_pair, int num_cells) { BL_PROFILE("NeighborParticleContainer::selectActualNeighbors"); + const auto& geom_fine = this->Geom(0); + const auto& ba_fine = this->ParticleBoxArray(0); + if (ba_fine.size() == 1 && !geom_fine.isAnyPeriodic()) { + return; + } for (int lev = 0; lev < this->numLevels(); ++lev) { From 5a3b3037950937343b7eafd292e5032cb8c7221c Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 1 Aug 2022 09:34:44 -0700 Subject: [PATCH 035/111] Update CHANGES for 22.08 (#2894) --- CHANGES | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/CHANGES b/CHANGES index 7982e1ddcb8..9c2657f7114 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,39 @@ +# 22.08 + + -- Let `selectActualNeighbors` return right after starting if there are no + particles for communication. (#2886) + + -- Add Comm Sync to Redistribute (#2891) + + -- Multi-materials and derived variable output (#2888) + + -- Fix host / device sync bug in PODVector (#2890) + + -- MinLoc and MaxLoc Support (#2885) + + -- HIP: Remove the call to hipDeviceSetSharedMemConfig (#2884) + + -- Add Frontier to GNU Make (#2879) + + -- Add option to derefine to AMRErrorTag (#2875) + + -- Fix the segmentation fault in selecting actual neighbor particles. 
(#2877) + + -- Workaround to bypass issue observed at very large scale with Fujitsu MPI (#2874) + `TagBoxArray::collate`: Fujitsu Clang (#2889) + + -- Allow zero components MultiFab and BaseFab (#2873) + + -- New EB optimization parameter: eb2.num_coarsen_opt (#2872) + + -- SENSEI 4.0: Fix Build for Particles (#2869) + + -- Cache the neighbor comm tags for the CPU implementation of fillNeighbors. (#2862) + + -- Remove some hard checks in check_mvmc for 3D (#2864) + + -- Carry over fix for ngbxy.smallEnd typo (#2868) + # 22.07 -- Adding control APIs and namespacing for core algorithm paths like SpGEMM, SpMV, and SpTrans. (#2859) From 94693291667bd0435819aa09cf28a293da226bf4 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 1 Aug 2022 09:43:21 -0700 Subject: [PATCH 036/111] MLMG interface (#2858) These changes are made to support a generic type (i.e., amrex::Any) in MLMG. This is still work in progress. But it should not break any existing codes. --- Src/Base/AMReX_Any.H | 5 +- Src/Base/AMReX_BaseFab.H | 2 +- Src/Base/AMReX_FabArray.H | 2 +- .../MLMG/AMReX_MLABecLaplacian.cpp | 4 +- Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 5 + Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp | 7 + Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H | 4 +- .../MLMG/AMReX_MLCellABecLap.cpp | 11 +- Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H | 27 +- Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp | 451 +++++++- .../MLMG/AMReX_MLEBNodeFDLaplacian.H | 2 +- .../MLMG/AMReX_MLEBNodeFDLaplacian.cpp | 21 +- Src/LinearSolvers/MLMG/AMReX_MLLinOp.H | 137 ++- Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp | 277 ++++- Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H | 486 +++++++++ Src/LinearSolvers/MLMG/AMReX_MLMG.H | 42 +- Src/LinearSolvers/MLMG/AMReX_MLMG.cpp | 979 +++++------------- .../MLMG/AMReX_MLNodeLaplacian.H | 8 +- .../MLMG/AMReX_MLNodeLaplacian.cpp | 29 +- Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H | 38 +- Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp | 156 ++- 21 files changed, 1801 insertions(+), 892 
deletions(-) create mode 100644 Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H diff --git a/Src/Base/AMReX_Any.H b/Src/Base/AMReX_Any.H index b57aa9a39ef..31c824825a4 100644 --- a/Src/Base/AMReX_Any.H +++ b/Src/Base/AMReX_Any.H @@ -60,15 +60,18 @@ public: private: struct innards_base { virtual const std::type_info& Type () const = 0; + virtual ~innards_base () = default; }; template struct innards : innards_base { - innards(MF && mf) + innards (MF && mf) : m_mf(std::forward(mf)) {} + virtual ~innards () = default; + virtual const std::type_info& Type () const override { return typeid(MF); } diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H index 52b3b16a615..f0e50ecac48 100644 --- a/Src/Base/AMReX_BaseFab.H +++ b/Src/Base/AMReX_BaseFab.H @@ -260,7 +260,7 @@ public: */ void clear () noexcept; - // Release ownership of memory + //! Release ownership of memory std::unique_ptr release () noexcept; //! Returns how many bytes used diff --git a/Src/Base/AMReX_FabArray.H b/Src/Base/AMReX_FabArray.H index 6eef7caa579..8be30fc8763 100644 --- a/Src/Base/AMReX_FabArray.H +++ b/Src/Base/AMReX_FabArray.H @@ -2848,7 +2848,7 @@ FabArray::SumBoundary_nowait (int scomp, int ncomp, IntVect const& src_ngho FabArray* tmp = new FabArray( boxArray(), DistributionMap(), ncomp, src_nghost, MFInfo(), Factory() ); amrex::Copy(*tmp, *this, scomp, 0, ncomp, src_nghost); - this->setVal(0.0, scomp, ncomp, dst_nghost); + this->setVal(typename FAB::value_type(0), scomp, ncomp, dst_nghost); this->ParallelCopy_nowait(*tmp,0,scomp,ncomp,src_nghost,dst_nghost,period,FabArrayBase::ADD); // All local. Operation complete. 
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.cpp index 89dbb268e10..e5a9b0b31af 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.cpp @@ -323,10 +323,10 @@ MLABecLaplacian::applyMetricTermsCoeffs () for (int alev = 0; alev < m_num_amr_levels; ++alev) { const int mglev = 0; - applyMetricTerm(alev, mglev, m_a_coeffs[alev][mglev]); + applyMetricTermToMF(alev, mglev, m_a_coeffs[alev][mglev]); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - applyMetricTerm(alev, mglev, m_b_coeffs[alev][mglev][idim]); + applyMetricTermToMF(alev, mglev, m_b_coeffs[alev][mglev][idim]); } } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index 45464bbeb9c..a33d70b4771 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -40,6 +40,11 @@ public: Real eps_rel, Real eps_abs); + int solve (Any& solnL, + const Any& rhsL, + Real eps_rel, + Real eps_abs); + void setVerbose (int _verbose) { verbose = _verbose; } int getVerbose () const { return verbose; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp index c32b0d6199d..76144e6d42f 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp @@ -78,6 +78,13 @@ MLCGSolver::solve (MultiFab& sol, } } +int +MLCGSolver::solve (Any& sol, const Any& rhs, Real eps_rel, Real eps_abs) +{ + AMREX_ASSERT(sol.is()); // xxxxx TODO: MLCGSolver Any + return solve(sol.get(), rhs.get(), eps_rel, eps_abs); +} + int MLCGSolver::solve_bicgstab (MultiFab& sol, const MultiFab& rhs, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H index 985bc9855b4..8849a2be292 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H @@ -59,9 
+59,9 @@ public: virtual MultiFab const* getACoeffs (int amrlev, int mglev) const = 0; virtual Array getBCoeffs (int amrlev, int mglev) const = 0; - virtual void applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const final override; + virtual void applyInhomogNeumannTerm (int amrlev, Any& rhs) const final override; - virtual void applyOverset (int amlev, MultiFab& rhs) const override; + virtual void applyOverset (int amlev, Any& rhs) const override; #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) virtual std::unique_ptr makeHypre (Hypre::Interface hypre_interface) const override; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp index b5580b3c15c..af094d89406 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp @@ -108,7 +108,7 @@ MLCellABecLap::define (const Vector& a_geom, amrlev = 0; for (int mglev = 1; mglev < m_num_mg_levels[amrlev]; ++mglev) { MultiFab foo(m_grids[amrlev][mglev], m_dmap[amrlev][mglev], 1, 0, MFInfo().SetAlloc(false)); - if (! isMFIterSafe(*m_overset_mask[amrlev][mglev], foo)) { + if (! 
amrex::isMFIterSafe(*m_overset_mask[amrlev][mglev], foo)) { auto osm = std::make_unique(m_grids[amrlev][mglev], m_dmap[amrlev][mglev], 1, 1); osm->ParallelCopy(*m_overset_mask[amrlev][mglev]); @@ -193,13 +193,16 @@ MLCellABecLap::getFluxes (const Vector >& a_flux } void -MLCellABecLap::applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const +MLCellABecLap::applyInhomogNeumannTerm (int amrlev, Any& a_rhs) const { bool has_inhomog_neumann = hasInhomogNeumannBC(); bool has_robin = hasRobinBC(); if (!has_inhomog_neumann && !has_robin) return; + AMREX_ASSERT(a_rhs.is()); + MultiFab& rhs = a_rhs.get(); + int ncomp = getNComp(); const int mglev = 0; @@ -414,9 +417,11 @@ MLCellABecLap::applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const } void -MLCellABecLap::applyOverset (int amrlev, MultiFab& rhs) const +MLCellABecLap::applyOverset (int amrlev, Any& a_rhs) const { if (m_overset_mask[amrlev][0]) { + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); const int ncomp = getNComp(); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H index f1168e5c41e..457f7565df3 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H @@ -3,6 +3,7 @@ #include #include +#include namespace amrex { @@ -109,6 +110,8 @@ public: virtual void interpolation (int amrlev, int fmglev, MultiFab& fine, const MultiFab& crse) const override; + virtual void interpAssign (int amrlev, int fmglev, MultiFab& fine, MultiFab& crse) const override; + virtual void averageDownSolutionRHS (int camrlev, MultiFab& crse_sol, MultiFab& crse_rhs, const MultiFab& fine_sol, const MultiFab& fine_rhs) override; @@ -132,9 +135,12 @@ public: virtual void compGrad (int amrlev, const Array& grad, MultiFab& sol, Location loc) const override; - virtual void applyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const final override; + virtual void 
applyMetricTerm (int amrlev, int mglev, Any& rhs) const final override; virtual void unapplyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const final override; - virtual void fillSolutionBC (int amrlev, MultiFab& sol, const MultiFab* crse_bcdata=nullptr) final override; + virtual Vector getSolvabilityOffset (int amrlev, int mglev, + Any const& rhs) const override; + virtual void fixSolvabilityByOffset (int amrlev, int mglev, Any& rhs, + Vector const& offset) const override; virtual void prepareForSolve () override; @@ -146,6 +152,18 @@ public: const Array& flux, const FArrayBox& sol, Location loc, const int face_only=0) const = 0; + // This could be turned into template if needed. + void applyMetricTermToMF (int amrlev, int mglev, MultiFab& rhs) const; + + virtual Real AnyNormInfMask (int amrlev, Any const& a, bool local) const override; + + virtual void AnyAvgDownResAmr (int clev, Any& cres, Any const& fres) const override; + + virtual void AnyInterpolationAmr (int famrlev, Any& fine, const Any& crse, + IntVect const& /*nghost*/) const override; + + virtual void AnyAverageDownAndSync (Vector& sol) const override; + struct BCTL { BoundCond type; Real location; @@ -210,12 +228,17 @@ protected: // boundary cell flags for covered, not_covered, outside_domain Vector > > m_maskvals; + Vector > m_norm_fine_mask; + mutable Vector m_fluxreg; private: void defineAuxData (); void defineBC (); + + void computeVolInv () const; + mutable Vector > m_volinv; // used by solvability fix }; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp index 8f6921950e7..e4c9cef953f 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #ifndef BL_NO_FORT @@ -9,6 +10,11 @@ namespace amrex { +#ifdef AMREX_SOFT_PERF_COUNTERS +// perf_counters +MLCellLinOp::Counters MLCellLinOp::perf_counters; +#endif + namespace { // Have to put it 
here due to CUDA extended lambda limitation struct ABCTag { @@ -97,6 +103,7 @@ MLCellLinOp::defineAuxData () m_undrrelxr.resize(m_num_amr_levels); m_maskvals.resize(m_num_amr_levels); m_fluxreg.resize(m_num_amr_levels-1); + m_norm_fine_mask.resize(m_num_amr_levels-1); const int ncomp = getNComp(); @@ -136,6 +143,9 @@ MLCellLinOp::defineAuxData () m_dmap[amrlev+1][0], m_dmap[amrlev][0], m_geom[amrlev+1][0], m_geom[amrlev][0], ratio, amrlev+1, ncomp); + m_norm_fine_mask[amrlev] = std::make_unique + (makeFineMask(m_grids[amrlev][0], m_dmap[amrlev][0], m_grids[amrlev+1][0], + ratio, 1, 0)); } #if (AMREX_SPACEDIM != 3) @@ -530,18 +540,6 @@ MLCellLinOp::solutionResidual (int amrlev, MultiFab& resid, MultiFab& x, const M MultiFab::Xpay(resid, Real(-1.0), b, 0, 0, ncomp, 0); } -void -MLCellLinOp::fillSolutionBC (int amrlev, MultiFab& sol, const MultiFab* crse_bcdata) -{ - BL_PROFILE("MLCellLinOp::fillSolutionBC()"); - if (crse_bcdata != nullptr) { - updateSolBC(amrlev, *crse_bcdata); - } - const int mglev = 0; - applyBC(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, - m_bndry_sol[amrlev].get()); -} - void MLCellLinOp::correctionResidual (int amrlev, int mglev, MultiFab& resid, MultiFab& x, const MultiFab& b, BCMode bc_mode, const MultiFab* crse_bcdata) @@ -1316,7 +1314,20 @@ MLCellLinOp::BndryCondLoc::setLOBndryConds (const Geometry& geom, const Real* dx } void -MLCellLinOp::applyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const +MLCellLinOp::applyMetricTerm (int amrlev, int mglev, Any& rhs) const +{ + amrex::ignore_unused(amrlev,mglev,rhs); +#if (AMREX_SPACEDIM != 3) + + if (!m_has_metric_term) return; + + AMREX_ASSERT(rhs.is()); + applyMetricTermToMF(amrlev, mglev, rhs.get()); +#endif +} + +void +MLCellLinOp::applyMetricTermToMF (int amrlev, int mglev, MultiFab& rhs) const { amrex::ignore_unused(amrlev,mglev,rhs); #if (AMREX_SPACEDIM != 3) @@ -1435,9 +1446,417 @@ MLCellLinOp::update () if (MLLinOp::needsUpdate()) MLLinOp::update(); } -#ifdef 
AMREX_SOFT_PERF_COUNTERS -// perf_counters -MLCellLinOp::Counters MLCellLinOp::perf_counters; +void +MLCellLinOp::computeVolInv () const +{ + if (!m_volinv.empty()) return; + + m_volinv.resize(m_num_amr_levels); + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + m_volinv[amrlev].resize(NMGLevels(amrlev)); + } + + // We don't need to compute for every level + + auto f = [&] (int amrlev, int mglev) { +#ifdef AMREX_USE_EB + auto factory = dynamic_cast(Factory(amrlev,mglev)); + if (factory) + { + const MultiFab& vfrac = factory->getVolFrac(); + m_volinv[amrlev][mglev] = vfrac.sum(0,true); + } + else +#endif + { + m_volinv[amrlev][mglev] + = Real(1.0 / compactify(Geom(amrlev,mglev).Domain()).d_numPts()); + } + }; + + // amrlev = 0, mglev = 0 + f(0,0); + + int mgbottom = NMGLevels(0)-1; + f(0,mgbottom); + +#ifdef AMREX_USE_EB + Real temp1, temp2; + auto factory = dynamic_cast(Factory(0,0)); + if (factory) + { + ParallelAllReduce::Sum({m_volinv[0][0], m_volinv[0][mgbottom]}, + ParallelContext::CommunicatorSub()); + temp1 = Real(1.0)/m_volinv[0][0]; + temp2 = Real(1.0)/m_volinv[0][mgbottom]; + } + else + { + temp1 = m_volinv[0][0]; + temp2 = m_volinv[0][mgbottom]; + } + m_volinv[0][0] = temp1; + m_volinv[0][mgbottom] = temp2; +#endif +} + +Vector +MLCellLinOp::getSolvabilityOffset (int amrlev, int mglev, Any const& a_rhs) const +{ + AMREX_ASSERT(a_rhs.is()); + auto const& rhs = a_rhs.get(); + + computeVolInv(); + + const int ncomp = getNComp(); + Vector offset(ncomp); + +#ifdef AMREX_USE_EB + auto factory = dynamic_cast(Factory(amrlev,mglev)); + if (factory) + { + const MultiFab& vfrac = factory->getVolFrac(); + for (int c = 0; c < ncomp; ++c) { + offset[c] = MultiFab::Dot(rhs, c, vfrac, 0, 1, 0, true) * m_volinv[amrlev][mglev]; + } + } + else +#endif + { + for (int c = 0; c < ncomp; ++c) { + offset[c] = rhs.sum(c,true) * m_volinv[amrlev][mglev]; + } + } + + ParallelAllReduce::Sum(offset.data(), ncomp, ParallelContext::CommunicatorSub()); + + return offset; +} 
+ +Real +MLCellLinOp::AnyNormInfMask (int amrlev, Any const& a, bool local) const +{ + AMREX_ASSERT(a.is()); + auto& mf = a.get(); + + const int finest_level = NAMRLevels() - 1; + Real norm = 0._rt; +#ifdef AMREX_USE_EB + const int ncomp = getNComp(); + if (! mf.isAllRegular()) { + auto factory = dynamic_cast(Factory(amrlev)); + const MultiFab& vfrac = factory->getVolFrac(); + if (amrlev == finest_level) { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = mf.const_arrays(); + auto const& vfrac_ma = vfrac.const_arrays(); + norm = ParReduce(TypeList{}, TypeList{}, + mf, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) + -> GpuTuple + { + return amrex::Math::abs(ma[box_no](i,j,k,n) + *vfrac_ma[box_no](i,j,k)); + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel reduction(max:norm) +#endif + for (MFIter mfi(mf,true); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fab = mf.const_array(mfi); + auto const& v = vfrac.const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + norm = std::max(norm, amrex::Math::abs(fab(i,j,k,n)*v(i,j,k))); + }); + } + } + } else { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = mf.const_arrays(); + auto const& mask_ma = m_norm_fine_mask[amrlev]->const_arrays(); + auto const& vfrac_ma = vfrac.const_arrays(); + norm = ParReduce(TypeList{}, TypeList{}, + mf, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) + -> GpuTuple + { + if (mask_ma[box_no](i,j,k)) { + return amrex::Math::abs(ma[box_no](i,j,k,n) + *vfrac_ma[box_no](i,j,k)); + } else { + return Real(0.0); + } + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel reduction(max:norm) #endif + for (MFIter mfi(mf,true); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fab = mf.const_array(mfi); + auto const& mask = m_norm_fine_mask[amrlev]->const_array(mfi); + auto const& v = 
vfrac.const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + if (mask(i,j,k)) { + norm = std::max(norm, amrex::Math::abs(fab(i,j,k,n)*v(i,j,k))); + } + }); + } + } + } + } else +#endif + { + iMultiFab const* fine_mask = (amrlev == finest_level) + ? nullptr : m_norm_fine_mask[amrlev].get(); + norm = MFNormInf(mf, fine_mask, true); + } + + if (!local) ParallelAllReduce::Max(norm, ParallelContext::CommunicatorSub()); + return norm; +} + +void +MLCellLinOp::AnyAvgDownResAmr (int clev, Any& cres, Any const& fres) const +{ + AMREX_ASSERT(cres.is() && fres.is()); +#ifdef AMREX_USE_EB + amrex::EB_average_down +#else + amrex::average_down +#endif + (fres.get(), cres.get(), 0, getNComp(), AMRRefRatio(clev)); +} + +void +MLCellLinOp::AnyInterpolationAmr (int famrlev, Any& a_fine, const Any& a_crse, + IntVect const& /*nghost*/) const +{ + AMREX_ASSERT(a_fine.is()); + MultiFab& fine = a_fine.get(); + MultiFab const& crse = a_crse.get(); + + const int ncomp = getNComp(); + const int refratio = AMRRefRatio(famrlev-1); + +#ifdef AMREX_USE_EB + auto factory = dynamic_cast(Factory(famrlev)); + const FabArray* flags = (factory) ? 
&(factory->getMultiEBCellFlagFab()) : nullptr; +#endif + + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(fine, mfi_info); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + Array4 const& ff = fine.array(mfi); + Array4 const& cc = crse.const_array(mfi); +#ifdef AMREX_USE_EB + bool call_lincc; + if (factory) + { + const auto& flag = (*flags)[mfi]; + if (flag.getType(amrex::grow(bx,1)) == FabType::regular) { + call_lincc = true; + } else { + Array4 const& flg = flag.const_array(); + switch(refratio) { + case 2: + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_eb_cc_interp_r<2>(tbx, ff, cc, flg, ncomp); + }); + break; + } + case 4: + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_eb_cc_interp_r<4>(tbx, ff, cc, flg, ncomp); + }); + break; + } + default: + amrex::Abort("mlmg_eb_cc_interp: only refratio 2 and 4 are supported"); + } + + call_lincc = false; + } + } + else + { + call_lincc = true; + } +#else + const bool call_lincc = true; +#endif + if (call_lincc) + { + switch(refratio) { + case 2: + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_lin_cc_interp_r2(tbx, ff, cc, ncomp); + }); + break; + } + case 4: + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_lin_cc_interp_r4(tbx, ff, cc, ncomp); + }); + break; + } + default: + amrex::Abort("mlmg_lin_cc_interp: only refratio 2 and 4 are supported"); + } + } + } +} + +void +MLCellLinOp::interpAssign (int amrlev, int fmglev, MultiFab& fine, MultiFab& crse) const +{ + const int ncomp = getNComp(); + + const Geometry& crse_geom = Geom(amrlev,fmglev+1); + const IntVect refratio = (amrlev > 0) ? 
IntVect(2) : mg_coarsen_ratio_vec[fmglev]; + const IntVect ng = crse.nGrowVect(); + + MultiFab cfine; + const MultiFab* cmf; + + if (amrex::isMFIterSafe(crse, fine)) + { + crse.FillBoundary(crse_geom.periodicity()); + cmf = &crse; + } + else + { + BoxArray cba = fine.boxArray(); + cba.coarsen(refratio); + cfine.define(cba, fine.DistributionMap(), ncomp, ng); + cfine.setVal(0.0); + cfine.ParallelCopy(crse, 0, 0, ncomp, IntVect(0), ng, crse_geom.periodicity()); + cmf = & cfine; + } + + bool isEB = fine.hasEBFabFactory(); + ignore_unused(isEB); + +#ifdef AMREX_USE_EB + auto factory = dynamic_cast(&(fine.Factory())); + const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; +#endif + + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(fine, mfi_info); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + const auto& ff = fine.array(mfi); + const auto& cc = cmf->array(mfi); +#ifdef AMREX_USE_EB + bool call_lincc; + if (isEB) + { + const auto& flag = (*flags)[mfi]; + if (flag.getType(amrex::grow(bx,1)) == FabType::regular) { + call_lincc = true; + } else { + Array4 const& flg = flag.const_array(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_eb_cc_interp_r<2>(tbx, ff, cc, flg, ncomp); + }); + + call_lincc = false; + } + } + else + { + call_lincc = true; + } +#else + const bool call_lincc = true; +#endif + if (call_lincc) + { +#if (AMREX_SPACEDIM == 3) + if (hasHiddenDimension()) { + Box const& bx_2d = compactify(bx); + auto const& ff_2d = compactify(ff); + auto const& cc_2d = compactify(cc); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx_2d, tbx, + { + TwoD::mlmg_lin_cc_interp_r2(tbx, ff_2d, cc_2d, ncomp); + }); + } else +#endif + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_lin_cc_interp_r2(tbx, ff, cc, ncomp); + }); + } + } + } +} + +void +MLCellLinOp::AnyAverageDownAndSync (Vector& 
sol) const +{ + AMREX_ASSERT(sol[0].is()); + + int ncomp = getNComp(); + for (int falev = NAMRLevels()-1; falev > 0; --falev) + { +#ifdef AMREX_USE_EB + amrex::EB_average_down(sol[falev ].get(), + sol[falev-1].get(), 0, ncomp, AMRRefRatio(falev-1)); +#else + amrex::average_down(sol[falev ].get(), + sol[falev-1].get(), 0, ncomp, AMRRefRatio(falev-1)); +#endif + } +} + +void +MLCellLinOp::fixSolvabilityByOffset (int amrlev, int mglev, Any& a_rhs, + Vector const& offset) const +{ + amrex::ignore_unused(amrlev, mglev); + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); + + const int ncomp = getNComp(); + for (int c = 0; c < ncomp; ++c) { + rhs.plus(-offset[c], c, 1); + } +#ifdef AMREX_USE_EB + if (rhs.hasEBFabFactory()) { + Vector val(ncomp, 0.0_rt); + amrex::EB_set_covered(rhs, 0, ncomp, val); + } +#endif +} } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H index b63aaa8d001..0d294c9da8c 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H @@ -72,7 +72,7 @@ public: virtual std::unique_ptr > makeFactory (int amrlev, int mglev) const final override; - virtual void scaleRHS (int amrlev, MultiFab& rhs) const final; + virtual void scaleRHS (int amrlev, Any& rhs) const final; #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp index cfa7595b515..62a7c3af282 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp @@ -318,8 +318,11 @@ MLEBNodeFDLaplacian::prepareForSolve () #ifdef AMREX_USE_EB void -MLEBNodeFDLaplacian::scaleRHS (int amrlev, MultiFab& rhs) const +MLEBNodeFDLaplacian::scaleRHS (int amrlev, Any& a_rhs) const { + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); + auto const& dmask = *m_dirichlet_mask[amrlev][0]; auto factory = dynamic_cast(m_factory[amrlev][0].get()); auto 
const& edgecent = factory->getEdgeCent(); @@ -634,19 +637,19 @@ MLEBNodeFDLaplacian::compGrad (int amrlev, const Array #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) void -MLEBNodeFDLaplacian::fillIJMatrix (MFIter const& mfi, - Array4 const& gid, - Array4 const& lid, - HypreNodeLap::Int* const ncols, - HypreNodeLap::Int* const cols, - Real* const mat) const +MLEBNodeFDLaplacian::fillIJMatrix (MFIter const& /*mfi*/, + Array4 const& /*gid*/, + Array4 const& /*lid*/, + HypreNodeLap::Int* const /*ncols*/, + HypreNodeLap::Int* const /*cols*/, + Real* const /*mat*/) const { amrex::Abort("MLEBNodeFDLaplacian::fillIJMatrix: todo"); } void -MLEBNodeFDLaplacian::fillRHS (MFIter const& mfi, Array4 const& lid, - Real* const rhs, Array4 const& bfab) const +MLEBNodeFDLaplacian::fillRHS (MFIter const& /*mfi*/, Array4 const& /*lid*/, + Real* const /*rhs*/, Array4 const& /*bfab*/) const { amrex::Abort("MLEBNodeFDLaplacian::fillRHS: todo"); } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H index f744c96e059..f7096b93778 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H @@ -2,6 +2,7 @@ #define AMREX_ML_LINOP_H_ #include +#include #include #include #include @@ -177,10 +178,10 @@ public: * inhomogeneous Neumann BC, the value in leveldata is assumed to be * `d./dx`. 
*/ - virtual void setLevelBC (int amrlev, const MultiFab* levelbcdata, - const MultiFab* robinbc_a = nullptr, - const MultiFab* robinbc_b = nullptr, - const MultiFab* robinbc_f = nullptr) = 0; + virtual void setLevelBC (int /*amrlev*/, const MultiFab* /*levelbcdata*/, + const MultiFab* /*robinbc_a*/ = nullptr, + const MultiFab* /*robinbc_b*/ = nullptr, + const MultiFab* /*robinbc_f*/ = nullptr) {} void setVerbose (int v) noexcept { verbose = v; } @@ -197,52 +198,51 @@ public: virtual bool needsUpdate () const { return false; } virtual void update () {} - virtual void restriction (int amrlev, int cmglev, MultiFab& crse, MultiFab& fine) const = 0; - virtual void interpolation (int amrlev, int fmglev, MultiFab& fine, const MultiFab& crse) const = 0; - virtual void averageDownSolutionRHS (int camrlev, MultiFab& crse_sol, MultiFab& crse_rhs, - const MultiFab& fine_sol, const MultiFab& fine_rhs) = 0; + virtual void restriction (int /*amrlev*/, int /*cmglev*/, MultiFab& /*crse*/, MultiFab& /*fine*/) const {} + virtual void interpolation (int /*amrlev*/, int /*fmglev*/, MultiFab& /*fine*/, const MultiFab& /*crse*/) const {} + virtual void interpAssign (int /*amrlev*/, int /*fmglev*/, MultiFab& /*fine*/, MultiFab& /*crse*/) const {} + virtual void averageDownSolutionRHS (int /*camrlev*/, MultiFab& /*crse_sol*/, MultiFab& /*crse_rhs*/, + const MultiFab& /*fine_sol*/, const MultiFab& /*fine_rhs*/) {} - virtual void apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc_mode, - StateMode s_mode, const MLMGBndry* bndry=nullptr) const = 0; - virtual void smooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, - bool skip_fillboundary=false) const = 0; + virtual void apply (int /*amrlev*/, int /*mglev*/, MultiFab& /*out*/, MultiFab& /*in*/, BCMode /*bc_mode*/, + StateMode /*s_mode*/, const MLMGBndry* /*bndry*/=nullptr) const {} + virtual void smooth (int /*amrlev*/, int /*mglev*/, MultiFab& /*sol*/, const MultiFab& /*rhs*/, + bool 
/*skip_fillboundary*/=false) const {} // Divide mf by the diagonal component of the operator. Used by bicgstab. virtual void normalize (int /*amrlev*/, int /*mglev*/, MultiFab& /*mf*/) const {} - virtual void solutionResidual (int amrlev, MultiFab& resid, MultiFab& x, const MultiFab& b, - const MultiFab* crse_bcdata=nullptr) = 0; - virtual void correctionResidual (int amrlev, int mglev, MultiFab& resid, MultiFab& x, const MultiFab& b, - BCMode bc_mode, const MultiFab* crse_bcdata=nullptr) = 0; - - virtual void reflux (int crse_amrlev, - MultiFab& res, const MultiFab& crse_sol, const MultiFab& crse_rhs, - MultiFab& fine_res, MultiFab& fine_sol, const MultiFab& fine_rhs) const = 0; - virtual void compFlux (int amrlev, const Array& fluxes, - MultiFab& sol, Location loc) const = 0; - virtual void compGrad (int amrlev, const Array& grad, - MultiFab& sol, Location loc) const = 0; - - virtual void applyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const = 0; - virtual void unapplyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const = 0; - virtual void fillSolutionBC (int amrlev, MultiFab& sol, const MultiFab* crse_bcdata=nullptr) = 0; - - virtual void unimposeNeumannBC (int /*amrlev*/, MultiFab& /*rhs*/) const {} // only nodal solver might need it - virtual void applyInhomogNeumannTerm (int /*amrlev*/, MultiFab& /*rhs*/) const {} - virtual void applyOverset (int /*amlev*/, MultiFab& /*rhs*/) const {} - virtual void scaleRHS (int /*amrlev*/, MultiFab& /*rhs*/) const {} - virtual Real getSolvabilityOffset (int /*amrlev*/, int /*mglev*/, MultiFab const& /*rhs*/) const { return 0._rt; } // Only nodal solvers need it - virtual void fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/, Real /*offset*/) const {} // Only nodal solvers need it + virtual void solutionResidual (int /*amrlev*/, MultiFab& /*resid*/, MultiFab& /*x*/, const MultiFab& /*b*/, + const MultiFab* /*crse_bcdata*/=nullptr) {} + virtual void correctionResidual (int /*amrlev*/, int 
/*mglev*/, MultiFab& /*resid*/, MultiFab& /*x*/, const MultiFab& /*b*/, + BCMode /*bc_mode*/, const MultiFab* /*crse_bcdata*/=nullptr) {} + + virtual void reflux (int /*crse_amrlev*/, + MultiFab& /*res*/, const MultiFab& /*crse_sol*/, const MultiFab& /*crse_rhs*/, + MultiFab& /*fine_res*/, MultiFab& /*fine_sol*/, const MultiFab& /*fine_rhs*/) const {} + virtual void compFlux (int /*amrlev*/, const Array& /*fluxes*/, + MultiFab& /*sol*/, Location /*loc*/) const {} + virtual void compGrad (int /*amrlev*/, const Array& /*grad*/, + MultiFab& /*sol*/, Location /*loc*/) const {} + + virtual void applyMetricTerm (int /*amrlev*/, int /*mglev*/, Any& /*rhs*/) const {} + virtual void unapplyMetricTerm (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/) const {} + + virtual void unimposeNeumannBC (int /*amrlev*/, Any& /*rhs*/) const {} // only nodal solver might need it + virtual void applyInhomogNeumannTerm (int /*amrlev*/, Any& /*rhs*/) const {} + virtual void applyOverset (int /*amlev*/, Any& /*rhs*/) const {} + virtual void scaleRHS (int /*amrlev*/, Any& /*rhs*/) const {} + virtual Vector getSolvabilityOffset (int /*amrlev*/, int /*mglev*/, + Any const& /*rhs*/) const { return {}; } + virtual void fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, Any& /*rhs*/, + Vector const& /*offset*/) const {} virtual void prepareForSolve () = 0; - virtual bool isSingular (int amrlev) const = 0; - virtual bool isBottomSingular () const = 0; - virtual Real xdoty (int amrlev, int mglev, const MultiFab& x, const MultiFab& y, bool local) const = 0; + virtual bool isSingular (int /*amrlev*/) const { return false; } + virtual bool isBottomSingular () const { return false; } + virtual Real xdoty (int /*amrlev*/, int /*mglev*/, const MultiFab& /*x*/, const MultiFab& /*y*/, bool /*local*/) const { return 0._rt; } - virtual void fixUpResidualMask (int /*amrlev*/, iMultiFab& /*resmsk*/) { } - virtual void nodalSync (int /*amrlev*/, int /*mglev*/, MultiFab& /*mf*/) const {} - - virtual 
std::unique_ptr makeNLinOp (int grid_size) const = 0; + virtual std::unique_ptr makeNLinOp (int /*grid_size*/) const { return {nullptr}; } virtual void getFluxes (const Vector >& /*a_flux*/, const Vector& /*a_sol*/, @@ -283,6 +283,57 @@ public: virtual void copyNSolveSolution (MultiFab&, MultiFab const&) const {} + virtual Any AnyMake (int amrlev, int mglev, IntVect const& ng) const; + virtual Any AnyMakeCoarseMG (int amrlev, int mglev, IntVect const& ng) const; + virtual Any AnyMakeCoarseAmr (int famrlev, IntVect const& ng) const; + virtual Any AnyMakeAlias (Any const& a) const; + virtual IntVect AnyGrowVect (Any const& a) const; + virtual void AnyCopy (Any& dst, Any const& src, IntVect const& ng) const; + virtual void AnyAdd (Any& dst, Any const& src, IntVect const& ng) const; + virtual void AnySetToZero (Any& a) const; + virtual void AnySetBndryToZero (Any& a) const; +#ifdef AMREX_USE_EB + virtual void AnySetCoveredToZero (Any& a) const; +#endif + virtual void AnyParallelCopy (Any& dst, Any const& src, + IntVect const& src_nghost, IntVect const& dst_nghost, + Periodicity const& period = Periodicity::NonPeriodic()) const; + + virtual Real AnyNormInf (Any& a) const; + + virtual Real AnyNormInfMask (int amrlev, Any const& a, bool local) const = 0; + + virtual void AnySolutionResidual (int amrlev, Any& resid, Any& x, Any const& b, + Any const* crse_bcdata = nullptr); + virtual void AnyCorrectionResidual (int amrlev, int mglev, Any& resid, Any& x, + const Any& b, BCMode bc_mode, + const Any* crse_bcdata=nullptr); + virtual void AnyReflux (int crse_amrlev, + Any& res, const Any& crse_sol, const Any& crse_rhs, + Any& fine_res, Any& fine_sol, const Any& fine_rhs); + + virtual void AnyAvgDownResAmr (int clev, Any& cres, Any const& fres) const = 0; + virtual void AnyAvgDownResMG (int clev, Any& cres, Any const& fres) const; + + virtual void AnySmooth (int amrlev, int mglev, Any& sol, const Any& rhs, + bool skip_fillboundary=false) const; + + virtual void AnyRestriction 
(int amrlev, int cmglev, Any& crse, Any& fine) const; + + virtual void AnyInterpolationMG (int amrlev, int fmglev, Any& fine, const Any& crse) const; + virtual void AnyInterpAssignMG (int amrlev, int fmglev, Any& fine, Any& crse) const; + virtual void AnyInterpolationAmr (int famrlev, Any& fine, const Any& crse, + IntVect const& /*nghost*/) const = 0; + + virtual void AnyAverageDownSolutionRHS (int camrlev, Any& crse_sol, Any& crse_rhs, + const Any& fine_sol, const Any& fine_rhs); + + virtual void AnyAverageDownAndSync (Vector& sol) const = 0; + + Real MFNormInf (MultiFab const& mf, iMultiFab const* fine_mask, bool local) const; + + bool isMFIterSafe (int amrlev, int mglev1, int mglev2) const; + protected: static constexpr int mg_coarsen_ratio = 2; @@ -401,7 +452,7 @@ protected: bool isCellCentered () const noexcept { return m_ixtype == 0; } - virtual void make (Vector >& mf, int nc, IntVect const& ng) const; + void make (Vector >& mf, IntVect const& ng) const; virtual std::unique_ptr > makeFactory (int /*amrlev*/, int /*mglev*/) const { return std::make_unique(); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp index 9c6ccc8ce05..5f71895320d 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp @@ -4,10 +4,12 @@ #include #include #include +#include #ifdef AMREX_USE_EB #include #include +#include #endif #ifdef AMREX_USE_PETSC @@ -544,7 +546,7 @@ MLLinOp::defineBC () } void -MLLinOp::make (Vector >& mf, int nc, IntVect const& ng) const +MLLinOp::make (Vector >& mf, IntVect const& ng) const { mf.clear(); mf.resize(m_num_amr_levels); @@ -553,8 +555,7 @@ MLLinOp::make (Vector >& mf, int nc, IntVect const& ng) const mf[alev].resize(m_num_mg_levels[alev]); for (int mlev = 0; mlev < m_num_mg_levels[alev]; ++mlev) { - const auto& ba = amrex::convert(m_grids[alev][mlev], m_ixtype); - mf[alev][mlev].define(ba, m_dmap[alev][mlev], nc, ng, MFInfo(), *m_factory[alev][mlev]); + 
mf[alev][mlev] = AnyMake(alev, mlev, ng); } } } @@ -895,6 +896,276 @@ MLLinOp::resizeMultiGrid (int new_size) } } +Any +MLLinOp::AnyMake (int amrlev, int mglev, IntVect const& ng) const +{ + return Any(MultiFab(amrex::convert(m_grids[amrlev][mglev], m_ixtype), + m_dmap[amrlev][mglev], getNComp(), ng, MFInfo(), + *m_factory[amrlev][mglev])); +} + +Any +MLLinOp::AnyMakeCoarseMG (int amrlev, int mglev, IntVect const& ng) const +{ + BoxArray cba = m_grids[amrlev][mglev]; + IntVect ratio = (amrlev > 0) ? IntVect(2) : mg_coarsen_ratio_vec[mglev]; + cba.coarsen(ratio); + cba.convert(m_ixtype); + return Any(MultiFab(cba, m_dmap[amrlev][mglev], getNComp(), ng)); +} + +Any +MLLinOp::AnyMakeCoarseAmr (int famrlev, IntVect const& ng) const +{ + BoxArray cba = m_grids[famrlev][0]; + IntVect ratio(AMRRefRatio(famrlev-1)); + cba.coarsen(ratio); + cba.convert(m_ixtype); + return Any(MultiFab(cba, m_dmap[famrlev][0], getNComp(), ng)); +} + +Any +MLLinOp::AnyMakeAlias (Any const& a) const +{ + AMREX_ASSERT(a.is()); + MultiFab const& mf = a.get(); + return Any(MultiFab(mf, amrex::make_alias, 0, mf.nComp())); +} + +IntVect +MLLinOp::AnyGrowVect (Any const& a) const +{ + AMREX_ASSERT(a.is()); + MultiFab const& mf = a.get(); + return mf.nGrowVect(); +} + +void +MLLinOp::AnySetToZero (Any& a) const +{ + AMREX_ASSERT(a.is()); + MultiFab& mf = a.get(); + mf.setVal(0._rt); +} + +void +MLLinOp::AnySetBndryToZero (Any& a) const +{ + AMREX_ASSERT(a.is()); + MultiFab& mf = a.get(); + mf.setBndry(0._rt, 0, getNComp()); +} + +#ifdef AMREX_USE_EB +void +MLLinOp::AnySetCoveredToZero (Any& a) const +{ + AMREX_ASSERT(a.is()); + auto& mf = a.get(); + EB_set_covered(mf, 0, getNComp(), 0, 0._rt); +} +#endif + +void +MLLinOp::AnyCopy (Any& dst, Any const& src, IntVect const& ng) const +{ + AMREX_ASSERT(dst.is() && src.is()); + MultiFab& dmf = dst.get(); + MultiFab const& smf = src.get(); + MultiFab::Copy(dmf, smf, 0, 0, getNComp(), ng); +} + +void +MLLinOp::AnyAdd (Any& dst, Any const& src, IntVect 
const& ng) const +{ + AMREX_ASSERT(dst.is() && src.is()); + MultiFab& dmf = dst.get(); + MultiFab const& smf = src.get(); + MultiFab::Add(dmf, smf, 0, 0, getNComp(), ng); +} + +void +MLLinOp::AnyAverageDownSolutionRHS (int camrlev, Any& a_crse_sol, Any& a_crse_rhs, + const Any& a_fine_sol, const Any& a_fine_rhs) +{ + AMREX_ASSERT(a_crse_sol.is() && + a_crse_rhs.is() && + a_fine_sol.is() && + a_fine_rhs.is()); + auto& crse_sol = a_crse_sol.get(); + auto& crse_rhs = a_crse_rhs.get(); + auto& fine_sol = a_fine_sol.get(); + auto& fine_rhs = a_fine_rhs.get(); + averageDownSolutionRHS(camrlev, crse_sol, crse_rhs, fine_sol, fine_rhs); +} + +void +MLLinOp::AnyParallelCopy (Any& dst, Any const& src, + IntVect const& src_nghost, IntVect const& dst_nghost, + Periodicity const& period) const +{ + AMREX_ASSERT(dst.is()); + MultiFab& dmf = dst.get(); + MultiFab const& smf = src.get(); + dmf.ParallelCopy(smf, 0, 0, getNComp(), src_nghost, dst_nghost, period); +} + +Real +MLLinOp::AnyNormInf (Any& a) const +{ + AMREX_ASSERT(a.is()); + return a.get().norminf(); +} + +void +MLLinOp::AnySolutionResidual (int amrlev, Any& resid, Any& x, Any const& b, + Any const* crse_bcdata) +{ + AMREX_ASSERT(x.is()); + solutionResidual(amrlev, resid.get(), x.get(), b.get(), + (crse_bcdata) ? &(crse_bcdata->get()) : nullptr); +} + +void +MLLinOp::AnyCorrectionResidual (int amrlev, int mglev, Any& resid, Any& x, const Any& b, + BCMode bc_mode, const Any* crse_bcdata) +{ + AMREX_ASSERT(x.is()); + correctionResidual(amrlev, mglev, resid.get(), x.get(), + b.get(), bc_mode, + (crse_bcdata) ? 
&(crse_bcdata->get()) : nullptr); +} + +void +MLLinOp::AnyReflux (int clev, Any& res, const Any& crse_sol, const Any& crse_rhs, + Any& fine_res, Any& fine_sol, const Any& fine_rhs) +{ + AMREX_ASSERT(res.is()); + reflux(clev,res.get(), crse_sol.get(), crse_rhs.get(), + fine_res.get(), fine_sol.get(), fine_rhs.get()); +} + +Real +MLLinOp::MFNormInf (MultiFab const& mf, iMultiFab const* fine_mask, bool local) const +{ + const int ncomp = getNComp(); + Real norm = 0._rt; + + if (fine_mask == nullptr) { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = mf.const_arrays(); + norm = ParReduce(TypeList{}, TypeList{}, + mf, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) + -> GpuTuple + { + return amrex::Math::abs(ma[box_no](i,j,k,n)); + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel reduction(max:norm) +#endif + for (MFIter mfi(mf,true); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fab = mf.const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + norm = std::max(norm, amrex::Math::abs(fab(i,j,k,n))); + }); + } + } + } else { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = mf.const_arrays(); + auto const& mask_ma = fine_mask->const_arrays(); + norm = ParReduce(TypeList{}, TypeList{}, + mf, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) + -> GpuTuple + { + if (mask_ma[box_no](i,j,k)) { + return amrex::Math::abs(ma[box_no](i,j,k,n)); + } else { + return Real(0.0); + } + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel reduction(max:norm) +#endif + for (MFIter mfi(mf,true); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fab = mf.const_array(mfi); + auto const& mask = fine_mask->const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + if (mask(i,j,k)) { + norm = std::max(norm, amrex::Math::abs(fab(i,j,k,n))); + } + }); + } + } + } + + if (!local) 
ParallelAllReduce::Max(norm, ParallelContext::CommunicatorSub()); + return norm; +} + +void +MLLinOp::AnyAvgDownResMG (int clev, Any& cres, Any const& fres) const +{ + AMREX_ASSERT(cres.is()); +#ifdef AMREX_USE_EB + amrex::EB_average_down +#else + amrex::average_down +#endif + (fres.get(), cres.get(), 0, getNComp(), + mg_coarsen_ratio_vec[clev-1]); +} + +void +MLLinOp::AnySmooth (int amrlev, int mglev, Any& sol, const Any& rhs, + bool skip_fillboundary) const +{ + AMREX_ASSERT(sol.is() && rhs.is()); + smooth(amrlev, mglev, sol.get(), rhs.get(), skip_fillboundary); +} + +void +MLLinOp::AnyRestriction (int amrlev, int cmglev, Any& crse, Any& fine) const +{ + AMREX_ASSERT(crse.is() && fine.is()); + restriction(amrlev, cmglev, crse.get(), fine.get()); +} + +void +MLLinOp::AnyInterpolationMG (int amrlev, int fmglev, Any& fine, const Any& crse) const +{ + AMREX_ASSERT(crse.is() && fine.is()); + interpolation(amrlev, fmglev, fine.get(), crse.get()); +} + +void +MLLinOp::AnyInterpAssignMG (int amrlev, int fmglev, Any& fine, Any& crse) const +{ + AMREX_ASSERT(crse.is() && fine.is()); + interpAssign(amrlev, fmglev, fine.get(), crse.get()); +} + +bool +MLLinOp::isMFIterSafe (int amrlev, int mglev1, int mglev2) const +{ + return m_dmap[amrlev][mglev1] == m_dmap[amrlev][mglev2] + && BoxArray::SameRefs(m_grids[amrlev][mglev1], m_grids[amrlev][mglev2]); +} + #ifdef AMREX_USE_PETSC std::unique_ptr MLLinOp::makePETSc () const diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H new file mode 100644 index 00000000000..68d7c836ba5 --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H @@ -0,0 +1,486 @@ +#ifndef AMREX_MLLINOP_TEMP_H_ +#define AMREX_MLLINOP_TEMP_H_ + +//! This is a template for writing your own linear operator class for Ax=b. + +#include + +namespace amrex_temp +{ + +class MLLinOpTemp + : public amrex::MLLinOp +{ +public: + + //! In this example, there are 3 edge based MultiFabs. 
+ using Container = amrex::Array; + + MLLinOpTemp () {} + + virtual ~MLLinOpTemp () {} + + MLLinOpTemp (const MLLinOpTemp&) = delete; + MLLinOpTemp (MLLinOpTemp&&) = delete; + MLLinOpTemp& operator= (const MLLinOpTemp&) = delete; + MLLinOpTemp& operator= (MLLinOpTemp&&) = delete; + + MLLinOpTemp (const amrex::Vector& a_geom, + const amrex::Vector& a_grids, + const amrex::Vector& a_dmap, + const amrex::LPInfo& a_info = amrex::LPInfo(), + const amrex::Vector const*>& a_factory = {}) + { + define(a_geom, a_grids, a_dmap, a_info, a_factory); + } + + void define (const amrex::Vector& a_geom, + const amrex::Vector& a_grids, + const amrex::Vector& a_dmap, + const amrex::LPInfo& a_info = amrex::LPInfo(), + const amrex::Vector const*>& a_factory = {}) + { + amrex::MLLinOp::define(a_geom, a_grids, a_dmap, a_info, a_factory); + } + + /** + * \brief Return the default solver at the bottom of MG cycles. By + * default, MLLinOp uses a BiCGStab solver implemented in + * AMReX::MLCGSolver. However, it only supports a single MultiFab. + * Since our data type is different, we use a smoother instead. In the + * future we can try to generalize MLCGSolver. + */ + virtual amrex::BottomSolver getDefaultBottomSolver () const override { + return amrex::BottomSolver::smoother; + } + + /** + * \brief Make data container (e.g., MultiFabs stored in Any) for given level. + * + * \param amrlev AMR level. Note that the lowest level is always 0. + * \param mglev MG level. Note that mglev+1 is one level coarser than mglev. + * \param ng number of ghost cells. 
+ */ + virtual amrex::Any AnyMake (int amrlev, int mglev, amrex::IntVect const& ng) const override + { + auto const& ba = m_grids[amrlev][mglev]; + auto const& dm = m_dmap [amrlev][mglev]; + auto const& fc = *m_factory[amrlev][mglev]; + return amrex::Any(Container{amrex::MultiFab(amrex::convert(ba,amrex::IntVect(0,1,1)), + dm, 1, ng, amrex::MFInfo(), fc), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,0,1)), + dm, 1, ng, amrex::MFInfo(), fc), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,1,0)), + dm, 1, ng, amrex::MFInfo(), fc)}); + } + + /** + * \brief Make data container with coarsened BoxArray and + * DistributionMapping of the given MG level. + * + * \param amrlev AMR level. Note that the lowest level is always 0. + * \param mglev MG level. The coarser level is mglev+1. + * \param ng number of ghost cells. + */ + virtual amrex::Any AnyMakeCoarseMG (int amrlev, int mglev, amrex::IntVect const& ng) const override + { + auto ratio = (amrlev > 0) ? amrex::IntVect(2) : this->mg_coarsen_ratio_vec[mglev]; + auto const& ba = amrex::coarsen(m_grids[amrlev][mglev], ratio); + auto const& dm = m_dmap[amrlev][mglev]; + return amrex::Any(Container{amrex::MultiFab(amrex::convert(ba,amrex::IntVect(0,1,1)), + dm, 1, ng), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,0,1)), + dm, 1, ng), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,1,0)), + dm, 1, ng)}); + } + + /** + * \brief Make data container with coarsened BoxArray and + * DistributionMapping of the given AMR level. + * + * \param famrlev AMR level. The coarser AMR level is famrlev-1. + * \param ng number of ghost cells. 
+ */ + virtual amrex::Any AnyMakeCoarseAmr (int famrlev, amrex::IntVect const& ng) const override + { + amrex::IntVect ratio(this->AMRRefRatio(famrlev-1)); + auto const& ba = amrex::coarsen(m_grids[famrlev][0], ratio); + auto const& dm = m_dmap[famrlev][0]; + return amrex::Any(Container{amrex::MultiFab(amrex::convert(ba,amrex::IntVect(0,1,1)), + dm, 1, ng), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,0,1)), + dm, 1, ng), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,1,0)), + dm, 1, ng)}); + } + + /** + * \brief Make an alias of the given Any without deepcopying. + * + * \param a an Any object. + */ + virtual amrex::Any AnyMakeAlias (amrex::Any const& a) const override + { + auto const& rhs = a.get(); + return amrex::Any(Container{amrex::MultiFab(rhs[0], amrex::make_alias, 0, 1), + amrex::MultiFab(rhs[1], amrex::make_alias, 0, 1), + amrex::MultiFab(rhs[2], amrex::make_alias, 0, 1)}); + } + + /** + * \brief Return the number of ghost cells in the given Any. + * + * \param a an Any object. + */ + virtual amrex::IntVect AnyGrowVect (amrex::Any const& a) const override + { + auto const& mfs = a.get(); + return mfs[0].nGrowVect(); + } + + /** + * \brief Copy data from source Any to destination Any. + * + * \param dst destination Any. + * \param src source Any. + * \param ng number of ghost cells included in the operation. + */ + virtual void AnyCopy (amrex::Any& dst, amrex::Any const& src, amrex::IntVect const& ng) const override + { + auto& dmf = dst.get(); + auto const& smf = src.get(); + for (int idim=0; idim < 3; ++idim) { + amrex::MultiFab::Copy(dmf[idim], smf[idim], 0, 0, 1, ng); + } + } + + /** + * \brief Add data from source Any to destination Any. + * + * \param dst destination Any. + * \param src source Any. + * \param ng number of ghost cells included in the operation. 
+ */ + virtual void AnyAdd (amrex::Any& dst, amrex::Any const& src, amrex::IntVect const& ng) const override + { + auto& dmf = dst.get(); + auto const& smf = src.get(); + for (int idim=0; idim < 3; ++idim) { + amrex::MultiFab::Add(dmf[idim], smf[idim], 0, 0, 1, ng); + } + } + + /** + * \brief Set the given Any to zero. + * + * \param a an Any object. + */ + virtual void AnySetToZero (amrex::Any& a) const override + { + auto& mfs = a.get(); + for (int idim=0; idim < 3; ++idim) { + mfs[idim].setVal(amrex::Real(0.0)); + } + } + + /** + * \brief Set boundary (i.e., ghost cells) of the given Any to zero. + * + * \param a an Any object. + */ + virtual void AnySetBndryToZero (amrex::Any& a) const override + { + auto& mfs = a.get(); + for (int idim=0; idim < 3; ++idim) { + mfs[idim].setBndry(amrex::Real(0.0), 0, 1); + } + } + +#ifdef AMREX_USE_EB + /** + * \brief Set covered region of the given Any to zero. + * + * \param a an Any object. + */ + virtual void AnySetCoveredToZero (amrex::Any& a) const override + { + auto& mfs = a.get(); + for (int idim=0; idim < 3; ++idim) { + amrex::EB_set_covered(mfs[idim], 0, 1, 0, amrex::Real(0.0)); + } + } +#endif + + /** + * \brief ParallelCopy from source Any to destination Any. + * + * \param dst destination Any. + * \param src source Any. + * \param src_nghost number of ghost cells in the source included in the operation. + * \param dst_nghost number of ghost cells in the destination included in the operation. + * \param period Periodicity. 
+ */ + virtual void AnyParallelCopy (amrex::Any& dst, amrex::Any const& src, + amrex::IntVect const& src_nghost, amrex::IntVect const& dst_nghost, + amrex::Periodicity const& period = amrex::Periodicity::NonPeriodic()) const override + { + auto& dmf = dst.get(); + auto const& smf = src.get(); + for (int idim=0; idim < 3; ++idim) { + dmf[idim].ParallelCopy_nowait(smf[idim], 0, 0, 1, src_nghost, dst_nghost, period); + } + for (int idim=0; idim < 3; ++idim) { + dmf[idim].ParallelCopy_finish(); + } + } + + /** + * \brief Return the infinity norm of the given Any. + * + * \param a an Any object. + */ + virtual amrex::Real AnyNormInf (amrex::Any& a) const override + { + auto& mfs = a.get(); + amrex::Real r = amrex::Real(0.0); + for (int idim=0; idim < 3; ++idim) { + auto tmp = mfs[idim].norminf(0, 0, true); + r = std::max(r, tmp); + } + amrex::ParallelAllReduce::Max(r, amrex::ParallelContext::CommunicatorSub()); + return r; + } + + /** + * \brief Return the infinity norm of the masked region of the given Any. + * + * For a composite solve with multiple AMR levels, the region covered by + * finer AMR levels is not included in the operation. + * + * \param amrlev AMR level. + * \param a an Any object. + * \param local determines if the reduction is local (i.e., no MPI communication) or not. + */ + virtual amrex::Real AnyNormInfMask (int amrlev, amrex::Any const& a, bool local) const override + { + amrex::ignore_unused(amrlev, a, local); + amrex::Abort("TODO: AnyNormInfMask"); + // This is only needed for multi-level composite solve + return amrex::Real(0.0); + } + + /** + * \brief Compute residual of the original form, r = b - Ax. + * + * \param amrlev AMR level + * \param resid residual + * \param x the solution x + * \param b the RHS b + * \param crse_bcdata provides Dirichlet BC at AMR coarse/fine interface. + * It's a nullptr for single level solve. 
+ */ + virtual void AnySolutionResidual (int amrlev, amrex::Any& resid, amrex::Any& x, amrex::Any const& b, + amrex::Any const* crse_bcdata = nullptr) override + { + amrex::ignore_unused(amrlev, resid, x, b, crse_bcdata); + amrex::Abort("TODO: AnySolutionResidual"); + } + + /** + * \brief Compute residual of the residual correction form, r = b - Ax. + * + * \param amrlev AMR level. + * \param resid residual of the residual correction form. + * \param x the correction. + * \param b the RHS for the residual correction form (i.e., the residual of the original form). + * \param bc_mode is either Homogeneous or Inhomogeneous. + * \param crse_bcdata provides inhomogeneous Dirichlet BC at AMR coarse/fine interface. + * It's ignored for homogeneous Dirichlet BC. + */ + virtual void AnyCorrectionResidual (int amrlev, int mglev, amrex::Any& resid, amrex::Any& x, + const amrex::Any& b, MLLinOp::BCMode bc_mode, + const amrex::Any* crse_bcdata=nullptr) override + { + amrex::ignore_unused(amrlev, mglev, resid, x, b, bc_mode, crse_bcdata); + amrex::Abort("TODO: AnyCorrectionResidual"); + } + + /** + * \brief Reflux + * + * This modifies the coarse level residual at the coarse/fine interface. + * + * \param crse_amrlev coarse AMR level. + * \param res coarse level residual. + * \param crse_sol coarse level x. + * \param crse_rhs coarse level b. + * \param fine_res fine level residual. This may not be needed depending on the coarse/fine stencil. + * \param fine_sol fine level x. + * \param fine_rhs fine level b. + */ + virtual void AnyReflux (int crse_amrlev, + amrex::Any& res, const amrex::Any& crse_sol, const amrex::Any& crse_rhs, + amrex::Any& fine_res, amrex::Any& fine_sol, const amrex::Any& fine_rhs) override + { + amrex::ignore_unused(crse_amrlev, res, crse_sol, crse_rhs, fine_res, fine_sol, fine_rhs); + amrex::Abort("TODO: AnyReflux"); + // This is only needed for multi-level composite solve + } + + /** + * \brief Average down residual from fine to coarse AMR level. 
+ * + * \param clev coarse AMR level. + * \param cres coarse level residual. + * \param fres fine level residual. + */ + virtual void AnyAvgDownResAmr (int clev, amrex::Any& cres, amrex::Any const& fres) const override + { + amrex::ignore_unused(clev, cres, fres); + amrex::Abort("TODO: AnyAvgDownResAmr"); + // This is only needed for multi-level composite solve. + // And maybe there is nothing needed to be done here, like in the nodal projection solver. + } + + /** + * \brief Average down residual from fine to coarse MG level. + * + * This is only needed for MG F-cycle, and we don't need to implement this for V-cycle. + * + * \param clev coarse MG level. + * \param cres coarse level residual. + * \param fres fine level residual. + */ + virtual void AnyAvgDownResMG (int clev, amrex::Any& cres, amrex::Any const& fres) const override + { + amrex::ignore_unused(clev, cres, fres); + amrex::Abort("TODO: AnyAvgDownResMG"); // Not needed for V-cycle. + } + + /** + * \brief Smooth the given level. + * + * \param amrlev AMR level. Note that the lowest level is always 0. + * \param mglev MG level. Note that mglev+1 is one level coarser than mglev. + * \param sol x + * \param rhs b + * \param skip_fillboundary a flag for if we need to fill ghost cells in this function. + */ + virtual void AnySmooth (int amrlev, int mglev, amrex::Any& sol, const amrex::Any& rhs, + bool skip_fillboundary=false) const override + { + amrex::ignore_unused(amrlev, mglev, sol, rhs, skip_fillboundary); + amrex::Abort("TODO: AnySmooth"); + } + + /** + * \brief Restriction from fine to coarse MG level. + * + * \param amrlev AMR level. + * \param cmglev coarse MG level. The fine MG level is cmglev-1. + * \param crse coarse data. + * \param fine fine data. This is not const& because we may need to fill its ghost cells. 
+ */ + virtual void AnyRestriction (int amrlev, int cmglev, amrex::Any& crse, amrex::Any& fine) const override + { + amrex::ignore_unused(amrlev, cmglev, crse, fine); + amrex::Abort("TODO: AnyRestriction"); + } + + /** + * \brief Add interpolated coarse data onto the fine MG level. + * + * Note that it's an ADD operation. + * + * \param amrlev AMR level. + * \param fmglev fine MG level. The coarse MG level is fmglev+1. + * \param fine fine MG level data. + * \param crse coarse MG level data. + */ + virtual void AnyInterpolationMG (int amrlev, int fmglev, amrex::Any& fine, const amrex::Any& crse) const override + { + amrex::ignore_unused(amrlev, fmglev, fine, crse); + amrex::Abort("TODO: AnyInterpolationMG"); + } + + /** + * \brief Assign (i.e., copy) interpolated coarse data onto the fine MG level. + * + * Note that it's an ASSIGN operation. This is used in MG F-cycle, and + * does not need to be implemented for V-cycle. + * + * \param amrlev AMR level. + * \param fmglev fine MG level. The coarse MG level is fmglev+1. + * \param fine fine MG level data. + * \param crse coarse MG level data. + */ + virtual void AnyInterpAssignMG (int amrlev, int fmglev, amrex::Any& fine, amrex::Any& crse) const override + { + amrex::ignore_unused(amrlev, fmglev, fine, crse); + amrex::Abort("TODO: AnyInterpAssignMG"); // not needed for V-cycle. + } + + /** + * \brief Interpolate data from coarse to fine AMR level. + * + * \param famrlev fine AMR level. The coarse AMR level is famrlev-1. + * \param fine data on fine AMR level. + * \param crse data on coarse AMR level. + */ + virtual void AnyInterpolationAmr (int famrlev, amrex::Any& fine, const amrex::Any& crse, + amrex::IntVect const& /*nghost*/) const override + { + amrex::ignore_unused(famrlev, fine, crse); + // This is only needed for multi-level composite solve + amrex::Abort("TODO: AnyInterpolationAmr"); + } + + /** + * \brief Average down x and b from fine to coarse AMR level. 
+ * + * This is called before V-cycle to make data on AMR levels consistent. + * + * \param camrlev coarse AMR level. The fine level is camrlev+1. + * \param crse_sol x on coarse level. + * \param crse_rhs b on coarse level. + * \param fine_sol x on fine level. + * \param fine_rhs b on fine level. + */ + virtual void AnyAverageDownSolutionRHS (int camrlev, amrex::Any& crse_sol, amrex::Any& crse_rhs, + const amrex::Any& fine_sol, const amrex::Any& fine_rhs) override + { + amrex::ignore_unused(camrlev, crse_sol, crse_rhs, fine_sol, fine_rhs); + // This is only needed for multi-level composite solve + amrex::Abort("AnyAverageDownSolutionRHS"); + } + + /** + * \brief Average down and synchronize AMR data. + * + * Synchronize the data on each level. That is the nodal data in the + * same MultiFab needs to be synchronized. This function also needs to + * average down the data from fine to coarse AMR levels. + * + * \param sol data on all AMR levels. + */ + virtual void AnyAverageDownAndSync (amrex::Vector& sol) const override + { + amrex::ignore_unused(sol); + // Even for single level, we shoudl synchronize the data on level 0. + amrex::Abort("TODO: AnyAverageDownAndSync"); + } + + /** + * \brief Prepare the solver for MG cycle. + */ + virtual void prepareForSolve () override + { + amrex::Abort("TODO: prepareForSolve"); + } +}; + +} + + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H index 32980d74c45..e884f877fbc 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H @@ -36,6 +36,10 @@ public: Real solve (const Vector& a_sol, const Vector& a_rhs, Real a_tol_rel, Real a_tol_abs, const char* checkpoint_file = nullptr); + // For this version of solve, Any holds MultiFab like objects. 
+ Real solve (Vector& a_sol, const Vector& a_rhs, + Real a_tol_rel, Real a_tol_abs, const char* checkpoint_file = nullptr); + void getGradSolution (const Vector >& a_grad_sol, Location a_loc = Location::FaceCenter); @@ -121,7 +125,7 @@ public: void setHypreStrongThreshold (Real t) noexcept {hypre_strong_threshold = t;} #endif - void prepareForSolve (const Vector& a_sol, const Vector& a_rhs); + void prepareForSolve (Vector& a_sol, const Vector& a_rhs); void prepareForNSolve (); @@ -151,19 +155,16 @@ public: Real MLRhsNormInf (bool local = false); void buildFineMask (); - void averageDownAndSync (); - - void computeVolInv (); void makeSolvable (); - void makeSolvable (int amrlev, int mglev, MultiFab& mf); + void makeSolvable (int amrlev, int mglev, Any& mf); #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) - void bottomSolveWithHypre (MultiFab& x, const MultiFab& b); + void bottomSolveWithHypre (Any& x, const Any& b); #endif - void bottomSolveWithPETSc (MultiFab& x, const MultiFab& b); + void bottomSolveWithPETSc (Any& x, const Any& b); - int bottomSolveWithCG (MultiFab& x, const MultiFab& b, MLCGSolver::Type type); + int bottomSolveWithCG (Any& x, const Any& b, MLCGSolver::Type type); Real getInitRHS () const noexcept { return m_rhsnorm0; } // Initial composite residual @@ -242,26 +243,21 @@ private: * \brief To avoid confusion, terms like sol, cor, rhs, res, ... etc. are * in the frame of the original equation, not the correction form */ - Vector > sol_raii; - Vector sol; //!< alias to argument a_sol - Vector rhs; //!< Copy of original rhs - //! L(sol) = rhs + Vector sol; //!< Might be alias to argument a_sol + Vector rhs; //!< Copy of original rhs + //! L(sol) = rhs + + Vector sol_is_alias; /** * \brief First Vector: Amr levels. 0 is the coarest level * Second Vector: MG levels. 0 is the finest level */ - Vector > res; //! = rhs - L(sol) - Vector > > cor; //!< L(cor) = res - Vector > > cor_hold; - Vector > rescor; //!< = res - L(cor) - //! 
Residual of the correction form - - Vector > fine_mask; - - Vector > volinv; //!< used by makeSolvable - - Vector > scratch; + Vector > res; //! = rhs - L(sol) + Vector > cor; //!< L(cor) = res + Vector > cor_hold; + Vector > rescor; //!< = res - L(cor) + //! Residual of the correction form enum timer_types { solve_time=0, iter_time, bottom_time, ntimers }; Vector timer; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp index 2bdb9222b4b..a1e897e85ba 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #ifdef AMREX_USE_PETSC @@ -51,25 +50,52 @@ MLMG::~MLMG () Real MLMG::solve (const Vector& a_sol, const Vector& a_rhs, Real a_tol_rel, Real a_tol_abs, const char* checkpoint_file) +{ + Vector any_sol(namrlevs); + Vector any_rhs(namrlevs); + for (int lev = 0; lev < namrlevs; ++lev) { + any_sol[lev] = MultiFab(*a_sol[lev], amrex::make_alias, 0, a_sol[lev]->nComp()); + any_rhs[lev] = MultiFab(*a_rhs[lev], amrex::make_alias, 0, a_rhs[lev]->nComp()); + } + return solve(any_sol, any_rhs, a_tol_rel, a_tol_abs, checkpoint_file); +} + +Real +MLMG::solve (Vector& a_sol, const Vector& a_rhs, + Real a_tol_rel, Real a_tol_abs, const char* checkpoint_file) { BL_PROFILE("MLMG::solve()"); if (checkpoint_file != nullptr) { - checkPoint(a_sol, a_rhs, a_tol_rel, a_tol_abs, checkpoint_file); + if (a_sol[0].is()) { + Vector mf_sol(namrlevs); + Vector mf_rhs(namrlevs); + for (int lev = 0; lev < namrlevs; ++lev) { + mf_sol[lev] = &(a_sol[lev].get()); + mf_rhs[lev] = &(a_rhs[lev].get()); + } + checkPoint(mf_sol, mf_rhs, a_tol_rel, a_tol_abs, checkpoint_file); + } else { + amrex::Abort("MLMG::solve: checkpoint not supported for non-MultiFab type"); + } } if (bottom_solver == BottomSolver::Default) { bottom_solver = linop.getDefaultBottomSolver(); } +#if defined(AMREX_USE_HYPRE) || defined(AMREX_USE_PETSC) if (bottom_solver == BottomSolver::hypre || 
bottom_solver == BottomSolver::petsc) { + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(a_sol[0].is(), + "Non-MultiFab type not supported for hypre and petsc"); int mo = linop.getMaxOrder(); - if (a_sol[0]->hasEBFabFactory()) { + if (a_sol[0].get().hasEBFabFactory()) { linop.setMaxOrder(2); } else { linop.setMaxOrder(std::min(3,mo)); // maxorder = 4 not supported } } +#endif bool is_nsolve = linop.m_parent; @@ -84,8 +110,6 @@ MLMG::solve (const Vector& a_sol, const Vector& a_rh computeMLResidual(finest_amr_lev); - int ncomp = linop.getNComp(); - bool local = true; Real resnorm0 = MLResNormInf(finest_amr_lev, local); Real rhsnorm0 = MLRhsNormInf(local); @@ -200,9 +224,8 @@ MLMG::solve (const Vector& a_sol, const Vector& a_rh } for (int alev = 0; alev < namrlevs; ++alev) { - if (a_sol[alev] != sol[alev]) - { - MultiFab::Copy(*a_sol[alev], *sol[alev], 0, 0, ncomp, ng_back); + if (!sol_is_alias[alev]) { + linop.AnyCopy(a_sol[alev], sol[alev], ng_back); } } @@ -229,16 +252,13 @@ void MLMG::oneIter (int iter) { BL_PROFILE("MLMG::oneIter()"); - int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(); - for (int alev = finest_amr_lev; alev > 0; --alev) { - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(alev); miniCycle(alev); - MultiFab::Add(*sol[alev], *cor[alev][0], 0, 0, ncomp, nghost); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(alev)); + linop.AnyAdd(sol[alev], cor[alev][0], nghost); // compute residual for the coarse AMR level computeResWithCrseSolFineCor(alev-1,alev); @@ -250,7 +270,6 @@ void MLMG::oneIter (int iter) // coarsest amr level { - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(0); // enforce solvability if appropriate if (linop.isSingular(0) && linop.getEnforceSingularSolvable()) { @@ -258,24 +277,27 @@ void MLMG::oneIter (int iter) } if (iter < max_fmg_iters) { - mgFcycle (); + mgFcycle(); } else { - mgVcycle 
(0, 0); + mgVcycle(0, 0); } - MultiFab::Add(*sol[0], *cor[0][0], 0, 0, ncomp, nghost); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(0)); + linop.AnyAdd(sol[0], cor[0][0], nghost); } for (int alev = 1; alev <= finest_amr_lev; ++alev) { - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(alev); // (Fine AMR correction) = I(Coarse AMR correction) interpCorrection(alev); - MultiFab::Add(*sol[alev], *cor[alev][0], 0, 0, ncomp, nghost); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(alev)); + linop.AnyAdd(sol[alev], cor[alev][0], nghost); if (alev != finest_amr_lev) { - MultiFab::Add(*cor_hold[alev][0], *cor[alev][0], 0, 0, ncomp, nghost); + linop.AnyAdd(cor_hold[alev][0], cor[alev][0], nghost); } // Update fine AMR level correction @@ -283,14 +305,14 @@ void MLMG::oneIter (int iter) miniCycle(alev); - MultiFab::Add(*sol[alev], *cor[alev][0], 0, 0, ncomp, nghost); + linop.AnyAdd(sol[alev], cor[alev][0], nghost); if (alev != finest_amr_lev) { - MultiFab::Add(*cor[alev][0], *cor_hold[alev][0], 0, 0, ncomp, nghost); + linop.AnyAdd(cor[alev][0], cor_hold[alev][0], nghost); } } - averageDownAndSync(); + linop.AnyAverageDownAndSync(sol); } // Compute multi-level Residual (res) up to amrlevmax. @@ -301,11 +323,11 @@ MLMG::computeMLResidual (int amrlevmax) const int mglev = 0; for (int alev = amrlevmax; alev >= 0; --alev) { - const MultiFab* crse_bcdata = (alev > 0) ? sol[alev-1] : nullptr; - linop.solutionResidual(alev, res[alev][mglev], *sol[alev], rhs[alev], crse_bcdata); + const Any* crse_bcdata = (alev > 0) ? 
&(sol[alev-1]) : nullptr; + linop.AnySolutionResidual(alev, res[alev][mglev], sol[alev], rhs[alev], crse_bcdata); if (alev < finest_amr_lev) { - linop.reflux(alev, res[alev][mglev], *sol[alev], rhs[alev], - res[alev+1][mglev], *sol[alev+1], rhs[alev+1]); + linop.AnyReflux(alev, res[alev][mglev], sol[alev], rhs[alev], + res[alev+1][mglev], sol[alev+1], rhs[alev+1]); } } } @@ -315,16 +337,8 @@ void MLMG::computeResidual (int alev) { BL_PROFILE("MLMG::computeResidual()"); - - MultiFab& x = *sol[alev]; - const MultiFab& b = rhs[alev]; - MultiFab& r = res[alev][0]; - - const MultiFab* crse_bcdata = nullptr; - if (alev > 0) { - crse_bcdata = sol[alev-1]; - } - linop.solutionResidual(alev, r, x, b, crse_bcdata); + const Any* crse_bcdata = (alev > 0) ? &(sol[alev-1]) : nullptr; + linop.AnySolutionResidual(alev, res[alev][0], sol[alev], rhs[alev], crse_bcdata); } // Compute coarse AMR level composite residual with coarse solution and fine correction @@ -333,39 +347,28 @@ MLMG::computeResWithCrseSolFineCor (int calev, int falev) { BL_PROFILE("MLMG::computeResWithCrseSolFineCor()"); - int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = std::min(linop.getNGrow(falev),linop.getNGrow(calev)); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(std::min(linop.getNGrow(falev),linop.getNGrow(calev))); - MultiFab& crse_sol = *sol[calev]; - const MultiFab& crse_rhs = rhs[calev]; - MultiFab& crse_res = res[calev][0]; + Any& crse_sol = sol[calev]; + const Any& crse_rhs = rhs[calev]; + Any& crse_res = res[calev][0]; - MultiFab& fine_sol = *sol[falev]; - const MultiFab& fine_rhs = rhs[falev]; - MultiFab& fine_cor = *cor[falev][0]; - MultiFab& fine_res = res[falev][0]; - MultiFab& fine_rescor = rescor[falev][0]; + Any& fine_sol = sol[falev]; + const Any& fine_rhs = rhs[falev]; + Any& fine_cor = cor[falev][0]; + Any& fine_res = res[falev][0]; + Any& fine_rescor = rescor[falev][0]; - const MultiFab* crse_bcdata 
= nullptr; - if (calev > 0) { - crse_bcdata = sol[calev-1]; - } - linop.solutionResidual(calev, crse_res, crse_sol, crse_rhs, crse_bcdata); + const Any* crse_bcdata = (calev > 0) ? &(sol[calev-1]) : nullptr; + linop.AnySolutionResidual(calev, crse_res, crse_sol, crse_rhs, crse_bcdata); - linop.correctionResidual(falev, 0, fine_rescor, fine_cor, fine_res, BCMode::Homogeneous); - MultiFab::Copy(fine_res, fine_rescor, 0, 0, ncomp, nghost); + linop.AnyCorrectionResidual(falev, 0, fine_rescor, fine_cor, fine_res, BCMode::Homogeneous); + linop.AnyCopy(fine_res, fine_rescor, nghost); - linop.reflux(calev, crse_res, crse_sol, crse_rhs, fine_res, fine_sol, fine_rhs); + linop.AnyReflux(calev, crse_res, crse_sol, crse_rhs, fine_res, fine_sol, fine_rhs); - if (linop.isCellCentered()) { - const int amrrr = linop.AMRRefRatio(calev); -#ifdef AMREX_USE_EB - amrex::EB_average_down(fine_res, crse_res, 0, ncomp, amrrr); -#else - amrex::average_down(fine_res, crse_res, 0, ncomp, amrrr); -#endif - } + linop.AnyAvgDownResAmr(calev, crse_res, fine_res); } // Compute fine AMR level residual fine_res = fine_res - L(fine_cor) with coarse providing BC. 
@@ -374,20 +377,19 @@ MLMG::computeResWithCrseCorFineCor (int falev) { BL_PROFILE("MLMG::computeResWithCrseCorFineCor()"); - int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(falev); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(falev)); - const MultiFab& crse_cor = *cor[falev-1][0]; + const Any& crse_cor = cor[falev-1][0]; - MultiFab& fine_cor = *cor[falev][0]; - MultiFab& fine_res = res[falev][0]; - MultiFab& fine_rescor = rescor[falev][0]; + Any& fine_cor = cor [falev][0]; + Any& fine_res = res [falev][0]; + Any& fine_rescor = rescor[falev][0]; // fine_rescor = fine_res - L(fine_cor) - linop.correctionResidual(falev, 0, fine_rescor, fine_cor, fine_res, - BCMode::Inhomogeneous, &crse_cor); - MultiFab::Copy(fine_res, fine_rescor, 0, 0, ncomp, nghost); + linop.AnyCorrectionResidual(falev, 0, fine_rescor, fine_cor, fine_res, + BCMode::Inhomogeneous, &crse_cor); + linop.AnyCopy(fine_res, fine_rescor, nghost); } void @@ -413,16 +415,16 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { - Real norm = res[amrlev][mglev].norm0(); + Real norm = linop.AnyNormInf(res[amrlev][mglev]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " DN: Norm before smooth " << norm << "\n"; } - cor[amrlev][mglev]->setVal(0.0); + linop.AnySetToZero(cor[amrlev][mglev]); bool skip_fillboundary = true; for (int i = 0; i < nu1; ++i) { - linop.smooth(amrlev, mglev, *cor[amrlev][mglev], res[amrlev][mglev], - skip_fillboundary); + linop.AnySmooth(amrlev, mglev, cor[amrlev][mglev], res[amrlev][mglev], + skip_fillboundary); skip_fillboundary = false; } @@ -431,14 +433,13 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { - Real norm = rescor[amrlev][mglev].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " DN: Norm after smooth " << norm << "\n"; } // res_crse = 
R(rescor_fine); this provides res/b to the level below - linop.restriction(amrlev, mglev+1, res[amrlev][mglev+1], rescor[amrlev][mglev]); - + linop.AnyRestriction(amrlev, mglev+1, res[amrlev][mglev+1], rescor[amrlev][mglev]); } BL_PROFILE_VAR("MLMG::mgVcycle_bottom", blp_bottom); @@ -446,7 +447,7 @@ MLMG::mgVcycle (int amrlev, int mglev_top) { if (verbose >= 4) { - Real norm = res[amrlev][mglev_bottom].norm0(); + Real norm = linop.AnyNormInf(res[amrlev][mglev_bottom]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " DN: Norm before bottom " << norm << "\n"; } @@ -454,7 +455,7 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev_bottom); - Real norm = rescor[amrlev][mglev_bottom].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev_bottom]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " UP: Norm after bottom " << norm << "\n"; @@ -464,21 +465,21 @@ MLMG::mgVcycle (int amrlev, int mglev_top) { if (verbose >= 4) { - Real norm = res[amrlev][mglev_bottom].norm0(); + Real norm = linop.AnyNormInf(res[amrlev][mglev_bottom]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " Norm before smooth " << norm << "\n"; } - cor[amrlev][mglev_bottom]->setVal(0.0); + linop.AnySetToZero(cor[amrlev][mglev_bottom]); bool skip_fillboundary = true; for (int i = 0; i < nu1; ++i) { - linop.smooth(amrlev, mglev_bottom, *cor[amrlev][mglev_bottom], res[amrlev][mglev_bottom], - skip_fillboundary); + linop.AnySmooth(amrlev, mglev_bottom, cor[amrlev][mglev_bottom], + res[amrlev][mglev_bottom], skip_fillboundary); skip_fillboundary = false; } if (verbose >= 4) { computeResOfCorrection(amrlev, mglev_bottom); - Real norm = rescor[amrlev][mglev_bottom].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev_bottom]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " Norm after smooth " << norm << "\n"; } @@ -493,12 +494,12 @@ MLMG::mgVcycle (int amrlev, int 
mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev); - Real norm = rescor[amrlev][mglev].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " UP: Norm before smooth " << norm << "\n"; } for (int i = 0; i < nu2; ++i) { - linop.smooth(amrlev, mglev, *cor[amrlev][mglev], res[amrlev][mglev]); + linop.AnySmooth(amrlev, mglev, cor[amrlev][mglev], res[amrlev][mglev]); } if (cf_strategy == CFStrategy::ghostnodes) computeResOfCorrection(amrlev, mglev); @@ -506,7 +507,7 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev); - Real norm = rescor[amrlev][mglev].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " UP: Norm after smooth " << norm << "\n"; } @@ -523,19 +524,12 @@ MLMG::mgFcycle () const int amrlev = 0; const int mg_bottom_lev = linop.NMGLevels(amrlev) - 1; - const int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(amrlev); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(amrlev)); for (int mglev = 1; mglev <= mg_bottom_lev; ++mglev) { -#ifdef AMREX_USE_EB - amrex::EB_average_down(res[amrlev][mglev-1], res[amrlev][mglev], 0, ncomp, - linop.mg_coarsen_ratio_vec[mglev-1]); -#else - amrex::average_down(res[amrlev][mglev-1], res[amrlev][mglev], 0, ncomp, - linop.mg_coarsen_ratio_vec[mglev-1]); -#endif + linop.AnyAvgDownResMG(mglev, res[amrlev][mglev], res[amrlev][mglev-1]); } bottomSolve(); @@ -543,17 +537,17 @@ MLMG::mgFcycle () for (int mglev = mg_bottom_lev-1; mglev >= 0; --mglev) { // cor_fine = I(cor_crse) - interpCorrection (amrlev, mglev); + interpCorrection(amrlev, mglev); // rescor = res - L(cor) computeResOfCorrection(amrlev, mglev); // res = rescor; this provides b to the vcycle below - MultiFab::Copy(res[amrlev][mglev], 
rescor[amrlev][mglev], 0,0,ncomp,nghost); + linop.AnyCopy(res[amrlev][mglev], rescor[amrlev][mglev], nghost); // save cor; do v-cycle; add the saved to cor std::swap(cor[amrlev][mglev], cor_hold[amrlev][mglev]); mgVcycle(amrlev, mglev); - MultiFab::Add(*cor[amrlev][mglev], *cor_hold[amrlev][mglev], 0, 0, ncomp, nghost); + linop.AnyAdd(cor[amrlev][mglev], cor_hold[amrlev][mglev], nghost); } } @@ -563,17 +557,11 @@ MLMG::interpCorrection (int alev) { BL_PROFILE("MLMG::interpCorrection_1"); - const int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(alev); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(alev)); - const MultiFab& crse_cor = *cor[alev-1][0]; - MultiFab& fine_cor = *cor[alev][0]; - - BoxArray ba = fine_cor.boxArray(); - const int amrrr = linop.AMRRefRatio(alev-1); - IntVect refratio{amrrr}; - ba.coarsen(refratio); + Any const& crse_cor = cor[alev-1][0]; + Any & fine_cor = cor[alev ][0]; const Geometry& crse_geom = linop.Geom(alev-1,0); @@ -584,121 +572,12 @@ MLMG::interpCorrection (int alev) ng_src = linop.getNGrow(alev-1); ng_dst = linop.getNGrow(alev-1); } - MultiFab cfine(ba, fine_cor.DistributionMap(), ncomp, ng_dst); - cfine.setVal(0.0); - cfine.ParallelCopy(crse_cor, 0, 0, ncomp, ng_src, ng_dst, crse_geom.periodicity()); - - bool isEB = fine_cor.hasEBFabFactory(); - ignore_unused(isEB); -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(&(fine_cor.Factory())); - const FabArray* flags = (factory) ? 
&(factory->getMultiEBCellFlagFab()) : nullptr; -#endif - - if (linop.isCellCentered()) - { - MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(fine_cor, mfi_info); mfi.isValid(); ++mfi) - { - const Box& bx = mfi.tilebox(); - Array4 const& ff = fine_cor.array(mfi); - Array4 const& cc = cfine.const_array(mfi); -#ifdef AMREX_USE_EB - bool call_lincc; - if (isEB) - { - const auto& flag = (*flags)[mfi]; - if (flag.getType(amrex::grow(bx,1)) == FabType::regular) { - call_lincc = true; - } else { - Array4 const& flg = flag.const_array(); - switch(refratio[0]) { - case 2: - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_eb_cc_interp_r<2>(tbx, ff, cc, flg, ncomp); - }); - break; - } - case 4: - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_eb_cc_interp_r<4>(tbx, ff, cc, flg, ncomp); - }); - break; - } - default: - amrex::Abort("mlmg_eb_cc_interp: only refratio 2 and 4 are supported"); - } + Any cfine = linop.AnyMakeCoarseAmr(alev, IntVect(ng_dst)); + linop.AnySetToZero(cfine); + linop.AnyParallelCopy(cfine, crse_cor, IntVect(ng_src), IntVect(ng_dst), crse_geom.periodicity()); - call_lincc = false; - } - } - else - { - call_lincc = true; - } -#else - const bool call_lincc = true; -#endif - if (call_lincc) - { - switch(refratio[0]) { - case 2: - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_lin_cc_interp_r2(tbx, ff, cc, ncomp); - }); - break; - } - case 4: - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_lin_cc_interp_r4(tbx, ff, cc, ncomp); - }); - break; - } - default: - amrex::Abort("mlmg_lin_cc_interp: only refratio 2 and 4 are supported"); - } - } - } - } - else - { - AMREX_ALWAYS_ASSERT(amrrr == 2 || amrrr == 4); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(fine_cor, TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - Box fbx = 
mfi.tilebox(); - if (cf_strategy == CFStrategy::ghostnodes && nghost >1) fbx.grow(nghost); - Array4 const& ffab = fine_cor.array(mfi); - Array4 const& cfab = cfine.const_array(mfi); - - if (amrrr == 2) { - AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, - { - mlmg_lin_nd_interp_r2(i,j,k,n,ffab,cfab); - }); - } else { - AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, - { - mlmg_lin_nd_interp_r4(i,j,k,n,ffab,cfab); - }); - } - } - } + linop.AnyInterpolationAmr(alev, fine_cor, cfine, nghost); } // Interpolate correction between MG levels @@ -709,119 +588,9 @@ MLMG::interpCorrection (int alev, int mglev) { BL_PROFILE("MLMG::interpCorrection_2"); - MultiFab& crse_cor = *cor[alev][mglev+1]; - MultiFab& fine_cor = *cor[alev][mglev ]; - - const int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(alev); - - const Geometry& crse_geom = linop.Geom(alev,mglev+1); - const IntVect refratio = (alev > 0) ? IntVect(2) : linop.mg_coarsen_ratio_vec[mglev]; - - MultiFab cfine; - const MultiFab* cmf; - - if (amrex::isMFIterSafe(crse_cor, fine_cor)) - { - crse_cor.FillBoundary(crse_geom.periodicity()); - cmf = &crse_cor; - } - else - { - BoxArray cba = fine_cor.boxArray(); - cba.coarsen(refratio); - IntVect ng = linop.isCellCentered() ? crse_cor.nGrowVect() : IntVect(0); - if (cf_strategy == CFStrategy::ghostnodes) ng = IntVect(nghost); - cfine.define(cba, fine_cor.DistributionMap(), ncomp, ng); - cfine.setVal(0.0); - cfine.ParallelCopy(crse_cor, 0, 0, ncomp, IntVect(0), ng, crse_geom.periodicity()); - cmf = & cfine; - } - - bool isEB = fine_cor.hasEBFabFactory(); - ignore_unused(isEB); - -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(&(fine_cor.Factory())); - const FabArray* flags = (factory) ? 
&(factory->getMultiEBCellFlagFab()) : nullptr; -#endif - - if (linop.isCellCentered()) - { - MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(fine_cor, mfi_info); mfi.isValid(); ++mfi) - { - const Box& bx = mfi.tilebox(); - const auto& ff = fine_cor.array(mfi); - const auto& cc = cmf->array(mfi); -#ifdef AMREX_USE_EB - bool call_lincc; - if (isEB) - { - const auto& flag = (*flags)[mfi]; - if (flag.getType(amrex::grow(bx,1)) == FabType::regular) { - call_lincc = true; - } else { - Array4 const& flg = flag.const_array(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_eb_cc_interp_r<2>(tbx, ff, cc, flg, ncomp); - }); - - call_lincc = false; - } - } - else - { - call_lincc = true; - } -#else - const bool call_lincc = true; -#endif - if (call_lincc) - { -#if (AMREX_SPACEDIM == 3) - if (linop.hasHiddenDimension()) { - Box const& bx_2d = linop.compactify(bx); - auto const& ff_2d = linop.compactify(ff); - auto const& cc_2d = linop.compactify(cc); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx_2d, tbx, - { - TwoD::mlmg_lin_cc_interp_r2(tbx, ff_2d, cc_2d, ncomp); - }); - } else -#endif - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_lin_cc_interp_r2(tbx, ff, cc, ncomp); - }); - } - } - } - } - else - { -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(fine_cor, TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - const Box& fbx = mfi.tilebox(); - Array4 const& ffab = fine_cor.array(mfi); - Array4 const& cfab = cmf->const_array(mfi); - - AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, - { - mlmg_lin_nd_interp_r2(i,j,k,n,ffab,cfab); - }); - } - } + Any& crse_cor = cor[alev][mglev+1]; + Any& fine_cor = cor[alev][mglev ]; + linop.AnyInterpAssignMG(alev, mglev, fine_cor, crse_cor); } // (Fine MG level correction) += I(Coarse MG level correction) @@ -830,31 +599,24 @@ 
MLMG::addInterpCorrection (int alev, int mglev) { BL_PROFILE("MLMG::addInterpCorrection()"); - const int ncomp = linop.getNComp(); - - const MultiFab& crse_cor = *cor[alev][mglev+1]; - MultiFab& fine_cor = *cor[alev][mglev ]; + const Any& crse_cor = cor[alev][mglev+1]; + Any& fine_cor = cor[alev][mglev ]; - MultiFab cfine; - const MultiFab* cmf; + Any cfine; + const Any* cany; - if (amrex::isMFIterSafe(crse_cor, fine_cor)) + if (linop.isMFIterSafe(alev, mglev, mglev+1)) { - cmf = &crse_cor; + cany = &crse_cor; } else { - BoxArray cba = fine_cor.boxArray(); - IntVect ratio = (alev > 0) ? IntVect(2) : linop.mg_coarsen_ratio_vec[mglev]; - - cba.coarsen(ratio); - const int ng = 0; - cfine.define(cba, fine_cor.DistributionMap(), ncomp, ng); - cfine.ParallelCopy(crse_cor); - cmf = &cfine; + cfine = linop.AnyMakeCoarseMG(alev, mglev, IntVect(0)); + linop.AnyParallelCopy(cfine,crse_cor,IntVect(0),IntVect(0)); + cany = &cfine; } - linop.interpolation(alev, mglev, fine_cor, *cmf); + linop.AnyInterpolationMG(alev, mglev, fine_cor, *cany); } // Compute rescor = res - L(cor) @@ -865,10 +627,10 @@ void MLMG::computeResOfCorrection (int amrlev, int mglev) { BL_PROFILE("MLMG:computeResOfCorrection()"); - MultiFab& x = *cor[amrlev][mglev]; - const MultiFab& b = res[amrlev][mglev]; - MultiFab& r = rescor[amrlev][mglev]; - linop.correctionResidual(amrlev, mglev, r, x, b, BCMode::Homogeneous); + Any & x = cor[amrlev][mglev]; + const Any& b = res[amrlev][mglev]; + Any & r = rescor[amrlev][mglev]; + linop.AnyCorrectionResidual(amrlev, mglev, r, x, b, BCMode::Homogeneous); } // At the true bottom of the coarset AMR level. 
@@ -894,7 +656,7 @@ MLMG::NSolve (MLMG& a_solver, MultiFab& a_sol, MultiFab& a_rhs) a_sol.setVal(0.0); - MultiFab const& res_bottom = res[0].back(); + MultiFab const& res_bottom = res[0].back().get(); if (BoxArray::SameRefs(a_rhs.boxArray(),res_bottom.boxArray()) && DistributionMapping::SameRefs(a_rhs.DistributionMap(),res_bottom.DistributionMap())) { @@ -906,7 +668,7 @@ MLMG::NSolve (MLMG& a_solver, MultiFab& a_sol, MultiFab& a_rhs) a_solver.solve({&a_sol}, {&a_rhs}, Real(-1.0), Real(-1.0)); - linop.copyNSolveSolution(*cor[0].back(), a_sol); + linop.copyNSolveSolution(cor[0].back().get(), a_sol); } void @@ -914,8 +676,6 @@ MLMG::actualBottomSolve () { BL_PROFILE("MLMG::actualBottomSolve()"); - const int ncomp = linop.getNComp(); - if (!linop.isBottomActive()) return; auto bottom_start_time = amrex::second(); @@ -924,28 +684,28 @@ MLMG::actualBottomSolve () const int amrlev = 0; const int mglev = linop.NMGLevels(amrlev) - 1; - MultiFab& x = *cor[amrlev][mglev]; - MultiFab& b = res[amrlev][mglev]; + auto& x = cor[amrlev][mglev]; + auto& b = res[amrlev][mglev]; - x.setVal(0.0); + linop.AnySetToZero(x); if (bottom_solver == BottomSolver::smoother) { bool skip_fillboundary = true; for (int i = 0; i < nuf; ++i) { - linop.smooth(amrlev, mglev, x, b, skip_fillboundary); + linop.AnySmooth(amrlev, mglev, x, b, skip_fillboundary); skip_fillboundary = false; } } else { - MultiFab* bottom_b = &b; - MultiFab raii_b; + Any* bottom_b = &b; + Any raii_b; if (linop.isBottomSingular() && linop.getEnforceSingularSolvable()) { - raii_b.define(b.boxArray(), b.DistributionMap(), ncomp, b.nGrowVect(), - MFInfo(), *linop.Factory(amrlev,mglev)); - MultiFab::Copy(raii_b,b,0,0,ncomp,b.nGrowVect()); + const IntVect ng = linop.AnyGrowVect(b); + raii_b = linop.AnyMake(amrlev, mglev, ng); + linop.AnyCopy(raii_b, b, ng); bottom_b = &raii_b; makeSolvable(amrlev,mglev,*bottom_b); @@ -973,7 +733,7 @@ MLMG::actualBottomSolve () int ret = bottomSolveWithCG(x, *bottom_b, cg_type); // If the MLMG solve 
failed then set the correction to zero if (ret != 0) { - cor[amrlev][mglev]->setVal(0.0); + linop.AnySetToZero(cor[amrlev][mglev]); if (bottom_solver == BottomSolver::cgbicg || bottom_solver == BottomSolver::bicgcg) { if (bottom_solver == BottomSolver::cgbicg) { @@ -983,7 +743,7 @@ MLMG::actualBottomSolve () } ret = bottomSolveWithCG(x, *bottom_b, cg_type); if (ret != 0) { - cor[amrlev][mglev]->setVal(0.0); + linop.AnySetToZero(cor[amrlev][mglev]); } else { // switch permanently if (cg_type == MLCGSolver::Type::CG) { bottom_solver = BottomSolver::cg; @@ -995,7 +755,7 @@ MLMG::actualBottomSolve () } const int n = (ret==0) ? nub : nuf; for (int i = 0; i < n; ++i) { - linop.smooth(amrlev, mglev, x, b); + linop.AnySmooth(amrlev, mglev, x, b); } } } @@ -1006,7 +766,7 @@ MLMG::actualBottomSolve () } int -MLMG::bottomSolveWithCG (MultiFab& x, const MultiFab& b, MLCGSolver::Type type) +MLMG::bottomSolveWithCG (Any& x, const Any& b, MLCGSolver::Type type) { MLCGSolver cg_solver(this, linop); cg_solver.setSolver(type); @@ -1027,37 +787,7 @@ Real MLMG::ResNormInf (int alev, bool local) { BL_PROFILE("MLMG::ResNormInf()"); - const int ncomp = linop.getNComp(); - const int mglev = 0; - Real norm = 0.0; - MultiFab* pmf = &(res[alev][mglev]); -#ifdef AMREX_USE_EB - if (linop.isCellCentered() && scratch[alev]) { - pmf = scratch[alev].get(); - MultiFab::Copy(*pmf, res[alev][mglev], 0, 0, ncomp, 0); - auto factory = dynamic_cast(linop.Factory(alev)); - if (factory) { - const MultiFab& vfrac = factory->getVolFrac(); - for (int n=0; n < ncomp; ++n) { - MultiFab::Multiply(*pmf, vfrac, 0, n, 1, 0); - } - } else { - amrex::Abort("MLMG::ResNormInf: not EB Factory"); - } - } -#endif - for (int n = 0; n < ncomp; n++) - { - Real newnorm = 0.0; - if (fine_mask[alev]) { - newnorm = pmf->norm0(*fine_mask[alev],n,0,true); - } else { - newnorm = pmf->norm0(n,0,true); - } - norm = std::max(norm, newnorm); - } - if (!local) ParallelAllReduce::Max(norm, ParallelContext::CommunicatorSub()); - return 
norm; + return linop.AnyNormInfMask(alev, res[alev][0], local); } // Computes multi-level masked inf-norm of Residual (res). @@ -1079,66 +809,17 @@ Real MLMG::MLRhsNormInf (bool local) { BL_PROFILE("MLMG::MLRhsNormInf()"); - const int ncomp = linop.getNComp(); - Real r = 0.0; - for (int alev = 0; alev <= finest_amr_lev; ++alev) - { - MultiFab* pmf = &(rhs[alev]); -#ifdef AMREX_USE_EB - if (linop.isCellCentered() && scratch[alev]) { - pmf = scratch[alev].get(); - MultiFab::Copy(*pmf, rhs[alev], 0, 0, ncomp, 0); - auto factory = dynamic_cast(linop.Factory(alev)); - if (factory) { - const MultiFab& vfrac = factory->getVolFrac(); - for (int n=0; n < ncomp; ++n) { - MultiFab::Multiply(*pmf, vfrac, 0, n, 1, 0); - } - } else { - amrex::Abort("MLMG::MLRhsNormInf: not EB Factory"); - } - } -#endif - for (int n=0; nnorm0(*fine_mask[alev],n,0,true)); - } else { - r = std::max(r, pmf->norm0(n,0,true)); - } - } + Real r = 0.0_rt; + for (int alev = 0; alev <= finest_amr_lev; ++alev) { + auto t = linop.AnyNormInfMask(alev, rhs[alev], true); + r = std::max(r, t); } if (!local) ParallelAllReduce::Max(r, ParallelContext::CommunicatorSub()); return r; } void -MLMG::buildFineMask () -{ - BL_PROFILE("MLMG::buildFineMask()"); - - if (!fine_mask.empty()) return; - - fine_mask.clear(); - fine_mask.resize(namrlevs); - - const auto& amrrr = linop.AMRRefRatio(); - for (int alev = 0; alev < finest_amr_lev; ++alev) - { - fine_mask[alev] = std::make_unique - (makeFineMask(rhs[alev], rhs[alev+1], IntVect(0), IntVect(amrrr[alev]), - Periodicity::NonPeriodic(), 1, 0)); - } - - if (!linop.isCellCentered()) { - for (int alev = 0; alev < finest_amr_lev; ++alev) { - linop.fixUpResidualMask(alev, *fine_mask[alev]); - } - } -} - -void -MLMG::prepareForSolve (const Vector& a_sol, const Vector& a_rhs) +MLMG::prepareForSolve (Vector& a_sol, const Vector& a_rhs) { BL_PROFILE("MLMG::prepareForSolve()"); @@ -1147,7 +828,6 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector& a_sol, const 
VectornGrowVect() == ng_sol) + else if (linop.AnyGrowVect(a_sol[alev]) == ng_sol) { - sol[alev] = a_sol[alev]; - sol[alev]->setBndry(0.0); + sol[alev] = linop.AnyMakeAlias(a_sol[alev]); + linop.AnySetBndryToZero(sol[alev]); + sol_is_alias[alev] = true; } else { if (!solve_called) { - sol_raii[alev] = std::make_unique(a_sol[alev]->boxArray(), - a_sol[alev]->DistributionMap(), - ncomp, ng_sol, MFInfo(), - *linop.Factory(alev)); + sol[alev] = linop.AnyMake(alev, 0, ng_sol); } - MultiFab::Copy(*sol_raii[alev], *a_sol[alev], 0, 0, ncomp, 0); - sol_raii[alev]->setBndry(0.0); - sol[alev] = sol_raii[alev].get(); + linop.AnyCopy(sol[alev], a_sol[alev], IntVect(0)); + linop.AnySetBndryToZero(sol[alev]); + sol_is_alias[alev] = false; } } @@ -1202,10 +881,9 @@ MLMG::prepareForSolve (const Vector& a_sol, const VectorboxArray(), a_rhs[alev]->DistributionMap(), ncomp, ng_rhs, - MFInfo(), *linop.Factory(alev)); + rhs[alev] = linop.AnyMake(alev, 0, ng_rhs); } - MultiFab::Copy(rhs[alev], *a_rhs[alev], 0, 0, ncomp, ng_rhs); + linop.AnyCopy(rhs[alev], a_rhs[alev], ng_rhs); linop.applyMetricTerm(alev, 0, rhs[alev]); linop.unimposeNeumannBC(alev, rhs[alev]); linop.applyInhomogNeumannTerm(alev, rhs[alev]); @@ -1215,38 +893,37 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector(linop.Factory(alev)); if (factory) { - Vector val(ncomp, 0.0); - amrex::EB_set_covered(rhs[alev], 0, ncomp, val); - amrex::EB_set_covered(*sol[alev], 0, ncomp, val); + linop.AnySetCoveredToZero(rhs[alev]); + linop.AnySetCoveredToZero(sol[alev]); } #endif } for (int falev = finest_amr_lev; falev > 0; --falev) { - linop.averageDownSolutionRHS(falev-1, *sol[falev-1], rhs[falev-1], *sol[falev], rhs[falev]); + linop.AnyAverageDownSolutionRHS(falev-1, sol[falev-1], rhs[falev-1], + sol[falev], rhs[falev]); } // enforce solvability if appropriate if (linop.isSingular(0) && linop.getEnforceSingularSolvable()) { - computeVolInv(); makeSolvable(); } IntVect ng = linop.isCellCentered() ? 
IntVect(0) : IntVect(1); if (cf_strategy == CFStrategy::ghostnodes) ng = ng_rhs; if (!solve_called) { - linop.make(res, ncomp, ng); - linop.make(rescor, ncomp, ng); + linop.make(res, ng); + linop.make(rescor, ng); } for (int alev = 0; alev <= finest_amr_lev; ++alev) { const int nmglevs = linop.NMGLevels(alev); for (int mglev = 0; mglev < nmglevs; ++mglev) { - res[alev][mglev].setVal(0.0); - rescor[alev][mglev].setVal(0.0); + linop.AnySetToZero(res [alev][mglev]); + linop.AnySetToZero(rescor[alev][mglev]); } } @@ -1261,12 +938,9 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector(res[alev][mglev].boxArray(), - res[alev][mglev].DistributionMap(), - ncomp, _ng, MFInfo(), - *linop.Factory(alev,mglev)); + cor[alev][mglev] = linop.AnyMake(alev, mglev, _ng); } - cor[alev][mglev]->setVal(0.0); + linop.AnySetToZero(cor[alev][mglev]); } } @@ -1280,12 +954,9 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector(cor[alev][mglev]->boxArray(), - cor[alev][mglev]->DistributionMap(), - ncomp, _ng, MFInfo(), - *linop.Factory(alev,mglev)); + cor_hold[alev][mglev] = linop.AnyMake(alev, mglev, _ng); } - cor_hold[alev][mglev]->setVal(0.0); + linop.AnySetToZero(cor_hold[alev][mglev]); } } for (int alev = 1; alev < finest_amr_lev; ++alev) @@ -1294,31 +965,9 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector(cor[alev][0]->boxArray(), - cor[alev][0]->DistributionMap(), - ncomp, _ng, MFInfo(), - *linop.Factory(alev,0)); + cor_hold[alev][0] = linop.AnyMake(alev, 0, _ng); } - cor_hold[alev][0]->setVal(0.0); - } - - buildFineMask(); - - if (!solve_called) - { - scratch.resize(namrlevs); -#ifdef AMREX_USE_EB - if (linop.isCellCentered()) { - for (int alev=0; alev < namrlevs; ++alev) { - if (rhs[alev].hasEBFabFactory()) { - scratch[alev] = std::make_unique(rhs[alev].boxArray(), - rhs[alev].DistributionMap(), - ncomp, 0, MFInfo(), - *linop.Factory(alev)); - } - } - } -#endif + linop.AnySetToZero(cor_hold[alev][0]); } if (linop.m_parent) { @@ -1379,7 +1028,7 @@ 
MLMG::getGradSolution (const Vector >& a_grad_so { BL_PROFILE("MLMG::getGradSolution()"); for (int alev = 0; alev <= finest_amr_lev; ++alev) { - linop.compGrad(alev, a_grad_sol[alev], *sol[alev], a_loc); + linop.compGrad(alev, a_grad_sol[alev], sol[alev].get(), a_loc); } } @@ -1392,7 +1041,11 @@ MLMG::getFluxes (const Vector >& a_flux, } AMREX_ASSERT(sol.size() == a_flux.size()); - getFluxes(a_flux, sol, a_loc); + Vector solmf; + for (auto & s : sol) { + solmf.push_back(&(s.get())); + } + getFluxes(a_flux, solmf, a_loc); } void @@ -1413,7 +1066,11 @@ void MLMG::getFluxes (const Vector & a_flux, Location a_loc) { AMREX_ASSERT(sol.size() == a_flux.size()); - getFluxes(a_flux, sol, a_loc); + Vector solmf; + for (auto & s : sol) { + solmf.push_back(&(s.get())); + } + getFluxes(a_flux, solmf, a_loc); } void @@ -1459,7 +1116,11 @@ MLMG::getEBFluxes (const Vector& a_eb_flux) } AMREX_ASSERT(sol.size() == a_eb_flux.size()); - getEBFluxes(a_eb_flux, sol); + Vector solmf; + for (auto & s : sol) { + solmf.push_back(&(s.get())); + } + getEBFluxes(a_eb_flux, solmf); } void @@ -1486,28 +1147,21 @@ MLMG::compResidual (const Vector& a_res, const Vector& a_s if (linop.hasHiddenDimension()) ng_sol[linop.hiddenDirection()] = 0; sol.resize(namrlevs); - sol_raii.resize(namrlevs); + sol_is_alias.resize(namrlevs,true); for (int alev = 0; alev < namrlevs; ++alev) { - if (cf_strategy == CFStrategy::ghostnodes) - { - sol[alev] = a_sol[alev]; - } - else if (a_sol[alev]->nGrowVect() == ng_sol) + if (cf_strategy == CFStrategy::ghostnodes || a_sol[alev]->nGrowVect() == ng_sol) { - sol[alev] = a_sol[alev]; + sol[alev] = linop.AnyMakeAlias(a_sol[alev]); + sol_is_alias[alev] = true; } else { - if (sol_raii[alev] == nullptr) + if (sol_is_alias[alev]) { - sol_raii[alev] = std::make_unique(a_sol[alev]->boxArray(), - a_sol[alev]->DistributionMap(), - ncomp, ng_sol, MFInfo(), - *linop.Factory(alev)); + sol[alev] = linop.AnyMake(alev, 0, ng_sol); } - MultiFab::Copy(*sol_raii[alev], *a_sol[alev], 0, 0, 
ncomp, 0); - sol[alev] = sol_raii[alev].get(); + MultiFab::Copy(sol[alev].get(), *a_sol[alev], 0, 0, ncomp, 0); } } @@ -1521,22 +1175,23 @@ MLMG::compResidual (const Vector& a_res, const Vector& a_s const auto& amrrr = linop.AMRRefRatio(); for (int alev = finest_amr_lev; alev >= 0; --alev) { - const MultiFab* crse_bcdata = (alev > 0) ? sol[alev-1] : nullptr; + const MultiFab* crse_bcdata = (alev > 0) ? &(sol[alev-1].get()) : nullptr; const MultiFab* prhs = a_rhs[alev]; #if (AMREX_SPACEDIM != 3) int nghost = (cf_strategy == CFStrategy::ghostnodes) ? linop.getNGrow(alev) : 0; - MultiFab rhstmp(prhs->boxArray(), prhs->DistributionMap(), ncomp, nghost, - MFInfo(), *linop.Factory(alev)); + Any rhstmp_a(MultiFab(prhs->boxArray(), prhs->DistributionMap(), ncomp, nghost, + MFInfo(), *linop.Factory(alev))); + MultiFab& rhstmp = rhstmp_a.get(); MultiFab::Copy(rhstmp, *prhs, 0, 0, ncomp, nghost); - linop.applyMetricTerm(alev, 0, rhstmp); - linop.unimposeNeumannBC(alev, rhstmp); - linop.applyInhomogNeumannTerm(alev, rhstmp); + linop.applyMetricTerm(alev, 0, rhstmp_a); + linop.unimposeNeumannBC(alev, rhstmp_a); + linop.applyInhomogNeumannTerm(alev, rhstmp_a); prhs = &rhstmp; #endif - linop.solutionResidual(alev, *a_res[alev], *sol[alev], *prhs, crse_bcdata); + linop.solutionResidual(alev, *a_res[alev], sol[alev].get(), *prhs, crse_bcdata); if (alev < finest_amr_lev) { - linop.reflux(alev, *a_res[alev], *sol[alev], *prhs, - *a_res[alev+1], *sol[alev+1], *a_rhs[alev+1]); + linop.reflux(alev, *a_res[alev], sol[alev].get(), *prhs, + *a_res[alev+1], sol[alev+1].get(), *a_rhs[alev+1]); if (linop.isCellCentered()) { #ifdef AMREX_USE_EB amrex::EB_average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]); @@ -1604,7 +1259,8 @@ MLMG::apply (const Vector& out, const Vector& a_in) } for (int alev = 0; alev < namrlevs; ++alev) { - linop.applyInhomogNeumannTerm(alev, rh[alev]); + Any a(MultiFab(rh[alev], amrex::make_alias, 0, rh[alev].nComp())); + linop.applyInhomogNeumannTerm(alev, 
a); } const auto& amrrr = linop.AMRRefRatio(); @@ -1637,215 +1293,45 @@ MLMG::apply (const Vector& out, const Vector& a_in) } } -void -MLMG::averageDownAndSync () -{ - const auto& amrrr = linop.AMRRefRatio(); - - int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(); - - if (linop.isCellCentered()) - { - for (int falev = finest_amr_lev; falev > 0; --falev) - { -#ifdef AMREX_USE_EB - amrex::EB_average_down(*sol[falev], *sol[falev-1], 0, ncomp, amrrr[falev-1]); -#else - amrex::average_down(*sol[falev], *sol[falev-1], 0, ncomp, amrrr[falev-1]); -#endif - } - } - else - { - linop.nodalSync(finest_amr_lev, 0, *sol[finest_amr_lev]); - - for (int falev = finest_amr_lev; falev > 0; --falev) - { - const auto& fmf = *sol[falev]; - auto& cmf = *sol[falev-1]; - - MultiFab tmpmf(amrex::coarsen(fmf.boxArray(), amrrr[falev-1]), fmf.DistributionMap(), ncomp, nghost); - amrex::average_down(fmf, tmpmf, 0, ncomp, amrrr[falev-1]); - cmf.ParallelCopy(tmpmf, 0, 0, ncomp); - linop.nodalSync(falev-1, 0, cmf); - } - } -} - -void -MLMG::computeVolInv () -{ - if (solve_called) return; - - if (linop.isCellCentered()) - { - volinv.resize(namrlevs); - for (int amrlev = 0; amrlev < namrlevs; ++amrlev) { - volinv[amrlev].resize(linop.NMGLevels(amrlev)); - } - - // We don't need to compute for every level - - auto f = [&] (int amrlev, int mglev) { -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(linop.Factory(amrlev,mglev)); - if (factory) - { - const MultiFab& vfrac = factory->getVolFrac(); - volinv[amrlev][mglev] = vfrac.sum(0,true); - } - else -#endif - { - volinv[amrlev][mglev] - = Real(1.0 / linop.compactify(linop.Geom(amrlev,mglev).Domain()).d_numPts()); - } - }; - - // amrlev = 0, mglev = 0 - f(0,0); - - int mgbottom = linop.NMGLevels(0)-1; - f(0,mgbottom); - -#ifdef AMREX_USE_EB - Real temp1, temp2; - if (rhs[0].hasEBFabFactory()) - { - ParallelAllReduce::Sum({volinv[0][0], volinv[0][mgbottom]}, - 
ParallelContext::CommunicatorSub()); - temp1 = Real(1.0)/volinv[0][0]; - temp2 = Real(1.0)/volinv[0][mgbottom]; - } - else - { - temp1 = volinv[0][0]; - temp2 = volinv[0][mgbottom]; - } - volinv[0][0] = temp1; - volinv[0][mgbottom] = temp2; -#endif - } -} - void MLMG::makeSolvable () { - const int ncomp = linop.getNComp(); - - if (linop.isCellCentered()) - { - Vector offset(ncomp); -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(linop.Factory(0)); - if (factory) - { - const MultiFab& vfrac = factory->getVolFrac(); - for (int c = 0; c < ncomp; ++c) { - offset[c] = MultiFab::Dot(rhs[0], c, vfrac, 0, 1, 0, true) * volinv[0][0]; - } - } - else -#endif - { - for (int c = 0; c < ncomp; ++c) { - offset[c] = rhs[0].sum(c,true) * volinv[0][0]; - } - } - ParallelAllReduce::Sum(offset.data(), ncomp, ParallelContext::CommunicatorSub()); - if (verbose >= 4) { - for (int c = 0; c < ncomp; ++c) { - amrex::Print() << "MLMG: Subtracting " << offset[c] - << " from rhs component " << c << "\n"; - } - } - for (int alev = 0; alev < namrlevs; ++alev) { - for (int c = 0; c < ncomp; ++c) { - rhs[alev].plus(-offset[c], c, 1); - } -#ifdef AMREX_USE_EB - if (rhs[alev].hasEBFabFactory()) { - Vector val(ncomp, 0.0); - amrex::EB_set_covered(rhs[alev], 0, ncomp, val); - } -#endif + auto const& offset = linop.getSolvabilityOffset(0, 0, rhs[0]); + if (verbose >= 4) { + const int ncomp = offset.size(); + for (int c = 0; c < ncomp; ++c) { + amrex::Print() << "MLMG: Subtracting " << offset[c] << " from rhs component " + << c << "\n"; } } - else - { - AMREX_ASSERT_WITH_MESSAGE(ncomp==1, "ncomp > 1 not supported for singular nodal problem"); - Real offset = linop.getSolvabilityOffset(0, 0, rhs[0]); - if (verbose >= 4) { - amrex::Print() << "MLMG: Subtracting " << offset << " from rhs\n"; - } - for (int alev = 0; alev < namrlevs; ++alev) { - linop.fixSolvabilityByOffset(alev, 0, rhs[alev], offset); - } + for (int alev = 0; alev < namrlevs; ++alev) { + linop.fixSolvabilityByOffset(alev, 0, rhs[alev], 
offset); } } void -MLMG::makeSolvable (int amrlev, int mglev, MultiFab& mf) +MLMG::makeSolvable (int amrlev, int mglev, Any& mf) { - const int ncomp = linop.getNComp(); - - if (linop.isCellCentered()) - { - Vector offset(ncomp); -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(linop.Factory(amrlev,mglev)); - if (factory) - { - const MultiFab& vfrac = factory->getVolFrac(); - for (int c = 0; c < ncomp; ++c) { - offset[c] = MultiFab::Dot(mf, c, vfrac, 0, 1, 0, true) * volinv[amrlev][mglev]; - } - } - else -#endif - { - for (int c = 0; c < ncomp; ++c) { - offset[c] = mf.sum(c,true) * volinv[amrlev][mglev]; - } - } - - ParallelAllReduce::Sum(offset.data(), ncomp, ParallelContext::CommunicatorSub()); - - if (verbose >= 4) { - for (int c = 0; c < ncomp; ++c) { - amrex::Print() << "MLMG: Subtracting " << offset[c] - << " from mf component c = " << c << "\n"; - } - } - + auto const& offset = linop.getSolvabilityOffset(amrlev, mglev, mf); + if (verbose >= 4) { + const int ncomp = offset.size(); for (int c = 0; c < ncomp; ++c) { - mf.plus(-offset[c], c, 1); + amrex::Print() << "MLMG: Subtracting " << offset[c] + << " from mf component c = " << c + << " on level (" << amrlev << ", " << mglev << ")\n"; } -#ifdef AMREX_USE_EB - if (mf.hasEBFabFactory()) { - Vector val(ncomp, 0.0); - amrex::EB_set_covered(mf, 0, ncomp, val); - } -#endif - } - else - { - AMREX_ASSERT_WITH_MESSAGE(ncomp==1, "ncomp > 1 not supported for singular nodal problem"); - Real offset = linop.getSolvabilityOffset(amrlev, mglev, mf); - if (verbose >= 4) { - amrex::Print() << "MLMG: Subtracting " << offset << " on level (" << amrlev << ", " - << mglev << ")\n"; - } - linop.fixSolvabilityByOffset(amrlev, mglev, mf, offset); } + linop.fixSolvabilityByOffset(amrlev, mglev, mf, offset); } #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) void -MLMG::bottomSolveWithHypre (MultiFab& x, const MultiFab& b) +MLMG::bottomSolveWithHypre (Any& a_x, const Any& a_b) { + AMREX_ASSERT(a_x.is()); + MultiFab& x = 
a_x.get(); + MultiFab const& b = a_b.get(); + const int amrlev = 0; const int mglev = linop.NMGLevels(amrlev) - 1; @@ -1905,18 +1391,21 @@ MLMG::bottomSolveWithHypre (MultiFab& x, const MultiFab& b) // For precision reasons we enforce that the average of the correction from hypre is 0 if (linop.isSingular(amrlev) && linop.getEnforceSingularSolvable()) { - makeSolvable(amrlev, mglev, x); + makeSolvable(amrlev, mglev, a_x); } } #endif void -MLMG::bottomSolveWithPETSc (MultiFab& x, const MultiFab& b) +MLMG::bottomSolveWithPETSc (Any& a_x, const Any& a_b) { #if !defined(AMREX_USE_PETSC) - amrex::ignore_unused(x,b); + amrex::ignore_unused(a_x,a_b); amrex::Abort("bottomSolveWithPETSc is called without building with PETSc"); #else + AMREX_ASSERT(a_x.is()); + MultiFab& x = a_x.get(); + MultiFab const& b = a_b.get(); const int ncomp = linop.getNComp(); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ncomp == 1, "bottomSolveWithPETSc doesn't work with ncomp > 1"); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H index affe4c73eaf..50f20e22915 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H @@ -116,9 +116,11 @@ public : } virtual void getFluxes (const Vector& a_flux, const Vector& a_sol) const final override; - virtual void unimposeNeumannBC (int amrlev, MultiFab& rhs) const final override; - virtual Real getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const override; - virtual void fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const override; + virtual void unimposeNeumannBC (int amrlev, Any& rhs) const final override; + virtual Vector getSolvabilityOffset (int amrlev, int mglev, + Any const& rhs) const override; + virtual void fixSolvabilityByOffset (int amrlev, int mglev, Any& rhs, + Vector const& offset) const override; virtual void compGrad (int /*amrlev*/, const Array& /*grad*/, MultiFab& /*sol*/, Location /*loc*/) 
const final override { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp index 79358b58898..c0efaed25d6 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp @@ -150,13 +150,16 @@ MLNodeLaplacian::resizeMultiGrid (int new_size) } void -MLNodeLaplacian::unimposeNeumannBC (int amrlev, MultiFab& rhs) const +MLNodeLaplacian::unimposeNeumannBC (int amrlev, Any& a_rhs) const { if (m_coarsening_strategy == CoarseningStrategy::RAP) { const Box& nddom = amrex::surroundingNodes(Geom(amrlev).Domain()); const auto lobc = LoBC(); const auto hibc = HiBC(); + AMREX_ASSERT(a_rhs.is()); + MultiFab& rhs = a_rhs.get(); + MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); #ifdef AMREX_USE_OMP @@ -171,14 +174,17 @@ MLNodeLaplacian::unimposeNeumannBC (int amrlev, MultiFab& rhs) const } } -Real -MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const +Vector +MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, Any const& a_rhs) const { amrex::ignore_unused(amrlev); - AMREX_ASSERT(amrlev==0); - AMREX_ASSERT(mglev+1==m_num_mg_levels[0] || mglev==0); + AMREX_ASSERT(amrlev==0 && (mglev+1==m_num_mg_levels[0] || mglev==0)); + AMREX_ASSERT(getNComp() == 1); if (m_coarsening_strategy == CoarseningStrategy::RAP) { + AMREX_ASSERT(a_rhs.is()); + auto const& rhs = a_rhs.get(); + #ifdef AMREX_USE_EB auto factory = dynamic_cast(m_factory[amrlev][0].get()); if (mglev == 0 && factory && !factory->isAllRegular()) { @@ -229,7 +235,7 @@ MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rh Real s1 = amrex::get<0>(r); Real s2 = amrex::get<1>(r); ParallelAllReduce::Sum({s1,s2}, ParallelContext::CommunicatorSub()); - return s1/s2; + return {s1/s2}; } else #endif { @@ -279,16 +285,21 @@ MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rh Real s1 = 
amrex::get<0>(r); Real s2 = amrex::get<1>(r); ParallelAllReduce::Sum({s1,s2}, ParallelContext::CommunicatorSub()); - return s1/s2; + return {s1/s2}; } } else { - return MLNodeLinOp::getSolvabilityOffset(amrlev, mglev, rhs); + return MLNodeLinOp::getSolvabilityOffset(amrlev, mglev, a_rhs); } } void -MLNodeLaplacian::fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const +MLNodeLaplacian::fixSolvabilityByOffset (int amrlev, int mglev, Any& a_rhs, + Vector const& a_offset) const { + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); + Real offset = a_offset[0]; + if (m_coarsening_strategy == CoarseningStrategy::RAP) { #ifdef AMREX_USE_EB auto factory = dynamic_cast(m_factory[amrlev][0].get()); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H index c46f4a250f2..1935be89f1d 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H @@ -36,10 +36,6 @@ public: const Vector const*>& a_factory = {}, int a_eb_limit_coarsening = -1); - virtual void setLevelBC (int /*amrlev*/, const MultiFab* /*levelbcdata*/, - const MultiFab* = nullptr, const MultiFab* = nullptr, - const MultiFab* = nullptr) final override {} - virtual void apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc_mode, StateMode s_mode, const MLMGBndry* bndry=nullptr) const final override; @@ -59,20 +55,15 @@ public: amrex::Abort("AMReX_MLNodeLinOp::compGrad::How did we get here?"); } - virtual void applyMetricTerm (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/) const final override {} + virtual void applyMetricTerm (int /*amrlev*/, int /*mglev*/, Any& /*rhs*/) const final override {} virtual void unapplyMetricTerm (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/) const final override {} - virtual void fillSolutionBC (int /*amrlev*/, MultiFab& /*sol*/, - const MultiFab* /*crse_bcdata*/=nullptr) final override { - amrex::Abort("AMReX_MLNodeLinOp::fillSolutionBC::How did 
we get here?"); - } - - virtual void applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const override; + virtual Vector getSolvabilityOffset (int amrlev, int mglev, + Any const& rhs) const override; + virtual void fixSolvabilityByOffset (int amrlev, int mglev, Any& rhs, + Vector const& offset) const override; - virtual Real getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const override; - virtual void fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const override; - - virtual void prepareForSolve () override {} + virtual void prepareForSolve () override; virtual bool isSingular (int amrlev) const override { return (amrlev == 0) ? m_is_bottom_singular : false; } @@ -86,7 +77,7 @@ public: virtual void Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) const = 0; virtual void Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rsh) const = 0; - virtual void nodalSync (int amrlev, int mglev, MultiFab& mf) const final override; + void nodalSync (int amrlev, int mglev, MultiFab& mf) const; virtual std::unique_ptr makeNLinOp (int /*grid_size*/) const final override { amrex::Abort("MLNodeLinOp::makeNLinOp: N-Solve not supported"); @@ -102,6 +93,19 @@ public: // omask is either 0 or 1. 1 means the node is an unknown. 0 means it's known. 
void setOversetMask (int amrlev, const iMultiFab& a_omask); + virtual void fixUpResidualMask (int /*amrlev*/, iMultiFab& /*resmsk*/) { } + + virtual Real AnyNormInfMask (int amrlev, Any const& a, bool local) const override; + + virtual void AnyAvgDownResAmr (int, Any&, Any const&) const final override { } + + virtual void AnyInterpolationAmr (int famrlev, Any& fine, const Any& crse, + IntVect const& nghost) const override; + + virtual void AnyAverageDownAndSync (Vector& sol) const override; + + virtual void interpAssign (int amrlev, int fmglev, MultiFab& fine, MultiFab& crse) const override; + #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) virtual std::unique_ptr makeHypreNodeLap( int bottom_verbose, @@ -139,6 +143,8 @@ protected: MultiFab m_bottom_dot_mask; MultiFab m_coarse_dot_mask; + Vector > m_norm_fine_mask; + #ifdef AMREX_USE_EB CoarseningStrategy m_coarsening_strategy = CoarseningStrategy::RAP; #else diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp index baf0f5edb42..b5173b71f5f 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #ifdef AMREX_USE_OMP @@ -83,6 +84,22 @@ MLNodeLinOp::define (const Vector& a_geom, m_has_fine_bndry[amrlev] = std::make_unique >(m_grids[amrlev][0], m_dmap[amrlev][0]); } + + m_norm_fine_mask.resize(m_num_amr_levels-1); + for (int amrlev = 0; amrlev < m_num_amr_levels-1; ++amrlev) { + m_norm_fine_mask[amrlev] = std::make_unique + (makeFineMask(amrex::convert(m_grids[amrlev][0], IntVect(1)), m_dmap[amrlev][0], + amrex::convert(m_grids[amrlev+1][0], IntVect(1)), + IntVect(m_amr_ref_ratio[amrlev]), 1, 0)); + } +} + +void +MLNodeLinOp::prepareForSolve () +{ + for (int amrlev = 0; amrlev < m_num_amr_levels-1; ++amrlev) { + fixUpResidualMask(amrlev, *m_norm_fine_mask[amrlev]); + } } std::unique_ptr @@ -177,17 +194,16 @@ MLNodeLinOp::xdoty (int amrlev, int mglev, const 
MultiFab& x, const MultiFab& y, return result; } -void -MLNodeLinOp::applyInhomogNeumannTerm (int /*amrlev*/, MultiFab& /*rhs*/) const -{ -} - -Real -MLNodeLinOp::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const +Vector +MLNodeLinOp::getSolvabilityOffset (int amrlev, int mglev, Any const& a_rhs) const { amrex::ignore_unused(amrlev); - AMREX_ASSERT(amrlev==0); - AMREX_ASSERT(mglev+1==m_num_mg_levels[0] || mglev==0); + AMREX_ASSERT(amrlev==0 && (mglev+1==m_num_mg_levels[0] || mglev==0)); + AMREX_ASSERT(getNComp() == 1); + + AMREX_ASSERT(a_rhs.is()); + auto const& rhs = a_rhs.get(); + const auto& mask = (mglev+1 == m_num_mg_levels[0]) ? m_bottom_dot_mask : m_coarse_dot_mask; const auto& mask_ma = mask.const_arrays(); const auto& rhs_ma = rhs.const_arrays(); @@ -203,13 +219,16 @@ MLNodeLinOp::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) c Real s1 = amrex::get<0>(r); Real s2 = amrex::get<1>(r); ParallelAllReduce::Sum({s1,s2}, ParallelContext::CommunicatorSub()); - return s1/s2; + return {s1/s2}; } void -MLNodeLinOp::fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, MultiFab& rhs, Real offset) const +MLNodeLinOp::fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, Any& a_rhs, + Vector const& offset) const { - rhs.plus(-offset, 0, 1); + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); + rhs.plus(-offset[0], 0, 1); } namespace { @@ -448,6 +467,119 @@ MLNodeLinOp::resizeMultiGrid (int new_size) MLLinOp::resizeMultiGrid(new_size); } +Real +MLNodeLinOp::AnyNormInfMask (int amrlev, Any const& a, bool local) const +{ + AMREX_ASSERT(a.is()); + auto& mf = a.get(); + + const int finest_level = NAMRLevels() - 1; + iMultiFab const* fine_mask = (amrlev == finest_level) + ? 
nullptr : m_norm_fine_mask[amrlev].get(); + return MFNormInf(mf, fine_mask, local); +} + +void +MLNodeLinOp::AnyInterpolationAmr (int famrlev, Any& a_fine, const Any& a_crse, + IntVect const& nghost) const +{ + AMREX_ASSERT(a_fine.is()); + MultiFab& fine = a_fine.get(); + MultiFab const& crse = a_crse.get(); + + const int ncomp = getNComp(); + const int refratio = AMRRefRatio(famrlev-1); + + AMREX_ALWAYS_ASSERT(refratio == 2 || refratio == 4); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(fine, TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Box fbx = mfi.tilebox(); + fbx.grow(nghost); + Array4 const& ffab = fine.array(mfi); + Array4 const& cfab = crse.const_array(mfi); + + if (refratio == 2) { + AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, + { + mlmg_lin_nd_interp_r2(i,j,k,n,ffab,cfab); + }); + } else { + AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, + { + mlmg_lin_nd_interp_r4(i,j,k,n,ffab,cfab); + }); + } + } +} + +void +MLNodeLinOp::AnyAverageDownAndSync (Vector& sol) const +{ + AMREX_ASSERT(sol[0].is()); + + const int ncomp = getNComp(); + const int finest_amr_lev = NAMRLevels() - 1; + + nodalSync(finest_amr_lev, 0, sol[finest_amr_lev].get()); + + for (int falev = finest_amr_lev; falev > 0; --falev) + { + const auto& fmf = sol[falev ].get(); + auto& cmf = sol[falev-1].get(); + + auto rr = AMRRefRatio(falev-1); + MultiFab tmpmf(amrex::coarsen(fmf.boxArray(), rr), fmf.DistributionMap(), ncomp, 0); + amrex::average_down(fmf, tmpmf, 0, ncomp, rr); + cmf.ParallelCopy(tmpmf, 0, 0, ncomp); + nodalSync(falev-1, 0, cmf); + } +} + +void +MLNodeLinOp::interpAssign (int amrlev, int fmglev, MultiFab& fine, MultiFab& crse) const +{ + const int ncomp = getNComp(); + + const Geometry& crse_geom = Geom(amrlev,fmglev+1); + const IntVect refratio = (amrlev > 0) ? 
IntVect(2) : mg_coarsen_ratio_vec[fmglev]; + AMREX_ALWAYS_ASSERT(refratio == 2); + + MultiFab cfine; + const MultiFab* cmf; + + if (amrex::isMFIterSafe(crse, fine)) + { + crse.FillBoundary(crse_geom.periodicity()); + cmf = &crse; + } + else + { + BoxArray cba = fine.boxArray(); + cba.coarsen(refratio); + cfine.define(cba, fine.DistributionMap(), ncomp, 0); + cfine.ParallelCopy(crse, 0, 0, ncomp, 0, 0, crse_geom.periodicity()); + cmf = & cfine; + } + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(fine, TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& fbx = mfi.tilebox(); + Array4 const& ffab = fine.array(mfi); + Array4 const& cfab = cmf->const_array(mfi); + + AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, + { + mlmg_lin_nd_interp_r2(i,j,k,n,ffab,cfab); + }); + } +} + #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) std::unique_ptr MLNodeLinOp::makeHypreNodeLap (int bottom_verbose, const std::string& options_namespace) const From 6eaab8c1c9f0e2a21531526dfd170ebe3aad507b Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 3 Aug 2022 13:39:44 -0700 Subject: [PATCH 037/111] MPMD Support (#2895) Add support for multiple programs multiple data (MPMD). For now, we assume there are only two programs (i.e., executables) in the MPMD mode. During the initialization, MPI_COMM_WORLD is split into two communicators. The MPMD::Copier class can be used to copy FabArray/MultiFab data between two programs. This new capability can be used by FHDeX to couple FHD with SPPARKS. 
--- Src/Base/AMReX_BLBackTrace.cpp | 13 +- Src/Base/AMReX_BoxList.H | 4 +- Src/Base/AMReX_MPMD.H | 178 ++++++++++++++++++++++++++ Src/Base/AMReX_MPMD.cpp | 225 +++++++++++++++++++++++++++++++++ Src/Base/CMakeLists.txt | 8 +- Src/Base/Make.package | 4 + 6 files changed, 427 insertions(+), 5 deletions(-) create mode 100644 Src/Base/AMReX_MPMD.H create mode 100644 Src/Base/AMReX_MPMD.cpp diff --git a/Src/Base/AMReX_BLBackTrace.cpp b/Src/Base/AMReX_BLBackTrace.cpp index 477e0b6bac2..0c304d30011 100644 --- a/Src/Base/AMReX_BLBackTrace.cpp +++ b/Src/Base/AMReX_BLBackTrace.cpp @@ -5,6 +5,9 @@ #include #include #include +#ifdef AMREX_USE_MPI +#include +#endif #ifdef AMREX_TINY_PROFILING #include @@ -71,7 +74,15 @@ BLBackTrace::handler(int s) std::string errfilename; { std::ostringstream ss; - ss << "Backtrace." << ParallelDescriptor::MyProc(); +#ifdef AMREX_USE_MPI + if (MPMD::Initialized()) { + ss << "Backtrace.prog" << MPMD::MyProgId() << "."; + } else +#endif + { + ss << "Backtrace."; + } + ss << ParallelDescriptor::MyProc(); #ifdef AMREX_USE_OMP ss << "." << omp_get_thread_num(); #endif diff --git a/Src/Base/AMReX_BoxList.H b/Src/Base/AMReX_BoxList.H index 04e93eab97e..1dc8f15c536 100644 --- a/Src/Base/AMReX_BoxList.H +++ b/Src/Base/AMReX_BoxList.H @@ -206,9 +206,9 @@ public: BoxList& convert (IndexType typ) noexcept; //! Returns a reference to the Vector. - Vector& data() noexcept { return m_lbox; } + Vector& data () noexcept { return m_lbox; } //! Returns a constant reference to the Vector. 
- const Vector& data() const noexcept { return m_lbox; } + const Vector& data () const noexcept { return m_lbox; } void swap (BoxList& rhs) { std::swap(m_lbox, rhs.m_lbox); diff --git a/Src/Base/AMReX_MPMD.H b/Src/Base/AMReX_MPMD.H new file mode 100644 index 00000000000..2b8ef399866 --- /dev/null +++ b/Src/Base/AMReX_MPMD.H @@ -0,0 +1,178 @@ +#ifndef AMREX_MPMD_H_ +#define AMREX_MPMD_H_ +#include + +#ifdef AMREX_USE_MPI + +#include + +#include + +namespace amrex { namespace MPMD { + +MPI_Comm Initialize (int argc, char* argv[]); + +void Finalize (); + +bool Initialized (); + +int MyProc (); //! Process ID in MPI_COMM_WORLD +int NProcs (); //! Number of processes in MPI_COMM_WORLD +int MyProgId (); //! Program ID + +class Copier +{ +public: + Copier (BoxArray const& ba, DistributionMapping const& dm); + + template + void send (FabArray const& fa, int icomp, int ncomp) const; + + template + void recv (FabArray& fa, int icomp, int ncomp) const; + +private: + std::map m_SndTags; + std::map m_RcvTags; +}; + +template +void Copier::send (FabArray const& mf, int icomp, int ncomp) const +{ + const int N_snds = m_SndTags.size(); + + if (N_snds == 0) return; + + // Prepare buffer + + Vector send_data; + Vector send_size; + Vector send_rank; + Vector send_reqs; + Vector send_cctc; + + Vector offset; + std::size_t total_volume = 0; + for (auto const& kv : m_SndTags) { + auto const& cctc = kv.second; + + std::size_t nbytes = 0; + for (auto const& cct : cctc) { + nbytes += cct.sbox.numPts() * ncomp * sizeof(typename FAB::value_type); + } + + std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes); + nbytes = amrex::aligned_size(acd, nbytes); // so that bytes are aligned + + // Also need to align the offset properly + total_volume = amrex::aligned_size(std::max(alignof(typename FAB::value_type), + acd), total_volume); + + offset.push_back(total_volume); + total_volume += nbytes; + + send_data.push_back(nullptr); + send_size.push_back(nbytes); + 
send_rank.push_back(kv.first); + send_reqs.push_back(MPI_REQUEST_NULL); + send_cctc.push_back(&cctc); + } + + Gpu::PinnedVector send_buffer(total_volume); + char* the_send_data = send_buffer.data(); + for (int i = 0; i < N_snds; ++i) { + send_data[i] = the_send_data + offset[i]; + } + + // Pack buffer +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && (mf.arena()->isDevice() || mf.arena()->isManaged())) { + mf.pack_send_buffer_gpu(mf, icomp, ncomp, send_data, send_size, send_cctc); + } else +#endif + { + mf.pack_send_buffer_cpu(mf, icomp, ncomp, send_data, send_size, send_cctc); + } + + // Send + for (int i = 0; i < N_snds; ++i) { + send_reqs[i] = ParallelDescriptor::Asend + (send_data[i], send_size[i], send_rank[i], 100, MPI_COMM_WORLD).req(); + } + Vector stats(N_snds); + ParallelDescriptor::Waitall(send_reqs, stats); +} + +template +void Copier::recv (FabArray& mf, int icomp, int ncomp) const +{ + const int N_rcvs = m_RcvTags.size(); + + if (N_rcvs == 0) return; + + // Prepare buffer + + Vector recv_data; + Vector recv_size; + Vector recv_from; + Vector recv_reqs; + + Vector offset; + std::size_t TotalRcvsVolume = 0; + for (auto const& kv : m_RcvTags) { + std::size_t nbytes = 0; + for (auto const& cct : kv.second) { + nbytes += cct.dbox.numPts() * ncomp * sizeof(typename FAB::value_type); + } + + std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes); + nbytes = amrex::aligned_size(acd, nbytes); // so that nbytes are aligned + + // Also need to align the offset properly + TotalRcvsVolume = amrex::aligned_size(std::max(alignof(typename FAB::value_type), + acd), TotalRcvsVolume); + + offset.push_back(TotalRcvsVolume); + TotalRcvsVolume += nbytes; + + recv_data.push_back(nullptr); + recv_size.push_back(nbytes); + recv_from.push_back(kv.first); + recv_reqs.push_back(MPI_REQUEST_NULL); + } + + Gpu::PinnedVector recv_buffer(TotalRcvsVolume); + char* the_recv_data = recv_buffer.data(); + + // Recv + for (int i = 0; i < N_rcvs; ++i) { + recv_data[i] = 
the_recv_data + offset[i]; + recv_reqs[i] = ParallelDescriptor::Arecv + (recv_data[i], recv_size[i], recv_from[i], 100, MPI_COMM_WORLD).req(); + } + + Vector recv_cctc(N_rcvs, nullptr); + for (int i = 0; i < N_rcvs; ++i) { + recv_cctc[i] = &(m_RcvTags.at(recv_from[i])); + } + + Vector stats(N_rcvs); + ParallelDescriptor::Waitall(recv_reqs, stats); + + // Unpack buffer +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && (mf.arena()->isDevice() || mf.arena()->isManaged())) { + mf.unpack_recv_buffer_gpu(mf, icomp, ncomp, recv_data, recv_size, recv_cctc, + FabArrayBase::COPY, true); + } else +#endif + { + mf.unpack_recv_buffer_cpu(mf, icomp, ncomp, recv_data, recv_size, recv_cctc, + FabArrayBase::COPY, true); + } +} + +}} + +#endif +#endif diff --git a/Src/Base/AMReX_MPMD.cpp b/Src/Base/AMReX_MPMD.cpp new file mode 100644 index 00000000000..917c741c2a6 --- /dev/null +++ b/Src/Base/AMReX_MPMD.cpp @@ -0,0 +1,225 @@ +#include +#include + +#include +#include +#include +#include +#include + +#ifdef AMREX_USE_MPI + +namespace amrex { namespace MPMD { + +namespace { + bool initialized = false; + bool mpi_initialized_by_us = false; + MPI_Comm app_comm = MPI_COMM_NULL; + int myproc; + int nprocs; +} + +namespace { + +template +int num_unique_elements (std::vector& v) +{ + std::sort(v.begin(), v.end()); + auto last = std::unique(v.begin(), v.end()); + return last - v.begin(); +} + +} + +MPI_Comm Initialize (int argc, char* argv[]) +{ + initialized = true; + int flag; + MPI_Initialized(&flag); + if (!flag) { + MPI_Init(&argc, &argv); + mpi_initialized_by_us = true; + } + + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + int* p; + MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_APPNUM, &p, &flag); + int appnum = *p; + + std::vector all_appnum(nprocs); + MPI_Allgather(&appnum, 1, MPI_INT, all_appnum.data(), 1, MPI_INT, MPI_COMM_WORLD); + int napps = num_unique_elements(all_appnum); + + // MPI_APPNUM does not appear to work with slurm on some systems. 
+ if (napps != 2) { + std::vector all_argc(nprocs); + MPI_Allgather(&argc, 1, MPI_INT, all_argc.data(), 1, MPI_INT, MPI_COMM_WORLD); + napps = num_unique_elements(all_argc); + if (napps == 2) { + appnum = static_cast(argc != all_argc[0]); + } + } + + if (napps != 2) { + std::string exename; + if (argc > 0) { + exename = std::string(argv[0]); + } + unsigned long long hexe = std::hash{}(exename); + std::vector all_hexe(nprocs); + MPI_Allgather(&hexe, 1, MPI_UNSIGNED_LONG_LONG, + all_hexe.data(), 1, MPI_UNSIGNED_LONG_LONG, MPI_COMM_WORLD); + napps = num_unique_elements(all_hexe); + if (napps == 2) { + appnum = static_cast(hexe != all_hexe[0]); + } + } + + if (napps == 2) { + MPI_Comm_split(MPI_COMM_WORLD, appnum, myproc, &app_comm); + } else { + std::cout << "amrex::MPMD only supports two programs." << std::endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + return app_comm; +} + +void Finalize () +{ + MPI_Comm_free(&app_comm); + if (mpi_initialized_by_us) { + MPI_Finalize(); + mpi_initialized_by_us = false; + } + initialized = false; +} + +bool Initialized () { return initialized; } + +int MyProc () +{ + return myproc; +} + +int NProcs () +{ + return nprocs; +} + +int MyProgId () +{ + return (myproc == ParallelDescriptor::MyProc()) ? 
0 : 1; +} + +Copier::Copier (BoxArray const& ba, DistributionMapping const& dm) +{ + int rank_offset = myproc - ParallelDescriptor::MyProc(); + int this_root, other_root; + if (rank_offset == 0) { // First program + this_root = 0; + other_root = ParallelDescriptor::NProcs(); + } else { + this_root = rank_offset; + other_root = 0; + } + + Vector bv = ba.boxList().data(); + + int this_nboxes = ba.size(); + Vector procs = dm.ProcessorMap(); + if (rank_offset != 0) { + for (int i = 0; i < this_nboxes; ++i) { + procs[i] += rank_offset; + } + } + + Vector obv; + Vector oprocs; + int other_nboxes; + if (myproc == this_root) { + if (rank_offset == 0) // the first program + { + MPI_Send(&this_nboxes, 1, MPI_INT, other_root, 0, MPI_COMM_WORLD); + MPI_Recv(&other_nboxes, 1, MPI_INT, other_root, 1, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + obv.resize(other_nboxes); + MPI_Send(bv.data(), this_nboxes, + ParallelDescriptor::Mpi_typemap::type(), + other_root, 2, MPI_COMM_WORLD); + MPI_Recv(obv.data(), other_nboxes, + ParallelDescriptor::Mpi_typemap::type(), + other_root, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + oprocs.resize(other_nboxes); + MPI_Send(procs.data(), this_nboxes, MPI_INT, other_root, 4, MPI_COMM_WORLD); + MPI_Recv(oprocs.data(), other_nboxes, MPI_INT, other_root, 5, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + else // the second program + { + MPI_Recv(&other_nboxes, 1, MPI_INT, other_root, 0, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Send(&this_nboxes, 1, MPI_INT, other_root, 1, MPI_COMM_WORLD); + obv.resize(other_nboxes); + MPI_Recv(obv.data(), other_nboxes, + ParallelDescriptor::Mpi_typemap::type(), + other_root, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Send(bv.data(), this_nboxes, + ParallelDescriptor::Mpi_typemap::type(), + other_root, 3, MPI_COMM_WORLD); + oprocs.resize(other_nboxes); + MPI_Recv(oprocs.data(), other_nboxes, MPI_INT, other_root, 4, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Send(procs.data(), this_nboxes, MPI_INT, other_root, 5, 
MPI_COMM_WORLD); + } + } + + ParallelDescriptor::Bcast(&other_nboxes, 1); + if (obv.empty()) { + obv.resize(other_nboxes); + oprocs.resize(other_nboxes); + } + ParallelDescriptor::Bcast(obv.data(), obv.size()); + ParallelDescriptor::Bcast(oprocs.data(), oprocs.size()); + + BoxArray oba(BoxList(std::move(obv))); + + // At this point, ba and bv hold our boxes, and oba holds the other + // program's boxes. procs holds mpi ranks of our boxes, and oprocs holds + // mpi ranks of the other program's boxes. All mpi ranks are in + // MPI_COMM_WORLD. + + // Build communication meta-data + + AMREX_ALWAYS_ASSERT(ba.ixType().cellCentered()); + + std::vector > isects; + + for (int i = 0; i < this_nboxes; ++i) { + if (procs[i] == myproc) { + oba.intersections(bv[i], isects); + for (auto const& isec : isects) { + const int oi = isec.first; + const Box& bx = isec.second; + const int orank = oprocs[oi]; + m_SndTags[orank].push_back + (FabArrayBase::CopyComTag(bx, bx, oi, i)); + m_RcvTags[orank].push_back + (FabArrayBase::CopyComTag(bx, bx, i, oi)); + } + } + } + + for (auto& kv : m_SndTags) { + std::sort(kv.second.begin(), kv.second.end()); + } + for (auto& kv : m_RcvTags) { + std::sort(kv.second.begin(), kv.second.end()); + } +} + +}} + +#endif diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index 6a2db4526cd..c47fdcae706 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -71,6 +71,7 @@ target_sources( amrex AMReX_DataAllocator.H AMReX_BLProfiler.H AMReX_BLBackTrace.H + AMReX_BLBackTrace.cpp AMReX_BLFort.H AMReX_NFiles.H AMReX_NFiles.cpp @@ -231,8 +232,6 @@ target_sources( amrex # Memory pool ------------------------------------------------------------- AMReX_MemPool.cpp AMReX_MemPool.H - # Profiling --------------------------------------------------------------- - AMReX_BLBackTrace.cpp # Parser --------------------------------------------------------------- Parser/AMReX_Parser.cpp Parser/AMReX_Parser.H @@ -305,3 +304,8 @@ endif () if 
(AMReX_TINY_PROFILE) target_sources(amrex PRIVATE AMReX_TinyProfiler.cpp AMReX_TinyProfiler.H ) endif () + +# MPMD +if (AMReX_MPI) + target_sources(amrex PRIVATE AMReX_MPMD.cpp AMReX_MPMD.H ) +endif () diff --git a/Src/Base/Make.package b/Src/Base/Make.package index d7c4e520e7b..79085ae70a1 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -271,6 +271,10 @@ CEXE_sources += AMReX_Machine.cpp # Forward declaration CEXE_headers += AMReX_BaseFwd.H +ifeq ($(USE_MPI),TRUE) + CEXE_headers += AMReX_MPMD.H + CEXE_sources += AMReX_MPMD.cpp +endif VPATH_LOCATIONS += $(AMREX_HOME)/Src/Base INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Base From 9ed4f5955b1d5d0e400fd2f233e5e7b83db4e41b Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 3 Aug 2022 16:53:20 -0700 Subject: [PATCH 038/111] Fix a new bug introduced in #2858 (#2901) We need to take into account that `amrex::Any` stores `MultiFab&` or `MultiFab const&`. --- Src/Base/AMReX_Any.H | 18 ++++++++++++++++-- Src/LinearSolvers/MLMG/AMReX_MLMG.cpp | 2 +- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Src/Base/AMReX_Any.H b/Src/Base/AMReX_Any.H index 31c824825a4..2c7d9688d36 100644 --- a/Src/Base/AMReX_Any.H +++ b/Src/Base/AMReX_Any.H @@ -48,11 +48,25 @@ public: //! Returns a reference to the contained object. template - MF& get () { return dynamic_cast&>(*m_ptr).m_mf; } + MF& get () { + if (auto p0 = dynamic_cast*>(m_ptr.get())) { + return p0->m_mf; + } else { + return dynamic_cast&>(*m_ptr).m_mf; + } + } //! Returns a const reference to the contained object. 
template - MF const& get () const { return dynamic_cast const&>(*m_ptr).m_mf; } + MF const& get () const { + if (auto p0 = dynamic_cast*>(m_ptr.get())) { + return p0->m_mf; + } else if (auto p1 = dynamic_cast*>(m_ptr.get())) { + return p1->m_mf; + } else { + return dynamic_cast const&>(*m_ptr).m_mf; + } + } template bool is () const { return m_ptr->Type() == typeid(MF); } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp index a1e897e85ba..0e1762ae3fb 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp @@ -1152,7 +1152,7 @@ MLMG::compResidual (const Vector& a_res, const Vector& a_s { if (cf_strategy == CFStrategy::ghostnodes || a_sol[alev]->nGrowVect() == ng_sol) { - sol[alev] = linop.AnyMakeAlias(a_sol[alev]); + sol[alev] = linop.AnyMakeAlias(*a_sol[alev]); sol_is_alias[alev] = true; } else From ed23627d6487306e26b37ed9a97d60fd8148a935 Mon Sep 17 00:00:00 2001 From: Yadong_Zeng <30739800+ruohai0925@users.noreply.github.com> Date: Thu, 4 Aug 2022 16:32:21 -0400 Subject: [PATCH 039/111] change data types from double to amrex::Real, and thus we can use single precision for the hypre IJ interface (#2896) Co-authored-by: yzeng --- Src/Extern/HYPRE/AMReX_HypreIJIface.H | 6 +++--- Src/Extern/HYPRE/AMReX_HypreIJIface.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Src/Extern/HYPRE/AMReX_HypreIJIface.H b/Src/Extern/HYPRE/AMReX_HypreIJIface.H index 6d0dbacd95f..2ac96748b24 100644 --- a/Src/Extern/HYPRE/AMReX_HypreIJIface.H +++ b/Src/Extern/HYPRE/AMReX_HypreIJIface.H @@ -93,11 +93,11 @@ private: HypreIntType (*m_precondSolvePtr)( HYPRE_Solver, HYPRE_ParCSRMatrix, HYPRE_ParVector, HYPRE_ParVector){nullptr}; - HypreIntType (*m_solverSetTolPtr)(HYPRE_Solver, double){nullptr}; - HypreIntType (*m_solverSetAbsTolPtr)(HYPRE_Solver, double){nullptr}; + HypreIntType (*m_solverSetTolPtr)(HYPRE_Solver, amrex::Real){nullptr}; + HypreIntType (*m_solverSetAbsTolPtr)(HYPRE_Solver, 
amrex::Real){nullptr}; HypreIntType (*m_solverSetMaxIterPtr)(HYPRE_Solver, HypreIntType){nullptr}; HypreIntType (*m_solverNumItersPtr)(HYPRE_Solver, HypreIntType*){nullptr}; - HypreIntType (*m_solverFinalResidualNormPtr)(HYPRE_Solver, double*){nullptr}; + HypreIntType (*m_solverFinalResidualNormPtr)(HYPRE_Solver, amrex::Real*){nullptr}; HypreIntType m_ilower{0}; HypreIntType m_iupper{0}; diff --git a/Src/Extern/HYPRE/AMReX_HypreIJIface.cpp b/Src/Extern/HYPRE/AMReX_HypreIJIface.cpp index 9e7a42dbb5b..c2e4f126252 100644 --- a/Src/Extern/HYPRE/AMReX_HypreIJIface.cpp +++ b/Src/Extern/HYPRE/AMReX_HypreIJIface.cpp @@ -275,7 +275,7 @@ void HypreIJIface::boomeramg_precond_configure (const std::string& prefix) if (hpp.pp.contains("bamg_non_galerkin_level_tols")) { std::vector levels; - std::vector tols; + std::vector tols; hpp.pp.getarr("bamg_non_galerkin_level_levels", levels); hpp.pp.getarr("bamg_non_galerkin_level_tols", tols); From 6ebf8ffc2689e23ff2686627e660caf0a10ea315 Mon Sep 17 00:00:00 2001 From: Jon Rood Date: Thu, 4 Aug 2022 14:32:59 -0600 Subject: [PATCH 040/111] Add rpath to lib64 for ZFP. 
(#2902) --- Tools/GNUMake/packages/Make.hdf5 | 1 + 1 file changed, 1 insertion(+) diff --git a/Tools/GNUMake/packages/Make.hdf5 b/Tools/GNUMake/packages/Make.hdf5 index 35e2ff3e404..9d54463ce4e 100644 --- a/Tools/GNUMake/packages/Make.hdf5 +++ b/Tools/GNUMake/packages/Make.hdf5 @@ -29,6 +29,7 @@ ifeq ($(USE_HDF5_ZFP),TRUE) INCLUDE_LOCATIONS += $(ZFP_ABSPATH)/include $(H5Z_ABSPATH)/include LIBRARY_LOCATIONS += $(ZFP_ABSPATH)/lib $(ZFP_ABSPATH)/lib64 $(H5Z_ABSPATH)/lib LDFLAGS += -Xlinker -rpath -Xlinker $(ZFP_ABSPATH)/lib + LDFLAGS += -Xlinker -rpath -Xlinker $(ZFP_ABSPATH)/lib64 endif endif endif From 103db6ebe2b570910ac4dbd7d6611e59d80f1a0b Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 5 Aug 2022 15:25:33 -0700 Subject: [PATCH 041/111] EB: Add Fine Levels (#2881) Add a new function EB2::addFineLevels() that can be used to add more fine levels to the existing EB IndexSpace without changing the coarse levels. This is useful for restarting with a larger amr.max_level. The issue is we build EB at the finest level first and then coarsen it to the coarse levels. If the restart run has a different finest level, the EB on the coarse levels could be different without using this new capability. 
--- Src/EB/AMReX_EB2.H | 10 ++++++++- Src/EB/AMReX_EB2.cpp | 9 ++++++++ Src/EB/AMReX_EB2_IndexSpaceI.H | 32 +++++++++++++++++++++++++++-- Src/EB/AMReX_EB2_IndexSpace_STL.H | 1 + Src/EB/AMReX_EB2_IndexSpace_STL.cpp | 6 ++++++ Tests/EB/CNS/Source/main.cpp | 8 +++++++- 6 files changed, 62 insertions(+), 4 deletions(-) diff --git a/Src/EB/AMReX_EB2.H b/Src/EB/AMReX_EB2.H index ff897276510..6a143bf2a9c 100644 --- a/Src/EB/AMReX_EB2.H +++ b/Src/EB/AMReX_EB2.H @@ -49,6 +49,7 @@ public: virtual const Level& getLevel (const Geometry & geom) const = 0; virtual const Geometry& getGeometry (const Box& domain) const = 0; virtual const Box& coarsestDomain () const = 0; + virtual void addFineLevels (int num_new_fine_levels) = 0; protected: static AMREX_EXPORT Vector > m_instance; @@ -80,16 +81,21 @@ public: virtual const Box& coarsestDomain () const final { return m_geom.back().Domain(); } + virtual void addFineLevels (int num_new_fine_levels) final; using F = typename G::FunctionType; private: + G m_gshop; + bool m_build_coarse_level_by_coarsening; + bool m_extend_domain_face; + int m_num_coarsen_opt; + Vector > m_gslevel; Vector m_geom; Vector m_domain; Vector m_ngrow; - std::unique_ptr m_impfunc; }; #include @@ -125,6 +131,8 @@ void Build (const Geometry& geom, int maxCoarseningLevel (const Geometry& geom); int maxCoarseningLevel (IndexSpace const* ebis, const Geometry& geom); +void addFineLevels (int num_new_fine_levels); + }} #endif diff --git a/Src/EB/AMReX_EB2.cpp b/Src/EB/AMReX_EB2.cpp index 16f683cb019..4f2ad5bf873 100644 --- a/Src/EB/AMReX_EB2.cpp +++ b/Src/EB/AMReX_EB2.cpp @@ -230,6 +230,15 @@ Build (const Geometry& geom, int required_coarsening_level, } } +void addFineLevels (int num_new_fine_levels) +{ + BL_PROFILE("EB2::addFineLevels()"); + auto p = const_cast(TopIndexSpace()); + if (p) { + p->addFineLevels(num_new_fine_levels); + } +} + namespace { static int comp_max_crse_level (Box cdomain, const Box& domain) { diff --git a/Src/EB/AMReX_EB2_IndexSpaceI.H 
b/Src/EB/AMReX_EB2_IndexSpaceI.H index cdad6b31f6f..e7db810b03b 100644 --- a/Src/EB/AMReX_EB2_IndexSpaceI.H +++ b/Src/EB/AMReX_EB2_IndexSpaceI.H @@ -5,6 +5,10 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, bool extend_domain_face, int num_coarsen_opt) + : m_gshop(gshop), + m_build_coarse_level_by_coarsening(build_coarse_level_by_coarsening), + m_extend_domain_face(extend_domain_face), + m_num_coarsen_opt(num_coarsen_opt) { // build finest level (i.e., level 0) first AMREX_ALWAYS_ASSERT(required_coarsening_level >= 0 && required_coarsening_level <= 30); @@ -56,8 +60,6 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, m_domain.push_back(cdomain); m_ngrow.push_back(ng); } - - m_impfunc = std::make_unique(gshop.GetImpFunc()); } @@ -78,3 +80,29 @@ IndexSpaceImp::getGeometry (const Box& dom) const int i = std::distance(m_domain.begin(), it); return m_geom[i]; } + +template +void +IndexSpaceImp::addFineLevels (int num_new_fine_levels) +{ + if (num_new_fine_levels <= 0) { return; } + + if (m_num_coarsen_opt > 0) { + m_num_coarsen_opt += num_new_fine_levels; + } + + IndexSpaceImp fine_isp(m_gshop, amrex::refine(m_geom[0], 1< Date: Mon, 8 Aug 2022 14:17:57 -0400 Subject: [PATCH 042/111] Clear the boundary particle indices' container before updating it. (#2907) This avoids potential segmentation faults when one grid's particles all move to other grids. 
--- Src/Particle/AMReX_NeighborParticlesI.H | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Src/Particle/AMReX_NeighborParticlesI.H b/Src/Particle/AMReX_NeighborParticlesI.H index bcdaeebdbbf..33cc47d0d5e 100644 --- a/Src/Particle/AMReX_NeighborParticlesI.H +++ b/Src/Particle/AMReX_NeighborParticlesI.H @@ -821,6 +821,11 @@ selectActualNeighbors (CheckPair&& check_pair, int num_cells) for (int lev = 0; lev < this->numLevels(); ++lev) { + // clear previous neighbor particle ids + for (auto& keyval: m_boundary_particle_ids[lev]) { + keyval.second.clear(); + } + for (MyParIter pti(*this, lev); pti.isValid(); ++pti) { PairIndex index(pti.index(), pti.LocalTileIndex()); From 3f715d29c94b473e624aa9ff3fea9b502da25f97 Mon Sep 17 00:00:00 2001 From: Candace Gilet Date: Mon, 8 Aug 2022 14:40:28 -0400 Subject: [PATCH 043/111] In MLMG::mgFcycle, assert that for EB the linop is cell-centered. (#2905) --- Src/LinearSolvers/MLMG/AMReX_MLMG.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp index 0e1762ae3fb..a4ab5762777 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp @@ -522,6 +522,10 @@ MLMG::mgFcycle () { BL_PROFILE("MLMG::mgFcycle()"); +#ifdef AMREX_USE_EB + AMREX_ASSERT(linop.isCellCentered()); +#endif + const int amrlev = 0; const int mg_bottom_lev = linop.NMGLevels(amrlev) - 1; IntVect nghost(0); From 1bda173b489024d5f4ec79627f3f612c350e521f Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 10 Aug 2022 15:46:43 -0600 Subject: [PATCH 044/111] Add: `MultiFab::sum_unique` (#2909) This provides a new method to sum values in a `MultiFab`. For non-cell-centered data, `MultiFab::sum` double counts box boundary values that are owned by multiple boxes. This provides a function that does not double count these and provides a quick way to get only the sum of physically unique values. 
Co-authored-by: Weiqun Zhang --- Src/Base/AMReX_MultiFab.H | 7 +++++ Src/Base/AMReX_MultiFab.cpp | 53 +++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/Src/Base/AMReX_MultiFab.H b/Src/Base/AMReX_MultiFab.H index dfb75dacbf9..44c76d476f6 100644 --- a/Src/Base/AMReX_MultiFab.H +++ b/Src/Base/AMReX_MultiFab.H @@ -232,6 +232,13 @@ public: */ Real sum (int comp = 0, bool local = false) const; /** + * \brief Same as sum with local=false, but for non-cell-centered data, this + * skips non-unique points that are owned by multiple boxes. + */ + Real sum_unique (int comp = 0, + bool local = false, + const Periodicity& period = Periodicity::NonPeriodic()) const; + /** * \brief Adds the scalar value val to the value of each cell in the * specified subregion of the MultiFab. The subregion consists * of the num_comp components starting at component comp. diff --git a/Src/Base/AMReX_MultiFab.cpp b/Src/Base/AMReX_MultiFab.cpp index 9e2f37adf37..83664b307d4 100644 --- a/Src/Base/AMReX_MultiFab.cpp +++ b/Src/Base/AMReX_MultiFab.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #ifdef AMREX_MEM_PROFILING #include @@ -1586,6 +1587,58 @@ MultiFab::sum (int comp, bool local) const return sm; } +Real +MultiFab::sum_unique (int comp, + bool local, + const Periodicity& period) const +{ + BL_PROFILE("MultiFab::sum_unique()"); + + // no duplicatly distributed points if cell centered + if (ixType().cellCentered()) + return this->sum(comp, local); + + // Owner is the grid with the lowest grid number containing the data + std::unique_ptr owner_mask = OwnerMask(period); + + Real sm = Real(0.0); +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = this->const_arrays(); + auto const& msk = owner_mask->const_arrays(); + sm = ParReduce(TypeList{}, TypeList{}, *this, IntVect(0), + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + -> GpuTuple + { + return msk[box_no](i,j,k) ? 
ma[box_no](i,j,k,comp) : 0.0_rt; + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm) +#endif + for (MFIter mfi(*this,true); mfi.isValid(); ++mfi) + { + Box const& bx = mfi.tilebox(); + Array4 const& a = this->const_array(mfi); + Array4 const& msk = owner_mask->const_array(mfi); + Real tmp = 0.0_rt; + AMREX_LOOP_3D(bx, i, j, k, + { + tmp += msk(i,j,k) ? a(i,j,k,comp) : 0.0_rt; + }); + sm += tmp; // Do it this way so that it does not break regression tests. + } + } + + if (!local) { + ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub()); + } + + return sm; +} + void MultiFab::minus (const MultiFab& mf, int strt_comp, int num_comp, int nghost) { From d295f2299101705f7c470c813b80542296087328 Mon Sep 17 00:00:00 2001 From: Nuno Miguel Nobre Date: Thu, 11 Aug 2022 03:40:09 +0100 Subject: [PATCH 045/111] [SYCL] Remove amrex::oneapi and update deprecated device descriptors (#2910) * Remove amrex::oneapi in favour of standard features * Change deprecated device descriptors --- Src/Base/AMReX_GpuDevice.cpp | 4 ++-- Src/Base/AMReX_GpuQualifiers.H | 4 ---- Src/Base/AMReX_GpuReduce.H | 8 ++++---- Src/Base/AMReX_Scan.H | 22 +++++++++++----------- 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp index c0e9b3e6785..e0ab64b76e3 100644 --- a/Src/Base/AMReX_GpuDevice.cpp +++ b/Src/Base/AMReX_GpuDevice.cpp @@ -463,8 +463,8 @@ Device::initialize_gpu () device_prop.warpSize = warp_size; auto sgss = d.get_info(); device_prop.maxMemAllocSize = d.get_info(); - device_prop.managedMemory = d.get_info(); - device_prop.concurrentManagedAccess = d.get_info(); + device_prop.managedMemory = d.has(sycl::aspect::usm_host_allocations); + device_prop.concurrentManagedAccess = d.has(sycl::aspect::usm_shared_allocations); device_prop.maxParameterSize = d.get_info(); { amrex::Print() << "Device Properties:\n" diff --git a/Src/Base/AMReX_GpuQualifiers.H 
b/Src/Base/AMReX_GpuQualifiers.H index ce07a3e52c2..b5d5ea58fbd 100644 --- a/Src/Base/AMReX_GpuQualifiers.H +++ b/Src/Base/AMReX_GpuQualifiers.H @@ -41,10 +41,6 @@ # include -namespace amrex { - namespace oneapi = sycl::ext::oneapi; -} - # define AMREX_REQUIRE_SUBGROUP_SIZE(x) \ _Pragma("clang diagnostic push") \ _Pragma("clang diagnostic ignored \"-Wattributes\"") \ diff --git a/Src/Base/AMReX_GpuReduce.H b/Src/Base/AMReX_GpuReduce.H index 3907ca385f6..7b9b0e42355 100644 --- a/Src/Base/AMReX_GpuReduce.H +++ b/Src/Base/AMReX_GpuReduce.H @@ -55,10 +55,10 @@ template struct warpReduce { AMREX_GPU_DEVICE AMREX_FORCE_INLINE - T operator() (T x, amrex::oneapi::sub_group const& sg) const noexcept + T operator() (T x, sycl::sub_group const& sg) const noexcept { for (int offset = warpSize/2; offset > 0; offset /= 2) { - T y = sg.shuffle_down(x, offset); + T y = sycl::shift_group_left(sg, x, offset); x = F()(x,y); } return x; @@ -71,7 +71,7 @@ T blockReduce (T x, WARPREDUCE && warp_reduce, T x0, Gpu::Handler const& h) { T* shared = (T*)h.local; int tid = h.item->get_local_id(0); - amrex::oneapi::sub_group const& sg = h.item->get_sub_group(); + sycl::sub_group const& sg = h.item->get_sub_group(); int lane = sg.get_local_id()[0]; int wid = sg.get_group_id()[0]; int numwarps = sg.get_group_range()[0]; @@ -94,7 +94,7 @@ AMREX_GPU_DEVICE AMREX_FORCE_INLINE void blockReduce_partial (T* dest, T x, WARPREDUCE && warp_reduce, ATOMICOP && atomic_op, Gpu::Handler const& handler) { - amrex::oneapi::sub_group const& sg = handler.item->get_sub_group(); + sycl::sub_group const& sg = handler.item->get_sub_group(); int wid = sg.get_group_id()[0]; if ((wid+1)*warpSize <= handler.numActiveThreads) { x = warp_reduce(x, sg); // full warp diff --git a/Src/Base/AMReX_Scan.H b/Src/Base/AMReX_Scan.H index 96aefb870b6..3dc5cb98f9a 100644 --- a/Src/Base/AMReX_Scan.H +++ b/Src/Base/AMReX_Scan.H @@ -197,7 +197,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) 
amrex::launch(nblocks, nthreads, sm, stream, [=] AMREX_GPU_DEVICE (Gpu::Handler const& gh) noexcept { - amrex::oneapi::sub_group const& sg = gh.item->get_sub_group(); + sycl::sub_group const& sg = gh.item->get_sub_group(); int lane = sg.get_local_id()[0]; int warp = sg.get_group_id()[0]; int nwarps = sg.get_group_range()[0]; @@ -226,7 +226,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) T x = x0; // Scan within a warp for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(x, i); + T s = sycl::shift_group_right(sg, x, i); if (lane >= i) x += s; } @@ -244,7 +244,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) if (warp == 0) { T y = (lane < nwarps) ? shared[lane] : 0; for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(y, i); + T s = sycl::shift_group_right(sg, y, i); if (lane >= i) y += s; } @@ -277,7 +277,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) amrex::launch(1, nthreads, sm, stream, [=] AMREX_GPU_DEVICE (Gpu::Handler const& gh) noexcept { - amrex::oneapi::sub_group const& sg = gh.item->get_sub_group(); + sycl::sub_group const& sg = gh.item->get_sub_group(); int lane = sg.get_local_id()[0]; int warp = sg.get_group_id()[0]; int nwarps = sg.get_group_range()[0]; @@ -293,7 +293,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) T x = (offset < nblocks) ? blocksum_p[offset] : 0; // Scan within a warp for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(x, i); + T s = sycl::shift_group_right(sg, x, i); if (lane >= i) x += s; } @@ -311,7 +311,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) if (warp == 0) { T y = (lane < nwarps) ? 
shared[lane] : 0; for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(y, i); + T s = sycl::shift_group_right(sg, y, i); if (lane >= i) y += s; } @@ -417,7 +417,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum amrex::launch(nblocks, nthreads, sm, stream, [=] AMREX_GPU_DEVICE (Gpu::Handler const& gh) noexcept { - amrex::oneapi::sub_group const& sg = gh.item->get_sub_group(); + sycl::sub_group const& sg = gh.item->get_sub_group(); int lane = sg.get_local_id()[0]; int warp = sg.get_group_id()[0]; int nwarps = sg.get_group_range()[0]; @@ -472,7 +472,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum T x = x0; // Scan within a warp for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(x, i); + T s = sycl::shift_group_right(sg, x, i); if (lane >= i) x += s; } @@ -490,7 +490,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum if (warp == 0) { T y = (lane < nwarps) ? shared[lane] : 0; for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(y, i); + T s = sycl::shift_group_right(sg, y, i); if (lane >= i) y += s; } @@ -543,7 +543,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum // implement our own __ballot unsigned status_bf = (stva.status == 'p') ? 
(0x1u << lane) : 0; for (int i = 1; i < Gpu::Device::warp_size; i *= 2) { - status_bf |= sg.shuffle_xor(status_bf, i); + status_bf |= sycl::permute_group_by_xor(sg, status_bf, i); } bool stop_lookback = status_bf & 0x1u; @@ -563,7 +563,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum } for (int i = Gpu::Device::warp_size/2; i > 0; i /= 2) { - x += sg.shuffle_down(x,i); + x += sycl::shift_group_left(sg, x,i); } } From 659351846da6f930b4f04cc6cd6b9f78e7752e8a Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Thu, 11 Aug 2022 15:24:16 -0700 Subject: [PATCH 046/111] Use 1 atomic instead of two per item in DenseBins::build (#2911) --- Src/Particle/AMReX_DenseBins.H | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Src/Particle/AMReX_DenseBins.H b/Src/Particle/AMReX_DenseBins.H index 93c9415ad25..0f1e94bb176 100644 --- a/Src/Particle/AMReX_DenseBins.H +++ b/Src/Particle/AMReX_DenseBins.H @@ -200,6 +200,7 @@ public: m_bins.resize(nitems); m_perm.resize(nitems); + m_local_offsets.resize(nitems); m_counts.resize(0); m_counts.resize(nbins+1, 0); @@ -209,21 +210,21 @@ public: index_type* pbins = m_bins.dataPtr(); index_type* pcount = m_counts.dataPtr(); + index_type* plocal_offsets = m_local_offsets.dataPtr(); amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (int i) noexcept { pbins[i] = f(v[i]); - Gpu::Atomic::AddNoRet(&pcount[pbins[i]], index_type{ 1 }); + index_type off = Gpu::Atomic::Add(&pcount[pbins[i]], index_type{ 1 }); + plocal_offsets[i] = off; }); Gpu::exclusive_scan(m_counts.begin(), m_counts.end(), m_offsets.begin()); - Gpu::copyAsync(Gpu::deviceToDevice, m_offsets.begin(), m_offsets.end(), m_counts.begin()); - index_type* pperm = m_perm.dataPtr(); - constexpr index_type max_index = std::numeric_limits::max(); + index_type* poffsets = m_offsets.dataPtr(); amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (int i) noexcept { - index_type index = Gpu::Atomic::Inc(&pcount[pbins[i]], max_index); + 
index_type index = poffsets[pbins[i]] + plocal_offsets[i]; pperm[index] = i; }); @@ -503,6 +504,7 @@ private: Gpu::DeviceVector m_bins; Gpu::DeviceVector m_counts; + Gpu::DeviceVector m_local_offsets; Gpu::DeviceVector m_offsets; Gpu::DeviceVector m_perm; }; From 4f639294606d47185d31eaee4af66fc6b590e5a2 Mon Sep 17 00:00:00 2001 From: asalmgren Date: Sat, 13 Aug 2022 09:00:02 -0700 Subject: [PATCH 047/111] enable LinOp to use the right Factory (fixes moving geometry problem) (#2916) --- Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp index a006976dc08..c8bea8dd2d2 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp @@ -34,7 +34,8 @@ MLEBABecLap::MLEBABecLap (const Vector& a_geom, std::unique_ptr > MLEBABecLap::makeFactory (int amrlev, int mglev) const { - return makeEBFabFactory(m_geom[amrlev][mglev], + return makeEBFabFactory(static_cast(Factory(0,0))->getEBIndexSpace(), + m_geom[amrlev][mglev], m_grids[amrlev][mglev], m_dmap[amrlev][mglev], {1,1,1}, EBSupport::full); From bd5f6a9f6a1a3a66c51eefd7950432d3bf3319a1 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Mon, 15 Aug 2022 14:24:21 -0700 Subject: [PATCH 048/111] Export GpuDevice Globals (#2918) * Export GpuDevice Globals Implement symbol export via `AMREX_EXPORT` for the global variables in `Src/Base/AMReX_GpuDevice.H`. 
Follow-up to #1847 #1847 Fix #2917 * Fix: Export `AMReX::m_instance` --- Src/Base/AMReX.H | 2 +- Src/Base/AMReX_GpuDevice.H | 33 +++++++++++++++++---------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/Src/Base/AMReX.H b/Src/Base/AMReX.H index e02280f3e3b..91f8fc43b7c 100644 --- a/Src/Base/AMReX.H +++ b/Src/Base/AMReX.H @@ -271,7 +271,7 @@ namespace amrex private: - static std::vector > m_instance; + static AMREX_EXPORT std::vector > m_instance; Geometry* m_geom = nullptr; }; diff --git a/Src/Base/AMReX_GpuDevice.H b/Src/Base/AMReX_GpuDevice.H index 8a327704a1d..a61ab4fe406 100644 --- a/Src/Base/AMReX_GpuDevice.H +++ b/Src/Base/AMReX_GpuDevice.H @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -148,9 +149,9 @@ public: // definition: https://github.com/llvm/llvm-project/blob/62ec4ac90738a5f2d209ed28c822223e58aaaeb7/clang/lib/Basic/Targets/AMDGPU.cpp#L400 // overview wavefront size: https://github.com/llvm/llvm-project/blob/efc063b621ea0c4d1e452bcade62f7fc7e1cc937/clang/test/Driver/amdgpu-macros.cl#L70-L115 // gfx10XX has 32 threads per wavefront else 64 - static constexpr int warp_size = __AMDGCN_WAVEFRONT_SIZE; + static AMREX_EXPORT constexpr int warp_size = __AMDGCN_WAVEFRONT_SIZE; # else - static constexpr int warp_size = AMREX_HIP_OR_CUDA_OR_DPCPP(64,32,16); + static AMREX_EXPORT constexpr int warp_size = AMREX_HIP_OR_CUDA_OR_DPCPP(64,32,16); # endif static unsigned int maxBlocksPerLaunch () noexcept { return max_blocks_per_launch; } @@ -166,28 +167,28 @@ private: static void initialize_gpu (); - static int device_id; - static int num_devices_used; - static int verbose; - static int max_gpu_streams; + static AMREX_EXPORT int device_id; + static AMREX_EXPORT int num_devices_used; + static AMREX_EXPORT int verbose; + static AMREX_EXPORT int max_gpu_streams; #ifdef AMREX_USE_GPU - static dim3 numThreadsMin; - static dim3 numBlocksOverride, numThreadsOverride; + static AMREX_EXPORT dim3 numThreadsMin; + static 
AMREX_EXPORT dim3 numBlocksOverride, numThreadsOverride; // We build gpu_default_stream and gpu_stream_pool. // The non-owning gpu_stream is used to store the current stream that will be used. // gpu_stream is a vector so that it's thread safe to write to it. - static gpuStream_t gpu_default_stream; - static Vector gpu_stream_pool; // The size of this is max_gpu_stream - static Vector gpu_stream; // The size of this is omp_max_threads - static gpuDeviceProp_t device_prop; - static int memory_pools_supported; - static unsigned int max_blocks_per_launch; + static AMREX_EXPORT gpuStream_t gpu_default_stream; + static AMREX_EXPORT Vector gpu_stream_pool; // The size of this is max_gpu_stream + static AMREX_EXPORT Vector gpu_stream; // The size of this is omp_max_threads + static AMREX_EXPORT gpuDeviceProp_t device_prop; + static AMREX_EXPORT int memory_pools_supported; + static AMREX_EXPORT unsigned int max_blocks_per_launch; #ifdef AMREX_USE_DPCPP - static std::unique_ptr sycl_context; - static std::unique_ptr sycl_device; + static AMREX_EXPORT std::unique_ptr sycl_context; + static AMREX_EXPORT std::unique_ptr sycl_device; #endif #endif }; From fa8e20f946b661bd49af2a60898ffca2c5b21cff Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Thu, 18 Aug 2022 08:57:51 -0700 Subject: [PATCH 049/111] Add Polaris to GNUMake (#2908) --- Tools/GNUMake/Make.defs | 7 ++-- Tools/GNUMake/Make.machines | 12 ++++-- Tools/GNUMake/sites/Make.alcf | 75 +++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 6 deletions(-) diff --git a/Tools/GNUMake/Make.defs b/Tools/GNUMake/Make.defs index 90a399e98af..f3f712816a6 100644 --- a/Tools/GNUMake/Make.defs +++ b/Tools/GNUMake/Make.defs @@ -757,6 +757,7 @@ else ifeq ($(USE_CUDA),TRUE) LINK_WITH_FORTRAN_COMPILER=TRUE endif + $(info Loading $(AMREX_HOME)/Tools/GNUMake/comps/nvcc.mak...) 
include $(AMREX_HOME)/Tools/GNUMake/comps/nvcc.mak ifeq ($(USE_MPI),TRUE) @@ -966,17 +967,17 @@ endif F90CACHE = ifeq ($(TP_PROFILING),VTUNE) - $(into Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.vtune + $(info Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.vtune include $(AMREX_HOME)/Tools/GNUMake/tools/Make.vtune endif ifeq ($(TP_PROFILING),CRAYPAT) - $(into Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.craypat + $(info Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.craypat include $(AMREX_HOME)/Tools/GNUMake/tools/Make.craypat endif ifeq ($(TP_PROFILING),FORGE) - $(into Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.forge + $(info Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.forge include $(AMREX_HOME)/Tools/GNUMake/tools/Make.forge endif diff --git a/Tools/GNUMake/Make.machines b/Tools/GNUMake/Make.machines index d68e594d80f..738461965d0 100644 --- a/Tools/GNUMake/Make.machines +++ b/Tools/GNUMake/Make.machines @@ -67,9 +67,15 @@ ifdef OLCF_ROCM_ROOT endif endif -ifeq ($(findstring theta, $(host_name)), theta) - which_site := alcf - which_computer := theta +ifeq ($(findstring alcf.anl.gov, $(host_name)),alcf.anl.gov) + ifeq ($(findstring theta, $(host_name)), theta) + which_site := alcf + which_computer := theta + endif + ifeq ($(findstring polaris, $(host_name)), polaris) + which_site := alcf + which_computer := polaris + endif endif ifeq ($(findstring sierra, $(host_name)), sierra) diff --git a/Tools/GNUMake/sites/Make.alcf b/Tools/GNUMake/sites/Make.alcf index 324d419ccce..cf607596515 100644 --- a/Tools/GNUMake/sites/Make.alcf +++ b/Tools/GNUMake/sites/Make.alcf @@ -8,3 +8,78 @@ ifeq ($(which_computer),theta) LIBRARIES += -lmpichf90 endif endif + +ifeq ($(which_computer),$(filter $(which_computer),polaris)) + + ifdef PE_ENV + ifneq ($(USE_GPU),TRUE) + lowercase_peenv := $(shell echo $(PE_ENV) | tr A-Z a-z) + ifneq ($(lowercase_peenv),$(lowercase_comp)) + has_compiler_mismatch = COMP=$(COMP) does not match PrgEnv-$(lowercase_peenv) + endif + ifeq 
($(MAKECMDGOALS),) + ifeq ($(lowercase_peenv),nvidia) + $(error PrgEnv-nvidia cannot be used with CPU-only builds. Try PrgEnv-gnu instead.) + endif + endif + endif + endif + + ifeq ($(USE_CUDA),TRUE) + CFLAGS += -Xcompiler='$(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))' + CXXFLAGS += -Xcompiler='$(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))' + else ifeq ($(USE_MPI),FALSE) + CFLAGS += $(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null)) + CXXFLAGS += $(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null)) + endif + + ifeq ($(USE_MPI),TRUE) + ifneq ($(USE_CUDA),TRUE) + CC = cc + CXX = CC + FC = ftn + F90 = ftn + LIBRARIES += -lmpichf90 + endif + + includes += $(shell CC --cray-print-opts=cflags) + endif + + ifeq ($(USE_CUDA),TRUE) + CUDA_ARCH = 80 + + ifeq ($(USE_MPI), FALSE) + includes += $(CRAY_CUDATOOLKIT_INCLUDE_OPTS) + endif + + comm := , + ifneq ($(BL_NO_FORT),TRUE) + LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(shell ftn --cray-print-opts=libs)) + else + LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(shell CC --cray-print-opts=libs)) + endif + + ifneq ($(CUDA_ROOT),) + SYSTEM_CUDA_PATH := $(CUDA_ROOT) + COMPILE_CUDA_PATH := $(CUDA_ROOT) + else ifneq ($(CUDA_HOME),) + SYSTEM_CUDA_PATH := $(CUDA_HOME) + COMPILE_CUDA_PATH := $(CUDA_HOME) + else ifneq ($(CUDA_PATH),) + SYSTEM_CUDA_PATH := $(CUDA_PATH) + COMPILE_CUDA_PATH := $(CUDA_PATH) + else ifneq ($(NVIDIA_PATH),) + SYSTEM_CUDA_PATH := $(NVIDIA_PATH)/cuda + COMPILE_CUDA_PATH := $(NVIDIA_PATH)/cuda + else + $(error No CUDA_ROOT nor CUDA_HOME nor CUDA_PATH found. Please load a cuda module.) + endif + + # Provide system configuration information. + + GPUS_PER_NODE=4 + GPUS_PER_SOCKET=4 + + endif + +endif \ No newline at end of file From f270b3d5db8f8b7ab010bc9134632361b8a9009c Mon Sep 17 00:00:00 2001 From: "Marc T. 
Henry de Frahan" Date: Thu, 18 Aug 2022 13:51:56 -0600 Subject: [PATCH 050/111] Fix OOB access of ref ratio on HDF write header (#2919) --- Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp b/Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp index 021ed8c4f60..49a761da801 100644 --- a/Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp +++ b/Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp @@ -232,11 +232,8 @@ WriteGenericPlotfileHeaderHDF5 (hid_t fid, int ratio = 1; if (ref_ratio.size() > 0) - ratio = ref_ratio[level][0]; + ratio = (level == finest_level)? 1: ref_ratio[level][0]; - if (level == finest_level) { - ratio = 1; - } CreateWriteHDF5AttrInt(grp, "ref_ratio", 1, &ratio); for (int k = 0; k < AMREX_SPACEDIM; ++k) { From 0911fc4b2e066209a590c330bf2ddf7178dca76b Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 21 Aug 2022 18:13:07 -0700 Subject: [PATCH 051/111] Open Boundary Poisson Solver (#2912) This adds an open boundary Poisson solver based on the James's algorithm. To use it, the user builds an amrex:OpenBCSolver object, which can be reused until the grids change, and then call OpenBCSolver::solver. Currently, this is for 3D cell-centered data only. The solver works on CPU, Nvidia GPUS, and AMD GPUs. The SYCL version of a couple of kernels for Intel GPUs are to be implemented. 
--- GNUmakefile.in | 3 + Src/Base/AMReX_DistributionMapping.cpp | 4 +- Src/Boundary/AMReX_LOUtil_K.H | 16 + Src/LinearSolvers/CMakeLists.txt | 12 + Src/LinearSolvers/MLMG/AMReX_MLPoisson.H | 4 + Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp | 59 ++ Src/LinearSolvers/OpenBC/AMReX_OpenBC.H | 136 ++++ Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp | 834 +++++++++++++++++++++ Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H | 166 ++++ Src/LinearSolvers/OpenBC/Make.package | 6 + 10 files changed, 1238 insertions(+), 2 deletions(-) create mode 100644 Src/LinearSolvers/OpenBC/AMReX_OpenBC.H create mode 100644 Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp create mode 100644 Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H create mode 100644 Src/LinearSolvers/OpenBC/Make.package diff --git a/GNUmakefile.in b/GNUmakefile.in index 8a6ce69df09..ad6238543dc 100644 --- a/GNUmakefile.in +++ b/GNUmakefile.in @@ -19,6 +19,9 @@ ifeq ($(USE_FORTRAN_INTERFACE),TRUE) endif ifeq ($(USE_LINEAR_SOLVERS),TRUE) Pdirs += LinearSolvers/MLMG + ifeq ($(DIM),3) + Pdirs += LinearSolvers/OpenBC + endif ifeq ($(USE_FORTRAN_INTERFACE),TRUE) Pdirs += F_Interfaces/LinearSolvers endif diff --git a/Src/Base/AMReX_DistributionMapping.cpp b/Src/Base/AMReX_DistributionMapping.cpp index a61d5b2f591..6b4c0c8925c 100644 --- a/Src/Base/AMReX_DistributionMapping.cpp +++ b/Src/Base/AMReX_DistributionMapping.cpp @@ -1300,7 +1300,7 @@ DistributionMapping::SFCProcessorMap (const BoxArray& boxes, for (int i = 0, N = boxes.size(); i < N; ++i) { - wgts.push_back(boxes[i].volume()); + wgts.push_back(boxes[i].numPts()); } SFCProcessorMapDoIt(boxes,wgts,nprocs); @@ -1769,7 +1769,7 @@ DistributionMapping::makeSFC (const BoxArray& ba, bool use_box_vol, const int np { const Box& bx = ba[i]; tokens.push_back(makeSFCToken(i, bx.smallEnd())); - const Long v = use_box_vol ? bx.volume() : Long(1); + const Long v = use_box_vol ? 
bx.numPts() : Long(1); vol_sum += v; wgts.push_back(v); } diff --git a/Src/Boundary/AMReX_LOUtil_K.H b/Src/Boundary/AMReX_LOUtil_K.H index b8fdb2a37ce..71bb1dd41d1 100644 --- a/Src/Boundary/AMReX_LOUtil_K.H +++ b/Src/Boundary/AMReX_LOUtil_K.H @@ -34,6 +34,22 @@ void poly_interp_coeff (Real xInt, Real const* AMREX_RESTRICT x, int N, Real* AM } } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void poly_interp_coeff (Real xInt, Real const* AMREX_RESTRICT x, Real* AMREX_RESTRICT c) noexcept +{ + for (int j = 0; j < N; ++j) { + Real num = 1.0, den = 1.0; + for (int i = 0; i < N; ++i) { + if (i != j) { + num *= xInt-x[i]; + den *= x[j]-x[i]; + } + } + c[j] = num / den; + } +} + } #endif diff --git a/Src/LinearSolvers/CMakeLists.txt b/Src/LinearSolvers/CMakeLists.txt index bbefab67999..63de2af0113 100644 --- a/Src/LinearSolvers/CMakeLists.txt +++ b/Src/LinearSolvers/CMakeLists.txt @@ -98,3 +98,15 @@ if (AMReX_HYPRE) MLMG/AMReX_MLNodeLaplacian_hypre.cpp ) endif () + +if (AMReX_SPACEDIM EQUAL 3) + + target_include_directories(amrex PUBLIC $) + + target_sources(amrex + PRIVATE + OpenBC/AMReX_OpenBC.H + OpenBC/AMReX_OpenBC_K.H + OpenBC/AMReX_OpenBC.cpp + ) +endif () diff --git a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H index 81dd431d953..41f8fbf1cae 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H @@ -70,6 +70,10 @@ public: virtual void copyNSolveSolution (MultiFab& dst, MultiFab const& src) const final override; + //! Compute dphi/dn on domain faces after the solver has converged. 
+ void get_dpdn_on_domain_faces (Array const& dpdn, + MultiFab const& phi); + private: Vector m_is_singular; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp index ce27eb936fd..15ee75e961a 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp @@ -702,4 +702,63 @@ MLPoisson::copyNSolveSolution (MultiFab& dst, MultiFab const& src) const dst.ParallelCopy(src); } +void +MLPoisson::get_dpdn_on_domain_faces (Array const& dpdn, + MultiFab const& phi) +{ + BL_PROFILE("MLPoisson::dpdn_faces()"); + + // We do not need to call applyBC because this function is used by the + // OpenBC solver after solver has converged. That means the BC has been + // filled to check the residual. + + Box const& domain0 = m_geom[0][0].Domain(); + AMREX_D_TERM(const Real dxi = m_geom[0][0].InvCellSize(0);, + const Real dyi = m_geom[0][0].InvCellSize(1);, + const Real dzi = m_geom[0][0].InvCellSize(2);) + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(phi); mfi.isValid(); ++mfi) + { + Box const& vbx = mfi.validbox(); + for (OrientationIter oit; oit.isValid(); ++oit) { + Orientation face = oit(); + if (vbx[face] == domain0[face]) { + int dir = face.coordDir(); + Array4 const& p = phi.const_array(mfi); + Array4 const& gp = dpdn[dir]->array(mfi); + Box const& b2d = amrex::bdryNode(vbx,face); + if (dir == 0) { + // because it's dphi/dn, not dphi/dx. + Real fac = dxi * (face.isLow() ? -1.0_rt : 1._rt); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(b2d, i, j, k, + { + gp(i,j,k) = fac * (p(i,j,k) - p(i-1,j,k)); + }); + } +#if (AMREX_SPACEDIM > 1) + else if (dir == 1) { + Real fac = dyi * (face.isLow() ? -1.0_rt : 1._rt); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(b2d, i, j, k, + { + gp(i,j,k) = fac * (p(i,j,k) - p(i,j-1,k)); + }); + } +#if (AMREX_SPACEDIM > 2) + else { + Real fac = dzi * (face.isLow() ? 
-1.0_rt : 1._rt); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(b2d, i, j, k, + { + gp(i,j,k) = fac * (p(i,j,k) - p(i,j,k-1)); + }); + } +#endif +#endif + } + } + } +} + } diff --git a/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H new file mode 100644 index 00000000000..d07c26a9fb3 --- /dev/null +++ b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H @@ -0,0 +1,136 @@ +#ifndef AMREX_OPENBC_H_ +#define AMREX_OPENBC_H_ +#include + +#include +#include + +namespace amrex +{ + +namespace openbc { + + static constexpr int M = 7; // highest order of moments + static constexpr int P = 3; + + struct Moments + { + typedef GpuArray array_type; + array_type mom; + Real x, y, z; + Orientation face; + }; + + struct MomTag + { + Array4 gp; + Box b2d; + Orientation face; + int offset; + }; + + std::ostream& operator<< (std::ostream& os, Moments const& mom); +} + +#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP) +template<> +struct Gpu::SharedMemory +{ + AMREX_GPU_DEVICE openbc::Moments::array_type* dataPtr () noexcept { + AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(openbc::Moments::array_type,amrex_openbc_momarray);, + extern __shared__ openbc::Moments::array_type amrex_openbc_momarray[];) + return amrex_openbc_momarray; + } +}; +#endif + +/** + * \brief Open Boundary Poisson Solver + * + * References: + * (1) The Solution of Poisson's Equation for Isolated Source + * Distributions, R. A. James, 1977, JCP 25, 71 + * (2) A Local Corrections Algorithm for Solving Poisson's Equation in Three + * Dimensions, P. McCorquodale, P. Colella, G. T. Balls, & S. B. 
Baden, + * 2007, Communications in Applied Mathematics and Computational Science, + * 2, 1, 57-81 + */ +class OpenBCSolver +{ +public: + OpenBCSolver (); + + OpenBCSolver (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info = LPInfo()); + + ~OpenBCSolver (); + + OpenBCSolver (const OpenBCSolver&) = delete; + OpenBCSolver (OpenBCSolver&&) = delete; + OpenBCSolver& operator= (const OpenBCSolver&) = delete; + OpenBCSolver& operator= (OpenBCSolver&&) = delete; + + void define (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info = LPInfo()); + + void setVerbose (int v) noexcept; + + Real solve (const Vector& a_sol, const Vector& a_rhs, + Real a_tol_rel, Real a_tol_abs); + +public: // public for cuda + + void compute_moments (Gpu::DeviceVector& moments); + void compute_potential (Gpu::DeviceVector const& moments); + void interpolate_potential (MultiFab& solg); + +private: + +#ifdef AMREX_USE_MPI + void bcast_moments (Gpu::DeviceVector& moments); +#endif + + int m_verbose = 0; + Vector m_geom; + Vector m_grids; + Vector m_dmap; + LPInfo m_info; + std::unique_ptr m_poisson_1; + std::unique_ptr m_poisson_2; + std::unique_ptr m_mlmg_1; + std::unique_ptr m_mlmg_2; + + int m_coarsen_ratio = 0; + Array m_dpdn; + Gpu::PinnedVector m_momtags_h; +#ifdef AMREX_USE_GPU + Gpu::DeviceVector m_momtags_d; + Gpu::PinnedVector m_ngpublocks_h; + Gpu::DeviceVector m_ngpublocks_d; + int m_nthreads_momtag; +#endif + + int m_nblocks_local = 0; + int m_nblocks = 0; +#ifdef AMREX_USE_MPI + Vector m_countvec; + Vector m_offset; +#endif + + IntVect m_ngrowdomain; + MultiFab m_crse_grown_faces_phi; + MultiFab m_phind; + BoxArray m_bag; + + BoxArray m_ba_all; + DistributionMapping m_dm_all; + Geometry m_geom_all; +}; + +} + +#endif diff --git a/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp new file mode 100644 index 00000000000..429d4e79141 --- /dev/null +++ 
b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp @@ -0,0 +1,834 @@ +#include +#include +#include + +namespace amrex +{ + +OpenBCSolver::OpenBCSolver () {} + +OpenBCSolver::OpenBCSolver (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info) +{ + define(a_geom, a_grids, a_dmap, a_info); +} + +OpenBCSolver::~OpenBCSolver () {} + +void OpenBCSolver::define (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info) +{ + BL_PROFILE("OpenBCSoler::define()"); + + m_geom = a_geom; + m_grids = a_grids; + m_dmap = a_dmap; + m_info = a_info; + for (auto& grids : m_grids) { + grids.enclosedCells(); + } + + Box const domain0 = m_geom[0].Domain(); + m_coarsen_ratio = 8; + AMREX_ALWAYS_ASSERT(domain0.coarsenable(m_coarsen_ratio)); + int N1d = static_cast(std::round(std::pow(domain0.d_numPts(),1./3.))); + while (domain0.coarsenable(m_coarsen_ratio*2) + && 4*m_coarsen_ratio*m_coarsen_ratio <= N1d) { + m_coarsen_ratio *= 2; + } + + int ntags = 0; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + Box lo = amrex::coarsen(amrex::bdryLo(domain0, idim), m_coarsen_ratio); + Box hi = amrex::coarsen(amrex::bdryHi(domain0, idim), m_coarsen_ratio); + BoxList bl({lo,hi}); + IntVect chunk = lo.length(); + while (bl.size() < ParallelContext::NProcsSub()) { + IntVect chunk_prev = chunk; + for (int jdim = AMREX_SPACEDIM-1; jdim >= 0; --jdim) { + if (jdim != idim) { + int new_chunk_size = chunk[jdim] / 2; + if (bl.size() < ParallelContext::NProcsSub() + && new_chunk_size > 0) { + chunk[jdim] = new_chunk_size; + bl.maxSize(chunk); + } + } + } + if (chunk == chunk_prev) { + break; + } + } + int mgs = std::max(1, 256/m_coarsen_ratio); + bl.maxSize(mgs); + bl.refine(m_coarsen_ratio); + BoxArray ba2d(std::move(bl)); + DistributionMapping dm2d{ba2d}; + m_dpdn[idim].define(ba2d, dm2d, 1, 0); + ntags += m_dpdn[idim].local_size(); + } + + m_momtags_h.reserve(ntags); + int nblocks = 0; + for (int idim = 0; idim < AMREX_SPACEDIM; 
++idim) { + for (MFIter mfi(m_dpdn[idim]); mfi.isValid(); ++mfi) { + Box const& b2d = mfi.validbox(); + Orientation::Side side = (b2d.smallEnd(idim) == domain0.smallEnd(idim)) + ? Orientation::low : Orientation::high; + Orientation face(idim, side); + m_momtags_h.push_back({m_dpdn[idim].const_array(mfi), b2d, face, + nblocks}); + nblocks += static_cast(b2d.numPts()) + / (m_coarsen_ratio*m_coarsen_ratio); + } + } + m_nblocks_local = nblocks; + +#ifdef AMREX_USE_GPU + if (ntags > 0) { + m_momtags_d.resize(ntags); + Gpu::copyAsync(Gpu::hostToDevice, m_momtags_h.begin(), m_momtags_h.end(), m_momtags_d.begin()); + + m_nthreads_momtag = (m_coarsen_ratio == 8) ? 64 : 128; + int ntotgpublocks = 0; + m_ngpublocks_h.reserve(ntags+1); + for (auto const& tag : m_momtags_h) { + m_ngpublocks_h.push_back(ntotgpublocks); + Box cb2d = amrex::coarsen(tag.b2d, m_coarsen_ratio); + ntotgpublocks += static_cast(cb2d.numPts()); + } + m_ngpublocks_h.push_back(ntotgpublocks); + m_ngpublocks_d.resize(m_ngpublocks_h.size()); + Gpu::copyAsync(Gpu::hostToDevice, m_ngpublocks_h.begin(), m_ngpublocks_h.end(), + m_ngpublocks_d.begin()); + } +#endif + + auto const dx = m_geom[0].CellSize(); + Real dmax = amrex::max(std::sqrt(dx[0]*dx[0]+dx[1]*dx[1]), + std::sqrt(dx[0]*dx[0]+dx[2]*dx[2]), + std::sqrt(dx[1]*dx[1]+dx[2]*dx[2])); + m_ngrowdomain[0] = static_cast(std::ceil(dmax/dx[0])) * m_coarsen_ratio; + m_ngrowdomain[1] = static_cast(std::ceil(dmax/dx[1])) * m_coarsen_ratio; + m_ngrowdomain[2] = static_cast(std::ceil(dmax/dx[2])) * m_coarsen_ratio; + // This is the minimal size we need to embiggen the domain. 
+ + Box const domain1 = amrex::grow(domain0, m_ngrowdomain); + BoxList bl_crse_grown_faces(IndexType::TheNodeType()); + for (OrientationIter oit; oit.isValid(); ++oit) { + Orientation face = oit(); + Box face_box = amrex::surroundingNodes(amrex::bdryNode(domain1,face)); + face_box.coarsen(m_coarsen_ratio); + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if (idim != face.coordDir()) { + face_box.grow(idim,openbc::P); + } + } + bl_crse_grown_faces.push_back(face_box); + } + + bl_crse_grown_faces.maxSize(16); // xxxxx make this a parameter? + BoxArray ba_crse_grown_faces(std::move(bl_crse_grown_faces)); + DistributionMapping dm_crse_grown_faces(ba_crse_grown_faces); + m_crse_grown_faces_phi.define(ba_crse_grown_faces, dm_crse_grown_faces, 1, 0); + + BoxList blg = amrex::boxDiff(domain1, domain0); + blg.maxSize(std::max(64,m_coarsen_ratio)); // xxxxx make this a parameter? + m_bag = BoxArray(std::move(blg)); + DistributionMapping dmg(m_bag); + m_phind.define(amrex::coarsen(amrex::convert(m_bag,IntVect(1)),m_coarsen_ratio), + dmg, 1, openbc::P); + + BoxList bl0 = m_grids[0].boxList(); + BoxList bl1 = m_bag.boxList(); + Vector p0 = m_dmap[0].ProcessorMap(); + Vector p1 = dmg.ProcessorMap(); + bl0.join(bl1); + p0.insert(p0.end(), p1.begin(), p1.end()); + IntVect const offset = -domain1.smallEnd(); + for (auto& b : bl0) { + b.shift(offset); + } + m_ba_all = BoxArray(std::move(bl0)); + m_dm_all = DistributionMapping(std::move(p0)); + + auto const problo = m_geom[0].ProbLo(); + auto const probhi = m_geom[0].ProbHi(); + std::array problo_all, probhi_all; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + problo_all[idim] = problo[idim] - m_ngrowdomain[idim]*dx[idim]; + probhi_all[idim] = probhi[idim] + m_ngrowdomain[idim]*dx[idim]; + } + m_geom_all = Geometry(amrex::shift(domain1,offset), + RealBox(problo_all,probhi_all), + m_geom[0].Coord(), m_geom[0].isPeriodic()); +} + +void OpenBCSolver::setVerbose (int v) noexcept +{ + m_verbose = v; +} + +Real 
OpenBCSolver::solve (const Vector& a_sol, + const Vector& a_rhs, + Real a_tol_rel, Real a_tol_abs) +{ + BL_PROFILE("OpenBCSolver::solve()"); + + auto solve_start_time = amrex::second(); + + int nlevels = m_geom.size(); + + BL_PROFILE_VAR("OpenBCSolver::MG1", blp_mg1); + + if (m_poisson_1 == nullptr) { + m_poisson_1 = std::make_unique(m_geom, m_grids, m_dmap, m_info); + m_poisson_1->setVerbose(m_verbose); + m_poisson_1->setMaxOrder(4); + m_poisson_1->setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}, + {AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}); + for (int ilev = 0; ilev < nlevels; ++ilev) { + m_poisson_1->setLevelBC(ilev, nullptr); + } + + m_mlmg_1 = std::make_unique(*m_poisson_1); + m_mlmg_1->setVerbose(m_verbose); + } + m_mlmg_1->solve(a_sol, a_rhs, a_tol_rel, a_tol_abs); + + BL_PROFILE_VAR_STOP(blp_mg1); + + Array dpdn_tmp; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + dpdn_tmp[idim].define(amrex::convert(m_grids[0], + IntVect::TheDimensionVector(idim)), + m_dmap[0], 1, 0); + } + m_poisson_1->get_dpdn_on_domain_faces(GetArrOfPtrs(dpdn_tmp), *a_sol[0]); + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + m_dpdn[idim].ParallelCopy(dpdn_tmp[idim]); + } + + { + Gpu::DeviceVector moments(m_nblocks_local); + compute_moments(moments); + compute_potential(moments); + } + + MultiFab rhsg(m_bag, m_phind.DistributionMap(), 1, a_rhs[0]->nGrowVect()); + rhsg.setVal(0._rt); + + MultiFab solg(m_bag, m_phind.DistributionMap(), 1, 1); + solg.setVal(0._rt); + interpolate_potential(solg); + + const int nboxes0 = m_grids[0].size(); + MultiFab sol_all(m_ba_all, m_dm_all, 1, solg.nGrowVect(), + MFInfo().SetAlloc(false)); + MultiFab rhs_all(m_ba_all, m_dm_all, 1, rhsg.nGrowVect(), + MFInfo().SetAlloc(false)); + + Box const domain1 = amrex::grow(m_geom[0].Domain(), m_ngrowdomain); + IntVect const offset = -domain1.smallEnd(); + for (MFIter mfi(sol_all); mfi.isValid(); 
++mfi) { + const int index = mfi.index(); + FArrayBox solfab, rhsfab; + if (index < nboxes0) { + FArrayBox& sfab0 = (*a_sol[0])[index]; + if (sol_all.nGrowVect() == a_sol[0]->nGrowVect()) { + solfab = FArrayBox(sfab0, amrex::make_alias, 0, 1); + } else { + Box b = sfab0.box(); + b.grow(sol_all.nGrowVect()-a_sol[0]->nGrowVect()); + solfab.resize(b,1); + solfab.template setVal(0._rt); + } + rhsfab = FArrayBox((*a_rhs[0])[index], amrex::make_alias, 0, 1); + } else { + solfab = FArrayBox(solg[index-nboxes0], amrex::make_alias, 0, 1); + rhsfab = FArrayBox(rhsg[index-nboxes0], amrex::make_alias, 0, 1); + } + solfab.shift(offset); + rhsfab.shift(offset); + sol_all.setFab(index, std::move(solfab)); + rhs_all.setFab(index, std::move(rhsfab)); + } + + BL_PROFILE_VAR("OpenBCSolver::MG2", blp_mg2); + + if (m_poisson_2 == nullptr) { + Vector geom_all = m_geom; + Vector grids_all = m_grids; + Vector dmap_all = m_dmap; + geom_all[0] = m_geom_all; + grids_all[0] = m_ba_all; + dmap_all[0] = m_dm_all; + m_poisson_2 = std::make_unique(geom_all, grids_all, dmap_all, + m_info); + m_poisson_2->setVerbose(m_verbose); + m_poisson_2->setMaxOrder(4); + m_poisson_2->setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}, + {AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}); + m_poisson_2->setLevelBC(0, &sol_all); + for (int ilev = 1; ilev < nlevels; ++ilev) { + m_poisson_2->setLevelBC(ilev, nullptr); + } + + m_mlmg_2 = std::make_unique(*m_poisson_2); + m_mlmg_2->setVerbose(m_verbose); + } + Vector solv_all = a_sol; + Vector rhsv_all = a_rhs; + solv_all[0] = &sol_all; + rhsv_all[0] = &rhs_all; + Real err = m_mlmg_2->solve(solv_all, rhsv_all, a_tol_rel, a_tol_abs); + + BL_PROFILE_VAR_STOP(blp_mg2); + + if (sol_all.nGrowVect() != a_sol[0]->nGrowVect()) { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(*a_sol[0], TilingIfNotGPU()); mfi.isValid(); ++mfi) + { 
+ Box const& bx = mfi.tilebox(); + Array4 const& sall = sol_all.const_array(mfi.index()); + Array4 const& s = a_sol[0]->array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(bx, i, j, k, + { + s(i,j,k) = sall(i,j,k); + }); + } + } + + auto solve_stop_time = amrex::second(); + if (m_verbose >= 1) { + amrex::Print() << "OpenBCSolver time = " + << solve_stop_time - solve_start_time << "\n"; + } + + return err; +} + +void OpenBCSolver::compute_moments (Gpu::DeviceVector& moments) +{ + BL_PROFILE("OpenBCSolver::comp_mom()"); + + auto const problo = m_geom[0].ProbLoArray(); + auto const probhi = m_geom[0].ProbHiArray(); + auto const dx = m_geom[0].CellSizeArray(); + +#ifdef AMREX_USE_GPU + if (m_momtags_h.size() > 0) + { + int crse_ratio = m_coarsen_ratio; + int ntags = m_momtags_h.size(); + openbc::Moments* pm = moments.data(); + openbc::MomTag const* ptag = m_momtags_d.data(); + int const* pnblks = m_ngpublocks_d.data(); + std::size_t shared_mem_bytes = m_nthreads_momtag * sizeof(openbc::Moments::array_type); + +#ifdef AMREX_USE_DPCPP + amrex::ignore_unused(problo,probhi,dx,crse_ratio,ntags,pm,ptag,pnblks, + shared_mem_bytes); + amrex::Abort("xxxx DPCPP todo: openbc compute_moments"); +#else + amrex::launch(m_ngpublocks_h.back(), m_nthreads_momtag, shared_mem_bytes, Gpu::gpuStream(), + [=] AMREX_GPU_DEVICE () noexcept + { + Gpu::SharedMemory gsm; + openbc::Moments::array_type* const shared = gsm.dataPtr(); + openbc::Moments::array_type& tmom = shared[threadIdx.x]; + for (int i = 0; i < (openbc::M+1)*(openbc::M+2)/2; ++i) { + tmom[i] = Real(0.); + } + + int tag_id = amrex::bisect(pnblks, 0, ntags, static_cast(blockIdx.x)); + int iblock = blockIdx.x - pnblks[tag_id]; // iblock'th gpublock on this box. 
+ auto const& tag = ptag[tag_id]; + openbc::Moments& mom = pm[tag.offset+iblock]; + if (tag.face.coordDir() == 0) { + int const nby = tag.b2d.length(1) / crse_ratio; + int const kb = iblock / nby; + int const jb = iblock - kb*nby; + int const i = tag.b2d.smallEnd(0); + int const jlo = tag.b2d.smallEnd(1) + jb*crse_ratio; + int const klo = tag.b2d.smallEnd(2) + kb*crse_ratio; + Real const fac = dx[1]*dx[2]; + Real const xc = tag.face.isLow() ? problo[0] : probhi[0]; + for (int icell = threadIdx.x; icell < crse_ratio*crse_ratio; icell += blockDim.x) { + int k = icell/crse_ratio; + int j = icell - k*crse_ratio; + Real const yy = (j-crse_ratio/2+Real(0.5))*dx[1]; + Real const zz = (k-crse_ratio/2+Real(0.5))*dx[2]; + j += jlo; + k += klo; + Real const charge = tag.gp(i,j,k) * fac; + Real zpow = Real(1.); + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real ypow = Real(1.); + for (int p = 0; p <= openbc::M-q; ++p) { + tmom[m++] += charge*ypow*zpow; + ypow *= yy; + } + zpow *= zz; + } + } + if (threadIdx.x == 0) { + mom.x = xc; + mom.y = problo[1] + dx[1]*(jlo + crse_ratio/2); + mom.z = problo[2] + dx[2]*(klo + crse_ratio/2); + mom.face = tag.face; + } + } else if (tag.face.coordDir() == 1) { + int const nbx = tag.b2d.length(0) / crse_ratio; + int const kb = iblock / nbx; + int const ib = iblock - kb*nbx; + int const j = tag.b2d.smallEnd(1); + int const ilo = tag.b2d.smallEnd(0) + ib*crse_ratio; + int const klo = tag.b2d.smallEnd(2) + kb*crse_ratio; + Real const fac = dx[0]*dx[2]; + Real const yc = tag.face.isLow() ? 
problo[1] : probhi[1]; + for (int icell = threadIdx.x; icell < crse_ratio*crse_ratio; icell += blockDim.x) { + int k = icell/crse_ratio; + int i = icell - k*crse_ratio; + Real const xx = (i-crse_ratio/2+Real(0.5))*dx[0]; + Real const zz = (k-crse_ratio/2+Real(0.5))*dx[2]; + i += ilo; + k += klo; + Real const charge = tag.gp(i,j,k) * fac; + Real zpow = Real(1.); + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real xpow = Real(1.); + for (int p = 0; p <= openbc::M-q; ++p) { + tmom[m++] += charge*xpow*zpow; + xpow *= xx; + } + zpow *= zz; + } + } + if (threadIdx.x == 0) { + mom.x = problo[0] + dx[0]*(ilo + crse_ratio/2); + mom.y = yc; + mom.z = problo[2] + dx[2]*(klo + crse_ratio/2); + mom.face = tag.face; + } + } else { + int const nbx = tag.b2d.length(0) / crse_ratio; + int const jb = iblock / nbx; + int const ib = iblock - jb*nbx; + int const k = tag.b2d.smallEnd(2); + int const ilo = tag.b2d.smallEnd(0) + ib*crse_ratio; + int const jlo = tag.b2d.smallEnd(1) + jb*crse_ratio; + Real const fac = dx[0]*dx[1]; + Real const zc = tag.face.isLow() ? 
problo[2] : probhi[2]; + for (int icell = threadIdx.x; icell < crse_ratio*crse_ratio; icell += blockDim.x) { + int j = icell/crse_ratio; + int i = icell - j*crse_ratio; + Real const xx = (i-crse_ratio/2+Real(0.5))*dx[0]; + Real const yy = (j-crse_ratio/2+Real(0.5))*dx[1]; + i += ilo; + j += jlo; + Real const charge = tag.gp(i,j,k) * fac; + Real ypow = Real(1.); + int m = 0; + for (int q=0; q <= openbc::M; ++q) { + Real xpow = Real(1.); + for (int p = 0; p <= openbc::M-q; ++p) { + tmom[m++] += charge*xpow*ypow; + xpow *= xx; + } + ypow *= yy; + } + } + if (threadIdx.x == 0) { + mom.x = problo[0] + dx[0]*(ilo + crse_ratio/2); + mom.y = problo[1] + dx[1]*(jlo + crse_ratio/2); + mom.z = zc; + mom.face = tag.face; + } + } + openbc::scale_moments(tmom); + + __syncthreads(); + + if (threadIdx.x < (openbc::M+1)*(openbc::M+2)/2) { + mom.mom[threadIdx.x] = Real(0.); + for (unsigned int i = 0; i < blockDim.x; ++i) { + mom.mom[threadIdx.x] += shared[i][threadIdx.x]; + } + } + }); +#endif + } +#else + for (auto const& tag : m_momtags_h) { + if (tag.face.coordDir() == 0) { + int nby = tag.b2d.length(1) / m_coarsen_ratio; + int nbz = tag.b2d.length(2) / m_coarsen_ratio; + int i = tag.b2d.smallEnd(0); + int jlo = tag.b2d.smallEnd(1); + int klo = tag.b2d.smallEnd(2); + Real fac = dx[1]*dx[2]; + Real xc = tag.face.isLow() ? 
problo[0] : probhi[0]; + for (int kb = 0; kb < nbz; ++kb) { + for (int jb = 0; jb < nby; ++jb) { + openbc::Moments& mom = moments[tag.offset+jb+kb*nby]; + for (auto& m : mom.mom) { + m = 0._rt; + } + for (int kk = 0; kk < m_coarsen_ratio; ++kk) { + for (int jj = 0; jj < m_coarsen_ratio; ++jj) { + Real charge = tag.gp(i, jlo+jb*m_coarsen_ratio+jj, + klo+kb*m_coarsen_ratio+kk) * fac; + Real yy = (jj-m_coarsen_ratio/2+0.5_rt)*dx[1]; + Real zz = (kk-m_coarsen_ratio/2+0.5_rt)*dx[2]; + Real zpow = 1._rt; + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real ypow = 1._rt; + for (int p = 0; p <= openbc::M-q; ++p) { + mom.mom[m++] += charge*ypow*zpow; + ypow *= yy; + } + zpow *= zz; + } + }} + openbc::scale_moments(mom.mom); + // center of the block + mom.x = xc; + mom.y = problo[1] + dx[1]*(tag.b2d.smallEnd(1) + + jb*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.z = problo[2] + dx[2]*(tag.b2d.smallEnd(2) + + kb*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.face = tag.face; + }} + } else if (tag.face.coordDir() == 1) { + int nbx = tag.b2d.length(0) / m_coarsen_ratio; + int nbz = tag.b2d.length(2) / m_coarsen_ratio; + int j = tag.b2d.smallEnd(1); + int ilo = tag.b2d.smallEnd(0); + int klo = tag.b2d.smallEnd(2); + Real fac = dx[0]*dx[2]; + Real yc = tag.face.isLow() ? 
problo[1] : probhi[1]; + for (int kb = 0; kb < nbz; ++kb) { + for (int ib = 0; ib < nbx; ++ib) { + openbc::Moments& mom = moments[tag.offset+ib+kb*nbx]; + for (auto& m : mom.mom) { + m = 0._rt; + } + for (int kk = 0; kk < m_coarsen_ratio; ++kk) { + for (int ii = 0; ii < m_coarsen_ratio; ++ii) { + Real charge = tag.gp(ilo+ib*m_coarsen_ratio+ii, j, + klo+kb*m_coarsen_ratio+kk) * fac; + Real xx = (ii-m_coarsen_ratio/2+0.5_rt)*dx[0]; + Real zz = (kk-m_coarsen_ratio/2+0.5_rt)*dx[2]; + Real zpow = 1._rt; + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real xpow = 1._rt; + for (int p = 0; p <= openbc::M-q; ++p) { + mom.mom[m++] += charge*xpow*zpow; + xpow *= xx; + } + zpow *= zz; + } + }} + openbc::scale_moments(mom.mom); + mom.x = problo[0] + dx[0]*(tag.b2d.smallEnd(0) + + ib*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.y = yc; + mom.z = problo[2] + dx[2]*(tag.b2d.smallEnd(2) + + kb*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.face = tag.face; + }} + } else { + int nbx = tag.b2d.length(0) / m_coarsen_ratio; + int nby = tag.b2d.length(1) / m_coarsen_ratio; + int k = tag.b2d.smallEnd(2); + int ilo = tag.b2d.smallEnd(0); + int jlo = tag.b2d.smallEnd(1); + Real fac = dx[0]*dx[1]; + Real zc = tag.face.isLow() ? 
problo[2] : probhi[2]; + for (int jb = 0; jb < nby; ++jb) { + for (int ib = 0; ib < nbx; ++ib) { + openbc::Moments& mom = moments[tag.offset+ib+jb*nbx]; + for (auto& m : mom.mom) { + m = 0._rt; + } + for (int jj = 0; jj < m_coarsen_ratio; ++jj) { + for (int ii = 0; ii < m_coarsen_ratio; ++ii) { + Real charge = tag.gp(ilo+ib*m_coarsen_ratio+ii, + jlo+jb*m_coarsen_ratio+jj, k) * fac; + Real xx = (ii-m_coarsen_ratio/2+0.5_rt)*dx[0]; + Real yy = (jj-m_coarsen_ratio/2+0.5_rt)*dx[1]; + Real ypow = 1._rt; + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real xpow = 1._rt; + for (int p = 0; p <= openbc::M-q; ++p) { + mom.mom[m++] += charge*xpow*ypow; + xpow *= xx; + } + ypow *= yy; + } + }} + openbc::scale_moments(mom.mom); + mom.x = problo[0] + dx[0]*(tag.b2d.smallEnd(0) + + ib*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.y = problo[1] + dx[1]*(tag.b2d.smallEnd(1) + + jb*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.z = zc; + mom.face = tag.face; + }} + } + } +#endif + +#ifdef AMREX_USE_MPI + bcast_moments(moments); +#endif + m_nblocks = moments.size(); +} + +#ifdef AMREX_USE_MPI +void OpenBCSolver::bcast_moments (Gpu::DeviceVector& moments) +{ + if (ParallelContext::NProcsSub() > 1) + { + MPI_Comm comm = ParallelContext::CommunicatorSub(); + if (m_nblocks == 0) { + int count = moments.size(); + count *= static_cast(sizeof(openbc::Moments)); + m_countvec.resize(ParallelContext::NProcsSub()); + MPI_Allgather(&count, 1, MPI_INT, m_countvec.data(), 1, MPI_INT, comm); + + m_offset.resize(m_countvec.size(), 0); + Long count_tot = m_countvec[0]; + for (int i = 1, N = m_offset.size(); i < N; ++i) { + m_offset[i] = m_offset[i-1] + m_countvec[i-1]; + count_tot += m_countvec[i]; + } + + if (count_tot > static_cast(std::numeric_limits::max())) { + amrex::Abort("OpenBC: integer overflow. 
Let us know and we will fix this."); + } + + m_nblocks = count_tot/sizeof(openbc::Moments); + } + + Gpu::DeviceVector moments_all(m_nblocks); + +#ifdef AMREX_USE_GPU + Gpu::PinnedVector h_moments(moments.size()); + Gpu::PinnedVector h_moments_all(moments_all.size()); + Gpu::copyAsync(Gpu::deviceToHost, moments.begin(), moments.end(), + h_moments.begin()); + Gpu::streamSynchronize(); +#else + auto const& h_moments = moments; + auto& h_moments_all = moments_all; +#endif + + int count = m_nblocks_local*static_cast(sizeof(openbc::Moments)); + MPI_Allgatherv(h_moments.data(), count, MPI_CHAR, h_moments_all.data(), + m_countvec.data(), m_offset.data(), MPI_CHAR, comm); + +#ifdef AMREX_USE_GPU + Gpu::copyAsync(Gpu::hostToDevice, h_moments_all.begin(), h_moments_all.end(), + moments_all.begin()); + Gpu::streamSynchronize(); +#endif + + std::swap(moments, moments_all); + } +} +#endif + +void OpenBCSolver::compute_potential (Gpu::DeviceVector const& moments) +{ + BL_PROFILE("OpenBCSolver::comp_phi()"); + + auto const problo = m_geom[0].ProbLoArray(); + auto const dx = m_geom[0].CellSizeArray(); + + int crse_ratio = m_coarsen_ratio; + int nblocks = m_nblocks; + openbc::Moments const* pmom = moments.data(); + for (MFIter mfi(m_crse_grown_faces_phi); mfi.isValid(); ++mfi) { + Box const& b = mfi.validbox(); + Array4 const& phi_arr = m_crse_grown_faces_phi.array(mfi); +#if defined(AMREX_USE_GPU) + const auto lo = amrex::lbound(b); + const auto len = amrex::length(b); + const auto lenxy = len.x*len.y; + const auto lenx = len.x; +#ifdef AMREX_USE_DPCPP + amrex::ignore_unused(problo,dx,crse_ratio,nblocks,pmom,b,phi_arr,lo, + lenxy,lenx); + amrex::Abort("xxxxx DPCPP todo: openbc compute_potential"); +#else + amrex::launch(b.numPts(), AMREX_GPU_MAX_THREADS, Gpu::gpuStream(), + [=] AMREX_GPU_DEVICE () noexcept + { + int icell = blockIdx.x; + int k = icell / lenxy; + int j = (icell - k*lenxy) / lenx; + int i = (icell - k*lenxy) - j*lenx; + i += lo.x; + j += lo.y; + k += lo.z; + Real xb 
= problo[0] + i*crse_ratio*dx[0]; + Real yb = problo[1] + j*crse_ratio*dx[1]; + Real zb = problo[2] + k*crse_ratio*dx[2]; + Real phi = Real(0.); + for (int iblock = threadIdx.x; iblock < nblocks; iblock += blockDim.x) { + phi += openbc::block_potential(pmom[iblock], xb, yb, zb); + } + Real phitot = Gpu::blockReduceSum(phi); + if (threadIdx.x == 0) { + phi_arr(i,j,k) = phitot; + } + }); +#endif +#else + amrex::LoopOnCpu(b, [&] (int i, int j, int k) noexcept + { + Real xb = problo[0] + i*crse_ratio*dx[0]; + Real yb = problo[1] + j*crse_ratio*dx[1]; + Real zb = problo[2] + k*crse_ratio*dx[2]; + Real phi = 0._rt; + for (int iblock = 0; iblock < nblocks; ++iblock) { + phi += openbc::block_potential(pmom[iblock], xb, yb, zb); + } + phi_arr(i,j,k) = phi; + }); +#endif + } + + m_phind.ParallelCopy(m_crse_grown_faces_phi, 0, 0, 1, IntVect(0), + m_phind.nGrowVect()); +} + +void OpenBCSolver::interpolate_potential (MultiFab& solg) +{ + BL_PROFILE("OpenBCSolver::interp_phi"); + + Box const domain1 = amrex::grow(m_geom[0].Domain(), m_ngrowdomain); + int crse_ratio = m_coarsen_ratio; + + for (MFIter mfi(solg); mfi.isValid(); ++mfi) { + Box const& vbx = mfi.validbox(); + for (OrientationIter oit; oit.isValid(); ++oit) { + Orientation face = oit(); + if (vbx[face] == domain1[face]) { + Array4 const& solg_arr = solg.array(mfi); + Array4 const& phi_arr = m_phind.const_array(mfi); + Box const& b2d = amrex::bdryNode(vbx, face); + int offset = face.isLow() ? 
-1 : 0; + if (face.coordDir() == 0) { + Box b = amrex::coarsen(b2d,IntVect(crse_ratio,crse_ratio,1)); + b.grow(1,openbc::P).surroundingNodes(1); + FArrayBox tmpfab(b,1,The_Async_Arena()); + Array4 const& tmp = tmpfab.array(); + Array4 const& ctmp = tmpfab.const_array(); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int ic, int jc, int k) noexcept + { + tmp(ic,jc,k) = openbc::interpccz(ic,jc,k,phi_arr,crse_ratio); + }); + b = amrex::coarsen(b2d,IntVect(crse_ratio,1,1)); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int ic, int j, int k) noexcept + { + int i = ic*crse_ratio+offset; + solg_arr(i,j,k) = openbc::interpccy(ic,j,k,ctmp,crse_ratio); + }); + } else if (face.coordDir() == 1) { + Box b = amrex::coarsen(b2d,IntVect(crse_ratio,crse_ratio,1)); + b.grow(0,openbc::P).surroundingNodes(0); + FArrayBox tmpfab(b,1,The_Async_Arena()); + Array4 const& tmp = tmpfab.array(); + Array4 const& ctmp = tmpfab.const_array(); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int ic, int jc, int k) noexcept + { + tmp(ic,jc,k) = openbc::interpccz(ic,jc,k,phi_arr,crse_ratio); + }); + b = amrex::coarsen(b2d,IntVect(1,crse_ratio,1)); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int i, int jc, int k) noexcept + { + int j = jc*crse_ratio+offset; + solg_arr(i,j,k) = openbc::interpccx(i,jc,k,ctmp,crse_ratio); + }); + } else { + Box b = amrex::coarsen(b2d,IntVect(crse_ratio,1,crse_ratio)); + b.grow(0,openbc::P).surroundingNodes(0); + FArrayBox tmpfab(b,1,The_Async_Arena()); + Array4 const& tmp = tmpfab.array(); + Array4 const& ctmp = tmpfab.const_array(); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int ic, int j, int kc) noexcept + { + tmp(ic,j,kc) = openbc::interpccy(ic,j,kc,phi_arr,crse_ratio); + }); + b = amrex::coarsen(b2d,IntVect(1,1,crse_ratio)); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int i, int j, int kc) noexcept + { + int k = kc*crse_ratio+offset; + solg_arr(i,j,k) = openbc::interpccx(i,j,kc,ctmp,crse_ratio); + }); + } + } + } + } +} + +namespace openbc { 
+std::ostream& operator<< (std::ostream& os, Moments const& mom) +{ + os << "Face " << mom.face << ", x = " << mom.x << ", y = " << mom.y + << ", z = " << mom.z << "\n" + << " " << mom.mom[0] << "\n" + << " " << mom.mom[1] << ", " << mom.mom[8] << "\n" + << " " << mom.mom[2] << ", " << mom.mom[9] << ", " << mom.mom[15] << "\n" + << " " << mom.mom[3] << ", " << mom.mom[10] << ", " << mom.mom[16] + << ", " << mom.mom[21] << "\n" + << " " << mom.mom[4] << ", " << mom.mom[11] << ", " << mom.mom[17] + << ", " << mom.mom[22] << ", " << mom.mom[26] << "\n" + << " " << mom.mom[5] << ", " << mom.mom[12] << ", " << mom.mom[18] + << ", " << mom.mom[23] << ", " << mom.mom[27] << ", " << mom.mom[30] << "\n" + << " " << mom.mom[6] << ", " << mom.mom[13] << ", " << mom.mom[19] + << ", " << mom.mom[24] << ", " << mom.mom[28] << ", " << mom.mom[31] + << ", " << mom.mom[33] << "\n" + << " " << mom.mom[7] << ", " << mom.mom[14] << ", " << mom.mom[20] + << ", " << mom.mom[25] << ", " << mom.mom[29] << ", " << mom.mom[32] + << ", " << mom.mom[34] << ", " << mom.mom[35] << "\n"; + return os; +} +} + +} diff --git a/Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H b/Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H new file mode 100644 index 00000000000..7a6b2643b68 --- /dev/null +++ b/Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H @@ -0,0 +1,166 @@ +#ifndef AMREX_OPENBC_K_H_ +#define AMREX_OPENBC_K_H_ + +#include +#include + +namespace amrex { namespace openbc { + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void scale_moments (openbc::Moments::array_type& mom) +{ // p!*q! in the order of 0!*0!, 1!*0!, ..., 7!*0!, 0!*1!, 1!*1!, 2!*1!, ..., 6!*1!, 0!*2!, ..., 0!*7!. 
+ mom[ 2] *= Real(0.5); + mom[ 3] *= Real(1./6.); + mom[ 4] *= Real(1./24.); + mom[ 5] *= Real(1./120.); + mom[ 6] *= Real(1./720.); + mom[ 7] *= Real(1./5040.); + mom[10] *= Real(0.5); + mom[11] *= Real(1./6.); + mom[12] *= Real(1./24.); + mom[13] *= Real(1./120.); + mom[14] *= Real(1./720.); + mom[15] *= Real(0.5); + mom[16] *= Real(0.5); + mom[17] *= Real(0.25); + mom[18] *= Real(1./12.); + mom[19] *= Real(1./48.); + mom[20] *= Real(1./240.); + mom[21] *= Real(1./6.); + mom[22] *= Real(1./6.); + mom[23] *= Real(1./12.); + mom[24] *= Real(1./36.); + mom[25] *= Real(1./144.); + mom[26] *= Real(1./24.); + mom[27] *= Real(1./24.); + mom[28] *= Real(1./48.); + mom[29] *= Real(1./144.); + mom[30] *= Real(1./120.); + mom[31] *= Real(1./120.); + mom[32] *= Real(1./240.); + mom[33] *= Real(1./720.); + mom[34] *= Real(1./720.); + mom[35] *= Real(1./5040.); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real block_potential (openbc::Moments const& mom, Real xb, Real yb, Real zb) +{ + constexpr Real oneover4pi = Real(1.)/Real(4.*3.1415926535897932); + + xb -= mom.x; + yb -= mom.y; + zb -= mom.z; + Real ri = Real(1.)/std::sqrt(xb*xb+yb*yb+zb*zb); + Real ri2 = ri*ri; + Real ri3 = ri2*ri; + Real ri4 = ri3*ri; + Real xr, yr; + if (mom.face.coordDir() == 0) { + xr = yb*ri; + yr = zb*ri; + } else if (mom.face.coordDir() == 1) { + xr = xb*ri; + yr = zb*ri; + } else { + xr = xb*ri; + yr = yb*ri; + } + Real xr2 = xr *xr; + Real xr4 = xr2*xr2; + Real xr6 = xr4*xr2; + Real yr2 = yr *yr; + Real yr4 = yr2*yr2; + Real yr6 = yr4*yr2; + Real phi = ri * mom.mom[0] + + ri2*(xr*mom.mom[1] + yr*mom.mom[8]) + + ri3*((Real(3.) * xr2 - Real(1.)) * mom.mom[2] + + (Real(3.) * xr * yr ) * mom.mom[9] + + (Real(3.) * yr2 - Real(1.)) * mom.mom[15]) + + ri4 * (xr * (Real(15.) * xr2 - Real(9.)) * mom.mom[3] + + yr * (Real(15.) * xr2 - Real(3.)) * mom.mom[10] + + xr * (Real(15.) * yr2 - Real(3.)) * mom.mom[16] + + yr * (Real(15.) * yr2 - Real(9.)) * mom.mom[21]) + + ri4*ri * ((Real(105.) * xr4 - Real(90.) 
* xr2 + Real(9.)) * mom.mom[4] + + (xr * yr * (Real(105.) * xr2 - Real(45.))) * mom.mom[11] + + (Real(105.) * xr2 * yr2 - Real(15.) * xr2 - Real(15.) * yr2 + Real(3.)) * mom.mom[17] + + (xr * yr * (Real(105.) * yr2 - Real(45.))) * mom.mom[22] + + (Real(105.) * yr4 - Real(90.) * yr2 + Real(9.)) * mom.mom[26]) + + ri4*ri2 * (xr * (Real(945.)*xr4 - Real(1050.)*xr2 + Real(225.)) * mom.mom[5] + + yr * (Real(945.)*xr4 - Real(630.)*xr2 + Real(45.)) * mom.mom[12] + + xr * (Real(945.)*xr2*yr2 - Real(105.)*xr2 - Real(315.)*yr2 + Real(45.)) * mom.mom[18] + + yr * (Real(945.)*xr2*yr2 - Real(315.)*xr2 - Real(105.)*yr2 + Real(45.)) * mom.mom[23] + + xr * (Real(945.)*yr4 - Real(630.)*yr2 + Real(45.)) * mom.mom[27] + + yr * (Real(945.)*yr4 - Real(1050.)*yr2 + Real(225.)) * mom.mom[30]) + + ri4*ri3 * (Real(45.) * (Real(231.)*xr6 - Real(315.)*xr4 + Real(105.)*xr2 - Real(5.)) * mom.mom[6] + + Real(315.)*xr*yr * (Real(33.)*xr4 - Real(30.)*xr2 + Real(5.)) * mom.mom[13] + + Real(45.) * (Real(231.)*xr4*yr2 - Real(21.)*xr4 - Real(126.)*xr2*yr2 + Real(14.)*xr2 + Real(7.)*yr2 - Real(1.)) * mom.mom[19] + + Real(945.)*xr*yr * (Real(11.)*xr2*yr2 - Real(3.)*xr2 - Real(3.)*yr2 + Real(1.)) * mom.mom[24] + + Real(45.) * (Real(231.)*xr2*yr4 - Real(126.)*xr2*yr2 + Real(7.)*xr2 - Real(21.)*yr4 + Real(14.)*yr2 - Real(1.)) * mom.mom[28] + + Real(315.)*xr*yr * (Real(33.)*yr4 - Real(30.)*yr2 + Real(5.)) * mom.mom[31] + + Real(45.) 
* (Real(231.)*yr6 - Real(315.)*yr4 + Real(105.)*yr2 - Real(5.)) * mom.mom[33]) + + ri4*ri4*(Real(315.)*xr*(Real(429.)*xr6 - Real(693.)*xr4 + Real(315.)*xr2 - Real(35.)) * mom.mom[7] + + Real(315.)*yr*(Real(429.)*xr6 - Real(495.)*xr4 + Real(135.)*xr2 - Real(5.)) * mom.mom[14] + + Real(315.)*xr*(Real(429.)*xr4*yr2 - Real(33.)*xr4 - Real(330.)*xr2*yr2 + Real(30.)*xr2 + Real(45.)*yr2 - Real(5.)) * mom.mom[20] + + Real(945.)*yr*(Real(143.)*xr4*yr2 - Real(33.)*xr4 - Real(66.)*xr2*yr2 + Real(18.)*xr2 + Real(3.)*yr2 - Real(1.)) * mom.mom[25] + + Real(945.)*xr*(Real(143.)*xr2*yr4 - Real(66.)*xr2*yr2 + Real(3.)*xr2 - Real(33.)*yr4 + Real(18.)*yr2 - Real(1.)) * mom.mom[29] + + Real(315.)*yr*(Real(429.)*xr2*yr4 - Real(330.)*xr2*yr2 + Real(45.)*xr2 - Real(33.)*yr4 + Real(30.)*yr2 - Real(5.)) * mom.mom[32] + + Real(315.)*xr*(Real(429.)*yr6 - Real(495.)*yr4 + Real(135.)*yr2 - Real(5.)) * mom.mom[34] + + Real(315.)*yr*(Real(429.)*yr6 - Real(693.)*yr4 + Real(315.)*yr2 - Real(35.)) * mom.mom[35]); + return phi*(-oneover4pi); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void interp_coef (int i, int ii, Real* AMREX_RESTRICT c, int crse_ratio) +{ + static_assert(openbc::P == 3, "openbc::P is assumed to be 3 here"); + Real xint = (ii-i*crse_ratio + Real(0.5))/static_cast(crse_ratio); + constexpr Real x[] = {-3._rt, -2._rt, -1._rt, 0._rt, 1._rt, 2._rt, 3._rt, 4._rt}; + poly_interp_coeff<8>(xint, x, c); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real interpccx (int ii, int j, int k, Array4 const& phi, int crse_ratio) +{ + int i = amrex::coarsen(ii,crse_ratio); + Real c[8]; + interp_coef(i,ii,c,crse_ratio); + + Real p = Real(0.); + for (int n = 0; n < 8; ++n) { + p += c[n] * phi(i-3+n,j,k); + } + return p; +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real interpccy (int i, int jj, int k, Array4 const& phi, int crse_ratio) +{ + int j = amrex::coarsen(jj,crse_ratio); + Real c[8]; + interp_coef(j,jj,c,crse_ratio); + + Real p = Real(0.); + for (int n = 0; n < 8; ++n) { + p += c[n] * 
phi(i,j-3+n,k); + } + return p; +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real interpccz (int i, int j, int kk, Array4 const& phi, int crse_ratio) +{ + int k = amrex::coarsen(kk,crse_ratio); + Real c[8]; + interp_coef(k,kk,c,crse_ratio); + + Real p = Real(0.); + for (int n = 0; n < 8; ++n) { + p += c[n] * phi(i,j,k-3+n); + } + return p; +} + +}} + +#endif diff --git a/Src/LinearSolvers/OpenBC/Make.package b/Src/LinearSolvers/OpenBC/Make.package new file mode 100644 index 00000000000..5fc39f69371 --- /dev/null +++ b/Src/LinearSolvers/OpenBC/Make.package @@ -0,0 +1,6 @@ + +CEXE_headers += AMReX_OpenBC.H AMReX_OpenBC_K.H +CEXE_sources += AMReX_OpenBC.cpp + +VPATH_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/OpenBC +INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/OpenBC From 8294c3afbcbbc503f77e493196d380fbe1666d02 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 22 Aug 2022 10:46:05 -0700 Subject: [PATCH 052/111] Scope of NonLocalBC::ParallelCopy (#2922) Make NonLocalBC::ParallelCopy accessible in namespace amrex, because it can be useful in situations other than non-local BC. 
--- Src/Base/AMReX_MultiFab.H | 1 + Src/Base/AMReX_NonLocalBC.H | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/Src/Base/AMReX_MultiFab.H b/Src/Base/AMReX_MultiFab.H index 44c76d476f6..bfa377367db 100644 --- a/Src/Base/AMReX_MultiFab.H +++ b/Src/Base/AMReX_MultiFab.H @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef AMREX_USE_EB #include diff --git a/Src/Base/AMReX_NonLocalBC.H b/Src/Base/AMReX_NonLocalBC.H index 7613a35de5b..fd534685a7b 100644 --- a/Src/Base/AMReX_NonLocalBC.H +++ b/Src/Base/AMReX_NonLocalBC.H @@ -1038,4 +1038,13 @@ FillPolar (FabArray& mf, Box const& domain); #include +namespace amrex { + using NonLocalBC::ParallelCopy; + using NonLocalBC::ParallelCopy_nowait; + using NonLocalBC::ParallelCopy_finish; + using NonLocalBC::MultiBlockIndexMapping; + using NonLocalBC::MultiBlockCommMetaData; + using NonLocalBC::CommHandler; +} + #endif From 3d29fd7d0e816f3c436112d90bdefe815e0ff72a Mon Sep 17 00:00:00 2001 From: hengjiew <86926839+hengjiew@users.noreply.github.com> Date: Wed, 24 Aug 2022 16:10:22 -0400 Subject: [PATCH 053/111] Preserve neighbor particles when sorting particles. 
(#2923) --- Src/Particle/AMReX_ParticleContainerI.H | 32 ++++++++++++++----------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index be7763486ab..f6fbe9afc3c 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1117,10 +1117,11 @@ ParticleContainer::So for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) { - auto& ptile = ParticlesAt(lev, mfi); - auto& aos = ptile.GetArrayOfStructs(); - const size_t np = aos.numParticles(); - auto pstruct_ptr = aos().dataPtr(); + auto& ptile = ParticlesAt(lev, mfi); + auto& aos = ptile.GetArrayOfStructs(); + auto pstruct_ptr = aos().dataPtr(); + const size_t np = aos.numParticles(); + const size_t np_total = np + aos.numNeighborParticles(); const Box& box = mfi.validbox(); @@ -1131,26 +1132,26 @@ ParticleContainer::So if (memEfficientSort) { { - ParticleVector tmp_particles(np); + ParticleVector tmp_particles(np_total); auto src = ptile.getParticleTileData(); ParticleType* dst = tmp_particles.data(); - AMREX_HOST_DEVICE_FOR_1D( np, i, + AMREX_HOST_DEVICE_FOR_1D( np_total, i, { - dst[i] = src.m_aos[inds[i]]; + dst[i] = i < np ? src.m_aos[inds[i]] : src.m_aos[i]; }); Gpu::streamSynchronize(); ptile.GetArrayOfStructs()().swap(tmp_particles); } - RealVector tmp_real(np); + RealVector tmp_real(np_total); for (int comp = 0; comp < NArrayReal + m_num_runtime_real; ++comp) { auto src = ptile.GetStructOfArrays().GetRealData(comp).data(); ParticleReal* dst = tmp_real.data(); - AMREX_HOST_DEVICE_FOR_1D( np, i, + AMREX_HOST_DEVICE_FOR_1D( np_total, i, { - dst[i] = src[inds[i]]; + dst[i] = i < np ? 
src[inds[i]] : src[i]; }); Gpu::streamSynchronize(); @@ -1158,13 +1159,13 @@ ParticleContainer::So ptile.GetStructOfArrays().GetRealData(comp).swap(tmp_real); } - IntVector tmp_int(np); + IntVector tmp_int(np_total); for (int comp = 0; comp < NArrayInt + m_num_runtime_int; ++comp) { auto src = ptile.GetStructOfArrays().GetIntData(comp).data(); int* dst = tmp_int.data(); - AMREX_HOST_DEVICE_FOR_1D( np, i, + AMREX_HOST_DEVICE_FOR_1D( np_total , i, { - dst[i] = src[inds[i]]; + dst[i] = i < np ? src[inds[i]] : src[i]; }); Gpu::streamSynchronize(); @@ -1174,8 +1175,11 @@ ParticleContainer::So } else { ParticleTileType ptile_tmp; ptile_tmp.define(m_num_runtime_real, m_num_runtime_int); - ptile_tmp.resize(np); + ptile_tmp.resize(np_total); + // copy re-ordered particles gatherParticles(ptile_tmp, ptile, np, m_bins.permutationPtr()); + // copy neighbor particles + amrex::copyParticles(ptile_tmp, ptile, np, np, np_total-np); ptile.swap(ptile_tmp); } } From acc223f9918284e7d8e595d3861c5e456d84a968 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 30 Aug 2022 16:04:43 -0700 Subject: [PATCH 054/111] Add hypre as an option for OpenBCSolver (#2931) --- Src/LinearSolvers/OpenBC/AMReX_OpenBC.H | 5 ++++ Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp | 31 +++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H index d07c26a9fb3..00d589e34b4 100644 --- a/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H +++ b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H @@ -78,6 +78,9 @@ public: const LPInfo& a_info = LPInfo()); void setVerbose (int v) noexcept; + void setBottomVerbose (int v) noexcept; + + void useHypre (bool use_hypre) noexcept; Real solve (const Vector& a_sol, const Vector& a_rhs, Real a_tol_rel, Real a_tol_abs); @@ -95,6 +98,7 @@ private: #endif int m_verbose = 0; + int m_bottom_verbose = 0; Vector m_geom; Vector m_grids; Vector m_dmap; @@ -103,6 +107,7 @@ private: std::unique_ptr m_poisson_2; 
std::unique_ptr m_mlmg_1; std::unique_ptr m_mlmg_2; + BottomSolver m_bottom_solver_type = BottomSolver::bicgstab; int m_coarsen_ratio = 0; Array m_dpdn; diff --git a/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp index 429d4e79141..778f3ce3830 100644 --- a/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp +++ b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp @@ -173,6 +173,22 @@ void OpenBCSolver::setVerbose (int v) noexcept m_verbose = v; } +void OpenBCSolver::setBottomVerbose (int v) noexcept +{ + m_bottom_verbose = v; +} + +void OpenBCSolver::useHypre (bool use_hypre) noexcept +{ + if (use_hypre) { + m_bottom_solver_type = BottomSolver::hypre; + m_info.setMaxCoarseningLevel(0); +#ifndef AMREX_USE_HYPRE + amrex::Abort("OpenBCSolver: Must enable Hypre support to use it."); +#endif + } +} + Real OpenBCSolver::solve (const Vector& a_sol, const Vector& a_rhs, Real a_tol_rel, Real a_tol_abs) @@ -201,6 +217,13 @@ Real OpenBCSolver::solve (const Vector& a_sol, m_mlmg_1 = std::make_unique(*m_poisson_1); m_mlmg_1->setVerbose(m_verbose); + m_mlmg_1->setBottomVerbose(m_bottom_verbose); + m_mlmg_1->setBottomSolver(m_bottom_solver_type); +#ifdef AMREX_USE_HYPRE + if (m_bottom_solver_type == BottomSolver::hypre) { + m_mlmg_1->setHypreInterface(Hypre::Interface::structed); + } +#endif } m_mlmg_1->solve(a_sol, a_rhs, a_tol_rel, a_tol_abs); @@ -289,6 +312,14 @@ Real OpenBCSolver::solve (const Vector& a_sol, m_mlmg_2 = std::make_unique(*m_poisson_2); m_mlmg_2->setVerbose(m_verbose); + m_mlmg_2->setBottomVerbose(m_bottom_verbose); + m_mlmg_2->setBottomSolver(m_bottom_solver_type); + if (m_bottom_solver_type == BottomSolver::hypre) { +#ifdef AMREX_USE_HYPRE + m_mlmg_2->setHypreInterface(Hypre::Interface::structed); +#else +#endif + } } Vector solv_all = a_sol; Vector rhsv_all = a_rhs; From cc3cd1470254d37f0cea4f212c2b0f6ffa8d0bee Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 1 Sep 2022 07:39:25 -0700 Subject: [PATCH 055/111] Update CHANGES for 22.09 
(#2934) --- CHANGES | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/CHANGES b/CHANGES index 9c2657f7114..726cacffa21 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,38 @@ +# 22.09 + + -- Preserve neighbor particles when sorting particles. (#2923) + + -- Scope of NonLocalBC::ParallelCopy (#2922) + + -- Open Boundary Poisson Solver (#2912) + Add hypre as an option for OpenBCSolver (#2931) + + -- Fix OOB access of ref ratio on HDF write header (#2919) + + -- Add Polaris to GNUMake (#2908) + + -- Export GpuDevice Globals (#2918) + + -- enable LinOp to use the right Factory (fixes moving geometry problem) (#2916) + + -- Use 1 atomic instead of two per item in DenseBins::build (#2911) + + -- [SYCL] Remove amrex::oneapi and update deprecated device descriptors (#2910) + + -- Add: `MultiFab::sum_unique` (#2909) + + -- In MLMG::mgFcycle, assert that for EB the linop is cell-centered. (#2905) + + -- EB: Add Fine Levels (#2881) + + -- Add rpath to lib64 for ZFP. (#2902) + + -- change data types from double to amrex::Real, and thus we can use single precision for the hypre IJ interface (#2896) + + -- MPMD Support (#2895) + + -- MLMG interface (#2858) + # 22.08 -- Let `selectActualNeighbors` return right after starting if there are no From fb0b31e1439b089074514f45ae900af257c66dba Mon Sep 17 00:00:00 2001 From: Nuno Miguel Nobre Date: Sun, 4 Sep 2022 05:18:49 +0100 Subject: [PATCH 056/111] SYCL: Replace deprecated atomic types and operations (#2921) * SYCL: Replace deprecated atomic types and operations * Change atomic refs to device memory scope When using the relaxed memory order, the memory scope is ignored. Thus, for cosmetic reasons only, we set the memory scope to device, the broadest option when using the global address space. 
Co-authored-by: Weiqun Zhang --- Src/Base/AMReX_GpuAtomic.H | 63 ++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H index e6b2780abe0..a07704cb86b 100644 --- a/Src/Base/AMReX_GpuAtomic.H +++ b/Src/Base/AMReX_GpuAtomic.H @@ -30,15 +30,16 @@ namespace detail { { #if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; static_assert(sizeof(R) == sizeof(I), "sizeof R != sizeof I"); I* const add_as_I = reinterpret_cast(address); - sycl::atomic a{sycl::multi_ptr(add_as_I)}; - I old_I = a.load(mo), new_I; + sycl::atomic_ref a{*add_as_I}; + I old_I = a.load(), new_I; do { R const new_R = f(*(reinterpret_cast(&old_I)), val); new_I = *(reinterpret_cast(&new_R)); - } while (! a.compare_exchange_strong(old_I, new_I, mo)); + } while (! a.compare_exchange_strong(old_I, new_I)); return *(reinterpret_cast(&old_I)); #else R old = *address; @@ -53,17 +54,18 @@ namespace detail { { #if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; static_assert(sizeof(R) == sizeof(I), "sizeof R != sizeof I"); I* const add_as_I = reinterpret_cast(address); - sycl::atomic a{sycl::multi_ptr(add_as_I)}; - I old_I = a.load(mo), new_I; + sycl::atomic_ref a{*add_as_I}; + I old_I = a.load(), new_I; bool test_success; do { R const tmp = op(*(reinterpret_cast(&old_I)), val); new_I = *(reinterpret_cast(&tmp)); test_success = cond(tmp); - } while (test_success && ! a.compare_exchange_strong(old_I, new_I, mo)); + } while (test_success && ! 
a.compare_exchange_strong(old_I, new_I)); return test_success; #else R old = *address; @@ -131,9 +133,10 @@ namespace detail { return atomicAdd(sum, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(sum)}; - return a.fetch_add(value, mo); + sycl::atomic_ref a{*sum}; + return a.fetch_add(value); #else amrex::ignore_unused(sum, value); return T(); // should never get here, but have to return something @@ -313,9 +316,10 @@ namespace detail { return atomicMin(m, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - return a.fetch_min(value, mo); + sycl::atomic_ref a{*m}; + return a.fetch_min(value); #else amrex::ignore_unused(m,value); return T(); // should never get here, but have to return something @@ -373,9 +377,10 @@ namespace detail { return atomicMax(m, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - return a.fetch_max(value, mo); + sycl::atomic_ref a{*m}; + return a.fetch_max(value); #else amrex::ignore_unused(m,value); return T(); // should never get here, but have to return something @@ -430,9 +435,10 @@ namespace detail { return atomicOr(m, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - return a.fetch_or(value, mo); + sycl::atomic_ref a{*m}; + return a.fetch_or(value); #else int const old = *m; 
*m = (*m) || value; @@ -451,9 +457,10 @@ namespace detail { return atomicAnd(m, value ? ~0x0 : 0); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - return a.fetch_and(value ? ~0x0 : 0, mo); + sycl::atomic_ref a{*m}; + return a.fetch_and(value ? ~0x0 : 0); #else int const old = *m; *m = (*m) && value; @@ -472,11 +479,12 @@ namespace detail { { #if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; - sycl::atomic a{sycl::multi_ptr(m)}; - unsigned int oldi = a.load(mo), newi; + constexpr auto ms = sycl::memory_scope::device; + sycl::atomic_ref a{*m}; + unsigned int oldi = a.load(), newi; do { newi = (oldi >= value) ? 0u : (oldi+1u); - } while (! a.compare_exchange_strong(oldi, newi, mo)); + } while (! a.compare_exchange_strong(oldi, newi)); return oldi; #else auto const old = *m; @@ -509,12 +517,13 @@ namespace detail { return atomicDec(m, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - unsigned int oldi = a.load(mo), newi; + sycl::atomic_ref a{*m}; + unsigned int oldi = a.load(), newi; do { newi = ((oldi == 0u) || (oldi > value)) ? value : (oldi-1u); - } while (! a.compare_exchange_strong(oldi, newi, mo)); + } while (! 
a.compare_exchange_strong(oldi, newi)); return oldi; #else auto const old = *m; @@ -535,9 +544,10 @@ namespace detail { return atomicExch(address, val); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(address)}; - return sycl::atomic_exchange(a, val, mo); + sycl::atomic_ref a{*address}; + return a.exchange(val); #else auto const old = *address; *address = val; @@ -557,9 +567,10 @@ namespace detail { return atomicCAS(address, compare, val); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(address)}; - a.compare_exchange_strong(compare, val, mo); + sycl::atomic_ref a{*address}; + a.compare_exchange_strong(compare, val); return compare; #else auto const old = *address; From 8f8198c2fb1868704d2b4d14b5b93d8d1d264ea0 Mon Sep 17 00:00:00 2001 From: hengjiew <86926839+hengjiew@users.noreply.github.com> Date: Tue, 6 Sep 2022 13:36:35 -0400 Subject: [PATCH 057/111] Check if boundary particles container has been created before clearance. (#2935) This fixes a segmentation fault when using more GPUs for updating particles than fluid. 
--- Src/Particle/AMReX_NeighborParticlesI.H | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Src/Particle/AMReX_NeighborParticlesI.H b/Src/Particle/AMReX_NeighborParticlesI.H index 33cc47d0d5e..202f41f87f3 100644 --- a/Src/Particle/AMReX_NeighborParticlesI.H +++ b/Src/Particle/AMReX_NeighborParticlesI.H @@ -822,8 +822,10 @@ selectActualNeighbors (CheckPair&& check_pair, int num_cells) for (int lev = 0; lev < this->numLevels(); ++lev) { // clear previous neighbor particle ids - for (auto& keyval: m_boundary_particle_ids[lev]) { - keyval.second.clear(); + if (!m_boundary_particle_ids.empty()) { + for (auto& keyval: m_boundary_particle_ids[lev]) { + keyval.second.clear(); + } } for (MyParIter pti(*this, lev); pti.isValid(); ++pti) { From 35ed6b4d343215c1ccf6e4d0a59813fc236c9f22 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Tue, 6 Sep 2022 15:07:16 -0700 Subject: [PATCH 058/111] Fix: Loading Files Again (#2936) This enables that `amrex::ParmParse::addfile` can be called multiple times. Before this, we accidentally overwrote the `FILE` static keyword. Follow-up to #2842 --- Src/Base/AMReX_ParmParse.H | 2 +- Src/Base/AMReX_ParmParse.cpp | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Src/Base/AMReX_ParmParse.H b/Src/Base/AMReX_ParmParse.H index 6555ee5aec0..504aaa4f256 100644 --- a/Src/Base/AMReX_ParmParse.H +++ b/Src/Base/AMReX_ParmParse.H @@ -554,7 +554,7 @@ public: const std::string& val); //! keyword for files to load - static std::string FileKeyword; + static std::string const FileKeyword; //! Add keys and values from a file to the end of the PP table.
static void addfile (std::string const filename); diff --git a/Src/Base/AMReX_ParmParse.cpp b/Src/Base/AMReX_ParmParse.cpp index 79e80fbb8bd..253ad0e37e0 100644 --- a/Src/Base/AMReX_ParmParse.cpp +++ b/Src/Base/AMReX_ParmParse.cpp @@ -34,7 +34,7 @@ static bool finalize_verbose = false; static bool finalize_verbose = true; #endif -std::string ParmParse::FileKeyword = "FILE"; +std::string const ParmParse::FileKeyword = "FILE"; // // Used by constructor to build table. @@ -609,7 +609,8 @@ addDefn (std::string& def, tab.push_back(ParmParse::PP_entry(def,val)); } val.clear(); - def = std::string(); + if ( def != ParmParse::FileKeyword ) + def = std::string(); } void @@ -991,7 +992,8 @@ ParmParse::prefixedName (const std::string& str) const void ParmParse::addfile (std::string const filename) { auto l = std::list{filename}; - addDefn(FileKeyword, + auto file = FileKeyword; + addDefn(file, l, g_table); } From 539427a19b20e49c4f7399c8ea0b0515fb5c79a0 Mon Sep 17 00:00:00 2001 From: drangara <69211175+drangara@users.noreply.github.com> Date: Tue, 6 Sep 2022 18:13:42 -0400 Subject: [PATCH 059/111] EB checkpoint files (#2897) * support for loading EB from checkpoint file * add support for writing chkpt file as well Co-authored-by: Weiqun Zhang --- Src/EB/AMReX_EB2.H | 9 + Src/EB/AMReX_EB2.cpp | 15 + Src/EB/AMReX_EB2_2D_C.cpp | 7 + Src/EB/AMReX_EB2_3D_C.cpp | 8 + Src/EB/AMReX_EB2_C.H | 8 + Src/EB/AMReX_EB2_IndexSpace_chkpt_file.H | 47 +++ Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp | 86 ++++++ Src/EB/AMReX_EB2_Level.H | 2 + Src/EB/AMReX_EB2_Level.cpp | 11 + Src/EB/AMReX_EB2_Level_chkpt_file.H | 31 ++ Src/EB/AMReX_EB2_Level_chkpt_file.cpp | 203 +++++++++++++ Src/EB/AMReX_EB_chkpt_file.H | 60 ++++ Src/EB/AMReX_EB_chkpt_file.cpp | 324 +++++++++++++++++++++ Src/EB/CMakeLists.txt | 6 + Src/EB/Make.package | 6 + 15 files changed, 823 insertions(+) create mode 100644 Src/EB/AMReX_EB2_IndexSpace_chkpt_file.H create mode 100644 Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp create mode 
100644 Src/EB/AMReX_EB2_Level_chkpt_file.H create mode 100644 Src/EB/AMReX_EB2_Level_chkpt_file.cpp create mode 100644 Src/EB/AMReX_EB_chkpt_file.H create mode 100644 Src/EB/AMReX_EB_chkpt_file.cpp diff --git a/Src/EB/AMReX_EB2.H b/Src/EB/AMReX_EB2.H index 6a143bf2a9c..def8d2de9e0 100644 --- a/Src/EB/AMReX_EB2.H +++ b/Src/EB/AMReX_EB2.H @@ -128,6 +128,15 @@ void Build (const Geometry& geom, bool extend_domain_face = ExtendDomainFace(), int num_coarsen_opt = NumCoarsenOpt()); + +void BuildFromChkptFile (std::string const& fname, + const Geometry& geom, + int required_coarsening_level, + int max_coarsening_level, + int ngrow = 4, + bool build_coarse_level_by_coarsening = true, + bool extend_domain_face = ExtendDomainFace()); + int maxCoarseningLevel (const Geometry& geom); int maxCoarseningLevel (IndexSpace const* ebis, const Geometry& geom); diff --git a/Src/EB/AMReX_EB2.cpp b/Src/EB/AMReX_EB2.cpp index 4f2ad5bf873..fc2d75e0a01 100644 --- a/Src/EB/AMReX_EB2.cpp +++ b/Src/EB/AMReX_EB2.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -239,6 +240,20 @@ void addFineLevels (int num_new_fine_levels) } } +void +BuildFromChkptFile (std::string const& fname, + const Geometry& geom, int required_coarsening_level, + int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, + bool a_extend_domain_face) +{ + ChkptFile chkpt_file(fname); + IndexSpace::push(new IndexSpaceChkptFile(chkpt_file, + geom, required_coarsening_level, + max_coarsening_level, ngrow, + build_coarse_level_by_coarsening, + a_extend_domain_face)); +} + namespace { static int comp_max_crse_level (Box cdomain, const Box& domain) { diff --git a/Src/EB/AMReX_EB2_2D_C.cpp b/Src/EB/AMReX_EB2_2D_C.cpp index bf17844658c..060ed8f4df4 100644 --- a/Src/EB/AMReX_EB2_2D_C.cpp +++ b/Src/EB/AMReX_EB2_2D_C.cpp @@ -391,6 +391,13 @@ void build_cells (Box const& bx, Array4 const& cell, }); } + set_connection_flags(bxg1, cell, fx, fy); +} + +void set_connection_flags (Box 
const& bxg1, + Array4 const& cell, + Array4 const& fx, Array4 const& fy) noexcept +{ // Build neighbors. By default, all neighbors are already set. AMREX_HOST_DEVICE_FOR_3D ( bxg1, i, j, k, { diff --git a/Src/EB/AMReX_EB2_3D_C.cpp b/Src/EB/AMReX_EB2_3D_C.cpp index 0077d817ae4..8c8b1e6ed7e 100644 --- a/Src/EB/AMReX_EB2_3D_C.cpp +++ b/Src/EB/AMReX_EB2_3D_C.cpp @@ -936,6 +936,14 @@ void build_cells (Box const& bx, Array4 const& cell, return; } + set_connection_flags(bx, bxg1, cell, ctmp, fx, fy, fz); +} + +void set_connection_flags (Box const& bx, + Box const& bxg1, Array4 const& cell, + Array4 const& ctmp, Array4 const& fx, + Array4 const& fy, Array4 const& fz) noexcept +{ // Build neighbors. By default all 26 neighbors are already set. AMREX_HOST_DEVICE_FOR_3D ( bxg1, i, j, k, { diff --git a/Src/EB/AMReX_EB2_C.H b/Src/EB/AMReX_EB2_C.H index 7e752f3d051..0be84fdc913 100644 --- a/Src/EB/AMReX_EB2_C.H +++ b/Src/EB/AMReX_EB2_C.H @@ -36,6 +36,9 @@ void build_cells (Box const& bx, Array4 const& cell, Real small_volfrac, Geometry const& geom, bool extend_domain_face, int& nsmallcells, int const nmulticuts) noexcept; +void set_connection_flags(Box const& bxg1, Array4 const& cell, + Array4 const& fx, Array4 const& fy) noexcept; + #elif (AMREX_SPACEDIM == 3) int build_faces (Box const& bx, Array4 const& cell, @@ -67,6 +70,11 @@ void build_cells (Box const& bx, Array4 const& cell, bool extend_domain_face, bool cover_multiple_cuts, int& nsmallcells, int& nmulticuts) noexcept; +void set_connection_flags(Box const& bx, Box const& bxg1, + Array4 const& cell, Array4 const& ctmp, + Array4 const& fx, Array4 const& fy, + Array4 const& fz) noexcept; + #endif void intercept_to_edge_centroid (AMREX_D_DECL(Array4 const& excent, diff --git a/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.H b/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.H new file mode 100644 index 00000000000..3285978744a --- /dev/null +++ b/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.H @@ -0,0 +1,47 @@ +#ifndef 
AMREX_EB2_INDEXSPACE_CHKPTFILE_H_ +#define AMREX_EB2_INDEXSPACE_CHKPTFILE_H_ +#include + +#include +#include + +#include + +namespace amrex { namespace EB2 { + +class IndexSpaceChkptFile + : public IndexSpace +{ +public: + + IndexSpaceChkptFile (const ChkptFile& chkptfile, + const Geometry& geom, int required_coarsening_level, + int max_coarsening_level, int ngrow, + bool build_coarse_level_by_coarsening, + bool extend_domain_face); + + IndexSpaceChkptFile (IndexSpaceChkptFile const&) = delete; + IndexSpaceChkptFile (IndexSpaceChkptFile &&) = delete; + void operator= (IndexSpaceChkptFile const&) = delete; + void operator= (IndexSpaceChkptFile &&) = delete; + + virtual ~IndexSpaceChkptFile () {} + + virtual const Level& getLevel (const Geometry& geom) const final; + virtual const Geometry& getGeometry (const Box& dom) const final; + virtual const Box& coarsestDomain () const final { + return m_geom.back().Domain(); + } + virtual void addFineLevels (int num_new_fine_levels) final; + +private: + + Vector m_chkpt_file_level; + Vector m_geom; + Vector m_domain; + Vector m_ngrow; +}; + +}} + +#endif diff --git a/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp b/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp new file mode 100644 index 00000000000..b0318dd402c --- /dev/null +++ b/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp @@ -0,0 +1,86 @@ +#include + +namespace amrex { namespace EB2 { + +IndexSpaceChkptFile::IndexSpaceChkptFile (const ChkptFile& chkpt_file, + const Geometry& geom, int required_coarsening_level, + int max_coarsening_level, int ngrow, + bool build_coarse_level_by_coarsening, + bool extend_domain_face) +{ + Gpu::LaunchSafeGuard lsg(true); // Always use GPU + + // build finest level (i.e., level 0) first + AMREX_ALWAYS_ASSERT(required_coarsening_level >= 0 && required_coarsening_level <= 30); + max_coarsening_level = std::max(required_coarsening_level,max_coarsening_level); + max_coarsening_level = std::min(30,max_coarsening_level); + + int ngrow_finest = 
std::max(ngrow,0); + for (int i = 1; i <= required_coarsening_level; ++i) { + ngrow_finest *= 2; + } + + m_geom.push_back(geom); + m_domain.push_back(geom.Domain()); + m_ngrow.push_back(ngrow_finest); + m_chkpt_file_level.reserve(max_coarsening_level+1); + m_chkpt_file_level.emplace_back(this, chkpt_file, geom, EB2::max_grid_size, ngrow_finest, + extend_domain_face); + + for (int ilev = 1; ilev <= max_coarsening_level; ++ilev) + { + bool coarsenable = m_geom.back().Domain().coarsenable(2,2); + if (!coarsenable) { + if (ilev <= required_coarsening_level) { + amrex::Abort("IndexSpaceImp: domain is not coarsenable at level "+std::to_string(ilev)); + } else { + break; + } + } + + int ng = (ilev > required_coarsening_level) ? 0 : m_ngrow.back()/2; + + Box cdomain = amrex::coarsen(m_geom.back().Domain(),2); + Geometry cgeom = amrex::coarsen(m_geom.back(),2); + m_chkpt_file_level.emplace_back(this, ilev, EB2::max_grid_size, ng, cgeom, m_chkpt_file_level[ilev-1]); + if (!m_chkpt_file_level.back().isOK()) { + m_chkpt_file_level.pop_back(); + if (ilev <= required_coarsening_level) { + if (build_coarse_level_by_coarsening) { + amrex::Abort("Failed to build required coarse EB level "+std::to_string(ilev)); + } else { + amrex::Abort("Chkptfile only stored for finest level. 
Failed to build "+std::to_string(ilev)); + } + } else { + break; + } + } + m_geom.push_back(cgeom); + m_domain.push_back(cdomain); + m_ngrow.push_back(ng); + } +} + +const Level& +IndexSpaceChkptFile::getLevel (const Geometry& geom) const +{ + auto it = std::find(std::begin(m_domain), std::end(m_domain), geom.Domain()); + int i = std::distance(m_domain.begin(), it); + return m_chkpt_file_level[i]; +} + +const Geometry& +IndexSpaceChkptFile::getGeometry (const Box& dom) const +{ + auto it = std::find(std::begin(m_domain), std::end(m_domain), dom); + int i = std::distance(m_domain.begin(), it); + return m_geom[i]; +} + +void +IndexSpaceChkptFile::addFineLevels (int /*num_new_fine_levels*/) +{ + amrex::Abort("IndexSpaceChkptFile::addFineLevels: not supported"); +} + +}} diff --git a/Src/EB/AMReX_EB2_Level.H b/Src/EB/AMReX_EB2_Level.H index c42ff2ad5bc..8ebc864b903 100644 --- a/Src/EB/AMReX_EB2_Level.H +++ b/Src/EB/AMReX_EB2_Level.H @@ -60,6 +60,8 @@ public: const Geometry& Geom () const noexcept { return m_geom; } IndexSpace const* getEBIndexSpace () const noexcept { return m_parent; } + void write_to_chkpt_file (const std::string& fname, bool extend_domain_face, int max_grid_size) const; + protected: Level (Level && rhs) = default; diff --git a/Src/EB/AMReX_EB2_Level.cpp b/Src/EB/AMReX_EB2_Level.cpp index 46277b59ab1..09b6db4a54c 100644 --- a/Src/EB/AMReX_EB2_Level.cpp +++ b/Src/EB/AMReX_EB2_Level.cpp @@ -1,6 +1,7 @@ #include #include +#include #include namespace amrex { namespace EB2 { @@ -916,4 +917,14 @@ Level::fillLevelSet (MultiFab& levelset, const Geometry& geom) const } } +void +Level::write_to_chkpt_file (const std::string& fname, bool extend_domain_face, int max_grid_size) const +{ + ChkptFile chkptFile(fname); + chkptFile.write_to_chkpt_file(m_grids, m_covered_grids, + m_volfrac, m_centroid, m_bndryarea, m_bndrycent, + m_bndrynorm, m_areafrac, m_facecent, m_edgecent, m_levelset, + m_geom, m_ngrow, extend_domain_face, max_grid_size); +} + }} diff --git 
a/Src/EB/AMReX_EB2_Level_chkpt_file.H b/Src/EB/AMReX_EB2_Level_chkpt_file.H new file mode 100644 index 00000000000..881dd8f22f0 --- /dev/null +++ b/Src/EB/AMReX_EB2_Level_chkpt_file.H @@ -0,0 +1,31 @@ +#ifndef AMREX_EB2_LEVEL_CHKPT_FILE_H_ +#define AMREX_EB2_LEVEL_CHKPT_FILE_H_ +#include + +#include +#include + +namespace amrex { namespace EB2 { + +class ChkptFileLevel + : public GShopLevel +{ +public: + + ChkptFileLevel (IndexSpace const* is, ChkptFile const& chkpt_file, const Geometry& geom, + int max_grid_size, int ngrow, bool extend_domain_face); + + ChkptFileLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, + const Geometry& geom, ChkptFileLevel& fineLevel); + +// for cuda support + void define_fine_chkpt_file (ChkptFile const& chkpt_file, + Geometry const& geom, int max_grid_size, int ngrow, + bool extend_domain_face); + + void finalize_cell_flags (); //sets the connection flags and adjustments to cellflags +}; + +}} + +#endif diff --git a/Src/EB/AMReX_EB2_Level_chkpt_file.cpp b/Src/EB/AMReX_EB2_Level_chkpt_file.cpp new file mode 100644 index 00000000000..0b2d88e828f --- /dev/null +++ b/Src/EB/AMReX_EB2_Level_chkpt_file.cpp @@ -0,0 +1,203 @@ +#include +#include + +#include + +namespace amrex { namespace EB2 { + +ChkptFileLevel::ChkptFileLevel (IndexSpace const* is, ChkptFile const& chkpt_file, + Geometry const& geom, int max_grid_size, int ngrow, bool extend_domain_face) + : GShopLevel(is, geom) +{ + BL_PROFILE("EB2::ChkptFileLevel()-fine"); + + define_fine_chkpt_file(chkpt_file, geom, max_grid_size, ngrow, extend_domain_face); +} + +void +ChkptFileLevel::define_fine_chkpt_file (ChkptFile const& chkpt_file, + Geometry const& geom, int max_grid_size, + int ngrow, bool extend_domain_face) +{ + BL_PROFILE("EB2::ChkptFileLevel()-define-fine-chkptfile"); + + m_ngrow = IntVect{static_cast(std::ceil(ngrow/16.)) * 16}; + + Box const& domain = geom.Domain(); + Box domain_grown = domain; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if 
(geom.isPeriodic(idim)) { + m_ngrow[idim] = 0; + } else { + m_ngrow[idim] = std::min(m_ngrow[idim], domain_grown.length(idim)); + } + } + + const int ng = GFab::ng; + chkpt_file.read_from_chkpt_file(m_grids, m_covered_grids, + m_dmap, m_volfrac, m_centroid, m_bndryarea, + m_bndrycent, m_bndrynorm, m_areafrac, m_facecent, + m_edgecent, m_levelset, ng, geom, m_ngrow, + extend_domain_face, max_grid_size); + + + if ( m_grids.empty() && + !m_covered_grids.empty()) + { + Abort("AMReX_EB2_Level.H: Domain is completely covered"); + } + + if (m_grids.empty()) { + m_allregular = true; + m_ok = true; + return; + } + + + m_mgf.define(m_grids, m_dmap); + MFInfo mf_info; + m_cellflag.define(m_grids, m_dmap, 1, ng, mf_info); + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(m_mgf); mfi.isValid(); ++mfi) + { + auto& gfab = m_mgf[mfi]; + + const auto& levelset = m_levelset.const_array(mfi); + const Box& bxg2 = amrex::grow(gfab.validbox(),ng); + const Box& nodal_box = amrex::surroundingNodes(bxg2); + const auto& ls = gfab.getLevelSet().array(); + + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(nodal_box, i, j, k, + { + ls(i,j,k) = levelset(i,j,k); + }); + + auto& cellflag = m_cellflag[mfi]; + gfab.buildTypes(cellflag); + } + + finalize_cell_flags(); +} + +void +ChkptFileLevel::finalize_cell_flags () +{ + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + { + EBCellFlagFab cellflagtmp; + for (MFIter mfi(m_mgf); mfi.isValid(); ++mfi) + { + auto& gfab = m_mgf[mfi]; + const Box& vbx = mfi.validbox(); + const Box& bxg1 = amrex::grow(vbx,1); + Array4 const& cell = m_cellflag.array(mfi); + + cellflagtmp.resize(m_cellflag[mfi].box()); + Elixir cellflagtmp_eli = cellflagtmp.elixir(); + Array4 const& ctmp = cellflagtmp.array(); + + auto& facetype = gfab.getFaceType(); + AMREX_D_TERM(Array4 const& fx = facetype[0].array();, + Array4 const& fy = facetype[1].array();, + Array4 const& fz = facetype[2].array();); + + + 
AMREX_D_TERM(Array4 const& apx = m_areafrac[0].const_array(mfi);, + Array4 const& apy = m_areafrac[1].const_array(mfi);, + Array4 const& apz = m_areafrac[2].const_array(mfi);); + + const Box& xbx = amrex::grow(amrex::surroundingNodes(vbx,0),1); + AMREX_HOST_DEVICE_FOR_3D ( xbx, i, j, k, + { + if (apx(i,j,k) == 0.0_rt) { + fx(i,j,k) = Type::covered; + } else if (apx(i,j,k) == 1.0_rt) { + fx(i,j,k) = Type::regular; + } + }); + + const Box& ybx = amrex::grow(amrex::surroundingNodes(vbx,1),1); + AMREX_HOST_DEVICE_FOR_3D ( ybx, i, j, k, + { + if (apy(i,j,k) == 0.0_rt) { + fy(i,j,k) = Type::covered; + } else if (apy(i,j,k) == 1.0_rt) { + fy(i,j,k) = Type::regular; + } + }); + + #if (AMREX_SPACEDIM == 3) + const Box& zbx = amrex::grow(amrex::surroundingNodes(vbx,2),1); + AMREX_HOST_DEVICE_FOR_3D ( zbx, i, j, k, + { + if (apz(i,j,k) == 0.0_rt) { + fz(i,j,k) = Type::covered; + } else if (apz(i,j,k) == 1.0_rt) { + fz(i,j,k) = Type::regular; + } + }); + #endif + + + #if (AMREX_SPACEDIM == 2) + ignore_unused(ctmp); + AMREX_HOST_DEVICE_FOR_3D ( bxg1, i, j, k, + { + ignore_unused(k); + if (cell(i,j,0).isSingleValued()) { + if (fx(i,j,0) == Type::regular && fx(i+1,j,0) == Type::regular && + fy(i,j,0) == Type::regular && fy(i,j+1,0) == Type::regular) + { + cell(i,j,0).setRegular(); + } + else if (fx(i,j,0) == Type::covered && fx(i+1,j,0) == Type::covered && + fy(i,j,0) == Type::covered && fy(i,j+1,0) == Type::covered) + { + cell(i,j,0).setCovered(); + } + } + }); + + set_connection_flags(bxg1, cell, fx, fy); + + #else + AMREX_HOST_DEVICE_FOR_3D ( bxg1, i, j, k, + { + if (cell(i,j,k).isSingleValued()) { + if (fx(i,j,k) == Type::covered && fx(i+1,j,k) == Type::covered && + fy(i,j,k) == Type::covered && fy(i,j+1,k) == Type::covered && + fz(i,j,k) == Type::covered && fz(i,j,k+1) == Type::covered) + { + cell(i,j,k).setCovered(); + } + else if (fx(i,j,k) == Type::regular && fx(i+1,j,k) == Type::regular && + fy(i,j,k) == Type::regular && fy(i,j+1,k) == Type::regular && + fz(i,j,k) == 
Type::regular && fz(i,j,k+1) == Type::regular) + { + cell(i,j,k).setRegular(); + } + } + }); + + set_connection_flags(vbx, bxg1, cell, ctmp, fx, fy, fz); + + #endif + + } + + m_ok = true; + } +} + +ChkptFileLevel::ChkptFileLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, + const Geometry& geom, ChkptFileLevel& fineLevel) +: GShopLevel(is, ilev, max_grid_size, ngrow, geom, fineLevel) +{} + +}} diff --git a/Src/EB/AMReX_EB_chkpt_file.H b/Src/EB/AMReX_EB_chkpt_file.H new file mode 100644 index 00000000000..781db55a1d8 --- /dev/null +++ b/Src/EB/AMReX_EB_chkpt_file.H @@ -0,0 +1,60 @@ +#ifndef AMREX_EB_CHKPT_FILE_H_ +#define AMREX_EB_CHKPT_FILE_H_ + +#include + +namespace amrex { namespace EB2 { + +class ChkptFile +{ +private: + std::string m_restart_file = ""; + + const std::string m_volfrac_name = "volfrac"; + const std::string m_centroid_name = "centroid"; + const std::string m_bndryarea_name = "bndryarea"; + const std::string m_bndrycent_name = "bndrycent"; + const std::string m_bndrynorm_name = "bndrynorm"; + const std::string m_levelset_name = "levelset"; + + const amrex::Vector m_areafrac_name + = {AMREX_D_DECL("areafrac_x", "areafrac_y", "areafrac_z")}; + const amrex::Vector m_facecent_name + = {AMREX_D_DECL("facecent_x", "facecent_y", "facecent_z")}; + const amrex::Vector m_edgecent_name + = {AMREX_D_DECL("edgecent_x", "edgecent_y", "edgecent_z")}; + + void writeHeader (const BoxArray& cut_ba, const BoxArray& covered_ba, const Geometry& geom, + const IntVect& ngrow, bool extend_domain_face, int max_grid_size) const; + + void writeToFile (const MultiFab& mf, const std::string& mf_name) const; + + +public: + ChkptFile (const std::string &fname); + + void read_from_chkpt_file (BoxArray& cut_grids, BoxArray& covered_grids, + DistributionMapping& dmap, + MultiFab& volfrac, MultiFab& centroid, MultiFab& bndryarea, + MultiFab& bndrycent, MultiFab& bndrynorm, + Array& areafrac, + Array& facecent, + Array& edgecent, + MultiFab& levelset, int ng_gfab, 
const Geometry& geom, + const IntVect& ngrow_finest, bool extend_domain_face, int max_grid_size) const; + + void write_to_chkpt_file (const BoxArray& cut_grids, + const BoxArray& covered_grids, + const MultiFab& volfrac, + const MultiFab& centroid, const MultiFab& bndryarea, + const MultiFab& bndrycent, const MultiFab& bndrynorm, + const Array& areafrac, + const Array& facecent, + const Array& edgecent, + const MultiFab& levelset, const Geometry& geom, + const IntVect& ngrow, bool extend_domain_face, int max_grid_size) const; +}; + +}} + +#endif diff --git a/Src/EB/AMReX_EB_chkpt_file.cpp b/Src/EB/AMReX_EB_chkpt_file.cpp new file mode 100644 index 00000000000..cd1c00e9ee5 --- /dev/null +++ b/Src/EB/AMReX_EB_chkpt_file.cpp @@ -0,0 +1,324 @@ +#include + +#include +#include +#include // amrex::VisMF::Write(MultiFab) +#include // amrex::[read,write]IntData(array_of_ints) + +namespace { + +const std::string level_prefix = "Level_"; + +void gotoNextLine (std::istream& is) +{ + constexpr std::streamsize bl_ignore_max { 100000 }; + is.ignore(bl_ignore_max, '\n'); +} + +} + +namespace amrex { namespace EB2 { + +// Header information includes the cut and covered boxes (if any) +// Checkpoint file contains data for cut boxes +void +ChkptFile::writeHeader (const BoxArray& cut_ba, const BoxArray& covered_ba, + const Geometry& geom, + const IntVect& ngrow, bool extend_domain_face, + int max_grid_size) const +{ + if (ParallelDescriptor::IOProcessor()) + { + std::string HeaderFileName(m_restart_file + "/Header"); + VisMF::IO_Buffer io_buffer(VisMF::IO_Buffer_Size); + std::ofstream HeaderFile; + + HeaderFile.rdbuf()->pubsetbuf(io_buffer.dataPtr(), io_buffer.size()); + + HeaderFile.open(HeaderFileName.c_str(), std::ofstream::out | + std::ofstream::trunc | + std::ofstream::binary); + + if ( ! 
HeaderFile.good() ) + FileOpenFailed(HeaderFileName); + + HeaderFile.precision(17); + + HeaderFile << "Checkpoint version: 1\n"; + + const int nlevels = 1; + HeaderFile << nlevels << "\n"; + + // Geometry + for (int i = 0; i < AMREX_SPACEDIM; ++i) + HeaderFile << geom.ProbLo(i) << ' '; + HeaderFile << '\n'; + + for (int i = 0; i < AMREX_SPACEDIM; ++i) + HeaderFile << geom.ProbHi(i) << ' '; + HeaderFile << '\n'; + + // ngrow + for (int i = 0; i < AMREX_SPACEDIM; ++i) + HeaderFile << ngrow[i] << ' '; + HeaderFile << '\n'; + + // extend domain face + HeaderFile << extend_domain_face << "\n"; + + // max grid size + HeaderFile << max_grid_size << "\n"; + + // BoxArray + for (int lev = 0; lev < nlevels; ++lev) + { + cut_ba.writeOn(HeaderFile); + HeaderFile << '\n'; + + if (! covered_ba.empty()) { + covered_ba.writeOn(HeaderFile); + HeaderFile << '\n'; + } + } + } +} + +void +ChkptFile::writeToFile (const MultiFab& mf, const std::string& mf_name) const +{ + VisMF::Write(mf, MultiFabFileFullPrefix(0, m_restart_file, + level_prefix, mf_name)); +} + + +ChkptFile::ChkptFile (const std::string &fname) + : m_restart_file(fname) +{} + +void +ChkptFile::read_from_chkpt_file (BoxArray& cut_grids, BoxArray& covered_grids, + DistributionMapping& dmap, + MultiFab& volfrac, MultiFab& centroid, + MultiFab& bndryarea, MultiFab& bndrycent, + MultiFab& bndrynorm, Array& areafrac, + Array& facecent, + Array& edgecent, + MultiFab& levelset, int ng_gfab, const Geometry& geom, + const IntVect& ngrow_finest, bool extend_domain_face, + int max_grid_size) const +{ + Real prob_lo[AMREX_SPACEDIM]; + Real prob_hi[AMREX_SPACEDIM]; + + std::string File(m_restart_file + "/Header"); + + if (amrex::Verbose()) amrex::Print() << "file=" << File << std::endl; + + VisMF::IO_Buffer io_buffer(VisMF::GetIOBufferSize()); + + Vector fileCharPtr; + ParallelDescriptor::ReadAndBcastFile(File, fileCharPtr); + std::string fileCharPtrString(fileCharPtr.dataPtr()); + std::istringstream is(fileCharPtrString, 
std::istringstream::in); + + std::string line, word; + + std::getline(is, line); + + int nlevs; + is >> nlevs; + gotoNextLine(is); + AMREX_ASSERT(nlevs == 1); + + std::getline(is, line); + { + std::istringstream lis(line); + int i = 0; + while (lis >> word) { + prob_lo[i++] = std::stod(word); + } + } + + std::getline(is, line); + { + std::istringstream lis(line); + int i = 0; + while (lis >> word) { + prob_hi[i++] = std::stod(word); + } + } + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(Math::abs(prob_lo[idim] - geom.ProbLo()[idim]) < std::numeric_limits::epsilon(), + "EB2::ChkptFile cannot read from a different problem domain"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(Math::abs(prob_hi[idim] - geom.ProbHi()[idim]) < std::numeric_limits::epsilon(), + "EB2::ChkptFile cannot read from a different problem domain"); + } + + IntVect ngrow_chkptfile; + std::getline(is, line); + { + std::istringstream lis(line); + int i = 0; + while (lis >> word) { + ngrow_chkptfile[i++] = std::stoi(word); + } + } + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ngrow_chkptfile == ngrow_finest, "EB2::ChkptFile cannot read from different ngrow"); + + bool edf_chkptfile; + is >> edf_chkptfile; + gotoNextLine(is); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(extend_domain_face == edf_chkptfile, + "EB2::ChkptFile cannot read from different extend_domain_face"); + + int mgs_chkptfile; + is >> mgs_chkptfile; + gotoNextLine(is); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(max_grid_size == mgs_chkptfile, + "EB2::ChkptFile cannot read from different max_grid_size"); + + if (amrex::Verbose()) amrex::Print() << "Loading cut_grids\n"; + cut_grids.readFrom(is); + gotoNextLine(is); + + if (is.peek() != EOF) { + if (amrex::Verbose()) amrex::Print() << "Loading covered_grids\n"; + covered_grids.readFrom(is); + gotoNextLine(is); + } + + dmap.define(cut_grids, ParallelDescriptor::NProcs()); + + // volfrac + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_volfrac_name << std::endl; + + 
volfrac.define(cut_grids, dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_volfrac_name); + VisMF::Read(volfrac, prefix); + } + + // centroid + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_centroid_name << std::endl; + + centroid.define(cut_grids, dmap, AMREX_SPACEDIM, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_centroid_name); + VisMF::Read(centroid, prefix); + } + + // bndryarea + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_bndryarea_name << std::endl; + + bndryarea.define(cut_grids, dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_bndryarea_name); + VisMF::Read(bndryarea, prefix); + } + + // bndrycent + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_bndrycent_name << std::endl; + + bndrycent.define(cut_grids, dmap, AMREX_SPACEDIM, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_bndrycent_name); + VisMF::Read(bndrycent, prefix); + } + + // bndrynorm + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_bndrynorm_name << std::endl; + + bndrynorm.define(cut_grids, dmap, AMREX_SPACEDIM, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_bndrynorm_name); + VisMF::Read(bndrynorm, prefix); + } + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + // areafrac + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_areafrac_name[idim] << std::endl; + + areafrac[idim].define(convert(cut_grids, IntVect::TheDimensionVector(idim)), dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_areafrac_name[idim]); + VisMF::Read(areafrac[idim], prefix); + } + + // facecent + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_facecent_name[idim] << std::endl; + + facecent[idim].define(convert(cut_grids, IntVect::TheDimensionVector(idim)), dmap, AMREX_SPACEDIM-1, 
ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_facecent_name[idim]); + VisMF::Read(facecent[idim], prefix); + } + + // edgecent + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_edgecent_name[idim] << std::endl; + + IntVect edge_type{1}; edge_type[idim] = 0; + edgecent[idim].define(convert(cut_grids, edge_type), dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_edgecent_name[idim]); + VisMF::Read(edgecent[idim], prefix); + } + } + + // levelset + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_levelset_name << std::endl; + + levelset.define(convert(cut_grids,IntVect::TheNodeVector()), dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_levelset_name); + VisMF::Read(levelset, prefix); + } +} + +void +ChkptFile::write_to_chkpt_file (const BoxArray& cut_grids, + const BoxArray& covered_grids, + const MultiFab& volfrac, + const MultiFab& centroid, const MultiFab& bndryarea, + const MultiFab& bndrycent, const MultiFab& bndrynorm, + const Array& areafrac, + const Array& facecent, + const Array& edgecent, + const MultiFab& levelset, const Geometry& geom, + const IntVect& ngrow, bool extend_domain_face, + int max_grid_size) const +{ + + if (ParallelDescriptor::IOProcessor()) { + std::cout << "\n\t Writing checkpoint " << m_restart_file << std::endl; + } + + const int nlevels = 1; + PreBuildDirectorHierarchy(m_restart_file, level_prefix, nlevels, true); + + writeHeader(cut_grids, covered_grids, geom, ngrow, extend_domain_face, max_grid_size); + + writeToFile(volfrac, m_volfrac_name); + writeToFile(centroid, m_centroid_name); + writeToFile(bndryarea, m_bndryarea_name); + writeToFile(bndrycent, m_bndrycent_name); + writeToFile(bndrynorm, m_bndrynorm_name); + writeToFile(levelset, m_levelset_name); + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + writeToFile(areafrac[idim], m_areafrac_name[idim]); + 
writeToFile(facecent[idim], m_facecent_name[idim]); + writeToFile(edgecent[idim], m_edgecent_name[idim]); + } +} + +}} diff --git a/Src/EB/CMakeLists.txt b/Src/EB/CMakeLists.txt index 8ceb433e159..017e4d783a8 100644 --- a/Src/EB/CMakeLists.txt +++ b/Src/EB/CMakeLists.txt @@ -70,11 +70,17 @@ target_sources(amrex AMReX_EB2_${AMReX_SPACEDIM}D_C.H AMReX_EB_STL_utils.H AMReX_EB_STL_utils.cpp + AMReX_EB_chkpt_file.H + AMReX_EB_chkpt_file.cpp AMReX_EB_triGeomOps_K.H AMReX_EB2_Level_STL.H AMReX_EB2_Level_STL.cpp AMReX_EB2_IndexSpace_STL.H AMReX_EB2_IndexSpace_STL.cpp + AMReX_EB2_Level_chkpt_file.H + AMReX_EB2_Level_chkpt_file.cpp + AMReX_EB2_IndexSpace_chkpt_file.H + AMReX_EB2_IndexSpace_chkpt_file.cpp ) if (AMReX_SPACEDIM EQUAL 3) diff --git a/Src/EB/Make.package b/Src/EB/Make.package index 5865a2da982..b684523924f 100644 --- a/Src/EB/Make.package +++ b/Src/EB/Make.package @@ -79,6 +79,12 @@ CEXE_headers += AMReX_EB_triGeomOps_K.H CEXE_headers += AMReX_EB2_Level_STL.H AMReX_EB2_IndexSpace_STL.H CEXE_sources += AMReX_EB2_Level_STL.cpp AMReX_EB2_IndexSpace_STL.cpp +CEXE_sources += AMReX_EB_chkpt_file.cpp +CEXE_headers += AMReX_EB_chkpt_file.H + +CEXE_headers += AMReX_EB2_Level_chkpt_file.H AMReX_EB2_IndexSpace_chkpt_file.H +CEXE_sources += AMReX_EB2_Level_chkpt_file.cpp AMReX_EB2_IndexSpace_chkpt_file.cpp + ifeq ($(DIM),3) CEXE_sources += AMReX_WriteEBSurface.cpp AMReX_EBToPVD.cpp CEXE_headers += AMReX_WriteEBSurface.H AMReX_EBToPVD.H From 7e040166efc8208e60d8796d4d99b1dd47146ef2 Mon Sep 17 00:00:00 2001 From: Marco Garten Date: Wed, 7 Sep 2022 08:53:20 -0700 Subject: [PATCH 060/111] Update Testing Docs (#2937) - document `abort_on_unused_inputs` - remove duplicate superfluous argument in regtest call --- Docs/sphinx_documentation/source/Testing.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Docs/sphinx_documentation/source/Testing.rst b/Docs/sphinx_documentation/source/Testing.rst index b7e32c9477b..bbceae1d1ad 100644 --- 
a/Docs/sphinx_documentation/source/Testing.rst +++ b/Docs/sphinx_documentation/source/Testing.rst @@ -18,6 +18,7 @@ application codes that use it as a framework. We use an in-house test runner scr operation, originally developed by Michael Zingale for the Castro code, and later expanded to other application codes as well. The results for each night are collected and stored on a web page; see https://ccse.lbl.gov/pub/RegressionTesting/ for the latest set of results. +The runtime option ``amrex.abort_on_unused_inputs`` (``0`` or ``1``; default is ``0`` for false) is useful for making sure that tests always stay up to date with API changes as it will abort the application after the test run if any unused input parameters were detected. Running the test suite locally ============================== @@ -73,7 +74,7 @@ re-run the script without the :cpp:`--make_benchmarks` option: :: - python regtest.py --make_benchmarks 'generating initial benchmarks' AMReX-tests.ini + python regtest.py AMReX-tests.ini The script will generate a set of html pages in the directory specified in your :cpp:`AMReX-tests.ini` file that you can examine using the browser of your choice. 
From 9525ea8892b9c0910acc2bf2ae8950f6068c34e5 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 7 Sep 2022 09:13:20 -0700 Subject: [PATCH 061/111] HIP: use coarse grained host memory (#2932) --- Src/Base/AMReX_Arena.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_Arena.cpp b/Src/Base/AMReX_Arena.cpp index c14fced3872..bcab5834bf0 100644 --- a/Src/Base/AMReX_Arena.cpp +++ b/Src/Base/AMReX_Arena.cpp @@ -144,7 +144,7 @@ Arena::allocate_system (std::size_t nbytes) else if (arena_info.device_use_hostalloc) { AMREX_HIP_OR_CUDA_OR_DPCPP( - AMREX_HIP_SAFE_CALL (hipHostMalloc(&p, nbytes, hipHostMallocMapped));, + AMREX_HIP_SAFE_CALL (hipHostMalloc(&p, nbytes, hipHostMallocMapped|hipHostMallocNonCoherent));, AMREX_CUDA_SAFE_CALL(cudaHostAlloc(&p, nbytes, cudaHostAllocMapped));, p = sycl::malloc_host(nbytes, Gpu::Device::syclContext())); } From 3e397bb6ba2854245a10d49a5ee37e1ba9f33f0e Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 7 Sep 2022 09:13:53 -0700 Subject: [PATCH 062/111] Link to cublas when using CUDA and Hypre (#2933) --- Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp | 5 ++--- Tools/GNUMake/packages/Make.hypre | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp index 778f3ce3830..9e320d7a55f 100644 --- a/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp +++ b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp @@ -314,12 +314,11 @@ Real OpenBCSolver::solve (const Vector& a_sol, m_mlmg_2->setVerbose(m_verbose); m_mlmg_2->setBottomVerbose(m_bottom_verbose); m_mlmg_2->setBottomSolver(m_bottom_solver_type); - if (m_bottom_solver_type == BottomSolver::hypre) { #ifdef AMREX_USE_HYPRE + if (m_bottom_solver_type == BottomSolver::hypre) { m_mlmg_2->setHypreInterface(Hypre::Interface::structed); -#else -#endif } +#endif } Vector solv_all = a_sol; Vector rhsv_all = a_rhs; diff --git a/Tools/GNUMake/packages/Make.hypre 
b/Tools/GNUMake/packages/Make.hypre index 11e0690a67e..d2cc0d7c17a 100644 --- a/Tools/GNUMake/packages/Make.hypre +++ b/Tools/GNUMake/packages/Make.hypre @@ -19,5 +19,5 @@ ifdef AMREX_HYPRE_HOME endif ifeq ($(USE_CUDA),TRUE) - LIBRARIES += -lcusparse -lcurand + LIBRARIES += -lcusparse -lcurand -lcublas endif From 5b0c598cc71a5e914bfc4dbb7ea44313d45c8f57 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 7 Sep 2022 09:42:57 -0700 Subject: [PATCH 063/111] Fix a warning in packing communication send buffer (#2940) When we communicate double precision data in single precision, there is a conversion from double to float in packing the send buffer. A static cast is added to fix the warning. --- Src/Base/AMReX_FBI.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_FBI.H b/Src/Base/AMReX_FBI.H index 61ef452b601..cc0bfeecbce 100644 --- a/Src/Base/AMReX_FBI.H +++ b/Src/Base/AMReX_FBI.H @@ -924,7 +924,7 @@ FabArray::pack_send_buffer_cpu (FabArray const& src, int scomp, int nc amrex::LoopConcurrentOnCpu( bx, ncomp, [=] (int ii, int jj, int kk, int n) noexcept { - pfab(ii,jj,kk,n) = sfab(ii,jj,kk,n+scomp); + pfab(ii,jj,kk,n) = static_cast(sfab(ii,jj,kk,n+scomp)); }); dptr += (bx.numPts() * ncomp * sizeof(BUF)); } From 67384701a808ca973ad2c24ec86cee4c7a81fd05 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 7 Sep 2022 14:12:34 -0700 Subject: [PATCH 064/111] Changes for Cray & Clang (#2941) * It seems that the new Cray compilers no longer define `_CRAYC`. However it does define `__cray__`. * For Clang based Cray compilers, use -O3 instead of -O2 for optimization. * Clang's vectorization pragma is very aggressive. For some codes, it makes ParallelFor with many if statements on CPU much slower than without vectorization. Unfortunately, it does not have an ivdep pragma. So we disable AMREX_PRAGMA_SIMD for clang for safety. * No longer need to use -Wno-pass-failed for Clang based compilers.
--- .github/workflows/hip.yml | 4 ++-- .github/workflows/macos.yml | 4 ++-- Src/Base/AMReX_Extension.H | 4 ++-- Tools/CMake/AMReXFlagsTargets.cmake | 18 +++++++++--------- Tools/CMake/AMReXParallelBackends.cmake | 2 +- Tools/CMake/AMReXSYCL.cmake | 2 +- Tools/GNUMake/comps/armclang.mak | 2 +- Tools/GNUMake/comps/cray.mak | 10 +++++----- Tools/GNUMake/comps/dpcpp.mak | 2 -- Tools/GNUMake/comps/hip.mak | 2 -- Tools/GNUMake/comps/llvm.mak | 2 +- 11 files changed, 24 insertions(+), 28 deletions(-) diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index a128eabf664..0672287437c 100644 --- a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -20,7 +20,7 @@ jobs: # ^ # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:176:9: note: macro 'select_impl_' defined here # #define select_impl_(_1, _2, impl_, ...) impl_ - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments -Wno-pass-failed"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} steps: - uses: actions/checkout@v2 - name: Dependencies @@ -66,7 +66,7 @@ jobs: # ^ # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:176:9: note: macro 'select_impl_' defined here # #define select_impl_(_1, _2, impl_, ...) 
impl_ - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments -Wno-pass-failed"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} steps: - uses: actions/checkout@v2 - name: Dependencies diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index e1446a038da..61eb9b9ccdb 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -14,7 +14,7 @@ jobs: env: # build universal binaries for M1 "Apple Silicon" and Intel CPUs CMAKE_OSX_ARCHITECTURES: "arm64;x86_64" - CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis -Wno-pass-failed" + CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis" # -Wno-range-loop-analysis: Apple clang has a bug in range-loop-analysis steps: - uses: actions/checkout@v2 @@ -39,7 +39,7 @@ jobs: name: AppleClang@11.0 GFortran@9.3 [tests] runs-on: macos-latest env: - CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis -Wno-pass-failed" + CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis" # -Wno-range-loop-analysis: Apple 
clang has a bug in range-loop-analysis steps: - uses: actions/checkout@v2 diff --git a/Src/Base/AMReX_Extension.H b/Src/Base/AMReX_Extension.H index a084777f1a0..753b43995f3 100644 --- a/Src/Base/AMReX_Extension.H +++ b/Src/Base/AMReX_Extension.H @@ -57,7 +57,7 @@ #elif defined(__INTEL_COMPILER) #define AMREX_PRAGMA_SIMD _Pragma("ivdep") -#elif defined(_CRAYC) +#elif defined(_CRAYC) || defined(__cray__) #define AMREX_PRAGMA_SIMD _Pragma("ivdep") #elif defined(__PGI) @@ -73,7 +73,7 @@ #define AMREX_PRAGMA_SIMD _Pragma("ibm independent_loop") #elif defined(__clang__) -#define AMREX_PRAGMA_SIMD _Pragma("clang loop vectorize(enable)") +#define AMREX_PRAGMA_SIMD #elif defined(__GNUC__) #define AMREX_PRAGMA_SIMD _Pragma("GCC ivdep") diff --git a/Tools/CMake/AMReXFlagsTargets.cmake b/Tools/CMake/AMReXFlagsTargets.cmake index 64dcf3f3a5f..2e89c32fddc 100644 --- a/Tools/CMake/AMReXFlagsTargets.cmake +++ b/Tools/CMake/AMReXFlagsTargets.cmake @@ -82,15 +82,15 @@ target_compile_options( Flags_CXX $<${_cxx_cray_dbg}:-O0> $<${_cxx_cray_rwdbg}:> $<${_cxx_cray_rel}:> - $<${_cxx_clang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-pass-failed> - $<${_cxx_clang_rwdbg}:-Wno-pass-failed> - $<${_cxx_clang_rel}:-Wno-pass-failed> - $<${_cxx_appleclang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-pass-failed> - $<${_cxx_appleclang_rwdbg}:-Wno-pass-failed> - $<${_cxx_appleclang_rel}:-Wno-pass-failed> - $<${_cxx_intelllvm_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-pass-failed> - $<${_cxx_intelllvm_rwdbg}:-Wno-pass-failed> - $<${_cxx_intelllvm_rel}:-Wno-pass-failed> + $<${_cxx_clang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> + $<${_cxx_clang_rwdbg}:> + $<${_cxx_clang_rel}:> + $<${_cxx_appleclang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> + $<${_cxx_appleclang_rwdbg}:> + 
$<${_cxx_appleclang_rel}:> + $<${_cxx_intelllvm_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> + $<${_cxx_intelllvm_rwdbg}:> + $<${_cxx_intelllvm_rel}:> ) # diff --git a/Tools/CMake/AMReXParallelBackends.cmake b/Tools/CMake/AMReXParallelBackends.cmake index ebf397266f8..b249d28ef60 100644 --- a/Tools/CMake/AMReXParallelBackends.cmake +++ b/Tools/CMake/AMReXParallelBackends.cmake @@ -271,7 +271,7 @@ if (AMReX_HIP) # else there will be a runtime issue (cannot find # missing gpu devices) target_compile_options(amrex PUBLIC - $<$:--amdgpu-target=${AMReX_AMD_ARCH_HIPCC} -Wno-pass-failed>) + $<$:--amdgpu-target=${AMReX_AMD_ARCH_HIPCC}>) endif() target_compile_options(amrex PUBLIC $<$:-m64>) diff --git a/Tools/CMake/AMReXSYCL.cmake b/Tools/CMake/AMReXSYCL.cmake index 8e6c7f2f4d5..007b5f321fe 100644 --- a/Tools/CMake/AMReXSYCL.cmake +++ b/Tools/CMake/AMReXSYCL.cmake @@ -45,7 +45,7 @@ target_compile_features(SYCL INTERFACE cxx_std_17) # target_compile_options( SYCL INTERFACE - $<${_cxx_dpcpp}:-Wno-error=sycl-strict -Wno-pass-failed -fsycl> + $<${_cxx_dpcpp}:-Wno-error=sycl-strict -fsycl> $<${_cxx_dpcpp}:$<$:-fsycl-device-code-split=per_kernel>>) # temporary work-around for DPC++ beta08 bug diff --git a/Tools/GNUMake/comps/armclang.mak b/Tools/GNUMake/comps/armclang.mak index efe4a718106..ccbfbeb77ed 100644 --- a/Tools/GNUMake/comps/armclang.mak +++ b/Tools/GNUMake/comps/armclang.mak @@ -57,7 +57,7 @@ ifeq ($(WARN_ERROR),TRUE) endif # disable some warnings -CXXFLAGS += -Wno-pass-failed -Wno-c++17-extensions +CXXFLAGS += -Wno-c++17-extensions ######################################################################## diff --git a/Tools/GNUMake/comps/cray.mak b/Tools/GNUMake/comps/cray.mak index 85a1133e412..f75a56c5f75 100644 --- a/Tools/GNUMake/comps/cray.mak +++ b/Tools/GNUMake/comps/cray.mak @@ -53,10 +53,10 @@ else # CCE <= 8. So we adjust some flags to achieve similar optimization. 
See # this page: # http://pubs.cray.com/content/S-5212/9.0/cray-compiling-environment-cce-release-overview/cce-900-software-enhancements - CXXFLAGS += -O2 -ffast-math #-fsave-loopmark -fsave-decompile - CFLAGS += -O2 -ffast-math #-fsave-loopmark -fsave-decompile - FFLAGS += -O2 -h list=a - F90FLAGS += -O2 -h list=a + CXXFLAGS += -O3 -ffast-math #-fsave-loopmark -fsave-decompile + CFLAGS += -O3 -ffast-math #-fsave-loopmark -fsave-decompile + FFLAGS += -O3 -h list=a + F90FLAGS += -O3 -h list=a else GENERIC_COMP_FLAGS += -h list=a @@ -120,7 +120,7 @@ else endif ifeq ($(CRAY_IS_CLANG_BASED),TRUE) - CXXFLAGS += -Wno-pass-failed -Wno-c++17-extensions + CXXFLAGS += -Wno-c++17-extensions endif CXXFLAGS += $(GENERIC_COMP_FLAGS) diff --git a/Tools/GNUMake/comps/dpcpp.mak b/Tools/GNUMake/comps/dpcpp.mak index d2f7f72108e..b351f0ac731 100644 --- a/Tools/GNUMake/comps/dpcpp.mak +++ b/Tools/GNUMake/comps/dpcpp.mak @@ -36,8 +36,6 @@ else endif -CXXFLAGS += -Wno-pass-failed # disable this warning - ifeq ($(WARN_ALL),TRUE) warning_flags = -Wall -Wextra -Wno-sign-compare -Wunreachable-code -Wnull-dereference warning_flags += -Wfloat-conversion -Wextra-semi diff --git a/Tools/GNUMake/comps/hip.mak b/Tools/GNUMake/comps/hip.mak index d94f8f3c66f..d86f887e087 100644 --- a/Tools/GNUMake/comps/hip.mak +++ b/Tools/GNUMake/comps/hip.mak @@ -86,8 +86,6 @@ ifeq ($(HIP_COMPILER),clang) endif - CXXFLAGS += -Wno-pass-failed # disable this warning - ifeq ($(WARN_ALL),TRUE) warning_flags = -Wall -Wextra -Wunreachable-code -Wnull-dereference warning_flags += -Wfloat-conversion -Wextra-semi diff --git a/Tools/GNUMake/comps/llvm.mak b/Tools/GNUMake/comps/llvm.mak index 2bf710c0d94..ad516e0799d 100644 --- a/Tools/GNUMake/comps/llvm.mak +++ b/Tools/GNUMake/comps/llvm.mak @@ -60,7 +60,7 @@ ifeq ($(WARN_ERROR),TRUE) endif # disable some warnings -CXXFLAGS += -Wno-pass-failed -Wno-c++17-extensions +CXXFLAGS += -Wno-c++17-extensions ######################################################################## 
From bfbd68f4ed31ca07572be9bf138a59cacb7e800c Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Mon, 12 Sep 2022 11:40:55 -0700 Subject: [PATCH 065/111] Fix: Make Finalize->Initialize->F->I->... Work (#2944) Fix assertions in Arena::Initialize. The_BArena never dies (tm) Co-authored-by: Weiqun Zhang --- Src/Base/AMReX_Arena.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Src/Base/AMReX_Arena.cpp b/Src/Base/AMReX_Arena.cpp index bcab5834bf0..19f4f6f5459 100644 --- a/Src/Base/AMReX_Arena.cpp +++ b/Src/Base/AMReX_Arena.cpp @@ -253,12 +253,13 @@ Arena::Initialize () if (initialized) return; initialized = true; - BL_ASSERT(the_arena == nullptr); + // see reason on allowed reuse of the default CPU BArena in Arena::Finalize + BL_ASSERT(the_arena == nullptr || the_arena == The_BArena()); BL_ASSERT(the_async_arena == nullptr); - BL_ASSERT(the_device_arena == nullptr); - BL_ASSERT(the_managed_arena == nullptr); + BL_ASSERT(the_device_arena == nullptr || the_device_arena == The_BArena()); + BL_ASSERT(the_managed_arena == nullptr || the_managed_arena == The_BArena()); BL_ASSERT(the_pinned_arena == nullptr); - BL_ASSERT(the_cpu_arena == nullptr); + BL_ASSERT(the_cpu_arena == nullptr || the_cpu_arena == The_BArena()); #ifdef AMREX_USE_GPU #ifdef AMREX_USE_DPCPP @@ -468,6 +469,13 @@ Arena::Finalize () initialized = false; + // we reset Arenas unless they are the default "CPU malloc/free" BArena + // this is because we want to allow users to free their UB objects + // that they forgot to destruct after amrex::Finalize(): + // amrex::Initialize(...); + // MultiFab mf(...); // this should be scoped in { ... 
} + amrex::Finalize(); + // mf cannot be used now, but it can at least be freed without a segfault if (!dynamic_cast(the_device_arena)) { if (the_device_arena != the_arena) { delete the_device_arena; From 9aa23c202a13eee489a06030b9aeda6b89856944 Mon Sep 17 00:00:00 2001 From: Cody Balos Date: Mon, 12 Sep 2022 11:49:37 -0700 Subject: [PATCH 066/111] Fix minor typo in fcompare docs (#2945) --- Docs/sphinx_documentation/source/Post_Processing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Docs/sphinx_documentation/source/Post_Processing.rst b/Docs/sphinx_documentation/source/Post_Processing.rst index c2cce7fd7b2..fd707f221db 100644 --- a/Docs/sphinx_documentation/source/Post_Processing.rst +++ b/Docs/sphinx_documentation/source/Post_Processing.rst @@ -76,8 +76,8 @@ variable. **How to build and run** -In ``amrex/Tools/Plotfile``, type ``make`` and then ``./fextract.gnu.ex`` to run. -Typing ``./fextract.gnu.ex`` without inputs will bring up usage and options. +In ``amrex/Tools/Plotfile``, type ``make`` and then ``./fcompare.gnu.ex`` to run. +Typing ``./fcompare.gnu.ex`` without inputs will bring up usage and options. **Example** From 0351c9958be7fdc7e3e0c419fc68d36a0c00f288 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 14 Sep 2022 08:48:25 -0700 Subject: [PATCH 067/111] CMake: HIP_PATH from ROCM_PATH (#2948) * On machines like Crusher, `ROCM_PATH` is more likely to be available than a `HIP_PATH` environment variable. This is mainly needed for our hacky ROCTX hints. * ROCTX: New Include Supposedly, there is a new include we shall use: Ref.: https://github.com/ROCm-Developer-Tools/roctracer/issues/79 * ROCtracer: Include as System library Because of GNU extensions in the roctracer include files for the legacy include. But we should make this `-isystem` anyway to be robust for the future.
The 5.2 deprecated include file `` throws warnings because they rely on GNU extensions: ``` In file included from /opt/rocm/hip/../roctracer/include/ext/prof_protocol.h:27: /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:70:7: warning: anonymous structs are a GNU extension [-Wgnu-anonymous-struct] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:70:7: warning: anonymous types declared in an anonymous union are an extension [-Wnested-anon-types] /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:75:7: warning: anonymous types declared in an anonymous union are an extension [-Wnested-anon-types] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:82:7: warning: anonymous structs are a GNU extension [-Wgnu-anonymous-struct] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:86:7: warning: anonymous structs are a GNU extension [-Wgnu-anonymous-struct] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:90:7: warning: anonymous structs are a GNU extension [-Wgnu-anonymous-struct] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:82:7: warning: anonymous types declared in an anonymous union are an extension [-Wnested-anon-types] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:86:7: warning: anonymous types declared in an anonymous union are an extension [-Wnested-anon-types] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:90:7: warning: anonymous types declared in an anonymous union are an extension [-Wnested-anon-types] struct { ^ ``` * GNUmake: Update Includes in `hip.mak` Use public prefix. 
--- .github/workflows/hip.yml | 2 ++ Src/Base/AMReX_GpuDevice.cpp | 4 ++-- Src/Base/AMReX_TinyProfiler.H | 2 +- Tools/CMake/AMReXParallelBackends.cmake | 20 ++++++++++++++------ Tools/GNUMake/comps/hip.mak | 13 ++++++------- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index 0672287437c..47d9a89828e 100644 --- a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -47,6 +47,7 @@ jobs: -DAMReX_LINEAR_SOLVERS=ON \ -DAMReX_GPU_BACKEND=HIP \ -DAMReX_AMD_ARCH=gfx908 \ + -DAMReX_ROCTX=ON \ -DCMAKE_C_COMPILER=$(which clang) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ -DCMAKE_Fortran_COMPILER=$(which flang) \ @@ -91,6 +92,7 @@ jobs: -DAMReX_LINEAR_SOLVERS=ON \ -DAMReX_GPU_BACKEND=HIP \ -DAMReX_AMD_ARCH=gfx908 \ + -DAMReX_ROCTX=ON \ -DCMAKE_C_COMPILER=$(which clang) \ -DCMAKE_CXX_COMPILER=$(which hipcc) \ -DCMAKE_Fortran_COMPILER=$(which gfortran) \ diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp index e0ab64b76e3..fe7257ea971 100644 --- a/Src/Base/AMReX_GpuDevice.cpp +++ b/Src/Base/AMReX_GpuDevice.cpp @@ -22,9 +22,9 @@ #if defined(AMREX_USE_HIP) #include #if defined(AMREX_USE_ROCTX) -#include +#include #if defined(AMREX_PROFILING) || defined (AMREX_TINY_PROFILING) -#include +#include #endif #endif #endif diff --git a/Src/Base/AMReX_TinyProfiler.H b/Src/Base/AMReX_TinyProfiler.H index 677b4448d3b..57c9ea0479c 100644 --- a/Src/Base/AMReX_TinyProfiler.H +++ b/Src/Base/AMReX_TinyProfiler.H @@ -10,7 +10,7 @@ #endif #if defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX) -#include +#include #endif #include diff --git a/Tools/CMake/AMReXParallelBackends.cmake b/Tools/CMake/AMReXParallelBackends.cmake index b249d28ef60..61b563f7c51 100644 --- a/Tools/CMake/AMReXParallelBackends.cmake +++ b/Tools/CMake/AMReXParallelBackends.cmake @@ -198,10 +198,12 @@ if (AMReX_HIP) unset(_valid_hip_compilers) if(NOT DEFINED HIP_PATH) - if(NOT DEFINED ENV{HIP_PATH}) - set(HIP_PATH "/opt/rocm/hip" 
CACHE PATH "Path to which HIP has been installed") - else() + if(DEFINED ENV{HIP_PATH}) set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") + elseif(DEFINED ENV{ROCM_PATH}) + set(HIP_PATH "$ENV{ROCM_PATH}/hip" CACHE PATH "Path to which HIP has been installed") + else() + set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") endif() endif() @@ -255,9 +257,15 @@ if (AMReX_HIP) if(AMReX_ROCTX) # To be modernized in the future, please see: # https://github.com/ROCm-Developer-Tools/roctracer/issues/56 - target_include_directories(amrex PUBLIC ${HIP_PATH}/../roctracer/include ${HIP_PATH}/../rocprofiler/include) - target_link_libraries(amrex PUBLIC "-L${HIP_PATH}/../roctracer/lib/ -lroctracer64" "-L${HIP_PATH}/../roctracer/lib -lroctx64") - endif () + target_include_directories(amrex SYSTEM PUBLIC + ${HIP_PATH}/../roctracer/include + ${HIP_PATH}/../rocprofiler/include + ) + target_link_libraries(amrex PUBLIC + "-L${HIP_PATH}/../roctracer/lib -lroctracer64" + "-L${HIP_PATH}/../roctracer/lib -lroctx64" + ) + endif() target_link_libraries(amrex PUBLIC hip::hiprand roc::rocrand roc::rocprim) # avoid forcing the rocm LLVM flags on a gfortran diff --git a/Tools/GNUMake/comps/hip.mak b/Tools/GNUMake/comps/hip.mak index d86f887e087..bb4c6d98a17 100644 --- a/Tools/GNUMake/comps/hip.mak +++ b/Tools/GNUMake/comps/hip.mak @@ -107,7 +107,7 @@ ifeq ($(HIP_COMPILER),clang) # Generic HIP info ROC_PATH=$(realpath $(dir $(HIP_PATH))) - SYSTEM_INCLUDE_LOCATIONS += $(HIP_PATH)/include + SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include $(HIP_PATH)/include # rocRand SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include/hiprand $(ROC_PATH)/include/rocrand @@ -120,13 +120,12 @@ ifeq ($(HIP_COMPILER),clang) # rocThrust - Header only # SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include/rocthrust - ifeq ($(USE_ROCTX),TRUE) # rocTracer - CXXFLAGS += -DAMREX_USE_ROCTX - HIPCC_FLAGS += -DAMREX_USE_ROCTX - SYSTEM_INCLUDE_LOCATIONS += 
$(ROC_PATH)/include/roctracer $(ROC_PATH)/include/rocprofiler - LIBRARY_LOCATIONS += $(ROC_PATH)/lib - LIBRARIES += -lroctracer64 -lroctx64 + ifeq ($(USE_ROCTX),TRUE) + CXXFLAGS += -DAMREX_USE_ROCTX + HIPCC_FLAGS += -DAMREX_USE_ROCTX + LIBRARY_LOCATIONS += $(ROC_PATH)/lib + LIBRARIES += -Wl,--rpath=$(ROC_PATH)/lib -lroctracer64 -lroctx64 endif # hipcc passes a lot of unused arguments to clang From 17c94cc196d779e9f7ec48f7d004088a1c1e11c6 Mon Sep 17 00:00:00 2001 From: Candace Gilet Date: Wed, 14 Sep 2022 11:49:35 -0400 Subject: [PATCH 068/111] Correct MultiFab::norm0 doxygen brief description (#2946) --- Src/Base/AMReX_MultiFab.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_MultiFab.H b/Src/Base/AMReX_MultiFab.H index bfa377367db..70e6facaee7 100644 --- a/Src/Base/AMReX_MultiFab.H +++ b/Src/Base/AMReX_MultiFab.H @@ -191,7 +191,7 @@ public: /** * \brief Returns the maximum *absolute* values contained in - * each component of "comps" of the MultiFab. No ghost cells are used. + * each component of "comps" of the MultiFab. "nghost" ghost cells are used. 
*/ Vector norm0 (const Vector& comps, int nghost = 0, bool local = false, bool ignore_covered = false ) const; Vector norminf (const Vector& comps, int nghost = 0, bool local = false, bool ignore_covered = false) const { From 2cdb9df08e4668bbc9a9b6560217514518f41573 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Thu, 15 Sep 2022 10:55:41 -0700 Subject: [PATCH 069/111] Byte spread fixes (#2949) --- Src/Particle/AMReX_ParticleContainer.H | 4 +-- Src/Particle/AMReX_ParticleContainerI.H | 34 +++++++++++++++---------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index a0a96852862..7a940d505b4 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -524,9 +524,9 @@ public: */ bool OK (int lev_min = 0, int lev_max = -1, int nGrow = 0) const; - void ByteSpread () const; + std::array ByteSpread () const; - void PrintCapacity () const; + std::array PrintCapacity () const; void ShrinkToFit (); diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index f6fbe9afc3c..d05141fe87c 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -480,8 +480,9 @@ ParticleContainer::Nu template class Allocator> -void -ParticleContainer::ByteSpread () const +std::array +ParticleContainer +::ByteSpread () const { Long cnt = 0; @@ -496,7 +497,7 @@ ParticleContainer::By Long mn = cnt, mx = mn; const int IOProc = ParallelContext::IOProcessorNumberSub(); - const std::size_t sz = sizeof(ParticleType)+NumRealComps()*sizeof(Real)+NumIntComps()*sizeof(int); + const Long sz = sizeof(ParticleType)+NumRealComps()*sizeof(ParticleReal)+NumIntComps()*sizeof(int); #ifdef AMREX_LAZY Lazy::QueueReduction( [=] () mutable { @@ -505,22 +506,27 @@ ParticleContainer::By ParallelReduce::Max(mx, IOProc, ParallelContext::CommunicatorSub()); ParallelReduce::Sum(cnt, IOProc, 
ParallelContext::CommunicatorSub()); - amrex::Print() << "ParticleContainer byte spread across MPI nodes: [" + amrex::Print() << "ParticleContainer spread across MPI nodes - bytes (num particles): [Min: " << mn*sz << " (" << mn << ")" - << " ... " + << ", Max: " << mx*sz << " (" << mx << ")" - << "] total particles: (" << cnt << ")\n"; + << ", Total: " + << cnt*sz + << " (" << cnt << ")]\n"; #ifdef AMREX_LAZY }); #endif + + return {mn*sz, mx*sz, cnt*sz}; } template class Allocator> -void -ParticleContainer::PrintCapacity () const +std::array +ParticleContainer +::PrintCapacity () const { Long cnt = 0; @@ -543,16 +549,18 @@ ParticleContainer::Pr ParallelReduce::Max(mx, IOProc, ParallelContext::CommunicatorSub()); ParallelReduce::Sum(cnt, IOProc, ParallelContext::CommunicatorSub()); - amrex::Print() << "ParticleContainer byte spread across MPI nodes: [" + amrex::Print() << "ParticleContainer spread across MPI nodes - bytes: [Min: " << mn - << " (" << mn << ")" - << " ... " + << ", Max: " << mx - << " (" << mx << ")" - << "] total memory: (" << cnt << ")\n"; + << ", Total: " + << cnt + << "]\n"; #ifdef AMREX_LAZY }); #endif + + return {mn, mx, cnt}; } template Date: Thu, 15 Sep 2022 13:23:40 -0700 Subject: [PATCH 070/111] Add template parameter to ParallelFor and launch specifying block size (#2947) By default, amrex::ParallelFor launches AMREX_GPU_MAX_THREADS threads per block. We can now explicitly specify the block size with `ParallelFor<BLOCK_SIZE>(...)`, where BLOCK_SIZE should be a multiple of the warp size (e.g., 64, 128, etc.). A similar change has also been made to `launch`. The changes are backward compatible. 
--- Docs/sphinx_documentation/source/GPU.rst | 11 +- Src/Base/AMReX_GpuLaunch.H | 30 +- Src/Base/AMReX_GpuLaunchFunctsC.H | 504 ++++++++++++++++- Src/Base/AMReX_GpuLaunchFunctsG.H | 657 ++++++++++++++++++++--- Src/Base/AMReX_GpuLaunchMacrosG.H | 12 +- Src/Base/AMReX_Reduce.H | 3 +- 6 files changed, 1115 insertions(+), 102 deletions(-) diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst index 4101c806be2..5e6e153f6e0 100644 --- a/Docs/sphinx_documentation/source/GPU.rst +++ b/Docs/sphinx_documentation/source/GPU.rst @@ -1001,7 +1001,7 @@ launch function. ``amrex::ParallelFor()`` expands into different variations of a quadruply-nested :cpp:`for` loop depending dimensionality and whether it is being implemented on CPU or GPU. -The best way to understand this macro is to take a look at the 4D :cpp:`amrex::ParallelFor` +The best way to understand this function is to take a look at the 4D :cpp:`amrex::ParallelFor` that is implemented when ``USE_CUDA=FALSE``. A simplified version is reproduced here: .. highlight:: c++ @@ -1103,6 +1103,15 @@ bounds, a :cpp:`long` or :cpp:`int` number of elements is passed to bound the si passing the number of elements to work on and indexing the pointer to the starting element: :cpp:`p[idx + 15]`. +GPU block size +-------------- + +By default, :cpp:`ParallelFor` launches ``AMREX_GPU_MAX_THREADS`` threads +per GPU block, where ``AMREX_GPU_MAX_THREADS`` is a compile-time constant +with a default value of 256. The users can also explcitly specify the +number of threads per block by :cpp:`ParallelFor(...)`, where +``MY_BLOCK_SIZE`` is a multiple of the warp size (e.g., 128). This allows +the users to do performance tuning for individual kernels. 
Launching general kernels ------------------------- diff --git a/Src/Base/AMReX_GpuLaunch.H b/Src/Base/AMReX_GpuLaunch.H index d31bae568c1..d1a9e352336 100644 --- a/Src/Base/AMReX_GpuLaunch.H +++ b/Src/Base/AMReX_GpuLaunch.H @@ -30,11 +30,11 @@ #define AMREX_GPU_Z_STRIDE 1 #ifdef AMREX_USE_CUDA -# define AMREX_LAUNCH_KERNEL(blocks, threads, sharedMem, stream, ... ) \ - amrex::launch_global<<>>(__VA_ARGS__); +# define AMREX_LAUNCH_KERNEL(MT, blocks, threads, sharedMem, stream, ... ) \ + amrex::launch_global<<>>(__VA_ARGS__) #elif defined(AMREX_USE_HIP) -# define AMREX_LAUNCH_KERNEL(blocks, threads, sharedMem, stream, ... ) \ - hipLaunchKernelGGL(launch_global, blocks, threads, sharedMem, stream, __VA_ARGS__); +# define AMREX_LAUNCH_KERNEL(MT, blocks, threads, sharedMem, stream, ... ) \ + hipLaunchKernelGGL(launch_global, blocks, threads, sharedMem, stream, __VA_ARGS__) #endif @@ -151,6 +151,28 @@ namespace Gpu { dim3 numThreads; std::size_t sharedMem = 0; }; + + template + ExecutionConfig + makeExecutionConfig (Long N) noexcept + { + ExecutionConfig ec(dim3{}, dim3{}); + ec.numBlocks.x = (std::max(N,Long(1)) + MT - 1) / MT; + ec.numThreads.x = MT; + AMREX_ASSERT(MT % Gpu::Device::warp_size == 0); + return ec; + } + + template + ExecutionConfig + makeExecutionConfig (const Box& box) noexcept + { + ExecutionConfig ec(dim3{}, dim3{}); + ec.numBlocks.x = (std::max(box.numPts(),Long(1)) + MT - 1) / MT; + ec.numThreads.x = MT; + AMREX_ASSERT(MT % Gpu::Device::warp_size == 0); + return ec; + } #endif } diff --git a/Src/Base/AMReX_GpuLaunchFunctsC.H b/Src/Base/AMReX_GpuLaunchFunctsC.H index 025b43fec0a..6ce9cca0f3a 100644 --- a/Src/Base/AMReX_GpuLaunchFunctsC.H +++ b/Src/Base/AMReX_GpuLaunchFunctsC.H @@ -55,11 +55,18 @@ namespace detail { } template -void launch (T const& n, L&& f, std::size_t /*shared_mem_bytes*/=0) noexcept +void launch (T const& n, L&& f) noexcept { f(n); } +template +void launch (T const& n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + f(n); +} + 
template ::value> > void For (T n, L&& f) noexcept { @@ -68,12 +75,26 @@ void For (T n, L&& f) noexcept } } +template ::value> > +void For (T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(n, std::forward(f)); +} + template ::value> > void For (Gpu::KernelInfo const&, T n, L&& f) noexcept { For(n, std::forward(f)); } +template ::value> > +void For (Gpu::KernelInfo const&, T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(n, std::forward(f)); +} + template ::value> > void ParallelFor (T n, L&& f) noexcept { @@ -83,12 +104,26 @@ void ParallelFor (T n, L&& f) noexcept } } +template ::value> > +void ParallelFor (T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(n, std::forward(f)); +} + template ::value> > void ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept { ParallelFor(n, std::forward(f)); } +template ::value> > +void ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(n, std::forward(f)); +} + template void For (Box const& box, L&& f) noexcept { @@ -101,12 +136,26 @@ void For (Box const& box, L&& f) noexcept }}} } +template +void For (Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box, std::forward(f)); +} + template void For (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { For(box, std::forward(f)); } +template +void For (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box, std::forward(f)); +} + template void ParallelFor (Box const& box, L&& f) noexcept { @@ -120,12 +169,26 @@ void ParallelFor (Box const& box, L&& f) noexcept }}} } +template +void ParallelFor (Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box, std::forward(f)); +} + template void ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { ParallelFor(box, std::forward(f)); } +template +void ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +{ + 
amrex::ignore_unused(MT); + ParallelFor(box, std::forward(f)); +} + template ::value> > void For (Box const& box, T ncomp, L&& f) noexcept { @@ -140,12 +203,26 @@ void For (Box const& box, T ncomp, L&& f) noexcept } } +template ::value> > +void For (Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box, ncomp, std::forward(f)); +} + template ::value> > void For (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { For(box, ncomp, std::forward(f)); } +template ::value> > +void For (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box, ncomp, std::forward(f)); +} + template ::value> > void ParallelFor (Box const& box, T ncomp, L&& f) noexcept { @@ -161,12 +238,26 @@ void ParallelFor (Box const& box, T ncomp, L&& f) noexcept } } +template ::value> > +void ParallelFor (Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box, ncomp, std::forward(f)); +} + template ::value> > void ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { ParallelFor(box, ncomp, std::forward(f)); } +template ::value> > +void ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box, ncomp, std::forward(f)); +} + template void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { @@ -174,12 +265,27 @@ void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept For(box2, std::forward(f2)); } +template +void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1, std::forward(f1)); + For(box2, std::forward(f2)); +} + template void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { For (box1, box2, std::forward(f1), std::forward(f2)); } +template +void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + 
amrex::ignore_unused(MT); + For (box1, box2, std::forward(f1), std::forward(f2)); +} + template void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { @@ -188,12 +294,28 @@ void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L For(box3, std::forward(f3)); } +template +void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1, std::forward(f1)); + For(box2, std::forward(f2)); + For(box3, std::forward(f3)); +} + template void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { For(box1, box2, box3, std::forward(f1), std::forward(f2), std::forward(f3)); } +template +void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1, box2, box3, std::forward(f1), std::forward(f2), std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -204,6 +326,17 @@ void For (Box const& box1, T1 ncomp1, L1&& f1, For(box2, ncomp2, std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void For (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1, ncomp1, std::forward(f1)); + For(box2, ncomp2, std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -214,6 +347,17 @@ void For (Gpu::KernelInfo const&, For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void For (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -227,6 +371,20 
@@ void For (Box const& box1, T1 ncomp1, L1&& f1, For(box3, ncomp3, std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void For (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1, ncomp1, std::forward(f1)); + For(box2, ncomp2, std::forward(f2)); + For(box3, ncomp3, std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -241,6 +399,21 @@ void For (Gpu::KernelInfo const&, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void For (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { @@ -248,12 +421,27 @@ void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept ParallelFor(box2, std::forward(f2)); } +template +void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, std::forward(f1)); + ParallelFor(box2, std::forward(f2)); +} + template void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(box1,box2,f1,f2); } +template +void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,f1,f2); +} + template void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { @@ -262,12 +450,28 @@ void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, 
L2 ParallelFor(box3, std::forward(f3)); } +template +void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, std::forward(f1)); + ParallelFor(box2, std::forward(f2)); + ParallelFor(box3, std::forward(f3)); +} + template void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -278,6 +482,17 @@ void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, ParallelFor(box2, ncomp2, std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, ncomp1, std::forward(f1)); + ParallelFor(box2, ncomp2, std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -289,6 +504,18 @@ void ParallelFor (Gpu::KernelInfo const&, box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void ParallelFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -302,6 +529,20 @@ void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, ParallelFor(box3, ncomp3, std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename 
M3=std::enable_if_t::value> > +void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, ncomp1, std::forward(f1)); + ParallelFor(box2, ncomp2, std::forward(f2)); + ParallelFor(box3, ncomp3, std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -316,30 +557,73 @@ void ParallelFor (Gpu::KernelInfo const&, box3, ncomp3, std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void ParallelFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, ncomp1, std::forward(f1), + box2, ncomp2, std::forward(f2), + box3, ncomp3, std::forward(f3)); +} + template ::value> > void HostDeviceParallelFor (T n, L&& f) noexcept { ParallelFor(n,std::forward(f)); } +template ::value> > +void HostDeviceParallelFor (T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(n,std::forward(f)); +} + template void HostDeviceParallelFor (Box const& box, L&& f) noexcept { ParallelFor(box,std::forward(f)); } +template +void HostDeviceParallelFor (Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box,std::forward(f)); +} + template ::value> > void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept { ParallelFor(box,ncomp,std::forward(f)); } +template ::value> > +void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box,ncomp,std::forward(f)); +} + template void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); } +template +void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + 
amrex::ignore_unused(MT); + ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); +} + template void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept @@ -347,6 +631,14 @@ void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -356,6 +648,16 @@ void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -369,30 +671,72 @@ void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template ::value> > void HostDeviceFor (T n, L&& f) noexcept { For(n,std::forward(f)); } +template ::value> > +void HostDeviceFor (T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(n,std::forward(f)); +} + template void HostDeviceFor (Box const& box, L&& f) noexcept { 
For(box,std::forward(f)); } +template +void HostDeviceFor (Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box,std::forward(f)); +} + template ::value> > void HostDeviceFor (Box const& box, T ncomp, L&& f) noexcept { For(box,ncomp,std::forward(f)); } +template ::value> > +void HostDeviceFor (Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box,ncomp,std::forward(f)); +} + template void HostDeviceFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { For(box1,box2,std::forward(f1),std::forward(f2)); } +template +void HostDeviceFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,box2,std::forward(f1),std::forward(f2)); +} + template void HostDeviceFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept @@ -400,6 +744,14 @@ void HostDeviceFor (Box const& box1, Box const& box2, Box const& box3, For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void HostDeviceFor (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -409,6 +761,16 @@ void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -422,30 +784,72 @@ void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> 
> +void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template ::value> > void HostDeviceParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept { ParallelFor(n,std::forward(f)); } +template ::value> > +void HostDeviceParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(n,std::forward(f)); +} + template void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { ParallelFor(box,std::forward(f)); } +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box,std::forward(f)); +} + template ::value> > void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { ParallelFor(box,ncomp,std::forward(f)); } +template ::value> > +void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box,ncomp,std::forward(f)); +} + template void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); } +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); +} + template void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, @@ -454,6 +858,15 @@ void HostDeviceParallelFor (Gpu::KernelInfo const&, ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, + Box const& box1, Box const& box2, Box 
const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -464,6 +877,17 @@ void HostDeviceParallelFor (Gpu::KernelInfo const&, ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceParallelFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -478,30 +902,73 @@ void HostDeviceParallelFor (Gpu::KernelInfo const&, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceParallelFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template ::value> > void HostDeviceFor (Gpu::KernelInfo const&, T n, L&& f) noexcept { For(n,std::forward(f)); } +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const&, T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(n,std::forward(f)); +} + template void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { For(box,std::forward(f)); } +template +void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box,std::forward(f)); +} + template ::value> > void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { For(box,ncomp,std::forward(f)); } +template ::value> > +void HostDeviceFor 
(Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box,ncomp,std::forward(f)); +} + template void HostDeviceFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { For(box1,box2,std::forward(f1),std::forward(f2)); } +template +void HostDeviceFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,box2,std::forward(f1),std::forward(f2)); +} + template void HostDeviceFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, @@ -510,6 +977,15 @@ void HostDeviceFor (Gpu::KernelInfo const&, For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void HostDeviceFor (Gpu::KernelInfo const&, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -520,6 +996,17 @@ void HostDeviceFor (Gpu::KernelInfo const&, For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -534,6 +1021,21 @@ void HostDeviceFor (Gpu::KernelInfo const&, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1), + 
box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template ::value> > void ParallelForRNG (T n, L&& f) noexcept { diff --git a/Src/Base/AMReX_GpuLaunchFunctsG.H b/Src/Base/AMReX_GpuLaunchFunctsG.H index 12206f69b70..5f7c067935c 100644 --- a/Src/Base/AMReX_GpuLaunchFunctsG.H +++ b/Src/Base/AMReX_GpuLaunchFunctsG.H @@ -64,11 +64,24 @@ void launch (int nblocks, int nthreads_per_block, gpuStream_t stream, L&& f) noe } } -template +template +void launch (int nblocks, std::size_t shared_mem_bytes, gpuStream_t stream, + L&& f) noexcept +{ + launch(nblocks, MT, shared_mem_bytes, stream, std::forward(f)); +} + +template +void launch (int nblocks, gpuStream_t stream, L&& f) noexcept +{ + launch(nblocks, MT, stream, std::forward(f)); +} + +template void launch (T const& n, L&& f) noexcept { if (amrex::isEmpty(n)) return; - const auto ec = Gpu::ExecutionConfig(n); + const auto ec = Gpu::makeExecutionConfig(n); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -139,11 +152,11 @@ namespace detail { } } -template ::value> > +template ::value> > void ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept { if (amrex::isEmpty(n)) return; - const auto ec = Gpu::ExecutionConfig(n); + const auto ec = Gpu::makeExecutionConfig(n); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -186,7 +199,7 @@ void ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept } } -template +template void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept { if (amrex::isEmpty(box)) return; @@ -195,7 +208,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept const auto len = amrex::length(box); const auto lenxy = len.x*len.y; const auto lenx = len.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = 
Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -250,7 +263,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept } } -template ::value> > +template ::value> > void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept { if (amrex::isEmpty(box)) return; @@ -259,7 +272,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) n const auto len = amrex::length(box); const auto lenxy = len.x*len.y; const auto lenx = len.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -437,7 +450,7 @@ void ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept } } -template +template void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { if (amrex::isEmpty(box1) && amrex::isEmpty(box2)) return; @@ -452,7 +465,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& b const auto len2xy = len2.x*len2.y; const auto len1x = len1.x; const auto len2x = len2.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -491,7 +504,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& b } } -template +template void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept @@ -513,7 +526,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, const auto len1x = len1.x; const auto len2x = len2.x; const auto len3x = len3.x; - 
const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -561,7 +574,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, } } -template ::value>, typename M2=std::enable_if_t::value> > void ParallelFor (Gpu::KernelInfo const& /*info*/, @@ -580,7 +593,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, const auto len2xy = len2.x*len2.y; const auto len1x = len1.x; const auto len2x = len2.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -623,7 +636,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, } } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > @@ -649,7 +662,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, const auto len1x = len1.x; const auto len2x = len2.x; const auto len3x = len3.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -709,16 +722,34 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, template void single_task (gpuStream_t stream, L&& f) noexcept { - AMREX_LAUNCH_KERNEL(1, 1, 0, stream, + AMREX_LAUNCH_KERNEL(Gpu::Device::warp_size, 1, 1, 0, stream, [=] AMREX_GPU_DEVICE () noexcept {f();}); AMREX_GPU_ERROR_CHECK(); } +template +void launch (int nblocks, std::size_t shared_mem_bytes, gpuStream_t stream, + L&& f) noexcept +{ + AMREX_LAUNCH_KERNEL(MT, nblocks, MT, shared_mem_bytes, stream, + [=] AMREX_GPU_DEVICE () noexcept { f(); }); + AMREX_GPU_ERROR_CHECK(); +} + +template +void launch (int nblocks, gpuStream_t stream, 
L&& f) noexcept +{ + AMREX_LAUNCH_KERNEL(MT, nblocks, MT, 0, stream, + [=] AMREX_GPU_DEVICE () noexcept { f(); }); + AMREX_GPU_ERROR_CHECK(); +} + template void launch (int nblocks, int nthreads_per_block, std::size_t shared_mem_bytes, gpuStream_t stream, L&& f) noexcept { - AMREX_LAUNCH_KERNEL(nblocks, nthreads_per_block, shared_mem_bytes, + AMREX_ASSERT(nthreads_per_block <= AMREX_GPU_MAX_THREADS); + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, nblocks, nthreads_per_block, shared_mem_bytes, stream, [=] AMREX_GPU_DEVICE () noexcept { f(); }); AMREX_GPU_ERROR_CHECK(); } @@ -729,12 +760,12 @@ void launch (int nblocks, int nthreads_per_block, gpuStream_t stream, L&& f) noe launch(nblocks, nthreads_per_block, 0, stream, std::forward(f)); } -template +template void launch (T const& n, L&& f) noexcept { if (amrex::isEmpty(n)) return; - const auto ec = Gpu::ExecutionConfig(n); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(n); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (auto const i : Gpu::Range(n)) { f(i); @@ -793,13 +824,13 @@ namespace detail { } } -template ::value> > +template ::value> > std::enable_if_t::value> ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept { if (amrex::isEmpty(n)) return; - const auto ec = Gpu::ExecutionConfig(n); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(n); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (T i = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; i < n; i += stride) { @@ -809,7 +840,7 @@ ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value> ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { @@ -819,8 +850,8 @@ ParallelFor (Gpu::KernelInfo 
const&, Box const& box, L&& f) noexcept const auto len = amrex::length(box); const auto lenxy = len.x*len.y; const auto lenx = len.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) @@ -837,7 +868,7 @@ ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept AMREX_GPU_ERROR_CHECK(); } -template ::value> > +template ::value> > std::enable_if_t::value> ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { @@ -847,8 +878,8 @@ ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept const auto len = amrex::length(box); const auto lenxy = len.x*len.y; const auto lenx = len.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -871,7 +902,8 @@ ParallelForRNG (T n, L&& f) noexcept if (amrex::isEmpty(n)) return; randState_t* rand_state = getRandState(); const auto ec = Gpu::ExecutionConfig(n); - AMREX_LAUNCH_KERNEL(amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, + amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { int tid = blockDim.x*blockIdx.x+threadIdx.x; @@ -896,7 +928,8 @@ ParallelForRNG (Box const& box, L&& f) noexcept const auto lenxy = len.x*len.y; const auto lenx = len.x; 
const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, + amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { int tid = blockDim.x*blockIdx.x+threadIdx.x; @@ -927,7 +960,8 @@ ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept const auto lenxy = len.x*len.y; const auto lenx = len.x; const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, + amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { int tid = blockDim.x*blockIdx.x+threadIdx.x; @@ -948,7 +982,7 @@ ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept @@ -965,8 +999,8 @@ ParallelFor (Gpu::KernelInfo const&, const auto len2xy = len2.x*len2.y; const auto len1x = len1.x; const auto len2x = len2.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -993,7 +1027,7 @@ ParallelFor (Gpu::KernelInfo const&, AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, @@ -1016,8 +1050,8 @@ ParallelFor 
(Gpu::KernelInfo const&, const auto len1x = len1.x; const auto len2x = len2.x; const auto len3x = len3.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -1053,7 +1087,7 @@ ParallelFor (Gpu::KernelInfo const&, AMREX_GPU_ERROR_CHECK(); } -template ::value>, typename M2=std::enable_if_t::value> > std::enable_if_t::value && MaybeDeviceRunnable::value> @@ -1073,8 +1107,8 @@ ParallelFor (Gpu::KernelInfo const&, const auto len2xy = len2.x*len2.y; const auto len1x = len1.x; const auto len2x = len2.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -1105,7 +1139,7 @@ ParallelFor (Gpu::KernelInfo const&, AMREX_GPU_ERROR_CHECK(); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > @@ -1132,8 +1166,8 @@ ParallelFor (Gpu::KernelInfo const&, const auto len1x = len1.x; const auto len2x = len2.x; const auto len3x = len3.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -1183,29 
+1217,127 @@ void single_task (L&& f) noexcept single_task(Gpu::gpuStream(), std::forward(f)); } +template +void launch (T const& n, L&& f) noexcept +{ + launch(n, std::forward(f)); +} + +template ::value> > +std::enable_if_t::value> +ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept +{ + ParallelFor(info, n, std::forward(f)); +} + +template +std::enable_if_t::value> +ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +{ + ParallelFor(info, box, std::forward(f)); +} + +template ::value> > +std::enable_if_t::value> +ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +{ + ParallelFor(info, box, ncomp, std::forward(f)); +} + +template +std::enable_if_t::value && MaybeDeviceRunnable::value> +ParallelFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + ParallelFor(info, box1, box2, std::forward(f1), + std::forward(f2)); +} + +template +std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> +ParallelFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + ParallelFor(info, box1, box2, box3, std::forward(f1), + std::forward(f2), std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +std::enable_if_t::value && MaybeDeviceRunnable::value> +ParallelFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + ParallelFor(info, box1, ncomp1, std::forward(f1), + box2, ncomp2, std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> +ParallelFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + 
ParallelFor(info, box1, ncomp1, std::forward(f1), + box2, ncomp2, std::forward(f2), + box3, ncomp3, std::forward(f3)); +} + template ::value> > void For (Gpu::KernelInfo const& info, T n, L&& f) noexcept { - ParallelFor(info, n,std::forward(f)); + ParallelFor(info, n,std::forward(f)); +} + +template ::value> > +void For (Gpu::KernelInfo const& info, T n, L&& f) noexcept +{ + ParallelFor(info, n,std::forward(f)); } template void For (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept { - ParallelFor(info, box,std::forward(f)); + ParallelFor(info, box,std::forward(f)); +} + +template +void For (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +{ + ParallelFor(info, box,std::forward(f)); } template ::value> > void For (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept { - ParallelFor(info,box,ncomp,std::forward(f)); + ParallelFor(info,box,ncomp,std::forward(f)); +} + +template ::value> > +void For (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +{ + ParallelFor(info,box,ncomp,std::forward(f)); } template void For (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); + ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void For (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } template @@ -1213,7 +1345,15 @@ void For (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - ParallelFor(info,box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); + ParallelFor(info,box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void For (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + 
ParallelFor(info,box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } template (f1),box2,ncomp2,std::forward(f2)); + ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void For (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (info, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void For (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + ParallelFor(info, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); @@ -1244,32 +1409,63 @@ void For (Gpu::KernelInfo const& info, template ::value> > void ParallelFor (T n, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); + ParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); +} + +template ::value> > +void ParallelFor (T n, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); } template void ParallelFor (Box const& box, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{}, box, std::forward(f)); + ParallelFor(Gpu::KernelInfo{}, box, std::forward(f)); +} + +template +void ParallelFor (Box const& box, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, box, std::forward(f)); } template ::value> > void ParallelFor (Box const& box, T ncomp, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); + ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); +} + +template ::value> > +void ParallelFor (Box const& box, T ncomp, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } 
template void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); + ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } template void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); + ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void ParallelFor (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } template (f1),box2,ncomp2,std::forward(f2)); + ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (Gpu::KernelInfo{}, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); @@ -1298,32 +1517,63 @@ void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, template ::value> > void For (T n, L&& f) 
noexcept { - ParallelFor(Gpu::KernelInfo{}, n,std::forward(f)); + ParallelFor(Gpu::KernelInfo{}, n,std::forward(f)); +} + +template ::value> > +void For (T n, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, n,std::forward(f)); } template void For (Box const& box, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{}, box,std::forward(f)); + ParallelFor(Gpu::KernelInfo{}, box,std::forward(f)); +} + +template +void For (Box const& box, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, box,std::forward(f)); } template ::value> > void For (Box const& box, T ncomp, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); + ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); +} + +template ::value> > +void For (Box const& box, T ncomp, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } template void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); + ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } template void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); + ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void For (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } template (f1),box2,ncomp2,std::forward(f2)); + ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void For 
(Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (Gpu::KernelInfo{}, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void For (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); @@ -1354,7 +1627,19 @@ std::enable_if_t::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info,n,std::forward(f)); + ParallelFor(info,n,std::forward(f)); + } else { + AMREX_PRAGMA_SIMD + for (T i = 0; i < n; ++i) f(i); + } +} + +template ::value> > +std::enable_if_t::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info,n,std::forward(f)); } else { AMREX_PRAGMA_SIMD for (T i = 0; i < n; ++i) f(i); @@ -1365,7 +1650,14 @@ template ::value> HostDeviceParallelFor (T n, L&& f) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); + HostDeviceParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); +} + +template ::value> > +std::enable_if_t::value> +HostDeviceParallelFor (T n, L&& f) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); } template @@ -1373,7 +1665,18 @@ std::enable_if_t::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info, box,std::forward(f)); + ParallelFor(info, box,std::forward(f)); + } else { + LoopConcurrentOnCpu(box,std::forward(f)); + } +} + +template +std::enable_if_t::value> +HostDeviceParallelFor 
(Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info, box,std::forward(f)); } else { LoopConcurrentOnCpu(box,std::forward(f)); } @@ -1384,7 +1687,18 @@ std::enable_if_t::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info, box,ncomp,std::forward(f)); + ParallelFor(info, box,ncomp,std::forward(f)); + } else { + LoopConcurrentOnCpu(box,ncomp,std::forward(f)); + } +} + +template ::value> > +std::enable_if_t::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info, box,ncomp,std::forward(f)); } else { LoopConcurrentOnCpu(box,ncomp,std::forward(f)); } @@ -1396,21 +1710,34 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); + ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } else { LoopConcurrentOnCpu(box1,std::forward(f1)); LoopConcurrentOnCpu(box2,std::forward(f2)); } } -template +template +std::enable_if_t::value && MaybeHostDeviceRunnable::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); + } else { + LoopConcurrentOnCpu(box1,std::forward(f1)); + LoopConcurrentOnCpu(box2,std::forward(f2)); + } +} + +template std::enable_if_t::value && MaybeHostDeviceRunnable::value && MaybeHostDeviceRunnable::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info,box1,box2,box3, + ParallelFor(info,box1,box2,box3, 
std::forward(f1),std::forward(f2),std::forward(f3)); } else { LoopConcurrentOnCpu(box1,std::forward(f1)); @@ -1428,7 +1755,23 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box2, T2 ncomp2, L2&& f2) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); + ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); + } else { + LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); + LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); + } +} + +template ::value>, + typename M2=std::enable_if_t::value> > +std::enable_if_t::value && MaybeHostDeviceRunnable::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } else { LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); @@ -1446,7 +1789,29 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box3, T3 ncomp3, L3&& f3) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info, + ParallelFor(info, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); + } else { + LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); + LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); + LoopConcurrentOnCpu(box3,ncomp3,std::forward(f3)); + } +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +std::enable_if_t::value && MaybeHostDeviceRunnable::value && MaybeHostDeviceRunnable::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), 
box3,ncomp3,std::forward(f3)); @@ -1460,26 +1825,51 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, template ::value> > void HostDeviceFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept { - HostDeviceParallelFor(info,n,std::forward(f)); + HostDeviceParallelFor(info,n,std::forward(f)); +} + +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept +{ + HostDeviceParallelFor(info,n,std::forward(f)); } template void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept { - HostDeviceParallelFor(info,box,std::forward(f)); + HostDeviceParallelFor(info,box,std::forward(f)); +} + +template +void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +{ + HostDeviceParallelFor(info,box,std::forward(f)); } template ::value> > void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept { - HostDeviceParallelFor(info,box,ncomp,std::forward(f)); + HostDeviceParallelFor(info,box,ncomp,std::forward(f)); +} + +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +{ + HostDeviceParallelFor(info,box,ncomp,std::forward(f)); } template void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - HostDeviceParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); + HostDeviceParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void HostDeviceFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + HostDeviceParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } template @@ -1487,7 +1877,16 @@ void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - HostDeviceParallelFor(info, box1,box2,box3, + HostDeviceParallelFor(info, box1,box2,box3, + std::forward(f1),std::forward(f2),std::forward(f3)); +} 
+ +template +void HostDeviceFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + HostDeviceParallelFor(info, box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } @@ -1498,7 +1897,17 @@ void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box1, T1 ncomp1, L1&& f1, Box const& box2, T2 ncomp2, L2&& f2) noexcept { - HostDeviceParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); + HostDeviceParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + HostDeviceParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (info, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + HostDeviceParallelFor(info, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); @@ -1519,32 +1943,64 @@ void HostDeviceFor (Gpu::KernelInfo const& info, template ::value> > void HostDeviceParallelFor (T n, L&& f) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{},n,std::forward(f)); + HostDeviceParallelFor(Gpu::KernelInfo{},n,std::forward(f)); +} + +template ::value> > +void HostDeviceParallelFor (T n, L&& f) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},n,std::forward(f)); } template void HostDeviceParallelFor (Box const& box, L&& f) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{},box,std::forward(f)); + 
HostDeviceParallelFor(Gpu::KernelInfo{},box,std::forward(f)); +} + +template +void HostDeviceParallelFor (Box const& box, L&& f) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},box,std::forward(f)); } template ::value> > void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); + HostDeviceParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); +} + +template ::value> > +void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } template void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); + HostDeviceParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } template void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{}, box1,box2,box3, + HostDeviceParallelFor(Gpu::KernelInfo{}, box1,box2,box3, + std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{}, box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } @@ -1554,7 +2010,16 @@ template (f1),box2,ncomp2,std::forward(f2)); + HostDeviceParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + 
HostDeviceParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (Gpu::KernelInfo{}, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); diff --git a/Src/Base/AMReX_GpuLaunchMacrosG.H b/Src/Base/AMReX_GpuLaunchMacrosG.H index 89aa1f24bc9..b45076cdab6 100644 --- a/Src/Base/AMReX_GpuLaunchMacrosG.H +++ b/Src/Base/AMReX_GpuLaunchMacrosG.H @@ -40,7 +40,7 @@ if (amrex::Gpu::inLaunchRegion()) \ { \ const auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \ - AMREX_LAUNCH_KERNEL(amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \ block \ @@ -111,7 +111,7 @@ dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \ amrex_i_ec2.numBlocks.x); \ amrex_i_nblocks.y = 2; \ - AMREX_LAUNCH_KERNEL(amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ switch (blockIdx.y) { \ case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ @@ -202,7 +202,7 @@ amrex_i_ec2.numBlocks.x), \ amrex_i_ec3.numBlocks.x); \ amrex_i_nblocks.y = 3; \ - AMREX_LAUNCH_KERNEL(amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ + 
AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ switch (blockIdx.y) { \ case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ @@ -269,7 +269,7 @@ if (amrex::Gpu::inLaunchRegion()) \ { \ auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \ - AMREX_LAUNCH_KERNEL(amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \ block \ @@ -333,7 +333,7 @@ dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \ amrex_i_ec2.numBlocks.x); \ amrex_i_nblocks.y = 2; \ - AMREX_LAUNCH_KERNEL(amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ switch (blockIdx.y) { \ case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ @@ -410,7 +410,7 @@ amrex_i_ec2.numBlocks.x), \ amrex_i_ec3.numBlocks.x); \ amrex_i_nblocks.y = 3; \ - AMREX_LAUNCH_KERNEL(amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ switch (blockIdx.y) { \ case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ diff --git a/Src/Base/AMReX_Reduce.H b/Src/Base/AMReX_Reduce.H index 05b56b97fa9..d9c886412a7 100644 --- a/Src/Base/AMReX_Reduce.H +++ b/Src/Base/AMReX_Reduce.H @@ -935,7 +935,8 @@ bool AnyOf (Box const& box, P&& pred) } }); #else - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, ec.numBlocks, 
ec.numThreads, 0, + Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { __shared__ int has_any; if (threadIdx.x == 0) has_any = *dp; From 826cd378f8ba0d844c64e1029f7914c3b066debd Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Thu, 15 Sep 2022 17:26:00 -0700 Subject: [PATCH 071/111] Add roundoff_lo corresponding to roundoff_hi for domains that don't start at 0 (#2950) * Lay groundwork for roundoff_lo * Add dummy implementation of roundoff_lo computation * implement bisect_prob_lo * change idx -> dxinv * use rlo instead of plo in locateParticle Co-authored-by: atmyers --- Src/Base/AMReX_Geometry.H | 47 ++++++++++++++++++++----- Src/Base/AMReX_Geometry.cpp | 20 ++++++----- Src/Particle/AMReX_ParticleContainerI.H | 18 +++++----- Src/Particle/AMReX_ParticleUtil.H | 10 ++++-- 4 files changed, 67 insertions(+), 28 deletions(-) diff --git a/Src/Base/AMReX_Geometry.H b/Src/Base/AMReX_Geometry.H index 4238793861d..0e0a49f540e 100644 --- a/Src/Base/AMReX_Geometry.H +++ b/Src/Base/AMReX_Geometry.H @@ -69,22 +69,46 @@ public: namespace detail { template - T bisect_prob_hi (amrex::Real plo, amrex::Real phi, amrex::Real idx, int ilo, int ihi, amrex::Real tol) { + T bisect_prob_lo (amrex::Real plo, amrex::Real /*phi*/, amrex::Real dxinv, int ilo, int ihi, amrex::Real tol) { + T lo = static_cast(plo + tol); + bool safe; + { + int i = int(Math::floor((lo - plo)*dxinv)) + ilo; + safe = i >= ilo && i <= ihi; + } + if (safe) { + return lo; + } else { + // bisect the point at which the cell no longer maps to inside the domain + T hi = static_cast(plo + 0.5_rt/dxinv); + T mid = bisect(lo, hi, + [=] AMREX_GPU_HOST_DEVICE (T x) -> T + { + int i = int(Math::floor((x - plo)*dxinv)) + ilo; + bool inside = i >= ilo && i <= ihi; + return static_cast(inside) - T(0.5); + }, static_cast(tol)); + return mid - static_cast(tol); + } + } + + template + T bisect_prob_hi (amrex::Real plo, amrex::Real phi, amrex::Real dxinv, int ilo, int ihi, amrex::Real tol) { T hi = static_cast(phi - tol); bool 
safe; { - int i = int(Math::floor((hi - plo)*idx)) + ilo; + int i = int(Math::floor((hi - plo)*dxinv)) + ilo; safe = i >= ilo && i <= ihi; } if (safe) { return hi; } else { // bisect the point at which the cell no longer maps to inside the domain - T lo = static_cast(phi - 0.5_rt/idx); + T lo = static_cast(phi - 0.5_rt/dxinv); T mid = bisect(lo, hi, [=] AMREX_GPU_HOST_DEVICE (T x) -> T { - int i = int(Math::floor((x - plo)*idx)) + ilo; + int i = int(Math::floor((x - plo)*dxinv)) + ilo; bool inside = i >= ilo && i <= ihi; return static_cast(inside) - T(0.5); }, static_cast(tol)); @@ -217,6 +241,13 @@ public: return {{AMREX_D_DECL(prob_domain.hi(0),prob_domain.hi(1),prob_domain.hi(2))}}; } + GpuArray ProbLoArrayInParticleReal () const noexcept { +#ifdef AMREX_SINGLE_PRECISION_PARTICLES + return roundoff_lo_f; +#else + return roundoff_lo_d; +#endif + } GpuArray ProbHiArrayInParticleReal () const noexcept { #ifdef AMREX_SINGLE_PRECISION_PARTICLES return roundoff_hi_f; @@ -454,11 +485,11 @@ private: RealBox prob_domain; // Due to round-off errors, not all floating point numbers for which plo >= x < phi - // will map to a cell that is inside "domain". "roundoff_hi_d" and "roundoff_hi_f" each store - // a phi that is very close to that in prob_domain, and for which all doubles and floats less than + // will map to a cell that is inside "domain". "roundoff_{lo,hi}_{f,d}" each store + // a position that is very close to that in prob_domain, and for which all doubles and floats less than // it will map to a cell inside domain. 
- GpuArray roundoff_hi_d; - GpuArray roundoff_hi_f; + GpuArray roundoff_lo_d, roundoff_hi_d; + GpuArray roundoff_lo_f, roundoff_hi_f; // Box domain; diff --git a/Src/Base/AMReX_Geometry.cpp b/Src/Base/AMReX_Geometry.cpp index 1457db6b8d1..2f80f2eb947 100644 --- a/Src/Base/AMReX_Geometry.cpp +++ b/Src/Base/AMReX_Geometry.cpp @@ -512,14 +512,16 @@ Geometry::computeRoundoffDomain () int ihi = Domain().bigEnd(idim); Real plo = ProbLo(idim); Real phi = ProbHi(idim); - Real idx = InvCellSize(idim); + Real dxinv = InvCellSize(idim); Real deltax = CellSize(idim); Real ftol = std::max(1.e-4_rt*deltax, 2.e-7_rt*phi); Real dtol = std::max(1.e-8_rt*deltax, 1.e-14_rt*phi); - roundoff_hi_f[idim] = detail::bisect_prob_hi (plo, phi, idx, ilo, ihi, ftol); - roundoff_hi_d[idim] = detail::bisect_prob_hi(plo, phi, idx, ilo, ihi, dtol); + roundoff_lo_f[idim] = detail::bisect_prob_lo (plo, phi, dxinv, ilo, ihi, ftol); + roundoff_lo_d[idim] = detail::bisect_prob_lo(plo, phi, dxinv, ilo, ihi, dtol); + roundoff_hi_f[idim] = detail::bisect_prob_hi (plo, phi, dxinv, ilo, ihi, ftol); + roundoff_hi_d[idim] = detail::bisect_prob_hi(plo, phi, dxinv, ilo, ihi, dtol); } } @@ -527,18 +529,18 @@ bool Geometry::outsideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const { #ifdef AMREX_SINGLE_PRECISION_PARTICLES - bool outside = AMREX_D_TERM(x < prob_domain.lo(0) + bool outside = AMREX_D_TERM(x < roundoff_lo_f[0] || x >= roundoff_hi_f[0], - || y < prob_domain.lo(1) + || y < roundoff_lo_f[1] || y >= roundoff_hi_f[1], - || z < prob_domain.lo(2) + || z < roundoff_lo_f[2] || z >= roundoff_hi_f[2]); #else - bool outside = AMREX_D_TERM(x < prob_domain.lo(0) + bool outside = AMREX_D_TERM(x < roundoff_lo_d[0] || x >= roundoff_hi_d[0], - || y < prob_domain.lo(1) + || y < roundoff_lo_d[1] || y >= roundoff_hi_d[1], - || z < prob_domain.lo(2) + || z < roundoff_lo_d[2] || z >= roundoff_hi_d[2]); #endif return outside; diff --git a/Src/Particle/AMReX_ParticleContainerI.H 
b/Src/Particle/AMReX_ParticleContainerI.H index d05141fe87c..0ce02bbd225 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -239,10 +239,11 @@ ParticleContainer const auto& geom = Geom(0); const auto plo = geom.ProbLoArray(); const auto phi = geom.ProbHiArray(); + const auto rlo = geom.ProbLoArrayInParticleReal(); const auto rhi = geom.ProbHiArrayInParticleReal(); const auto is_per = geom.isPeriodicArray(); - return enforcePeriodic(p, plo, phi, rhi, is_per); + return enforcePeriodic(p, plo, phi, rlo, rhi, is_per); } template ::lo { if (Geom(0).outsideRoundoffDomain(AMREX_D_DECL(p.pos(0), p.pos(1), p.pos(2)))) { - RealBox prob_domain = Geom(0).ProbDomain(); - GpuArray phi = Geom(0).ProbHiArrayInParticleReal(); + GpuArray rhi = Geom(0).ProbHiArrayInParticleReal(); + GpuArray rlo = Geom(0).ProbLoArrayInParticleReal(); for (int idim=0; idim < AMREX_SPACEDIM; ++idim) { - if (p.pos(idim) <= prob_domain.lo(idim)) { - p.pos(idim) = std::nextafter((ParticleReal) prob_domain.lo(idim), phi[idim]); + if (p.pos(idim) <= rlo[idim]) { + p.pos(idim) = std::nextafter(rlo[idim], rhi[idim]); } - if (p.pos(idim) >= phi[idim]) { - p.pos(idim) = std::nextafter(phi[idim], (ParticleReal) prob_domain.lo(idim)); + if (p.pos(idim) >= rhi[idim]) { + p.pos(idim) = std::nextafter(rhi[idim], rlo[idim]); } } @@ -1250,6 +1251,7 @@ ParticleContainer Vector > new_sizes(num_levels); const auto plo = Geom(0).ProbLoArray(); const auto phi = Geom(0).ProbHiArray(); + const auto rlo = Geom(0).ProbLoArrayInParticleReal(); const auto rhi = Geom(0).ProbHiArrayInParticleReal(); const auto is_per = Geom(0).isPeriodicArray(); for (int lev = lev_min; lev <= finest_lev_particles; ++lev) @@ -1271,7 +1273,7 @@ ParticleContainer "perhaps particles have not been initialized correctly?"); int num_stay = partitionParticlesByDest(src_tile, assign_grid, BufferMap(), - plo, phi, rhi, is_per, lev, gid, tid, + plo, phi, rlo, rhi, is_per, lev, gid, tid, lev_min, lev_max, 
nGrow, remove_negative); int num_move = np - num_stay; diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 6623f353749..ee59cac67d0 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -517,6 +517,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool enforcePeriodic (P& p, amrex::GpuArray const& plo, amrex::GpuArray const& phi, + amrex::GpuArray const& rlo, amrex::GpuArray const& rhi, amrex::GpuArray const& is_per) noexcept { @@ -529,7 +530,9 @@ bool enforcePeriodic (P& p, p.pos(idim) -= static_cast(phi[idim] - plo[idim]); } // clamp to avoid precision issues; - if (p.pos(idim) < plo[idim]) p.pos(idim) = static_cast(plo[idim]); + if (p.pos(idim) < rlo[idim]) { + p.pos(idim) = rlo[idim]; + } shifted = true; } else if (p.pos(idim) < plo[idim]) { @@ -538,7 +541,7 @@ bool enforcePeriodic (P& p, } // clamp to avoid precision issues; if (p.pos(idim) > rhi[idim]) { - p.pos(idim) = static_cast(rhi[idim]); + p.pos(idim) = rhi[idim]; } shifted = true; } @@ -555,6 +558,7 @@ int partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBufferMap& pmap, const GpuArray& plo, const GpuArray& phi, + const GpuArray& rlo, const GpuArray& rhi, const GpuArray& is_per, int lev, int gid, int /*tid*/, @@ -602,7 +606,7 @@ partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBuff else { auto p_prime = p; - enforcePeriodic(p_prime, plo, phi, rhi, is_per); + enforcePeriodic(p_prime, plo, phi, rlo, rhi, is_per); auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow); assigned_grid = amrex::get<0>(tup_prime); assigned_lev = amrex::get<1>(tup_prime); From a6e0c11989d34b976245db5719eedd0e9040f264 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 20 Sep 2022 10:01:21 -0700 Subject: [PATCH 072/111] Add more warnings (#2956) * Add -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches to gcc. * Add -Wnon-virtual-dtor to clang. 
* Add more warnings to CI. * Fix some non-virtual dtors and some other warnings. --- .github/workflows/clang.yml | 4 ++-- .github/workflows/cuda.yml | 4 ++-- .github/workflows/gcc.yml | 16 ++++++++-------- .github/workflows/hip.yml | 4 ++-- .github/workflows/intel.yml | 2 +- .github/workflows/macos.yml | 4 ++-- Src/AmrCore/AMReX_ErrorList.H | 4 ++++ Src/EB/AMReX_distFcnElement.H | 6 +++--- Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp | 12 ++++++------ .../MLMG/AMReX_MLNodeLaplacian_misc.cpp | 8 ++++++++ Tools/GNUMake/comps/gnu.mak | 10 +++++++--- Tools/GNUMake/comps/llvm.mak | 2 +- 12 files changed, 46 insertions(+), 30 deletions(-) diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml index bdd629ce11f..79bbf1947b7 100644 --- a/.github/workflows/clang.yml +++ b/.github/workflows/clang.yml @@ -14,7 +14,7 @@ jobs: library_clang: name: Clang@6.0 C++14 SP NOMPI Debug [lib] runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wnon-virtual-dtor"} steps: - uses: actions/checkout@v2 - name: Dependencies @@ -50,7 +50,7 @@ jobs: tests_clang: name: Clang@6.0 C++14 SP Particles DP Mesh Debug [tests] runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -O1"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -O1 -Wnon-virtual-dtor"} # It's too slow with -O0 steps: - uses: actions/checkout@v2 diff --git 
a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index c5fbceb5d7e..6e080d8a848 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -11,7 +11,7 @@ jobs: tests-cuda10: name: CUDA@10.2 GNU@6.5.0 C++14 Release [tests] runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond"} steps: - uses: actions/checkout@v2 - name: Dependencies @@ -42,7 +42,7 @@ jobs: tests-cuda11: name: CUDA@11.2 GNU@9.3.0 C++17 Release [tests] runs-on: ubuntu-20.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - uses: actions/checkout@v2 - name: Dependencies diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 188d7d32f95..5ee581b4fef 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -15,7 +15,7 @@ jobs: library: name: GNU@7.5 C++17 Release [lib] runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - uses: 
actions/checkout@v2 - name: Dependencies @@ -43,7 +43,7 @@ jobs: tests_build_3D: name: GNU@7.5 C++14 3D Debug Fortran [tests] runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # It's too slow with -O0 steps: - uses: actions/checkout@v2 @@ -66,7 +66,7 @@ jobs: tests_build_2D: name: GNU@7.5 C++14 2D Debug Fortran [tests] runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # It's too slow with -O0 steps: - uses: actions/checkout@v2 @@ -89,7 +89,7 @@ jobs: tests_build_1D: name: GNU@7.5 C++14 1D Debug Fortran [tests] runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # -Werror temporarily skipped until we have functional testing established # It's too slow with -O0 steps: @@ -114,7 +114,7 @@ jobs: tests_cxx20: name: GNU@10.1 C++20 OMP [tests] runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual 
-Wextra-semi"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - uses: actions/checkout@v2 - name: Dependencies @@ -147,7 +147,7 @@ jobs: tests-nonmpi: name: GNU@7.5 C++14 NOMPI [tests] runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - uses: actions/checkout@v2 - name: Dependencies @@ -176,7 +176,7 @@ jobs: tests-nofortran: name: GNU@7.5 C++14 w/o Fortran [tests] runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - uses: actions/checkout@v2 - name: Dependencies @@ -274,7 +274,7 @@ jobs: tests_run: name: GNU@7.5 C++14 [tests] runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - uses: actions/checkout@v2 - name: Dependencies 
diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index 47d9a89828e..a487d27bf9c 100644 --- a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -20,7 +20,7 @@ jobs: # ^ # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:176:9: note: macro 'select_impl_' defined here # #define select_impl_(_1, _2, impl_, ...) impl_ - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} steps: - uses: actions/checkout@v2 - name: Dependencies @@ -67,7 +67,7 @@ jobs: # ^ # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:176:9: note: macro 'select_impl_' defined here # #define select_impl_(_1, _2, impl_, ...) 
impl_ - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} steps: - uses: actions/checkout@v2 - name: Dependencies diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 6fef4fc0459..80ae98cd2f1 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -11,7 +11,7 @@ jobs: name: DPCPP GFortran@7.5 C++17 [tests] runs-on: ubuntu-20.04 # mkl/rng/device/detail/mrg32k3a_impl.hpp has a number of sign-compare error - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-sign-compare"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-sign-compare"} steps: - uses: actions/checkout@v2 - name: Dependencies diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 61eb9b9ccdb..67db29cdcd8 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -14,7 +14,7 @@ jobs: env: # build universal binaries for M1 "Apple Silicon" and Intel CPUs CMAKE_OSX_ARCHITECTURES: "arm64;x86_64" - CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis" + CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion 
-Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-c++17-extensions -Wno-range-loop-analysis" # -Wno-range-loop-analysis: Apple clang has a bug in range-loop-analysis steps: - uses: actions/checkout@v2 @@ -39,7 +39,7 @@ jobs: name: AppleClang@11.0 GFortran@9.3 [tests] runs-on: macos-latest env: - CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis" + CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-c++17-extensions -Wno-range-loop-analysis" # -Wno-range-loop-analysis: Apple clang has a bug in range-loop-analysis steps: - uses: actions/checkout@v2 diff --git a/Src/AmrCore/AMReX_ErrorList.H b/Src/AmrCore/AMReX_ErrorList.H index 8cf67ea5567..1cc8d61fd07 100644 --- a/Src/AmrCore/AMReX_ErrorList.H +++ b/Src/AmrCore/AMReX_ErrorList.H @@ -420,6 +420,8 @@ std::ostream& operator << (std::ostream& os, const ErrorList& elst); struct UserFunc { + virtual ~UserFunc () {} + virtual void operator() (const amrex::Box& bx, amrex::Array4 const& dat, amrex::Array4 const& tag, @@ -470,6 +472,8 @@ std::ostream& operator << (std::ostream& os, const ErrorList& elst); const AMRErrorTagInfo& info = AMRErrorTagInfo()) noexcept : m_userfunc(userfunc), m_field(field), m_info(info), m_ngrow(ngrow) {} + virtual ~AMRErrorTag () {} + virtual void operator() (amrex::TagBoxArray& tb, const amrex::MultiFab* mf, char clearval, diff --git a/Src/EB/AMReX_distFcnElement.H b/Src/EB/AMReX_distFcnElement.H index f839bdb5747..2a9c7a0c2f4 100644 --- a/Src/EB/AMReX_distFcnElement.H +++ b/Src/EB/AMReX_distFcnElement.H @@ -12,7 +12,7 @@ class distFcnElement2d { public: //! 
Constructor distFcnElement2d() {} - ~distFcnElement2d() {} + virtual ~distFcnElement2d() {} virtual distFcnElement2d* newDistFcnElement2d() const = 0; @@ -29,7 +29,7 @@ class distFcnElement2d { class LineDistFcnElement2d: public distFcnElement2d { public: LineDistFcnElement2d() {} - ~LineDistFcnElement2d() {} + virtual ~LineDistFcnElement2d() {} virtual distFcnElement2d* newDistFcnElement2d() const override; @@ -58,7 +58,7 @@ class LineDistFcnElement2d: public distFcnElement2d { class SplineDistFcnElement2d: public distFcnElement2d { public: SplineDistFcnElement2d() {} - ~SplineDistFcnElement2d() {} + virtual ~SplineDistFcnElement2d() {} virtual distFcnElement2d* newDistFcnElement2d() const override; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp index 247e0fb292e..590e062a3a1 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp @@ -598,12 +598,12 @@ MLEBTensorOp::compVelGrad (int amrlev, const Array& fl } - else if ( loc==Location::FaceCenter ) - { - - amrex::Abort("compVelGrad not yet implemented for cut-cells "); - - } +// else if ( loc==Location::FaceCenter ) +// { +// +// amrex::Abort("compVelGrad not yet implemented for cut-cells "); +// +// } else // loc==Location::FaceCentroid { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp index df5ab489d2f..339ca98e072 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp @@ -26,7 +26,11 @@ MLNodeLaplacian::averageDownCoeffs () { for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { +#if (AMREX_SPACEDIM == 1) + int ndims = 1; +#else int ndims = (m_use_harmonic_average || m_use_mapped) ? 
AMREX_SPACEDIM : 1; +#endif for (int idim = 0; idim < ndims; ++idim) { if (m_sigma[amrlev][mglev][idim] == nullptr) { @@ -101,7 +105,11 @@ MLNodeLaplacian::averageDownCoeffsSameAmrLevel (int amrlev) if (m_coarsening_strategy != CoarseningStrategy::Sigma) return; +#if (AMREX_SPACEDIM == 1) + const int nsigma = 1; +#else const int nsigma = (m_use_harmonic_average || m_use_mapped) ? AMREX_SPACEDIM : 1; +#endif for (int mglev = 1; mglev < m_num_mg_levels[amrlev]; ++mglev) { diff --git a/Tools/GNUMake/comps/gnu.mak b/Tools/GNUMake/comps/gnu.mak index 10510f30a8d..5e621eb140e 100644 --- a/Tools/GNUMake/comps/gnu.mak +++ b/Tools/GNUMake/comps/gnu.mak @@ -97,7 +97,7 @@ else endif ifeq ($(WARN_ALL),TRUE) - warning_flags = -Wall -Wextra + warning_flags = -Wall -Wextra -Wlogical-op ifeq ($(WARN_SIGN_COMPARE),FALSE) warning_flags += -Wno-sign-compare @@ -109,7 +109,7 @@ ifeq ($(WARN_ALL),TRUE) endif ifeq ($(gcc_major_ge_6),1) - warning_flags += -Wnull-dereference + warning_flags += -Wnull-dereference -Wmisleading-indentation -Wduplicated-cond endif ifeq ($(gcc_major_ge_5),1) @@ -124,11 +124,15 @@ ifeq ($(WARN_ALL),TRUE) warning_flags += -Wno-array-bounds endif + ifeq ($(gcc_major_ge7),1) + warning_flags += -Wduplicated-branches + endif + ifeq ($(gcc_major_ge10),1) warning_flags += -Wextra-semi endif - CXXFLAGS += $(warning_flags) -Woverloaded-virtual + CXXFLAGS += $(warning_flags) -Woverloaded-virtual -Wnon-virtual-dtor CFLAGS += $(warning_flags) endif diff --git a/Tools/GNUMake/comps/llvm.mak b/Tools/GNUMake/comps/llvm.mak index ad516e0799d..86da5884b7f 100644 --- a/Tools/GNUMake/comps/llvm.mak +++ b/Tools/GNUMake/comps/llvm.mak @@ -50,7 +50,7 @@ ifeq ($(WARN_ALL),TRUE) warning_flags += -Wshadow endif - CXXFLAGS += $(warning_flags) -Woverloaded-virtual + CXXFLAGS += $(warning_flags) -Woverloaded-virtual -Wnon-virtual-dtor CFLAGS += $(warning_flags) endif From 3e5cc778028030ecb06bb079c5a6045f8f5fba6e Mon Sep 17 00:00:00 2001 From: "Don E. 
Willcox" Date: Tue, 20 Sep 2022 17:59:48 -0700 Subject: [PATCH 073/111] add option for makebuildsources to specify the style arguments for 'git describe'. (#2957) --- Tools/C_scripts/makebuildinfo_C.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Tools/C_scripts/makebuildinfo_C.py b/Tools/C_scripts/makebuildinfo_C.py index 8a05cd3f75d..07f31c0585a 100755 --- a/Tools/C_scripts/makebuildinfo_C.py +++ b/Tools/C_scripts/makebuildinfo_C.py @@ -186,11 +186,11 @@ def runcommand(command): out = p.stdout.read() return out.strip().decode("ascii") -def get_git_hash(d): +def get_git_hash(d, git_style): cwd = os.getcwd() os.chdir(d) try: - ghash = runcommand("git describe --always --tags --dirty") + ghash = runcommand("git describe " + git_style) except: ghash = "" os.chdir(cwd) @@ -259,6 +259,10 @@ def get_git_hash(d): help="the full path to the build directory that corresponds to build_git_name", type=str, default="") + parser.add_argument("--GIT_STYLE", + help="style options for the 'git describe' command used to construct hash strings", + type=str, default="--always --tags --dirty") + # parse and convert to a dictionary args = parser.parse_args() @@ -281,7 +285,7 @@ def get_git_hash(d): git_hashes = [] for d in GIT: if d and os.path.isdir(d): - git_hashes.append(get_git_hash(d)) + git_hashes.append(get_git_hash(d, args.GIT_STYLE)) else: git_hashes.append("") @@ -291,7 +295,7 @@ def get_git_hash(d): except: build_git_hash = "directory not valid" else: - build_git_hash = get_git_hash(args.build_git_dir) + build_git_hash = get_git_hash(args.build_git_dir, args.GIT_STYLE) os.chdir(running_dir) else: build_git_hash = "" From c4b7982d067497cc97ccb501ec08720b404d957e Mon Sep 17 00:00:00 2001 From: Luca Fedeli Date: Fri, 23 Sep 2022 21:17:12 +0200 Subject: [PATCH 074/111] Add GPU-compatible upper bound and lower bound algorithms to AMReX_Algorithm (#2958) --- Src/Base/AMReX_Algorithm.H | 51 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 
insertions(+) diff --git a/Src/Base/AMReX_Algorithm.H b/Src/Base/AMReX_Algorithm.H index b5a5f4973c7..18c9b59b28b 100644 --- a/Src/Base/AMReX_Algorithm.H +++ b/Src/Base/AMReX_Algorithm.H @@ -157,6 +157,57 @@ namespace amrex return hi; } + template + AMREX_GPU_HOST_DEVICE + ItType upper_bound (ItType first, ItType last, const ValType& val) + { +#if AMREX_DEVICE_COMPILE + std::ptrdiff_t count = last-first; + while(count>0){ + auto it = first; + const auto step = count/2; + it += step; + if (!(val < *it)){ + first = ++it; + count -= step + 1; + } + else{ + count = step; + } + } + + return first; +#else + return std::upper_bound(first, last, val); +#endif + } + + template + AMREX_GPU_HOST_DEVICE + ItType lower_bound (ItType first, ItType last, const ValType& val) + { +#ifdef AMREX_DEVICE_COMPILE + std::ptrdiff_t count = last-first; + while(count>0) + { + auto it = first; + const auto step = count/2; + it += step; + if (*it < val){ + first = ++it; + count -= step + 1; + } + else{ + count = step; + } + } + + return first; +#else + return std::lower_bound(first, last, val); +#endif + } + namespace detail { struct clzll_tag {}; From 27ef10654c4810fc7cfc0f941a3eec67b018bf34 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 23 Sep 2022 12:23:34 -0700 Subject: [PATCH 075/111] Quartic interpolation for cell centered data (#2960) New Interpolator for interpolation of cell centered data using a fourth-degree polynomial. Note that the interpolation is not conservative and does not do any slope limiting.
--- Src/AmrCore/AMReX_Interp_C.H | 48 +++++++++++++++ Src/AmrCore/AMReX_Interpolater.H | 69 ++++++++++++++++++++++ Src/AmrCore/AMReX_Interpolater.cpp | 93 ++++++++++++++++++++++++++++++ 3 files changed, 210 insertions(+) diff --git a/Src/AmrCore/AMReX_Interp_C.H b/Src/AmrCore/AMReX_Interp_C.H index e12c4495fde..967d3aaa177 100644 --- a/Src/AmrCore/AMReX_Interp_C.H +++ b/Src/AmrCore/AMReX_Interp_C.H @@ -135,5 +135,53 @@ face_linear_interp_z (int i, int j, int k, int n, amrex::Array4 con } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cell_quartic_interp_x (int i, int j, int k, int n, Array4 const& fine, + Array4 const& crse) noexcept +{ + constexpr Array1D c = {Real(0.01708984), Real(-0.12304688), + Real(0.92285156), Real(0.20507812), + Real(-0.02197266)}; + int ii = amrex::coarsen(i,2); + int s = 2*(i-ii*2) - 1; // if i == ii*2, s = -1; if i == ii*2+1, s = 1; + fine(i,j,k,n) = c(-2*s)*crse(ii-2,j,k,n) + + c( -s)*crse(ii-1,j,k,n) + + c( 0)*crse(ii ,j,k,n) + + c( s)*crse(ii+1,j,k,n) + + c( 2*s)*crse(ii+2,j,k,n); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cell_quartic_interp_y (int i, int j, int k, int n, Array4 const& fine, + Array4 const& crse) noexcept +{ + constexpr Array1D c = {Real(0.01708984), Real(-0.12304688), + Real(0.92285156), Real(0.20507812), + Real(-0.02197266)}; + int jj = amrex::coarsen(j,2); + int s = 2*(j-jj*2) - 1; // if j == jj*2, s = -1; if j == jj*2+1, s = 1; + fine(i,j,k,n) = c(-2*s)*crse(i,jj-2,k,n) + + c( -s)*crse(i,jj-1,k,n) + + c( 0)*crse(i,jj ,k,n) + + c( s)*crse(i,jj+1,k,n) + + c( 2*s)*crse(i,jj+2,k,n); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cell_quartic_interp_z (int i, int j, int k, int n, Array4 const& fine, + Array4 const& crse) noexcept +{ + constexpr Array1D c = {Real(0.01708984), Real(-0.12304688), + Real(0.92285156), Real(0.20507812), + Real(-0.02197266)}; + int kk = amrex::coarsen(k,2); + int s = 2*(k-kk*2) - 1; // if k == kk*2, s = -1; if k == kk*2+1, s = 1; + fine(i,j,k,n) = c(-2*s)*crse(i,j,kk-2,n) 
+ + c( -s)*crse(i,j,kk-1,n) + + c( 0)*crse(i,j,kk ,n) + + c( s)*crse(i,j,kk+1,n) + + c( 2*s)*crse(i,j,kk+2,n); +} + } #endif diff --git a/Src/AmrCore/AMReX_Interpolater.H b/Src/AmrCore/AMReX_Interpolater.H index 06398b73097..bdb6cf9d46b 100644 --- a/Src/AmrCore/AMReX_Interpolater.H +++ b/Src/AmrCore/AMReX_Interpolater.H @@ -844,6 +844,74 @@ public: }; +/** +* \brief Quartic interpolation on cell centered data. +* +* Quartic interpolation on cell centered data. +*/ + +class CellQuartic + : + public Interpolater +{ +public: + + /** + * \brief The constructor. + */ + explicit CellQuartic (); + + /** + * \brief The destructor. + */ + virtual ~CellQuartic () override; + + /** + * \brief Returns coarsened box given fine box and refinement ratio. + * + * \param fine + * \param ratio + */ + virtual Box CoarseBox (const Box& fine, int ratio) override; + + /** + * \brief Returns coarsened box given fine box and refinement ratio. + * + * \param fine + * \param ratio + */ + virtual Box CoarseBox (const Box& fine, const IntVect& ratio) override; + + /** + * \brief Coarse to fine interpolation in space. + * + * \param crse + * \param crse_comp + * \param fine + * \param fine_comp + * \param ncomp + * \param fine_region + * \param ratio + * \param crse_geom + * \param fine_geom + * \param bcr + * \param actual_comp + * \param actual_state + */ + virtual void interp (const FArrayBox& crse, + int crse_comp, + FArrayBox& fine, + int fine_comp, + int ncomp, + const Box& fine_region, + const IntVect& ratio, + const Geometry& crse_geom, + const Geometry& fine_geom, + Vector const& bcr, + int actual_comp, + int actual_state, + RunOn gpu_or_cpu) override; +}; //! CONSTRUCT A GLOBAL OBJECT OF EACH VERSION. 
extern AMREX_EXPORT PCInterp pc_interp; @@ -856,6 +924,7 @@ extern AMREX_EXPORT CellBilinear cell_bilinear_interp; extern AMREX_EXPORT CellConservativeProtected protected_interp; extern AMREX_EXPORT CellConservativeQuartic quartic_interp; extern AMREX_EXPORT CellQuadratic quadratic_interp; +extern AMREX_EXPORT CellQuartic cell_quartic_interp; } diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp index a78eac89aa0..8042aa2f322 100644 --- a/Src/AmrCore/AMReX_Interpolater.cpp +++ b/Src/AmrCore/AMReX_Interpolater.cpp @@ -18,6 +18,8 @@ namespace amrex { * * CellQuadratic only works in 2D and 3D on cpu and gpu. * + * CellQuartic works in 1D, 2D and 3D on cpu and gpu with ref ratio of 2 + * * CellConservativeQuartic only works with ref ratio of 2 on cpu and gpu. * * FaceDivFree works in 2D and 3D on cpu and gpu. @@ -37,6 +39,7 @@ CellConservativeProtected protected_interp; CellConservativeQuartic quartic_interp; CellBilinear cell_bilinear_interp; CellQuadratic quadratic_interp; +CellQuartic cell_quartic_interp; NodeBilinear::~NodeBilinear () {} @@ -988,4 +991,94 @@ FaceDivFree::interp_arr (Array const& crse, }); } +CellQuartic::CellQuartic () {} + +CellQuartic::~CellQuartic () {} + +Box +CellQuartic::CoarseBox (const Box& fine, const IntVect& ratio) +{ + Box crse = amrex::coarsen(fine,ratio); + crse.grow(2); + return crse; +} + +Box +CellQuartic::CoarseBox (const Box& fine, int ratio) +{ + Box crse = amrex::coarsen(fine,ratio); + crse.grow(2); + return crse; +} + +void +CellQuartic::interp (const FArrayBox& crse, + int crse_comp, + FArrayBox& fine, + int fine_comp, + int ncomp, + const Box& fine_region, + const IntVect& ratio, + const Geometry& /*crse_geom*/, + const Geometry& /*fine_geom*/, + Vector const& /*bcr*/, + int /* actual_comp */, + int /* actual_state */, + RunOn runon) +{ + BL_PROFILE("CellQuartic::interp()"); + amrex::ignore_unused(ratio); + AMREX_ASSERT(ratio == 2); + + Box target_fine_region = fine_region & fine.box(); + + 
bool run_on_gpu = (runon == RunOn::Gpu && Gpu::inLaunchRegion()); + amrex::ignore_unused(run_on_gpu); + + Array4 const& crsearr = crse.const_array(crse_comp); + Array4 const& finearr = fine.array(fine_comp); + +#if (AMREX_SPACEDIM == 3) + Box bz = amrex::coarsen(target_fine_region, IntVect(2,2,1)); + bz.grow(IntVect(2,2,0)); + FArrayBox tmpz(bz, ncomp); + Elixir tmpz_eli; + if (run_on_gpu) tmpz_eli = tmpz.elixir(); + Array4 const& tmpzarr = tmpz.array(); + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon, bz, ncomp, i, j, k, n, + { + cell_quartic_interp_z(i,j,k,n,tmpzarr,crsearr); + }); +#endif + +#if (AMREX_SPACEDIM >= 2) + Box by = amrex::coarsen(target_fine_region, IntVect(AMREX_D_DECL(2,1,1))); + by.grow(IntVect(AMREX_D_DECL(2,0,0))); + FArrayBox tmpy(by, ncomp); + Elixir tmpy_eli; + if (run_on_gpu) tmpy_eli = tmpy.elixir(); + Array4 const& tmpyarr = tmpy.array(); +#if (AMREX_SPACEDIM == 2) + Array4 srcarr = crsearr; +#else + Array4 srcarr = tmpz.const_array(); +#endif + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon, by, ncomp, i, j, k, n, + { + cell_quartic_interp_y(i,j,k,n,tmpyarr,srcarr); + }); +#endif + +#if (AMREX_SPACEDIM == 1) + Array4 srcarr = crsearr; +#else + srcarr = tmpy.const_array(); +#endif + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon, target_fine_region, ncomp, + i, j, k, n, + { + cell_quartic_interp_x(i,j,k,n,finearr,srcarr); + }); +} + } From 2a3cc05dac916961b1a5ae4c18b21bacd889e7fc Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 23 Sep 2022 12:24:05 -0700 Subject: [PATCH 076/111] CellData: data in a single cell (#2959) This adds struct CellData that allows for accessing data in a single cell in Array4. This is convenient sometimes because one can omit the i, j and k indices. It might also be faster sometimes because it can skip the repeated index calculation involving i,j,k. 
--- Src/Base/AMReX_Array4.H | 49 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/Src/Base/AMReX_Array4.H b/Src/Base/AMReX_Array4.H index 0fc4c049437..296762614d3 100644 --- a/Src/Base/AMReX_Array4.H +++ b/Src/Base/AMReX_Array4.H @@ -11,6 +11,50 @@ namespace amrex { + template + struct CellData // Data in a single cell + { + T* AMREX_RESTRICT p = nullptr; + Long stride = 0; + int ncomp = 0; + + AMREX_GPU_HOST_DEVICE + constexpr CellData (T* a_p, Long a_stride, int a_ncomp) + : p(a_p), stride(a_stride), ncomp(a_ncomp) + {} + + template ::value,int> = 0> + AMREX_GPU_HOST_DEVICE + constexpr CellData (CellData::type> const& rhs) noexcept + : p(rhs.p), stride(rhs.stride), ncomp(rhs.ncomp) + {} + + AMREX_GPU_HOST_DEVICE + explicit operator bool() const noexcept { return p != nullptr; } + + AMREX_GPU_HOST_DEVICE + int nComp() const noexcept { return ncomp; } + + template ::value,int> = 0> + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + U& operator[] (int n) const noexcept { +#if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK) + if (n < 0 || n >= ncomp) { +#if AMREX_DEVICE_COMPILE + AMREX_DEVICE_PRINTF(" %d is out of bound (0:%d)", n, ncomp-1); +#else + std::stringstream ss; + ss << " " << n << " is out of bound: (0:" << ncomp-1 << ")"; + amrex::Abort(ss.str()); +#endif + } +#endif + return p[n*stride]; + } + }; + template struct Array4 { @@ -207,6 +251,11 @@ namespace amrex { } } #endif + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + CellData cellData (int i, int j, int k) const noexcept { + return CellData{this->ptr(i,j,k), nstride, ncomp}; + } }; template From 8b367b0071787f8688d6f7eac55f7be251de6841 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 25 Sep 2022 09:22:13 -0700 Subject: [PATCH 077/111] Volume weighted sum (#2961) Add a new function doing volume weighted sum across AMR levels. This may not be exactly what amrex application codes want. But it should work for many cases. 
--- Src/Base/AMReX_MultiFabUtil.H | 12 +++ Src/Base/AMReX_MultiFabUtil.cpp | 154 ++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H index 1444bb90484..009f7a8f110 100644 --- a/Src/Base/AMReX_MultiFabUtil.H +++ b/Src/Base/AMReX_MultiFabUtil.H @@ -231,6 +231,18 @@ namespace amrex */ Gpu::HostVector sumToLine (MultiFab const& mf, int icomp, int ncomp, Box const& domain, int direction, bool local = false); + + /** \brief Volume weighted sum for a vector of MultiFabs + * + * Return a volume weighted sum of MultiFabs of AMR data. The sum is + * performed on a single component of the data. If the MultiFabs are + * built with EB Factories, the cut cell volume fraction will be + * included in the weight. + */ + Real volumeWeightedSum (Vector const& mf, int icomp, + Vector const& geom, + Vector const& ratio, + bool local = false); } namespace amrex { diff --git a/Src/Base/AMReX_MultiFabUtil.cpp b/Src/Base/AMReX_MultiFabUtil.cpp index 26a7242e89d..6b0768ba649 100644 --- a/Src/Base/AMReX_MultiFabUtil.cpp +++ b/Src/Base/AMReX_MultiFabUtil.cpp @@ -1226,4 +1226,158 @@ namespace amrex } return hv; } + + Real volumeWeightedSum (Vector const& mf, int icomp, + Vector const& geom, + Vector const& ratio, + bool local) + { + ReduceOps reduce_op; + ReduceData reduce_data(reduce_op); + +#ifdef AMREX_USE_EB + bool has_eb = !(mf[0]->isAllRegular()); +#endif + + int nlevels = mf.size(); + for (int ilev = 0; ilev < nlevels-1; ++ilev) { + iMultiFab mask = makeFineMask(*mf[ilev], *mf[ilev+1], IntVect(0), + ratio[ilev],Periodicity::NonPeriodic(), + 0, 1); + auto const& m = mask.const_arrays(); + auto const& a = mf[ilev]->const_arrays(); + auto const dx = geom[ilev].CellSizeArray(); + Real dv = AMREX_D_TERM(dx[0],*dx[1],*dx[2]); +#ifdef AMREX_USE_EB + if (has_eb) { + AMREX_ASSERT(mf[ilev]->hasEBFabFactory()); + auto const& f = dynamic_cast + (mf[ilev]->Factory()); + auto const& vfrac = f.getVolFrac(); + auto 
const& va = vfrac.const_arrays(); + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + -> Real + { + return m[box_no](i,j,k) ? Real(0.) + : dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k); + }); + } else +#endif + { +#if (AMREX_SPACEDIM == 1) + if (geom[ilev].IsSPHERICAL()) { + const auto rlo = geom[ilev].CellSize(0); + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + if (m[box_no](i,j,k)) { + return Real(0.); + } else { + constexpr Real pi = Real(3.1415926535897932); + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + return Real(4./3.)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri) + * a[box_no](i,j,k,icomp); + } + }); + } else +#elif (AMREX_SPACEDIM == 2) + if (geom[ilev].IsRZ()) { + const auto rlo = geom[ilev].CellSize(0); + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + if (m[box_no](i,j,k)) { + return Real(0.); + } else { + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + constexpr Real pi = Real(3.1415926535897932); + return pi*dx[1]*dx[0]*(ro+ri) + * a[box_no](i,j,k,icomp); + } + }); + } else +#endif + { + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + return m[box_no](i,j,k) ? Real(0.) 
+ : dv*a[box_no](i,j,k,icomp); + }); + } + } + Gpu::streamSynchronize(); + } + + auto const& a = mf.back()->const_arrays(); + auto const dx = geom[nlevels-1].CellSizeArray(); + Real dv = AMREX_D_TERM(dx[0],*dx[1],*dx[2]); +#ifdef AMREX_USE_EB + if (has_eb) { + AMREX_ASSERT(mf.back()->hasEBFabFactory()); + auto const& f = dynamic_cast + (mf.back()->Factory()); + auto const& vfrac = f.getVolFrac(); + auto const& va = vfrac.const_arrays(); + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + -> Real + { + return dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k); + }); + } else +#endif + { +#if (AMREX_SPACEDIM == 1) + if (geom[nlevels-1].IsSPHERICAL()) { + const auto rlo = geom[nlevels-1].CellSize(0); + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + constexpr Real pi = Real(3.1415926535897932); + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + return Real(4./3.)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri) + * a[box_no](i,j,k,icomp); + }); + } else +#elif (AMREX_SPACEDIM == 2) + if (geom[nlevels-1].IsRZ()) { + const auto rlo = geom[nlevels-1].CellSize(0); + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + constexpr Real pi = Real(3.1415926535897932); + return pi*dx[1]*dx[0]*(ro+ri) + * a[box_no](i,j,k,icomp); + }); + } else +#endif + { + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + return dv*a[box_no](i,j,k,icomp); + }); + } + } + + auto const& hv = reduce_data.value(reduce_op); + Real r = amrex::get<0>(hv); + + if (!local) { + ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub()); + } + return r; + } } From 5e84f43241edfec7754d3ebfc369154bf249d992 Mon Sep 17 00:00:00 2001 From: asalmgren Date: Sun, 25 Sep 2022 
09:38:51 -0700 Subject: [PATCH 078/111] make tagging routines EB_aware (#2962) --- Src/AmrCore/AMReX_ErrorList.cpp | 220 ++++++++++++++++++++++++------ Src/Base/AMReX_VisMF.H | 2 - Src/Particle/AMReX_ParticleInit.H | 4 - 3 files changed, 180 insertions(+), 46 deletions(-) diff --git a/Src/AmrCore/AMReX_ErrorList.cpp b/Src/AmrCore/AMReX_ErrorList.cpp index 7f37324123a..6dcb5565227 100644 --- a/Src/AmrCore/AMReX_ErrorList.cpp +++ b/Src/AmrCore/AMReX_ErrorList.cpp @@ -300,78 +300,218 @@ AMRErrorTag::operator() (TagBoxArray& tba, if (m_test == GRAD) { - ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& dat = datma[bi]; + auto const& flag = flags[bi]; + + Real ax = 0.; Real ay = 0.; + if (flag(i,j,k).isConnected(1,0,0)) { + ax = amrex::max(ax,amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(-1,0,0)) { + ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); + } + if (flag(i,j,k).isConnected(0,1,0)) { + ay = amrex::max(ay,amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(0,-1,0)) { + ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); + } +#if AMREX_SPACEDIM > 2 + Real az = 0.; + if (flag(i,j,k).isConnected(0,0,1)) { + az = amrex::max(az,amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(0,0,-1)) { + az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); + } +#endif + if (amrex::max(AMREX_D_DECL(ax,ay,az)) >= threshold) { + tagma[bi](i,j,k) = tag_update; + } + }); + } else +#endif { - auto const& dat = datma[bi]; - auto ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); - ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); + ParallelFor(tba, [=] 
AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& dat = datma[bi]; + + Real ax = 0.; + ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); + ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); #if AMREX_SPACEDIM == 1 - if (ax >= threshold) { tagma[bi](i,j,k) = tag_update;} + if (ax >= threshold) { tagma[bi](i,j,k) = tag_update;} #else - auto ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); - ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); + Real ay = 0.; + ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); + ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); #if AMREX_SPACEDIM > 2 - auto az = amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k)); - az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); -#endif - if (amrex::max(AMREX_D_DECL(ax,ay,az)) >= threshold) { - tagma[bi](i,j,k) = tag_update; - } -#endif - }); + Real az = 0.; + az = amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k)); + az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); +#endif // DIM > 2 + if (amrex::max(AMREX_D_DECL(ax,ay,az)) >= threshold) { + tagma[bi](i,j,k) = tag_update; + } +#endif // DIM > 1 + }); + } } else if (m_test == RELGRAD) { - ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& dat = datma[bi]; + auto const& flag = flags[bi]; + + Real ax = 0.; Real ay = 0.; + + if (flag(i,j,k).isConnected(1,0,0)) { + ax = amrex::max(ax,amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(-1,0,0)) { + ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); + } + if (flag(i,j,k).isConnected(0,1,0)) { + ay = amrex::max(ay,amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k))); + } + if 
(flag(i,j,k).isConnected(0,-1,0)) { + ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); + } +#if AMREX_SPACEDIM > 2 + Real az = 0.; + if (flag(i,j,k).isConnected(0,0,1)) { + az = amrex::max(az,amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(0,0,-1)) { + az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); + } +#endif // DIM > 2 + if (amrex::max(AMREX_D_DECL(ax,ay,az)) + >= threshold * amrex::Math::abs(dat(i,j,k))) { + tagma[bi](i,j,k) = tag_update; + } + }); + } else +#endif { - auto const& dat = datma[bi]; - auto ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); - ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& dat = datma[bi]; + + Real ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); + ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); #if AMREX_SPACEDIM == 1 - if (ax >= threshold * amrex::Math::abs(dat(i,j,k))) { tagma[bi](i,j,k) = tag_update;} + if (ax >= threshold * amrex::Math::abs(dat(i,j,k))) { tagma[bi](i,j,k) = tag_update;} #else - auto ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); - ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); + Real ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); + ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); #if AMREX_SPACEDIM > 2 - auto az = amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k)); - az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); -#endif - if (amrex::max(AMREX_D_DECL(ax,ay,az)) - >= threshold * amrex::Math::abs(dat(i,j,k))) { - tagma[bi](i,j,k) = tag_update; - } -#endif - }); + Real az = amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k)); + az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); +#endif // DIM > 2 + if (amrex::max(AMREX_D_DECL(ax,ay,az)) + >= threshold * amrex::Math::abs(dat(i,j,k))) { + tagma[bi](i,j,k) = tag_update; + } +#endif // DIM > 1 + }); + } } else if 
(m_test == LESS) { - ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + Real vol = volume_weighting ? Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; + auto const& flag = flags[bi]; + if (!flag(i,j,k).isCovered()) { + if (datma[bi](i,j,k) * vol <= threshold) { + tagma[bi](i,j,k) = tag_update; + } + } + }); + } else +#endif { + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept { Real vol = volume_weighting ? Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; if (datma[bi](i,j,k) * vol <= threshold) { tagma[bi](i,j,k) = tag_update; } }); + } } else if (m_test == GREATER) { +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + Real vol = volume_weighting ? Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; + auto const& flag = flags[bi]; + if (!flag(i,j,k).isCovered()) { + if (datma[bi](i,j,k) * vol >= threshold) { + tagma[bi](i,j,k) = tag_update; + } + } + }); + } else +#endif ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept { Real vol = volume_weighting ? 
Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; - if (datma[bi](i,j,k) * vol >= threshold) { - tagma[bi](i,j,k) = tag_update; - } + if (datma[bi](i,j,k) * vol >= threshold) { + tagma[bi](i,j,k) = tag_update; + } }); } else if (m_test == VORT) { const Real fac = threshold * Real(std::pow(2,level)); - ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& flag = flags[bi]; + if (!flag(i,j,k).isCovered()) { + if (datma[bi](i,j,k) >= fac) { + tagma[bi](i,j,k) = tag_update; + } + } + }); + } else +#endif { - if (datma[bi](i,j,k) >= fac) { - tagma[bi](i,j,k) = tag_update; - } - }); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + if (datma[bi](i,j,k) >= fac) { + tagma[bi](i,j,k) = tag_update; + } + }); + } } else { diff --git a/Src/Base/AMReX_VisMF.H b/Src/Base/AMReX_VisMF.H index 12777a08307..bfab54abf8d 100644 --- a/Src/Base/AMReX_VisMF.H +++ b/Src/Base/AMReX_VisMF.H @@ -638,7 +638,6 @@ Read (FabArray& fa, const std::string& name) } int totalioreqs = nboxes; - int messtotal = 0; int reqspending = 0; int iopfileindex; std::deque iopreads; @@ -669,7 +668,6 @@ Read (FabArray& fa, const std::string& name) } } else { ParallelDescriptor::Send(vreads, tryproc, readtag); - ++messtotal; ++reqspending; } availablefiles.erase(afilesiter); diff --git a/Src/Particle/AMReX_ParticleInit.H b/Src/Particle/AMReX_ParticleInit.H index ee8afc778e2..03320383ada 100644 --- a/Src/Particle/AMReX_ParticleInit.H +++ b/Src/Particle/AMReX_ParticleInit.H @@ -1022,8 +1022,6 @@ InitRandom (Long icount, ParticleLocData pld; - int cnt = 0; - Vector, Gpu::HostVector > > host_particles; host_particles.reserve(15); host_particles.resize(finestLevel()+1); @@ -1079,8 
+1077,6 @@ InitRandom (Long icount, for (int i = 0; i < NArrayInt; i++) { host_int_attribs[pld.m_lev][ind][i].push_back(pdata.int_array_data[i]); } - - cnt++; } } From b84d7c069cef7470f195b250926ca0e84ec46fb2 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 26 Sep 2022 16:05:10 -0700 Subject: [PATCH 079/111] Fix MLEBNodeFDLaplacian bottom solver (#2963) MLEBNodeFDLaplacian is never singular because it has a Dirichlet boundary on the EB surface. We did set the singular flag to false, but forgot that the bottom solver uses a different function to query it. This fixes it by overriding the isBottomSingular function. --- Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H | 1 + 1 file changed, 1 insertion(+) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H index 0d294c9da8c..41190f229a8 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H @@ -100,6 +100,7 @@ public: virtual void fixUpResidualMask (int amrlev, iMultiFab& resmsk) final override; virtual bool isSingular (int) const final override { return false; } + virtual bool isBottomSingular () const final override { return false; } virtual void compGrad (int amrlev, const Array& grad, MultiFab& sol, Location /*loc*/) const override; From e55d6b4f5375efb22ebed9b467878e301763073b Mon Sep 17 00:00:00 2001 From: Junghyeon Park Date: Thu, 29 Sep 2022 01:20:15 +0900 Subject: [PATCH 080/111] Update the SWFFT project site (#2965) --- Docs/sphinx_documentation/source/SWFFT.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Docs/sphinx_documentation/source/SWFFT.rst b/Docs/sphinx_documentation/source/SWFFT.rst index 3e886dcc2a8..9e6192ff048 100644 --- a/Docs/sphinx_documentation/source/SWFFT.rst +++ b/Docs/sphinx_documentation/source/SWFFT.rst @@ -98,7 +98,7 @@ AMReX contains two SWFFT tutorials, `SWFFT Poisson`_ and `SWFFT Simple`_: .. 
_`SWFFT Simple`: https://amrex-codes.github.io/amrex/tutorials_html/SWFFT_Tutorial.html#swfft-simple .. [1] - https://xgitlab.cels.anl.gov/hacc/SWFFT + https://git.cels.anl.gov/hacc/SWFFT .. [2] SWFFT source code directory in AMReX: amrex/Src/Extern/SWFFT From cd07b0d84244d08cf2690a19e0312f349ec0aeaa Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 28 Sep 2022 09:20:42 -0700 Subject: [PATCH 081/111] Fix int overflow in amrex::bisect (#2964) Change from (lo+hi)/2 to lo+(hi-lo)/2. Although it's very unlikely, it's possible (lo+hi), where both lo and hi are integers, could overflow. --- Src/Base/AMReX_Algorithm.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_Algorithm.H b/Src/Base/AMReX_Algorithm.H index 18c9b59b28b..65a5f8cb763 100644 --- a/Src/Base/AMReX_Algorithm.H +++ b/Src/Base/AMReX_Algorithm.H @@ -145,7 +145,7 @@ namespace amrex AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE I bisect (T const* d, I lo, I hi, T const& v) { while (lo <= hi) { - int mid = (lo+hi)/2; + int mid = lo + (hi-lo)/2; if (v >= d[mid] && v < d[mid+1]) { return mid; } else if (v < d[mid]) { From d65e09e4a85dd2765a8cbe0ac9eba6223c47121b Mon Sep 17 00:00:00 2001 From: Roberto Porcu <53792251+rporcu@users.noreply.github.com> Date: Thu, 29 Sep 2022 15:46:19 -0400 Subject: [PATCH 082/111] Solve an issue with particles async IO when having runtime added variables (#2966) --- Src/Particle/AMReX_WriteBinaryParticleData.H | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H index 70fcbfda237..d07e86ac0b2 100644 --- a/Src/Particle/AMReX_WriteBinaryParticleData.H +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -766,7 +766,25 @@ void WriteBinaryParticleDataAsync (PC const& pc, if (np_per_grid_local[lev][mfi.index()] > 0) { const auto& ptile = pc.ParticlesAt(lev, mfi); - new_ptile.resize(np_per_grid_local[lev][mfi.index()]); + + const auto 
np = np_per_grid_local[lev][mfi.index()]; + + new_ptile.resize(np); + + const auto runtime_real_comps = ptile.NumRuntimeRealComps(); + const auto runtime_int_comps = ptile.NumRuntimeIntComps(); + + constexpr auto NReal = NArrayReal + NStructReal; + constexpr auto NInt = NArrayInt + NStructInt; + + new_ptile.define(runtime_real_comps, runtime_int_comps); + + for (auto comp(0); comp < runtime_real_comps; ++comp) + new_ptile.push_back_real(NReal+comp, np, 0.); + + for (auto comp(0); comp < runtime_int_comps; ++comp) + new_ptile.push_back_int(NInt+comp, np, 0); + amrex::filterParticles(new_ptile, ptile, KeepValidFilter()); } } From 62379fbac96867437070c4852d3d741a76dc1a4b Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 30 Sep 2022 15:37:35 -0700 Subject: [PATCH 083/111] Update CHANGES for 22.10 (#2968) --- CHANGES | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/CHANGES b/CHANGES index 726cacffa21..a9ab0555a58 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,47 @@ +# 22.10 + + -- Solve an issue with particles async IO when having runtime added variables (#2966) + + -- Fix int overflow in amrex::bisect (#2964) + + -- Fix MLEBNodeFDLaplacian bottom solver (#2963) + + -- make tagging routines EB_aware (#2962) + + -- Volume weighted sum (#2961) + + -- CellData: data in a single cell (#2959) + + -- Quartic interpolation for cell centered data (#2960) + + -- Add GPU-compatible upper bound and lower bound algorithms to AMReX_Algorithm (#2958) + + -- add option for makebuildsources to specify the style arguments for 'git describe'. (#2957) + + -- Add roundoff_lo corresponding to roundoff_hi for domains that don't start at 0 (#2950) + + -- Add template parameter to ParallelFor and launch specifying block size (#2947) + + -- Byte spread fixes (#2949) + + -- CMake: HIP_PATH from ROCM_PATH (#2948) + + -- Fix: Make Finalize->Initialize->F->I->... 
Work (#2944) + + -- Changes for Cray & Clang (#2941) + + -- Link to cublas when using CUDA and Hypre (#2933) + + -- HIP: use coarse grained host memory (#2932) + + -- EB checkpoint files (#2897) + + -- Fix: Loading Files Again (#2936) + + -- Check if boundary particles container has been created before clearance. (#2935) + + -- SYCL: Replace deprecated atomic types and operations (#2921) + # 22.09 -- Preserve neighbor particles when sorting particles. (#2923) From 13aa4df0f5a4af40270963ad5b42ac7ce662e045 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 30 Sep 2022 17:48:22 -0700 Subject: [PATCH 084/111] Disable host device for macros for SYCL/DPC++ (#2969) The host part of the AMREX_HOST_DEVICE_FOR_* macros is disabled for SYCL/DPC++. It's really slow for compilation. --- Src/Base/AMReX_GpuLaunch.H | 107 ++++++++++++++++++++++++++++++ Src/Base/AMReX_GpuLaunchFunctsG.H | 52 +++++++++++++++ Src/Base/AMReX_GpuLaunchMacrosG.H | 56 ++++++++++++++++ Src/EB/AMReX_EB2_GeometryShop.H | 1 + 4 files changed, 216 insertions(+) diff --git a/Src/Base/AMReX_GpuLaunch.H b/Src/Base/AMReX_GpuLaunch.H index d1a9e352336..a91cf45297d 100644 --- a/Src/Base/AMReX_GpuLaunch.H +++ b/Src/Base/AMReX_GpuLaunch.H @@ -243,6 +243,8 @@ namespace Gpu { #ifdef AMREX_USE_GPU +#ifndef AMREX_USE_DPCPP + #define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \ { using amrex_i_inttype = typename std::remove_const::type; \ if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ @@ -366,6 +368,111 @@ namespace Gpu { block3; \ } +#else +// xxxxx DPCPP todo: host disabled in host device + +#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \ + { using amrex_i_inttype = typename std::remove_const::type; \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + }} + +#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \ + { using amrex_i_inttype = typename std::remove_const::type; \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + }} + +#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + } + +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#endif + #else #define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \ diff --git a/Src/Base/AMReX_GpuLaunchFunctsG.H b/Src/Base/AMReX_GpuLaunchFunctsG.H index 5f7c067935c..7940b5589a0 100644 --- a/Src/Base/AMReX_GpuLaunchFunctsG.H +++ b/Src/Base/AMReX_GpuLaunchFunctsG.H @@ -1629,8 +1629,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept if (Gpu::inLaunchRegion()) { ParallelFor(info,n,std::forward(f)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); +#else AMREX_PRAGMA_SIMD for (T i = 0; i < n; ++i) f(i); +#endif } } @@ -1641,8 +1645,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept if (Gpu::inLaunchRegion()) { ParallelFor(info,n,std::forward(f)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else AMREX_PRAGMA_SIMD for (T i = 0; i < n; ++i) f(i); +#endif } } @@ -1667,7 +1675,11 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexc if (Gpu::inLaunchRegion()) { ParallelFor(info, box,std::forward(f)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box,std::forward(f)); +#endif } } @@ -1678,7 +1690,11 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexc if (Gpu::inLaunchRegion()) { ParallelFor(info, box,std::forward(f)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box,std::forward(f)); +#endif } } @@ -1689,7 +1705,11 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& if (Gpu::inLaunchRegion()) { ParallelFor(info, box,ncomp,std::forward(f)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box,ncomp,std::forward(f)); +#endif } } @@ -1700,7 +1720,11 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& if (Gpu::inLaunchRegion()) { ParallelFor(info, box,ncomp,std::forward(f)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); +#else LoopConcurrentOnCpu(box,ncomp,std::forward(f)); +#endif } } @@ -1712,8 +1736,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, if (Gpu::inLaunchRegion()) { ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,std::forward(f1)); LoopConcurrentOnCpu(box2,std::forward(f2)); +#endif } } @@ -1725,8 +1753,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, if (Gpu::inLaunchRegion()) { ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,std::forward(f1)); LoopConcurrentOnCpu(box2,std::forward(f2)); +#endif } } @@ -1740,9 +1772,13 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, ParallelFor(info,box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,std::forward(f1)); LoopConcurrentOnCpu(box2,std::forward(f2)); LoopConcurrentOnCpu(box3,std::forward(f3)); +#endif } } @@ -1757,8 +1793,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, if (Gpu::inLaunchRegion()) { ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); +#endif } } @@ -1773,8 +1813,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, if (Gpu::inLaunchRegion()) { ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); +#endif } } @@ -1794,9 +1838,13 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); LoopConcurrentOnCpu(box3,ncomp3,std::forward(f3)); +#endif } } @@ -1816,9 +1864,13 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); LoopConcurrentOnCpu(box3,ncomp3,std::forward(f3)); +#endif } } diff --git a/Src/Base/AMReX_GpuLaunchMacrosG.H b/Src/Base/AMReX_GpuLaunchMacrosG.H index b45076cdab6..e1c643454bc 100644 --- a/Src/Base/AMReX_GpuLaunchMacrosG.H +++ b/Src/Base/AMReX_GpuLaunchMacrosG.H @@ -29,10 +29,16 @@ } \ } \ else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + }}} + +#if 0 for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \ block \ } \ }}} +#endif + #else #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(TN,TI,block) \ { auto const& amrex_i_tn = TN; \ @@ -93,6 +99,10 @@ } \ } \ else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + }}} + +#if 0 for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ block1 \ } \ @@ -100,6 +110,8 @@ block2 \ } \ }}} +#endif + #else #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(TN1,TI1,block1,TN2,TI2,block2) \ { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; \ @@ -179,6 +191,10 @@ } \ } \ else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + }}} + +#if 0 for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ block1 \ } \ @@ -189,6 +205,8 @@ block3 \ } \ }}} +#endif + #else #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(TN1,TI1,block1,TN2,TI2,block2,TN3,TI3,block3) \ { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; auto const& amrex_i_tn3 = TN3; \ @@ -434,6 +452,18 @@ // FOR_1D +#ifdef AMREX_USE_DPCPP +#define AMREX_GPU_HOST_DEVICE_FOR_1D(n,i,block) \ +{ \ + auto const& amrex_i_n = n; \ + using amrex_i_inttype = typename std::remove_const::type; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + } \ +} +#else #define AMREX_GPU_HOST_DEVICE_FOR_1D(n,i,block) \ { \ auto const& amrex_i_n = n; \ @@ -446,6 +476,7 @@ for (amrex_i_inttype i = 0; i < amrex_i_n; ++i) amrex_i_lambda(i); \ } \ } +#endif #define AMREX_GPU_DEVICE_FOR_1D(n,i,block) \ { \ @@ -455,6 +486,17 @@ // FOR_3D +#ifdef AMREX_USE_DPCPP +#define AMREX_GPU_HOST_DEVICE_FOR_3D(box,i,j,k,block) \ +{ \ + auto const& amrex_i_box = box; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } \ +} +#else #define AMREX_GPU_HOST_DEVICE_FOR_3D(box,i,j,k,block) \ { \ auto const& amrex_i_box = box; \ @@ -464,6 +506,7 @@ amrex::LoopConcurrentOnCpu(amrex_i_box,[=] (int i, int j, int k) noexcept block); \ } \ } +#endif #define AMREX_GPU_DEVICE_FOR_3D(box,i,j,k,block) \ { \ @@ -472,6 +515,18 @@ // FOR_4D +#ifdef AMREX_USE_DPCPP +#define AMREX_GPU_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ +{ \ + auto const& amrex_i_box = box; \ + auto const& amrex_i_ncomp = ncomp; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + } \ +} +#else #define AMREX_GPU_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ { \ auto const& amrex_i_box = box; \ @@ -482,6 +537,7 @@ amrex::LoopConcurrentOnCpu(amrex_i_box,amrex_i_ncomp,[=] (int i, int j, int k, int n) noexcept block); \ } \ } +#endif #define AMREX_GPU_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ { \ diff --git a/Src/EB/AMReX_EB2_GeometryShop.H b/Src/EB/AMReX_EB2_GeometryShop.H index ff80dd20593..2a7565abad2 100644 --- a/Src/EB/AMReX_EB2_GeometryShop.H +++ b/Src/EB/AMReX_EB2_GeometryShop.H @@ -244,6 +244,7 @@ public: } } } + amrex::ignore_unused(nzero); if (nbody == 0) { return allregular; From de7b7f44afda2227368a30646faeeea0d4679bec Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 3 Oct 2022 14:06:58 -0700 Subject: [PATCH 085/111] Fix Tensor Solver BC (#2930) This fixes some bugs in the physical domain BC of the tensor linear solver. At the corner of two no-slip walls (e.g., (0,0)), we have u(-1,0) = -u(0,0) and u(0,-1) = -u(0,0). It's incorrect to fill the corner ghost cell with u(-1,-1) = u(-1,0) + u(0,-1) - u(0,0), because it will result in u(-1,-1) = -3 * u(0,0). In the old approach, to avoid branches in computing transverse derivatives on cell faces, we fill the ghost cells first. For example, to compute du/dy at the lo-x boundary, we use the data in i = -1 and 0, just like we compute du/dy(i) using u(i-1) and u(i) for interior faces. The problem is that the normal velocity in the ghost cells outside a wall is filled by extrapolating from the Dirichlet value (which is zero) and more than one interior cell. Because of the high-order extrapolation, u(-1) != -u(0). This is the desired approach for computing du/dx on the wall. However, this produces incorrect results for du/dy. In the new approach, we explicitly handle the boundaries in the derivative stencil. For example, to compute transverse derivatives on an inflow face, we use the boundary values directly.
Co-authored-by: cgilet --- Src/Base/AMReX_Orientation.H | 26 +- Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H | 3 +- Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp | 300 +- .../MLMG/AMReX_MLEBTensorOp_bc.cpp | 58 +- .../MLMG/AMReX_MLEBTensor_2D_K.H | 138 +- .../MLMG/AMReX_MLEBTensor_3D_K.H | 539 +++- Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H | 139 + Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp | 140 +- .../MLMG/AMReX_MLTensorOp_grad.cpp | 160 +- Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H | 382 ++- Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H | 2844 +++++++++++------ Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H | 117 + 12 files changed, 3487 insertions(+), 1359 deletions(-) diff --git a/Src/Base/AMReX_Orientation.H b/Src/Base/AMReX_Orientation.H index 064344cafd4..de9c54a1b6c 100644 --- a/Src/Base/AMReX_Orientation.H +++ b/Src/Base/AMReX_Orientation.H @@ -75,7 +75,7 @@ public: * according to the above ordering. */ AMREX_GPU_HOST_DEVICE - operator int () const noexcept { return val; } + constexpr operator int () const noexcept { return val; } //! Return opposite orientation. AMREX_GPU_HOST_DEVICE Orientation flip () const noexcept @@ -97,6 +97,30 @@ public: //! Read from an istream. friend std::istream& operator>> (std::istream& os, Orientation& o); + //! Int value of the x-lo-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int xlo () noexcept { return 0; } + + //! Int value of the x-hi-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int xhi () noexcept { return AMREX_SPACEDIM; } + + //! Int value of the y-lo-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int ylo () noexcept { return 1; } + + //! Int value of the y-hi-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int yhi () noexcept { return 1+AMREX_SPACEDIM; } + + //! Int value of the z-lo-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int zlo () noexcept { return 2; } + + //! 
Int value of the z-hi-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int zhi () noexcept { return 2+AMREX_SPACEDIM; } + private: //! Used internally. AMREX_GPU_HOST_DEVICE diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H index a522d5aa927..1ed29a84801 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H @@ -105,7 +105,8 @@ public: // for cuda void applyBCTensor (int amrlev, int mglev, MultiFab& vel, BCMode bc_mode, StateMode s_mode, const MLMGBndry* bndry) const; - void compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const; + void compCrossTerms(int amrlev, int mglev, MultiFab const& mf, + const MLMGBndry* bndry) const; }; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp index 590e062a3a1..87bb78da730 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp @@ -226,7 +226,7 @@ MLEBTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode MultiFab const& kapebmf = m_eb_kappa[amrlev][mglev]; Real bscalar = m_b_scalar; - compCrossTerms(amrlev, mglev, in); + compCrossTerms(amrlev, mglev, in, bndry); MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); @@ -289,15 +289,23 @@ MLEBTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode } void -MLEBTensorOp::compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const +MLEBTensorOp::compCrossTerms(int amrlev, int mglev, MultiFab const& mf, + const MLMGBndry* bndry) const { auto factory = dynamic_cast(m_factory[amrlev][mglev].get()); const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; auto area = (factory) ? 
factory->getAreaFrac() : Array{AMREX_D_DECL(nullptr,nullptr,nullptr)}; + const auto& bcondloc = *m_bcondloc[amrlev][mglev]; + + Array4 foo; + const Geometry& geom = m_geom[amrlev][mglev]; const auto dxinv = geom.InvCellSizeArray(); + const Box& domain = geom.growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); Array const& etamf = m_b_coeffs[amrlev][mglev]; Array const& kapmf = m_kappa[amrlev][mglev]; @@ -346,56 +354,143 @@ MLEBTensorOp::compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const } ); } else { - AMREX_D_TERM(Array4 const fxfab = fluxmf[0].array(mfi);, - Array4 const fyfab = fluxmf[1].array(mfi);, - Array4 const fzfab = fluxmf[2].array(mfi);); - Array4 const vfab = mf.const_array(mfi); - AMREX_D_TERM(Array4 const etaxfab = etamf[0].const_array(mfi);, - Array4 const etayfab = etamf[1].const_array(mfi);, - Array4 const etazfab = etamf[2].const_array(mfi);); - AMREX_D_TERM(Array4 const kapxfab = kapmf[0].const_array(mfi);, - Array4 const kapyfab = kapmf[1].const_array(mfi);, - Array4 const kapzfab = kapmf[2].const_array(mfi);); - - if (fabtyp == FabType::regular) - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); - } - , ybx, tybx, - { - mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); - } - , zbx, tzbx, - { - mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); + AMREX_D_TERM(Array4 const fxfab = fluxmf[0].array(mfi);, + Array4 const fyfab = fluxmf[1].array(mfi);, + Array4 const fzfab = fluxmf[2].array(mfi);); + Array4 const vfab = mf.const_array(mfi); + AMREX_D_TERM(Array4 const etaxfab = etamf[0].const_array(mfi);, + Array4 const etayfab = etamf[1].const_array(mfi);, + Array4 const etazfab = etamf[2].const_array(mfi);); + AMREX_D_TERM(Array4 const kapxfab = kapmf[0].const_array(mfi);, + Array4 const kapyfab = kapmf[1].const_array(mfi);, + Array4 const kapzfab = kapmf[2].const_array(mfi);); + 
+ if (fabtyp == FabType::regular) + { + if (domain.strictly_contains(bx)) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); + } + ); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } + } + + const auto& bvxlo = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::low )].array(mfi) : foo; + const auto& bvylo = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::low )].array(mfi) : foo; + const auto& bvxhi = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::high)].array(mfi) : foo; + const auto& bvyhi = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo; +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::low )].array(mfi) : foo; + const auto& bvzhi = (bndry != nullptr) ? 
+ (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; +#endif + + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv, + bvxlo, bvxhi, bct, dlo, dhi); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv, + bvylo, bvyhi, bct, dlo, dhi); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv, + bvzlo, bvzhi, bct, dlo, dhi); + } + ); } - ); - } - else - { - AMREX_D_TERM(Array4 const& apx = area[0]->const_array(mfi);, - Array4 const& apy = area[1]->const_array(mfi);, - Array4 const& apz = area[2]->const_array(mfi);); - Array4 const& flag = flags->const_array(mfi); + } + else + { + AMREX_D_TERM(Array4 const& apx = area[0]->const_array(mfi);, + Array4 const& apy = area[1]->const_array(mfi);, + Array4 const& apz = area[2]->const_array(mfi);); + Array4 const& flag = flags->const_array(mfi); + + if (domain.strictly_contains(bx)) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mlebtensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,apx,flag,dxinv); + } + , ybx, tybx, + { + mlebtensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,apy,flag,dxinv); + } + , zbx, tzbx, + { + mlebtensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,apz,flag,dxinv); + } + ); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } + } + + const auto& bvxlo = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::low )].array(mfi) : foo; + const auto& bvylo = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::low )].array(mfi) : foo; + const auto& bvxhi = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::high)].array(mfi) : foo; + const auto& bvyhi = (bndry != nullptr) ? 
+ (*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo; +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::low )].array(mfi) : foo; + const auto& bvzhi = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; +#endif - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mlebtensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,apx,flag,dxinv); - } - , ybx, tybx, - { - mlebtensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,apy,flag,dxinv); - } - , zbx, tzbx, - { - mlebtensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,apz,flag,dxinv); - } - ); - } + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mlebtensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,apx,flag,dxinv, bvxlo, bvxhi, bct, dlo, dhi); + } + , ybx, tybx, + { + mlebtensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,apy,flag,dxinv, bvylo, bvyhi, bct, dlo, dhi); + } + , zbx, tzbx, + { + mlebtensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,apz,flag,dxinv, bvzlo, bvzhi, bct, dlo, dhi); + } + ); + } + } } } @@ -411,7 +506,7 @@ MLEBTensorOp::compFlux (int amrlev, const Array& fluxe BL_PROFILE("MLEBTensorOp::compFlux()"); if ( !(loc==Location::FaceCenter || loc==Location::FaceCentroid) ) - amrex::Abort("MLEBTensorOp::compFlux() unknown location for fluxes."); + amrex::Abort("MLEBTensorOp::compFlux() unknown location for fluxes."); const int mglev = 0; const int ncomp = getNComp(); @@ -429,7 +524,7 @@ MLEBTensorOp::compFlux (int amrlev, const Array& fluxe Array& fluxmf = m_tauflux[amrlev][mglev]; Real bscalar = m_b_scalar; - compCrossTerms(amrlev, mglev, sol); + compCrossTerms(amrlev, mglev, sol, m_bndry_sol[amrlev].get()); MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); @@ -515,104 +610,11 @@ MLEBTensorOp::compFlux (int amrlev, const Array& fluxe } void -MLEBTensorOp::compVelGrad (int amrlev, const Array& fluxes, - MultiFab& sol, Location loc) const 
+MLEBTensorOp::compVelGrad (int /*amrlev*/, + const Array& /*fluxes*/, + MultiFab& /*sol*/, Location /*loc*/) const { - BL_PROFILE("MLEBTensorOp::compVelGrad()"); - - if ( !(loc==Location::FaceCenter || loc==Location::FaceCentroid) ) - amrex::Abort("MLEBTensorOp::compVelGrad() unknown location for VelGradients."); - - const int mglev = 0; - - applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, m_bndry_sol[amrlev].get()); - - auto factory = dynamic_cast(m_factory[amrlev][mglev].get()); - const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; - - const Geometry& geom = m_geom[amrlev][mglev]; - const auto dxinv = geom.InvCellSizeArray(); - - const int dim_fluxes = AMREX_SPACEDIM*AMREX_SPACEDIM; - - MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - { - Array fluxfab_tmp; - for (MFIter mfi(sol, mfi_info); mfi.isValid(); ++mfi) - { - const Box& bx = mfi.tilebox(); - - auto fabtyp = (flags) ? 
(*flags)[mfi].getType(bx) : FabType::regular; - if (fabtyp == FabType::covered) continue; - - if (fabtyp == FabType::regular) - { - - Array4 const vfab = sol.const_array(mfi); - AMREX_D_TERM(Box const xbx = mfi.nodaltilebox(0);, - Box const ybx = mfi.nodaltilebox(1);, - Box const zbx = mfi.nodaltilebox(2);); - AMREX_D_TERM(fluxfab_tmp[0].resize(xbx,dim_fluxes);, - fluxfab_tmp[1].resize(ybx,dim_fluxes);, - fluxfab_tmp[2].resize(zbx,dim_fluxes);); - AMREX_D_TERM(Elixir fxeli = fluxfab_tmp[0].elixir();, - Elixir fyeli = fluxfab_tmp[1].elixir();, - Elixir fzeli = fluxfab_tmp[2].elixir();); - AMREX_D_TERM(Array4 const fxfab = fluxfab_tmp[0].array();, - Array4 const fyfab = fluxfab_tmp[1].array();, - Array4 const fzfab = fluxfab_tmp[2].array();); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mltensor_vel_grads_fx(txbx,fxfab,vfab,dxinv); - } - , ybx, tybx, - { - mltensor_vel_grads_fy(tybx,fyfab,vfab,dxinv); - } - , zbx, tzbx, - { - mltensor_vel_grads_fz(tzbx,fzfab,vfab,dxinv); - } - ); - -// The derivatives are put in the array with the following order: -// component: 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 -// in 2D: dU/dx, dV/dx, dU/dy, dV/dy -// in 3D: dU/dx, dV/dx, dW/dx, dU/dy, dV/dy, dW/dy, dU/dz, dV/dz, dW/dz - - - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - const Box& nbx = mfi.nodaltilebox(idim); - Array4 dst = fluxes[idim]->array(mfi); - Array4 src = fluxfab_tmp[idim].const_array(); - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (nbx, dim_fluxes, i, j, k, n, - { - dst(i,j,k,n) = src(i,j,k,n); - }); - } - - - } -// else if ( loc==Location::FaceCenter ) -// { -// -// amrex::Abort("compVelGrad not yet implemented for cut-cells "); -// -// } - else // loc==Location::FaceCentroid - { - - amrex::Abort("compVelGrad not yet implemented for cut-cells "); - - } - - } - } + amrex::Abort("compVelGrad not yet implemented for EB."); } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp index 
c9c6eb232bb..98beecf01df 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp @@ -13,11 +13,12 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, const auto& bcondloc = *m_bcondloc[amrlev][mglev]; const auto& maskvals = m_maskvals[amrlev][mglev]; - FArrayBox foofab(Box::TheUnitBox(),3); - const auto& foo = foofab.array(); + Array4 foo; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); auto factory = dynamic_cast(m_factory[amrlev][mglev].get()); const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; @@ -39,14 +40,13 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, const auto & bdlv = bcondloc.bndryLocs(mfi); const auto & bdcv = bcondloc.bndryConds(mfi); - GpuArray bct; - GpuArray bcl; - for (OrientationIter face; face; ++face) { - Orientation ori = face(); - const int iface = ori; - for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { - bct[iface*AMREX_SPACEDIM+icomp] = bdcv[icomp][ori]; - bcl[iface*AMREX_SPACEDIM+icomp] = bdlv[icomp][ori]; + Array2D bct; + Array2D bcl; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + bcl(ori,icomp) = bdlv[icomp][ori]; } } @@ -72,7 +72,7 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, mxlo, mylo, mxhi, myhi, bvxlo, bvylo, bvxhi, bvyhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); + dxinv, dlo, dhi); }); #else const auto& mzlo = maskvals[Orientation(2,Orientation::low )].array(mfi); @@ -83,14 +83,37 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, const auto& bvzhi = (bndry != nullptr) ? 
(*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; - AMREX_HOST_DEVICE_FOR_1D ( 12, iedge, +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + amrex::launch(12, 64, Gpu::gpuStream(), +#ifdef AMREX_USE_DPCPP + [=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item) + { + int bid = item.get_group_linear_id(); + int tid = item.get_local_linear_id(); + int bdim = item.get_local_range(0); +#else + [=] AMREX_GPU_DEVICE () + { + int bid = blockIdx.x; + int tid = threadIdx.x; + int bdim = blockDim.x; +#endif + mltensor_fill_edges(bid, tid, bdim, vbx, velfab, + mxlo, mylo, mzlo, mxhi, myhi, mzhi, + bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, + bct, bcl, inhomog, imaxorder, + dxinv, dlo, dhi); + }); + } else +#endif { - mltensor_fill_edges(iedge, vbx, velfab, + mltensor_fill_edges(vbx, velfab, mxlo, mylo, mzlo, mxhi, myhi, mzhi, bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); - }); + dxinv, dlo, dhi); + } AMREX_HOST_DEVICE_FOR_1D ( 8, icorner, { @@ -98,13 +121,12 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, mxlo, mylo, mzlo, mxhi, myhi, mzhi, bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); + dxinv, dlo, dhi); }); + #endif } } - - // Notet that it is incorrect to call EnforcePeriodicity on vel. } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H index 165497d1a20..d93ea3a5d1a 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H @@ -6,10 +6,95 @@ namespace amrex { -namespace { - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - Real mlebtensor_weight (int d) { - return (d==2) ? 0.5 : ((d==1) ? 
1.0 : 0.0); +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + Array4 const& etax, + Array4 const& kapx, + Array4 const& apx, + Array4 const& flag, + GpuArray const& dxinv) noexcept +{ + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apx(i,j,0) == 0.0) + { + fx(i,j,0,0) = 0.0; + fx(i,j,0,1) = 0.0; + } + else + { + int jhip = j + flag(i ,j,0).isConnected(0, 1,0); + int jhim = j - flag(i ,j,0).isConnected(0,-1,0); + int jlop = j + flag(i-1,j,0).isConnected(0, 1,0); + int jlom = j - flag(i-1,j,0).isConnected(0,-1,0); + Real whi = mlebtensor_weight(jhip-jhim); + Real wlo = mlebtensor_weight(jlop-jlom); + Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real divu = dvdy; + Real xif = kapx(i,j,0); + Real mun = Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta + Real mut = etax(i,j,0,1); + fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu; + fx(i,j,0,1) = -mut*dudy; + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + Array4 const& etay, + Array4 const& kapy, + Array4 const& apy, + Array4 const& flag, + GpuArray const& dxinv) noexcept +{ + const Real dxi = dxinv[0]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apy(i,j,0) == 0.0) + { + fy(i,j,0,0) = 0.0; + fy(i,j,0,1) = 0.0; + } + else + { + int ihip = i + flag(i,j ,0).isConnected( 1,0,0); + int ihim = i - flag(i,j 
,0).isConnected(-1,0,0); + int ilop = i + flag(i,j-1,0).isConnected( 1,0,0); + int ilom = i - flag(i,j-1,0).isConnected(-1,0,0); + Real whi = mlebtensor_weight(ihip-ihim); + Real wlo = mlebtensor_weight(ilop-ilom); + Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + Real divu = dudx; + Real xif = kapy(i,j,0); + Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta + Real mut = etay(i,j,0,0); + fy(i,j,0,0) = -mut*dvdx; + fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu; + } + } } } @@ -20,13 +105,20 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, Array4 const& kapx, Array4 const& apx, Array4 const& flag, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dyi = dxinv[1]; const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); constexpr Real twoThirds = 2./3.; + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { @@ -43,13 +135,15 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, int jlom = j - flag(i-1,j,0).isConnected(0,-1,0); Real whi = mlebtensor_weight(jhip-jhim); Real wlo = mlebtensor_weight(jlop-jlom); - Real dudy = (0.5*dyi) * ((vel(i ,jhip,0,0)-vel(i ,jhim,0,0))*whi - +(vel(i-1,jlop,0,0)-vel(i-1,jlom,0,0))*wlo); - Real dvdy = (0.5*dyi) * ((vel(i ,jhip,0,1)-vel(i ,jhim,0,1))*whi - +(vel(i-1,jlop,0,1)-vel(i-1,jlom,0,1))*wlo); + Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); Real divu = dvdy; Real xif = kapx(i,j,0); - Real mun = 0.75*(etax(i,j,0,0)-xif); // restore the original eta + Real mun = 
Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta Real mut = etax(i,j,0,1); fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu; fx(i,j,0,1) = -mut*dudy; @@ -65,13 +159,20 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, Array4 const& kapy, Array4 const& apy, Array4 const& flag, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dxi = dxinv[0]; const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); constexpr Real twoThirds = 2./3.; + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { @@ -88,15 +189,16 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, int ilom = i - flag(i,j-1,0).isConnected(-1,0,0); Real whi = mlebtensor_weight(ihip-ihim); Real wlo = mlebtensor_weight(ilop-ilom); - Real dudx = (0.5*dxi) * ((vel(ihip,j ,0,0)-vel(ihim,j ,0,0))*whi - +(vel(ilop,j-1,0,0)-vel(ilom,j-1,0,0))*wlo); - Real dvdx = (0.5*dxi) * ((vel(ihip,j ,0,1)-vel(ihim,j ,0,1))*whi - +(vel(ilop,j-1,0,1)-vel(ilom,j-1,0,1))*wlo); - + Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); Real divu = dudx; Real xif = kapy(i,j,0); - Real mun = 0.75*(etay(i,j,0,1)-xif); // restore the original eta - Real mut = etay(i,j,0,0); + Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta + Real mut = etay(i,j,0,0); fy(i,j,0,0) = -mut*dvdx; fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_3D_K.H index 3c26566e7ac..2651addee2c 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_3D_K.H @@ -6,11 +6,44 @@ namespace amrex { 
-namespace { - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - Real mlebtensor_weight (int d) { - return (d==2) ? 0.5 : ((d==1) ? 1.0 : 0.0); - } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dz_on_xface (int i, int j, int, int n, + Array4 const& vel, Real dzi, + Real whi, Real wlo, + int khip, int khim, int klop, int klom) noexcept +{ + return Real(0.5)*dzi * ((vel(i ,j,khip,n)-vel(i ,j,khim,n))*whi + + (vel(i-1,j,klop,n)-vel(i-1,j,klom,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dz_on_yface (int i, int j, int, int n, + Array4 const& vel, Real dzi, + Real whi, Real wlo, + int khip, int khim, int klop, int klom) noexcept +{ + return Real(0.5)*dzi * ((vel(i,j ,khip,n)-vel(i,j ,khim,n))*whi + + (vel(i,j-1,klop,n)-vel(i,j-1,klom,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dx_on_zface (int, int j, int k, int n, + Array4 const& vel, Real dxi, + Real whi, Real wlo, + int ihip, int ihim, int ilop, int ilom) noexcept +{ + return Real(0.5)*dxi * ((vel(ihip,j,k ,n)-vel(ihim,j,k ,n))*whi + + (vel(ilop,j,k-1,n)-vel(ilom,j,k-1,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dy_on_zface (int i, int, int k, int n, + Array4 const& vel, Real dyi, + Real whi, Real wlo, + int jhip, int jhim, int jlop, int jlom) noexcept +{ + return Real(0.5)*dyi * ((vel(i,jhip,k ,n)-vel(i,jhim,k ,n))*whi + + (vel(i,jlop,k-1,n)-vel(i,jlom,k-1,n))*wlo); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE @@ -46,26 +79,24 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, int jlom = j - flag(i-1,j,k).isConnected(0,-1,0); Real whi = mlebtensor_weight(jhip-jhim); Real wlo = mlebtensor_weight(jlop-jlom); - Real dudy = (0.5*dyi) * ((vel(i ,jhip,k,0)-vel(i ,jhim,k,0))*whi - +(vel(i-1,jlop,k,0)-vel(i-1,jlom,k,0))*wlo); - Real dvdy = (0.5*dyi) * ((vel(i ,jhip,k,1)-vel(i ,jhim,k,1))*whi - +(vel(i-1,jlop,k,1)-vel(i-1,jlom,k,1))*wlo); - + Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi, + 
whi,wlo,jhip,jhim,jlop,jlom); + Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); int khip = k + flag(i ,j,k).isConnected(0,0, 1); int khim = k - flag(i ,j,k).isConnected(0,0,-1); int klop = k + flag(i-1,j,k).isConnected(0,0, 1); int klom = k - flag(i-1,j,k).isConnected(0,0,-1); whi = mlebtensor_weight(khip-khim); wlo = mlebtensor_weight(klop-klom); - Real dudz = (0.5*dzi) * ((vel(i ,j,khip,0)-vel(i ,j,khim,0))*whi - +(vel(i-1,j,klop,0)-vel(i-1,j,klom,0))*wlo); - Real dwdz = (0.5*dzi) * ((vel(i ,j,khip,2)-vel(i ,j,khim,2))*whi - +(vel(i-1,j,klop,2)-vel(i-1,j,klom,2))*wlo); - + Real dudz = mlebtensor_dz_on_xface(i,j,k,0,vel,dzi, + whi,wlo,khip,khim,klop,klom); + Real dwdz = mlebtensor_dz_on_xface(i,j,k,2,vel,dzi, + whi,wlo,khip,khim,klop,klom); Real divu = dvdy + dwdz; Real xif = kapx(i,j,k); - Real mun = 0.75*(etax(i,j,k,0)-xif); // restore the original eta - Real mut = etax(i,j,k,1); + Real mun = Real(0.75)*(etax(i,j,k,0)-xif);// restore the original eta + Real mut = etax(i,j,k,1); fx(i,j,k,0) = -mun*(-twoThirds*divu) - xif*divu; fx(i,j,k,1) = -mut*dudy; fx(i,j,k,2) = -mut*dudz; @@ -108,26 +139,24 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, int ilom = i - flag(i,j-1,k).isConnected(-1,0,0); Real whi = mlebtensor_weight(ihip-ihim); Real wlo = mlebtensor_weight(ilop-ilom); - Real dudx = (0.5*dxi) * ((vel(ihip,j ,k,0)-vel(ihim,j ,k,0))*whi - +(vel(ilop,j-1,k,0)-vel(ilom,j-1,k,0))*wlo); - Real dvdx = (0.5*dxi) * ((vel(ihip,j ,k,1)-vel(ihim,j ,k,1))*whi - +(vel(ilop,j-1,k,1)-vel(ilom,j-1,k,1))*wlo); - + Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); int khip = k + flag(i,j ,k).isConnected(0,0, 1); int khim = k - flag(i,j ,k).isConnected(0,0,-1); int klop = k + flag(i,j-1,k).isConnected(0,0, 1); int klom = k - flag(i,j-1,k).isConnected(0,0,-1); whi = mlebtensor_weight(khip-khim); wlo = 
mlebtensor_weight(klop-klom); - Real dvdz = (0.5*dzi) * ((vel(i,j ,khip,1)-vel(i,j ,khim,1))*whi - +(vel(i,j-1,klop,1)-vel(i,j-1,klom,1))*wlo); - Real dwdz = (0.5*dzi) * ((vel(i,j ,khip,2)-vel(i,j ,khim,2))*whi - +(vel(i,j-1,klop,2)-vel(i,j-1,klom,2))*wlo); - + Real dvdz = mlebtensor_dz_on_yface(i,j,k,1,vel,dzi, + whi,wlo,khip,khim,klop,klom); + Real dwdz = mlebtensor_dz_on_yface(i,j,k,2,vel,dzi, + whi,wlo,khip,khim,klop,klom); Real divu = dudx + dwdz; Real xif = kapy(i,j,k); - Real mun = 0.75*(etay(i,j,k,1)-xif); // restore the original eta - Real mut = etay(i,j,k,0); + Real mun = Real(0.75)*(etay(i,j,k,1)-xif);// restore the original eta + Real mut = etay(i,j,k,0); fy(i,j,k,0) = -mut*dvdx; fy(i,j,k,1) = -mun*(-twoThirds*divu) - xif*divu; fy(i,j,k,2) = -mut*dvdz; @@ -170,27 +199,457 @@ void mlebtensor_cross_terms_fz (Box const& box, Array4 const& fz, int ilom = i - flag(i,j,k-1).isConnected(-1,0,0); Real whi = mlebtensor_weight(ihip-ihim); Real wlo = mlebtensor_weight(ilop-ilom); + Real dudx = mlebtensor_dx_on_zface(i,j,k,0,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dwdx = mlebtensor_dx_on_zface(i,j,k,2,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + int jhip = j + flag(i,j,k ).isConnected(0, 1,0); + int jhim = j - flag(i,j,k ).isConnected(0,-1,0); + int jlop = j + flag(i,j,k-1).isConnected(0, 1,0); + int jlom = j - flag(i,j,k-1).isConnected(0,-1,0); + whi = mlebtensor_weight(jhip-jhim); + wlo = mlebtensor_weight(jlop-jlom); + Real dvdy = mlebtensor_dy_on_zface(i,j,k,1,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dwdy = mlebtensor_dy_on_zface(i,j,k,2,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real divu = dudx + dvdy; + Real xif = kapz(i,j,k); + Real mun = Real(0.75)*(etaz(i,j,k,2)-xif);// restore the original eta + Real mut = etaz(i,j,k,0); + + fz(i,j,k,0) = -mut*dwdx; + fz(i,j,k,1) = -mut*dwdy; + fz(i,j,k,2) = -mun*(-twoThirds*divu) - xif*divu; + } + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dz_on_xface (int i, int j, int k, int 
n, + Array4 const& vel, Real dzi, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int khip, int khim, int klop, int klom) noexcept +{ + Real ddz; + if (i == dlo.x) { + if (bct(Orientation::xlo(),n) == AMREX_LO_DIRICHLET && bvxlo) { + if (k == dlo.z) { + ddz = (bvxlo(i-1,j,k ,n) * Real(-1.5) + + bvxlo(i-1,j,k+1,n) * Real(2.) + + bvxlo(i-1,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvxlo(i-1,j,k ,n) * Real(-1.5) + + bvxlo(i-1,j,k-1,n) * Real(2.) + + bvxlo(i-1,j,k-2,n) * Real(-0.5)) * dzi; + } else { + ddz = whi*dzi*(bvxlo(i-1,j,khip,n)-bvxlo(i-1,j,khim,n)); + } + } else if (bct(Orientation::xlo(),n) == AMREX_LO_NEUMANN) { + ddz = whi*dzi*(vel(i,j,khip,n)-vel(i,j,khim,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); + } + } else if (i == dhi.x+1) { + if (bct(Orientation::xhi(),n) == AMREX_LO_DIRICHLET && bvxhi) { + if (k == dlo.z) { + ddz = (bvxhi(i,j,k ,n) * Real(-1.5) + + bvxhi(i,j,k+1,n) * Real(2.) + + bvxhi(i,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvxhi(i,j,k ,n) * Real(-1.5) + + bvxhi(i,j,k-1,n) * Real(2.) 
+ + bvxhi(i,j,k-2,n) * Real(-0.5)) * dzi; + } else { + ddz = wlo*dzi*(bvxhi(i,j,klop,n)-bvxhi(i,j,klom,n)); + } + } else if (bct(Orientation::xhi(),n) == AMREX_LO_NEUMANN) { + ddz = wlo*dzi*(vel(i-1,j,klop,n)-vel(i-1,j,klom,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); + } + } else { + ddz = mlebtensor_dz_on_xface(i,j,k,n,vel,dzi,whi,wlo,khip,khim,klop,klom); + } + return ddz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dz_on_yface (int i, int j, int k, int n, + Array4 const& vel, Real dzi, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int khip, int khim, int klop, int klom) noexcept +{ + Real ddz; + if (j == dlo.y) { + if (bct(Orientation::ylo(),n) == AMREX_LO_DIRICHLET && bvylo) { + if (k == dlo.z) { + ddz = (bvylo(i,j-1,k ,n) * Real(-1.5) + + bvylo(i,j-1,k+1,n) * Real(2.) + + bvylo(i,j-1,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvylo(i,j-1,k ,n) * Real(-1.5) + + bvylo(i,j-1,k-1,n) * Real(2.) + + bvylo(i,j-1,k-2,n) * Real(-0.5)) * dzi; + } else { + ddz = whi*dzi*(bvylo(i,j-1,khip,n)-bvylo(i,j-1,khim,n)); + } + } else if (bct(Orientation::ylo(),n) == AMREX_LO_NEUMANN) { + ddz = whi*dzi*(vel(i,j,khip,n)-vel(i,j,khim,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); + } + } else if (j == dhi.y+1) { + if (bct(Orientation::yhi(),n) == AMREX_LO_DIRICHLET && bvyhi) { + if (k == dlo.z) { + ddz = (bvyhi(i,j,k ,n) * Real(-1.5) + + bvyhi(i,j,k+1,n) * Real(2.) + + bvyhi(i,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvyhi(i,j,k ,n) * Real(-1.5) + + bvyhi(i,j,k-1,n) * Real(2.) 
+ + bvyhi(i,j,k-2,n) * Real(-0.5)) * dzi; + } else { + ddz = wlo*dzi*(bvyhi(i,j,klop,n)-bvyhi(i,j,klom,n)); + } + } else if (bct(Orientation::yhi(),n) == AMREX_LO_NEUMANN) { + ddz = wlo*dzi*(vel(i,j-1,klop,n)-vel(i,j-1,klom,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); + } + } else { + ddz = mlebtensor_dz_on_yface(i,j,k,n,vel,dzi,whi,wlo,khip,khim,klop,klom); + } + return ddz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dx_on_zface (int i, int j, int k, int n, + Array4 const& vel, Real dxi, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int ihip, int ihim, int ilop, int ilom) noexcept +{ + Real ddx; + if (k == dlo.z) { + if (bct(Orientation::zlo(),n) == AMREX_LO_DIRICHLET && bvzlo) { + if (i == dlo.x) { + ddx = (bvzlo(i ,j,k-1,n) * Real(-1.5) + + bvzlo(i+1,j,k-1,n) * Real(2.) + + bvzlo(i+2,j,k-1,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvzlo(i ,j,k-1,n) * Real(-1.5) + + bvzlo(i-1,j,k-1,n) * Real(2.) + + bvzlo(i-2,j,k-1,n) * Real(-0.5)) * dxi; + } else { + ddx = whi*dxi*(bvzlo(ihip,j,k-1,n)-bvzlo(ihim,j,k-1,n)); + } + } else if (bct(Orientation::zlo(),n) == AMREX_LO_NEUMANN) { + ddx = whi*dxi*(vel(ihip,j,k,n)-vel(ihim,j,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else if (k == dhi.z+1) { + if (bct(Orientation::zhi(),n) == AMREX_LO_DIRICHLET && bvzhi) { + if (i == dlo.x) { + ddx = (bvzhi(i ,j,k,n) * Real(-1.5) + + bvzhi(i+1,j,k,n) * Real(2.) + + bvzhi(i+2,j,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvzhi(i ,j,k,n) * Real(-1.5) + + bvzhi(i-1,j,k,n) * Real(2.) 
+ + bvzhi(i-2,j,k,n) * Real(-0.5)) * dxi; + } else { + ddx = wlo*dxi*(bvzhi(ilop,j,k,n)-bvzhi(ilom,j,k,n)); + } + } else if (bct(Orientation::zhi(),n) == AMREX_LO_NEUMANN) { + ddx = wlo*dxi*(vel(ilop,j,k-1,n)-vel(ilom,j,k-1,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else { + ddx = mlebtensor_dx_on_zface(i,j,k,n,vel,dxi,whi,wlo,ihip,ihim,ilop,ilom); + + } + return ddx; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dy_on_zface (int i, int j, int k, int n, + Array4 const& vel, Real dyi, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int jhip, int jhim, int jlop, int jlom) noexcept +{ + Real ddy; + if (k == dlo.z) { + if (bct(Orientation::zlo(),n) == AMREX_LO_DIRICHLET && bvzlo) { + if (j == dlo.y) { + ddy = (bvzlo(i,j ,k-1,n) * Real(-1.5) + + bvzlo(i,j+1,k-1,n) * Real(2.) + + bvzlo(i,j+2,k-1,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvzlo(i,j ,k-1,n) * Real(-1.5) + + bvzlo(i,j-1,k-1,n) * Real(2.) + + bvzlo(i,j-2,k-1,n) * Real(-0.5)) * dyi; + } else { + ddy = whi*dyi*(bvzlo(i,jhip,k-1,n)-bvzlo(i,jhim,k-1,n)); + } + } else if (bct(Orientation::zlo(),n) == AMREX_LO_NEUMANN) { + ddy = whi*dyi*(vel(i,jhip,k,n)-vel(i,jhim,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else if (k == dhi.z+1) { + if (bct(Orientation::zhi(),n) == AMREX_LO_DIRICHLET && bvzhi) { + if (j == dlo.y) { + ddy = (bvzhi(i,j ,k,n) * Real(-1.5) + + bvzhi(i,j+1,k,n) * Real(2.) + + bvzhi(i,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvzhi(i,j ,k,n) * Real(-1.5) + + bvzhi(i,j-1,k,n) * Real(2.) 
+ + bvzhi(i,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = wlo*dyi*(bvzhi(i,jlop,k,n)-bvzhi(i,jlom,k,n)); + } + } else if (bct(Orientation::zhi(),n) == AMREX_LO_NEUMANN) { + ddy = wlo*dyi*(vel(i,jlop,k-1,n)-vel(i,jlom,k-1,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else { + ddy = mlebtensor_dy_on_zface(i,j,k,n,vel,dyi,whi,wlo,jhip,jhim,jlop,jlom); + } + return ddy; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + Array4 const& etax, + Array4 const& kapx, + Array4 const& apx, + Array4 const& flag, + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept + +{ + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apx(i,j,k) == 0.0) + { + fx(i,j,k,0) = 0.0; + fx(i,j,k,1) = 0.0; + fx(i,j,k,2) = 0.0; + } + else + { + int jhip = j + flag(i ,j,k).isConnected(0, 1,0); + int jhim = j - flag(i ,j,k).isConnected(0,-1,0); + int jlop = j + flag(i-1,j,k).isConnected(0, 1,0); + int jlom = j - flag(i-1,j,k).isConnected(0,-1,0); + Real whi = mlebtensor_weight(jhip-jhim); + Real wlo = mlebtensor_weight(jlop-jlom); + Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); + int khip = k + flag(i ,j,k).isConnected(0,0, 1); + int khim = k - flag(i ,j,k).isConnected(0,0,-1); + int klop = k + flag(i-1,j,k).isConnected(0,0, 1); + int klom = k - flag(i-1,j,k).isConnected(0,0,-1); + whi = mlebtensor_weight(khip-khim); + wlo = mlebtensor_weight(klop-klom); + 
Real dudz = mlebtensor_dz_on_xface(i,j,k,0,vel,dzi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,khip,khim,klop,klom); + Real dwdz = mlebtensor_dz_on_xface(i,j,k,2,vel,dzi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,khip,khim,klop,klom); + Real divu = dvdy + dwdz; + Real xif = kapx(i,j,k); + Real mun = Real(0.75)*(etax(i,j,k,0)-xif);// restore the original eta + Real mut = etax(i,j,k,1); + fx(i,j,k,0) = -mun*(-twoThirds*divu) - xif*divu; + fx(i,j,k,1) = -mut*dudy; + fx(i,j,k,2) = -mut*dudz; + } + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + Array4 const& etay, + Array4 const& kapy, + Array4 const& apy, + Array4 const& flag, + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apy(i,j,k) == 0.0) + { + fy(i,j,k,0) = 0.0; + fy(i,j,k,1) = 0.0; + fy(i,j,k,2) = 0.0; + } + else + { + int ihip = i + flag(i,j ,k).isConnected( 1,0,0); + int ihim = i - flag(i,j ,k).isConnected(-1,0,0); + int ilop = i + flag(i,j-1,k).isConnected( 1,0,0); + int ilom = i - flag(i,j-1,k).isConnected(-1,0,0); + Real whi = mlebtensor_weight(ihip-ihim); + Real wlo = mlebtensor_weight(ilop-ilom); + Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); + int khip = k + flag(i,j ,k).isConnected(0,0, 1); + int khim = k - flag(i,j ,k).isConnected(0,0,-1); + int klop = k + flag(i,j-1,k).isConnected(0,0, 1); + int klom = k - 
flag(i,j-1,k).isConnected(0,0,-1); + whi = mlebtensor_weight(khip-khim); + wlo = mlebtensor_weight(klop-klom); + Real dvdz = mlebtensor_dz_on_yface(i,j,k,1,vel,dzi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,khip,khim,klop,klom); + Real dwdz = mlebtensor_dz_on_yface(i,j,k,2,vel,dzi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,khip,khim,klop,klom); + Real divu = dudx + dwdz; + Real xif = kapy(i,j,k); + Real mun = Real(0.75)*(etay(i,j,k,1)-xif);// restore the original eta + Real mut = etay(i,j,k,0); + fy(i,j,k,0) = -mut*dvdx; + fy(i,j,k,1) = -mun*(-twoThirds*divu) - xif*divu; + fy(i,j,k,2) = -mut*dvdz; + } + } + } + } +} - Real dudx = (0.5*dxi) * ((vel(ihip,j,k ,0)-vel(ihim,j,k ,0))*whi - +(vel(ilop,j,k-1,0)-vel(ilom,j,k-1,0))*wlo); - Real dwdx = (0.5*dxi) * ((vel(ihip,j,k ,2)-vel(ihim,j,k ,2))*whi - +(vel(ilop,j,k-1,2)-vel(ilom,j,k-1,2))*wlo); +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fz (Box const& box, Array4 const& fz, + Array4 const& vel, + Array4 const& etaz, + Array4 const& kapz, + Array4 const& apz, + Array4 const& flag, + GpuArray const& dxinv, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apz(i,j,k) == 0.0) + { + fz(i,j,k,0) = 0.0; + fz(i,j,k,1) = 0.0; + fz(i,j,k,2) = 0.0; + } + else + { + int ihip = i + flag(i,j,k ).isConnected( 1,0,0); + int ihim = i - flag(i,j,k ).isConnected(-1,0,0); + int ilop = i + flag(i,j,k-1).isConnected( 1,0,0); + int ilom = i - flag(i,j,k-1).isConnected(-1,0,0); + Real whi = mlebtensor_weight(ihip-ihim); + Real wlo = mlebtensor_weight(ilop-ilom); + Real dudx = mlebtensor_dx_on_zface(i,j,k,0,vel,dxi, + bvzlo,bvzhi,bct,dlo,dhi, + 
whi,wlo,ihip,ihim,ilop,ilom); + Real dwdx = mlebtensor_dx_on_zface(i,j,k,2,vel,dxi, + bvzlo,bvzhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); int jhip = j + flag(i,j,k ).isConnected(0, 1,0); int jhim = j - flag(i,j,k ).isConnected(0,-1,0); int jlop = j + flag(i,j,k-1).isConnected(0, 1,0); int jlom = j - flag(i,j,k-1).isConnected(0,-1,0); whi = mlebtensor_weight(jhip-jhim); wlo = mlebtensor_weight(jlop-jlom); - Real dvdy = (0.5*dyi) * ((vel(i,jhip,k ,1)-vel(i,jhim,k ,1))*whi - +(vel(i,jlop,k-1,1)-vel(i,jlom,k-1,1))*wlo); - Real dwdy = (0.5*dyi) * ((vel(i,jhip,k ,2)-vel(i,jhim,k ,2))*whi - +(vel(i,jlop,k-1,2)-vel(i,jlom,k-1,2))*wlo); - + Real dvdy = mlebtensor_dy_on_zface(i,j,k,1,vel,dyi, + bvzlo,bvzhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dwdy = mlebtensor_dy_on_zface(i,j,k,2,vel,dyi, + bvzlo,bvzhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); Real divu = dudx + dvdy; Real xif = kapz(i,j,k); - Real mun = 0.75*(etaz(i,j,k,2)-xif); // restore the original eta - Real mut = etaz(i,j,k,0); + Real mun = Real(0.75)*(etaz(i,j,k,2)-xif);// restore the original eta + Real mut = etaz(i,j,k,0); fz(i,j,k,0) = -mut*dwdx; fz(i,j,k,1) = -mut*dwdy; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H index c814b3b8e41..8abdde8a7c0 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H @@ -4,6 +4,145 @@ #include +namespace amrex { + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_weight (int d) { + return (d==2) ? 0.5 : ((d==1) ? 
1.0 : 0.0); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dy_on_xface (int i, int, int k, int n, + Array4 const& vel, Real dyi, + Real whi, Real wlo, + int jhip, int jhim, int jlop, int jlom) noexcept +{ + return Real(0.5)*dyi * ((vel(i ,jhip,k,n)-vel(i ,jhim,k,n))*whi + + (vel(i-1,jlop,k,n)-vel(i-1,jlom,k,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dx_on_yface (int, int j, int k, int n, + Array4 const& vel, Real dxi, + Real whi, Real wlo, + int ihip, int ihim, int ilop, int ilom) noexcept +{ + return Real(0.5)*dxi * ((vel(ihip,j ,k,n)-vel(ihim,j ,k,n))*whi + + (vel(ilop,j-1,k,n)-vel(ilom,j-1,k,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dy_on_xface (int i, int j, int k, int n, + Array4 const& vel, Real dyi, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int jhip, int jhim, int jlop, int jlom) noexcept +{ + Real ddy; + if (i == dlo.x) { + if (bct(Orientation::xlo(),n) == AMREX_LO_DIRICHLET && bvxlo) { + if (j == dlo.y) { + ddy = (bvxlo(i-1,j ,k,n) * Real(-1.5) + + bvxlo(i-1,j+1,k,n) * Real(2.) + + bvxlo(i-1,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvxlo(i-1,j ,k,n) * Real(-1.5) + + bvxlo(i-1,j-1,k,n) * Real(2.) + + bvxlo(i-1,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = whi*dyi*(bvxlo(i-1,jhip,k,n)-bvxlo(i-1,jhim,k,n)); + } + } else if (bct(Orientation::xlo(),n) == AMREX_LO_NEUMANN) { + ddy = whi*dyi*(vel(i,jhip,k,n)-vel(i,jhim,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else if (i == dhi.x+1) { + if (bct(Orientation::xhi(),n) == AMREX_LO_DIRICHLET && bvxhi) { + if (j == dlo.y) { + ddy = (bvxhi(i,j ,k,n) * Real(-1.5) + + bvxhi(i,j+1,k,n) * Real(2.) + + bvxhi(i,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvxhi(i,j ,k,n) * Real(-1.5) + + bvxhi(i,j-1,k,n) * Real(2.) 
+ + bvxhi(i,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = wlo*dyi*(bvxhi(i,jlop,k,n)-bvxhi(i,jlom,k,n)); + } + } else if (bct(Orientation::xhi(),n) == AMREX_LO_NEUMANN) { + ddy = wlo*dyi*(vel(i-1,jlop,k,n)-vel(i-1,jlom,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else { + ddy = mlebtensor_dy_on_xface(i,j,k,n,vel,dyi,whi,wlo,jhip,jhim,jlop,jlom); + } + return ddy; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dx_on_yface (int i, int j, int k, int n, + Array4 const& vel, Real dxi, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int ihip, int ihim, int ilop, int ilom) noexcept +{ + Real ddx; + if (j == dlo.y) { + if (bct(Orientation::ylo(),n) == AMREX_LO_DIRICHLET && bvylo) { + if (i == dlo.x) { + ddx = (bvylo(i ,j-1,k,n) * Real(-1.5) + + bvylo(i+1,j-1,k,n) * Real(2.) + + bvylo(i+2,j-1,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvylo(i ,j-1,k,n) * Real(-1.5) + + bvylo(i-1,j-1,k,n) * Real(2.) + + bvylo(i-2,j-1,k,n) * Real(-0.5)) * dxi; + } else { + ddx = whi*dxi*(bvylo(ihip,j-1,k,n)-bvylo(ihim,j-1,k,n)); + } + } else if (bct(Orientation::ylo(),n) == AMREX_LO_NEUMANN) { + ddx = whi*dxi*(vel(ihip,j,k,n)-vel(ihim,j,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else if (j == dhi.y+1) { + if (bct(Orientation::yhi(),n) == AMREX_LO_DIRICHLET && bvyhi) { + if (i == dlo.x) { + ddx = (bvyhi(i ,j,k,n) * Real(-1.5) + + bvyhi(i+1,j,k,n) * Real(2.) + + bvyhi(i+2,j,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvyhi(i ,j,k,n) * Real(-1.5) + + bvyhi(i-1,j,k,n) * Real(2.) 
+ + bvyhi(i-2,j,k,n) * Real(-0.5)) * dxi; + } else { + ddx = wlo*dxi*(bvyhi(ilop,j,k,n)-bvyhi(ilom,j,k,n)); + } + } else if (bct(Orientation::yhi(),n) == AMREX_LO_NEUMANN) { + ddx = wlo*dxi*(vel(ilop,j-1,k,n)-vel(ilom,j-1,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else { + ddx = mlebtensor_dx_on_yface(i,j,k,n,vel,dxi,whi,wlo,ihip,ihim,ilop,ilom); + } + return ddx; +} + +} + #if (AMREX_SPACEDIM == 1) #elif (AMREX_SPACEDIM == 2) #include diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp index d4e77f312dc..0750ffdd969 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp @@ -210,9 +210,16 @@ MLTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc if (mglev >= m_kappa[amrlev].size()) return; - applyBCTensor(amrlev, mglev, in, bc_mode, s_mode, bndry ); + applyBCTensor(amrlev, mglev, in, bc_mode, s_mode, bndry); + + const auto& bcondloc = *m_bcondloc[amrlev][mglev]; + + Array4 foo; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); + const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); Array const& etamf = m_b_coeffs[amrlev][mglev]; Array const& kapmf = m_kappa[amrlev][mglev]; @@ -247,20 +254,65 @@ MLTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc Array4 const fyfab = fluxfab_tmp[1].array();, Array4 const fzfab = fluxfab_tmp[2].array();); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); - } - , ybx, tybx, - { - mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); - } - , zbx, tzbx, - { - mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); - } - ); + if (domain.strictly_contains(bx)) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + 
mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); + } + ); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } + } + + const auto& bvxlo = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::low )].array(mfi) : foo; + const auto& bvylo = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::low )].array(mfi) : foo; + const auto& bvxhi = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::high)].array(mfi) : foo; + const auto& bvyhi = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo; +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::low )].array(mfi) : foo; + const auto& bvzhi = (bndry != nullptr) ? 
+ (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; +#endif + + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv, + bvxlo, bvxhi, bct, dlo, dhi); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv, + bvylo, bvyhi, bct, dlo, dhi); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv, + bvzlo, bvzhi, bct, dlo, dhi); + } + ); + } if (m_overset_mask[amrlev][mglev]) { const auto& osm = m_overset_mask[amrlev][mglev]->array(mfi); @@ -288,18 +340,18 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, #if (AMREX_SPACEDIM == 1) amrex::ignore_unused(amrlev,mglev,vel,bc_mode,bndry); #else + const int inhomog = bc_mode == BCMode::Inhomogeneous; const int imaxorder = maxorder; const auto& bcondloc = *m_bcondloc[amrlev][mglev]; const auto& maskvals = m_maskvals[amrlev][mglev]; - FArrayBox foofab(Box::TheUnitBox(),3); - const auto& foo = foofab.array(); + Array4 foo; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); - - // Domain and coarse-fine boundaries are handled below. 
+ const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) mfi_info.SetDynamic(true); @@ -315,14 +367,13 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, const auto & bdlv = bcondloc.bndryLocs(mfi); const auto & bdcv = bcondloc.bndryConds(mfi); - GpuArray bct; - GpuArray bcl; - for (OrientationIter face; face; ++face) { - Orientation ori = face(); - const int iface = ori; - for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { - bct[iface*AMREX_SPACEDIM+icomp] = bdcv[icomp][ori]; - bcl[iface*AMREX_SPACEDIM+icomp] = bdlv[icomp][ori]; + Array2D bct; + Array2D bcl; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + bcl(ori,icomp) = bdlv[icomp][ori]; } } @@ -341,14 +392,13 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, (*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo; #if (AMREX_SPACEDIM == 2) - AMREX_HOST_DEVICE_FOR_1D ( 4, icorner, { mltensor_fill_corners(icorner, vbx, velfab, mxlo, mylo, mxhi, myhi, bvxlo, bvylo, bvxhi, bvyhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); + dxinv, dlo, dhi); }); #else const auto& mzlo = maskvals[Orientation(2,Orientation::low )].array(mfi); @@ -360,18 +410,40 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; // only edge vals used in 3D stencil - AMREX_HOST_DEVICE_FOR_1D ( 12, iedge, +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + amrex::launch(12, 64, Gpu::gpuStream(), +#ifdef AMREX_USE_DPCPP + [=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item) + { + int bid = item.get_group_linear_id(); + int tid = item.get_local_linear_id(); + int bdim = item.get_local_range(0); +#else + [=] AMREX_GPU_DEVICE () + { + int bid = blockIdx.x; + int tid = threadIdx.x; + int bdim = blockDim.x; +#endif + 
mltensor_fill_edges(bid, tid, bdim, vbx, velfab, + mxlo, mylo, mzlo, mxhi, myhi, mzhi, + bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, + bct, bcl, inhomog, imaxorder, + dxinv, dlo, dhi); + }); + } else +#endif { - mltensor_fill_edges(iedge, vbx, velfab, + mltensor_fill_edges(vbx, velfab, mxlo, mylo, mzlo, mxhi, myhi, mzhi, bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); - }); + dxinv, dlo, dhi); + } #endif } - // Notet that it is incorrect to call EnforcePeriodicity on vel. #endif } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensorOp_grad.cpp b/Src/LinearSolvers/MLMG/AMReX_MLTensorOp_grad.cpp index 705f38052d1..d395ecdac13 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensorOp_grad.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensorOp_grad.cpp @@ -16,9 +16,15 @@ MLTensorOp::compFlux (int amrlev, const Array& fluxes, const int ncomp = getNComp(); MLABecLaplacian::compFlux(amrlev, fluxes, sol, loc); - applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, m_bndry_sol[amrlev].get()); + MLMGBndry const* bndry = m_bndry_sol[amrlev].get(); + applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, bndry); + + const auto& bcondloc = *m_bcondloc[amrlev][mglev]; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); + const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); Array const& etamf = m_b_coeffs[amrlev][mglev]; Array const& kapmf = m_kappa[amrlev][mglev]; @@ -52,20 +58,59 @@ MLTensorOp::compFlux (int amrlev, const Array& fluxes, Array4 const fyfab = fluxfab_tmp[1].array();, Array4 const fzfab = fluxfab_tmp[2].array();); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); - } - , ybx, tybx, - { - mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); - } - , zbx, tzbx, - { - 
mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); - } - ); + if (domain.strictly_contains(mfi.tilebox())) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); + } + ); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } + } + + const auto& bvxlo = (*bndry)[Orientation(0,Orientation::low )].array(mfi); + const auto& bvylo = (*bndry)[Orientation(1,Orientation::low )].array(mfi); + const auto& bvxhi = (*bndry)[Orientation(0,Orientation::high)].array(mfi); + const auto& bvyhi = (*bndry)[Orientation(1,Orientation::high)].array(mfi); +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (*bndry)[Orientation(2,Orientation::low )].array(mfi); + const auto& bvzhi = (*bndry)[Orientation(2,Orientation::high)].array(mfi); +#endif + + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv, + bvxlo, bvxhi, bct, dlo, dhi); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv, + bvylo, bvyhi, bct, dlo, dhi); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv, + bvzlo, bvzhi, bct, dlo, dhi); + } + ); + } for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { const Box& nbx = mfi.nodaltilebox(idim); @@ -95,33 +140,36 @@ MLTensorOp::compVelGrad (int amrlev, const Array& flux const int mglev = 0; - applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, m_bndry_sol[amrlev].get()); + MLMGBndry const* bndry = m_bndry_sol[amrlev].get(); + applyBC(amrlev, mglev, sol, BCMode::Inhomogeneous, 
StateMode::Solution, bndry); + applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, bndry); + + const auto& bcondloc = *m_bcondloc[amrlev][mglev]; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); - const int dim_fluxes = AMREX_SPACEDIM*AMREX_SPACEDIM; + const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif + for (MFIter mfi(sol, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - Array fluxfab_tmp; + Array4 const vfab = sol.const_array(mfi); + AMREX_D_TERM(Box const xbx = mfi.nodaltilebox(0);, + Box const ybx = mfi.nodaltilebox(1);, + Box const zbx = mfi.nodaltilebox(2);) + AMREX_D_TERM(Array4 const fxfab = fluxes[0]->array(mfi);, + Array4 const fyfab = fluxes[1]->array(mfi);, + Array4 const fzfab = fluxes[2]->array(mfi);) - for (MFIter mfi(sol, TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - Array4 const vfab = sol.const_array(mfi); - AMREX_D_TERM(Box const xbx = mfi.nodaltilebox(0);, - Box const ybx = mfi.nodaltilebox(1);, - Box const zbx = mfi.nodaltilebox(2);); - AMREX_D_TERM(fluxfab_tmp[0].resize(xbx,dim_fluxes);, - fluxfab_tmp[1].resize(ybx,dim_fluxes);, - fluxfab_tmp[2].resize(zbx,dim_fluxes);); - AMREX_D_TERM(Elixir fxeli = fluxfab_tmp[0].elixir();, - Elixir fyeli = fluxfab_tmp[1].elixir();, - Elixir fzeli = fluxfab_tmp[2].elixir();); - AMREX_D_TERM(Array4 const fxfab = fluxfab_tmp[0].array();, - Array4 const fyfab = fluxfab_tmp[1].array();, - Array4 const fzfab = fluxfab_tmp[2].array();); +// The derivatives are put in the array with the following order: +// component: 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 +// in 2D: dU/dx, dV/dx, dU/dy, dV/dy +// in 3D: dU/dx, dV/dx, dW/dx, dU/dy, dV/dy, dW/dy, dU/dz, dV/dz, dW/dz + if (domain.strictly_contains(mfi.tilebox())) { AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM ( xbx, txbx, { @@ -136,23 +184,39 @@ MLTensorOp::compVelGrad 
(int amrlev, const Array& flux mltensor_vel_grads_fz(tzbx,fzfab,vfab,dxinv); } ); - -// The derivatives are put in the array with the following order: -// component: 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 -// in 2D: dU/dx, dV/dx, dU/dy, dV/dy -// in 3D: dU/dx, dV/dx, dW/dx, dU/dy, dV/dy, dW/dy, dU/dz, dV/dz, dW/dz - - - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - const Box& nbx = mfi.nodaltilebox(idim); - Array4 dst = fluxes[idim]->array(mfi); - Array4 src = fluxfab_tmp[idim].const_array(); - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (nbx, dim_fluxes, i, j, k, n, - { - dst(i,j,k,n) = src(i,j,k,n); - }); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } } + const auto& bvxlo = (*bndry)[Orientation(0,Orientation::low )].array(mfi); + const auto& bvylo = (*bndry)[Orientation(1,Orientation::low )].array(mfi); + const auto& bvxhi = (*bndry)[Orientation(0,Orientation::high)].array(mfi); + const auto& bvyhi = (*bndry)[Orientation(1,Orientation::high)].array(mfi); +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (*bndry)[Orientation(2,Orientation::low )].array(mfi); + const auto& bvzhi = (*bndry)[Orientation(2,Orientation::high)].array(mfi); +#endif + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_vel_grads_fx(txbx,fxfab,vfab,dxinv,bvxlo,bvxhi,bct,dlo,dhi); + } + , ybx, tybx, + { + mltensor_vel_grads_fy(tybx,fyfab,vfab,dxinv,bvylo,bvyhi,bct,dlo,dhi); + } + , zbx, tzbx, + { + mltensor_vel_grads_fz(tzbx,fzfab,vfab,dxinv,bvzlo,bvzhi,bct,dlo,dhi); + } + ); } } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H index 8f10f08ec58..a40fa4611a8 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H @@ -17,110 +17,168 @@ void mltensor_fill_corners (int 
icorner, Box const& vbox, // vbox: the valid box Array4 const& bcvalylo, Array4 const& bcvalxhi, Array4 const& bcvalyhi, - GpuArray const& bct, - GpuArray const& bcl, + Array2D const& bct, + Array2D const& bcl, int inhomog, int maxorder, - GpuArray const& dxinv, Box const& domain) noexcept + GpuArray const& dxinv, + Dim3 const& dlo, Dim3 const& dhi) noexcept { - constexpr int oxlo = 0; - constexpr int oylo = 1; - constexpr int oxhi = 2; - constexpr int oyhi = 3; - constexpr int xdir = 0; - constexpr int ydir = 1; + constexpr int k = 0; const auto blen = amrex::length(vbox); const auto vlo = amrex::lbound(vbox); const auto vhi = amrex::ubound(vbox); - const auto dlo = amrex::lbound(domain); - const auto dhi = amrex::ubound(domain); - for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { - switch (icorner) { - case 0: { - // xlo & ylo - if (mxlo(vlo.x-1,vlo.y-1,0) != BndryData::covered) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1); - if (vlo.x == dlo.x && vlo.y == dlo.y) { - vel(vlo.x-1,vlo.y-1,0,icomp) = vel(vlo.x-1,vlo.y,0,icomp) - + vel(vlo.x,vlo.y-1,0,icomp) - vel(vlo.x,vlo.y,0,icomp); - } else if (vlo.x == dlo.x || mylo(vlo.x,vlo.y-1,0) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); + if (icorner == 0) { // xlo & ylo + int const i = vlo.x-1; + int const j = vlo.y-1; + if (mxlo(i,j,k) != BndryData::covered && (dlo.x != vlo.x || dlo.y != vlo.y)) { + bool x_interior = mylo(i+1,j ,k) == BndryData::covered; // i+1,j is a valid cell inside domain + bool x_exterior = mylo(i+1,j ,k) == BndryData::not_covered; // i+1,j is a ghost cell inside domain + bool y_interior = mxlo(i 
,j+1,k) == BndryData::covered; + bool y_exterior = mxlo(i ,j+1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || dlo.x == vlo.x) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || dlo.y == vlo.y) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); } } - break; } - case 1: { - // xhi & ylo - if (mxhi(vhi.x+1,vlo.y-1,0) != BndryData::covered) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1); - if (vhi.x == dhi.x && vlo.y == dlo.y) { - vel(vhi.x+1,vlo.y-1,0,icomp) = vel(vhi.x+1,vlo.y,0,icomp) - + vel(vhi.x,vlo.y-1,0,icomp) - vel(vhi.x,vlo.y,0,icomp); - } else if (vhi.x == dhi.x || mylo(vhi.x,vlo.y-1,0) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, 
maxorder, dxinv[ydir], inhomog, icomp); + } else if (icorner == 1) { // xhi & ylo + int const i = vhi.x+1; + int const j = vlo.y-1; + if (mxhi(i,j,k) != BndryData::covered && (dhi.x != vhi.x || dlo.y != vlo.y)) { + bool x_interior = mylo(i-1,j ,k) == BndryData::covered; + bool x_exterior = mylo(i-1,j ,k) == BndryData::not_covered; + bool y_interior = mxhi(i ,j+1,k) == BndryData::covered; + bool y_exterior = mxhi(i ,j+1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || dhi.x == vhi.x) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || dlo.y == vlo.y) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); } } - break; } - case 2: { - // xlo & yhi - if (mxlo(vlo.x-1,vhi.y+1,0) != BndryData::covered) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1); - if (vlo.x == dlo.x && vhi.y == dhi.y) { - vel(vlo.x-1,vhi.y+1,0,icomp) = vel(vlo.x-1,vhi.y,0,icomp) - + vel(vlo.x,vhi.y+1,0,icomp) - vel(vlo.x,vhi.y,0,icomp); - } else if (vlo.x == dlo.x || myhi(vlo.x,vhi.y+1,0) == 
BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); + } else if (icorner == 2) { // xlo & yhi + int const i = vlo.x-1; + int const j = vhi.y+1; + if (mxlo(i,j,k) != BndryData::covered && (dlo.x != vlo.x || dhi.y != vhi.y)) { + bool x_interior = myhi(i+1,j ,k) == BndryData::covered; + bool x_exterior = myhi(i+1,j ,k) == BndryData::not_covered; + bool y_interior = mxlo(i ,j-1,k) == BndryData::covered; + bool y_exterior = mxlo(i ,j-1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || dlo.x == vlo.x) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || dhi.y == vhi.y) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); } } - break; } - 
case 3: { - // xhi & yhi - if (mxhi(vhi.x+1,vhi.y+1,0) != BndryData::covered) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1); - if (vhi.x == dhi.x && vhi.y == dhi.y) { - vel(vhi.x+1,vhi.y+1,0,icomp) = vel(vhi.x+1,vhi.y,0,icomp) - + vel(vhi.x,vhi.y+1,0,icomp) - vel(vhi.x,vhi.y,0,icomp); - } else if (vhi.x == dhi.x || myhi(vhi.x,vhi.y+1,0) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); + } else if (icorner == 3) { // xhi & yhi + int const i = vhi.x+1; + int const j = vhi.y+1; + if (mxhi(i,j,k) != BndryData::covered && (dhi.x != vhi.x || dhi.y != vhi.y)) { + bool x_interior = myhi(i-1,j ,k) == BndryData::covered; + bool x_exterior = myhi(i-1,j ,k) == BndryData::not_covered; + bool y_interior = mxhi(i ,j-1,k) == BndryData::covered; + bool y_exterior = mxhi(i ,j-1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || dhi.x == vhi.x) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), 
icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || dhi.y == vhi.y) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); } } - break; - } - default: {} } } } @@ -137,11 +195,12 @@ void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, const auto hi = amrex::ubound(box); constexpr Real twoThirds = Real(2./3.); + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudy = (vel(i,j+1,0,0)+vel(i-1,j+1,0,0)-vel(i,j-1,0,0)-vel(i-1,j-1,0,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,0,1)+vel(i-1,j+1,0,1)-vel(i,j-1,0,1)-vel(i-1,j-1,0,1))*(Real(0.25)*dyi); + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi); Real divu = dvdy; Real xif = kapx(i,j,0); Real mun = Real(0.75)*(etax(i,j,0,0)-xif); // restore the original eta @@ -164,11 +223,80 @@ void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, const auto hi = amrex::ubound(box); constexpr Real twoThirds = Real(2./3.); + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,0,0)+vel(i+1,j-1,0,0)-vel(i-1,j,0,0)-vel(i-1,j-1,0,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,0,1)+vel(i+1,j-1,0,1)-vel(i-1,j,0,1)-vel(i-1,j-1,0,1))*(Real(0.25)*dxi); + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi); + Real divu = dudx; + Real xif = kapy(i,j,0); + Real mun = Real(0.75)*(etay(i,j,0,1)-xif); // restore the original eta + Real mut = etay(i,j,0,0); + fy(i,j,0,0) = -mut*dvdx; + fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, + Array4 const& vel, 
+ Array4 const& etax, + Array4 const& kapx, + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + // Three BC types: reflect odd, neumann, and dirichlet + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real divu = dvdy; + Real xif = kapx(i,j,0); + Real mun = Real(0.75)*(etax(i,j,0,0)-xif); // restore the original eta + Real mut = etax(i,j,0,1); + fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu; + fx(i,j,0,1) = -mut*dudy; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + Array4 const& etay, + Array4 const& kapy, + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); Real divu = dudx; Real xif = kapy(i,j,0); Real mun = Real(0.75)*(etay(i,j,0,1)-xif); // restore the original eta @@ -241,13 +369,14 @@ void mltensor_vel_grads_fx (Box const& box, Array4 const& fx, const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { Real dudx = (vel(i,j,0,0) - 
vel(i-1,j,0,0))*dxi; Real dvdx = (vel(i,j,0,1) - vel(i-1,j,0,1))*dxi; - Real dudy = (vel(i,j+1,0,0)+vel(i-1,j+1,0,0)-vel(i,j-1,0,0)-vel(i-1,j-1,0,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,0,1)+vel(i-1,j+1,0,1)-vel(i,j-1,0,1)-vel(i-1,j-1,0,1))*(Real(0.25)*dyi); + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi); fx(i,j,0,0) = dudx; fx(i,j,0,1) = dvdx; fx(i,j,0,2) = dudy; @@ -266,11 +395,74 @@ void mltensor_vel_grads_fy (Box const& box, Array4 const& fy, const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,0,0)+vel(i+1,j-1,0,0)-vel(i-1,j,0,0)-vel(i-1,j-1,0,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,0,1)+vel(i+1,j-1,0,1)-vel(i-1,j,0,1)-vel(i-1,j-1,0,1))*(Real(0.25)*dxi); + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi); + Real dudy = (vel(i,j,0,0) - vel(i,j-1,0,0))*dyi; + Real dvdy = (vel(i,j,0,1) - vel(i,j-1,0,1))*dyi; + fy(i,j,0,0) = dudx; + fy(i,j,0,1) = dvdx; + fy(i,j,0,2) = dudy; + fy(i,j,0,3) = dvdy; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = (vel(i,j,0,0) - vel(i-1,j,0,0))*dxi; + Real dvdx = (vel(i,j,0,1) - vel(i-1,j,0,1))*dxi; + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + fx(i,j,0,0) = dudx; + fx(i,j,0,1) = dvdx; + 
fx(i,j,0,2) = dudy; + fx(i,j,0,3) = dvdy; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); Real dudy = (vel(i,j,0,0) - vel(i,j-1,0,0))*dyi; Real dvdy = (vel(i,j,0,1) - vel(i,j-1,0,1))*dyi; fy(i,j,0,0) = dudx; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H index a4a4c7df9ef..a5de05a385e 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H @@ -6,6 +6,643 @@ namespace amrex { +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xlo_ylo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxlo, + Array4 const& mylo, + Array4 const& bcvalxlo, + Array4 const& bcvalylo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xlo_domain, bool ylo_domain) noexcept +{ + if (mxlo(i,j,k) != BndryData::covered && (!xlo_domain || !ylo_domain)) { + bool x_interior = mylo(i+1,j ,k) == BndryData::covered; + bool x_exterior = mylo(i+1,j ,k) == BndryData::not_covered; + bool y_interior = mxlo(i ,j+1,k) == BndryData::covered; + bool y_exterior = mxlo(i ,j+1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, 
mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || ylo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xhi_ylo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxhi, + Array4 const& mylo, + Array4 const& bcvalxhi, + Array4 const& bcvalylo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xhi_domain, bool ylo_domain) noexcept +{ + if (mxhi(i,j,k) != BndryData::covered && (!xhi_domain || !ylo_domain)) { + bool x_interior = mylo(i-1,j ,k) == BndryData::covered; + bool x_exterior = mylo(i-1,j ,k) == BndryData::not_covered; + bool y_interior = mxhi(i ,j+1,k) == BndryData::covered; + bool y_exterior = mxhi(i ,j+1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real 
tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || ylo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xlo_yhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxlo, + Array4 const& myhi, + Array4 const& bcvalxlo, + Array4 const& bcvalyhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xlo_domain, bool yhi_domain) noexcept +{ + if (mxlo(i,j,k) != BndryData::covered && (!xlo_domain || !yhi_domain)) { + bool x_interior = myhi(i+1,j ,k) == BndryData::covered; + bool x_exterior = myhi(i+1,j ,k) == BndryData::not_covered; + bool y_interior = mxlo(i ,j-1,k) == BndryData::covered; + bool y_exterior = mxlo(i ,j-1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + 
bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || yhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xhi_yhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxhi, + Array4 const& myhi, + Array4 const& bcvalxhi, + Array4 const& bcvalyhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xhi_domain, bool yhi_domain) noexcept +{ + if (mxhi(i,j,k) != BndryData::covered && (!xhi_domain || !yhi_domain)) { + bool x_interior = myhi(i-1,j ,k) == BndryData::covered; + bool x_exterior = myhi(i-1,j ,k) == BndryData::not_covered; + bool y_interior = mxhi(i ,j-1,k) == BndryData::covered; + bool y_exterior = mxhi(i ,j-1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 
0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || yhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xlo_zlo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxlo, + Array4 const& mzlo, + Array4 const& bcvalxlo, + Array4 const& bcvalzlo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xlo_domain, bool zlo_domain) noexcept +{ + if (mxlo(i,j,k) != BndryData::covered && (!xlo_domain || !zlo_domain)) { + bool x_interior = mzlo(i+1,j,k ) == BndryData::covered; + bool x_exterior = mzlo(i+1,j,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j,k+1) == BndryData::covered; + bool z_exterior = mxlo(i ,j,k+1) == BndryData::not_covered; + if ((x_interior && z_interior) || (x_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) 
{ + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (z_interior || zlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xhi_zlo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxhi, + Array4 const& mzlo, + Array4 const& bcvalxhi, + Array4 const& bcvalzlo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xhi_domain, bool zlo_domain) noexcept +{ + if (mxhi(i,j,k) != BndryData::covered && (!xhi_domain || !zlo_domain)) { + bool x_interior = mzlo(i-1,j,k ) == BndryData::covered; + bool x_exterior = mzlo(i-1,j,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j,k+1) == BndryData::covered; + bool z_exterior = mxhi(i ,j,k+1) == BndryData::not_covered; + if ((x_interior && z_interior) || (x_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), 
icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (z_interior || zlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xlo_zhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxlo, + Array4 const& mzhi, + Array4 const& bcvalxlo, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xlo_domain, bool zhi_domain) noexcept +{ + if (mxlo(i,j,k) != BndryData::covered && (!xlo_domain || !zhi_domain)) { + bool x_interior = mzhi(i+1,j,k ) == BndryData::covered; + bool x_exterior = mzhi(i+1,j,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j,k-1) == BndryData::covered; + bool z_exterior = mxlo(i ,j,k-1) == BndryData::not_covered; + if ((x_interior && z_interior) || (x_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (z_interior || zhi_domain) { + for (int icomp = 0; icomp < 
AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xhi_zhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxhi, + Array4 const& mzhi, + Array4 const& bcvalxhi, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xhi_domain, bool zhi_domain) noexcept +{ + if (mxhi(i,j,k) != BndryData::covered && (!xhi_domain || !zhi_domain)) { + bool x_interior = mzhi(i-1,j,k ) == BndryData::covered; + bool x_exterior = mzhi(i-1,j,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j,k-1) == BndryData::covered; + bool z_exterior = mxhi(i ,j,k-1) == BndryData::not_covered; + if ((x_interior && z_interior) || (x_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (z_interior || zhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + 
bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_ylo_zlo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mylo, + Array4 const& mzlo, + Array4 const& bcvalylo, + Array4 const& bcvalzlo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool ylo_domain, bool zlo_domain) noexcept +{ + if (mylo(i,j,k) != BndryData::covered && (!ylo_domain || !zlo_domain)) { + bool y_interior = mzlo(i,j+1,k ) == BndryData::covered; + bool y_exterior = mzlo(i,j+1,k ) == BndryData::not_covered; + bool z_interior = mylo(i,j ,k+1) == BndryData::covered; + bool z_exterior = mylo(i,j ,k+1) == BndryData::not_covered; + if ((y_interior && z_interior) || (y_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (y_interior || ylo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } + } else if (z_interior || zlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE 
AMREX_FORCE_INLINE +void mltensor_fill_edges_yhi_zlo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& myhi, + Array4 const& mzlo, + Array4 const& bcvalyhi, + Array4 const& bcvalzlo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool yhi_domain, bool zlo_domain) noexcept +{ + if (myhi(i,j,k) != BndryData::covered && (!yhi_domain || !zlo_domain)) { + bool y_interior = mzlo(i,j-1,k ) == BndryData::covered; + bool y_exterior = mzlo(i,j-1,k ) == BndryData::not_covered; + bool z_interior = myhi(i,j ,k+1) == BndryData::covered; + bool z_exterior = myhi(i,j ,k+1) == BndryData::not_covered; + if ((y_interior && z_interior) || (y_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (y_interior || yhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } + } else if (z_interior || zlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_ylo_zhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& 
vel, + Array4 const& mylo, + Array4 const& mzhi, + Array4 const& bcvalylo, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool ylo_domain, bool zhi_domain) noexcept +{ + if (mylo(i,j,k) != BndryData::covered && (!ylo_domain || !zhi_domain)) { + bool y_interior = mzhi(i,j+1,k ) == BndryData::covered; + bool y_exterior = mzhi(i,j+1,k ) == BndryData::not_covered; + bool z_interior = mylo(i,j ,k-1) == BndryData::covered; + bool z_exterior = mylo(i,j ,k-1) == BndryData::not_covered; + if ((y_interior && z_interior) || (y_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (y_interior || ylo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } + } else if (z_interior || zhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_yhi_zhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& myhi, + Array4 const& mzhi, + Array4 const& bcvalyhi, + Array4 const& bcvalzhi, + Array2D const& bct, + 
Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool yhi_domain, bool zhi_domain) noexcept +{ + if (myhi(i,j,k) != BndryData::covered && (!yhi_domain || !zhi_domain)) { + bool y_interior = mzhi(i,j-1,k ) == BndryData::covered; + bool y_exterior = mzhi(i,j-1,k ) == BndryData::not_covered; + bool z_interior = myhi(i,j ,k-1) == BndryData::covered; + bool z_exterior = myhi(i,j ,k-1) == BndryData::not_covered; + if ((y_interior && z_interior) || (y_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (y_interior || yhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } + } else if (z_interior || zhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +#ifdef AMREX_USE_EB AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mltensor_fill_corners (int icorner, Box const& vbox, // vbox: the valid box Array4 const& vel, @@ -21,495 +658,680 @@ void mltensor_fill_corners (int icorner, Box const& vbox, // vbox: the valid box Array4 const& bcvalxhi, Array4 const& bcvalyhi, Array4 const& bcvalzhi, - GpuArray const& bct, - GpuArray const& bcl, + Array2D const& 
bct, + Array2D const& bcl, int inhomog, int maxorder, - GpuArray const& dxinv, Box const& domain) noexcept + GpuArray const& dxinv, + Dim3 const& dlo, Dim3 const& dhi) noexcept { - constexpr int oxlo = 0; - constexpr int oylo = 1; - constexpr int ozlo = 2; - constexpr int oxhi = 3; - constexpr int oyhi = 4; - constexpr int ozhi = 5; - constexpr int xdir = 0; - constexpr int ydir = 1; - constexpr int zdir = 2; const auto blen = amrex::length(vbox); const auto vlo = amrex::lbound(vbox); const auto vhi = amrex::ubound(vbox); - const auto dlo = amrex::lbound(domain); - const auto dhi = amrex::ubound(domain); + bool xlo_domain = (vlo.x == dlo.x); + bool ylo_domain = (vlo.y == dlo.y); + bool zlo_domain = (vlo.z == dlo.z); + bool xhi_domain = (vhi.x == dhi.x); + bool yhi_domain = (vhi.y == dhi.y); + bool zhi_domain = (vhi.z == dhi.z); + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { switch (icorner) { case 0: { // xlo & ylo & zlo - Box bx = amrex::adjCellLo(amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1),zdir,1); - if (vlo.x == dlo.x && vlo.y == dlo.y && vlo.z == dlo.z) { - vel (vlo.x-1,vlo.y-1,vlo.z-1,icomp) - = vel(vlo.x-1,vlo.y ,vlo.z ,icomp) - + vel(vlo.x ,vlo.y-1,vlo.z ,icomp) - + vel(vlo.x ,vlo.y ,vlo.z-1,icomp) - - vel(vlo.x ,vlo.y ,vlo.z ,icomp) * Real(2.0); - } else if (vlo.x == dlo.x && vlo.y == dlo.y) { - vel (vlo.x-1,vlo.y-1,vlo.z-1,icomp) - = vel(vlo.x-1,vlo.y ,vlo.z-1,icomp) - + vel(vlo.x ,vlo.y-1,vlo.z-1,icomp) - - vel(vlo.x ,vlo.y ,vlo.z-1,icomp); - } else if (vlo.x == dlo.x && vlo.z == dlo.z) { - vel (vlo.x-1,vlo.y-1,vlo.z-1,icomp) - = vel(vlo.x-1,vlo.y-1,vlo.z ,icomp) - + vel(vlo.x ,vlo.y-1,vlo.z-1,icomp) - - vel(vlo.x ,vlo.y-1,vlo.z ,icomp); - } else if (vlo.y == dlo.y && vlo.z == dlo.z) { - vel (vlo.x-1,vlo.y-1,vlo.z-1,icomp) - = vel(vlo.x-1,vlo.y-1,vlo.z ,icomp) - + vel(vlo.x-1,vlo.y ,vlo.z-1,icomp) - - vel(vlo.x-1,vlo.y ,vlo.z ,icomp); - } else if (vlo.x == dlo.x) { - int offset = AMREX_SPACEDIM * oxlo; - 
mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vlo.y-1,vlo.z-1) != BndryData::covered) { - if (mylo(vlo.x,vlo.y-1,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vlo.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + int i = vlo.x-1; + int j = vlo.y-1; + int k = vlo.z-1; + if (mxlo(i,j,k) != BndryData::covered && + (!xlo_domain || !ylo_domain || !zlo_domain)) { + bool x_interior = mylo(i+1,j ,k ) == BndryData::covered; + bool x_exterior = mylo(i+1,j ,k ) == BndryData::not_covered; + bool y_interior = mxlo(i ,j+1,k ) == BndryData::covered; + bool y_exterior = mxlo(i ,j+1,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j ,k+1) == BndryData::covered; + bool z_exterior = mxlo(i ,j ,k+1) == BndryData::not_covered; + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + 
mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + 
bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); } } break; } case 1: { // xhi & ylo & zlo - Box bx = amrex::adjCellLo(amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1),zdir,1); - if (vhi.x == dhi.x && vlo.y == dlo.y && vlo.z == dlo.z) { - vel (vhi.x+1,vlo.y-1,vlo.z-1,icomp) - = vel(vhi.x+1,vlo.y ,vlo.z ,icomp) - + vel(vhi.x ,vlo.y-1,vlo.z ,icomp) - + vel(vhi.x ,vlo.y ,vlo.z-1,icomp) - - vel(vhi.x ,vlo.y ,vlo.z ,icomp) * Real(2.0); - } else if (vhi.x == dhi.x && vlo.y == dlo.y) { - vel (vhi.x+1,vlo.y-1,vlo.z-1,icomp) - = vel(vhi.x+1,vlo.y ,vlo.z-1,icomp) - + vel(vhi.x ,vlo.y-1,vlo.z-1,icomp) - - vel(vhi.x ,vlo.y ,vlo.z-1,icomp); - } else if (vhi.x == dhi.x && vlo.z == dlo.z) { - vel (vhi.x+1,vlo.y-1,vlo.z-1,icomp) - = vel(vhi.x+1,vlo.y-1,vlo.z ,icomp) - + vel(vhi.x ,vlo.y-1,vlo.z-1,icomp) - - vel(vhi.x ,vlo.y-1,vlo.z ,icomp); - } else if (vlo.y == dlo.y && vlo.z == dlo.z) { - vel (vhi.x+1,vlo.y-1,vlo.z-1,icomp) - = vel(vhi.x+1,vlo.y-1,vlo.z ,icomp) - + vel(vhi.x+1,vlo.y ,vlo.z-1,icomp) - - vel(vhi.x+1,vlo.y ,vlo.z ,icomp); - } else if (vhi.x == dhi.x) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y 
== dlo.y) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vlo.y-1,vlo.z-1) != BndryData::covered) { - if (mylo(vhi.x,vlo.y-1,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vlo.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + int i = vhi.x+1; + int j = vlo.y-1; + int k = vlo.z-1; + bool x_interior = mylo(i-1,j ,k ) == BndryData::covered; + bool x_exterior = mylo(i-1,j ,k ) == BndryData::not_covered; + bool y_interior = mxhi(i ,j+1,k ) == BndryData::covered; + bool y_exterior = mxhi(i ,j+1,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j ,k+1) == BndryData::covered; + bool z_exterior = mxhi(i ,j ,k+1) == BndryData::not_covered; + if (mxhi(i,j,k) != BndryData::covered && + (!xhi_domain || !ylo_domain || !zlo_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, 
icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + 
mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); } } break; } case 2: { // xlo & yhi & zlo - Box bx = amrex::adjCellLo(amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1),zdir,1); - if (vlo.x == dlo.x && vhi.y == dhi.y && vlo.z == dlo.z) { - vel (vlo.x-1,vhi.y+1,vlo.z-1,icomp) - = vel(vlo.x-1,vhi.y ,vlo.z ,icomp) - + vel(vlo.x ,vhi.y+1,vlo.z ,icomp) - + vel(vlo.x ,vhi.y ,vlo.z-1,icomp) - - vel(vlo.x ,vhi.y ,vlo.z ,icomp) * Real(2.0); - } else if (vlo.x == dlo.x && vhi.y == dhi.y) { - vel (vlo.x-1,vhi.y+1,vlo.z-1,icomp) - = vel(vlo.x-1,vhi.y ,vlo.z-1,icomp) - + vel(vlo.x ,vhi.y+1,vlo.z-1,icomp) - - vel(vlo.x ,vhi.y ,vlo.z-1,icomp); - } else if (vlo.x == dlo.x && vlo.z == dlo.z) { - vel (vlo.x-1,vhi.y+1,vlo.z-1,icomp) - = vel(vlo.x-1,vhi.y+1,vlo.z ,icomp) - + vel(vlo.x ,vhi.y+1,vlo.z-1,icomp) - - vel(vlo.x ,vhi.y+1,vlo.z ,icomp); - } else if (vhi.y == dhi.y && vlo.z == dlo.z) { - vel (vlo.x-1,vhi.y+1,vlo.z-1,icomp) - = vel(vlo.x-1,vhi.y+1,vlo.z ,icomp) - + vel(vlo.x-1,vhi.y ,vlo.z-1,icomp) - - vel(vlo.x-1,vhi.y ,vlo.z ,icomp); - } else if (vlo.x == dlo.x) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - 
bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vhi.y+1,vlo.z-1) != BndryData::covered) { - if (myhi(vlo.x,vhi.y+1,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vhi.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + int i = vlo.x-1; + int j = vhi.y+1; + int k = vlo.z-1; + bool x_interior = myhi(i+1,j ,k ) == BndryData::covered; + bool x_exterior = myhi(i+1,j ,k ) == BndryData::not_covered; + bool y_interior = mxlo(i ,j-1,k ) == BndryData::covered; + bool y_exterior = mxlo(i ,j-1,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j ,k+1) == BndryData::covered; + bool z_exterior = mxlo(i ,j ,k+1) == BndryData::not_covered; + if (mxlo(i,j,k) != BndryData::covered && + (!xlo_domain || !yhi_domain || !zlo_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + 
bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, 
dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); } } break; } case 3: { // xhi & yhi & zlo - Box bx = amrex::adjCellLo(amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1),zdir,1); - if (vhi.x == dhi.x && vhi.y == dhi.y && vlo.z == dlo.z) { - vel (vhi.x+1,vhi.y+1,vlo.z-1,icomp) - = vel(vhi.x+1,vhi.y ,vlo.z ,icomp) - + vel(vhi.x ,vhi.y+1,vlo.z ,icomp) - + vel(vhi.x ,vhi.y ,vlo.z-1,icomp) - - vel(vhi.x ,vhi.y ,vlo.z ,icomp) * Real(2.0); - } else if (vhi.x == dhi.x && vhi.y == dhi.y) { - vel (vhi.x+1,vhi.y+1,vlo.z-1,icomp) - = vel(vhi.x+1,vhi.y ,vlo.z-1,icomp) - + vel(vhi.x ,vhi.y+1,vlo.z-1,icomp) - - vel(vhi.x ,vhi.y ,vlo.z-1,icomp); - } else if (vhi.x == dhi.x && vlo.z == dlo.z) { - vel (vhi.x+1,vhi.y+1,vlo.z-1,icomp) - = vel(vhi.x+1,vhi.y+1,vlo.z ,icomp) - + vel(vhi.x ,vhi.y+1,vlo.z-1,icomp) - - vel(vhi.x ,vhi.y+1,vlo.z ,icomp); - } else if (vhi.y == dhi.y && vlo.z == dlo.z) { - vel (vhi.x+1,vhi.y+1,vlo.z-1,icomp) - = vel(vhi.x+1,vhi.y+1,vlo.z ,icomp) - + vel(vhi.x+1,vhi.y ,vlo.z-1,icomp) - - vel(vhi.x+1,vhi.y ,vlo.z ,icomp); - } else if (vhi.x == dhi.x) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, 
bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vhi.y+1,vlo.z-1) != BndryData::covered) { - if (myhi(vhi.x,vhi.y+1,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vhi.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + int i = vhi.x+1; + int j = vhi.y+1; + int k = vlo.z-1; + bool x_interior = myhi(i-1,j ,k ) == BndryData::covered; + bool x_exterior = myhi(i-1,j ,k ) == BndryData::not_covered; + bool y_interior = mxhi(i ,j-1,k ) == BndryData::covered; + bool y_exterior = mxhi(i ,j-1,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j ,k+1) == BndryData::covered; + bool z_exterior = mxhi(i ,j ,k+1) == BndryData::not_covered; + if (mxhi(i,j,k) != BndryData::covered && + (!xhi_domain || !yhi_domain || !zlo_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + 
bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + 
bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); } } break; } case 4: { // xlo & ylo & zhi - Box bx = amrex::adjCellHi(amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1),zdir,1); - if (vlo.x == dlo.x && vlo.y == dlo.y && vhi.z == dhi.z) { - vel (vlo.x-1, vlo.y-1, vhi.z+1,icomp) - = vel(vlo.x-1, vlo.y , vhi.z ,icomp) - + vel(vlo.x , vlo.y-1, vhi.z ,icomp) - + vel(vlo.x , vlo.y , vhi.z+1,icomp) - - vel(vlo.x , vlo.y , vhi.z ,icomp) * Real(2.0); - } else if (vlo.x == dlo.x && vlo.y == dlo.y) { - vel (vlo.x-1, vlo.y-1, vhi.z+1,icomp) - = vel(vlo.x-1, vlo.y , vhi.z+1,icomp) - + vel(vlo.x , vlo.y-1, vhi.z+1,icomp) - - vel(vlo.x , vlo.y , vhi.z+1,icomp); - } else if (vlo.x == dlo.x && vhi.z == dhi.z) { - vel (vlo.x-1, vlo.y-1, vhi.z+1,icomp) - = vel(vlo.x-1, vlo.y-1, vhi.z ,icomp) - + vel(vlo.x , vlo.y-1, vhi.z+1,icomp) - - vel(vlo.x , vlo.y-1, vhi.z ,icomp); - } else if (vlo.y == dlo.y && vhi.z == dhi.z) { - vel (vlo.x-1, vlo.y-1, vhi.z+1,icomp) - = vel(vlo.x-1, vlo.y-1, vhi.z ,icomp) - + vel(vlo.x-1, vlo.y , vhi.z+1,icomp) - - vel(vlo.x-1, vlo.y , vhi.z ,icomp); - } else if (vlo.x == dlo.x) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } else 
if (mxlo(vlo.x-1,vlo.y-1,vhi.z+1) != BndryData::covered) { - if (mylo(vlo.x,vlo.y-1,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vlo.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + int i = vlo.x-1; + int j = vlo.y-1; + int k = vhi.z+1; + bool x_interior = mylo(i+1,j ,k ) == BndryData::covered; + bool x_exterior = mylo(i+1,j ,k ) == BndryData::not_covered; + bool y_interior = mxlo(i ,j+1,k ) == BndryData::covered; + bool y_exterior = mxlo(i ,j+1,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j ,k-1) == BndryData::covered; + bool z_exterior = mxlo(i ,j ,k-1) == BndryData::not_covered; + if (mxlo(i,j,k) != BndryData::covered && + (!xlo_domain || !ylo_domain || !zhi_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + 
vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + 
mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); } } break; } case 5: { // xhi & ylo & zhi - Box bx = amrex::adjCellHi(amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1),zdir,1); - if (vhi.x == dhi.x && vlo.y == dlo.y && vhi.z == dhi.z) { - vel (vhi.x+1,vlo.y-1,vhi.z+1,icomp) - = vel(vhi.x+1,vlo.y ,vhi.z ,icomp) - + vel(vhi.x ,vlo.y-1,vhi.z ,icomp) - + vel(vhi.x ,vlo.y ,vhi.z+1,icomp) - - vel(vhi.x ,vlo.y ,vhi.z ,icomp) * Real(2.0); - } else if (vhi.x == dhi.x && vlo.y == dlo.y) { - vel (vhi.x+1,vlo.y-1,vhi.z+1,icomp) - = vel(vhi.x+1,vlo.y ,vhi.z+1,icomp) - + vel(vhi.x ,vlo.y-1,vhi.z+1,icomp) - - vel(vhi.x ,vlo.y ,vhi.z+1,icomp); - } else if (vhi.x == dhi.x && vhi.z == dhi.z) { - vel (vhi.x+1,vlo.y-1,vhi.z+1,icomp) - = vel(vhi.x+1,vlo.y-1,vhi.z ,icomp) - + vel(vhi.x ,vlo.y-1,vhi.z+1,icomp) - - vel(vhi.x ,vlo.y-1,vhi.z ,icomp); - } else if (vlo.y == dlo.y && vhi.z == dhi.z) { - vel (vhi.x+1,vlo.y-1,vhi.z+1,icomp) - = vel(vhi.x+1,vlo.y-1,vhi.z ,icomp) - + vel(vhi.x+1,vlo.y ,vhi.z+1,icomp) - - vel(vhi.x+1,vlo.y ,vhi.z ,icomp); - } else if (vhi.x == dhi.x) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vlo.y-1,vhi.z+1) != BndryData::covered) { - if (mylo(vhi.x,vlo.y-1,vhi.z+1) == BndryData::covered) { - int offset = 
AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vlo.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + int i = vhi.x+1; + int j = vlo.y-1; + int k = vhi.z+1; + bool x_interior = mylo(i-1,j ,k ) == BndryData::covered; + bool x_exterior = mylo(i-1,j ,k ) == BndryData::not_covered; + bool y_interior = mxhi(i ,j+1,k ) == BndryData::covered; + bool y_exterior = mxhi(i ,j+1,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j ,k-1) == BndryData::covered; + bool z_exterior = mxhi(i ,j ,k-1) == BndryData::not_covered; + if (mxhi(i,j,k) != BndryData::covered && + (!xhi_domain || !ylo_domain || !zhi_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::high, 
i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], 
inhomog, icomp); } } break; } case 6: { // xlo & yhi & zhi - Box bx = amrex::adjCellHi(amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1),zdir,1); - if (vlo.x == dlo.x && vhi.y == dhi.y && vhi.z == dhi.z) { - vel (vlo.x-1,vhi.y+1,vhi.z+1,icomp) - = vel(vlo.x-1,vhi.y ,vhi.z ,icomp) - + vel(vlo.x ,vhi.y+1,vhi.z ,icomp) - + vel(vlo.x ,vhi.y ,vhi.z+1,icomp) - - vel(vlo.x ,vhi.y ,vhi.z ,icomp) * Real(2.0); - } else if (vlo.x == dlo.x && vhi.y == dhi.y) { - vel (vlo.x-1,vhi.y+1,vhi.z+1,icomp) - = vel(vlo.x-1,vhi.y ,vhi.z+1,icomp) - + vel(vlo.x ,vhi.y+1,vhi.z+1,icomp) - - vel(vlo.x ,vhi.y ,vhi.z+1,icomp); - } else if (vlo.x == dlo.x && vhi.z == dhi.z) { - vel (vlo.x-1,vhi.y+1,vhi.z+1,icomp) - = vel(vlo.x-1,vhi.y+1,vhi.z ,icomp) - + vel(vlo.x ,vhi.y+1,vhi.z+1,icomp) - - vel(vlo.x ,vhi.y+1,vhi.z ,icomp); - } else if (vhi.y == dhi.y && vhi.z == dhi.z) { - vel (vlo.x-1,vhi.y+1,vhi.z+1,icomp) - = vel(vlo.x-1,vhi.y+1,vhi.z ,icomp) - + vel(vlo.x-1,vhi.y ,vhi.z+1,icomp) - - vel(vlo.x-1,vhi.y ,vhi.z ,icomp); - } else if (vlo.x == dlo.x) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vhi.y+1,vhi.z+1) != BndryData::covered) { - if (myhi(vlo.x,vhi.y+1,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, 
icomp); - } else if (mxlo(vlo.x-1,vhi.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + int i = vlo.x-1; + int j = vhi.y+1; + int k = vhi.z+1; + bool x_interior = myhi(i+1,j ,k ) == BndryData::covered; + bool x_exterior = myhi(i+1,j ,k ) == BndryData::not_covered; + bool y_interior = mxlo(i ,j-1,k ) == BndryData::covered; + bool y_exterior = mxlo(i ,j-1,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j ,k-1) == BndryData::covered; + bool z_exterior = mxlo(i ,j ,k-1) == BndryData::not_covered; + if (mxlo(i,j,k) != BndryData::covered && + (!xlo_domain || !yhi_domain || !zhi_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); 
+ mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); } } break; } case 7: { // xhi & yhi & zhi - Box bx = amrex::adjCellHi(amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1),zdir,1); - if (vhi.x == dhi.x 
&& vhi.y == dhi.y && vhi.z == dhi.z) { - vel (vhi.x+1,vhi.y+1,vhi.z+1,icomp) - = vel(vhi.x+1,vhi.y ,vhi.z ,icomp) - + vel(vhi.x ,vhi.y+1,vhi.z ,icomp) - + vel(vhi.x ,vhi.y ,vhi.z+1,icomp) - - vel(vhi.x ,vhi.y ,vhi.z ,icomp) * Real(2.0); - } else if (vhi.x == dhi.x && vhi.y == dhi.y) { - vel (vhi.x+1,vhi.y+1,vhi.z+1,icomp) - = vel(vhi.x+1,vhi.y ,vhi.z+1,icomp) - + vel(vhi.x ,vhi.y+1,vhi.z+1,icomp) - - vel(vhi.x ,vhi.y ,vhi.z+1,icomp); - } else if (vhi.x == dhi.x && vhi.z == dhi.z) { - vel (vhi.x+1,vhi.y+1,vhi.z+1,icomp) - = vel(vhi.x+1,vhi.y+1,vhi.z ,icomp) - + vel(vhi.x ,vhi.y+1,vhi.z+1,icomp) - - vel(vhi.x ,vhi.y+1,vhi.z ,icomp); - } else if (vhi.y == dhi.y && vhi.z == dhi.z) { - vel (vhi.x+1,vhi.y+1,vhi.z+1,icomp) - = vel(vhi.x+1,vhi.y+1,vhi.z ,icomp) - + vel(vhi.x+1,vhi.y ,vhi.z+1,icomp) - - vel(vhi.x+1,vhi.y ,vhi.z ,icomp); - } else if (vhi.x == dhi.x) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vhi.y+1,vhi.z+1) != BndryData::covered) { - if (myhi(vhi.x,vhi.y+1,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vhi.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, 
myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + int i = vhi.x+1; + int j = vhi.y+1; + int k = vhi.z+1; + bool x_interior = myhi(i-1,j ,k ) == BndryData::covered; + bool x_exterior = myhi(i-1,j ,k ) == BndryData::not_covered; + bool y_interior = mxhi(i ,j-1,k ) == BndryData::covered; + bool y_exterior = mxhi(i ,j-1,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j ,k-1) == BndryData::covered; + bool z_exterior = mxhi(i ,j ,k-1) == BndryData::not_covered; + if (mxhi(i,j,k) != BndryData::covered && + (!xhi_domain || !yhi_domain || !zhi_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], 
inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); } } break; @@ -518,9 +1340,10 @@ void mltensor_fill_corners (int icorner, Box const& vbox, // vbox: the valid box } } } +#endif -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mltensor_fill_edges (int iedge, Box const& vbox, // vbox: the valid box +inline +void mltensor_fill_edges (Box const& vbox, // vbox: the valid 
box Array4 const& vel, Array4 const& mxlo, Array4 const& mylo, @@ -534,522 +1357,486 @@ void mltensor_fill_edges (int iedge, Box const& vbox, // vbox: the valid box Array4 const& bcvalxhi, Array4 const& bcvalyhi, Array4 const& bcvalzhi, - GpuArray const& bct, - GpuArray const& bcl, + Array2D const& bct, + Array2D const& bcl, int inhomog, int maxorder, - GpuArray const& dxinv, Box const& domain) noexcept + GpuArray const& dxinv, + Dim3 const& dlo, Dim3 const& dhi) noexcept + { - constexpr int oxlo = 0; - constexpr int oylo = 1; - constexpr int ozlo = 2; - constexpr int oxhi = 3; - constexpr int oyhi = 4; - constexpr int ozhi = 5; - constexpr int xdir = 0; - constexpr int ydir = 1; - constexpr int zdir = 2; const auto blen = amrex::length(vbox); const auto vlo = amrex::lbound(vbox); const auto vhi = amrex::ubound(vbox); - const auto dlo = amrex::lbound(domain); - const auto dhi = amrex::ubound(domain); - for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { - switch (iedge) { - case 0: { - // xlo & ylo - if (vlo.x == dlo.x && vlo.y == dlo.y) { - for (int k = vlo.z; k <= vhi.z; ++k) { - vel (vlo.x-1,vlo.y-1,k,icomp) - = vel(vlo.x ,vlo.y-1,k,icomp) - + vel(vlo.x-1,vlo.y ,k,icomp) - - vel(vlo.x ,vlo.y ,k,icomp); - } - } else if (vlo.x == dlo.x) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - for (int k = vlo.z; k <= vhi.z; ++k) { - if (mxlo(vlo.x-1,vlo.y-1,k) != BndryData::covered) { - Box bx(IntVect(vlo.x-1,vlo.y-1,k),IntVect(vlo.x-1,vlo.y-1,k)); - if 
(mylo(vlo.x,vlo.y-1,k) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } - } - } - } - break; + bool xlo_domain = (vlo.x == dlo.x); + bool ylo_domain = (vlo.y == dlo.y); + bool zlo_domain = (vlo.z == dlo.z); + bool xhi_domain = (vhi.x == dhi.x); + bool yhi_domain = (vhi.y == dhi.y); + bool zhi_domain = (vhi.z == dhi.z); + + for (int k = vlo.z; k <= vhi.z; ++k) { + mltensor_fill_edges_xlo_ylo(vlo.x-1, vlo.y-1, k, blen, vel, mxlo, mylo, bcvalxlo, bcvalylo, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, ylo_domain); + mltensor_fill_edges_xhi_ylo(vhi.x+1, vlo.y-1, k, blen, vel, mxhi, mylo, bcvalxhi, bcvalylo, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, ylo_domain); + mltensor_fill_edges_xlo_yhi(vlo.x-1, vhi.y+1, k, blen, vel, mxlo, myhi, bcvalxlo, bcvalyhi, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, yhi_domain); + mltensor_fill_edges_xhi_yhi(vhi.x+1, vhi.y+1, k, blen, vel, mxhi, myhi, bcvalxhi, bcvalyhi, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, yhi_domain); + } + + for (int j = vlo.y; j <= vhi.y; ++j) { + mltensor_fill_edges_xlo_zlo(vlo.x-1, j, vlo.z-1, blen, vel, mxlo, mzlo, bcvalxlo, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, zlo_domain); + mltensor_fill_edges_xhi_zlo(vhi.x+1, j, vlo.z-1, blen, vel, mxhi, mzlo, bcvalxhi, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, zlo_domain); + mltensor_fill_edges_xlo_zhi(vlo.x-1, j, vhi.z+1, blen, vel, mxlo, mzhi, bcvalxlo, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, zhi_domain); + mltensor_fill_edges_xhi_zhi(vhi.x+1, j, vhi.z+1, blen, vel, mxhi, mzhi, bcvalxhi, bcvalzhi, + bct, bcl, inhomog, 
maxorder, dxinv, xhi_domain, zhi_domain); + } + + for (int i = vlo.x; i <= vhi.x; ++i) { + mltensor_fill_edges_ylo_zlo(i, vlo.y-1, vlo.z-1, blen, vel, mylo, mzlo, bcvalylo, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, ylo_domain, zlo_domain); + mltensor_fill_edges_yhi_zlo(i, vhi.y+1, vlo.z-1, blen, vel, myhi, mzlo, bcvalyhi, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, yhi_domain, zlo_domain); + mltensor_fill_edges_ylo_zhi(i, vlo.y-1, vhi.z+1, blen, vel, mylo, mzhi, bcvalylo, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, ylo_domain, zhi_domain); + mltensor_fill_edges_yhi_zhi(i, vhi.y+1, vhi.z+1, blen, vel, myhi, mzhi, bcvalyhi, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, yhi_domain, zhi_domain); + } +} + +#ifdef AMREX_USE_GPU +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges (int const bid, int const tid, int const bdim, + Box const& vbox, // vbox: the valid box + Array4 const& vel, + Array4 const& mxlo, + Array4 const& mylo, + Array4 const& mzlo, + Array4 const& mxhi, + Array4 const& myhi, + Array4 const& mzhi, + Array4 const& bcvalxlo, + Array4 const& bcvalylo, + Array4 const& bcvalzlo, + Array4 const& bcvalxhi, + Array4 const& bcvalyhi, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const auto blen = amrex::length(vbox); + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + bool xlo_domain = (vlo.x == dlo.x); + bool ylo_domain = (vlo.y == dlo.y); + bool zlo_domain = (vlo.z == dlo.z); + bool xhi_domain = (vhi.x == dhi.x); + bool yhi_domain = (vhi.y == dhi.y); + bool zhi_domain = (vhi.z == dhi.z); + if (bid == 0) { + for (int k = vlo.z + tid; k <= vhi.z; k += bdim) { + mltensor_fill_edges_xlo_ylo(vlo.x-1, vlo.y-1, k, blen, vel, mxlo, mylo, bcvalxlo, bcvalylo, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, ylo_domain); } - case 1: { - // xhi & ylo - if (vhi.x == dhi.x && vlo.y == 
dlo.y) { - for (int k = vlo.z; k <= vhi.z; ++k) { - vel (vhi.x+1,vlo.y-1,k,icomp) - = vel(vhi.x ,vlo.y-1,k,icomp) - + vel(vhi.x+1,vlo.y ,k,icomp) - - vel(vhi.x ,vlo.y ,k,icomp); - } - } else if (vhi.x == dhi.x) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - for (int k = vlo.z; k <= vhi.z; ++k) { - if (mxhi(vhi.x+1,vlo.y-1,k) != BndryData::covered) { - Box bx(IntVect(vhi.x+1,vlo.y-1,k),IntVect(vhi.x+1,vlo.y-1,k)); - if (mylo(vhi.x,vlo.y-1,k) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } - } - } + } else if (bid == 1) { + for (int k = vlo.z + tid; k <= vhi.z; k += bdim) { + mltensor_fill_edges_xhi_ylo(vhi.x+1, vlo.y-1, k, blen, vel, mxhi, mylo, bcvalxhi, bcvalylo, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, ylo_domain); + } + } else if (bid == 2) { + for (int k = vlo.z + tid; k <= vhi.z; k += bdim) { + mltensor_fill_edges_xlo_yhi(vlo.x-1, vhi.y+1, k, blen, vel, mxlo, myhi, bcvalxlo, bcvalyhi, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, yhi_domain); + } + } else if (bid == 3) { + for (int k = vlo.z + tid; k <= vhi.z; k += bdim) { + mltensor_fill_edges_xhi_yhi(vhi.x+1, vhi.y+1, 
k, blen, vel, mxhi, myhi, bcvalxhi, bcvalyhi, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, yhi_domain); + } + } else if (bid == 4) { + for (int j = vlo.y + tid; j <= vhi.y; j += bdim) { + mltensor_fill_edges_xlo_zlo(vlo.x-1, j, vlo.z-1, blen, vel, mxlo, mzlo, bcvalxlo, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, zlo_domain); + } + } else if (bid == 5) { + for (int j = vlo.y + tid; j <= vhi.y; j += bdim) { + mltensor_fill_edges_xhi_zlo(vhi.x+1, j, vlo.z-1, blen, vel, mxhi, mzlo, bcvalxhi, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, zlo_domain); + } + } else if (bid == 6) { + for (int j = vlo.y + tid; j <= vhi.y; j += bdim) { + mltensor_fill_edges_xlo_zhi(vlo.x-1, j, vhi.z+1, blen, vel, mxlo, mzhi, bcvalxlo, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, zhi_domain); + } + } else if (bid == 7) { + for (int j = vlo.y + tid; j <= vhi.y; j += bdim) { + mltensor_fill_edges_xhi_zhi(vhi.x+1, j, vhi.z+1, blen, vel, mxhi, mzhi, bcvalxhi, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, zhi_domain); + } + } else if (bid == 8) { + for (int i = vlo.x + tid; i <= vhi.x; i += bdim) { + mltensor_fill_edges_ylo_zlo(i, vlo.y-1, vlo.z-1, blen, vel, mylo, mzlo, bcvalylo, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, ylo_domain, zlo_domain); + } + } else if (bid == 9) { + for (int i = vlo.x + tid; i <= vhi.x; i += bdim) { + mltensor_fill_edges_yhi_zlo(i, vhi.y+1, vlo.z-1, blen, vel, myhi, mzlo, bcvalyhi, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, yhi_domain, zlo_domain); + } + } else if (bid == 10) { + for (int i = vlo.x + tid; i <= vhi.x; i += bdim) { + mltensor_fill_edges_ylo_zhi(i, vlo.y-1, vhi.z+1, blen, vel, mylo, mzhi, bcvalylo, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, ylo_domain, zhi_domain); + } + } else if (bid == 11) { + for (int i = vlo.x + tid; i <= vhi.x; i += bdim) { + mltensor_fill_edges_yhi_zhi(i, vhi.y+1, vhi.z+1, blen, vel, myhi, mzhi, bcvalyhi, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, 
yhi_domain, zhi_domain); + } + } +} +#endif + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dz_on_xface (int i, int j, int k, int n, Array4 const& vel, Real dzi) noexcept +{ + return (vel(i,j,k+1,n)+vel(i-1,j,k+1,n)-vel(i,j,k-1,n)-vel(i-1,j,k-1,n))*(Real(0.25)*dzi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dz_on_yface (int i, int j, int k, int n, Array4 const& vel, Real dzi) noexcept +{ + return (vel(i,j,k+1,n)+vel(i,j-1,k+1,n)-vel(i,j,k-1,n)-vel(i,j-1,k-1,n))*(Real(0.25)*dzi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dx_on_zface (int i, int j, int k, int n, Array4 const& vel, Real dxi) noexcept +{ + return (vel(i+1,j,k,n)+vel(i+1,j,k-1,n)-vel(i-1,j,k,n)-vel(i-1,j,k-1,n))*(Real(0.25)*dxi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dy_on_zface (int i, int j, int k, int n, Array4 const& vel, Real dyi) noexcept +{ + return (vel(i,j+1,k,n)+vel(i,j+1,k-1,n)-vel(i,j-1,k,n)-vel(i,j-1,k-1,n))*(Real(0.25)*dyi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + Array4 const& etax, + Array4 const& kapx, + GpuArray const& dxinv) noexcept +{ + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi); + Real dudz = mltensor_dz_on_xface(i,j,k,0,vel,dzi); + Real dwdz = mltensor_dz_on_xface(i,j,k,2,vel,dzi); + Real divu = dvdy + dwdz; + Real xif = kapx(i,j,k); + Real mun = Real(0.75)*(etax(i,j,k,0)-xif); // restore the original eta + Real mut = etax(i,j,k,1); + fx(i,j,k,0) = -mun*(-twoThirds*divu) - xif*divu; + fx(i,j,k,1) = -mut*(dudy); + fx(i,j,k,2) = -mut*(dudz); } - 
break; } - case 2: { - // xlo & yhi - if (vlo.x == dlo.x && vhi.y == dhi.y) { - for (int k = vlo.z; k <= vhi.z; ++k) { - vel (vlo.x-1,vhi.y+1,k,icomp) - = vel(vlo.x ,vhi.y+1,k,icomp) - + vel(vlo.x-1,vhi.y ,k,icomp) - - vel(vlo.x ,vhi.y ,k,icomp); - } - } else if (vlo.x == dlo.x) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - for (int k = vlo.z; k <= vhi.z; ++k) { - if (mxlo(vlo.x-1,vhi.y+1,k) != BndryData::covered) { - Box bx(IntVect(vlo.x-1,vhi.y+1,k),IntVect(vlo.x-1,vhi.y+1,k)); - if (myhi(vlo.x,vhi.y+1,k) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } - } - } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + Array4 const& etay, + Array4 const& kapy, + GpuArray const& dxinv) noexcept +{ + const Real dxi = dxinv[0]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + 
Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi); + Real dvdz = mltensor_dz_on_yface(i,j,k,1,vel,dzi); + Real dwdz = mltensor_dz_on_yface(i,j,k,2,vel,dzi); + Real divu = dudx + dwdz; + Real xif = kapy(i,j,k); + Real mun = Real(0.75)*(etay(i,j,k,1)-xif); // restore the original eta + Real mut = etay(i,j,k,0); + fy(i,j,k,0) = -mut*(dvdx); + fy(i,j,k,1) = -mun*(-twoThirds*divu) - xif*divu; + fy(i,j,k,2) = -mut*(dvdz); } - break; } - case 3: { - // xhi & yhi - if (vhi.x == dhi.x && vhi.y == dhi.y) { - for (int k = vlo.z; k <= vhi.z; ++k) { - vel (vhi.x+1,vhi.y+1,k,icomp) - = vel(vhi.x ,vhi.y+1,k,icomp) - + vel(vhi.x+1,vhi.y ,k,icomp) - - vel(vhi.x ,vhi.y ,k,icomp); - } - } else if (vhi.x == dhi.x) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - for (int k = vlo.z; k <= vhi.z; ++k) { - if (mxhi(vhi.x+1,vhi.y+1,k) != BndryData::covered) { - Box bx(IntVect(vhi.x+1,vhi.y+1,k),IntVect(vhi.x+1,vhi.y+1,k)); - if (myhi(vhi.x,vhi.y+1,k) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } - } - } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE 
+void mltensor_cross_terms_fz (Box const& box, Array4 const& fz, + Array4 const& vel, + Array4 const& etaz, + Array4 const& kapz, + GpuArray const& dxinv) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_zface(i,j,k,0,vel,dxi); + Real dwdx = mltensor_dx_on_zface(i,j,k,2,vel,dxi); + Real dvdy = mltensor_dy_on_zface(i,j,k,1,vel,dyi); + Real dwdy = mltensor_dy_on_zface(i,j,k,2,vel,dyi); + Real divu = dudx + dvdy; + Real xif = kapz(i,j,k); + Real mun = Real(0.75)*(etaz(i,j,k,2)-xif); // restore the original eta + Real mut = etaz(i,j,k,0); + fz(i,j,k,0) = -mut*(dwdx); + fz(i,j,k,1) = -mut*(dwdy); + fz(i,j,k,2) = -mun*(-twoThirds*divu) - xif*divu; } - break; } - case 4: { - // xlo & zlo - if (vlo.x == dlo.x && vlo.z == dlo.z) { - for (int j = vlo.y; j <= vhi.y; ++j) { - vel (vlo.x-1,j,vlo.z-1,icomp) - = vel(vlo.x ,j,vlo.z-1,icomp) - + vel(vlo.x-1,j,vlo.z ,icomp) - - vel(vlo.x ,j,vlo.z ,icomp); - } - } else if (vlo.x == dlo.x) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dz_on_xface (int i, int j, int k, int n, Array4 const& vel, Real dzi, + Array4 const& bvxlo, Array4 const& bvxhi, + Array2D const& bct, + 
Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddz; + if (i == dlo.x) { + if (bct(Orientation::xlo(),n) == AMREX_LO_DIRICHLET && bvxlo) { + if (k == dlo.z) { + ddz = (bvxlo(i-1,j,k ,n) * Real(-1.5) + + bvxlo(i-1,j,k+1,n) * Real(2.) + + bvxlo(i-1,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvxlo(i-1,j,k ,n) * Real(-1.5) + + bvxlo(i-1,j,k-1,n) * Real(2.) + + bvxlo(i-1,j,k-2,n) * Real(-0.5)) * dzi; } else { - for (int j = vlo.y; j <= vhi.y; ++j) { - if (mxlo(vlo.x-1,j,vlo.z-1) != BndryData::covered) { - Box bx(IntVect(vlo.x-1,j,vlo.z-1),IntVect(vlo.x-1,j,vlo.z-1)); - if (mzlo(vlo.x,j,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddz = (bvxlo(i-1,j,k+1,n)-bvxlo(i-1,j,k-1,n))*(Real(0.5)*dzi); } - break; + } else if (bct(Orientation::xlo(),n) == AMREX_LO_NEUMANN) { + ddz = (vel(i,j,k+1,n)-vel(i,j,k-1,n))*(Real(0.5)*dzi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); } - case 5: { - // xhi & zlo - if (vhi.x == dhi.x && vlo.z == dlo.z) { - for (int j = vlo.y; j <= vhi.y; ++j) { - vel (vhi.x+1,j,vlo.z-1,icomp) - = vel(vhi.x ,j,vlo.z-1,icomp) - + vel(vhi.x+1,j,vlo.z ,icomp) - - vel(vhi.x ,j,vlo.z ,icomp); - } - } else if (vhi.x == dhi.x) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * 
ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + } else if (i == dhi.x+1) { + if (bct(Orientation::xhi(),n) == AMREX_LO_DIRICHLET && bvxhi) { + if (k == dlo.z) { + ddz = (bvxhi(i,j,k ,n) * Real(-1.5) + + bvxhi(i,j,k+1,n) * Real(2.) + + bvxhi(i,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvxhi(i,j,k ,n) * Real(-1.5) + + bvxhi(i,j,k-1,n) * Real(2.) + + bvxhi(i,j,k-2,n) * Real(-0.5)) * dzi; } else { - for (int j = vlo.y; j <= vhi.y; ++j) { - if (mxhi(vhi.x+1,j,vlo.z-1) != BndryData::covered) { - Box bx(IntVect(vhi.x+1,j,vlo.z-1),IntVect(vhi.x+1,j,vlo.z-1)); - if (mzlo(vhi.x,j,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddz = (bvxhi(i,j,k+1,n)-bvxhi(i,j,k-1,n))*(Real(0.5)*dzi); } - break; + } else if (bct(Orientation::xhi(),n) == AMREX_LO_NEUMANN) { + ddz = (vel(i-1,j,k+1,n)-vel(i-1,j,k-1,n))*(Real(0.5)*dzi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); } - case 6: { - // xlo & zhi - if (vlo.x == dlo.x && vhi.z == dhi.z) { - for (int j = vlo.y; j <= vhi.y; ++j) { - vel (vlo.x-1,j,vhi.z+1,icomp) - = vel(vlo.x ,j,vhi.z+1,icomp) - + vel(vlo.x-1,j,vhi.z ,icomp) - - vel(vlo.x ,j,vhi.z ,icomp); - } - } else if (vlo.x == dlo.x) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - 
Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + } else { + ddz = mltensor_dz_on_xface(i,j,k,n,vel,dzi); + } + return ddz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dz_on_yface (int i, int j, int k, int n, Array4 const& vel, Real dzi, + Array4 const& bvylo, Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddz; + if (j == dlo.y) { + if (bct(Orientation::ylo(),n) == AMREX_LO_DIRICHLET && bvylo) { + if (k == dlo.z) { + ddz = (bvylo(i,j-1,k ,n) * Real(-1.5) + + bvylo(i,j-1,k+1,n) * Real(2.) + + bvylo(i,j-1,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvylo(i,j-1,k ,n) * Real(-1.5) + + bvylo(i,j-1,k-1,n) * Real(2.) + + bvylo(i,j-1,k-2,n) * Real(-0.5)) * dzi; } else { - for (int j = vlo.y; j <= vhi.y; ++j) { - if (mxlo(vlo.x-1,j,vhi.z+1) != BndryData::covered) { - Box bx(IntVect(vlo.x-1,j,vhi.z+1),IntVect(vlo.x-1,j,vhi.z+1)); - if (mzhi(vlo.x,j,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddz = (bvylo(i,j-1,k+1,n)-bvylo(i,j-1,k-1,n))*(Real(0.5)*dzi); } - break; + } else if (bct(Orientation::ylo(),n) == AMREX_LO_NEUMANN) { + ddz = (vel(i,j,k+1,n)-vel(i,j,k-1,n))*(Real(0.5)*dzi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); } - case 7: { - // xhi & zhi - if (vhi.x == dhi.x && vhi.z == dhi.z) { - for (int j = vlo.y; j <= vhi.y; ++j) { - vel 
(vhi.x+1,j,vhi.z+1,icomp) - = vel(vhi.x ,j,vhi.z+1,icomp) - + vel(vhi.x+1,j,vhi.z ,icomp) - - vel(vhi.x ,j,vhi.z ,icomp); - } - } else if (vhi.x == dhi.x) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + } else if (j == dhi.y+1) { + if (bct(Orientation::yhi(),n) == AMREX_LO_DIRICHLET && bvyhi) { + if (k == dlo.z) { + ddz = (bvyhi(i,j,k ,n) * Real(-1.5) + + bvyhi(i,j,k+1,n) * Real(2.) + + bvyhi(i,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvyhi(i,j,k ,n) * Real(-1.5) + + bvyhi(i,j,k-1,n) * Real(2.) 
+ + bvyhi(i,j,k-2,n) * Real(-0.5)) * dzi; } else { - for (int j = vlo.y; j <= vhi.y; ++j) { - if (mxhi(vhi.x+1,j,vhi.z+1) != BndryData::covered) { - Box bx(IntVect(vhi.x+1,j,vhi.z+1),IntVect(vhi.x+1,j,vhi.z+1)); - if (mzhi(vhi.x,j,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddz = (bvyhi(i,j,k+1,n)-bvyhi(i,j,k-1,n))*(Real(0.5)*dzi); } - break; + } else if (bct(Orientation::yhi(),n) == AMREX_LO_NEUMANN) { + ddz = (vel(i,j-1,k+1,n)-vel(i,j-1,k-1,n))*(Real(0.5)*dzi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); } - case 8: { - // ylo & zlo - if (vlo.y == dlo.y && vlo.z == dlo.z) { - for (int i = vlo.x; i <= vhi.x; ++i) { - vel (i,vlo.y-1,vlo.z-1,icomp) - = vel(i,vlo.y ,vlo.z-1,icomp) - + vel(i,vlo.y-1,vlo.z ,icomp) - - vel(i,vlo.y ,vlo.z ,icomp); - } - } else if (vlo.y == dlo.y) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + } else { + ddz = mltensor_dz_on_yface(i,j,k,n,vel,dzi); + } + return ddz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dx_on_zface (int i, int j, int k, int n, Array4 const& vel, Real dxi, + Array4 const& 
bvzlo, Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddx; + if (k == dlo.z) { + if (bct(Orientation::zlo(),n) == AMREX_LO_DIRICHLET && bvzlo) { + if (i == dlo.x) { + ddx = (bvzlo(i ,j,k-1,n) * Real(-1.5) + + bvzlo(i+1,j,k-1,n) * Real(2.) + + bvzlo(i+2,j,k-1,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvzlo(i ,j,k-1,n) * Real(-1.5) + + bvzlo(i-1,j,k-1,n) * Real(2.) + + bvzlo(i-2,j,k-1,n) * Real(-0.5)) * dxi; } else { - for (int i = vlo.x; i <= vhi.x; ++i) { - if (mylo(i,vlo.y-1,vlo.z-1) != BndryData::covered) { - Box bx(IntVect(i,vlo.y-1,vlo.z-1),IntVect(i,vlo.y-1,vlo.z-1)); - if (mzlo(i,vlo.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddx = (bvzlo(i+1,j,k-1,n)-bvzlo(i-1,j,k-1,n))*(Real(0.5)*dxi); } - break; + } else if (bct(Orientation::zlo(),n) == AMREX_LO_NEUMANN) { + ddx = (vel(i+1,j,k,n)-vel(i-1,j,k,n))*(Real(0.5)*dxi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); } - case 9: { - // yhi & zlo - if (vhi.y == dhi.y && vlo.z == dlo.z) { - for (int i = vlo.x; i <= vhi.x; ++i) { - vel (i,vhi.y+1,vlo.z-1,icomp) - = vel(i,vhi.y ,vlo.z-1,icomp) - + vel(i,vhi.y+1,vlo.z ,icomp) - - vel(i,vhi.y ,vlo.z ,icomp); - } - } else if (vhi.y == dhi.y) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - Box bx = 
amrex::adjCellLo(amrex::adjCellHi(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + } else if (k == dhi.z+1) { + if (bct(Orientation::zhi(),n) == AMREX_LO_DIRICHLET && bvzhi) { + if (i == dlo.x) { + ddx = (bvzhi(i ,j,k,n) * Real(-1.5) + + bvzhi(i+1,j,k,n) * Real(2.) + + bvzhi(i+2,j,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvzhi(i ,j,k,n) * Real(-1.5) + + bvzhi(i-1,j,k,n) * Real(2.) + + bvzhi(i-2,j,k,n) * Real(-0.5)) * dxi; } else { - for (int i = vlo.x; i <= vhi.x; ++i) { - if (myhi(i,vhi.y+1,vlo.z-1) != BndryData::covered) { - Box bx(IntVect(i,vhi.y+1,vlo.z-1),IntVect(i,vhi.y+1,vlo.z-1)); - if (mzlo(i,vhi.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddx = (bvzhi(i+1,j,k,n)-bvzhi(i-1,j,k,n))*(Real(0.5)*dxi); } - break; + } else if (bct(Orientation::zhi(),n) == AMREX_LO_NEUMANN) { + ddx = (vel(i+1,j,k-1,n)-vel(i-1,j,k-1,n))*(Real(0.5)*dxi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); } - case 10: { - // ylo & zhi - if (vlo.y == dlo.y && vhi.z == dhi.z) { - for (int i = vlo.x; i <= vhi.x; ++i) { - vel (i,vlo.y-1,vhi.z+1,icomp) - = vel(i,vlo.y ,vhi.z+1,icomp) - + vel(i,vlo.y-1,vhi.z ,icomp) - - vel(i,vlo.y ,vhi.z ,icomp); - } - } else if (vlo.y == dlo.y) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], 
bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + } else { + ddx = mltensor_dx_on_zface(i,j,k,n,vel,dxi); + } + return ddx; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dy_on_zface (int i, int j, int k, int n, Array4 const& vel, Real dyi, + Array4 const& bvzlo, Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddy; + if (k == dlo.z) { + if (bct(Orientation::zlo(),n) == AMREX_LO_DIRICHLET && bvzlo) { + if (j == dlo.y) { + ddy = (bvzlo(i,j ,k-1,n) * Real(-1.5) + + bvzlo(i,j+1,k-1,n) * Real(2.) + + bvzlo(i,j+2,k-1,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvzlo(i,j ,k-1,n) * Real(-1.5) + + bvzlo(i,j-1,k-1,n) * Real(2.) 
+ + bvzlo(i,j-2,k-1,n) * Real(-0.5)) * dyi; } else { - for (int i = vlo.x; i <= vhi.x; ++i) { - if (mylo(i,vlo.y-1,vhi.z+1) != BndryData::covered) { - Box bx(IntVect(i,vlo.y-1,vhi.z+1),IntVect(i,vlo.y-1,vhi.z+1)); - if (mzhi(i,vlo.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddy = (bvzlo(i,j+1,k-1,n)-bvzlo(i,j-1,k-1,n))*(Real(0.5)*dyi); } - break; + } else if (bct(Orientation::zlo(),n) == AMREX_LO_NEUMANN) { + ddy = (vel(i,j+1,k,n)-vel(i,j-1,k,n))*(Real(0.5)*dyi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); } - case 11: { - // yhi & zhi - if (vhi.y == dhi.y && vhi.z == dhi.z) { - for (int i = vlo.x; i <= vhi.x; ++i) { - vel (i,vhi.y+1,vhi.z+1,icomp) - = vel(i,vhi.y ,vhi.z+1,icomp) - + vel(i,vhi.y+1,vhi.z ,icomp) - - vel(i,vhi.y ,vhi.z ,icomp); - } - } else if (vhi.y == dhi.y) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + } else if (k == dhi.z+1) { + if (bct(Orientation::zhi(),n) == AMREX_LO_DIRICHLET && bvzhi) { + if (j == dlo.y) { + ddy = (bvzhi(i,j ,k,n) * Real(-1.5) + + bvzhi(i,j+1,k,n) * Real(2.) 
+ + bvzhi(i,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvzhi(i,j ,k,n) * Real(-1.5) + + bvzhi(i,j-1,k,n) * Real(2.) + + bvzhi(i,j-2,k,n) * Real(-0.5)) * dyi; } else { - for (int i = vlo.x; i <= vhi.x; ++i) { - if (myhi(i,vhi.y+1,vhi.z+1) != BndryData::covered) { - Box bx(IntVect(i,vhi.y+1,vhi.z+1),IntVect(i,vhi.y+1,vhi.z+1)); - if (mzhi(i,vhi.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddy = (bvzhi(i,j+1,k,n)-bvzhi(i,j-1,k,n))*(Real(0.5)*dyi); } - break; - } - default: {} + } else if (bct(Orientation::zhi(),n) == AMREX_LO_NEUMANN) { + ddy = (vel(i,j+1,k-1,n)-vel(i,j-1,k-1,n))*(Real(0.5)*dyi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); } + } else { + ddy = mltensor_dy_on_zface(i,j,k,n,vel,dyi); } + return ddy; } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE @@ -1057,7 +1844,13 @@ void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, Array4 const& vel, Array4 const& etax, Array4 const& kapx, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dyi = dxinv[1]; const Real dzi = dxinv[2]; @@ -1067,12 +1860,11 @@ void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, for (int k = lo.z; k <= hi.z; ++k) { for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudy = (vel(i,j+1,k,0)+vel(i-1,j+1,k,0)-vel(i,j-1,k,0)-vel(i-1,j-1,k,0))*(Real(0.25)*dyi); - Real dvdy = 
(vel(i,j+1,k,1)+vel(i-1,j+1,k,1)-vel(i,j-1,k,1)-vel(i-1,j-1,k,1))*(Real(0.25)*dyi); - Real dudz = (vel(i,j,k+1,0)+vel(i-1,j,k+1,0)-vel(i,j,k-1,0)-vel(i-1,j,k-1,0))*(Real(0.25)*dzi); - Real dwdz = (vel(i,j,k+1,2)+vel(i-1,j,k+1,2)-vel(i,j,k-1,2)-vel(i-1,j,k-1,2))*(Real(0.25)*dzi); + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dudz = mltensor_dz_on_xface(i,j,k,0,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); + Real dwdz = mltensor_dz_on_xface(i,j,k,2,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); Real divu = dvdy + dwdz; Real xif = kapx(i,j,k); Real mun = Real(0.75)*(etax(i,j,k,0)-xif); // restore the original eta @@ -1090,7 +1882,13 @@ void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, Array4 const& vel, Array4 const& etay, Array4 const& kapy, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dxi = dxinv[0]; const Real dzi = dxinv[2]; @@ -1100,12 +1898,11 @@ void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, for (int k = lo.z; k <= hi.z; ++k) { for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,k,0)+vel(i+1,j-1,k,0)-vel(i-1,j,k,0)-vel(i-1,j-1,k,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,k,1)+vel(i+1,j-1,k,1)-vel(i-1,j,k,1)-vel(i-1,j-1,k,1))*(Real(0.25)*dxi); - Real dvdz = (vel(i,j,k+1,1)+vel(i,j-1,k+1,1)-vel(i,j,k-1,1)-vel(i,j-1,k-1,1))*(Real(0.25)*dzi); - Real dwdz = (vel(i,j,k+1,2)+vel(i,j-1,k+1,2)-vel(i,j,k-1,2)-vel(i,j-1,k-1,2))*(Real(0.25)*dzi); + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdz = mltensor_dz_on_yface(i,j,k,1,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); + Real dwdz = 
mltensor_dz_on_yface(i,j,k,2,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); Real divu = dudx + dwdz; Real xif = kapy(i,j,k); Real mun = Real(0.75)*(etay(i,j,k,1)-xif); // restore the original eta @@ -1123,7 +1920,13 @@ void mltensor_cross_terms_fz (Box const& box, Array4 const& fz, Array4 const& vel, Array4 const& etaz, Array4 const& kapz, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dxi = dxinv[0]; const Real dyi = dxinv[1]; @@ -1133,12 +1936,11 @@ void mltensor_cross_terms_fz (Box const& box, Array4 const& fz, for (int k = lo.z; k <= hi.z; ++k) { for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,k,0)+vel(i+1,j,k-1,0)-vel(i-1,j,k,0)-vel(i-1,j,k-1,0))*(Real(0.25)*dxi); - Real dwdx = (vel(i+1,j,k,2)+vel(i+1,j,k-1,2)-vel(i-1,j,k,2)-vel(i-1,j,k-1,2))*(Real(0.25)*dxi); - Real dvdy = (vel(i,j+1,k,1)+vel(i,j+1,k-1,1)-vel(i,j-1,k,1)-vel(i,j-1,k-1,1))*(Real(0.25)*dyi); - Real dwdy = (vel(i,j+1,k,2)+vel(i,j+1,k-1,2)-vel(i,j-1,k,2)-vel(i,j-1,k-1,2))*(Real(0.25)*dyi); + Real dudx = mltensor_dx_on_zface(i,j,k,0,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dwdx = mltensor_dx_on_zface(i,j,k,2,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_zface(i,j,k,1,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); + Real dwdy = mltensor_dy_on_zface(i,j,k,2,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); Real divu = dudx + dvdy; Real xif = kapz(i,j,k); Real mun = Real(0.75)*(etaz(i,j,k,2)-xif); // restore the original eta @@ -1242,13 +2044,13 @@ void mltensor_vel_grads_fx (Box const& box, Array4 const& fx, Real dvdx = (vel(i,j,k,1) - vel(i-1,j,k,1))*dxi; Real dwdx = (vel(i,j,k,2) - vel(i-1,j,k,2))*dxi; - Real dudy = (vel(i,j+1,k,0)+vel(i-1,j+1,k,0)-vel(i,j-1,k,0)-vel(i-1,j-1,k,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,k,1)+vel(i-1,j+1,k,1)-vel(i,j-1,k,1)-vel(i-1,j-1,k,1))*(Real(0.25)*dyi); - Real dwdy = 
(vel(i,j+1,k,2)+vel(i-1,j+1,k,2)-vel(i,j-1,k,2)-vel(i-1,j-1,k,2))*(Real(0.25)*dyi); + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi); + Real dwdy = mltensor_dy_on_xface(i,j,k,2,vel,dyi); - Real dudz = (vel(i,j,k+1,0)+vel(i-1,j,k+1,0)-vel(i,j,k-1,0)-vel(i-1,j,k-1,0))*(Real(0.25)*dzi); - Real dvdz = (vel(i,j,k+1,1)+vel(i-1,j,k+1,1)-vel(i,j,k-1,1)-vel(i-1,j,k-1,1))*(Real(0.25)*dzi); - Real dwdz = (vel(i,j,k+1,2)+vel(i-1,j,k+1,2)-vel(i,j,k-1,2)-vel(i-1,j,k-1,2))*(Real(0.25)*dzi); + Real dudz = mltensor_dz_on_xface(i,j,k,0,vel,dzi); + Real dvdz = mltensor_dz_on_xface(i,j,k,1,vel,dzi); + Real dwdz = mltensor_dz_on_xface(i,j,k,2,vel,dzi); fx(i,j,k,0) = dudx; fx(i,j,k,1) = dvdx; @@ -1281,17 +2083,17 @@ void mltensor_vel_grads_fy (Box const& box, Array4 const& fy, AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,k,0)+vel(i+1,j-1,k,0)-vel(i-1,j,k,0)-vel(i-1,j-1,k,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,k,1)+vel(i+1,j-1,k,1)-vel(i-1,j,k,1)-vel(i-1,j-1,k,1))*(Real(0.25)*dxi); - Real dwdx = (vel(i+1,j,k,2)+vel(i+1,j-1,k,2)-vel(i-1,j,k,2)-vel(i-1,j-1,k,2))*(Real(0.25)*dxi); + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi); + Real dwdx = mltensor_dx_on_yface(i,j,k,2,vel,dxi); Real dudy = (vel(i,j,k,0) - vel(i,j-1,k,0))*dyi; Real dvdy = (vel(i,j,k,1) - vel(i,j-1,k,1))*dyi; Real dwdy = (vel(i,j,k,2) - vel(i,j-1,k,2))*dyi; - Real dudz = (vel(i,j,k+1,0)+vel(i,j-1,k+1,0)-vel(i,j,k-1,0)-vel(i,j-1,k-1,0))*(Real(0.25)*dzi); - Real dvdz = (vel(i,j,k+1,1)+vel(i,j-1,k+1,1)-vel(i,j,k-1,1)-vel(i,j-1,k-1,1))*(Real(0.25)*dzi); - Real dwdz = (vel(i,j,k+1,2)+vel(i,j-1,k+1,2)-vel(i,j,k-1,2)-vel(i,j-1,k-1,2))*(Real(0.25)*dzi); + Real dudz = mltensor_dz_on_yface(i,j,k,0,vel,dzi); + Real dvdz = mltensor_dz_on_yface(i,j,k,1,vel,dzi); + Real dwdz = mltensor_dz_on_yface(i,j,k,2,vel,dzi); fy(i,j,k,0) = dudx; fy(i,j,k,1) = dvdx; @@ -1324,13 +2126,13 @@ void 
mltensor_vel_grads_fz (Box const& box, Array4 const& fz, AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,k,0)+vel(i+1,j,k-1,0)-vel(i-1,j,k,0)-vel(i-1,j,k-1,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,k,1)+vel(i+1,j,k-1,1)-vel(i-1,j,k,1)-vel(i-1,j,k-1,1))*(Real(0.25)*dxi); - Real dwdx = (vel(i+1,j,k,2)+vel(i+1,j,k-1,2)-vel(i-1,j,k,2)-vel(i-1,j,k-1,2))*(Real(0.25)*dxi); + Real dudx = mltensor_dx_on_zface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_zface(i,j,k,1,vel,dxi); + Real dwdx = mltensor_dx_on_zface(i,j,k,2,vel,dxi); - Real dudy = (vel(i,j+1,k,0)+vel(i,j+1,k-1,0)-vel(i,j-1,k,0)-vel(i,j-1,k-1,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,k,1)+vel(i,j+1,k-1,1)-vel(i,j-1,k,1)-vel(i,j-1,k-1,1))*(Real(0.25)*dyi); - Real dwdy = (vel(i,j+1,k,2)+vel(i,j+1,k-1,2)-vel(i,j-1,k,2)-vel(i,j-1,k-1,2))*(Real(0.25)*dyi); + Real dudy = mltensor_dy_on_zface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_zface(i,j,k,1,vel,dyi); + Real dwdy = mltensor_dy_on_zface(i,j,k,2,vel,dyi); Real dudz = (vel(i,j,k,0) - vel(i,j,k-1,0))*dzi; Real dvdz = (vel(i,j,k,1) - vel(i,j,k-1,1))*dzi; @@ -1351,6 +2153,138 @@ void mltensor_vel_grads_fz (Box const& box, Array4 const& fz, } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = (vel(i,j,k,0) - vel(i-1,j,k,0))*dxi; + Real dvdx = (vel(i,j,k,1) - vel(i-1,j,k,1))*dxi; + Real dwdx = (vel(i,j,k,2) - vel(i-1,j,k,2))*dxi; + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdy = 
mltensor_dy_on_xface(i,j,k,1,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dwdy = mltensor_dy_on_xface(i,j,k,2,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dudz = mltensor_dz_on_xface(i,j,k,0,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdz = mltensor_dz_on_xface(i,j,k,1,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); + Real dwdz = mltensor_dz_on_xface(i,j,k,2,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); + fx(i,j,k,0) = dudx; + fx(i,j,k,1) = dvdx; + fx(i,j,k,2) = dwdx; + fx(i,j,k,3) = dudy; + fx(i,j,k,4) = dvdy; + fx(i,j,k,5) = dwdy; + fx(i,j,k,6) = dudz; + fx(i,j,k,7) = dvdz; + fx(i,j,k,8) = dwdz; + + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dwdx = mltensor_dx_on_yface(i,j,k,2,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dudy = (vel(i,j,k,0) - vel(i,j-1,k,0))*dyi; + Real dvdy = (vel(i,j,k,1) - vel(i,j-1,k,1))*dyi; + Real dwdy = (vel(i,j,k,2) - vel(i,j-1,k,2))*dyi; + Real dudz = mltensor_dz_on_yface(i,j,k,0,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdz = mltensor_dz_on_yface(i,j,k,1,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); + Real dwdz = mltensor_dz_on_yface(i,j,k,2,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); + fy(i,j,k,0) = dudx; + fy(i,j,k,1) = dvdx; + fy(i,j,k,2) = dwdx; + fy(i,j,k,3) = dudy; + fy(i,j,k,4) = dvdy; + fy(i,j,k,5) = dwdy; + fy(i,j,k,6) = dudz; + fy(i,j,k,7) = dvdz; + fy(i,j,k,8) = dwdz; + + } + } + } +} + +AMREX_GPU_HOST_DEVICE 
AMREX_FORCE_INLINE +void mltensor_vel_grads_fz (Box const& box, Array4 const& fz, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_zface(i,j,k,0,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_zface(i,j,k,1,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dwdx = mltensor_dx_on_zface(i,j,k,2,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dudy = mltensor_dy_on_zface(i,j,k,0,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_zface(i,j,k,1,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); + Real dwdy = mltensor_dy_on_zface(i,j,k,2,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); + Real dudz = (vel(i,j,k,0) - vel(i,j,k-1,0))*dzi; + Real dvdz = (vel(i,j,k,1) - vel(i,j,k-1,1))*dzi; + Real dwdz = (vel(i,j,k,2) - vel(i,j,k-1,2))*dzi; + fz(i,j,k,0) = dudx; + fz(i,j,k,1) = dvdx; + fz(i,j,k,2) = dwdx; + fz(i,j,k,3) = dudy; + fz(i,j,k,4) = dvdy; + fz(i,j,k,5) = dwdy; + fz(i,j,k,6) = dudz; + fz(i,j,k,7) = dvdz; + fz(i,j,k,8) = dwdz; + + } + } + } +} + } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H b/Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H index 4440f57e7a8..33457ec1ced 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H @@ -5,6 +5,123 @@ #include #include +namespace amrex { + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dy_on_xface (int i, int j, int k, int n, Array4 const& vel, Real dyi) noexcept +{ + return (vel(i,j+1,k,n)+vel(i-1,j+1,k,n)-vel(i,j-1,k,n)-vel(i-1,j-1,k,n))*(Real(0.25)*dyi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dx_on_yface (int i, int j, int k, int n, Array4 
const& vel, Real dxi) noexcept +{ + return (vel(i+1,j,k,n)+vel(i+1,j-1,k,n)-vel(i-1,j,k,n)-vel(i-1,j-1,k,n))*(Real(0.25)*dxi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dy_on_xface (int i, int j, int k, int n, Array4 const& vel, Real dyi, + Array4 const& bvxlo, Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddy; + if (i == dlo.x) { + if (bct(Orientation::xlo(),n) == AMREX_LO_DIRICHLET && bvxlo) { + if (j == dlo.y) { + ddy = (bvxlo(i-1,j ,k,n) * Real(-1.5) + + bvxlo(i-1,j+1,k,n) * Real(2.) + + bvxlo(i-1,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvxlo(i-1,j ,k,n) * Real(-1.5) + + bvxlo(i-1,j-1,k,n) * Real(2.) + + bvxlo(i-1,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = (bvxlo(i-1,j+1,k,n)-bvxlo(i-1,j-1,k,n))*(Real(0.5)*dyi); + } + } else if (bct(Orientation::xlo(),n) == AMREX_LO_NEUMANN) { + ddy = (vel(i,j+1,k,n)-vel(i,j-1,k,n))*(Real(0.5)*dyi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else if (i == dhi.x+1) { + if (bct(Orientation::xhi(),n) == AMREX_LO_DIRICHLET && bvxhi) { + if (j == dlo.y) { + ddy = (bvxhi(i,j ,k,n) * Real(-1.5) + + bvxhi(i,j+1,k,n) * Real(2.) + + bvxhi(i,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvxhi(i,j ,k,n) * Real(-1.5) + + bvxhi(i,j-1,k,n) * Real(2.) 
+ + bvxhi(i,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = (bvxhi(i,j+1,k,n)-bvxhi(i,j-1,k,n))*(Real(0.5)*dyi); + } + } else if (bct(Orientation::xhi(),n) == AMREX_LO_NEUMANN) { + ddy = (vel(i-1,j+1,k,n)-vel(i-1,j-1,k,n))*(Real(0.5)*dyi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else { + ddy = mltensor_dy_on_xface(i,j,k,n,vel,dyi); + } + return ddy; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dx_on_yface (int i, int j, int k, int n, Array4 const& vel, Real dxi, + Array4 const& bvylo, Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddx; + if (j == dlo.y) { + if (bct(Orientation::ylo(),n) == AMREX_LO_DIRICHLET && bvylo) { + if (i == dlo.x) { + ddx = (bvylo(i ,j-1,k,n) * Real(-1.5) + + bvylo(i+1,j-1,k,n) * Real(2.) + + bvylo(i+2,j-1,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvylo(i ,j-1,k,n) * Real(-1.5) + + bvylo(i-1,j-1,k,n) * Real(2.) + + bvylo(i-2,j-1,k,n) * Real(-0.5)) * dxi; + } else { + ddx = (bvylo(i+1,j-1,k,n)-bvylo(i-1,j-1,k,n))*(Real(0.5)*dxi); + } + } else if (bct(Orientation::ylo(),n) == AMREX_LO_NEUMANN) { + ddx = (vel(i+1,j,k,n)-vel(i-1,j,k,n))*(Real(0.5)*dxi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else if (j == dhi.y+1) { + if (bct(Orientation::yhi(),n) == AMREX_LO_DIRICHLET && bvyhi) { + if (i == dlo.x) { + ddx = (bvyhi(i ,j,k,n) * Real(-1.5) + + bvyhi(i+1,j,k,n) * Real(2.) + + bvyhi(i+2,j,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvyhi(i ,j,k,n) * Real(-1.5) + + bvyhi(i-1,j,k,n) * Real(2.) 
+ + bvyhi(i-2,j,k,n) * Real(-0.5)) * dxi; + } else { + ddx = (bvyhi(i+1,j,k,n)-bvyhi(i-1,j,k,n))*(Real(0.5)*dxi); + } + } else if (bct(Orientation::yhi(),n) == AMREX_LO_NEUMANN) { + ddx = (vel(i+1,j-1,k,n)-vel(i-1,j-1,k,n))*(Real(0.5)*dxi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else { + ddx = mltensor_dx_on_yface(i,j,k,n,vel,dxi); + } + return ddx; +} +} + #if (AMREX_SPACEDIM == 1) #include #elif (AMREX_SPACEDIM == 2) From 1bc4e4eb5a25f4bdf9933695ead86f17dfdee9ed Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 3 Oct 2022 16:50:45 -0700 Subject: [PATCH 086/111] Remove sycl namespace alias (#2971) This causes a conflict with new compilers. --- Src/Base/AMReX_GpuTypes.H | 1 - Src/Base/AMReX_Math.H | 1 - Src/Base/AMReX_RandomEngine.H | 1 - 3 files changed, 3 deletions(-) diff --git a/Src/Base/AMReX_GpuTypes.H b/Src/Base/AMReX_GpuTypes.H index 737a47e665c..12b8fbc1829 100644 --- a/Src/Base/AMReX_GpuTypes.H +++ b/Src/Base/AMReX_GpuTypes.H @@ -8,7 +8,6 @@ #ifdef AMREX_USE_DPCPP #include -namespace sycl = cl::sycl; #endif namespace amrex { diff --git a/Src/Base/AMReX_Math.H b/Src/Base/AMReX_Math.H index 7996830d534..3eed941fb00 100644 --- a/Src/Base/AMReX_Math.H +++ b/Src/Base/AMReX_Math.H @@ -9,7 +9,6 @@ #ifdef AMREX_USE_DPCPP #include -namespace sycl = cl::sycl; #endif namespace amrex { inline namespace disabled { diff --git a/Src/Base/AMReX_RandomEngine.H b/Src/Base/AMReX_RandomEngine.H index a639e4731d7..967b9e66569 100644 --- a/Src/Base/AMReX_RandomEngine.H +++ b/Src/Base/AMReX_RandomEngine.H @@ -15,7 +15,6 @@ #include #elif defined(AMREX_USE_DPCPP) #include -namespace sycl = cl::sycl; #include namespace mkl = oneapi::mkl; #endif From e4ab0485621d5566c96cae58a816860ee7d4997f Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 5 Oct 2022 12:03:41 -0700 Subject: [PATCH 087/111] FillPatcher class (#2972) This adds a class FillPatcher for filling fine level data. 
It's not as general as the various FillPatch functions (e.g., FillPatchTwoLevels). However, it can reduce the amount of communication data. Suppose we use RK2 with subcycling and the refinement ratio is 2. For each step on level 0, there are two steps on level 1. With RK2, each fine step needs to call FillPatch twice. So the total number of FillPatch calls is 4 in the two fine steps. Using the free function, one ParallelCopy per FillPatch call is needed for copying coarse data for spatial interpolation. With the FillPatcher class, two ParallelCopy calls will be done to copy old and new coarse data. Then these data will be used in the four FillPatcher::fill calls. This new approach saves two ParallelCopy calls per coarse step for a two-level run. It could save more if the time stepping requires more substeps or the refinement ratio is higher. Note that many of our AMReX codes use a time stepping algorithm that needs only one FillPatch call per step. For those codes, this new approach will not save any communication for a refinement ratio of 2. However, it will save communication when the refinement ratio is 4.
--- Src/Amr/AMReX_AmrLevel.H | 29 +- Src/Amr/AMReX_AmrLevel.cpp | 60 +++ Src/AmrCore/AMReX_FillPatchUtil.H | 11 +- Src/AmrCore/AMReX_FillPatcher.H | 343 ++++++++++++++++++ Src/AmrCore/CMakeLists.txt | 1 + Src/AmrCore/Make.package | 2 + Src/Base/AMReX_FArrayBox.H | 2 +- Src/Base/AMReX_Geometry.H | 4 + Src/Base/AMReX_Geometry.cpp | 20 +- .../Source/AdvancePhiAllLevels.cpp | 3 +- .../Source/AdvancePhiAtLevel.cpp | 3 +- .../Amr/Advection_AmrCore/Source/AmrCoreAdv.H | 11 +- .../Advection_AmrCore/Source/AmrCoreAdv.cpp | 62 +++- .../Source/Src_K/Make.package | 2 +- .../Advection_AmrLevel/Source/AmrLevelAdv.H | 2 +- .../Advection_AmrLevel/Source/AmrLevelAdv.cpp | 37 +- 16 files changed, 542 insertions(+), 50 deletions(-) create mode 100644 Src/AmrCore/AMReX_FillPatcher.H diff --git a/Src/Amr/AMReX_AmrLevel.H b/Src/Amr/AMReX_AmrLevel.H index 0aaf7fc2620..cca2e9776cd 100644 --- a/Src/Amr/AMReX_AmrLevel.H +++ b/Src/Amr/AMReX_AmrLevel.H @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef AMREX_USE_EB #include #endif @@ -243,12 +244,14 @@ public: Long countCells () const noexcept; //! Get the area not to tag. - const BoxArray& getAreaNotToTag() noexcept; - const Box& getAreaToTag() noexcept; + const BoxArray& getAreaNotToTag () noexcept; + const Box& getAreaToTag () noexcept; //! Construct the area not to tag. - void constructAreaNotToTag(); + void constructAreaNotToTag (); //! Set the area not to tag. - void setAreaNotToTag(BoxArray& ba) noexcept; + void setAreaNotToTag (BoxArray& ba) noexcept; + + void resetFillPatcher (); /** * \brief Error estimation for regridding. This is a pure virtual @@ -365,6 +368,20 @@ public: virtual void particle_redistribute (int /*lbase*/ = 0, bool /*a_init*/ = false) {;} #endif + /** + * \brief Fill with FillPatcher on level > 0 and AmrLevel::FillPatch on level 0. 
+ * + * \param mf destination MultiFab + * \param dcomp starting component for the destination + * \param ncomp number of component to fill + * \param nghost number of ghost cells to fill + * \param time time + * \param state_index StateData index + * \param scomp starting component in the StateData + */ + void FillPatcherFill (amrex::MultiFab& mf, int dcomp, int ncomp, int nghost, + amrex::Real time, int state_index, int scomp); + static void FillPatch (AmrLevel& amrlevel, MultiFab& leveldata, int boxGrow, @@ -425,7 +442,7 @@ protected: IntVect fine_ratio; // Refinement ratio to finer level. static DeriveList derive_lst; // List of derived quantities. static DescriptorList desc_lst; // List of state variables. - Vector state; // Array of state data. + Vector state; // Array of state data. BoxArray m_AreaNotToTag; //Area which shouldn't be tagged on this level. Box m_AreaToTag; //Area which is allowed to be tagged on this level. @@ -436,6 +453,8 @@ protected: std::unique_ptr > m_factory; + Vector>> m_fillpatcher; + private: mutable BoxArray edge_grids[AMREX_SPACEDIM]; // face-centered grids diff --git a/Src/Amr/AMReX_AmrLevel.cpp b/Src/Amr/AMReX_AmrLevel.cpp index a88489f9512..fbeba917255 100644 --- a/Src/Amr/AMReX_AmrLevel.cpp +++ b/Src/Amr/AMReX_AmrLevel.cpp @@ -102,6 +102,7 @@ AmrLevel::AmrLevel (Amr& papa, } state.resize(desc_lst.size()); + m_fillpatcher.resize(desc_lst.size()); #ifdef AMREX_USE_EB if (EB2::TopIndexSpaceIfPresent()) { @@ -451,6 +452,8 @@ AmrLevel::restart (Amr& papa, } } + m_fillpatcher.resize(ndesc); + if (parent->useFixedCoarseGrids()) constructAreaNotToTag(); post_step_regrid = 0; @@ -2096,6 +2099,63 @@ void AmrLevel::constructAreaNotToTag () } } +void +AmrLevel::resetFillPatcher () +{ + for (auto& fp : m_fillpatcher) { + fp.reset(); + } +} + +void +AmrLevel::FillPatcherFill (MultiFab& mf, int dcomp, int ncomp, int nghost, + Real time, int state_index, int scomp) +{ + if (level == 0) { + FillPatch(*this, mf, nghost, time, state_index, scomp, 
ncomp, dcomp); + } else { + AmrLevel& fine_level = *this; + AmrLevel& crse_level = parent->getLevel(level-1); + const Geometry& geom_fine = fine_level.geom; + const Geometry& geom_crse = crse_level.geom; + + Vector smf_crse; + Vector stime_crse; + StateData& statedata_crse = crse_level.state[state_index]; + statedata_crse.getData(smf_crse,stime_crse,time); + StateDataPhysBCFunct physbcf_crse(statedata_crse,scomp,geom_crse); + + Vector smf_fine; + Vector stime_fine; + StateData& statedata_fine = fine_level.state[state_index]; + statedata_fine.getData(smf_fine,stime_fine,time); + StateDataPhysBCFunct physbcf_fine(statedata_fine,scomp,geom_fine); + + const StateDescriptor& desc = AmrLevel::desc_lst[state_index]; + + if (level > 1 &&!amrex::ProperlyNested(fine_level.crse_ratio, + parent->blockingFactor(fine_level.level), + nghost, mf.ixType(), + desc.interp(scomp))) { + amrex::Abort("FillPatcherFill: Grids are not properly nested. Must increase blocking factor."); + } + + auto& fillpatcher = m_fillpatcher[state_index]; + if (fillpatcher == nullptr) { + fillpatcher = std::make_unique> + (parent->boxArray(level), parent->DistributionMap(level), geom_fine, + parent->boxArray(level-1), parent->DistributionMap(level-1), geom_crse, + IntVect(nghost), desc.nComp(), desc.interp(scomp)); + } + + fillpatcher->fill(mf, IntVect(nghost), time, + smf_crse, stime_crse, smf_fine, stime_fine, + scomp, dcomp, ncomp, + physbcf_crse, scomp, physbcf_fine, scomp, + desc.getBCs(), scomp); + } +} + void AmrLevel::FillPatch (AmrLevel& amrlevel, MultiFab& leveldata, diff --git a/Src/AmrCore/AMReX_FillPatchUtil.H b/Src/AmrCore/AMReX_FillPatchUtil.H index 51a5f457391..495cbc180b6 100644 --- a/Src/AmrCore/AMReX_FillPatchUtil.H +++ b/Src/AmrCore/AMReX_FillPatchUtil.H @@ -28,12 +28,17 @@ namespace amrex { - template + template struct NullInterpHook { - void operator() (FAB& /*fab*/, const Box& /*bx*/, int /*icomp*/, int /*ncomp*/) const {} + template ::value,int> = 0> + void operator() (MFFAB& 
/*fab*/, const Box& /*bx*/, int /*icomp*/, int /*ncomp*/) const {} - void operator() (Array /*fab*/, const Box& /*bx*/, int /*icomp*/, int /*ncomp*/) const {} + template ::value,int> = 0> + void operator() (Array /*fab*/, const Box& /*bx*/, int /*icomp*/, int /*ncomp*/) const {} + + template ::value,int> = 0> + void operator() (MFFAB& /*mf*/, int /*icomp*/, int /*ncomp*/) const {} }; template diff --git a/Src/AmrCore/AMReX_FillPatcher.H b/Src/AmrCore/AMReX_FillPatcher.H new file mode 100644 index 00000000000..41ed75318c6 --- /dev/null +++ b/Src/AmrCore/AMReX_FillPatcher.H @@ -0,0 +1,343 @@ +#ifndef AMREX_FILLPATCHER_H_ +#define AMREX_FILLPATCHER_H_ +#include + +#include + +namespace amrex { + +/** + * \brief FillPatcher is for filling a fine level MultiFab/FabArray. + * + * This class is not as general as the FillPatchTwoLevels functions. It + * fills the fine ghost cells not overlapping any fine level valid cells + * with interpolation of the coarse data. Then it fills the fine ghost + * cells overlapping fine level valid cells with the fine level data. If + * the valid cells of the destination need to be filled, it will be done as + * well. Finally, it will fill the physical boundary using the user + * provided functor. The `fill` member function can be used to do the + * operations just described. Alternatively, one can also use the + * `fillCoarseFineBoundary` to fill the ghost cells at the coarse/fine + * boundary only. Then one can manually call FillBoundary to fill the other + * ghost cells, and use the physical BC functor to handle the physical + * boundary. + * + * The communication of the coarse data needed for spatial interpolation is + * optimized at the cost of being error-prone. One must follow the + * following guidelines. + * + * (1) This class is for filling data during time stepping, not during + * regrid. The fine level data passed as input must have the same BoxArray + * and DistributionMapping as the destination.
It's OK if they are the same + * MultiFab. For AmrLevel based codes, AmrLevel::FillPatcherFill will try to + * use FillPatcher if it can, and AmrLevel::FillPatch will use the fillpatch + * functions. + * + * (2) When to build? It is recommended that one uses `std::unique_ptr` to + * store the FillPatcher object, and build it only when it is needed and + * it's a nullptr. For AmrLevel based codes, the AmrLevel class will build + * it for you as needed when you call the AmrLevel::FillPatcherFill + * function. + * + * (3) When to destroy? Usually, we do time stepping on a coarse level + * first. Then we recursively do time stepping on fine levels. After the + * finer level finishes, we do reflux and average the fine data down to the + * coarse level. After that we should destroy the FillPatcher object + * associated with these two levels, because the coarse data stored in the + * object has become outdated. For AmrCore based codes, you could use + * Tests/Amr/Advection_AmrCore as an example. For AmrLevel based codes, you + * should do this in the post_timestep virtual function (see + * Tests/Amr/Advection_AmrLevel for an example). + * + * (4) The source MultiFabs/FabArrays (i.e., the crse_data and fine_data + * arguments of the fill function) need to have exactly the same number of + * components as the ncomp argument of the constructor, even though it's + * allowed to fill only some of the components with the fill function. + * + * (5) This only works for cell-centered and nodal data.
+ */ + +template +class FillPatcher +{ +public: + + /** + * \brief Constructor of FillPatcher + * + * \param fba fine level BoxArray + * \param fdm fine level DistributionMapping + * \param fgeom fine level Geometry + * \param cba coarse level BoxArray + * \param cdm coarse level DistributionMapping + * \param cgeom coarse level Geometry + * \param nghost max number of ghost cells to be filled at coarse/fine boundary + * \param ncomp the number of components + * \param interp for spatial interpolation + * \param eb_index_space optional argument for specifying EB IndexSpace + */ + FillPatcher (BoxArray const& fba, DistributionMapping const& fdm, + Geometry const& fgeom, + BoxArray const& cba, DistributionMapping const& cdm, + Geometry const& cgeom, + IntVect const& nghost, int ncomp, InterpBase* interp, +#ifdef AMREX_USE_EB + EB2::IndexSpace const* eb_index_space = EB2::TopIndexSpaceIfPresent()); +#else + EB2::IndexSpace const* eb_index_space = nullptr); +#endif + + /** + * \brief Function to fill data + * + * \param mf destination MultiFab/FabArray + * \param nghost number of ghost cells to fill. This must be <= what's + * provided to the constructor + * \param time time associated with the destination + * \param crse_data coarse level data + * \param crse_time time associated with the coarse data + * \param fine_data fine level data + * \param fine_time time associated with the fine data + * \param scomp starting component of the source + * \param dcomp starting component of the destination + * \param ncomp the number of components to fill + * \param cbc for filling coarse level physical BC + * \param cbccomp starting component of the coarse level BC functor + * \param fbc for filling fine level physical BC + * \param fbccomp starting component of the fine level BC functor + * \param bcs BCRec specifying physical boundary types + * \param bcscomp starting component of the BCRec Vector.
+ * \param pre_interp optional pre-interpolation hook for modifying the coarse data + * \param post_interp optional post-interpolation hook for modifying the fine data + */ + template , + typename PostInterpHook=NullInterpHook > + void fill (MF& mf, IntVect const& nghost, Real time, + Vector const& crse_data, Vector const& crse_time, + Vector const& fine_data, Vector const& fine_time, + int scomp, int dcomp, int ncomp, + BC& cbc, int cbccomp, BC& fbc, int fbccomp, + Vector const& bcs, int bcscomp, + PreInterpHook const& pre_interp = {}, + PostInterpHook const& post_interp = {}); + + /** + * \brief Function to fill data at coarse/fine boundary only + * + * \param mf destination MultiFab/FabArray + * \param nghost number of ghost cells to fill. This must be <= what's + * provided to the constructor + * \param time time associated with the destination + * \param crse_data coarse level data + * \param crse_time time associated with the coarse data + * \param scomp starting component of the source + * \param dcomp starting component of the destination + * \param ncomp the number of components to fill + * \param cbc for filling coarse level physical BC + * \param cbccomp starting component of the coarse level BC functor + * \param bcs BCRec specifying physical boundary types + * \param bcscomp starting component of the BCRec Vector. 
+ * \param pre_interp optional pre-interpolation hook for modifying the coarse data + * \param post_interp optional post-interpolation hook for modifying the fine data + */ + template , + typename PostInterpHook=NullInterpHook > + void fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real time, + Vector const& crse_data, + Vector const& crse_time, + int scomp, int dcomp, int ncomp, + BC& cbc, int cbccomp, + Vector const& bcs, int bcscomp, + PreInterpHook const& pre_interp = {}, + PostInterpHook const& post_interp = {}); + +private: + + BoxArray m_fba; + BoxArray m_cba; + DistributionMapping m_fdm; + DistributionMapping m_cdm; + Geometry m_fgeom; + Geometry m_cgeom; + IntVect m_nghost; + int m_ncomp; + InterpBase* m_interp; + EB2::IndexSpace const* m_eb_index_space = nullptr; + Vector>> m_cf_crse_data; + std::unique_ptr m_cf_fine_data; +}; + +template +FillPatcher::FillPatcher (BoxArray const& fba, DistributionMapping const& fdm, + Geometry const& fgeom, + BoxArray const& cba, DistributionMapping const& cdm, + Geometry const& cgeom, + IntVect const& nghost, int ncomp, InterpBase* interp, + EB2::IndexSpace const* eb_index_space) + : m_fba(fba), + m_cba(cba), + m_fdm(fdm), + m_cdm(cdm), + m_fgeom(fgeom), + m_cgeom(cgeom), + m_nghost(nghost), + m_ncomp(ncomp), + m_interp(interp), + m_eb_index_space(eb_index_space) +{ + static_assert(IsFabArray::value, + "FillPatcher: MF must be FabArray type"); + AMREX_ALWAYS_ASSERT(m_fba.ixType().cellCentered() || m_fba.ixType().nodeCentered()); +} + +template +template +void +FillPatcher::fill (MF& mf, IntVect const& nghost, Real time, + Vector const& cmf, Vector const& ct, + Vector const& fmf, Vector const& ft, + int scomp, int dcomp, int ncomp, + BC& cbc, int cbccomp, + BC& fbc, int fbccomp, + Vector const& bcs, int bcscomp, + PreInterpHook const& pre_interp, + PostInterpHook const& post_interp) +{ + BL_PROFILE("FillPatcher::fill()"); + + AMREX_ALWAYS_ASSERT(m_fba == fmf[0]->boxArray() && + m_fdm == 
fmf[0]->DistributionMap()); + + fillCoarseFineBoundary(mf, nghost, time, cmf, ct, scomp, dcomp, ncomp, + cbc, cbccomp, bcs, bcscomp, pre_interp, post_interp); + + FillPatchSingleLevel(mf, nghost, time, fmf, ft, scomp, dcomp, ncomp, + m_fgeom, fbc, fbccomp); +} + +template +template +void +FillPatcher::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real time, + Vector const& cmf, + Vector const& ct, + int scomp, int dcomp, int ncomp, + BC& cbc, int cbccomp, + Vector const& bcs, int bcscomp, + PreInterpHook const& pre_interp, + PostInterpHook const& post_interp) +{ + BL_PROFILE("FillPatcher::fillCFB"); + + AMREX_ALWAYS_ASSERT(nghost.allLE(m_nghost) && + m_fba == mf.boxArray() && + m_fdm == mf.DistributionMap() && + m_cba == cmf[0]->boxArray() && + m_cdm == cmf[0]->DistributionMap() && + m_ncomp >= ncomp && + m_ncomp == cmf[0]->nComp()); + + IntVect ratio; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + ratio[idim] = m_fgeom.Domain().length(idim) / m_cgeom.Domain().length(idim); + } + AMREX_ASSERT(m_fgeom.Domain() == amrex::refine(m_cgeom.Domain(),ratio)); + + const InterpolaterBoxCoarsener& coarsener = m_interp->BoxCoarsener(ratio); + const FabArrayBase::FPinfo& fpc = FabArrayBase::TheFPinfo(mf, mf, + m_nghost, + coarsener, + m_fgeom, + m_cgeom, + m_eb_index_space); + + if ( ! 
fpc.ba_crse_patch.empty()) + { + if (m_cf_fine_data == nullptr) { + m_cf_fine_data = std::make_unique + (make_mf_fine_patch(fpc, m_ncomp)); + } + + int ncmfs = cmf.size(); + for (int icmf = 0; icmf < ncmfs; ++icmf) { + Real t = ct[icmf]; + auto it = std::find_if(m_cf_crse_data.begin(), m_cf_crse_data.end(), + [=] (auto const& x) { + return amrex::almostEqual(x.first,t,5); + }); + + if (it == std::end(m_cf_crse_data)) { + MF mf_crse_patch = make_mf_crse_patch(fpc, m_ncomp); + mf_crse_patch.ParallelCopy(*cmf[icmf], m_cgeom.periodicity()); + + std::pair> tmp; + tmp.first = t; + tmp.second = std::make_unique(std::move(mf_crse_patch)); + m_cf_crse_data.push_back(std::move(tmp)); + } + } + + MF mf_crse_patch; + if (m_cf_crse_data.size() > 0 && + amrex::almostEqual(time, m_cf_crse_data[0].first,5)) + { + mf_crse_patch = MF(*m_cf_crse_data[0].second, amrex::make_alias, + scomp, ncomp); + } + else if (m_cf_crse_data.size() > 1 && + amrex::almostEqual(time, m_cf_crse_data[1].first,5)) + { + mf_crse_patch = MF(*m_cf_crse_data[1].second, amrex::make_alias, + scomp, ncomp); + } + else if (m_cf_crse_data.size() == 2) + { + mf_crse_patch = make_mf_crse_patch(fpc, ncomp); + int const ng_space_interp = 8; // Need to be big enough + Box domain = m_cgeom.growPeriodicDomain(ng_space_interp); + domain.convert(mf.ixType()); + Real t0 = m_cf_crse_data[0].first; + Real t1 = m_cf_crse_data[1].first; + Real alpha = (t1-time)/(t1-t0); + Real beta = (time-t0)/(t1-t0); + AMREX_ASSERT(alpha >= 0._rt && beta >= 0._rt); + auto const& a = mf_crse_patch.arrays(); + auto const& a0 = m_cf_crse_data[0].second->const_arrays(); + auto const& a1 = m_cf_crse_data[1].second->const_arrays(); + amrex::ParallelFor(mf_crse_patch, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + if (domain.contains(i,j,k)) { + a[bi](i,j,k,n) + = alpha*a0[bi](i,j,k,scomp+n) + + beta*a1[bi](i,j,k,scomp+n); + } + }); + Gpu::streamSynchronize(); + } + else + { + 
amrex::Abort("FillPatcher: High order interpolation in time not supported. Or FillPatcher was not properly deleted."); + } + + cbc(mf_crse_patch, 0, ncomp, nghost, time, cbccomp); + + pre_interp(mf_crse_patch, 0, ncomp); + + FillPatchInterp(*m_cf_fine_data, scomp, mf_crse_patch, 0, + ncomp, IntVect(0), m_cgeom, m_fgeom, + amrex::grow(amrex::convert(m_fgeom.Domain(), + mf.ixType()),nghost), + ratio, m_interp, bcs, bcscomp); + + post_interp(*m_cf_fine_data, scomp, ncomp); + + mf.ParallelCopy(*m_cf_fine_data, scomp, dcomp, ncomp, IntVect{0}, nghost); + } +} + +} + +#endif diff --git a/Src/AmrCore/CMakeLists.txt b/Src/AmrCore/CMakeLists.txt index f9ff24f243b..be7c87eee4f 100644 --- a/Src/AmrCore/CMakeLists.txt +++ b/Src/AmrCore/CMakeLists.txt @@ -12,6 +12,7 @@ target_sources(amrex AMReX_FluxRegister.cpp AMReX_FillPatchUtil.H AMReX_FillPatchUtil_I.H + AMReX_FillPatcher.H AMReX_FluxRegister.H AMReX_InterpBase.H AMReX_InterpBase.cpp diff --git a/Src/AmrCore/Make.package b/Src/AmrCore/Make.package index 5b3afa61ccb..df3c2e83d40 100644 --- a/Src/AmrCore/Make.package +++ b/Src/AmrCore/Make.package @@ -6,6 +6,8 @@ CEXE_sources += AMReX_AmrCore.cpp AMReX_Cluster.cpp AMReX_ErrorList.cpp AMReX_Fi AMReX_Interpolater.cpp AMReX_MFInterpolater.cpp AMReX_TagBox.cpp AMReX_AmrMesh.cpp \ AMReX_InterpBase.cpp +CEXE_headers += AMReX_FillPatcher.H + CEXE_headers += AMReX_Interp_C.H AMReX_Interp_$(DIM)D_C.H CEXE_headers += AMReX_MFInterp_C.H AMReX_MFInterp_$(DIM)D_C.H diff --git a/Src/Base/AMReX_FArrayBox.H b/Src/Base/AMReX_FArrayBox.H index 3d3cda3674b..b678986c0e9 100644 --- a/Src/Base/AMReX_FArrayBox.H +++ b/Src/Base/AMReX_FArrayBox.H @@ -272,7 +272,7 @@ public: virtual ~FArrayBox () noexcept override {} FArrayBox (FArrayBox&& rhs) noexcept = default; - FArrayBox& operator= (FArrayBox&&) = default; + FArrayBox& operator= (FArrayBox&&) noexcept = default; FArrayBox (const FArrayBox&) = delete; FArrayBox& operator= (const FArrayBox&) = delete; diff --git a/Src/Base/AMReX_Geometry.H 
b/Src/Base/AMReX_Geometry.H index 0e0a49f540e..890ec2e0f7e 100644 --- a/Src/Base/AMReX_Geometry.H +++ b/Src/Base/AMReX_Geometry.H @@ -420,9 +420,13 @@ public: const Box& src, Vector& out) const noexcept; + //! Return domain box with non-periodic directions grown by ngrow. + Box growNonPeriodicDomain (IntVect const& ngrow) const noexcept; //! Return domain box with non-periodic directions grown by ngrow. Box growNonPeriodicDomain (int ngrow) const noexcept; //! Return domain box with periodic directions grown by ngrow. + Box growPeriodicDomain (IntVect const& ngrow) const noexcept; + //! Return domain box with periodic directions grown by ngrow. Box growPeriodicDomain (int ngrow) const noexcept; //! Set periodicity flags and return the old flags. diff --git a/Src/Base/AMReX_Geometry.cpp b/Src/Base/AMReX_Geometry.cpp index 2f80f2eb947..235c7bb7674 100644 --- a/Src/Base/AMReX_Geometry.cpp +++ b/Src/Base/AMReX_Geometry.cpp @@ -473,29 +473,41 @@ Geometry::periodicShift (const Box& target, } Box -Geometry::growNonPeriodicDomain (int ngrow) const noexcept +Geometry::growNonPeriodicDomain (IntVect const& ngrow) const noexcept { Box b = Domain(); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { if (!isPeriodic(idim)) { - b.grow(idim,ngrow); + b.grow(idim,ngrow[idim]); } } return b; } Box -Geometry::growPeriodicDomain (int ngrow) const noexcept +Geometry::growPeriodicDomain (IntVect const& ngrow) const noexcept { Box b = Domain(); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { if (isPeriodic(idim)) { - b.grow(idim,ngrow); + b.grow(idim,ngrow[idim]); } } return b; } +Box +Geometry::growNonPeriodicDomain (int ngrow) const noexcept +{ + return growNonPeriodicDomain(IntVect(ngrow)); +} + +Box +Geometry::growPeriodicDomain (int ngrow) const noexcept +{ + return growPeriodicDomain(IntVect(ngrow)); +} + void Geometry::computeRoundoffDomain () { diff --git a/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAllLevels.cpp 
b/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAllLevels.cpp index b5e48e6e409..4f97cbf3184 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAllLevels.cpp +++ b/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAllLevels.cpp @@ -35,7 +35,8 @@ AmrCoreAdv::AdvancePhiAllLevels (Real time, Real dt_lev, int /*iteration*/) // State with ghost cells MultiFab Sborder(grids[lev], dmap[lev], phi_new[lev].nComp(), num_grow); - FillPatch(lev, time, Sborder, 0, Sborder.nComp()); + FillPatch(lev, time, Sborder, 0, Sborder.nComp(), + FillPatchType::fillpatch_function); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) diff --git a/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAtLevel.cpp b/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAtLevel.cpp index 3ddd055eda0..7a5e1abbaa7 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAtLevel.cpp +++ b/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAtLevel.cpp @@ -33,7 +33,8 @@ AmrCoreAdv::AdvancePhiAtLevel (int lev, Real time, Real dt_lev, int /*iteration* // State with ghost cells MultiFab Sborder(grids[lev], dmap[lev], S_new.nComp(), num_grow); - FillPatch(lev, time, Sborder, 0, Sborder.nComp()); + FillPatch(lev, time, Sborder, 0, Sborder.nComp(), + FillPatchType::fillpatch_class); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) diff --git a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H index e330d30e740..1b6832d8663 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H +++ b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef AMREX_USE_OMP # include @@ -98,15 +99,18 @@ private: // more flexible version of AverageDown() that lets you average down across multiple levels void AverageDownTo (int crse_lev); + enum class FillPatchType { fillpatch_class, fillpatch_function }; + // compute a new multifab by coping in phi from valid region and filling ghost cells // works for 
single level and 2-level cases (fill fine grid ghost by interpolating from coarse) - void FillPatch (int lev, amrex::Real time, amrex::MultiFab& mf, int icomp, int ncomp); + void FillPatch (int lev, amrex::Real time, amrex::MultiFab& mf, int icomp, + int ncomp, FillPatchType fptype); // fill an entire multifab by interpolating from the coarser level // this comes into play when a new level of refinement appears void FillCoarsePatch (int lev, amrex::Real time, amrex::MultiFab& mf, int icomp, int ncomp); - // utility to copy in data from phi_old and/or phi_new into another multifab + // Pack pointers to phi_old and/or phi_new and associated times. void GetData (int lev, amrex::Real time, amrex::Vector& data, amrex::Vector& datatime); @@ -165,6 +169,9 @@ private: // used in the reflux operation amrex::Vector > flux_reg; + // This is for fillpatch during timestepping, but not for regridding. + amrex::Vector>> fillpatcher; + // Velocity on all faces at all levels amrex::Vector< amrex::Array > facevel; diff --git a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp index 62c9dc7417e..3300e4622cc 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp +++ b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -95,6 +94,10 @@ AmrCoreAdv::AmrCoreAdv () // with the lev/lev-1 interface (and has grid spacing associated with lev-1) // therefore flux_reg[0] is never actually used in the reflux operation flux_reg.resize(nlevs_max+1); + + // fillpatcher[lev] is for filling data on level lev using the data on + // lev-1 and lev. 
+ fillpatcher.resize(nlevs_max+1); } AmrCoreAdv::~AmrCoreAdv () @@ -230,7 +233,8 @@ AmrCoreAdv::RemakeLevel (int lev, Real time, const BoxArray& ba, MultiFab new_state(ba, dm, ncomp, ng); MultiFab old_state(ba, dm, ncomp, ng); - FillPatch(lev, time, new_state, 0, ncomp); + // Must use fillpatch_function + FillPatch(lev, time, new_state, 0, ncomp, FillPatchType::fillpatch_function); std::swap(new_state, phi_new[lev]); std::swap(old_state, phi_old[lev]); @@ -257,6 +261,7 @@ AmrCoreAdv::ClearLevel (int lev) phi_new[lev].clear(); phi_old[lev].clear(); flux_reg[lev].reset(nullptr); + fillpatcher[lev].reset(nullptr); } // Make a new level from scratch using provided BoxArray and DistributionMapping. @@ -418,7 +423,8 @@ AmrCoreAdv::AverageDownTo (int crse_lev) // compute a new multifab by coping in phi from valid region and filling ghost cells // works for single level and 2-level cases (fill fine grid ghost by interpolating from coarse) void -AmrCoreAdv::FillPatch (int lev, Real time, MultiFab& mf, int icomp, int ncomp) +AmrCoreAdv::FillPatch (int lev, Real time, MultiFab& mf, int icomp, int ncomp, + FillPatchType fptype) { if (lev == 0) { @@ -450,16 +456,31 @@ AmrCoreAdv::FillPatch (int lev, Real time, MultiFab& mf, int icomp, int ncomp) Interpolater* mapper = &cell_cons_interp; + if (fptype == FillPatchType::fillpatch_class) { + if (fillpatcher[lev] == nullptr) { + fillpatcher[lev] = std::make_unique> + (boxArray(lev ), DistributionMap(lev ), Geom(lev ), + boxArray(lev-1), DistributionMap(lev-1), Geom(lev-1), + mf.nGrowVect(), mf.nComp(), mapper); + } + } + if(Gpu::inLaunchRegion()) { GpuBndryFuncFab gpu_bndry_func(AmrCoreFill{}); PhysBCFunct > cphysbc(geom[lev-1],bcs,gpu_bndry_func); PhysBCFunct > fphysbc(geom[lev],bcs,gpu_bndry_func); - amrex::FillPatchTwoLevels(mf, time, cmf, ctime, fmf, ftime, - 0, icomp, ncomp, geom[lev-1], geom[lev], - cphysbc, 0, fphysbc, 0, refRatio(lev-1), - mapper, bcs, 0); + if (fptype == FillPatchType::fillpatch_class) { + 
fillpatcher[lev]->fill(mf, mf.nGrowVect(), time, + cmf, ctime, fmf, ftime, 0, icomp, ncomp, + cphysbc, 0, fphysbc, 0, bcs, 0); + } else { + amrex::FillPatchTwoLevels(mf, time, cmf, ctime, fmf, ftime, + 0, icomp, ncomp, geom[lev-1], geom[lev], + cphysbc, 0, fphysbc, 0, refRatio(lev-1), + mapper, bcs, 0); + } } else { @@ -467,10 +488,16 @@ AmrCoreAdv::FillPatch (int lev, Real time, MultiFab& mf, int icomp, int ncomp) PhysBCFunct cphysbc(geom[lev-1],bcs,bndry_func); PhysBCFunct fphysbc(geom[lev],bcs,bndry_func); - amrex::FillPatchTwoLevels(mf, time, cmf, ctime, fmf, ftime, - 0, icomp, ncomp, geom[lev-1], geom[lev], - cphysbc, 0, fphysbc, 0, refRatio(lev-1), - mapper, bcs, 0); + if (fptype == FillPatchType::fillpatch_class) { + fillpatcher[lev]->fill(mf, mf.nGrowVect(), time, + cmf, ctime, fmf, ftime, 0, icomp, ncomp, + cphysbc, 0, fphysbc, 0, bcs, 0); + } else { + amrex::FillPatchTwoLevels(mf, time, cmf, ctime, fmf, ftime, + 0, icomp, ncomp, geom[lev-1], geom[lev], + cphysbc, 0, fphysbc, 0, refRatio(lev-1), + mapper, bcs, 0); + } } } } @@ -513,21 +540,18 @@ AmrCoreAdv::FillCoarsePatch (int lev, Real time, MultiFab& mf, int icomp, int nc } } -// utility to copy in data from phi_old and/or phi_new into another multifab void AmrCoreAdv::GetData (int lev, Real time, Vector& data, Vector& datatime) { data.clear(); datatime.clear(); - const Real teps = (t_new[lev] - t_old[lev]) * 1.e-3; - - if (time > t_new[lev] - teps && time < t_new[lev] + teps) + if (amrex::almostEqual(time, t_new[lev], 5)) { data.push_back(&phi_new[lev]); datatime.push_back(t_new[lev]); } - else if (time > t_old[lev] - teps && time < t_old[lev] + teps) + else if (amrex::almostEqual(time, t_old[lev], 5)) { data.push_back(&phi_old[lev]); datatime.push_back(t_old[lev]); @@ -631,6 +655,8 @@ AmrCoreAdv::timeStepWithSubcycling (int lev, Real time, int iteration) } AverageDownTo(lev); // average lev+1 down to lev + + fillpatcher[lev+1].reset(); // Because the data on lev have changed. 
} @@ -694,6 +720,10 @@ AmrCoreAdv::timeStepNoSubcycling (Real time, int iteration) // Make sure the coarser levels are consistent with the finer levels AverageDown (); + for (auto& fp : fillpatcher) { + fp.reset(); // Because the data have changed. + } + for (int lev = 0; lev <= finest_level; lev++) ++istep[lev]; diff --git a/Tests/Amr/Advection_AmrCore/Source/Src_K/Make.package b/Tests/Amr/Advection_AmrCore/Source/Src_K/Make.package index e98f493727c..5254ff6f63f 100644 --- a/Tests/Amr/Advection_AmrCore/Source/Src_K/Make.package +++ b/Tests/Amr/Advection_AmrCore/Source/Src_K/Make.package @@ -1,3 +1,3 @@ CEXE_headers += Adv_K.H -CEXE_headers += compute_flux_K_$(DIM).H +CEXE_headers += compute_flux_$(DIM)D_K.H CEXE_headers += slope_K.H diff --git a/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.H b/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.H index 1e5bacbc497..faf56357e29 100644 --- a/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.H +++ b/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.H @@ -231,7 +231,7 @@ protected: /* * The data. */ - amrex::FluxRegister* flux_reg; + std::unique_ptr flux_reg; /* * Static data members. 
diff --git a/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.cpp b/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.cpp index db69749a85f..7fae3038f72 100644 --- a/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.cpp +++ b/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.cpp @@ -36,7 +36,6 @@ int AmrLevelAdv::do_tracers = 0; */ AmrLevelAdv::AmrLevelAdv () { - flux_reg = 0; } /** @@ -51,9 +50,9 @@ AmrLevelAdv::AmrLevelAdv (Amr& papa, : AmrLevel(papa,lev,level_geom,bl,dm,time) { - flux_reg = 0; - if (level > 0 && do_reflux) - flux_reg = new FluxRegister(grids,dmap,crse_ratio,level,NUM_STATE); + if (level > 0 && do_reflux) { + flux_reg = std::make_unique(grids,dmap,crse_ratio,level,NUM_STATE); + } } /** @@ -61,7 +60,6 @@ AmrLevelAdv::AmrLevelAdv (Amr& papa, */ AmrLevelAdv::~AmrLevelAdv () { - delete flux_reg; } /** @@ -74,9 +72,9 @@ AmrLevelAdv::restart (Amr& papa, { AmrLevel::restart(papa,is,bReadSpecial); - BL_ASSERT(flux_reg == 0); - if (level > 0 && do_reflux) - flux_reg = new FluxRegister(grids,dmap,crse_ratio,level,NUM_STATE); + if (level > 0 && do_reflux) { + flux_reg = std::make_unique(grids,dmap,crse_ratio,level,NUM_STATE); + } } /** @@ -88,11 +86,11 @@ AmrLevelAdv::checkPoint (const std::string& dir, VisMF::How how, bool dump_old) { - AmrLevel::checkPoint(dir, os, how, dump_old); + AmrLevel::checkPoint(dir, os, how, dump_old); #ifdef AMREX_PARTICLES - if (do_tracers && level == 0) { - TracerPC->WritePlotFile(dir, "Tracer"); - } + if (do_tracers && level == 0) { + TracerPC->WritePlotFile(dir, "Tracer"); + } #endif } @@ -285,7 +283,8 @@ AmrLevelAdv::advance (Real time, // State with ghost cells MultiFab Sborder(grids, dmap, NUM_STATE, NUM_GROW); - FillPatch(*this, Sborder, NUM_GROW, time, Phi_Type, 0, NUM_STATE); + // We use FillPatcher to do fillpatch here if we can + FillPatcherFill(Sborder, 0, NUM_STATE, NUM_GROW, time, Phi_Type, 0); // MF to hold the mac velocity MultiFab Umac[BL_SPACEDIM]; @@ -601,11 +600,19 @@ AmrLevelAdv::post_timestep (int iteration) // 
int finest_level = parent->finestLevel(); - if (do_reflux && level < finest_level) + if (do_reflux && level < finest_level) { reflux(); + } - if (level < finest_level) + if (level < finest_level) { avgDown(); + } + + if (level < finest_level) { + // fillpatcher on level+1 needs to be reset because data on this + // level have changed. + getLevel(level+1).resetFillPatcher(); + } #ifdef AMREX_PARTICLES if (TracerPC) From 2d87a4c8ad5d375008ee9b1c23a50404fe0dfa21 Mon Sep 17 00:00:00 2001 From: Brandon Runnels Date: Mon, 10 Oct 2022 09:49:29 -0600 Subject: [PATCH 088/111] add templating for the cell bilinear interpolators (#2979) This templates the `mf_cell_bilin_interp` functions so that the interpolators can be used with `BaseFab`s of arbitrary type. --- Src/AmrCore/AMReX_MFInterp_1D_C.H | 5 +++-- Src/AmrCore/AMReX_MFInterp_2D_C.H | 5 +++-- Src/AmrCore/AMReX_MFInterp_3D_C.H | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Src/AmrCore/AMReX_MFInterp_1D_C.H b/Src/AmrCore/AMReX_MFInterp_1D_C.H index 37751acc3b9..8fcadec5794 100644 --- a/Src/AmrCore/AMReX_MFInterp_1D_C.H +++ b/Src/AmrCore/AMReX_MFInterp_1D_C.H @@ -149,9 +149,10 @@ void mf_cell_cons_lin_interp_sph (int i, int ns, Array4 const& fine, int f + xoff * slope(ic,0,0,ns); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mf_cell_bilin_interp (int i, int, int, int n, Array4 const& fine, int fcomp, - Array4 const& crse, int ccomp, IntVect const& ratio) noexcept +void mf_cell_bilin_interp (int i, int, int, int n, Array4 const& fine, int fcomp, + Array4 const& crse, int ccomp, IntVect const& ratio) noexcept { int ic = amrex::coarsen(i,ratio[0]); int ioff = i - ic*ratio[0]; diff --git a/Src/AmrCore/AMReX_MFInterp_2D_C.H b/Src/AmrCore/AMReX_MFInterp_2D_C.H index c505ef2655c..e02084e2e8e 100644 --- a/Src/AmrCore/AMReX_MFInterp_2D_C.H +++ b/Src/AmrCore/AMReX_MFInterp_2D_C.H @@ -189,9 +189,10 @@ void mf_cell_cons_lin_interp_rz (int i, int j, int ns, Array4 const& fine, + yoff * 
slope(ic,jc,0,ns+ncomp); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mf_cell_bilin_interp (int i, int j, int, int n, Array4 const& fine, int fcomp, - Array4 const& crse, int ccomp, IntVect const& ratio) noexcept +void mf_cell_bilin_interp (int i, int j, int, int n, Array4 const& fine, int fcomp, + Array4 const& crse, int ccomp, IntVect const& ratio) noexcept { int ic = amrex::coarsen(i,ratio[0]); int jc = amrex::coarsen(j,ratio[1]); diff --git a/Src/AmrCore/AMReX_MFInterp_3D_C.H b/Src/AmrCore/AMReX_MFInterp_3D_C.H index dc0da5dba40..17d14ff689b 100644 --- a/Src/AmrCore/AMReX_MFInterp_3D_C.H +++ b/Src/AmrCore/AMReX_MFInterp_3D_C.H @@ -128,9 +128,10 @@ void mf_cell_cons_lin_interp (int i, int j, int k, int ns, Array4 const& f + zoff * slope(ic,jc,kc,ns+ncomp*2); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mf_cell_bilin_interp (int i, int j, int k, int n, Array4 const& fine, int fcomp, - Array4 const& crse, int ccomp, IntVect const& ratio) noexcept +void mf_cell_bilin_interp (int i, int j, int k, int n, Array4 const& fine, int fcomp, + Array4 const& crse, int ccomp, IntVect const& ratio) noexcept { int ic = amrex::coarsen(i,ratio[0]); int jc = amrex::coarsen(j,ratio[1]); From 0019b3a41065caf6d9486000b9c6fbf86ad9837e Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 11 Oct 2022 11:00:13 -0700 Subject: [PATCH 089/111] MLLinOp::postSolve (#2981) Add a virtual function MLLinOp::postSolve. This allows WarpX to set EB covered nodes to prescribed values in the solver's output for visualization purpose. 
--- .../MLMG/AMReX_MLEBNodeFDLaplacian.H | 2 ++ .../MLMG/AMReX_MLEBNodeFDLaplacian.cpp | 35 +++++++++++++++++++ Src/LinearSolvers/MLMG/AMReX_MLLinOp.H | 2 ++ Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp | 3 ++ Src/LinearSolvers/MLMG/AMReX_MLMG.cpp | 2 ++ 5 files changed, 44 insertions(+) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H index 41190f229a8..1c074ff115b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H @@ -119,6 +119,8 @@ public: Array4 const& bfab) const override; #endif + virtual void postSolve (Vector& sol) const override; + private: GpuArray m_sigma{{AMREX_D_DECL(1_rt,1_rt,1_rt)}}; Real m_s_phi_eb = std::numeric_limits::lowest(); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp index 62a7c3af282..fe32603e995 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp @@ -655,4 +655,39 @@ MLEBNodeFDLaplacian::fillRHS (MFIter const& /*mfi*/, Array4 const& /* } #endif +void +MLEBNodeFDLaplacian::postSolve (Vector& sol) const +{ +#ifdef AMREX_USE_EB + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + const auto phieb = m_s_phi_eb; + auto factory = dynamic_cast(m_factory[amrlev][0].get()); + auto const& levset_mf = factory->getLevelSet(); + auto const& levset_ar = levset_mf.const_arrays(); + MultiFab& mf = sol[amrlev].get(); + auto const& sol_ar = mf.arrays(); + if (phieb == std::numeric_limits::lowest()) { + auto const& phieb_ar = m_phi_eb[amrlev].const_arrays(); + amrex::ParallelFor(mf, IntVect(1), + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + if (levset_ar[bi](i,j,k) >= Real(0.0)) { + sol_ar[bi](i,j,k) = phieb_ar[bi](i,j,k); + } + }); + } else { + amrex::ParallelFor(mf, IntVect(1), + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + 
if (levset_ar[bi](i,j,k) >= Real(0.0)) { + sol_ar[bi](i,j,k) = phieb; + } + }); + } + } +#else + amrex::ignore_unused(sol); +#endif +} + } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H index f7096b93778..09d835d8b86 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H @@ -330,6 +330,8 @@ public: virtual void AnyAverageDownAndSync (Vector& sol) const = 0; + virtual void postSolve (Vector& sol) const; + Real MFNormInf (MultiFab const& mf, iMultiFab const* fine_mask, bool local) const; bool isMFIterSafe (int amrlev, int mglev1, int mglev2) const; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp index 5f71895320d..e53ed376d97 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp @@ -1159,6 +1159,9 @@ MLLinOp::AnyInterpAssignMG (int amrlev, int fmglev, Any& fine, Any& crse) const interpAssign(amrlev, fmglev, fine.get(), crse.get()); } +void +MLLinOp::postSolve (Vector& /* sol */) const {} + bool MLLinOp::isMFIterSafe (int amrlev, int mglev1, int mglev2) const { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp index a4ab5762777..28c833397b4 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp @@ -218,6 +218,8 @@ MLMG::solve (Vector& a_sol, const Vector& a_rhs, timer[iter_time] = amrex::second() - iter_start_time; } + linop.postSolve(sol); + IntVect ng_back = final_fill_bc ? IntVect(1) : IntVect(0); if (linop.hasHiddenDimension()) { ng_back[linop.hiddenDirection()] = 0; From 53e34d17913cc76bdd4bbaad1582dd1b04058914 Mon Sep 17 00:00:00 2001 From: Andy Nonaka Date: Tue, 11 Oct 2022 12:00:34 -0700 Subject: [PATCH 090/111] fix docs; Robin BC's for MLMG (#2982) Update the MLMG Robin BC description in the docs. 
--- .../source/LinearSolvers.rst | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/Docs/sphinx_documentation/source/LinearSolvers.rst b/Docs/sphinx_documentation/source/LinearSolvers.rst index c8743a3e8e2..a1df8760f3a 100644 --- a/Docs/sphinx_documentation/source/LinearSolvers.rst +++ b/Docs/sphinx_documentation/source/LinearSolvers.rst @@ -222,6 +222,8 @@ The supported BC types at the physical domain boundaries are - :cpp:`LinOpBCType::inhomogNeumann` for inhomogeneous Neumann boundary condition. +- :cpp:`LinOpBCType::Robin` for Robin boundary conditions, :math:`a\phi + b\frac{\partial\phi}{\partial n} = f`. + - :cpp:`LinOpBCType::reflect_odd` for reflection with sign changed. 2) Cell-centered solvers only: @@ -255,12 +257,12 @@ before the solve one must always call the :cpp:`MLLinOp` member function :: virtual void setLevelBC (int amrlev, const MultiFab* levelbcdata, - const MultiFab* robinbc_a, - const MultiFab* robinbc_b, - const MultiFab* robinbc_f) = 0; + const MultiFab* robinbc_a = nullptr, + const MultiFab* robinbc_b = nullptr, + const MultiFab* robinbc_f = nullptr) = 0; -If we want to supply an inhomogeneous Dirichlet, inhomogeneous Neumann, or -Robin boundary conditions at the domain boundaries, we must supply those values +If we want to supply an inhomogeneous Dirichlet or inhomogeneous Neumann +boundary condition at the domain boundaries, we must supply those values in ``MultiFab* levelbcdata``, which must have at least one ghost cell. Note that the argument :cpp:`amrlev` is relative to the solve, not necessarily the full AMR hierarchy; amrlev = 0 refers to the coarsest @@ -286,6 +288,11 @@ Dirichlet or Neumann boundaries are assumed to be exactly on the face of the physical domain; storing these values in the ghost cell of a cell-centered array is a convenience of implementation. 
+For Robin boundary conditions, the ghost cells in +``MultiFab* robinbc_a``, ``MultiFab* robinbc_b``, and ``MultiFab* robinbc_f`` +store the numerical values in the condition, +:math:`a\phi + b\frac{\partial\phi}{\partial n} = f`. + .. _sec:linearsolver:pars: Parameters From 5acfe07a830305cc7cbafd1e5dd26e3c3598435b Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Tue, 11 Oct 2022 14:51:48 -0700 Subject: [PATCH 091/111] MFIter::Finalize (#2983) Add a Finalize function to MFIter. The idea is that we can call this before destruction in Python, where `for` loops do not create a scope. This function must be safe to call again from the destructor (or we need to add an extra bool to guard that it is not run a second time in the destructor). Co-authored-by: Weiqun Zhang --- Src/Base/AMReX_MFIter.H | 2 ++ Src/Base/AMReX_MFIter.cpp | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/Src/Base/AMReX_MFIter.H b/Src/Base/AMReX_MFIter.H index eb259ac7b6d..9e968aa1d8b 100644 --- a/Src/Base/AMReX_MFIter.H +++ b/Src/Base/AMReX_MFIter.H @@ -180,6 +180,7 @@ protected: IndexType typ; bool dynamic; + bool finalized = false; struct DeviceSync { DeviceSync () = default; @@ -201,6 +202,7 @@ protected: static AMREX_EXPORT int allow_multiple_mfiters; void Initialize (); + void Finalize (); }; //! Is it safe to have these two MultiFabs in the same MFiter? 
diff --git a/Src/Base/AMReX_MFIter.cpp b/Src/Base/AMReX_MFIter.cpp index e8a97256d3d..4cd9832b747 100644 --- a/Src/Base/AMReX_MFIter.cpp +++ b/Src/Base/AMReX_MFIter.cpp @@ -209,6 +209,15 @@ MFIter::MFIter (const FabArrayBase& fabarray_, const MFItInfo& info) MFIter::~MFIter () { + Finalize(); +} + +void +MFIter::Finalize () +{ + if (finalized) return; + finalized = true; + #ifdef AMREX_USE_OMP #pragma omp master #endif From ed1ecd62acb3fd7d39b8a23aa4e9ad09669741bb Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 12 Oct 2022 08:46:34 -0700 Subject: [PATCH 092/111] MFIter: Make Finalize Public (#2985) Follow-up to #2983 --- Src/Base/AMReX_MFIter.H | 3 ++- Src/Base/AMReX_MFIter.cpp | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Src/Base/AMReX_MFIter.H b/Src/Base/AMReX_MFIter.H index 9e968aa1d8b..9c01e38b138 100644 --- a/Src/Base/AMReX_MFIter.H +++ b/Src/Base/AMReX_MFIter.H @@ -164,6 +164,8 @@ public: static int allowMultipleMFIters (int allow); + void Finalize (); + protected: std::unique_ptr m_fa; //!< This must be the first member! @@ -202,7 +204,6 @@ protected: static AMREX_EXPORT int allow_multiple_mfiters; void Initialize (); - void Finalize (); }; //! Is it safe to have these two MultiFabs in the same MFiter? 
diff --git a/Src/Base/AMReX_MFIter.cpp b/Src/Base/AMReX_MFIter.cpp index 4cd9832b747..76e27b4c0e3 100644 --- a/Src/Base/AMReX_MFIter.cpp +++ b/Src/Base/AMReX_MFIter.cpp @@ -215,9 +215,13 @@ MFIter::~MFIter () void MFIter::Finalize () { + // avoid double finalize if (finalized) return; finalized = true; + // mark as invalid + currentIndex = endIndex; + #ifdef AMREX_USE_OMP #pragma omp master #endif From f84c7a8f77d6f80f6f8ba4ee9161ee5a73a839a5 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 12 Oct 2022 10:44:11 -0700 Subject: [PATCH 093/111] Fix MLMG::getGradSolution & getFluxes for inhomogeneous Neumann and Robin BC (#2984) Because of the way inhomogeneous Neumann and Robin BCs are handled, we must add the inhomogeneous fluxes back; otherwise they would be zero at those boundaries. --- Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H | 4 + .../MLMG/AMReX_MLCellABecLap.cpp | 111 ++++++++++++++++++ Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H | 5 + Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp | 2 + 4 files changed, 122 insertions(+) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H index 8849a2be292..0cc6456b7c8 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H @@ -61,6 +61,10 @@ public: virtual void applyInhomogNeumannTerm (int amrlev, Any& rhs) const final override; + virtual void addInhomogNeumannFlux ( + int amrlev, const Array& grad, + MultiFab const& sol, bool mult_bcoef) const final override; + virtual void applyOverset (int amlev, Any& rhs) const override; #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp index af094d89406..db57162c21f 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp @@ -189,6 +189,7 @@ MLCellABecLap::getFluxes (const Vector >& a_flux 
a_flux[alev][idim]->mult(betainv); } } + addInhomogNeumannFlux(alev, a_flux[alev], *a_sol[alev], true); } } @@ -416,6 +417,116 @@ MLCellABecLap::applyInhomogNeumannTerm (int amrlev, Any& a_rhs) const } } +void +MLCellABecLap::addInhomogNeumannFlux ( + int amrlev, const Array& grad, MultiFab const& sol, + bool mult_bcoef) const +{ + /* + * if (mult_bcoef == true) + * grad is -bcoef*grad phi + * else + * grad is grad phi + */ + Real fac = mult_bcoef ? Real(-1.0) : Real(1.0); + + bool has_inhomog_neumann = hasInhomogNeumannBC(); + bool has_robin = hasRobinBC(); + + if (!has_inhomog_neumann && !has_robin) return; + + int ncomp = getNComp(); + const int mglev = 0; + + const auto dxinv = m_geom[amrlev][mglev].InvCellSize(); + const Box domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + + Array bcoef = {AMREX_D_DECL(nullptr,nullptr,nullptr)}; + if (mult_bcoef) { + bcoef = getBCoeffs(amrlev,mglev); + } + + const auto& bndry = *m_bndry_sol[amrlev]; + + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) mfi_info.SetDynamic(true); + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(sol, mfi_info); mfi.isValid(); ++mfi) + { + Box const& vbx = mfi.validbox(); + for (OrientationIter orit; orit.isValid(); ++orit) { + const Orientation ori = orit(); + const int idim = ori.coordDir(); + const Box& ccb = amrex::adjCell(vbx, ori); + const Dim3 os = IntVect::TheDimensionVector(idim).dim3(); + const Real dxi = dxinv[idim]; + if (! domain.contains(ccb)) { + for (int icomp = 0; icomp < ncomp; ++icomp) { + auto const& phi = sol.const_array(mfi,icomp); + auto const bv = bndry.bndryValues(ori).multiFab().const_array(mfi,icomp); + auto const bc = bcoef[idim] ? 
bcoef[idim]->const_array(mfi,icomp) + : Array4{}; + auto const& f = grad[idim]->array(mfi,icomp); + if (ori.isLow()) { + if (m_lobc_orig[icomp][idim] == + LinOpBCType::inhomogNeumann) { + AMREX_HOST_DEVICE_FOR_3D(ccb, i, j, k, + { + int ii = i+os.x; + int jj = j+os.y; + int kk = k+os.z; + Real b = bc ? bc(ii,jj,kk) : Real(1.0); + f(ii,jj,kk) = fac*b*bv(i,j,k); + }); + } else if (m_lobc_orig[icomp][idim] == + LinOpBCType::Robin) { + Array4 const& rbc = (*m_robin_bcval[amrlev])[mfi].const_array(icomp*3); + AMREX_HOST_DEVICE_FOR_3D(ccb, i, j, k, + { + int ii = i+os.x; + int jj = j+os.y; + int kk = k+os.z; + Real tmp = Real(1.0) / + (rbc(i,j,k,1)*dxi + rbc(i,j,k,0)*Real(0.5)); + Real RA = rbc(i,j,k,2) * tmp; + Real RB = (rbc(i,j,k,1)*dxi - rbc(i,j,k,0)*Real(0.5)) * tmp; + Real b = bc ? bc(ii,jj,kk) : Real(1.0); + f(ii,jj,kk) = fac*b*dxi*((Real(1.0)-RB)*phi(ii,jj,kk)-RA); + }); + } + } else { + if (m_hibc_orig[icomp][idim] == + LinOpBCType::inhomogNeumann) { + AMREX_HOST_DEVICE_FOR_3D(ccb, i, j, k, + { + Real b = bc ? bc(i,j,k) : Real(1.0); + f(i,j,k) = fac*b*bv(i,j,k); + }); + } else if (m_hibc_orig[icomp][idim] == + LinOpBCType::Robin) { + Array4 const& rbc = (*m_robin_bcval[amrlev])[mfi].const_array(icomp*3); + AMREX_HOST_DEVICE_FOR_3D(ccb, i, j, k, + { + Real tmp = Real(1.0) / + (rbc(i,j,k,1)*dxi + rbc(i,j,k,0)*Real(0.5)); + Real RA = rbc(i,j,k,2) * tmp; + Real RB = (rbc(i,j,k,1)*dxi - rbc(i,j,k,0)*Real(0.5)) * tmp; + Real b = bc ? 
bc(i,j,k) : Real(1.0); + f(i,j,k) = fac*b*dxi*(RA+(RB-Real(1.0))* + phi(i-os.x,j-os.y,k-os.z)); + }); + } + } + } + } + } + } +} + + void MLCellABecLap::applyOverset (int amrlev, Any& a_rhs) const { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H index 457f7565df3..9a6bb222113 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H @@ -164,6 +164,11 @@ public: virtual void AnyAverageDownAndSync (Vector& sol) const override; + virtual void addInhomogNeumannFlux (int /*amrlev*/, + const Array& /*grad*/, + MultiFab const& /*sol*/, + bool /*mult_bcoef*/) const {} + struct BCTL { BoundCond type; Real location; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp index e4c9cef953f..5c8edcbb1a6 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp @@ -938,6 +938,8 @@ MLCellLinOp::compGrad (int amrlev, const Array& grad, }); #endif } + + addInhomogNeumannFlux(amrlev, grad, sol, false); } void From 9c2264bb5ff60b353250b3654866aef06f93bdcc Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Fri, 14 Oct 2022 07:41:06 -0700 Subject: [PATCH 094/111] `MFIter::Finalize`: Free `m_fa` (#2988) This `free` should potentially not be delayed until the destructor is called. 
Follow-up to #2985 #2983 --- Src/Base/AMReX_MFIter.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Src/Base/AMReX_MFIter.cpp b/Src/Base/AMReX_MFIter.cpp index 76e27b4c0e3..c761c466449 100644 --- a/Src/Base/AMReX_MFIter.cpp +++ b/Src/Base/AMReX_MFIter.cpp @@ -250,6 +250,9 @@ MFIter::Finalize () #endif m_fa->clearThisBD(); } + if (m_fa) { + m_fa.reset(nullptr); + } } void From 975b830a012e4677d070b46d2f92353c117ad65a Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 14 Oct 2022 09:53:22 -0700 Subject: [PATCH 095/111] Fix EB data inconsistency when fixing small cells and multiple cuts (#2943) ## Summary For consistency, we need to call the function that zeros out the level set even if that box does not have any small cells or multiple cuts. This is because a node could exist in multiple boxes. Furthermore, a covered cell or covered face may have a node with a level set < 0. ## Additional background This is usually not an issue. However, in WarpX, we use the level set to decide whether a node is an unknown in the linear system. The inconsistency makes the solver fail in some cases. 
--- Src/EB/AMReX_EB2_3D_C.cpp | 155 +++++++++++++++++++------------------- 1 file changed, 77 insertions(+), 78 deletions(-) diff --git a/Src/EB/AMReX_EB2_3D_C.cpp b/Src/EB/AMReX_EB2_3D_C.cpp index 8c8b1e6ed7e..767626eb9e9 100644 --- a/Src/EB/AMReX_EB2_3D_C.cpp +++ b/Src/EB/AMReX_EB2_3D_C.cpp @@ -853,90 +853,89 @@ void build_cells (Box const& bx, Array4 const& cell, nsmallcells += hp[0]; nmulticuts += hp[1]; + Box const& nbxg1 = amrex::surroundingNodes(bxg1); + Box const& bxg1x = amrex::surroundingNodes(bxg1,0); + Box const& bxg1y = amrex::surroundingNodes(bxg1,1); + Box const& bxg1z = amrex::surroundingNodes(bxg1,2); + AMREX_HOST_DEVICE_FOR_3D(nbxg1, i, j, k, + { + if (levset(i,j,k) < Real(0.0)) { + bool zero_levset = false; + if (bxg1.contains(i-1,j-1,k-1) + && cell(i-1,j-1,k-1).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i ,j-1,k-1) + && cell(i ,j-1,k-1).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i-1,j ,k-1) + && cell(i-1,j ,k-1).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i ,j ,k-1) + && cell(i ,j ,k-1).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i-1,j-1,k ) + && cell(i-1,j-1,k ).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i ,j-1,k ) + && cell(i ,j-1,k ).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i-1,j ,k ) + && cell(i-1,j ,k ).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i ,j ,k ) + && cell(i ,j ,k ).isCovered()) { + zero_levset = true; + } else if (bxg1x.contains(i ,j-1,k-1) + && fx(i ,j-1,k-1) == Type::covered) { + zero_levset = true; + } else if (bxg1x.contains(i ,j ,k-1) + && fx(i ,j ,k-1) == Type::covered) { + zero_levset = true; + } else if (bxg1x.contains(i ,j-1,k ) + && fx(i ,j-1,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1x.contains(i ,j ,k ) + && fx(i ,j ,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1y.contains(i-1,j ,k-1) + && fy(i-1,j ,k-1) == Type::covered) { + zero_levset 
= true; + } else if (bxg1y.contains(i ,j ,k-1) + && fy(i ,j ,k-1) == Type::covered) { + zero_levset = true; + } else if (bxg1y.contains(i-1,j ,k ) + && fy(i-1,j ,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1y.contains(i ,j ,k ) + && fy(i ,j ,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1z.contains(i-1,j-1,k ) + && fz(i-1,j-1,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1z.contains(i ,j-1,k ) + && fz(i ,j-1,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1z.contains(i-1,j ,k ) + && fz(i-1,j ,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1z.contains(i ,j ,k ) + && fz(i ,j ,k ) == Type::covered) { + zero_levset = true; + } + if (zero_levset) { + levset(i,j,k) = Real(0.0); + } + } + }); + if (nsmallcells > 0 || nmulticuts > 0) { if (!cover_multiple_cuts && nmulticuts > 0) { amrex::Abort("amrex::EB2::build_cells: multi-cuts not supported"); } - Box const& nbxg1 = amrex::surroundingNodes(bxg1); - Box const& bxg1x = amrex::surroundingNodes(bxg1,0); - Box const& bxg1y = amrex::surroundingNodes(bxg1,1); - Box const& bxg1z = amrex::surroundingNodes(bxg1,2); - AMREX_HOST_DEVICE_FOR_3D(nbxg1, i, j, k, - { - if (levset(i,j,k) < Real(0.0)) { - bool zero_levset = false; - if (bxg1.contains(i-1,j-1,k-1) - && cell(i-1,j-1,k-1).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i ,j-1,k-1) - && cell(i ,j-1,k-1).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i-1,j ,k-1) - && cell(i-1,j ,k-1).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i ,j ,k-1) - && cell(i ,j ,k-1).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i-1,j-1,k ) - && cell(i-1,j-1,k ).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i ,j-1,k ) - && cell(i ,j-1,k ).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i-1,j ,k ) - && cell(i-1,j ,k ).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i ,j ,k ) - && cell(i ,j ,k 
).isCovered()) { - zero_levset = true; - } else if (cover_multiple_cuts) { - if (bxg1x.contains(i ,j-1,k-1) - && fx(i ,j-1,k-1) == Type::covered) { - zero_levset = true; - } else if (bxg1x.contains(i ,j ,k-1) - && fx(i ,j ,k-1) == Type::covered) { - zero_levset = true; - } else if (bxg1x.contains(i ,j-1,k ) - && fx(i ,j-1,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1x.contains(i ,j ,k ) - && fx(i ,j ,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1y.contains(i-1,j ,k-1) - && fy(i-1,j ,k-1) == Type::covered) { - zero_levset = true; - } else if (bxg1y.contains(i ,j ,k-1) - && fy(i ,j ,k-1) == Type::covered) { - zero_levset = true; - } else if (bxg1y.contains(i-1,j ,k ) - && fy(i-1,j ,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1y.contains(i ,j ,k ) - && fy(i ,j ,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1z.contains(i-1,j-1,k ) - && fz(i-1,j-1,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1z.contains(i ,j-1,k ) - && fz(i ,j-1,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1z.contains(i-1,j ,k ) - && fz(i-1,j ,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1z.contains(i ,j ,k ) - && fz(i ,j ,k ) == Type::covered) { - zero_levset = true; - } - } - if (zero_levset) { - levset(i,j,k) = Real(0.0); - } - } - }); return; + } else { + set_connection_flags(bx, bxg1, cell, ctmp, fx, fy, fz); } - - set_connection_flags(bx, bxg1, cell, ctmp, fx, fy, fz); } void set_connection_flags (Box const& bx, From c841ae81ddd519c088b29523aa71b6b280da440e Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 14 Oct 2022 10:03:34 -0700 Subject: [PATCH 096/111] Fourth-order interpolation from fine to coarse level (#2987) For fourth-order finite-difference methods with data at cell centers, we cannot use the usual averageDown function to overwrite coarse level data with fine data. We actually need to do interpolation. 
--- Src/Base/AMReX_MultiFabUtil.H | 17 ++ Src/Base/AMReX_MultiFabUtil.cpp | 315 ++++++++++++++++++++------------ 2 files changed, 218 insertions(+), 114 deletions(-) diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H index 009f7a8f110..21f89c8ed6c 100644 --- a/Src/Base/AMReX_MultiFabUtil.H +++ b/Src/Base/AMReX_MultiFabUtil.H @@ -243,6 +243,23 @@ namespace amrex Vector const& geom, Vector const& ratio, bool local = false); + + /** + * \brief Fourth-order interpolation from fine to coarse level. + * + * This is for high-order "average-down" of finite-difference data. If + * ghost cell data are used, it's the caller's responsibility to fill + * the ghost cells before calling this function. + * + * \param cmf coarse data + * \param scomp starting component + * \param ncomp number of component + * \param fmf fine data + * \param ratio refinement ratio. + */ + void FourthOrderInterpFromFineToCoarse (MultiFab& cmf, int scomp, int ncomp, + MultiFab const& fmf, + IntVect const& ratio); } namespace amrex { diff --git a/Src/Base/AMReX_MultiFabUtil.cpp b/Src/Base/AMReX_MultiFabUtil.cpp index 6b0768ba649..3ae4aa91b9f 100644 --- a/Src/Base/AMReX_MultiFabUtil.cpp +++ b/Src/Base/AMReX_MultiFabUtil.cpp @@ -1227,157 +1227,244 @@ namespace amrex return hv; } - Real volumeWeightedSum (Vector const& mf, int icomp, - Vector const& geom, - Vector const& ratio, - bool local) - { - ReduceOps reduce_op; - ReduceData reduce_data(reduce_op); + Real volumeWeightedSum (Vector const& mf, int icomp, + Vector const& geom, + Vector const& ratio, + bool local) + { + ReduceOps reduce_op; + ReduceData reduce_data(reduce_op); #ifdef AMREX_USE_EB - bool has_eb = !(mf[0]->isAllRegular()); + bool has_eb = !(mf[0]->isAllRegular()); #endif - int nlevels = mf.size(); - for (int ilev = 0; ilev < nlevels-1; ++ilev) { - iMultiFab mask = makeFineMask(*mf[ilev], *mf[ilev+1], IntVect(0), - ratio[ilev],Periodicity::NonPeriodic(), - 0, 1); - auto const& m = mask.const_arrays(); - auto const& 
a = mf[ilev]->const_arrays(); - auto const dx = geom[ilev].CellSizeArray(); - Real dv = AMREX_D_TERM(dx[0],*dx[1],*dx[2]); -#ifdef AMREX_USE_EB - if (has_eb) { - AMREX_ASSERT(mf[ilev]->hasEBFabFactory()); - auto const& f = dynamic_cast - (mf[ilev]->Factory()); - auto const& vfrac = f.getVolFrac(); - auto const& va = vfrac.const_arrays(); - reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - -> Real - { - return m[box_no](i,j,k) ? Real(0.) - : dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k); - }); - } else -#endif - { -#if (AMREX_SPACEDIM == 1) - if (geom[ilev].IsSPHERICAL()) { - const auto rlo = geom[ilev].CellSize(0); - reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) - noexcept -> Real - { - if (m[box_no](i,j,k)) { - return Real(0.); - } else { - constexpr Real pi = Real(3.1415926535897932); - Real ri = rlo + dx[0]*i; - Real ro = ri + dx[0]; - return Real(4./3.)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri) - * a[box_no](i,j,k,icomp); - } - }); - } else -#elif (AMREX_SPACEDIM == 2) - if (geom[ilev].IsRZ()) { - const auto rlo = geom[ilev].CellSize(0); - reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) - noexcept -> Real - { - if (m[box_no](i,j,k)) { - return Real(0.); - } else { - Real ri = rlo + dx[0]*i; - Real ro = ri + dx[0]; - constexpr Real pi = Real(3.1415926535897932); - return pi*dx[1]*dx[0]*(ro+ri) - * a[box_no](i,j,k,icomp); - } - }); - } else -#endif - { - reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) - noexcept -> Real - { - return m[box_no](i,j,k) ? Real(0.) 
- : dv*a[box_no](i,j,k,icomp); - }); - } - } - Gpu::streamSynchronize(); - } - - auto const& a = mf.back()->const_arrays(); - auto const dx = geom[nlevels-1].CellSizeArray(); + int nlevels = mf.size(); + for (int ilev = 0; ilev < nlevels-1; ++ilev) { + iMultiFab mask = makeFineMask(*mf[ilev], *mf[ilev+1], IntVect(0), + ratio[ilev],Periodicity::NonPeriodic(), + 0, 1); + auto const& m = mask.const_arrays(); + auto const& a = mf[ilev]->const_arrays(); + auto const dx = geom[ilev].CellSizeArray(); Real dv = AMREX_D_TERM(dx[0],*dx[1],*dx[2]); #ifdef AMREX_USE_EB if (has_eb) { - AMREX_ASSERT(mf.back()->hasEBFabFactory()); + AMREX_ASSERT(mf[ilev]->hasEBFabFactory()); auto const& f = dynamic_cast - (mf.back()->Factory()); + (mf[ilev]->Factory()); auto const& vfrac = f.getVolFrac(); auto const& va = vfrac.const_arrays(); - reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> Real { - return dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k); + return m[box_no](i,j,k) ? Real(0.) 
+ : dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k); }); } else #endif { #if (AMREX_SPACEDIM == 1) - if (geom[nlevels-1].IsSPHERICAL()) { - const auto rlo = geom[nlevels-1].CellSize(0); - reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + if (geom[ilev].IsSPHERICAL()) { + const auto rlo = geom[ilev].CellSize(0); + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> Real { - constexpr Real pi = Real(3.1415926535897932); - Real ri = rlo + dx[0]*i; - Real ro = ri + dx[0]; - return Real(4./3.)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri) - * a[box_no](i,j,k,icomp); + if (m[box_no](i,j,k)) { + return Real(0.); + } else { + constexpr Real pi = Real(3.1415926535897932); + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + return Real(4./3.)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri) + * a[box_no](i,j,k,icomp); + } }); } else #elif (AMREX_SPACEDIM == 2) - if (geom[nlevels-1].IsRZ()) { - const auto rlo = geom[nlevels-1].CellSize(0); - reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + if (geom[ilev].IsRZ()) { + const auto rlo = geom[ilev].CellSize(0); + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> Real { - Real ri = rlo + dx[0]*i; - Real ro = ri + dx[0]; - constexpr Real pi = Real(3.1415926535897932); - return pi*dx[1]*dx[0]*(ro+ri) - * a[box_no](i,j,k,icomp); + if (m[box_no](i,j,k)) { + return Real(0.); + } else { + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + constexpr Real pi = Real(3.1415926535897932); + return pi*dx[1]*dx[0]*(ro+ri) + * a[box_no](i,j,k,icomp); + } }); } else #endif { - reduce_op.eval(*mf.back(), IntVect(0), reduce_data, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real { - return dv*a[box_no](i,j,k,icomp); + return m[box_no](i,j,k) ? Real(0.) 
+ : dv*a[box_no](i,j,k,icomp); }); } } + Gpu::streamSynchronize(); + } + + auto const& a = mf.back()->const_arrays(); + auto const dx = geom[nlevels-1].CellSizeArray(); + Real dv = AMREX_D_TERM(dx[0],*dx[1],*dx[2]); +#ifdef AMREX_USE_EB + if (has_eb) { + AMREX_ASSERT(mf.back()->hasEBFabFactory()); + auto const& f = dynamic_cast + (mf.back()->Factory()); + auto const& vfrac = f.getVolFrac(); + auto const& va = vfrac.const_arrays(); + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + -> Real + { + return dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k); + }); + } else +#endif + { +#if (AMREX_SPACEDIM == 1) + if (geom[nlevels-1].IsSPHERICAL()) { + const auto rlo = geom[nlevels-1].CellSize(0); + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + constexpr Real pi = Real(3.1415926535897932); + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + return Real(4./3.)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri) + * a[box_no](i,j,k,icomp); + }); + } else +#elif (AMREX_SPACEDIM == 2) + if (geom[nlevels-1].IsRZ()) { + const auto rlo = geom[nlevels-1].CellSize(0); + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + constexpr Real pi = Real(3.1415926535897932); + return pi*dx[1]*dx[0]*(ro+ri) + * a[box_no](i,j,k,icomp); + }); + } else +#endif + { + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + return dv*a[box_no](i,j,k,icomp); + }); + } + } + + auto const& hv = reduce_data.value(reduce_op); + Real r = amrex::get<0>(hv); + + if (!local) { + ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub()); + } + return r; + } + + void FourthOrderInterpFromFineToCoarse (MultiFab& cmf, int scomp, int ncomp, + MultiFab const& fmf, + IntVect const& 
ratio) + { + AMREX_ASSERT(AMREX_D_TERM( (ratio[0] == 2 || ratio[0] == 4), + && (ratio[1] == 2 || ratio[1] == 4), + && (ratio[2] == 2 || ratio[2] == 4))); + + MultiFab tmp(amrex::coarsen(fmf.boxArray(), ratio), fmf.DistributionMap(), + ncomp, 0); + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + { +#if (AMREX_SPACEDIM > 1) + FArrayBox xtmp; +#if (AMREX_SPACEDIM > 2) + FArrayBox ytmp; +#endif +#endif + for (MFIter mfi(tmp,TilingIfNotGPU()); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fa = fmf.const_array(mfi,scomp); - auto const& hv = reduce_data.value(reduce_op); - Real r = amrex::get<0>(hv); + Box xbx = bx; +#if (AMREX_SPACEDIM == 1) + auto const& xa = tmp.array(mfi); +#else + xbx.refine(IntVect(AMREX_D_DECL(1,ratio[1],ratio[2]))); + if (ratio[1] == 2) { xbx.grow(1,1); } +#if (AMREX_SPACEDIM == 3) + if (ratio[2] == 2) { xbx.grow(2,1); } +#endif + xtmp.resize(xbx,ncomp); + Elixir eli = xtmp.elixir(); + auto const& xa = xtmp.array(); +#endif + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(xbx, ncomp, i, j, k, n, + { + int ii = 2*i; + xa(i,j,k,n) = Real(1./16)*(Real(9.)*(fa(ii ,j,k,n) + + fa(ii+1,j,k,n)) + - fa(ii-1,j,k,n) + - fa(ii+2,j,k,n)); + }); - if (!local) { - ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub()); +#if (AMREX_SPACEDIM > 1) + Box ybx = bx; + auto const& xca = xtmp.const_array(); +#if (AMREX_SPACEDIM == 2) + auto const& ya = tmp.array(mfi); +#else + ybx.refine(IntVect(AMREX_D_DECL(1,1,ratio[2]))); + if (ratio[2] == 2) { ybx.grow(2,1); } + ytmp.resize(ybx,ncomp); + eli.append(ytmp.elixir()); + auto const& ya = ytmp.array(); +#endif + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(ybx, ncomp, i, j, k, n, + { + int jj = 2*j; + ya(i,j,k,n) = Real(1./16)*(Real(9.)*(xca(i,jj ,k,n) + + xca(i,jj+1,k,n)) + - xca(i,jj-1,k,n) + - xca(i,jj+2,k,n)); + }); + +#if (AMREX_SPACEDIM == 3) + auto const& yca = ytmp.const_array(); + auto const& ca = tmp.array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, 
n, + { + int kk = 2*k; + ca(i,j,k,n) = Real(1./16)*(Real(9.)*(yca(i,j,kk ,n) + + yca(i,j,kk+1,n)) + - yca(i,j,kk-1,n) + - yca(i,j,kk+2,n)); + }); +#endif +#endif } - return r; } + + cmf.ParallelCopy(tmp, 0, scomp, ncomp); + } } From 1ad4144668b0656d42950be92936073c64c56db7 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 14 Oct 2022 10:36:17 -0700 Subject: [PATCH 097/111] Runge-Kutta support for AMR (#2974) This adds RK2, RK3 and RK4 in a new namespace RungeKutta. Together with the enhanced FillPatcher class, these functions can be used for RK time stepping in AMR simulations. A new function AmrLevel::RK is added for AmrLevel based codes. See CNS::advance in Tests/GPU/CNS/CNS_advance.cpp for an example of using the new AmrLevel::RK function. The main motivation for this PR is that ghost cell filling for high order (> 2) RK methods at coarse/fine boundary is non-trivial when there is subcycling. Co-authored-by: Jean M. Sexton --- Src/Amr/AMReX_AmrLevel.H | 113 ++++++- Src/Amr/AMReX_AmrLevel.cpp | 27 ++ Src/AmrCore/AMReX_FillPatcher.H | 275 +++++++++++++++- Src/Base/AMReX_RungeKutta.H | 293 ++++++++++++++++++ Src/Base/CMakeLists.txt | 1 + Src/Base/Make.package | 2 +- Tests/GPU/CNS/Source/CNS.H | 2 + Tests/GPU/CNS/Source/CNS.cpp | 5 + Tests/GPU/CNS/Source/CNS_advance.cpp | 34 +- .../CNS/Source/diffusion/CNS_diffusion_K.H | 20 +- 10 files changed, 714 insertions(+), 58 deletions(-) create mode 100644 Src/Base/AMReX_RungeKutta.H diff --git a/Src/Amr/AMReX_AmrLevel.H b/Src/Amr/AMReX_AmrLevel.H index cca2e9776cd..5034df1b5e5 100644 --- a/Src/Amr/AMReX_AmrLevel.H +++ b/Src/Amr/AMReX_AmrLevel.H @@ -15,6 +15,7 @@ #include #include #include +#include #include #ifdef AMREX_USE_EB #include @@ -153,11 +154,10 @@ public: int ncycle) = 0; /** - * \brief Contains operations to be done after a timestep. This is a - * pure virtual function and hence MUST be implemented by derived - * classes. + * \brief Contains operations to be done after a timestep. 
If this + * function is overridden, don't forget to reset FillPatcher. */ - virtual void post_timestep (int iteration) = 0; + virtual void post_timestep (int iteration); /** * \brief Contains operations to be done only after a full coarse * timestep. The default implementation does nothing. @@ -397,8 +397,33 @@ public: Real time, int index, int scomp, - int ncomp, - int dcomp=0); + int ncomp, + int dcomp=0); + + /** + * \brief Evolve one step with Runge-Kutta (2, 3, or 4) + * + * To use RK, the StateData must have all the ghost cells needed. See + * namespace RungeKutta for expected function signatures of the callable + * parameters. + * + * \param order order of RK + * \param state_type index of StateData + * \param time time at the beginning of the step. + * \param dt time step + * \param iteration iteration number on fine level during a coarse time + * step. For an AMR simulation with subcycling and a + * refinement ratio of 2, the number is either 1 or 2, + * denoting the first and second substep, respectively. + * \param ncycle number of subcycling steps. It's usually 2 or 4. + * Without subcycling, this will be 1. + * \param f computing right-hand side for evolving the StateData. + * One can also register data for flux registers in this. 
+ * \param p optionally post-processing RK stage results + */ + template + void RK (int order, int state_type, Real time, Real dt, int iteration, + int ncycle, F&& f, P&& p = RungeKutta::PostStageNoOp()); #ifdef AMREX_USE_EB static void SetEBMaxGrowCells (int nbasic, int nvolume, int nfull) noexcept { @@ -457,6 +482,14 @@ protected: private: + template + void storeRKCoarseData (int state_type, Real time, Real dt, + MultiFab const& S_old, + Array const& rkk); + + void FillRKPatch (int state_index, MultiFab& S, Real time, + int stage, int iteration, int ncycle); + mutable BoxArray edge_grids[AMREX_SPACEDIM]; // face-centered grids mutable BoxArray nodal_grids; // all nodal grids }; @@ -577,6 +610,74 @@ private: std::map< int,Vector< Vector< Vector > > > m_fbid; // [grid][level][fillablesubbox][oldnew] }; +template +void AmrLevel::RK (int order, int state_type, Real time, Real dt, int iteration, + int ncycle, F&& f, P&& p) +{ + BL_PROFILE("AmrLevel::RK()"); + + AMREX_ASSERT(AmrLevel::desc_lst[state_type].nExtra() > 0); // Need ghost cells in StateData + + MultiFab& S_old = get_old_data(state_type); + MultiFab& S_new = get_new_data(state_type); + const Real t_old = state[state_type].prevTime(); + const Real t_new = state[state_type].curTime(); + AMREX_ALWAYS_ASSERT(amrex::almostEqual(time,t_old) && amrex::almostEqual(time+dt,t_new)); + + if (order == 2) { + RungeKutta::RK2(S_old, S_new, time, dt, std::forward(f), + [&] (int /*stage*/, MultiFab& mf, Real t) { + FillPatcherFill(mf, 0, mf.nComp(), mf.nGrow(), t, + state_type, 0); }, + std::forward

    (p)); + } else if (order == 3) { + RungeKutta::RK3(S_old, S_new, time, dt, std::forward(f), + [&] (int stage, MultiFab& mf, Real t) { + FillRKPatch(state_type, mf, t, stage, iteration, ncycle); + }, + [&] (Array const& rkk) { + if (level < parent->finestLevel()) { + storeRKCoarseData(state_type, time, dt, S_old, rkk); + } + }, + std::forward

    (p)); + } else if (order == 4) { + RungeKutta::RK4(S_old, S_new, time, dt, std::forward(f), + [&] (int stage, MultiFab& mf, Real t) { + FillRKPatch(state_type, mf, t, stage, iteration, ncycle); + }, + [&] (Array const& rkk) { + if (level < parent->finestLevel()) { + storeRKCoarseData(state_type, time, dt, S_old, rkk); + } + }, + std::forward

    (p)); + } else { + amrex::Abort("AmrLevel::RK: order = "+std::to_string(order)+" is not supported"); + } +} + +template +void AmrLevel::storeRKCoarseData (int state_type, Real time, Real dt, + MultiFab const& S_old, + Array const& rkk) +{ + if (level == parent->finestLevel()) { return; } + + const StateDescriptor& desc = AmrLevel::desc_lst[state_type]; + + auto& fillpatcher = parent->getLevel(level+1).m_fillpatcher[state_type]; + fillpatcher = std::make_unique> + (parent->boxArray(level+1), parent->DistributionMap(level+1), + parent->Geom(level+1), + parent->boxArray(level), parent->DistributionMap(level), + parent->Geom(level), + IntVect(desc.nExtra()), desc.nComp(), desc.interp(0)); + + fillpatcher->storeRKCoarseData(time, dt, S_old, rkk); +} + + } #endif /*_AmrLevel_H_*/ diff --git a/Src/Amr/AMReX_AmrLevel.cpp b/Src/Amr/AMReX_AmrLevel.cpp index fbeba917255..c10a1e6277b 100644 --- a/Src/Amr/AMReX_AmrLevel.cpp +++ b/Src/Amr/AMReX_AmrLevel.cpp @@ -31,6 +31,14 @@ EBSupport AmrLevel::m_eb_support_level = EBSupport::volume; DescriptorList AmrLevel::desc_lst; DeriveList AmrLevel::derive_lst; +void +AmrLevel::post_timestep (int /*iteration*/) +{ + if (level < parent->finestLevel()) { + parent->getLevel(level+1).resetFillPatcher(); + } +} + void AmrLevel::postCoarseTimeStep (Real time) { @@ -2223,4 +2231,23 @@ AmrLevel::CreateLevelDirectory (const std::string &dir) levelDirectoryCreated = true; } +void +AmrLevel::FillRKPatch (int state_index, MultiFab& S, Real time, + int stage, int iteration, int ncycle) +{ + StateDataPhysBCFunct physbcf(state[state_index], 0, geom); + + if (level == 0) { + S.FillBoundary(geom.periodicity()); + physbcf(S, 0, S.nComp(), S.nGrowVect(), time, 0); + } else { + auto& crse_level = parent->getLevel(level-1); + StateDataPhysBCFunct physbcf_crse(crse_level.state[state_index], 0, + crse_level.geom); + auto& fillpatcher = m_fillpatcher[state_index]; + fillpatcher->fillRK(stage, iteration, ncycle, S, time, physbcf_crse, + physbcf, 
AmrLevel::desc_lst[state_index].getBCs()); + } +} + } diff --git a/Src/AmrCore/AMReX_FillPatcher.H b/Src/AmrCore/AMReX_FillPatcher.H index 41ed75318c6..22b14d35c0d 100644 --- a/Src/AmrCore/AMReX_FillPatcher.H +++ b/Src/AmrCore/AMReX_FillPatcher.H @@ -55,6 +55,17 @@ namespace amrex { * allowed to fill only some of the components with the fill function. * * (5) This only works for cell-centered and nodal data. + * + * This class also provides support for RungeKutta::RK3 and RungeKutta::RK4. + * The storeRKCoarseData function can be used to store coarse AMR level + * data that are needed for filling fine level data's ghost cells in this + * class. The `fillRK` function can be used to fill ghost cells for fine + * AMR levels. This operation at the coarse/fine boundary is non-trivial + * for RK orders higher than 2. Note that it is expected that time stepping + * on the coarse level is performed before any fine level time stepping, and + * it's the user's responsibility to properly create and destroy this object. + * See AmrLevel::RK for an example of using the RungeKutta functions and + * FillPatcher together. */ template @@ -153,6 +164,37 @@ public: PreInterpHook const& pre_interp = {}, PostInterpHook const& post_interp = {}); + /** + * \brief Store coarse AMR level data for RK3 and RK4 + * + * \tparam order RK order. Must be 3 or 4. + * \param time time at the beginning of the step + * \param dt time step + * \param S_old data at time + * \param RK_k right-hand side at RK stages + */ + template + void storeRKCoarseData (Real time, Real dt, MF const& S_old, + Array const& RK_k); + + /** + * \brief Fill ghost cells of fine AMR level for RK3 and RK4 + * + * \param stage RK stage number starting from 1 + * \param iteration iteration number on fine level during a coarse time + * step. For an AMR simulation with subcycling and a + * refinement ratio of 2, the number is either 1 or 2, + * denoting the first and second substep, respectively. 
+ * \param ncycle number of subcycling steps. It's usually 2 or 4. + * Without subcycling, this will be 1. + * \param cbc filling physical boundary on coarse level + * \param fbc filling physical boundary on fine level + * \param bcs physical BC types + */ + template + void fillRK (int stage, int iteration, int ncycle, MF& mf, Real time, + BC& cbc, BC& fbc, Vector const& bcs); + private: BoxArray m_fba; @@ -165,8 +207,14 @@ private: int m_ncomp; InterpBase* m_interp; EB2::IndexSpace const* m_eb_index_space = nullptr; + MF m_sfine; + IntVect m_ratio; Vector>> m_cf_crse_data; + std::unique_ptr m_cf_crse_data_tmp; std::unique_ptr m_cf_fine_data; + Real m_dt_coarse = std::numeric_limits::lowest(); + + FabArrayBase::FPinfo const& getFPinfo (); }; template @@ -185,11 +233,17 @@ FillPatcher::FillPatcher (BoxArray const& fba, DistributionMapping const& fd m_nghost(nghost), m_ncomp(ncomp), m_interp(interp), - m_eb_index_space(eb_index_space) + m_eb_index_space(eb_index_space), + m_sfine(fba, fdm, 1, nghost, MFInfo().SetAlloc(false)) { static_assert(IsFabArray::value, "FillPatcher: MF must be FabArray type"); AMREX_ALWAYS_ASSERT(m_fba.ixType().cellCentered() || m_fba.ixType().nodeCentered()); + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + m_ratio[idim] = m_fgeom.Domain().length(idim) / m_cgeom.Domain().length(idim); + } + AMREX_ASSERT(m_fgeom.Domain() == amrex::refine(m_cgeom.Domain(),m_ratio)); } template @@ -217,6 +271,15 @@ FillPatcher::fill (MF& mf, IntVect const& nghost, Real time, m_fgeom, fbc, fbccomp); } +template +FabArrayBase::FPinfo const& +FillPatcher::getFPinfo () +{ + const InterpolaterBoxCoarsener& coarsener = m_interp->BoxCoarsener(m_ratio); + return FabArrayBase::TheFPinfo(m_sfine, m_sfine, m_nghost, coarsener, + m_fgeom, m_cgeom, m_eb_index_space); +} + template template void @@ -239,19 +302,7 @@ FillPatcher::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real tim m_ncomp >= ncomp && m_ncomp == cmf[0]->nComp()); - IntVect ratio; - for (int 
idim = 0; idim < AMREX_SPACEDIM; ++idim) { - ratio[idim] = m_fgeom.Domain().length(idim) / m_cgeom.Domain().length(idim); - } - AMREX_ASSERT(m_fgeom.Domain() == amrex::refine(m_cgeom.Domain(),ratio)); - - const InterpolaterBoxCoarsener& coarsener = m_interp->BoxCoarsener(ratio); - const FabArrayBase::FPinfo& fpc = FabArrayBase::TheFPinfo(mf, mf, - m_nghost, - coarsener, - m_fgeom, - m_cgeom, - m_eb_index_space); + auto const& fpc = getFPinfo(); if ( ! fpc.ba_crse_patch.empty()) { @@ -294,7 +345,11 @@ FillPatcher::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real tim } else if (m_cf_crse_data.size() == 2) { - mf_crse_patch = make_mf_crse_patch(fpc, ncomp); + if (m_cf_crse_data_tmp == nullptr) { + m_cf_crse_data_tmp = std::make_unique + (make_mf_crse_patch(fpc, m_ncomp)); + } + mf_crse_patch = MF(*m_cf_crse_data_tmp, amrex::make_alias, scomp, ncomp); int const ng_space_interp = 8; // Need to be big enough Box domain = m_cgeom.growPeriodicDomain(ng_space_interp); domain.convert(mf.ixType()); @@ -330,7 +385,7 @@ FillPatcher::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real tim ncomp, IntVect(0), m_cgeom, m_fgeom, amrex::grow(amrex::convert(m_fgeom.Domain(), mf.ixType()),nghost), - ratio, m_interp, bcs, bcscomp); + m_ratio, m_interp, bcs, bcscomp); post_interp(*m_cf_fine_data, scomp, ncomp); @@ -338,6 +393,194 @@ FillPatcher::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real tim } } +template +template +void FillPatcher::storeRKCoarseData (Real /*time*/, Real dt, MF const& S_old, + Array const& RK_k) +{ + m_dt_coarse = dt; + m_cf_crse_data.resize(order+1); + + auto const& fpc = getFPinfo(); + + for (auto& tmf : m_cf_crse_data) { + tmf.first = std::numeric_limits::lowest(); // because we dont' need it + tmf.second = std::make_unique(make_mf_crse_patch(fpc, m_ncomp)); + } + m_cf_crse_data[0].second->ParallelCopy(S_old, m_cgeom.periodicity()); + for (std::size_t i = 0; i < order; ++i) { + m_cf_crse_data[i+1].second->ParallelCopy(RK_k[i], 
m_cgeom.periodicity()); + } +} + +template +template +void FillPatcher::fillRK (int stage, int iteration, int ncycle, + MF& mf, Real time, BC& cbc, BC& fbc, + Vector const& bcs) +{ + int rk_order = m_cf_crse_data.size()-1; + if (rk_order != 3 && rk_order != 4) { + amrex::Abort("FillPatcher: unsupported RK order "+std::to_string(rk_order)); + return; + } + AMREX_ASSERT(stage > 0 && stage <= rk_order); + + auto const& fpc = getFPinfo(); + if (m_cf_crse_data_tmp == nullptr) { + m_cf_crse_data_tmp = std::make_unique + (make_mf_crse_patch(fpc, m_ncomp)); + } + + auto const& u = m_cf_crse_data_tmp->arrays(); + auto const& u0 = m_cf_crse_data[0].second->const_arrays(); + auto const& k1 = m_cf_crse_data[1].second->const_arrays(); + auto const& k2 = m_cf_crse_data[2].second->const_arrays(); + auto const& k3 = m_cf_crse_data[3].second->const_arrays(); + + Real dtc = m_dt_coarse; + Real r = Real(1) / Real(ncycle); + Real xsi = Real(iteration-1) / Real(ncycle); + + if (rk_order == 3) { + // coefficients for U + Real b1 = xsi - Real(5./6.)*xsi*xsi; + Real b2 = Real(1./6.)*xsi*xsi; + Real b3 = Real(2./3)*xsi*xsi; + // coefficients for Ut + Real c1 = Real(1.) 
- Real(5./3.)*xsi; + Real c2 = Real(1./3.)*xsi; + Real c3 = Real(4./3.)*xsi; + // coefficients for Utt + constexpr Real d1 = Real(-5./3.); + constexpr Real d2 = Real(1./3.); + constexpr Real d3 = Real(4./3.); + if (stage == 1) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*uu; + }); + } else if (stage == 2) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3; + Real ut = c1*kk1 + c2*kk2 + c3*kk3; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*(uu + r*ut); + }); + } else if (stage == 3) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3; + Real ut = c1*kk1 + c2*kk2 + c3*kk3; + Real utt = d1*kk1 + d2*kk2 + d3*kk3; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc* + (uu + Real(0.5)*r*ut + Real(0.25)*r*r*utt); + }); + } + } else if (rk_order == 4) { + auto const& k4 = m_cf_crse_data[4].second->const_arrays(); + Real xsi2 = xsi*xsi; + Real xsi3 = xsi2*xsi; + // coefficients for U + Real b1 = xsi - Real(1.5)*xsi2 + Real(2./3.)*xsi3; + Real b2 = xsi2 - Real(2./3.)*xsi3; + Real b3 = b2; + Real b4 = Real(-0.5)*xsi2 + Real(2./3.)*xsi3; + // coefficients for Ut + Real c1 = Real(1.) - Real(3.)*xsi + Real(2.)*xsi2; + Real c2 = Real(2.)*xsi - Real(2.)*xsi2; + Real c3 = c2; + Real c4 = -xsi + Real(2.)*xsi2; + // coefficients for Utt + Real d1 = Real(-3.) + Real(4.)*xsi; + Real d2 = Real( 2.) 
- Real(4.)*xsi; + Real d3 = d2; + Real d4 = Real(-1.) + Real(4.)*xsi; + // coefficients for Uttt + constexpr Real e1 = Real( 4.); + constexpr Real e2 = Real(-4.); + constexpr Real e3 = Real(-4.); + constexpr Real e4 = Real( 4.); + if (stage == 1) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real kk4 = k4[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3 + b4*kk4; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*uu; + }); + } else if (stage == 2) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real kk4 = k4[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3 + b4*kk4; + Real ut = c1*kk1 + c2*kk2 + c3*kk3 + c4*kk4; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*(uu + Real(0.5)*r*ut); + }); + } else if (stage == 3 || stage == 4) { + Real r2 = r*r; + Real r3 = r2*r; + Real at = (stage == 3) ? Real(0.5)*r : r; + Real att = (stage == 3) ? Real(0.25)*r2 : Real(0.5)*r2; + Real attt = (stage == 3) ? Real(0.0625)*r3 : Real(0.125)*r3; + Real akk = (stage == 3) ? Real(-4.) 
: Real(4.); + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real kk4 = k4[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3 + b4*kk4; + Real ut = c1*kk1 + c2*kk2 + c3*kk3 + c4*kk4; + Real utt = d1*kk1 + d2*kk2 + d3*kk3 + d4*kk4; + Real uttt = e1*kk1 + e2*kk2 + e3*kk3 + e4*kk4; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc * + (uu + at*ut + att*utt + attt*(uttt+akk*(kk3-kk2))); + }); + } + } + Gpu::streamSynchronize(); + + cbc(*m_cf_crse_data_tmp, 0, m_ncomp, m_nghost, time, 0); + + if (m_cf_fine_data == nullptr) { + m_cf_fine_data = std::make_unique(make_mf_fine_patch(fpc, m_ncomp)); + } + + FillPatchInterp(*m_cf_fine_data, 0, *m_cf_crse_data_tmp, 0, + m_ncomp, IntVect(0), m_cgeom, m_fgeom, + amrex::grow(amrex::convert(m_fgeom.Domain(), + mf.ixType()),m_nghost), + m_ratio, m_interp, bcs, 0); + + // xxxxx We can optimize away this ParallelCopy by making a special fpinfo. + mf.ParallelCopy(*m_cf_fine_data, 0, 0, m_ncomp, IntVect(0), m_nghost); + + mf.FillBoundary(m_fgeom.periodicity()); + fbc(mf, 0, m_ncomp, m_nghost, time, 0); +} + } #endif diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H new file mode 100644 index 00000000000..b5e35f783c5 --- /dev/null +++ b/Src/Base/AMReX_RungeKutta.H @@ -0,0 +1,293 @@ +#ifndef AMREX_RUNGE_KUTTA_H_ +#define AMREX_RUNGE_KUTTA_H_ +#include + +#include + +namespace amrex { + +/** + * \brief Functions for Runge-Kutta methods + * + * This namespace RungeKutta has functions for a number RK methods, RK2, RK3 + * and RK4. 
Here, RK2 refers to the explicit trapezoid rule, RK3 refers to + * the SSPRK3 + * (https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Third-order_Strong_Stability_Preserving_Runge-Kutta_(SSPRK3)), + * and RK4 is the classical fourth-order method + * (https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Classic_fourth-order_method). + * The function templates take the old data in FabArray/MultiFab as input, + * and evolve the system for one time step. The result is stored in another + * FabArray/MultiFab. These two FabArrays must have ghost cells if they are + * needed for evaluating the right-hand side. The functions take three + * callable objects for computing the right-hand side, filling ghost cells, + * and optionally post-processing RK stage results. For RK3 and RK4, they + * also need a callable object for storing the data needed for filling + * coarse/fine boundaries in AMR simulations. + * + * The callable object for right-hand side has the signature of `void(int + * stage, MF& dudt, MF const& u, Real t, Real dt)`, where `stage` is the RK + * stage number starting from 1, `dudt` is the output, `u` is the input, `t` + * is the first-order approximate time of the stage, and `dt` is the + * sub-time step, which can be used for reflux operations in AMR + * simulations. + * + * The callable object for filling ghost cells has the signature of + * `void(int stage, MF& u, Real t)`, where `stage` is the RK stage number + * starting from 1, `u` is a FabArray/MultiFab whose ghost cells need to be + * filled, and `t` is the first-order approximate time of the data at that + * stage. The FillPatcher class can be useful for implementing such a + * callable. See AmrLevel::RK for an example. + * + * The callable object for post-processing stage results is optional. It's + * no-op by default. Its function signature is `void(int stage, MF& u)`, + * where `stage` is the RK stage number and `u` is the result of that stage. 
+ * + * For RK3 and RK4, one must also provide a callable object with the + * signature of `void(Array const& rkk)`, where `order` is the RK + * order and `rkk` contains the right-hand side at all the RK stages. The + * FillPatcher class can be useful for implementing such a callable. See + * AmrLevel::RK for an example. + */ +namespace RungeKutta { + +struct PostStageNoOp { + template + std::enable_if_t::value> operator() (int, MF&) const {} +}; + +namespace detail { +//! Unew = Uold + dUdt * dt +template +void rk_update (MF& Unew, MF const& Uold, MF const& dUdt, Real dt) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& sdot = dUdt.const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + dt*sdot[bi](i,j,k,n); + }); + Gpu::streamSynchronize(); +} + +//! Unew = Uold + (dUdt1 + dUdt2) * dt +template +void rk_update (MF& Unew, MF const& Uold, MF const& dUdt1, MF const& dUdt2, Real dt) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& sdot1 = dUdt1.const_arrays(); + auto const& sdot2 = dUdt2.const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + dt*(sdot1[bi](i,j,k,n) + + sdot2[bi](i,j,k,n)); + }); + Gpu::streamSynchronize(); +} + +//! 
Unew = (Uold+Unew)/2 + dUdt * dt/2 +template +void rk2_update_2 (MF& Unew, MF const& Uold, MF const& dUdt, Real dt) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& sdot = dUdt.const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = Real(0.5)*(snew[bi](i,j,k,n) + + sold[bi](i,j,k,n) + + sdot[bi](i,j,k,n) * dt); + }); + Gpu::streamSynchronize(); +} + +//! Unew = Uold + (k1 + k2 + 4*k3) * dt6, where dt6 = dt/6 +template +void rk3_update_3 (MF& Unew, MF const& Uold, Array const& rkk, Real dt6) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& k1 = rkk[0].const_arrays(); + auto const& k2 = rkk[1].const_arrays(); + auto const& k3 = rkk[2].const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + + dt6 * (k1[bi](i,j,k,n) + k2[bi](i,j,k,n) + + Real(4.) * k3[bi](i,j,k,n)); + }); + Gpu::streamSynchronize(); +} + +//! 
Unew = Uold + (k1+k4+2*(k2+k3))*dt6, where dt6 = dt/6 +template +void rk4_update_4 (MF& Unew, MF const& Uold, Array const& rkk, Real dt6) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& k1 = rkk[0].const_arrays(); + auto const& k2 = rkk[1].const_arrays(); + auto const& k3 = rkk[2].const_arrays(); + auto const& k4 = rkk[3].const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + + dt6 * ( k1[bi](i,j,k,n) + k4[bi](i,j,k,n) + + Real(2.)*(k2[bi](i,j,k,n) + k3[bi](i,j,k,n))); + }); + Gpu::streamSynchronize(); +} +} + +/** + * \brief Time stepping with RK2 + * + * \param Uold input FabArray/MultiFab data at time + * \param Unew output FabArray/MultiFab data at time+dt + * \param time time at the beginning of the step + * \param dt time step + * \param frhs computing the right-hand side + * \param fillbndry filling ghost cells + * \param post_stage post-processing stage results + */ +template +void RK2 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, + P&& post_stage = PostStageNoOp()) +{ + BL_PROFILE("RungeKutta2"); + + MF dUdt(Unew.boxArray(), Unew.DistributionMap(), Unew.nComp(), 0, + MFInfo(), Unew.Factory()); + + // RK2 stage 1 + fillbndry(1, Uold, time); + frhs(1, dUdt, Uold, time, Real(0.5)*dt); + // Unew = Uold + dt * dUdt + detail::rk_update(Unew, Uold, dUdt, dt); + post_stage(1, Unew); + + // RK2 stage 2 + fillbndry(2, Unew, time+dt); + frhs(2, dUdt, Unew, time, Real(0.5)*dt); + // Unew = (Uold+Unew)/2 + dUdt_2 * dt/2, + // which is Unew = Uold + dt/2 * (dUdt_1 + dUdt_2) + detail::rk2_update_2(Unew, Uold, dUdt, dt); + post_stage(2, Unew); +} + +/** + * \brief Time stepping with RK3 + * + * \param Uold input FabArray/MultiFab data at time + * \param Unew output FabArray/MultiFab data at time+dt + * \param time time at the beginning of the step + * \param dt time step + * 
\param frhs computing the right-hand side + * \param fillbndry filling ghost cells + * \param store_crse_data storing right-hand side data for AMR + * \param post_stage post-processing stage results + */ +template +void RK3 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, + R&& store_crse_data, P&& post_stage = PostStageNoOp()) +{ + BL_PROFILE("RungeKutta3"); + + Array rkk; + for (auto& mf : rkk) { + mf.define(Unew.boxArray(), Unew.DistributionMap(), Unew.nComp(), 0, + MFInfo(), Unew.Factory()); + } + + // RK3 stage 1 + fillbndry(1, Uold, time); + frhs(1, rkk[0], Uold, time, dt/Real(6.)); + // Unew = Uold + k1 * dt + detail::rk_update(Unew, Uold, rkk[0], dt); + post_stage(1, Unew); + + // RK3 stage 2 + fillbndry(2, Unew, time+dt); + frhs(2, rkk[1], Unew, time+dt, dt/Real(6.)); + // Unew = Uold + (k1+k2) * dt/4 + detail::rk_update(Unew, Uold, rkk[0], rkk[1], Real(0.25)*dt); + post_stage(2, Unew); + + // RK3 stage 3 + Real t_half = time + Real(0.5)*dt; + fillbndry(3, Unew, t_half); + frhs(3, rkk[2], Unew, t_half, dt*Real(2./3.)); + // Unew = Uold + (k1/6 + k2/6 + k3*(2/3)) * dt + detail::rk3_update_3(Unew, Uold, rkk, Real(1./6.)*dt); + post_stage(3, Unew); + + store_crse_data(rkk); +} + +/** + * \brief Time stepping with RK4 + * + * \param Uold input FabArray/MultiFab data at time + * \param Unew output FabArray/MultiFab data at time+dt + * \param time time at the beginning of the step + * \param dt time step + * \param frhs computing the right-hand side + * \param fillbndry filling ghost cells + * \param store_crse_data storing right-hand side data for AMR + * \param post_stage post-processing stage results + */ +template +void RK4 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, + R&& store_crse_data, P&& post_stage = PostStageNoOp()) +{ + BL_PROFILE("RungeKutta4"); + + Array rkk; + for (auto& mf : rkk) { + mf.define(Unew.boxArray(), Unew.DistributionMap(), Unew.nComp(), 0, + MFInfo(), Unew.Factory()); + } + + // RK4 stage 1 + 
fillbndry(1, Uold, time); + frhs(1, rkk[0], Uold, time, dt/Real(6.)); + // Unew = Uold + k1 * dt/2 + detail::rk_update(Unew, Uold, rkk[0], Real(0.5)*dt); + post_stage(1, Unew); + + // RK4 stage 2 + Real t_half = time + Real(0.5)*dt; + fillbndry(2, Unew, t_half); + frhs(2, rkk[1], Unew, t_half, dt/Real(3.)); + // Unew = Uold + k2 * dt/2 + detail::rk_update(Unew, Uold, rkk[1], Real(0.5)*dt); + post_stage(2, Unew); + + // RK4 stage 3 + fillbndry(3, Unew, t_half); + frhs(3, rkk[2], Unew, t_half, dt/Real(3.)); + // Unew = Uold + k3 * dt; + detail::rk_update(Unew, Uold, rkk[2], dt); + post_stage(3, Unew); + + // RK4 stage 4 + fillbndry(4, Unew, time+dt); + frhs(4, rkk[3], Unew, time+dt, dt/Real(6.)); + // Unew = Uold + (k1/6 + k2/3 + k3/3 + k4/6) * dt + detail::rk4_update_4(Unew, Uold, rkk, Real(1./6.)*dt); + post_stage(4, Unew); + + store_crse_data(rkk); +} + +}} + +#endif diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index c47fdcae706..f09897ff6f7 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -188,6 +188,7 @@ target_sources( amrex AMReX_IntegratorBase.H AMReX_RKIntegrator.H AMReX_TimeIntegrator.H + AMReX_RungeKutta.H # GPU -------------------------------------------------------------------- AMReX_Gpu.H AMReX_GpuQualifiers.H diff --git a/Src/Base/Make.package b/Src/Base/Make.package index 79085ae70a1..cd15687dce1 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -203,7 +203,7 @@ C$(AMREX_BASE)_headers += AMReX_FEIntegrator.H C$(AMREX_BASE)_headers += AMReX_IntegratorBase.H C$(AMREX_BASE)_headers += AMReX_RKIntegrator.H C$(AMREX_BASE)_headers += AMReX_TimeIntegrator.H - +C$(AMREX_BASE)_headers += AMReX_RungeKutta.H # # Fortran interface routines. 
diff --git a/Tests/GPU/CNS/Source/CNS.H b/Tests/GPU/CNS/Source/CNS.H index 877f0b523da..eedb7d486ba 100644 --- a/Tests/GPU/CNS/Source/CNS.H +++ b/Tests/GPU/CNS/Source/CNS.H @@ -157,6 +157,8 @@ protected: static int do_reflux; + static int rk_order; + static bool do_visc; static bool use_const_visc; diff --git a/Tests/GPU/CNS/Source/CNS.cpp b/Tests/GPU/CNS/Source/CNS.cpp index c3b5e2fb600..1a073c68c8a 100644 --- a/Tests/GPU/CNS/Source/CNS.cpp +++ b/Tests/GPU/CNS/Source/CNS.cpp @@ -19,6 +19,7 @@ int CNS::verbose = 0; IntVect CNS::hydro_tile_size {AMREX_D_DECL(1024,16,16)}; Real CNS::cfl = 0.3; int CNS::do_reflux = 1; +int CNS::rk_order = 2; int CNS::refine_max_dengrad_lev = -1; Real CNS::refine_dengrad = 1.0e10; @@ -241,6 +242,9 @@ CNS::post_timestep (int /*iteration*/) if (level < parent->finestLevel()) { avgDown(); + // fillpatcher on level+1 needs to be reset because data on this + // level have changed. + getLevel(level+1).resetFillPatcher(); } } @@ -354,6 +358,7 @@ CNS::read_params () } pp.query("do_reflux", do_reflux); + pp.query("rk_order", rk_order); pp.query("do_visc", do_visc); diff --git a/Tests/GPU/CNS/Source/CNS_advance.cpp b/Tests/GPU/CNS/Source/CNS_advance.cpp index c086cac0e9f..99749dded19 100644 --- a/Tests/GPU/CNS/Source/CNS_advance.cpp +++ b/Tests/GPU/CNS/Source/CNS_advance.cpp @@ -7,7 +7,7 @@ using namespace amrex; Real -CNS::advance (Real time, Real dt, int /*iteration*/, int /*ncycle*/) +CNS::advance (Real time, Real dt, int iteration, int ncycle) { BL_PROFILE("CNS::advance()"); @@ -16,11 +16,6 @@ CNS::advance (Real time, Real dt, int /*iteration*/, int /*ncycle*/) state[i].swapTimeLevels(dt); } - MultiFab& S_new = get_new_data(State_Type); - MultiFab& S_old = get_old_data(State_Type); - MultiFab dSdt(grids,dmap,NUM_STATE,0,MFInfo(),Factory()); - MultiFab Sborder(grids,dmap,NUM_STATE,NUM_GROW,MFInfo(),Factory()); - FluxRegister* fr_as_crse = nullptr; if (do_reflux && level < parent->finestLevel()) { CNS& fine_level = getLevel(level+1); @@ -36,23 
+31,14 @@ CNS::advance (Real time, Real dt, int /*iteration*/, int /*ncycle*/) fr_as_crse->setVal(Real(0.0)); } - // RK2 stage 1 - FillPatch(*this, Sborder, NUM_GROW, time, State_Type, 0, NUM_STATE); - compute_dSdt(Sborder, dSdt, Real(0.5)*dt, fr_as_crse, fr_as_fine); - // U^* = U^n + dt*dUdt^n - MultiFab::LinComb(S_new, Real(1.0), Sborder, 0, dt, dSdt, 0, 0, NUM_STATE, 0); - computeTemp(S_new,0); - - // RK2 stage 2 - // After fillpatch Sborder = U^n+dt*dUdt^n - FillPatch(*this, Sborder, NUM_GROW, time+dt, State_Type, 0, NUM_STATE); - compute_dSdt(Sborder, dSdt, Real(0.5)*dt, fr_as_crse, fr_as_fine); - // S_new = 0.5*(Sborder+S_old) = U^n + 0.5*dt*dUdt^n - MultiFab::LinComb(S_new, Real(0.5), Sborder, 0, Real(0.5), S_old, 0, 0, NUM_STATE, 0); - // S_new += 0.5*dt*dSdt - MultiFab::Saxpy(S_new, Real(0.5)*dt, dSdt, 0, 0, NUM_STATE, 0); - // We now have S_new = U^{n+1} = (U^n+0.5*dt*dUdt^n) + 0.5*dt*dUdt^* - computeTemp(S_new,0); + RK(rk_order, State_Type, time, dt, iteration, ncycle, + // Given state S, compute dSdt. dtsub is needed for flux register operations + [&] (int /*stage*/, MultiFab& dSdt, MultiFab const& S, + Real /*t*/, Real dtsub) { + compute_dSdt(S, dSdt, dtsub, fr_as_crse, fr_as_fine); + }, + // Optional. In case if there is anything needed after each RK substep. 
+ [&] (int /*stage*/, MultiFab& S) { computeTemp(S,0); }); return dt; } @@ -254,5 +240,3 @@ CNS::compute_dSdt (const MultiFab& S, MultiFab& dSdt, Real dt, } } } - - diff --git a/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H b/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H index b9bf5a18f78..75f4f784fad 100644 --- a/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H +++ b/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H @@ -17,24 +17,24 @@ cns_diffcoef (int i, int j, int k, { using amrex::Real; - coefs(i,j,k,CETA) = parm.C_S * std::sqrt(q(i,j,k,QTEMP)) * q(i,j,k,QTEMP) / (q(i,j,k,QTEMP)+parm.T_S); - coefs(i,j,k,CXI) = Real(0.0); - coefs(i,j,k,CLAM) = coefs(i,j,k,CETA)*parm.cp/parm.Pr; + coefs(i,j,k,CETA) = parm.C_S * std::sqrt(q(i,j,k,QTEMP)) * q(i,j,k,QTEMP) / (q(i,j,k,QTEMP)+parm.T_S); + coefs(i,j,k,CXI) = Real(0.0); + coefs(i,j,k,CLAM) = coefs(i,j,k,CETA)*parm.cp/parm.Pr; } AMREX_GPU_DEVICE inline void cns_constcoef (int i, int j, int k, - amrex::Array4 const& q, + amrex::Array4 const& /*q*/, amrex::Array4 const& coefs, Parm const& parm) noexcept { using amrex::Real; - coefs(i,j,k,CETA) = parm.const_visc_mu; - coefs(i,j,k,CXI) = parm.const_visc_ki; - coefs(i,j,k,CLAM) = parm.const_lambda; + coefs(i,j,k,CETA) = parm.const_visc_mu; + coefs(i,j,k,CXI) = parm.const_visc_ki; + coefs(i,j,k,CLAM) = parm.const_lambda; } AMREX_GPU_DEVICE @@ -45,7 +45,7 @@ cns_diff_x (int i, int j, int k, amrex::Array4 const& coeffs, amrex::GpuArray const& dxinv, amrex::Array4 const& fx, - Parm const& parm) noexcept + Parm const& /*parm*/) noexcept { using amrex::Real; @@ -81,7 +81,7 @@ cns_diff_y (int i, int j, int k, amrex::Array4 const& q, amrex::Array4 const& coeffs, amrex::GpuArray const& dxinv, amrex::Array4 const& fy, - Parm const& parm) noexcept + Parm const& /*parm*/) noexcept { using amrex::Real; @@ -119,7 +119,7 @@ cns_diff_z (int i, int j, int k, amrex::Array4 const& coeffs, amrex::GpuArray const& dxinv, amrex::Array4 const& fz, - Parm const& parm) noexcept + Parm const& 
/*parm*/) noexcept { using amrex::Real; From 9a3cd5d985ad357ab78d8f06f397cfc741448fdc Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Fri, 14 Oct 2022 17:27:41 -0700 Subject: [PATCH 098/111] CMake Docs: Fix User-Guidance (Link) (#2990) Update the user-guidance on CMake dependency linking to CMake 3.0+ (anno. 2014+). Seen in #2978 --- Docs/sphinx_documentation/source/BuildingAMReX.rst | 4 ++-- Docs/sphinx_documentation/source/GPU.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Docs/sphinx_documentation/source/BuildingAMReX.rst b/Docs/sphinx_documentation/source/BuildingAMReX.rst index 7b3273bf874..8a377377700 100644 --- a/Docs/sphinx_documentation/source/BuildingAMReX.rst +++ b/Docs/sphinx_documentation/source/BuildingAMReX.rst @@ -584,7 +584,7 @@ the following line in the appropriate CMakeLists.txt file: :: - target_link_libraries( AMReX:: ) + target_link_libraries( PUBLIC AMReX:: ) In the above snippet, ```` is any of the targets listed in the table below. @@ -709,7 +709,7 @@ As an example, consider the following CMake code: :: find_package(AMReX REQUIRED 3D EB) - target_link_libraries( Foo AMReX::amrex AMReX::Flags_CXX ) + target_link_libraries( Foo PUBLIC AMReX::amrex ) The code in the snippet above checks whether an AMReX installation with 3D and Embedded Boundary support is available on the system. 
If so, AMReX is linked to target ``Foo`` and AMReX flags preset is used diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst index 5e6e153f6e0..4984b839132 100644 --- a/Docs/sphinx_documentation/source/GPU.rst +++ b/Docs/sphinx_documentation/source/GPU.rst @@ -315,7 +315,7 @@ we provide the helper function ``setup_target_for_cuda_compilation()``: setup_target_for_cuda_compilation(my_target) # Link against amrex - target_link_libraries(my_target AMReX::amrex) + target_link_libraries(my_target PUBLIC AMReX::amrex) From bcbf17f1cee4cd3209552cd0cafb2558c9254f20 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 14 Oct 2022 19:48:14 -0700 Subject: [PATCH 099/111] 2D RZ solver for WarpX: Arbitrary coefficient (#2986) The assumption in the 2D RZ solver for WarpX used to be there was no sigma_r (i.e., sigma_r == 1). In this PR, we allow arbitrary sigma_r coefficient. --- .../MLMG/AMReX_MLEBNodeFDLap_2D_K.H | 44 +++++++++---------- .../MLMG/AMReX_MLEBNodeFDLaplacian.H | 4 +- .../MLMG/AMReX_MLEBNodeFDLaplacian.cpp | 17 ++++--- 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H index 1b490726405..08439f9f99b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H @@ -200,7 +200,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz_eb_doit (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, Array4 const& ecx, Array4 const& ecy, - F && xeb, Real dr, Real dz, Real rlo) noexcept + F && xeb, Real sigr, Real dr, Real dz, Real rlo) noexcept { if (dmsk(i,j,k)) { y(i,j,k) = Real(0.0); @@ -211,11 +211,11 @@ void mlebndfdlap_adotx_rz_eb_doit (int i, int j, int k, Array4 const& y, Real const r = rlo + Real(i) * dr; if (r == Real(0.0)) { if (ecx(i,j,k) == Real(1.0)) { // regular - out = Real(4.0) * (x(i+1,j,k)-x(i,j,k)) / 
(dr*dr); + out = Real(4.0) * sigr * (x(i+1,j,k)-x(i,j,k)) / (dr*dr); scale = Real(1.0); } else { hp = Real(1.0) + Real(2.) * ecx(i,j,k); - out = Real(4.0) * (xeb(i+1,j,k)-x(i,j,k)) / (dr*dr*hp*hp); + out = Real(4.0) * sigr * (xeb(i+1,j,k)-x(i,j,k)) / (dr*dr*hp*hp); scale = hp; } } else { @@ -235,7 +235,7 @@ void mlebndfdlap_adotx_rz_eb_doit (int i, int j, int k, Array4 const& y, tmp += (xeb(i-1,j,k) - x(i,j,k)) / hm * (r - Real(0.5) * hp * dr); } - out = tmp * Real(2.0) / ((hp+hm) * r * dr * dr); + out = tmp * Real(2.0) * sigr / ((hp+hm) * r * dr * dr); scale = amrex::min(hm, hp); } @@ -266,29 +266,29 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz_eb (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, Array4 const& ecx, Array4 const& ecy, - Real xeb, Real dr, Real dz, Real rlo) noexcept + Real xeb, Real sigr, Real dr, Real dz, Real rlo) noexcept { mlebndfdlap_adotx_rz_eb_doit(i, j, k, y, x, dmsk, ecx, ecy, [=] (int, int, int) -> Real { return xeb; }, - dr, dz, rlo); + sigr, dr, dz, rlo); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz_eb (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, Array4 const& ecx, Array4 const& ecy, - Array4 const& xeb, Real dr, Real dz, Real rlo) noexcept + Array4 const& xeb, Real sigr, Real dr, Real dz, Real rlo) noexcept { mlebndfdlap_adotx_rz_eb_doit(i, j, k, y, x, dmsk, ecx, ecy, [=] (int i1, int i2, int i3) -> Real { return xeb(i1,i2,i3); }, - dr, dz, rlo); + sigr, dr, dz, rlo); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, - Real dr, Real dz, Real rlo) noexcept + Real sigr, Real dr, Real dz, Real rlo) noexcept { if (dmsk(i,j,k)) { y(i,j,k) = Real(0.0); @@ -296,11 +296,11 @@ void mlebndfdlap_adotx_rz (int i, int j, int k, Array4 const& y, Real Ax = (x(i,j-1,k) - Real(2.0)*x(i,j,k) + x(i,j+1,k)) / (dz*dz); Real const r = rlo + Real(i)*dr; if (r 
== Real(0.0)) { - Ax += Real(4.0) * (x(i+1,j,k)-x(i,j,k)) / (dr*dr); + Ax += Real(4.0) * sigr * (x(i+1,j,k)-x(i,j,k)) / (dr*dr); } else { Real const rp = r + Real(0.5)*dr; Real const rm = r - Real(0.5)*dr; - Ax += (rp*x(i+1,j,k) - (rp+rm)*x(i,j,k) + rm*x(i-1,j,k)) / (r*dr*dr); + Ax += sigr * (rp*x(i+1,j,k) - (rp+rm)*x(i,j,k) + rm*x(i-1,j,k)) / (r*dr*dr); } y(i,j,k) = Ax; } @@ -310,7 +310,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, Array4 const& rhs, Array4 const& dmsk, Array4 const& ecx, Array4 const& ecy, - Real dr, Real dz, Real rlo, int redblack) noexcept + Real sigr, Real dr, Real dz, Real rlo, int redblack) noexcept { if ((i+j+k+redblack)%2 == 0) { if (dmsk(i,j,k)) { @@ -322,12 +322,12 @@ void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, Real const r = rlo + Real(i) * dr; if (r == Real(0.0)) { if (ecx(i,j,k) == Real(1.0)) { // regular - Ax = (Real(4.0) / (dr*dr)) * (x(i+1,j,k)-x(i,j,k)); - gamma = -(Real(4.0) / (dr*dr)); + Ax = (Real(4.0) * sigr / (dr*dr)) * (x(i+1,j,k)-x(i,j,k)); + gamma = -(Real(4.0) * sigr / (dr*dr)); scale = Real(1.0); } else { hp = Real(1.0) + Real(2.) 
* ecx(i,j,k); - gamma = -(Real(4.0) / (dr*dr*hp*hp)); + gamma = -(Real(4.0) * sigr / (dr*dr*hp*hp)); Ax = gamma * x(i,j,k); scale = hp; } @@ -352,8 +352,8 @@ void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, tmp0 += Real(-1.0) / hm * (r - Real(0.5) * hp * dr); } - Ax = tmp * Real(2.0) / ((hp+hm) * r * dr * dr); - gamma = tmp0 * Real(2.0) / ((hp+hm) * r * dr * dr); + Ax = tmp * Real(2.0) * sigr / ((hp+hm) * r * dr * dr); + gamma = tmp0 * Real(2.0) * sigr / ((hp+hm) * r * dr * dr); scale = amrex::min(hm, hp); } @@ -390,7 +390,7 @@ void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_gsrb_rz (int i, int j, int k, Array4 const& x, Array4 const& rhs, Array4 const& dmsk, - Real dr, Real dz, Real rlo, int redblack) noexcept + Real sigr, Real dr, Real dz, Real rlo, int redblack) noexcept { if ((i+j+k+redblack)%2 == 0) { if (dmsk(i,j,k)) { @@ -400,13 +400,13 @@ void mlebndfdlap_gsrb_rz (int i, int j, int k, Array4 const& x, Real gamma = -Real(2.0) / (dz*dz); Real const r = rlo + Real(i)*dr; if (r == Real(0.0)) { - Ax += (Real(4.0)/(dr*dr)) * (x(i+1,j,k)-x(i,j,k)); - gamma += -(Real(4.0)/(dr*dr)); + Ax += (Real(4.0)*sigr/(dr*dr)) * (x(i+1,j,k)-x(i,j,k)); + gamma += -(Real(4.0)*sigr/(dr*dr)); } else { Real const rp = r + Real(0.5)*dr; Real const rm = r - Real(0.5)*dr; - Ax += (rp*x(i+1,j,k) - (rp+rm)*x(i,j,k) + rm*x(i-1,j,k)) / (r*dr*dr); - gamma += -(rp+rm) / (r*dr*dr); + Ax += sigr*(rp*x(i+1,j,k) - (rp+rm)*x(i,j,k) + rm*x(i-1,j,k)) / (r*dr*dr); + gamma += -sigr*(rp+rm) / (r*dr*dr); } constexpr Real omega = Real(1.25); x(i,j,k) += (rhs(i,j,k) - Ax) * (omega / gamma); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H index 1c074ff115b..404aefc8c0b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H @@ -19,8 +19,8 @@ namespace amrex { // with only diagonal 
components. The EB is assumed to be Dirichlet. // // del dot (simga grad phi) - alpha/r^2 phi = rhs, for RZ where alpha is a -// scalar constant that is zero by default. sigma is non-zero in -// z-direction only. For now the `alpha` term has not been implemented yet. +// scalar constant that is zero by default. For now the `alpha` term has +// not been implemented yet class MLEBNodeFDLaplacian : public MLNodeLinOp diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp index fe32603e995..920e8540200 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp @@ -310,8 +310,9 @@ MLEBNodeFDLaplacian::prepareForSolve () AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_lobc[0][0] == BCType::Neumann, "The lo-x BC must be Neumann for 2d RZ"); } - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_sigma[0] == 0._rt, - "r-direction sigma must be zero"); + if (m_sigma[0] == 0._rt) { + m_sigma[0] = 1._rt; // For backward compatibility + } } #endif } @@ -356,6 +357,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); #if (AMREX_SPACEDIM == 2) + const auto sig0 = m_sigma[0]; const auto dx0 = m_geom[amrlev][mglev].CellSize(0); const auto dx1 = m_geom[amrlev][mglev].CellSize(1)/std::sqrt(m_sigma[1]); const auto xlo = m_geom[amrlev][mglev].ProbLo(0); @@ -396,7 +398,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_adotx_rz_eb(i,j,k,yarr,xarr,dmarr,ecx,ecy, - phiebarr, dx0, dx1, xlo); + phiebarr, sig0, dx0, dx1, xlo); }); } else #endif @@ -413,7 +415,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_adotx_rz_eb(i,j,k,yarr,xarr,dmarr,ecx,ecy, - phieb, dx0, dx1, xlo); + phieb, sig0, dx0, dx1, xlo); }); } else #endif 
@@ -432,7 +434,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa if (m_rz) { AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { - mlebndfdlap_adotx_rz(i,j,k,yarr,xarr,dmarr,dx0,dx1,xlo); + mlebndfdlap_adotx_rz(i,j,k,yarr,xarr,dmarr,sig0,dx0,dx1,xlo); }); } else #endif @@ -453,6 +455,7 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); #if (AMREX_SPACEDIM == 2) + const auto sig0 = m_sigma[0]; const auto dx0 = m_geom[amrlev][mglev].CellSize(0); const auto dx1 = m_geom[amrlev][mglev].CellSize(1)/std::sqrt(m_sigma[1]); const auto xlo = m_geom[amrlev][mglev].ProbLo(0); @@ -495,7 +498,7 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_gsrb_rz_eb(i,j,k,solarr,rhsarr,dmskarr,ecx,ecy, - dx0, dx1, xlo, redblack); + sig0, dx0, dx1, xlo, redblack); }); } else #endif @@ -514,7 +517,7 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_gsrb_rz(i,j,k,solarr,rhsarr,dmskarr, - dx0, dx1, xlo, redblack); + sig0, dx0, dx1, xlo, redblack); }); } else #endif From 56b6402d238979fca6e7c57fdc644a54c4cf6fce Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 15 Oct 2022 14:59:38 -0700 Subject: [PATCH 100/111] ParallelFor with compile time optimization of kernels with run time parameters (#2954) Branches inside ParallelFor can be very expensive. If a branch uses a lot of resources (e.g., registers), it can significantly affect the performance even if at run time the branch is never executed because it affects the GPU occupancy. For CPUs, it can affect vectorization of the kernel. The new ParallelFor functions use C++17 fold expressions to generate kernel launches for all run time variants. Only one will be executed. Which one is chosen at run time depends on the run time parameters.
The kernel function can use constexpr if to discard unused code blocks for better run time performance. Here are two examples of how to use them. int runtime_option = ...; enum All_options : int { A0, A1, A2, A3}; // Four ParallelFors will be generated. ParallelFor(TypeList<CompileTimeOptions<A0,A1,A2,A3>>{}, {runtime_option}, box, [=] AMREX_GPU_DEVICE (int i, int j, int k, auto control) { ... if constexpr (control.value == A0) { ... } else if constexpr (control.value == A1) { ... } else if constexpr (control.value == A2) { ... } else { ... } ... }); and int A_runtime_option = ...; int B_runtime_option = ...; enum A_options : int { A0, A1, A2, A3}; enum B_options : int { B0, B1 }; // 4*2=8 ParallelFors will be generated. ParallelFor(TypeList<CompileTimeOptions<A0,A1,A2,A3>, CompileTimeOptions<B0,B1> > {}, {A_runtime_option, B_runtime_option}, N, [=] AMREX_GPU_DEVICE (int i, auto A_control, auto B_control) { ... if constexpr (A_control.value == A0) { ... } else if constexpr (A_control.value == A1) { ... } else if constexpr (A_control.value == A2) { ... } else { ... } if constexpr (A_control.value != A3 && B_control.value == B1) { ... } ... }); Note that due to a limitation of CUDA's extended device lambda, the constexpr if block cannot be the one that captures a variable first. If nvcc complains about it, you will have to manually capture it outside constexpr if. The data type for the parameters is int. Thanks to Maikel Nadolski and Alex Sinn for showing us the meta-programming techniques used here.
--- Src/Base/AMReX_CTOParallelForImpl.H | 331 ++++++++++++++++++++++++++++ Src/Base/AMReX_GpuLaunch.H | 2 + Src/Base/CMakeLists.txt | 1 + Src/Base/Make.package | 1 + Tests/CMakeLists.txt | 2 +- Tests/CTOParFor/CMakeLists.txt | 7 + Tests/CTOParFor/GNUmakefile | 20 ++ Tests/CTOParFor/Make.package | 4 + Tests/CTOParFor/main.cpp | 64 ++++++ 9 files changed, 431 insertions(+), 1 deletion(-) create mode 100644 Src/Base/AMReX_CTOParallelForImpl.H create mode 100644 Tests/CTOParFor/CMakeLists.txt create mode 100644 Tests/CTOParFor/GNUmakefile create mode 100644 Tests/CTOParFor/Make.package create mode 100644 Tests/CTOParFor/main.cpp diff --git a/Src/Base/AMReX_CTOParallelForImpl.H b/Src/Base/AMReX_CTOParallelForImpl.H new file mode 100644 index 00000000000..f4dd41ca0c8 --- /dev/null +++ b/Src/Base/AMReX_CTOParallelForImpl.H @@ -0,0 +1,331 @@ +#ifndef AMREX_CTO_PARALLEL_FOR_H_ +#define AMREX_CTO_PARALLEL_FOR_H_ + +#include +#include +#include + +#include +#include + +/* This header is not for the users to include directly. It's meant to be + * included in AMReX_GpuLaunch.H, which has included the headers needed + * here. */ + +/* Thank Maikel Nadolski and Alex Sinn for the techniques used here! */ + +namespace amrex { + +template +struct CompileTimeOptions { + // TypeList is defined in AMReX_Tuple.H + using list_type = TypeList...>; +}; + +#if (__cplusplus >= 201703L) + +namespace meta +{ + template + constexpr auto operator+ (TypeList, TypeList) { + return TypeList{}; + } + + template + constexpr auto single_product (TypeList, A) { + return TypeList{})...>{}; + } + + template + constexpr auto operator* (LLs, TypeList) { + return (TypeList<>{} + ... + single_product(LLs{}, As{})); + } + + template + constexpr auto cartesian_product_n (TypeList) { + return (TypeList>{} * ... 
* Ls{}); + } +} + +namespace detail +{ + template + std::enable_if_t::value || std::is_same::value, bool> + ParallelFor_helper2 (T const& N, F&& f, TypeList, + std::array const& runtime_options) + { + if (runtime_options == std::array{As::value...}) { + if constexpr (std::is_integral::value) { + ParallelFor(N, [f] AMREX_GPU_DEVICE (T i) noexcept + { + f(i, As{}...); + }); + } else { + ParallelFor(N, [f] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + f(i, j, k, As{}...); + }); + } + return true; + } else { + return false; + } + } + + template + std::enable_if_t::value, bool> + ParallelFor_helper2 (Box const& box, T ncomp, F&& f, TypeList, + std::array const& runtime_options) + { + if (runtime_options == std::array{As::value...}) { + ParallelFor(box, ncomp, [f] AMREX_GPU_DEVICE (int i, int j, int k, T n) noexcept + { + f(i, j, k, n, As{}...); + }); + return true; + } else { + return false; + } + } + + template + std::enable_if_t::value || std::is_same::value> + ParallelFor_helper1 (T const& N, F&& f, TypeList, + RO const& runtime_options) + { + bool found_option = (false || ... || + ParallelFor_helper2(N, std::forward(f), + PPs{}, runtime_options)); + amrex::ignore_unused(found_option); + AMREX_ASSERT(found_option); + } + + template + std::enable_if_t::value> + ParallelFor_helper1 (Box const& box, T ncomp, F&& f, TypeList, + RO const& runtime_options) + { + bool found_option = (false || ... 
|| + ParallelFor_helper2(box, ncomp, std::forward(f), + PPs{}, runtime_options)); + amrex::ignore_unused(found_option); + AMREX_ASSERT(found_option); + } +} + +#endif + +template +std::enable_if_t::value> +ParallelFor (TypeList /*list_of_compile_time_options*/, + std::array const& runtime_options, + T N, F&& f) +{ +#if (__cplusplus >= 201703L) + using OptionsListList = TypeList; + detail::ParallelFor_helper1(N, std::forward(f), + meta::cartesian_product_n(OptionsListList{}), + runtime_options); +#else + amrex::ignore_unused(N, f, runtime_options); + static_assert(std::is_integral::value, "This requires C++17"); +#endif +} + +template +void ParallelFor (TypeList /*list_of_compile_time_options*/, + std::array const& runtime_options, + Box const& box, F&& f) +{ +#if (__cplusplus >= 201703L) + using OptionsListList = TypeList; + detail::ParallelFor_helper1(box, std::forward(f), + meta::cartesian_product_n(OptionsListList{}), + runtime_options); +#else + amrex::ignore_unused(box, f, runtime_options); + static_assert(std::is_integral::value, "This requires C++17"); +#endif +} + +template +std::enable_if_t::value> +ParallelFor (TypeList /*list_of_compile_time_options*/, + std::array const& runtime_options, + Box const& box, T ncomp, F&& f) +{ +#if (__cplusplus >= 201703L) + using OptionsListList = TypeList; + detail::ParallelFor_helper1(box, ncomp, std::forward(f), + meta::cartesian_product_n(OptionsListList{}), + runtime_options); +#else + amrex::ignore_unused(box, ncomp, f, runtime_options); + static_assert(std::is_integral::value, "This requires C++17"); +#endif +} + +/** + * \brief ParallelFor with compile time optimization of kernels with run time options. + * + * It uses fold expression to generate kernel launches for all combinations + * of the run time options. The kernel function can use constexpr if to + * discard unused code blocks for better run time performance. 
In the + * example below, the code will be expanded into 4*2=8 normal ParallelFors + * for all combinations of the run time parameters. + \verbatim + int A_runtime_option = ...; + int B_runtime_option = ...; + enum A_options : int { A0, A1, A2, A3}; + enum B_options : int { B0, B1 }; + ParallelFor(TypeList, + CompileTimeOptions>{}, + {A_runtime_option, B_runtime_option}, + N, [=] AMREX_GPU_DEVICE (int i, auto A_control, auto B_control) + { + ... + if constexpr (A_control.value == A0) { + ... + } else if constexpr (A_control.value == A1) { + ... + } else if constexpr (A_control.value == A2) { + ... + else { + ... + } + if constexpr (A_control.value != A3 && B_control.value == B1) { + ... + } + ... + }); + \endverbatim + * Note that due to a limitation of CUDA's extended device lambda, the + * constexpr if block cannot be the one that captures a variable first. + * If nvcc complains about it, you will have to manually capture it outside + * constexpr if. The data type for the parameters is int. + * + * \param ctos list of all possible values of the parameters. + * \param option the run time parameters. + * \param N an interger specifying the 1D for loop's range. + * \param f a callable object taking an integer and working on that iteration. + */ +template +std::enable_if_t::value> +ParallelFor (TypeList ctos, + std::array const& option, + T N, F&& f) +{ + ParallelFor(ctos, option, N, std::forward(f)); +} + +/** + * \brief ParallelFor with compile time optimization of kernels with run time options. + * + * It uses fold expression to generate kernel launches for all combinations + * of the run time options. The kernel function can use constexpr if to + * discard unused code blocks for better run time performance. In the + * example below, the code will be expanded into 4*2=8 normal ParallelFors + * for all combinations of the run time parameters. 
+ \verbatim + int A_runtime_option = ...; + int B_runtime_option = ...; + enum A_options : int { A0, A1, A2, A3}; + enum B_options : int { B0, B1 }; + ParallelFor(TypeList, + CompileTimeOptions>{}, + {A_runtime_option, B_runtime_option}, + box, [=] AMREX_GPU_DEVICE (int i, int j, int k, + auto A_control, auto B_control) + { + ... + if constexpr (A_control.value == A0) { + ... + } else if constexpr (A_control.value == A1) { + ... + } else if constexpr (A_control.value == A2) { + ... + else { + ... + } + if constexpr (A_control.value != A3 && B_control.value == B1) { + ... + } + ... + }); + \endverbatim + * Note that due to a limitation of CUDA's extended device lambda, the + * constexpr if block cannot be the one that captures a variable first. + * If nvcc complains about it, you will have to manually capture it outside + * constexpr if. The data type for the parameters is int. + * + * \param ctos list of all possible values of the parameters. + * \param option the run time parameters. + * \param box a Box specifying the 3D for loop's range. + * \param f a callable object taking three integers and working on the given cell. + */ +template +void ParallelFor (TypeList ctos, + std::array const& option, + Box const& box, F&& f) +{ + ParallelFor(ctos, option, box, std::forward(f)); +} + +/** + * \brief ParallelFor with compile time optimization of kernels with run time options. + * + * It uses fold expression to generate kernel launches for all combinations + * of the run time options. The kernel function can use constexpr if to + * discard unused code blocks for better run time performance. In the + * example below, the code will be expanded into 4*2=8 normal ParallelFors + * for all combinations of the run time parameters. 
+ \verbatim + int A_runtime_option = ...; + int B_runtime_option = ...; + enum A_options : int { A0, A1, A2, A3}; + enum B_options : int { B0, B1 }; + ParallelFor(TypeList, + CompileTimeOptions>{}, + {A_runtime_option, B_runtime_option}, + box, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, + auto A_control, auto B_control) + { + ... + if constexpr (A_control.value == A0) { + ... + } else if constexpr (A_control.value == A1) { + ... + } else if constexpr (A_control.value == A2) { + ... + else { + ... + } + if constexpr (A_control.value != A3 && B_control.value == B1) { + ... + } + ... + }); + \endverbatim + * Note that due to a limitation of CUDA's extended device lambda, the + * constexpr if block cannot be the one that captures a variable first. + * If nvcc complains about it, you will have to manually capture it outside + * constexpr if. The data type for the parameters is int. + * + * \param ctos list of all possible values of the parameters. + * \param option the run time parameters. + * \param box a Box specifying the iteration in 3D space. + * \param ncomp an integer specifying the range for iteration over components. + * \param f a callable object taking three integers and working on the given cell. 
+ */ +template +std::enable_if_t::value> +ParallelFor (TypeList ctos, + std::array const& option, + Box const& box, T ncomp, F&& f) +{ + ParallelFor(ctos, option, box, ncomp, std::forward(f)); +} + +} + +#endif diff --git a/Src/Base/AMReX_GpuLaunch.H b/Src/Base/AMReX_GpuLaunch.H index a91cf45297d..7e877140629 100644 --- a/Src/Base/AMReX_GpuLaunch.H +++ b/Src/Base/AMReX_GpuLaunch.H @@ -550,4 +550,6 @@ namespace Gpu { #endif +#include + #endif diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index f09897ff6f7..c1212cf37fe 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -224,6 +224,7 @@ target_sources( amrex AMReX_MFParallelForC.H AMReX_MFParallelForG.H AMReX_TagParallelFor.H + AMReX_CTOParallelForImpl.H AMReX_ParReduce.H # CUDA -------------------------------------------------------------------- AMReX_CudaGraph.H diff --git a/Src/Base/Make.package b/Src/Base/Make.package index cd15687dce1..1fdca2587d7 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -100,6 +100,7 @@ C$(AMREX_BASE)_headers += AMReX_MFParallelForC.H C$(AMREX_BASE)_headers += AMReX_MFParallelForG.H C$(AMREX_BASE)_headers += AMReX_TagParallelFor.H +C$(AMREX_BASE)_headers += AMReX_CTOParallelForImpl.H C$(AMREX_BASE)_headers += AMReX_ParReduce.H diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index 50cc2bb8cb2..8d318f918b8 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -1,7 +1,7 @@ # # List of subdirectories to search for CMakeLists. 
# -set( AMREX_TESTS_SUBDIRS AsyncOut MultiBlock Amr CLZ Parser) +set( AMREX_TESTS_SUBDIRS AsyncOut MultiBlock Amr CLZ Parser CTOParFor) if (AMReX_PARTICLES) list(APPEND AMREX_TESTS_SUBDIRS Particles) diff --git a/Tests/CTOParFor/CMakeLists.txt b/Tests/CTOParFor/CMakeLists.txt new file mode 100644 index 00000000000..57c1e7715e2 --- /dev/null +++ b/Tests/CTOParFor/CMakeLists.txt @@ -0,0 +1,7 @@ +set(_sources main.cpp) +set(_input_files) + +setup_test(_sources _input_files) + +unset(_sources) +unset(_input_files) diff --git a/Tests/CTOParFor/GNUmakefile b/Tests/CTOParFor/GNUmakefile new file mode 100644 index 00000000000..0dbc65578af --- /dev/null +++ b/Tests/CTOParFor/GNUmakefile @@ -0,0 +1,20 @@ +AMREX_HOME = ../../ + +DEBUG = FALSE +DIM = 3 +COMP = gcc + +USE_MPI = FALSE +USE_OMP = FALSE +USE_CUDA = FALSE + +TINY_PROFILE = FALSE + +CXXSTD = c++17 + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/Tests/CTOParFor/Make.package b/Tests/CTOParFor/Make.package new file mode 100644 index 00000000000..4497b0e25b9 --- /dev/null +++ b/Tests/CTOParFor/Make.package @@ -0,0 +1,4 @@ +CEXE_sources += main.cpp + + + diff --git a/Tests/CTOParFor/main.cpp b/Tests/CTOParFor/main.cpp new file mode 100644 index 00000000000..0cf1d7ea35a --- /dev/null +++ b/Tests/CTOParFor/main.cpp @@ -0,0 +1,64 @@ +#include +#include + +using namespace amrex; + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); +#if (__cplusplus >= 201703L) + { + enum A_options: int { + A0 = 0, A1 + }; + + enum B_options: int { + B0 = 0, B1, B2 + }; + + Box box(IntVect(0),IntVect(7)); + IArrayBox fab(box,2); + fab.setVal(-10); + + auto const& arr = fab.array(); + + for (int ia = 0; ia < 2; ++ia) { + for (int ib = 0; ib < 3; ++ib) { + ParallelFor(TypeList, + CompileTimeOptions>{}, + {ia, ib}, + box, [=] AMREX_GPU_DEVICE (int i, int j, int k, + auto A_control, + auto 
B_control) + { + auto const& larr = arr; + int a, b; + if constexpr (A_control.value == 0) { + a = 0; + } else if constexpr (A_control.value == 1) { + a = 1; + } else { + a = -1; + } + if constexpr (B_control.value == 0) { + b = 0; + } else if constexpr (B_control.value == 1) { + b = 1; + } else if constexpr (B_control.value == 2) { + b = 2; + } else if constexpr (B_control.value == 3) { + b = 3; + } + larr(i,j,k) = a*10 + b; + }); + + auto s = fab.sum(0); + AMREX_ALWAYS_ASSERT(s == box.numPts()*(ia*10+ib)); + } + } + } +#else + amrex::Print() << "This test requires C++17." << std::endl; +#endif + amrex::Finalize(); +} From 9502b99cd98cc1aa70e5f19804c50252438ec1a6 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 18 Oct 2022 10:20:06 -0700 Subject: [PATCH 101/111] Add BCRec::set for convenience (#2993) --- Src/Base/AMReX_BCRec.H | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Src/Base/AMReX_BCRec.H b/Src/Base/AMReX_BCRec.H index 1980c727e81..d76760df9d9 100644 --- a/Src/Base/AMReX_BCRec.H +++ b/Src/Base/AMReX_BCRec.H @@ -74,6 +74,17 @@ public: AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void setHi (int dir, int bc_val) noexcept { bc[AMREX_SPACEDIM+dir] = bc_val; } /** + * \brief Explicitly set bndry value for given face. + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void set (Orientation face, int bc_val) noexcept { + if (face.isLow()) { + setLo(face.coordDir(), bc_val); + } else { + setHi(face.coordDir(), bc_val); + } + } + /** * \brief Return bndry values (used in calls to FORTRAN). */ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE From 0b88bfd3718ab226fc4a03a6598d320976744346 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 19 Oct 2022 13:39:18 -0700 Subject: [PATCH 102/111] Add user defined BC types (#2995) Add BCType::user_1, BCType::user_2 and BCType::user_3. Previously the only "user" type is ext_dir (external Dirichlet). 
The BC types are passed from the user's code to FillPatch, which in turn passes them back to the user provided BC filling function. These new types will make it easy for the user to determine the user defined BC types in their BC filling functions. --- Docs/sphinx_documentation/source/Basics.rst | 8 ++++++-- Docs/sphinx_documentation/source/LinearSolvers.rst | 5 ++--- Src/Base/AMReX_BC_TYPES.H | 6 ++++-- Src/Base/AMReX_bc_types_mod.F90 | 3 +++ 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/Docs/sphinx_documentation/source/Basics.rst b/Docs/sphinx_documentation/source/Basics.rst index dd4e53d455e..dc3022f7e12 100644 --- a/Docs/sphinx_documentation/source/Basics.rst +++ b/Docs/sphinx_documentation/source/Basics.rst @@ -2549,7 +2549,11 @@ The basic idea behind physical boundary conditions is as follows: Reflection from interior cells with sign changed, :math:`q(-i) = -q(i)`. -- For external Dirichlet boundaries, the user needs to provide a + user_1, user_2 and user_3 + "User". It is the user's responsibility to write a routine + to fill ghost cells (more details below). + +- For external Dirichlet and user boundaries, the user needs to provide a callable object like below. .. 
highlight:: c++ @@ -2564,7 +2568,7 @@ The basic idea behind physical boundary conditions is as follows: const BCRec* bcr, const int bcomp, const int orig_comp) const { - // external Dirichlet for cell iv + // external Dirichlet or user BC for cell iv } }; diff --git a/Docs/sphinx_documentation/source/LinearSolvers.rst b/Docs/sphinx_documentation/source/LinearSolvers.rst index a1df8760f3a..d893859e7c2 100644 --- a/Docs/sphinx_documentation/source/LinearSolvers.rst +++ b/Docs/sphinx_documentation/source/LinearSolvers.rst @@ -209,8 +209,8 @@ function :: - void setDomainBC (const Array& lobc, // for lower ends - const Array& hibc); // for higher ends + void setDomainBC (const Array& lobc, // for lower ends + const Array& hibc); // for higher ends The supported BC types at the physical domain boundaries are @@ -761,4 +761,3 @@ An example (implemented in the ``MultiComponent`` tutorial) might be: See ``amrex-tutorials/ExampleCodes/LinearSolvers/MultiComponent`` for a complete working example. .. 
solver reuse - diff --git a/Src/Base/AMReX_BC_TYPES.H b/Src/Base/AMReX_BC_TYPES.H index ea24a64addf..b735da6fddb 100644 --- a/Src/Base/AMReX_BC_TYPES.H +++ b/Src/Base/AMReX_BC_TYPES.H @@ -73,7 +73,10 @@ enum mathematicalBndryTypes : int { foextrap = 2, ext_dir = 3, hoextrap = 4, - hoextrapcc = 5 + hoextrapcc = 5, + user_1 = 1001, + user_2 = 1002, + user_3 = 1003 }; } @@ -102,4 +105,3 @@ enum mathematicalBndryTypes : int { #endif #endif - diff --git a/Src/Base/AMReX_bc_types_mod.F90 b/Src/Base/AMReX_bc_types_mod.F90 index c326d49e419..c1c6f237ba8 100644 --- a/Src/Base/AMReX_bc_types_mod.F90 +++ b/Src/Base/AMReX_bc_types_mod.F90 @@ -15,6 +15,9 @@ module amrex_bc_types_module integer, parameter, public :: amrex_bc_ext_dir = 3 integer, parameter, public :: amrex_bc_hoextrap = 4 integer, parameter, public :: amrex_bc_hoextrapcc = 5 + integer, parameter, public :: amrex_bc_user_1 = 1001 + integer, parameter, public :: amrex_bc_user_2 = 1002 + integer, parameter, public :: amrex_bc_user_3 = 1003 integer, parameter, public :: amrex_pbc_interior = 0 integer, parameter, public :: amrex_pbc_inflow = 1 From 3082028e42870b1ed37f0d26160ef078580511e3 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 19 Oct 2022 19:24:10 -0700 Subject: [PATCH 103/111] Update GitHub Actions (#2996) https://github.blog/changelog/2022-09-22-github-actions-all-actions-will-begin-running-on-node16-instead-of-node12/ ## Summary ## Additional background ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- .github/workflows/clang.yml | 6 +++--- .github/workflows/cuda.yml | 8 ++++---- .github/workflows/docs.yml | 2 +- .github/workflows/gcc.yml | 28 ++++++++++++++-------------- .github/workflows/hip.yml | 6 +++--- 
.github/workflows/intel.yml | 4 ++-- .github/workflows/macos.yml | 4 ++-- .github/workflows/sensei.yml | 2 +- .github/workflows/style.yml | 4 ++-- .github/workflows/windows.yml | 6 +++--- 10 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml index 79bbf1947b7..773126bca38 100644 --- a/.github/workflows/clang.yml +++ b/.github/workflows/clang.yml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-18.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wnon-virtual-dtor"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_clang6.sh - name: Build & Install @@ -53,7 +53,7 @@ jobs: env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -O1 -Wnon-virtual-dtor"} # It's too slow with -O0 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_clang6.sh - name: Build & Install @@ -83,7 +83,7 @@ jobs: name: Clang@6.0 NOMPI Release [configure 2D] runs-on: ubuntu-18.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_clang6.sh - name: Build & Install diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 6e080d8a848..c91bbecd48e 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-18.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond"} 
steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nvcc10.sh - name: Build & Install @@ -44,7 +44,7 @@ jobs: runs-on: ubuntu-20.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nvcc11.sh - name: Build & Install @@ -78,7 +78,7 @@ jobs: runs-on: ubuntu-20.04 env: {CXXFLAGS: "-Werror -Wall -Wextra -Wpedantic -Wshadow"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nvhpc21-11.sh - name: Build & Install @@ -119,7 +119,7 @@ jobs: name: CUDA@11.2 GNU@9.3.0 [configure 3D] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nvcc11.sh - name: Build & Install diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c0d50aa99e1..82e387cbff4 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2.3.1 # If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly. 
+ uses: actions/checkout@v3 with: persist-credentials: false diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 5ee581b4fef..8f1434d2ea3 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-18.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -46,7 +46,7 @@ jobs: env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # It's too slow with -O0 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -69,7 +69,7 @@ jobs: env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # It's too slow with -O0 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -93,7 +93,7 @@ jobs: # -Werror temporarily skipped until we have functional testing established # It's too slow with -O0 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -116,7 +116,7 @@ jobs: runs-on: ubuntu-18.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic 
-Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_gcc10.sh - name: Build & Install @@ -149,7 +149,7 @@ jobs: runs-on: ubuntu-18.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -178,7 +178,7 @@ jobs: runs-on: ubuntu-18.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nofortran.sh - name: Build & Install @@ -206,7 +206,7 @@ jobs: name: GNU@7.5 Release [configure 1D] runs-on: ubuntu-18.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -220,7 +220,7 @@ jobs: name: GNU@7.5 Release [configure 3D] runs-on: ubuntu-18.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -234,7 +234,7 @@ jobs: name: GNU@7.5 Release [configure 3D] runs-on: ubuntu-18.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh 
- name: Build & Install @@ -248,7 +248,7 @@ jobs: name: GNU@7.5 OMP Debug [configure 3D] runs-on: ubuntu-18.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -262,7 +262,7 @@ jobs: name: GNU Plotfile Tools [tools] runs-on: ubuntu-18.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -276,7 +276,7 @@ jobs: runs-on: ubuntu-18.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -301,7 +301,7 @@ jobs: CXX: h5pcc CC: h5cc steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index a487d27bf9c..d542fb603a2 100644 --- a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -22,7 +22,7 @@ jobs: # #define select_impl_(_1, _2, impl_, ...) impl_ env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_hip.sh - name: Build & Install @@ -69,7 +69,7 @@ jobs: # #define select_impl_(_1, _2, impl_, ...) 
impl_ env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_hip.sh - name: Build & Install @@ -104,7 +104,7 @@ jobs: name: HIP EB [configure 2D] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_hip.sh - name: Build & Install diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 80ae98cd2f1..6e7d87a299e 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -13,7 +13,7 @@ jobs: # mkl/rng/device/detail/mrg32k3a_impl.hpp has a number of sign-compare error env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-sign-compare"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_dpcpp.sh - name: Build & Install @@ -41,7 +41,7 @@ jobs: runs-on: ubuntu-20.04 env: {CXXFLAGS: "-Werror"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: install dependencies run: | export DEBIAN_FRONTEND=noninteractive diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 67db29cdcd8..be5a1e738ca 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -17,7 +17,7 @@ jobs: CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-c++17-extensions -Wno-range-loop-analysis" # 
-Wno-range-loop-analysis: Apple clang has a bug in range-loop-analysis steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_mac.sh - name: Build & Install @@ -42,7 +42,7 @@ jobs: CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-c++17-extensions -Wno-range-loop-analysis" # -Wno-range-loop-analysis: Apple clang has a bug in range-loop-analysis steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_mac.sh - name: Build & Install diff --git a/.github/workflows/sensei.yml b/.github/workflows/sensei.yml index f551f46bec0..163456a924a 100644 --- a/.github/workflows/sensei.yml +++ b/.github/workflows/sensei.yml @@ -21,7 +21,7 @@ jobs: container: image: senseiinsitu/ci:fedora35-amrex-20220613 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup run: mkdir build - name: Configure diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index b459865f587..9c32554218d 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -10,13 +10,13 @@ jobs: tabs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Tabs run: .github/workflows/style/check_tabs.sh trailing_whitespaces: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Trailing Whitespaces run: .github/workflows/style/check_trailing_whitespaces.sh diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index b066ba6c98c..fba862d26dd 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -12,7 +12,7 @@ jobs: name: MSVC C++17 w/o Fortran w/o MPI runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: 
actions/checkout@v3 - name: Build & Install run: | cmake -S . -B build ` @@ -31,7 +31,7 @@ jobs: name: MSVC C++17 w/o Fortran w/o MPI static runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Build & Install run: | cmake -S . -B build ` @@ -49,7 +49,7 @@ jobs: name: Clang C++17 w/o Fortran w/o MPI runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: seanmiddleditch/gha-setup-ninja@master - name: Build & Install shell: cmd From b3e0a62ba4d8c66b7cc40ab439b94835a5f4247c Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 26 Oct 2022 15:02:13 -0700 Subject: [PATCH 104/111] Pre- and Post-interpolation hook interface (#2991) Support both Fab and MultiFab versions of pre- and post-interpolation hooks. Because the pre-interp hook might modify the data, we need to make a copy to avoid modifying cached coarse data. Close #2989. --- Src/AmrCore/AMReX_FillPatchUtil_I.H | 69 +++++++++++++---------------- Src/AmrCore/AMReX_FillPatcher.H | 31 +++++++------ 2 files changed, 45 insertions(+), 55 deletions(-) diff --git a/Src/AmrCore/AMReX_FillPatchUtil_I.H b/Src/AmrCore/AMReX_FillPatchUtil_I.H index 8d8f210a0fe..3e94abfad27 100644 --- a/Src/AmrCore/AMReX_FillPatchUtil_I.H +++ b/Src/AmrCore/AMReX_FillPatchUtil_I.H @@ -4,6 +4,31 @@ namespace amrex { +namespace detail { + +template +auto call_interp_hook (F const& f, MF& mf, int icomp, int ncomp) + -> decltype(f(mf[0],Box(),icomp,ncomp)) +{ +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(mf); mfi.isValid(); ++mfi) { + auto& dfab = mf[mfi]; + const Box& dbx = dfab.box(); + f(dfab, dbx, icomp, ncomp); + } +} + +template +auto call_interp_hook (F const& f, MF& mf, int icomp, int ncomp) + -> decltype(f(mf,icomp,ncomp)) +{ + f(mf, icomp, ncomp); +} + +} + template bool ProperlyNested (const IntVect& ratio, const IntVect& blocking_factor, int ngrow, const IndexType& boxType, Interp* mapper) @@ 
-459,9 +484,6 @@ namespace { if ( ! fpc.ba_crse_patch.empty()) { - - using FAB = typename MF::FABType::value_type; - MF mf_crse_patch = make_mf_crse_patch (fpc, ncomp, mf.boxArray().ixType()); // Must make sure fine exists under needed coarse faces. // It stores values for the final (interior) interpolation, @@ -491,20 +513,12 @@ namespace { solve_mask.setVal(1); // Values to solve. solve_mask.setVal(0, mask_cpc, 0, 1); // Known values. - for (MFIter mfi(mf_refined_patch); mfi.isValid(); ++mfi) - { - FAB& sfab = mf_crse_patch[mfi]; - pre_interp(sfab, sfab.box(), 0, ncomp); - } + detail::call_interp_hook(pre_interp, mf_crse_patch, 0, ncomp); InterpFace(mapper, mf_crse_patch, 0, mf_refined_patch, 0, ncomp, ratio, solve_mask, cgeom, fgeom, bcscomp, RunOn::Gpu, bcs); - for (MFIter mfi(mf_refined_patch); mfi.isValid(); ++mfi) - { - FAB& dfab = mf_refined_patch[mfi]; - post_interp(dfab, dfab.box(), 0, ncomp); - } + detail::call_interp_hook(post_interp, mf_refined_patch, 0, ncomp); bool aliasing = false; for (auto const& fmf_a : fmf) { @@ -538,30 +552,14 @@ namespace { MF mf_fine_patch = make_mf_fine_patch(fpc, ncomp); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(mf_crse_patch); mfi.isValid(); ++mfi) - { - auto& sfab = mf_crse_patch[mfi]; - const Box& sbx = sfab.box(); - pre_interp(sfab, sbx, 0, ncomp); - } + detail::call_interp_hook(pre_interp, mf_crse_patch, 0, ncomp); FillPatchInterp(mf_fine_patch, 0, mf_crse_patch, 0, ncomp, IntVect(0), cgeom, fgeom, amrex::grow(amrex::convert(fgeom.Domain(),mf.ixType()),nghost), ratio, mapper, bcs, bcscomp); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(mf_fine_patch); mfi.isValid(); ++mfi) - { - auto& dfab = mf_fine_patch[mfi]; - const Box& dbx = dfab.box(); - post_interp(dfab, dbx, 0, ncomp); - } + detail::call_interp_hook(post_interp, mf_fine_patch, 0, ncomp); mf.ParallelCopy(mf_fine_patch, 0, dcomp, ncomp, IntVect{0}, 
nghost); } @@ -1024,14 +1022,7 @@ InterpFromCoarseLevel (MF& mf, IntVect const& nghost, Real time, cbc(mf_crse_patch, 0, ncomp, mf_crse_patch.nGrowVect(), time, cbccomp); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(mf_crse_patch); mfi.isValid(); ++mfi) - { - FAB& sfab = mf_crse_patch[mfi]; - pre_interp(sfab, sfab.box(), 0, ncomp); - } + detail::call_interp_hook(pre_interp, mf_crse_patch, 0, ncomp); FillPatchInterp(mf, dcomp, mf_crse_patch, 0, ncomp, nghost, cgeom, fgeom, fdomain_g, ratio, mapper, bcs, bcscomp); diff --git a/Src/AmrCore/AMReX_FillPatcher.H b/Src/AmrCore/AMReX_FillPatcher.H index 22b14d35c0d..d0e775416ee 100644 --- a/Src/AmrCore/AMReX_FillPatcher.H +++ b/Src/AmrCore/AMReX_FillPatcher.H @@ -330,26 +330,25 @@ FillPatcher::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real tim } } - MF mf_crse_patch; + if (m_cf_crse_data_tmp == nullptr) { + m_cf_crse_data_tmp = std::make_unique + (make_mf_crse_patch(fpc, m_ncomp)); + } + if (m_cf_crse_data.size() > 0 && amrex::almostEqual(time, m_cf_crse_data[0].first,5)) { - mf_crse_patch = MF(*m_cf_crse_data[0].second, amrex::make_alias, - scomp, ncomp); + amrex::Copy(*m_cf_crse_data_tmp, *m_cf_crse_data[0].second, + scomp, 0, ncomp, 0); } else if (m_cf_crse_data.size() > 1 && amrex::almostEqual(time, m_cf_crse_data[1].first,5)) { - mf_crse_patch = MF(*m_cf_crse_data[1].second, amrex::make_alias, - scomp, ncomp); + amrex::Copy(*m_cf_crse_data_tmp, *m_cf_crse_data[1].second, + scomp, 0, ncomp, 0); } else if (m_cf_crse_data.size() == 2) { - if (m_cf_crse_data_tmp == nullptr) { - m_cf_crse_data_tmp = std::make_unique - (make_mf_crse_patch(fpc, m_ncomp)); - } - mf_crse_patch = MF(*m_cf_crse_data_tmp, amrex::make_alias, scomp, ncomp); int const ng_space_interp = 8; // Need to be big enough Box domain = m_cgeom.growPeriodicDomain(ng_space_interp); domain.convert(mf.ixType()); @@ -358,10 +357,10 @@ FillPatcher::fillCoarseFineBoundary (MF& mf, IntVect const& 
nghost, Real tim Real alpha = (t1-time)/(t1-t0); Real beta = (time-t0)/(t1-t0); AMREX_ASSERT(alpha >= 0._rt && beta >= 0._rt); - auto const& a = mf_crse_patch.arrays(); + auto const& a = m_cf_crse_data_tmp->arrays(); auto const& a0 = m_cf_crse_data[0].second->const_arrays(); auto const& a1 = m_cf_crse_data[1].second->const_arrays(); - amrex::ParallelFor(mf_crse_patch, IntVect(0), ncomp, + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), ncomp, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept { if (domain.contains(i,j,k)) { @@ -377,17 +376,17 @@ FillPatcher::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real tim amrex::Abort("FillPatcher: High order interpolation in time not supported. Or FillPatcher was not properly deleted."); } - cbc(mf_crse_patch, 0, ncomp, nghost, time, cbccomp); + cbc(*m_cf_crse_data_tmp, 0, ncomp, nghost, time, cbccomp); - pre_interp(mf_crse_patch, 0, ncomp); + detail::call_interp_hook(pre_interp, *m_cf_crse_data_tmp, 0, ncomp); - FillPatchInterp(*m_cf_fine_data, scomp, mf_crse_patch, 0, + FillPatchInterp(*m_cf_fine_data, scomp, *m_cf_crse_data_tmp, 0, ncomp, IntVect(0), m_cgeom, m_fgeom, amrex::grow(amrex::convert(m_fgeom.Domain(), mf.ixType()),nghost), m_ratio, m_interp, bcs, bcscomp); - post_interp(*m_cf_fine_data, scomp, ncomp); + detail::call_interp_hook(post_interp, *m_cf_fine_data, scomp, ncomp); mf.ParallelCopy(*m_cf_fine_data, scomp, dcomp, ncomp, IntVect{0}, nghost); } From ab8c892e1dd8943a6f0f759693757c6a186668a7 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 26 Oct 2022 15:59:39 -0700 Subject: [PATCH 105/111] Add alias template Gpu::NonManagedDeviceVector (#2999) --- Src/AmrCore/AMReX_TagBox.cpp | 10 +++++----- Src/Base/AMReX_GpuContainers.H | 10 ++++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Src/AmrCore/AMReX_TagBox.cpp b/Src/AmrCore/AMReX_TagBox.cpp index 9654e1cac7a..3ec7425e283 100644 --- a/Src/AmrCore/AMReX_TagBox.cpp +++ b/Src/AmrCore/AMReX_TagBox.cpp @@ -441,7 
+441,7 @@ TagBoxArray::local_collate_gpu (Gpu::PinnedVector& v) const std::partial_sum(nblocks.begin(), nblocks.end(), blockoffset.begin()+1); int ntotblocks = blockoffset.back(); - PODVector > dv_ntags(ntotblocks); + Gpu::NonManagedDeviceVector dv_ntags(ntotblocks); for (MFIter fai(*this); fai.isValid(); ++fai) { @@ -491,21 +491,21 @@ TagBoxArray::local_collate_gpu (Gpu::PinnedVector& v) const #endif } - PODVector > hv_ntags(ntotblocks); + Gpu::PinnedVector hv_ntags(ntotblocks); Gpu::dtoh_memcpy(hv_ntags.data(), dv_ntags.data(), ntotblocks*sizeof(int)); - PODVector > hv_tags_offset(ntotblocks+1); + Gpu::PinnedVector hv_tags_offset(ntotblocks+1); hv_tags_offset[0] = 0; std::partial_sum(hv_ntags.begin(), hv_ntags.end(), hv_tags_offset.begin()+1); int ntotaltags = hv_tags_offset.back(); if (ntotaltags == 0) return; - PODVector > dv_tags_offset(ntotblocks); + Gpu::NonManagedDeviceVector dv_tags_offset(ntotblocks); int* dp_tags_offset = dv_tags_offset.data(); Gpu::htod_memcpy_async(dp_tags_offset, hv_tags_offset.data(), ntotblocks*sizeof(int)); - PODVector > dv_tags(ntotaltags); + Gpu::NonManagedDeviceVector dv_tags(ntotaltags); IntVect* dp_tags = dv_tags.data(); int iblock = 0; diff --git a/Src/Base/AMReX_GpuContainers.H b/Src/Base/AMReX_GpuContainers.H index cc68770ff3f..0992411b552 100644 --- a/Src/Base/AMReX_GpuContainers.H +++ b/Src/Base/AMReX_GpuContainers.H @@ -26,6 +26,13 @@ namespace Gpu { template using DeviceVector = PODVector >; + /** + * \brief A PODVector that uses the non-managed device memory arena. + * + */ + template + using NonManagedDeviceVector = PODVector >; + /** * \brief A PODVector that uses the managed memory arena. 
* @@ -83,6 +90,9 @@ namespace Gpu { template using HostVector = PODVector; + template + using NonManagedVector = PODVector; + template using ManagedVector = PODVector; From 7f3c90893d70ca33c6dec499436dd503c77eeddf Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 26 Oct 2022 16:40:16 -0700 Subject: [PATCH 106/111] Make The_Device_Arena non-managed (#2998) The_Device_Arena used to be a separate Arena. We changed it to be an alias of The_Arena to avoid memory fragmentation. However, the issue is we don't have an Arena that can allocate non-managed memory unless The_Arena is not managed. Because of performance concerns, we sometimes want to allocate non-managed memory. Therefore, we make The_Device_Arena an alias if and only if The_Arena is not managed. --- Src/Base/AMReX_Arena.cpp | 2 +- Src/Base/AMReX_GpuContainers.H | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Src/Base/AMReX_Arena.cpp b/Src/Base/AMReX_Arena.cpp index 19f4f6f5459..2082014d634 100644 --- a/Src/Base/AMReX_Arena.cpp +++ b/Src/Base/AMReX_Arena.cpp @@ -305,7 +305,7 @@ Arena::Initialize () the_async_arena = new PArena(the_async_arena_release_threshold); #ifdef AMREX_USE_GPU - if (the_arena->isDevice() || the_arena->isManaged()) { + if (the_arena->isDevice()) { the_device_arena = the_arena; } else { the_device_arena = new CArena(0, ArenaInfo{}.SetDeviceMemory().SetReleaseThreshold diff --git a/Src/Base/AMReX_GpuContainers.H b/Src/Base/AMReX_GpuContainers.H index 0992411b552..faccec1d2ef 100644 --- a/Src/Base/AMReX_GpuContainers.H +++ b/Src/Base/AMReX_GpuContainers.H @@ -19,9 +19,8 @@ namespace Gpu { /** * \brief A PODVector that uses the standard memory Arena. - * Note that, on NVIDIA architectures, this Arena is actually - * managed. - * + * Note that the memory might or might not be managed depending + * on the amrex.the_arena_is_managed ParmParse parameter. 
*/ template using DeviceVector = PODVector >; From 3ec07681574afa658f4f53117d7ab618459a514b Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 26 Oct 2022 16:49:40 -0700 Subject: [PATCH 107/111] `FabArray::isDefined` (#2997) ## Summary Add a new query to `define_function_called`. ## Additional background This is a cheaper check than `ok()` for finding out if a MultiFab has been allocated or not yet, assuming that the calling code follows the convention that `define()` is called collectively. Update: It turns out you can also call `empty` inherited from `FabArrayBase`. The new API is quite explicit, which is ok, too. Co-authored-by: Weiqun Zhang --- Src/Base/AMReX_FabArray.H | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Src/Base/AMReX_FabArray.H b/Src/Base/AMReX_FabArray.H index 8be30fc8763..736c39567ae 100644 --- a/Src/Base/AMReX_FabArray.H +++ b/Src/Base/AMReX_FabArray.H @@ -438,6 +438,15 @@ public: */ bool ok () const; + /** Has define() been called on this rank? + * + * \return true if `define` has been called on this `FabArray`. Note that all constructors except `FabArray ()` + * and `FabArray(Arena*a)` call `define`, even if the `MFInfo` argument has `alloc=false`. One could + * also use `FabArrayBase::empty()` to find whether `define` is called or not, although they are not exactly + * the same. + */ + bool isDefined () const; + //! Return a constant reference to the FAB associated with mfi. const FAB& operator[] (const MFIter& mfi) const noexcept { return *(this->fabPtr(mfi)); } @@ -1128,6 +1137,7 @@ protected: std::unique_ptr > m_factory; DataAllocator m_dallocator; + //! has define() been called? 
bool define_function_called = false; // @@ -1768,6 +1778,13 @@ FabArray::ok () const return isok == 1; } +template +bool +FabArray::isDefined () const +{ + return define_function_called; +} + template void FabArray::define (const BoxArray& bxs, From 735c3513153f1d06f783e64f455816be85fb3602 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 29 Oct 2022 10:57:23 -0700 Subject: [PATCH 108/111] MPI Reduce for ValLocPair (#3003) Add ParallelReduce::Min, ParallelReduce::Max, ParallelAllReduce::Min, and ParallelAllReduce::Max for ValLocPair, where TV and TI are types that have corresponding MPI types (e.g., int, Real, IntVect, Box, etc.). --- Src/Base/AMReX_ParallelDescriptor.H | 73 +++++++++++++++++++++++++++ Src/Base/AMReX_ParallelDescriptor.cpp | 15 ++++++ Src/Base/AMReX_ParallelReduce.H | 55 ++++++++++++++++++++ Src/Base/AMReX_Reduce.H | 26 +--------- Src/Base/AMReX_ValLocPair.H | 35 +++++++++++++ Src/Base/CMakeLists.txt | 1 + Src/Base/Make.package | 1 + 7 files changed, 181 insertions(+), 25 deletions(-) create mode 100644 Src/Base/AMReX_ValLocPair.H diff --git a/Src/Base/AMReX_ParallelDescriptor.H b/Src/Base/AMReX_ParallelDescriptor.H index 38cd4cdf167..03c431d135a 100644 --- a/Src/Base/AMReX_ParallelDescriptor.H +++ b/Src/Base/AMReX_ParallelDescriptor.H @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef BL_AMRPROF #include @@ -211,6 +212,11 @@ while ( false ) extern AMREX_EXPORT MPI_Comm m_comm; inline MPI_Comm Communicator () noexcept { return m_comm; } +#ifdef AMREX_USE_MPI + extern Vector m_mpi_types; + extern Vector m_mpi_ops; +#endif + //! 
return the number of MPI ranks local to the current Parallel Context inline int NProcs () noexcept @@ -1479,6 +1485,73 @@ void DoReduce (T* r, MPI_Op op, int cnt, int cpu) #endif } +#ifdef AMREX_USE_MPI +namespace ParallelDescriptor { + +template +struct Mpi_typemap> +{ + static MPI_Datatype type () + { + static MPI_Datatype mpi_type = MPI_DATATYPE_NULL; + if (mpi_type == MPI_DATATYPE_NULL) { + using T = ValLocPair; + static_assert(std::is_trivially_copyable::value, + "To communicate with MPI, ValLocPair must be trivially copyable."); + static_assert(std::is_standard_layout::value, + "To communicate with MPI, ValLocPair must be standard layout"); + + T vlp[2]; + MPI_Datatype types[] = { + Mpi_typemap::type(), + Mpi_typemap::type(), + }; + int blocklens[] = { 1, 1 }; + MPI_Aint disp[2]; + BL_MPI_REQUIRE( MPI_Get_address(&vlp[0].value, &disp[0]) ); + BL_MPI_REQUIRE( MPI_Get_address(&vlp[0].index, &disp[1]) ); + disp[1] -= disp[0]; + disp[0] = 0; + BL_MPI_REQUIRE( MPI_Type_create_struct(2, blocklens, disp, types, + &mpi_type) ); + MPI_Aint lb, extent; + BL_MPI_REQUIRE( MPI_Type_get_extent(mpi_type, &lb, &extent) ); + if (extent != sizeof(T)) { + MPI_Datatype tmp = mpi_type; + BL_MPI_REQUIRE( MPI_Type_create_resized(tmp, 0, sizeof(vlp[0]), &mpi_type) ); + BL_MPI_REQUIRE( MPI_Type_free(&tmp) ); + } + BL_MPI_REQUIRE( MPI_Type_commit( &mpi_type ) ); + + m_mpi_types.push_back(&mpi_type); + } + return mpi_type; + } +}; + +template +MPI_Op Mpi_op () +{ + static MPI_Op mpi_op = MPI_OP_NULL; + if (mpi_op == MPI_OP_NULL) { + static auto user_fn = [] (void *invec, void *inoutvec, int* len, + MPI_Datatype * /*datatype*/) + { + auto in = static_cast(invec); + auto out = static_cast(inoutvec); + for (int i = 0; i < *len; ++i) { + out[i] = F()(in[i],out[i]); + } + }; + BL_MPI_REQUIRE( MPI_Op_create(user_fn, 1, &mpi_op) ); + m_mpi_ops.push_back(&mpi_op); + } + return mpi_op; +} + +} +#endif + } #endif /*BL_PARALLELDESCRIPTOR_H*/ diff --git a/Src/Base/AMReX_ParallelDescriptor.cpp 
b/Src/Base/AMReX_ParallelDescriptor.cpp index 6d457d28398..3ea202d9b50 100644 --- a/Src/Base/AMReX_ParallelDescriptor.cpp +++ b/Src/Base/AMReX_ParallelDescriptor.cpp @@ -65,6 +65,11 @@ namespace amrex { namespace ParallelDescriptor { MPI_Comm m_comm = MPI_COMM_NULL; // communicator for all ranks, probably MPI_COMM_WORLD +#ifdef AMREX_USE_MPI + Vector m_mpi_types; + Vector m_mpi_ops; +#endif + int m_MinTag = 1000, m_MaxTag = -1; const int ioProcessor = 0; @@ -357,10 +362,20 @@ EndParallel () BL_MPI_REQUIRE( MPI_Type_free(&mpi_type_indextype) ); BL_MPI_REQUIRE( MPI_Type_free(&mpi_type_box) ); BL_MPI_REQUIRE( MPI_Type_free(&mpi_type_lull_t) ); + for (auto t : m_mpi_types) { + BL_MPI_REQUIRE( MPI_Type_free(t) ); + *t = MPI_DATATYPE_NULL; + } + for (auto op : m_mpi_ops) { + BL_MPI_REQUIRE( MPI_Op_free(op) ); + *op = MPI_OP_NULL; + } mpi_type_intvect = MPI_DATATYPE_NULL; mpi_type_indextype = MPI_DATATYPE_NULL; mpi_type_box = MPI_DATATYPE_NULL; mpi_type_lull_t = MPI_DATATYPE_NULL; + m_mpi_types.clear(); + m_mpi_ops.clear(); } if (!call_mpi_finalize) { diff --git a/Src/Base/AMReX_ParallelReduce.H b/Src/Base/AMReX_ParallelReduce.H index e0e1e98b66e..3a6db500a2a 100644 --- a/Src/Base/AMReX_ParallelReduce.H +++ b/Src/Base/AMReX_ParallelReduce.H @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -120,6 +121,32 @@ namespace ParallelGather { namespace ParallelAllReduce { + template + void Max (ValLocPair& vi, MPI_Comm comm) { +#ifdef AMREX_USE_MPI + auto tmp = vi; + using T = ValLocPair; + MPI_Allreduce(&tmp, &vi, 1, + ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Mpi_op>(), comm); +#else + amrex::ignore_unused(vi, comm); +#endif + } + + template + void Min (ValLocPair& vi, MPI_Comm comm) { +#ifdef AMREX_USE_MPI + auto tmp = vi; + using T = ValLocPair; + MPI_Allreduce(&tmp, &vi, 1, + ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Mpi_op>(), comm); +#else + amrex::ignore_unused(vi, comm); +#endif + } + template void Max (T& 
v, MPI_Comm comm) { detail::Reduce(detail::ReduceOp::max, v, -1, comm); @@ -174,6 +201,34 @@ namespace ParallelAllReduce { namespace ParallelReduce { + template + void Max (ValLocPair& vi, int root, MPI_Comm comm) { +#ifdef AMREX_USE_MPI + auto tmp = vi; + using T = ValLocPair; + MPI_Reduce(&tmp, &vi, 1, + ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Mpi_op>(), + root, comm); +#else + amrex::ignore_unused(vi, root, comm); +#endif + } + + template + void Min (ValLocPair& vi, int root, MPI_Comm comm) { +#ifdef AMREX_USE_MPI + auto tmp = vi; + using T = ValLocPair; + MPI_Reduce(&tmp, &vi, 1, + ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Mpi_op>(), + root, comm); +#else + amrex::ignore_unused(vi, root, comm); +#endif + } + template void Max (T& v, int root, MPI_Comm comm) { detail::Reduce(detail::ReduceOp::max, v, root, comm); diff --git a/Src/Base/AMReX_Reduce.H b/Src/Base/AMReX_Reduce.H index d9c886412a7..9076e984828 100644 --- a/Src/Base/AMReX_Reduce.H +++ b/Src/Base/AMReX_Reduce.H @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -13,31 +14,6 @@ namespace amrex { -template -struct ValLocPair -{ - TV value; - TI index; - - static constexpr ValLocPair max () { - return ValLocPair{std::numeric_limits::max(), TI()}; - } - - static constexpr ValLocPair lowest () { - return ValLocPair{std::numeric_limits::lowest(), TI()}; - } - - friend constexpr bool operator< (ValLocPair const& a, ValLocPair const& b) - { - return a.value < b.value; - } - - friend constexpr bool operator> (ValLocPair const& a, ValLocPair const& b) - { - return a.value > b.value; - } -}; - namespace Reduce { namespace detail { #ifdef AMREX_USE_GPU diff --git a/Src/Base/AMReX_ValLocPair.H b/Src/Base/AMReX_ValLocPair.H new file mode 100644 index 00000000000..b7b480b1dba --- /dev/null +++ b/Src/Base/AMReX_ValLocPair.H @@ -0,0 +1,35 @@ +#ifndef AMREX_VALLOCPAIR_H_ +#define AMREX_VALLOCPAIR_H_ + +#include + +namespace amrex { + +template +struct 
ValLocPair +{ + TV value; + TI index; + + static constexpr ValLocPair max () { + return ValLocPair{std::numeric_limits::max(), TI()}; + } + + static constexpr ValLocPair lowest () { + return ValLocPair{std::numeric_limits::lowest(), TI()}; + } + + friend constexpr bool operator< (ValLocPair const& a, ValLocPair const& b) + { + return a.value < b.value; + } + + friend constexpr bool operator> (ValLocPair const& a, ValLocPair const& b) + { + return a.value > b.value; + } +}; + +} + +#endif diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index c1212cf37fe..7af11a24b5a 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -30,6 +30,7 @@ target_sources( amrex AMReX_Utility.cpp AMReX_FileSystem.H AMReX_FileSystem.cpp + AMReX_ValLocPair.H AMReX_Reduce.H AMReX_Scan.H AMReX_Partition.H diff --git a/Src/Base/Make.package b/Src/Base/Make.package index 1fdca2587d7..9dd615b3251 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -22,6 +22,7 @@ C$(AMREX_BASE)_sources += AMReX_BlockMutex.cpp C$(AMREX_BASE)_sources += AMReX_ParmParse.cpp AMReX_parmparse_fi.cpp AMReX_Utility.cpp C$(AMREX_BASE)_headers += AMReX_ParmParse.H AMReX_Utility.H AMReX_BLassert.H AMReX_ArrayLim.H C$(AMREX_BASE)_headers += AMReX_Functional.H AMReX_Reduce.H AMReX_Scan.H AMReX_Partition.H +C$(AMREX_BASE)_headers += AMReX_ValLocPair.H C$(AMREX_BASE)_headers += AMReX_FileSystem.H C$(AMREX_BASE)_sources += AMReX_FileSystem.cpp From 5ec270b4d534a486aeabf478ae553f1df53f2e5b Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 1 Nov 2022 08:59:44 -0700 Subject: [PATCH 109/111] Fix compilation for PETSc (#3005) We cannot include PETSc headers too early because it might redefine MPI routines as macros (https://github.com/petsc/petsc/blob/main/include/petsclog.h#L441). They break MPI calls like below, MPI_Allreduce(&tmp, &vi, 1, ParallelDescriptor::Mpi_typemap::type(), ParallelDescriptor::Mpi_op>(), comm); because of the `,` in `>`. 
--- Src/Extern/PETSc/AMReX_PETSc.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Src/Extern/PETSc/AMReX_PETSc.cpp b/Src/Extern/PETSc/AMReX_PETSc.cpp index bf0bf68a99c..7d8cd79b582 100644 --- a/Src/Extern/PETSc/AMReX_PETSc.cpp +++ b/Src/Extern/PETSc/AMReX_PETSc.cpp @@ -1,7 +1,4 @@ -#include -#include - #ifdef AMREX_USE_EB #include #include @@ -9,6 +6,9 @@ #include +#include +#include + #include #include #include From d2b82938c171a4b1ada48839ed6891b5b0183b43 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 1 Nov 2022 09:01:54 -0700 Subject: [PATCH 110/111] Update CHANGES for 22.11 (#3006) --- CHANGES | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/CHANGES b/CHANGES index a9ab0555a58..648db385c07 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,45 @@ +# 22.11 + + -- MPI Reduce for ValLocPair (#3003) + + -- `FabArray::isDefined` (#2997) + + -- Make The_Device_Arena non-managed (#2998) + + -- Add alias template Gpu::NonManagedDeviceVector (#2999) + + -- Pre- and Post-interpolation hook interface (#2991) + + -- Add user defined BC types (#2995) + + -- Add BCRec::set for convenience (#2993) + + -- ParallelFor with compile time optimization of kernels with run time parameters (#2954) + + -- 2D RZ solver for WarpX: Arbitrary coefficient (#2986) + + -- Runge-Kutta support for AMR (#2974) + + -- Fourth-order interpolation from fine to coarse level (#2987) + + -- Fix EB data inconsistency when fixing small cells and multiple cuts (#2943) + + -- MFIter::Finalize (#2983, #2985, #2988) + + -- Fix MLMG::getGradSolution & getFluxes for inhomogeneous Neumann and Robin BC (#2984) + + -- MLLinOp::postSolve (#2981) + + -- add templating for the cell bilinear interpolators (#2979) + + -- FillPatcher class (#2972) + + -- Remove sycl namespace alias (#2971) + + -- Fix Tensor Solver BC (#2930) + + -- Disable host device for macros for SYCL/DPC++ (#2969) + # 22.10 -- Solve an issue with particles async IO when 
having runtime added variables (#2966) From c4a4811c373d9b599bb710c7029365b1ca7f2c22 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Tue, 1 Nov 2022 14:08:38 -0500 Subject: [PATCH 111/111] C++17 Transition (#2992) ## Summary Update AMReX to require C++17 or newer. - [x] docs - [x] CMake - [x] GNUmake - [x] CI ## Additional background Requires a mature [C++17](https://en.wikipedia.org/wiki/C%2B%2B17) compiler, e.g., GCC 8, Clang 7, NVCC 11.0, MSVC 19.15 or newer. Already used since 1+ year in production by downstream codes such as Castro and WarpX. Needed for modernization and new features such as #2878 Co-authored-by: Weiqun Zhang --- .github/workflows/clang.yml | 26 ++++---- .github/workflows/cuda.yml | 39 +---------- .../workflows/dependencies/dependencies.sh | 2 +- ...ncies_clang6.sh => dependencies_clang7.sh} | 4 +- .../dependencies/dependencies_gcc8.sh | 17 +++++ .../dependencies/dependencies_nofortran.sh | 3 +- .../dependencies/dependencies_nvcc10.sh | 30 --------- .../dependencies/dependencies_nvcc11.sh | 7 +- .github/workflows/gcc.yml | 65 ++++++++++--------- .../source/BuildingAMReX.rst | 8 +-- .../source/BuildingAMReX_Chapter.rst | 2 +- INSTALL | 2 +- Src/Amr/AMReX_StateDescriptor.cpp | 65 ++++++++++++------- Src/Base/AMReX_Arena.cpp | 19 ++++-- Src/Base/AMReX_CTOParallelForImpl.H | 12 ++-- Tools/AMRProfParser/GNUmakefile | 1 - Tools/CMake/AMReXTypecheck.cmake | 2 +- Tools/CMake/AMReX_Config.cmake | 14 ++-- Tools/GNUMake/Make.rules | 2 +- Tools/GNUMake/comps/armclang.mak | 4 +- Tools/GNUMake/comps/cray.mak | 10 +-- Tools/GNUMake/comps/dpcpp.mak | 2 +- Tools/GNUMake/comps/gnu.mak | 45 +++---------- Tools/GNUMake/comps/hip.mak | 2 +- Tools/GNUMake/comps/intel.mak | 19 +----- Tools/GNUMake/comps/llvm-flang.mak | 4 +- Tools/GNUMake/comps/llvm.mak | 4 +- Tools/GNUMake/comps/nag.mak | 9 +-- Tools/GNUMake/comps/nvcc.mak | 63 ++++-------------- Tools/GNUMake/comps/nvhpc.mak | 12 ++-- Tools/GNUMake/comps/pgi.mak | 10 ++- Tools/Plotfile/CMakeLists.txt | 2 +- 32 
files changed, 195 insertions(+), 311 deletions(-) rename .github/workflows/dependencies/{dependencies_clang6.sh => dependencies_clang7.sh} (73%) create mode 100755 .github/workflows/dependencies/dependencies_gcc8.sh delete mode 100755 .github/workflows/dependencies/dependencies_nvcc10.sh diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml index 773126bca38..afd37544c12 100644 --- a/.github/workflows/clang.yml +++ b/.github/workflows/clang.yml @@ -12,13 +12,13 @@ jobs: # Build and install libamrex as AMReX CMake project # Note: this is an intentional "minimal" build that does not enable (many) options library_clang: - name: Clang@6.0 C++14 SP NOMPI Debug [lib] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wnon-virtual-dtor"} + name: Clang@7.0 C++17 SP NOMPI Debug [lib] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"} steps: - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies_clang6.sh + run: .github/workflows/dependencies/dependencies_clang7.sh - name: Build & Install run: | mkdir build @@ -34,7 +34,6 @@ jobs: -DAMReX_PLOTFILE_TOOLS=ON \ -DAMReX_PRECISION=SINGLE \ -DAMReX_PARTICLES_PRECISION=SINGLE \ - -DCMAKE_CXX_STANDARD=14 \ -DCMAKE_C_COMPILER=$(which clang) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ -DCMAKE_Fortran_COMPILER=$(which gfortran) @@ -48,14 +47,14 @@ jobs: ctest --output-on-failure tests_clang: - name: Clang@6.0 C++14 SP Particles DP Mesh Debug [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code 
-Wno-c++17-extensions -O1 -Wnon-virtual-dtor"} + name: Clang@7.0 C++17 SP Particles DP Mesh Debug [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -O1 -Wnon-virtual-dtor"} # It's too slow with -O0 steps: - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies_clang6.sh + run: .github/workflows/dependencies/dependencies_clang7.sh - name: Build & Install run: | mkdir build @@ -70,7 +69,6 @@ jobs: -DAMReX_PARTICLES=ON \ -DAMReX_PRECISION=DOUBLE \ -DAMReX_PARTICLES_PRECISION=SINGLE \ - -DCMAKE_CXX_STANDARD=14 \ -DCMAKE_C_COMPILER=$(which clang) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ -DCMAKE_Fortran_COMPILER=$(which gfortran) @@ -80,14 +78,14 @@ jobs: # Build 2D libamrex with configure configure-2d: - name: Clang@6.0 NOMPI Release [configure 2D] - runs-on: ubuntu-18.04 + name: Clang@7.0 NOMPI Release [configure 2D] + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies_clang6.sh + run: .github/workflows/dependencies/dependencies_clang7.sh - name: Build & Install run: | ./configure --dim 2 --with-fortran no --comp llvm --with-mpi no - make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names -Wno-c++17-extensions" + make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names" make install diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index c91bbecd48e..98a2b001760 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -7,37 +7,6 @@ concurrency: cancel-in-progress: true jobs: - # Build libamrex and all tests with CUDA 10.2 - tests-cuda10: - name: CUDA@10.2 GNU@6.5.0 C++14 Release [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion 
-Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond"} - steps: - - uses: actions/checkout@v3 - - name: Dependencies - run: .github/workflows/dependencies/dependencies_nvcc10.sh - - name: Build & Install - run: | - export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} - export LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:${LD_LIBRARY_PATH} - which nvcc || echo "nvcc not in PATH!" - mkdir build - cd build - cmake .. \ - -DCMAKE_VERBOSE_MAKEFILE=ON \ - -DAMReX_EB=OFF \ - -DAMReX_ENABLE_TESTS=ON \ - -DAMReX_FORTRAN=OFF \ - -DAMReX_PARTICLES=ON \ - -DAMReX_GPU_BACKEND=CUDA \ - -DCMAKE_C_COMPILER=$(which gcc-6) \ - -DCMAKE_CXX_COMPILER=$(which g++-6) \ - -DCMAKE_CUDA_HOST_COMPILER=$(which g++-6) \ - -DCMAKE_Fortran_COMPILER=$(which gfortran-6) \ - -DAMReX_CUDA_ARCH=7.0 \ - -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON - make -j 2 - # Build libamrex and all tests with CUDA 11.0.2 (recent supported) tests-cuda11: name: CUDA@11.2 GNU@9.3.0 C++17 Release [tests] @@ -64,9 +33,7 @@ jobs: -DCMAKE_CXX_COMPILER=$(which g++) \ -DCMAKE_CUDA_HOST_COMPILER=$(which g++) \ -DCMAKE_Fortran_COMPILER=$(which gfortran) \ - -DCMAKE_CUDA_STANDARD=17 \ - -DCMAKE_CXX_STANDARD=17 \ - -DAMReX_CUDA_ARCH=8.0 \ + -DAMReX_CUDA_ARCH=7.0 \ -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON \ -DAMReX_CUDA_ERROR_CAPTURE_THIS=ON @@ -106,8 +73,6 @@ jobs: -DCMAKE_CXX_COMPILER=$(which nvc++) \ -DCMAKE_CUDA_HOST_COMPILER=$(which nvc++) \ -DCMAKE_Fortran_COMPILER=$(which nvfortran) \ - -DCMAKE_CUDA_STANDARD=17 \ - -DCMAKE_CXX_STANDARD=17 \ -DAMReX_CUDA_ARCH=8.0 \ -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON \ -DAMReX_CUDA_ERROR_CAPTURE_THIS=ON @@ -126,5 +91,5 @@ jobs: run: | export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} ./configure --dim 3 --with-cuda yes --enable-eb yes --enable-xsdk-defaults yes --with-fortran no - make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE 
XTRA_CXXFLAGS=-fno-operator-names CXXSTD=c++17 + make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS=-fno-operator-names make install diff --git a/.github/workflows/dependencies/dependencies.sh b/.github/workflows/dependencies/dependencies.sh index d0e86e99c0a..c9bb080831c 100755 --- a/.github/workflows/dependencies/dependencies.sh +++ b/.github/workflows/dependencies/dependencies.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2020 The AMReX Community +# Copyright 2020-2022 The AMReX Community # # License: BSD-3-Clause-LBNL # Authors: Axel Huebl diff --git a/.github/workflows/dependencies/dependencies_clang6.sh b/.github/workflows/dependencies/dependencies_clang7.sh similarity index 73% rename from .github/workflows/dependencies/dependencies_clang6.sh rename to .github/workflows/dependencies/dependencies_clang7.sh index 19b348b920b..85396a2f73c 100755 --- a/.github/workflows/dependencies/dependencies_clang6.sh +++ b/.github/workflows/dependencies/dependencies_clang7.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2020 The AMReX Community +# Copyright 2020-2022 The AMReX Community # # License: BSD-3-Clause-LBNL # Authors: Axel Huebl @@ -11,4 +11,4 @@ sudo apt-get update sudo apt-get install -y \ build-essential \ - clang gfortran + clang-7 gfortran diff --git a/.github/workflows/dependencies/dependencies_gcc8.sh b/.github/workflows/dependencies/dependencies_gcc8.sh new file mode 100755 index 00000000000..c216e6a8c51 --- /dev/null +++ b/.github/workflows/dependencies/dependencies_gcc8.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# +# Copyright 2020-2022 The AMReX Community +# +# License: BSD-3-Clause-LBNL +# Authors: Axel Huebl + +set -eu -o pipefail + +sudo add-apt-repository ppa:ubuntu-toolchain-r/test +sudo apt-get update + +sudo apt-get install -y --no-install-recommends \ + build-essential \ + g++-8 gfortran-8 \ + libopenmpi-dev \ + openmpi-bin diff --git a/.github/workflows/dependencies/dependencies_nofortran.sh 
b/.github/workflows/dependencies/dependencies_nofortran.sh index 36d759f66fa..61089ad8bf7 100755 --- a/.github/workflows/dependencies/dependencies_nofortran.sh +++ b/.github/workflows/dependencies/dependencies_nofortran.sh @@ -1,8 +1,9 @@ #!/usr/bin/env bash # -# Copyright 2020 Axel Huebl +# Copyright 2020-2022 The AMReX Community # # License: BSD-3-Clause-LBNL +# Authors: Axel Huebl # search recursive inside a folder if a file contains tabs # diff --git a/.github/workflows/dependencies/dependencies_nvcc10.sh b/.github/workflows/dependencies/dependencies_nvcc10.sh deleted file mode 100755 index 591dd04d79b..00000000000 --- a/.github/workflows/dependencies/dependencies_nvcc10.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright 2020 Axel Huebl -# -# License: BSD-3-Clause-LBNL - -set -eu -o pipefail - -sudo apt-get update - -sudo apt-get install -y --no-install-recommends\ - build-essential \ - g++-6 \ - gfortran-6 \ - libopenmpi-dev \ - openmpi-bin - -sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub -echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" \ - | sudo tee /etc/apt/sources.list.d/cuda.list -sudo apt-get update -sudo apt-get install -y \ - cuda-command-line-tools-10-2 \ - cuda-compiler-10-2 \ - cuda-cupti-dev-10-2 \ - cuda-minimal-build-10-2 \ - cuda-nvml-dev-10-2 \ - cuda-nvtx-10-2 \ - cuda-curand-dev-10-2 -sudo ln -s cuda-10.2 /usr/local/cuda diff --git a/.github/workflows/dependencies/dependencies_nvcc11.sh b/.github/workflows/dependencies/dependencies_nvcc11.sh index 79c8c6c31f6..a4b2f335a99 100755 --- a/.github/workflows/dependencies/dependencies_nvcc11.sh +++ b/.github/workflows/dependencies/dependencies_nvcc11.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2020 Axel Huebl +# Copyright 2020-2022 Axel Huebl # # License: BSD-3-Clause-LBNL @@ -19,9 +19,8 @@ sudo apt-get install -y \ pkg-config \ wget -sudo apt-key adv 
--fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub -echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 /" \ - | sudo tee /etc/apt/sources.list.d/cuda.list +curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb +sudo dpkg -i cuda-keyring_1.0-1_all.deb sudo apt-get update sudo apt-get install -y \ cuda-command-line-tools-11-2 \ diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 8f1434d2ea3..32726a4767a 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -13,13 +13,13 @@ jobs: # Build and install libamrex as AMReX CMake project # Note: this is an intentional "minimal" build that does not enable (many) options library: - name: GNU@7.5 C++17 Release [lib] - runs-on: ubuntu-18.04 + name: GNU@8.4 C++17 Release [lib] + runs-on: ubuntu-20.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies.sh + run: .github/workflows/dependencies/dependencies_gcc8.sh - name: Build & Install run: | mkdir build @@ -29,7 +29,9 @@ jobs: -DAMReX_PLOTFILE_TOOLS=ON \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DCMAKE_INSTALL_PREFIX=/tmp/my-amrex \ - -DCMAKE_CXX_STANDARD=17 + -DCMAKE_C_COMPILER=$(which gcc-8) \ + -DCMAKE_CXX_COMPILER=$(which g++-8) \ + -DCMAKE_Fortran_COMPILER=$(which gfortran-8) make -j 2 make install make test_install @@ -41,8 +43,8 @@ jobs: # Build libamrex and all tests tests_build_3D: - name: GNU@7.5 C++14 3D Debug Fortran [tests] - runs-on: ubuntu-18.04 + name: GNU@9.3 C++17 3D Debug Fortran [tests] + runs-on: ubuntu-20.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference 
-Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # It's too slow with -O0 steps: @@ -64,8 +66,8 @@ jobs: ctest --test-dir build --output-on-failure tests_build_2D: - name: GNU@7.5 C++14 2D Debug Fortran [tests] - runs-on: ubuntu-18.04 + name: GNU@9.3 C++17 2D Debug Fortran [tests] + runs-on: ubuntu-20.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # It's too slow with -O0 steps: @@ -87,8 +89,8 @@ jobs: ctest --test-dir build --output-on-failure tests_build_1D: - name: GNU@7.5 C++14 1D Debug Fortran [tests] - runs-on: ubuntu-18.04 + name: GNU@9.3 C++17 1D Debug Fortran [tests] + runs-on: ubuntu-20.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # -Werror temporarily skipped until we have functional testing established # It's too slow with -O0 @@ -113,7 +115,7 @@ jobs: # Build libamrex and all tests tests_cxx20: name: GNU@10.1 C++20 OMP [tests] - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - uses: actions/checkout@v3 @@ -145,13 +147,13 @@ jobs: # Build libamrex and all tests w/o MPI tests-nonmpi: - name: GNU@7.5 C++14 NOMPI [tests] - runs-on: ubuntu-18.04 + name: GNU@8.4 C++17 NOMPI [tests] + runs-on: ubuntu-20.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion 
-Wshadow -Woverloaded-virtual -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies.sh + run: .github/workflows/dependencies/dependencies_gcc8.sh - name: Build & Install run: | mkdir build @@ -167,15 +169,18 @@ jobs: -DAMReX_ENABLE_TESTS=ON \ -DAMReX_FORTRAN=ON \ -DAMReX_MPI=OFF \ - -DAMReX_PARTICLES=ON + -DAMReX_PARTICLES=ON \ + -DCMAKE_C_COMPILER=$(which gcc-8) \ + -DCMAKE_CXX_COMPILER=$(which g++-8) \ + -DCMAKE_Fortran_COMPILER=$(which gfortran-8) make -j 2 ctest --output-on-failure # Build libamrex and all tests tests-nofortran: - name: GNU@7.5 C++14 w/o Fortran [tests] - runs-on: ubuntu-18.04 + name: GNU@9.3 C++17 w/o Fortran [tests] + runs-on: ubuntu-20.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - uses: actions/checkout@v3 @@ -203,8 +208,8 @@ jobs: # Build 1D libamrex with configure configure-1d: - name: GNU@7.5 Release [configure 1D] - runs-on: ubuntu-18.04 + name: GNU@9.3 Release [configure 1D] + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - name: Dependencies @@ -217,8 +222,8 @@ jobs: # Build 3D libamrex with configure configure-3d: - name: GNU@7.5 Release [configure 3D] - runs-on: ubuntu-18.04 + name: GNU@11.2 Release [configure 3D] + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 - name: Dependencies @@ -231,8 +236,8 @@ jobs: # Build 3D libamrex with single precision and tiny profiler configure-3d-single-tprof: - name: GNU@7.5 Release [configure 3D] - runs-on: ubuntu-18.04 + name: GNU@9.3 Release [configure 3D] + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - name: Dependencies @@ -245,8 +250,8 @@ jobs: # Build 3D libamrex debug omp build with 
configure configure-3d-omp-debug: - name: GNU@7.5 OMP Debug [configure 3D] - runs-on: ubuntu-18.04 + name: GNU@9.3 OMP Debug [configure 3D] + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - name: Dependencies @@ -260,7 +265,7 @@ jobs: # Build Tools/Plotfile plotfile-tools: name: GNU Plotfile Tools [tools] - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - name: Dependencies @@ -272,8 +277,8 @@ jobs: # Build libamrex and run all tests tests_run: - name: GNU@7.5 C++14 [tests] - runs-on: ubuntu-18.04 + name: GNU@9.3 C++17 [tests] + runs-on: ubuntu-20.04 env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - uses: actions/checkout@v3 @@ -295,8 +300,8 @@ jobs: ctest --output-on-failure -R test_hdf5: - name: GNU@7.5 HDF5 I/O Test [tests] - runs-on: ubuntu-18.04 + name: GNU@9.3 HDF5 I/O Test [tests] + runs-on: ubuntu-20.04 env: CXX: h5pcc CC: h5cc diff --git a/Docs/sphinx_documentation/source/BuildingAMReX.rst b/Docs/sphinx_documentation/source/BuildingAMReX.rst index 8a377377700..331f9b8c9f6 100644 --- a/Docs/sphinx_documentation/source/BuildingAMReX.rst +++ b/Docs/sphinx_documentation/source/BuildingAMReX.rst @@ -35,8 +35,8 @@ list of important variables. 
+-----------------+-------------------------------------+--------------------+ | COMP | gnu, cray, ibm, intel, llvm, or pgi | none | +-----------------+-------------------------------------+--------------------+ - | CXXSTD | C++ standard (``c++14``, ``c++17``, | compiler default, | - | | ``c++20``) | at least ``c++14`` | + | CXXSTD | C++ standard (``c++17``, ``c++20``) | compiler default, | + | | | at least ``c++17`` | +-----------------+-------------------------------------+--------------------+ | DEBUG | TRUE or FALSE | FALSE | +-----------------+-------------------------------------+--------------------+ @@ -740,8 +740,8 @@ The AMReX team does development on Linux machines, from laptops to supercomputer We do not officially support AMReX on Windows, and many of us do not have access to any Windows machines. However, we believe there are no fundamental issues for it to work on Windows. -(1) AMReX mostly uses standard C++14, but for Windows C++17 is required. This is because we use - C++17 to support file system operations when POSIX I/O is not available. +(1) AMReX mostly uses standard C++17. +We run continuous integration tests on Windows with MSVC and Clang compilers. (2) We use POSIX signal handling when floating point exceptions, segmentation faults, etc. happen. This capability is not supported on Windows. diff --git a/Docs/sphinx_documentation/source/BuildingAMReX_Chapter.rst b/Docs/sphinx_documentation/source/BuildingAMReX_Chapter.rst index dd61bb254d3..3ecbc775c17 100644 --- a/Docs/sphinx_documentation/source/BuildingAMReX_Chapter.rst +++ b/Docs/sphinx_documentation/source/BuildingAMReX_Chapter.rst @@ -18,7 +18,7 @@ an application code then uses its own build system and links to AMReX as an exte Finally, AMReX can also be built with CMake, as detailed in the section on :ref:`sec:build:cmake`.
-AMReX requires a C++ compiler that supports the C++14 standard, a +AMReX requires a C++ compiler that supports the C++17 standard, a Fortran compiler that supports the Fortran 2003 standard, and a C compiler that supports the C99 standard. Prerequisites for building with GNU Make include Python (>= 2.7, including 3) and standard tools diff --git a/INSTALL b/INSTALL index efb40fbdb2e..ed1e0dfb36e 100644 --- a/INSTALL +++ b/INSTALL @@ -10,7 +10,7 @@ There are three ways to use AMReX. Fortran modules via `./configure` followed by `make` and `make install`. Type `./configure -h` to show help message. An application code uses its build system to compile and link to the - AMReX library. Because AMReX uses C++14 and Fortran, the linker + AMReX library. Because AMReX uses C++17 and Fortran, the linker needs to link the libraries. See `Tutorials/Basic/Build_with_libamrex` for an example of this approach. Note that this approach relies the make system in diff --git a/Src/Amr/AMReX_StateDescriptor.cpp b/Src/Amr/AMReX_StateDescriptor.cpp index 932479feeb2..1910dcf7b3f 100644 --- a/Src/Amr/AMReX_StateDescriptor.cpp +++ b/Src/Amr/AMReX_StateDescriptor.cpp @@ -42,23 +42,31 @@ StateDescriptor::BndryFunc::operator () (Real* data,const int* lo,const int* hi, { BL_ASSERT(m_func != 0 || m_func3D != 0); +#ifdef AMREX_USE_OMP bool thread_safe = bf_thread_safety(lo, hi, dom_lo, dom_hi, a_bc, 1); if (thread_safe) { - if (m_func != 0) - m_func(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); - else - m_func3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), - AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); - } else { +#endif + { + if (m_func != 0) { + m_func(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); + } else { + m_func3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), + AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + } + } #ifdef AMREX_USE_OMP + } 
else { #pragma omp critical (bndryfunc) -#endif - if (m_func != 0) - m_func(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); - else - m_func3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), - AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + { + if (m_func != 0) { + m_func(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); + } else { + m_func3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), + AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + } + } } +#endif } void @@ -69,23 +77,32 @@ StateDescriptor::BndryFunc::operator () (Real* data,const int* lo,const int* hi, { BL_ASSERT(m_gfunc != 0 || m_gfunc3D != 0); + amrex::ignore_unused(ng); +#ifdef AMREX_USE_OMP bool thread_safe = bf_thread_safety(lo, hi, dom_lo, dom_hi, a_bc, ng); if (thread_safe) { - if (m_gfunc != 0) - m_gfunc(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); - else - m_gfunc3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), - AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); - } else { +#endif + { + if (m_gfunc != 0) { + m_gfunc(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); + } else { + m_gfunc3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), + AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + } + } #ifdef AMREX_USE_OMP + } else { #pragma omp critical (bndryfunc) -#endif - if (m_gfunc != 0) - m_gfunc(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); - else - m_gfunc3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), - AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + { + if (m_gfunc != 0) { + m_gfunc(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); + } else { + 
m_gfunc3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), + AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + } + } } +#endif } void diff --git a/Src/Base/AMReX_Arena.cpp b/Src/Base/AMReX_Arena.cpp index 2082014d634..f7a46dc25c8 100644 --- a/Src/Base/AMReX_Arena.cpp +++ b/Src/Base/AMReX_Arena.cpp @@ -14,11 +14,11 @@ ///#include //#define AMREX_MLOCK(x,y) VirtualLock(x,y) //#define AMREX_MUNLOCK(x,y) VirtualUnlock(x,y) -#define AMREX_MLOCK(x,y) ((void)0) +//#define AMREX_MLOCK(x,y) ((void)0) #define AMREX_MUNLOCK(x,y) ((void)0) #else #include -#define AMREX_MLOCK(x,y) mlock(x,y) +//#define AMREX_MLOCK(x,y) mlock(x,y) #define AMREX_MUNLOCK(x,y) munlock(x,y) #endif @@ -132,13 +132,15 @@ Arena::allocate_system (std::size_t nbytes) if (arena_info.use_cpu_memory) { p = std::malloc(nbytes); +#ifndef _WIN32 #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #endif - if (p && arena_info.device_use_hostalloc) AMREX_MLOCK(p, nbytes); + if (p && (nbytes > 0) && arena_info.device_use_hostalloc) mlock(p, nbytes); #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop +#endif #endif } else if (arena_info.device_use_hostalloc) @@ -190,7 +192,16 @@ Arena::allocate_system (std::size_t nbytes) } #else p = std::malloc(nbytes); - if (p && arena_info.device_use_hostalloc) AMREX_MLOCK(p, nbytes); +#ifndef _WIN32 +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + if (p && (nbytes > 0) && arena_info.device_use_hostalloc) mlock(p, nbytes); +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif +#endif #endif if (p == nullptr) amrex::Abort("Sorry, malloc failed"); return p; diff --git a/Src/Base/AMReX_CTOParallelForImpl.H b/Src/Base/AMReX_CTOParallelForImpl.H index f4dd41ca0c8..e79122de24d 100644 --- a/Src/Base/AMReX_CTOParallelForImpl.H +++ 
b/Src/Base/AMReX_CTOParallelForImpl.H @@ -24,8 +24,8 @@ struct CompileTimeOptions { #if (__cplusplus >= 201703L) -namespace meta -{ +//namespace meta +//{ template constexpr auto operator+ (TypeList, TypeList) { return TypeList{}; @@ -45,7 +45,7 @@ namespace meta constexpr auto cartesian_product_n (TypeList) { return (TypeList>{} * ... * Ls{}); } -} +//} namespace detail { @@ -124,7 +124,7 @@ ParallelFor (TypeList /*list_of_compile_time_options*/, #if (__cplusplus >= 201703L) using OptionsListList = TypeList; detail::ParallelFor_helper1(N, std::forward(f), - meta::cartesian_product_n(OptionsListList{}), + cartesian_product_n(OptionsListList{}), runtime_options); #else amrex::ignore_unused(N, f, runtime_options); @@ -140,7 +140,7 @@ void ParallelFor (TypeList /*list_of_compile_time_options*/, #if (__cplusplus >= 201703L) using OptionsListList = TypeList; detail::ParallelFor_helper1(box, std::forward(f), - meta::cartesian_product_n(OptionsListList{}), + cartesian_product_n(OptionsListList{}), runtime_options); #else amrex::ignore_unused(box, f, runtime_options); @@ -157,7 +157,7 @@ ParallelFor (TypeList /*list_of_compile_time_options*/, #if (__cplusplus >= 201703L) using OptionsListList = TypeList; detail::ParallelFor_helper1(box, ncomp, std::forward(f), - meta::cartesian_product_n(OptionsListList{}), + cartesian_product_n(OptionsListList{}), runtime_options); #else amrex::ignore_unused(box, ncomp, f, runtime_options); diff --git a/Tools/AMRProfParser/GNUmakefile b/Tools/AMRProfParser/GNUmakefile index 619d67a557a..59fd2a54b0c 100644 --- a/Tools/AMRProfParser/GNUmakefile +++ b/Tools/AMRProfParser/GNUmakefile @@ -23,7 +23,6 @@ USE_MPI = FALSE USE_OMP = FALSE EBASE = amrprofparser BL_NO_FORT = FALSE -USE_CXX11 = TRUE include $(AMREX_HOME)/Tools/GNUMake/Make.defs include $(AMREX_HOME)/Src/Base/Make.package diff --git a/Tools/CMake/AMReXTypecheck.cmake b/Tools/CMake/AMReXTypecheck.cmake index 926fcda9daf..0b68fb8c274 100644 --- a/Tools/CMake/AMReXTypecheck.cmake +++ 
b/Tools/CMake/AMReXTypecheck.cmake @@ -250,7 +250,7 @@ function( add_typecheck_target _target) add_custom_command( OUTPUT ${_cppd_file} COMMAND ${CMAKE_C_COMPILER} - ARGS ${_cxx_defines} ${_includes} -E -P -x c -std=c99 ${_fullname} > ${_cppd_file} + ARGS ${_cxx_defines} ${_includes} -E -P -x c -std=c11 ${_fullname} > ${_cppd_file} COMMAND sed ARGS -i -e 's/amrex::Real/${AMREX_REAL}/g' ${_cppd_file} COMMAND sed diff --git a/Tools/CMake/AMReX_Config.cmake b/Tools/CMake/AMReX_Config.cmake index 1754b339094..c842db1e136 100644 --- a/Tools/CMake/AMReX_Config.cmake +++ b/Tools/CMake/AMReX_Config.cmake @@ -37,22 +37,18 @@ function (configure_amrex) # # Setup compilers # - # Set C++ standard and disable compiler-specific extensions, like "-std=gnu++14" for GNU + # Set C++ standard and disable compiler-specific extensions, like "-std=gnu++17" for GNU # This will also enforce the same standard with the CUDA compiler # Moreover, it will also enforce such standard on all the consuming targets # set_target_properties(amrex PROPERTIES CXX_EXTENSIONS OFF) - # minimum: C++14 on Linux, C++17 on Windows, C++17 for dpc++ and hip - if (AMReX_DPCPP OR AMReX_HIP) - target_compile_features(amrex PUBLIC cxx_std_17) - else () - target_compile_features(amrex PUBLIC $,Windows>,cxx_std_17,cxx_std_14>) - endif () + # minimum: C++17 + target_compile_features(amrex PUBLIC cxx_std_17) if (AMReX_CUDA) set_target_properties(amrex PROPERTIES CUDA_EXTENSIONS OFF) - # minimum: C++14 on Linux, C++17 on Windows - target_compile_features(amrex PUBLIC $,Windows>,cuda_std_17,cuda_std_14>) + # minimum: C++17 + target_compile_features(amrex PUBLIC cuda_std_17) endif() # diff --git a/Tools/GNUMake/Make.rules b/Tools/GNUMake/Make.rules index 5d6caa60e06..48ef6d9d3f8 100644 --- a/Tools/GNUMake/Make.rules +++ b/Tools/GNUMake/Make.rules @@ -441,7 +441,7 @@ $(tmpEXETempDir)/%.F.orig: %.F # & --> * $(tmpEXETempDir)/%-cppd.h: %.H @if [ ! 
-d $(tmpEXETempDir) ]; then mkdir -p $(tmpEXETempDir); fi - $(SILENT) $(CC) $(CPPFLAGS) -DAMREX_TYPECHECK $(includes) -E -P -x c -std=c99 $< -o $@ + $(SILENT) $(CC) $(CPPFLAGS) -DAMREX_TYPECHECK $(includes) -E -P -x c -std=c11 $< -o $@ @$(SHELL) -ec 'sed -i -e '\''s/amrex::Real/$(amrex_real)/g'\'' $@ ; \ sed -i -e '\''s/amrex_real/$(amrex_real)/g'\'' $@ ; \ sed -i -e '\''s/amrex_particle_real/$(amrex_particle_real)/g'\'' $@ ; \ diff --git a/Tools/GNUMake/comps/armclang.mak b/Tools/GNUMake/comps/armclang.mak index ccbfbeb77ed..d2826cb1134 100644 --- a/Tools/GNUMake/comps/armclang.mak +++ b/Tools/GNUMake/comps/armclang.mak @@ -64,11 +64,11 @@ CXXFLAGS += -Wno-c++17-extensions ifdef CXXSTD CXXSTD := $(strip $(CXXSTD)) else - CXXSTD := c++14 + CXXSTD := c++17 endif CXXFLAGS += -std=$(CXXSTD) -CFLAGS += -std=c99 +CFLAGS += -std=c11 FMODULES = -J$(fmoddir) -I $(fmoddir) diff --git a/Tools/GNUMake/comps/cray.mak b/Tools/GNUMake/comps/cray.mak index f75a56c5f75..cf484e6ec38 100644 --- a/Tools/GNUMake/comps/cray.mak +++ b/Tools/GNUMake/comps/cray.mak @@ -73,15 +73,15 @@ endif ifdef CXXSTD CXXSTD := $(strip $(CXXSTD)) else - CXXSTD := c++14 + CXXSTD := c++17 endif ifeq ($(CRAY_IS_CLANG_BASED),TRUE) CXXFLAGS += -std=$(CXXSTD) - CFLAGS += -std=c99 + CFLAGS += -std=c11 else CXXFLAGS += -h std=$(CXXSTD) - CFLAGS += -h c99 + CFLAGS += -h c11 endif F90FLAGS += -N 255 -em @@ -119,10 +119,6 @@ else endif endif -ifeq ($(CRAY_IS_CLANG_BASED),TRUE) - CXXFLAGS += -Wno-c++17-extensions -endif - CXXFLAGS += $(GENERIC_COMP_FLAGS) CFLAGS += $(GENERIC_COMP_FLAGS) FFLAGS += $(GENERIC_COMP_FLAGS) diff --git a/Tools/GNUMake/comps/dpcpp.mak b/Tools/GNUMake/comps/dpcpp.mak index b351f0ac731..33c05fc0c7a 100644 --- a/Tools/GNUMake/comps/dpcpp.mak +++ b/Tools/GNUMake/comps/dpcpp.mak @@ -69,7 +69,7 @@ else endif CXXFLAGS += -Wno-error=sycl-strict -fsycl -CFLAGS += -std=c99 +CFLAGS += -std=c11 ifneq ($(DEBUG),TRUE) # There is currently a bug that DEBUG build will crash. 
ifeq ($(DPCPP_AOT),TRUE) diff --git a/Tools/GNUMake/comps/gnu.mak b/Tools/GNUMake/comps/gnu.mak index 5e621eb140e..2d67d418717 100644 --- a/Tools/GNUMake/comps/gnu.mak +++ b/Tools/GNUMake/comps/gnu.mak @@ -38,23 +38,23 @@ ifeq ($(EXPORT_DYNAMIC),TRUE) GENERIC_GNU_FLAGS += -rdynamic -fno-omit-frame-pointer endif -gcc_major_ge_5 = $(shell expr $(gcc_major_version) \>= 5) -gcc_major_ge_6 = $(shell expr $(gcc_major_version) \>= 6) -gcc_major_ge_7 = $(shell expr $(gcc_major_version) \>= 7) gcc_major_ge_8 = $(shell expr $(gcc_major_version) \>= 8) gcc_major_ge_9 = $(shell expr $(gcc_major_version) \>= 9) gcc_major_ge_10 = $(shell expr $(gcc_major_version) \>= 10) gcc_major_ge_11 = $(shell expr $(gcc_major_version) \>= 11) +gcc_major_ge_12 = $(shell expr $(gcc_major_version) \>= 12) + +ifneq ($(gcc_major_ge_8),1) + $(error GCC < 8 not supported) +endif ifeq ($(THREAD_SANITIZER),TRUE) GENERIC_GNU_FLAGS += -fsanitize=thread endif ifeq ($(FSANITIZER),TRUE) GENERIC_GNU_FLAGS += -fsanitize=address -fsanitize=undefined - ifeq ($(gcc_major_ge_8),1) - GENERIC_GNU_FLAGS += -fsanitize=pointer-compare -fsanitize=pointer-subtract - GENERIC_GNU_FLAGS += -fsanitize=builtin -fsanitize=pointer-overflow - endif + GENERIC_GNU_FLAGS += -fsanitize=pointer-compare -fsanitize=pointer-subtract + GENERIC_GNU_FLAGS += -fsanitize=builtin -fsanitize=pointer-overflow endif ifeq ($(USE_OMP),TRUE) @@ -97,7 +97,7 @@ else endif ifeq ($(WARN_ALL),TRUE) - warning_flags = -Wall -Wextra -Wlogical-op + warning_flags = -Wall -Wextra -Wlogical-op -Wfloat-conversion -Wnull-dereference -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches ifeq ($(WARN_SIGN_COMPARE),FALSE) warning_flags += -Wno-sign-compare @@ -108,26 +108,10 @@ ifeq ($(WARN_ALL),TRUE) warning_flags += -Wpedantic endif - ifeq ($(gcc_major_ge_6),1) - warning_flags += -Wnull-dereference -Wmisleading-indentation -Wduplicated-cond - endif - - ifeq ($(gcc_major_ge_5),1) - warning_flags += -Wfloat-conversion - endif - ifneq 
($(WARN_SHADOW),FALSE) warning_flags += -Wshadow endif - ifeq ($(gcc_major_version),7) - warning_flags += -Wno-array-bounds - endif - - ifeq ($(gcc_major_ge7),1) - warning_flags += -Wduplicated-branches - endif - ifeq ($(gcc_major_ge10),1) warning_flags += -Wextra-semi endif @@ -161,21 +145,12 @@ endif ifdef CXXSTD CXXSTD := $(strip $(CXXSTD)) - ifeq ($(shell expr $(gcc_major_version) \< 5),1) - ifneq ($(NO_CONFIG_CHECKING),TRUE) - ifeq ($(CXXSTD),c++14) - $(error C++14 support requires GCC 5 or newer.) - endif - endif - endif CXXFLAGS += -std=$(CXXSTD) else - ifeq ($(gcc_major_version),5) - CXXFLAGS += -std=c++14 - endif + CXXFLAGS += -std=c++17 endif -CFLAGS += -std=gnu99 +CFLAGS += -std=c11 ######################################################################## diff --git a/Tools/GNUMake/comps/hip.mak b/Tools/GNUMake/comps/hip.mak index bb4c6d98a17..6005409f9cc 100644 --- a/Tools/GNUMake/comps/hip.mak +++ b/Tools/GNUMake/comps/hip.mak @@ -23,7 +23,7 @@ endif # Generic flags, always used CXXFLAGS = -std=$(CXXSTD) -m64 -CFLAGS = -std=c99 -m64 +CFLAGS = -std=c11 -m64 FFLAGS = -ffixed-line-length-none -fno-range-check -fno-second-underscore F90FLAGS = -ffree-line-length-none -fno-range-check -fno-second-underscore -fimplicit-none diff --git a/Tools/GNUMake/comps/intel.mak b/Tools/GNUMake/comps/intel.mak index 0c4d6e30b2a..2341192d163 100644 --- a/Tools/GNUMake/comps/intel.mak +++ b/Tools/GNUMake/comps/intel.mak @@ -39,21 +39,12 @@ endif ifdef CXXSTD CXXSTD := $(strip $(CXXSTD)) - ifneq ($(firstword $(sort 17.0 $(intel_version))), 17.0) - ifeq ($(CXXSTD),c++14) - $(error C++14 support requires Intel icpc 17.0 or newer.) - endif - endif CXXFLAGS += -std=$(CXXSTD) else - ifeq ($(firstword $(sort 17.0 $(intel_version))), 17.0) - CXXFLAGS += -std=c++14 - else - $(error Intel icpc 17.0 or newer is required.) 
- endif + CXXFLAGS += -std=c++17 endif -CFLAGS += -std=c99 +CFLAGS += -std=c11 F90FLAGS += -implicitnone @@ -64,11 +55,7 @@ FMODULES = -module $(fmoddir) -I$(fmoddir) GENERIC_COMP_FLAGS = ifeq ($(USE_OMP),TRUE) - ifeq ($(firstword $(sort 16.0 $(intel_version))), 16.0) - GENERIC_COMP_FLAGS += -qopenmp - else - GENERIC_COMP_FLAGS += -openmp - endif + GENERIC_COMP_FLAGS += -qopenmp endif CXXFLAGS += $(GENERIC_COMP_FLAGS) -pthread diff --git a/Tools/GNUMake/comps/llvm-flang.mak b/Tools/GNUMake/comps/llvm-flang.mak index 58a0a06b64e..c9abdaaaeeb 100644 --- a/Tools/GNUMake/comps/llvm-flang.mak +++ b/Tools/GNUMake/comps/llvm-flang.mak @@ -43,11 +43,11 @@ endif ifdef CXXSTD CXXSTD := $(strip $(CXXSTD)) else - CXXSTD := c++14 + CXXSTD := c++17 endif CXXFLAGS += -std=$(CXXSTD) -CFLAGS += -std=c99 +CFLAGS += -std=c11 FMODULES = -J$(fmoddir) -I $(fmoddir) diff --git a/Tools/GNUMake/comps/llvm.mak b/Tools/GNUMake/comps/llvm.mak index 86da5884b7f..ead1d9290c2 100644 --- a/Tools/GNUMake/comps/llvm.mak +++ b/Tools/GNUMake/comps/llvm.mak @@ -67,11 +67,11 @@ CXXFLAGS += -Wno-c++17-extensions ifdef CXXSTD CXXSTD := $(strip $(CXXSTD)) else - CXXSTD := c++14 + CXXSTD := c++17 endif CXXFLAGS += -std=$(CXXSTD) -CFLAGS += -std=c99 +CFLAGS += -std=c11 FFLAGS += -ffixed-line-length-none -fno-range-check -fno-second-underscore F90FLAGS += -ffree-line-length-none -fno-range-check -fno-second-underscore -fimplicit-none diff --git a/Tools/GNUMake/comps/nag.mak b/Tools/GNUMake/comps/nag.mak index faaf0db7155..55ec14b0620 100644 --- a/Tools/GNUMake/comps/nag.mak +++ b/Tools/GNUMake/comps/nag.mak @@ -52,17 +52,12 @@ endif ifdef CXXSTD CXXSTD := $(strip $(CXXSTD)) - ifeq ($(shell expr $(gcc_major_version) \< 5),1) - ifeq ($(CXXSTD),c++14) - $(error C++14 support requires GCC 5 or newer.) 
- endif - endif CXXFLAGS += -std=$(CXXSTD) else - CXXFLAGS += -std=c++14 + CXXFLAGS += -std=c++17 endif -CFLAGS += -std=gnu99 +CFLAGS += -std=c11 FFLAGS += -mismatch F90FLAGS += -mismatch -u diff --git a/Tools/GNUMake/comps/nvcc.mak b/Tools/GNUMake/comps/nvcc.mak index 9d9bf90ce51..f52dfeb6c86 100644 --- a/Tools/GNUMake/comps/nvcc.mak +++ b/Tools/GNUMake/comps/nvcc.mak @@ -10,21 +10,11 @@ else nvcc_minor_version := 9 endif -# Disallow CUDA toolkit versions < 10 +# Disallow CUDA toolkit versions < 11 -nvcc_major_lt_10 = $(shell expr $(nvcc_major_version) \< 10) -ifeq ($(nvcc_major_lt_10),1) - $(error Your nvcc version is $(nvcc_version). This is unsupported. Please use CUDA toolkit version 10.0 or newer.) -endif - -nvcc_forward_unknowns = 0 -ifeq ($(shell expr $(nvcc_major_version) \= 10),1) -ifeq ($(shell expr $(nvcc_minor_version) \>= 2),1) - nvcc_forward_unknowns = 1 -endif -endif -ifeq ($(shell expr $(nvcc_major_version) \>= 11),1) - nvcc_forward_unknowns = 1 +nvcc_major_lt_11 = $(shell expr $(nvcc_major_version) \< 11) +ifeq ($(nvcc_major_lt_11),1) + $(error Your nvcc version is $(nvcc_version). This is unsupported. Please use CUDA toolkit version 11.0 or newer.) endif ifeq ($(shell expr $(nvcc_major_version) \= 11),1) @@ -34,24 +24,6 @@ ifeq ($(shell expr $(nvcc_minor_version) \= 0),1) endif endif -ifeq ($(shell expr $(nvcc_major_version) \< 11),1) - # -MMD -MP not supported in < 11 - USE_LEGACY_DEPFLAGS = TRUE - DEPFLAGS = -endif - -ifeq ($(shell expr $(nvcc_major_version) \< 10),1) - # -MM not supported in < 10 - LEGACY_DEPFLAGS = -M -endif - -ifeq ($(shell expr $(nvcc_major_version) \= 10),1) -ifeq ($(shell expr $(nvcc_minor_version) \= 0),1) - # -MM not supported in 10.0 - LEGACY_DEPFLAGS = -M -endif -endif - # # nvcc compiler driver does not always accept pgc++ # as a host compiler at present. 
However, if we're using @@ -72,16 +44,14 @@ endif ifeq ($(lowercase_nvcc_host_comp),gnu) - ifeq ($(shell expr $(gcc_major_version) \< 5),1) - ifneq ($(NO_CONFIG_CHECKING),TRUE) - $(error C++14 support requires GCC 5 or newer.) - endif + ifeq ($(shell expr $(gcc_major_version) \< 8),1) + $(error GCC >= 8 required.) endif ifdef CXXSTD CXXSTD := $(strip $(CXXSTD)) else - CXXSTD = c++14 + CXXSTD = c++17 endif CXXFLAGS += -std=$(CXXSTD) @@ -95,27 +65,22 @@ ifeq ($(lowercase_nvcc_host_comp),gnu) else ifeq ($(lowercase_nvcc_host_comp),pgi) ifdef CXXSTD CXXSTD := $(strip $(CXXSTD)) - ifeq ($(shell expr $(gcc_major_version) \< 5),1) - ifeq ($(CXXSTD),c++14) - $(error C++14 support requires GCC 5 or newer.) - endif - endif else - CXXSTD := c++14 + CXXSTD := c++17 endif CXXFLAGS += -std=$(CXXSTD) NVCC_CCBIN ?= pgc++ - # In pgi.make, we use gcc_major_version to handle c++14 flag. + # In pgi.make, we use gcc_major_version to handle c++17 flag. CXXFLAGS_FROM_HOST := -ccbin=$(NVCC_CCBIN) -Xcompiler='$(CXXFLAGS)' --std=$(CXXSTD) CFLAGS_FROM_HOST := $(CXXFLAGS_FROM_HOST) else ifdef CXXSTD CXXSTD := $(strip $(CXXSTD)) else - CXXSTD := c++14 + CXXSTD := c++17 endif NVCC_CCBIN ?= $(CXX) @@ -124,7 +89,7 @@ else CFLAGS_FROM_HOST := $(CXXFLAGS_FROM_HOST) endif -NVCC_FLAGS = -Wno-deprecated-gpu-targets -m64 -arch=compute_$(CUDA_ARCH) -code=sm_$(CUDA_ARCH) -maxrregcount=$(CUDA_MAXREGCOUNT) --expt-relaxed-constexpr --expt-extended-lambda +NVCC_FLAGS = -Wno-deprecated-gpu-targets -m64 -arch=compute_$(CUDA_ARCH) -code=sm_$(CUDA_ARCH) -maxrregcount=$(CUDA_MAXREGCOUNT) --expt-relaxed-constexpr --expt-extended-lambda --forward-unknown-to-host-compiler # This is to work around a bug with nvcc, see: https://github.com/kokkos/kokkos/issues/1473 NVCC_FLAGS += -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored @@ -154,11 +119,6 @@ endif NVCC_FLAGS += $(XTRA_NVCC_FLAGS) -ifeq ($(nvcc_forward_unknowns),1) - NVCC_FLAGS += --forward-unknown-to-host-compiler -endif - -ifeq ($(shell expr 
$(nvcc_major_version) \>= 11),1) ifeq ($(GPU_ERROR_CAPTURE_THIS),TRUE) NVCC_FLAGS += --Werror ext-lambda-captures-this else @@ -166,7 +126,6 @@ ifeq ($(GPU_WARN_CAPTURE_THIS),TRUE) NVCC_FLAGS += --Wext-lambda-captures-this endif endif -endif nvcc_diag_error = 0 ifeq ($(shell expr $(nvcc_major_version) \>= 12),1) diff --git a/Tools/GNUMake/comps/nvhpc.mak b/Tools/GNUMake/comps/nvhpc.mak index 49f815213f1..d76e7c9d36e 100644 --- a/Tools/GNUMake/comps/nvhpc.mak +++ b/Tools/GNUMake/comps/nvhpc.mak @@ -94,19 +94,15 @@ endif # The logic here should be consistent with what's in nvcc.mak ifdef CXXSTD CXXSTD := $(strip $(CXXSTD)) - ifeq ($(shell expr $(gcc_major_version) \< 5),1) - ifeq ($(CXXSTD),c++14) - $(error C++14 support requires GCC 5 or newer.) - endif + ifeq ($(shell expr $(gcc_major_version) \< 8),1) + $(error GCC >= 8 required.) endif CXXFLAGS += -std=$(CXXSTD) else - ifeq ($(gcc_major_version),5) - CXXFLAGS += -std=c++14 - endif + CXXFLAGS += -std=c++17 endif -CFLAGS += -c99 +CFLAGS += -c11 CXXFLAGS += $(GENERIC_NVHPC_FLAGS) CFLAGS += $(GENERIC_NVHPC_FLAGS) diff --git a/Tools/GNUMake/comps/pgi.mak b/Tools/GNUMake/comps/pgi.mak index 0cf50d77287..d2736c71a33 100644 --- a/Tools/GNUMake/comps/pgi.mak +++ b/Tools/GNUMake/comps/pgi.mak @@ -87,20 +87,18 @@ endif # The logic here should be consistent with what's in nvcc.mak -ifeq ($(shell expr $(gcc_major_version) \< 5),1) - $(error C++14 support requires GCC 5 or newer.) 
+ifeq ($(shell expr $(gcc_major_version) \< 8),1) + $(error GCC >= 8 required) endif ifdef CXXSTD CXXSTD := $(strip $(CXXSTD)) CXXFLAGS += -std=$(CXXSTD) else - ifeq ($(gcc_major_version),5) - CXXFLAGS += -std=c++14 - endif + CXXFLAGS += -std=c++17 endif -CFLAGS += -c99 +CFLAGS += -c11 CXXFLAGS += $(GENERIC_PGI_FLAGS) CFLAGS += $(GENERIC_PGI_FLAGS) diff --git a/Tools/Plotfile/CMakeLists.txt b/Tools/Plotfile/CMakeLists.txt index 44f99d9523c..9f8f066fbbb 100644 --- a/Tools/Plotfile/CMakeLists.txt +++ b/Tools/Plotfile/CMakeLists.txt @@ -34,5 +34,5 @@ target_include_directories(fsnapshot PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) target_sources(fsnapshot PRIVATE AMReX_PPMUtil.H AMReX_PPMUtil.cpp) if (AMReX_CUDA) set_source_files_properties(AMReX_PPMUtil.cpp PROPERTIES LANGUAGE CUDA) - target_compile_features(fsnapshot PUBLIC cxx_std_14) + target_compile_features(fsnapshot PUBLIC cxx_std_17) endif()