From 54741c5e556147ae06eaba80b1965a5213883139 Mon Sep 17 00:00:00 2001
From: Andrew Myers <atmyers2@gmail.com>
Date: Thu, 22 Sep 2022 13:43:58 -0700
Subject: [PATCH] Enable OpenMP in particle push and coordinate transformation
 routines. (#241)

* Enable OpenMP in particle push and coordinate transformation routines.

* enable tiling by default if not running on GPU

* use dynamic scheduling by default

* do it in a nicer way, with a named function

* add some docstrings

* also enable omp in the new spacecharge particle gather and push

* turn on dynamic scheduling by default

* also accelerate mfiter loop with omp
---
 src/initialization/InitParser.cpp             | 13 +++++++
 src/particles/ImpactXParticleContainer.H      | 36 +++++++++++++++++--
 src/particles/ImpactXParticleContainer.cpp    | 23 ++++++++++++
 src/particles/Push.cpp                        |  3 ++
 .../spacecharge/ForceFromSelfFields.cpp       |  3 ++
 src/particles/spacecharge/GatherAndPush.cpp   |  3 ++
 .../CoordinateTransformation.cpp              |  3 ++
 7 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/src/initialization/InitParser.cpp b/src/initialization/InitParser.cpp
index 604e7ef0f..7f4ee7d67 100644
--- a/src/initialization/InitParser.cpp
+++ b/src/initialization/InitParser.cpp
@@ -22,5 +22,18 @@ namespace impactx::initialization
         bool abort_on_out_of_gpu_memory = true; // AMReX' default: false
         pp_amrex.query("abort_on_out_of_gpu_memory", abort_on_out_of_gpu_memory);
         pp_amrex.add("abort_on_out_of_gpu_memory", abort_on_out_of_gpu_memory);
+
+        // AMReX switches particle tiling off by default. We keep it off when
+        // compiling for GPU execution and switch it on when compiling for CPU;
+        // queryAdd() still lets a user-provided "particles.do_tiling" win.
+        {
+            amrex::ParmParse pp_particles("particles");
+#ifdef AMREX_USE_GPU
+            bool do_tiling = false; // By default, tiling is off on GPU
+#else
+            bool do_tiling = true;
+#endif
+            pp_particles.queryAdd("do_tiling", do_tiling);
+        }
     }
 } // namespace impactx
diff --git a/src/particles/ImpactXParticleContainer.H b/src/particles/ImpactXParticleContainer.H
index 73c321207..6744cf2fd 100644
--- a/src/particles/ImpactXParticleContainer.H
+++ b/src/particles/ImpactXParticleContainer.H
@@ -72,6 +72,38 @@ namespace impactx
         };
     };
 
+    /** AMReX iterator for particle boxes
+     *
+     * We subclass here so that OpenMP `dynamic` scheduling, instead of AMReX'
+     * `static` default, is requested unless `impactx.do_dynamic_scheduling` is off.
+     */
+    class ParIter
+        : public amrex::ParIter<0, 0, RealSoA::nattribs, IntSoA::nattribs>
+    {
+    public:
+        using amrex::ParIter<0, 0, RealSoA::nattribs, IntSoA::nattribs>::ParIter;
+        //! Iterate over `pc` on `level`, starting from a default amrex::MFItInfo
+        ParIter (ContainerType& pc, int level);
+        //! Same, but starting from `info`; the scheduling choice is set on `info` itself
+        ParIter (ContainerType& pc, int level, amrex::MFItInfo& info);
+    };
+
+    /** Const AMReX iterator for particle boxes - data is read only.
+     *
+     * We subclass here so that OpenMP `dynamic` scheduling, instead of AMReX'
+     * `static` default, is requested unless `impactx.do_dynamic_scheduling` is off.
+     */
+    class ParConstIter
+        : public amrex::ParConstIter<0, 0, RealSoA::nattribs, IntSoA::nattribs>
+    {
+    public:
+        using amrex::ParConstIter<0, 0, RealSoA::nattribs, IntSoA::nattribs>::ParConstIter;
+        //! Iterate over `pc` on `level`, starting from a default amrex::MFItInfo
+        ParConstIter (ContainerType& pc, int level);
+        //! Same, but starting from `info`; the scheduling choice is set on `info` itself
+        ParConstIter (ContainerType& pc, int level, amrex::MFItInfo& info);
+    };
+
     /** Beam Particles in ImpactX
      *
      * This class stores particles, distributed over MPI ranks.
@@ -81,10 +113,10 @@ namespace impactx
     {
     public:
         //! amrex iterator for particle boxes
-        using iterator = amrex::ParIter<0, 0, RealSoA::nattribs, IntSoA::nattribs>;
+        using iterator = impactx::ParIter;
 
         //! amrex constant iterator for particle boxes (read-only)
-        using const_iterator = amrex::ParConstIter<0, 0, RealSoA::nattribs, IntSoA::nattribs>;
+        using const_iterator = impactx::ParConstIter;
 
         //! Construct a new particle container
         ImpactXParticleContainer (amrex::AmrCore* amr_core);
diff --git a/src/particles/ImpactXParticleContainer.cpp b/src/particles/ImpactXParticleContainer.cpp
index 864b59a1a..9f80c3955 100644
--- a/src/particles/ImpactXParticleContainer.cpp
+++ b/src/particles/ImpactXParticleContainer.cpp
@@ -24,6 +24,29 @@
 
 namespace impactx
 {
+    bool do_omp_dynamic () {
+        // runtime switch "impactx.do_dynamic_scheduling"; stays true when not set
+        bool use_dynamic = true;
+        amrex::ParmParse("impactx").query("do_dynamic_scheduling", use_dynamic);
+        return use_dynamic;
+    }
+
+    ParIter::ParIter (ContainerType& pc, int level) // uses a default amrex::MFItInfo
+        : amrex::ParIter<0, 0, RealSoA::nattribs, IntSoA::nattribs>(pc, level,
+                   amrex::MFItInfo().SetDynamic(do_omp_dynamic())) {}
+
+    ParIter::ParIter (ContainerType& pc, int level, amrex::MFItInfo& info)
+        : amrex::ParIter<0, 0, RealSoA::nattribs, IntSoA::nattribs>(pc, level,
+              info.SetDynamic(do_omp_dynamic())) {} // applied to the caller's info object itself
+
+    ParConstIter::ParConstIter (ContainerType& pc, int level) // uses a default amrex::MFItInfo
+        : amrex::ParConstIter<0, 0, RealSoA::nattribs, IntSoA::nattribs>(pc, level,
+              amrex::MFItInfo().SetDynamic(do_omp_dynamic())) {}
+
+    ParConstIter::ParConstIter (ContainerType& pc, int level, amrex::MFItInfo& info)
+        : amrex::ParConstIter<0, 0, RealSoA::nattribs, IntSoA::nattribs>(pc, level,
+              info.SetDynamic(do_omp_dynamic())) {} // applied to the caller's info object itself
+
     ImpactXParticleContainer::ImpactXParticleContainer (amrex::AmrCore* amr_core)
         : amrex::ParticleContainer<0, 0, RealSoA::nattribs, IntSoA::nattribs>(amr_core->GetParGDB())
     {
diff --git a/src/particles/Push.cpp b/src/particles/Push.cpp
index 828b8e3cb..10c17d571 100644
--- a/src/particles/Push.cpp
+++ b/src/particles/Push.cpp
@@ -118,6 +118,9 @@ namespace detail
 
             // loop over all particle boxes
             using ParIt = ImpactXParticleContainer::iterator;
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
+#endif
             for (ParIt pti(pc, lev); pti.isValid(); ++pti) {
                 const int np = pti.numParticles();
                 //const auto t_lev = pti.GetLevel();
diff --git a/src/particles/spacecharge/ForceFromSelfFields.cpp b/src/particles/spacecharge/ForceFromSelfFields.cpp
index d5287c253..d92be8cd7 100644
--- a/src/particles/spacecharge/ForceFromSelfFields.cpp
+++ b/src/particles/spacecharge/ForceFromSelfFields.cpp
@@ -40,6 +40,9 @@ namespace impactx::spacecharge
             space_charge_field.at(lev).at("y").setVal(0.);
             space_charge_field.at(lev).at("z").setVal(0.);
 
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
+#endif
             for (amrex::MFIter mfi(phi.at(lev)); mfi.isValid(); ++mfi) {
 
                 amrex::Box bx = mfi.validbox();
diff --git a/src/particles/spacecharge/GatherAndPush.cpp b/src/particles/spacecharge/GatherAndPush.cpp
index 74d7111c4..edd3343e6 100644
--- a/src/particles/spacecharge/GatherAndPush.cpp
+++ b/src/particles/spacecharge/GatherAndPush.cpp
@@ -43,6 +43,9 @@ namespace impactx::spacecharge
 
             // loop over all particle boxes
             using ParIt = ImpactXParticleContainer::iterator;
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
+#endif
             for (ParIt pti(pc, lev); pti.isValid(); ++pti) {
                 const int np = pti.numParticles();
 
diff --git a/src/particles/transformation/CoordinateTransformation.cpp b/src/particles/transformation/CoordinateTransformation.cpp
index e67296c00..71d4741c3 100644
--- a/src/particles/transformation/CoordinateTransformation.cpp
+++ b/src/particles/transformation/CoordinateTransformation.cpp
@@ -37,6 +37,9 @@ namespace transformation {
         for (int lev = 0; lev <= nLevel; ++lev) {
             // loop over all particle boxes
             using ParIt = ImpactXParticleContainer::iterator;
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
+#endif
             for (ParIt pti(pc, lev); pti.isValid(); ++pti) {
                 const int np = pti.numParticles();