From bb3b3c8fede882478dae81c7daafd33b1393faa0 Mon Sep 17 00:00:00 2001
From: siddanib <45137944+siddanib@users.noreply.github.com>
Date: Tue, 7 May 2024 18:28:11 -0700
Subject: [PATCH] README.rst in tests/test_MPMD/test_1 (#312)

---
 .zenodo.json                      |  5 +++
 tests/test_MPMD/test_1/README.rst | 43 +++++++++++++++++++++++
 tests/test_MPMD/test_1/main.cpp   | 26 +++++++++++---
 tests/test_MPMD/test_1/main.py    | 58 ++-----------------------------
 4 files changed, 72 insertions(+), 60 deletions(-)
 create mode 100644 tests/test_MPMD/test_1/README.rst

diff --git a/.zenodo.json b/.zenodo.json
index 40588d6c..44474858 100644
--- a/.zenodo.json
+++ b/.zenodo.json
@@ -44,6 +44,11 @@
       "affiliation": "Lawrence Berkeley National Laboratory",
       "name": "Zhang, Weiqun",
       "orcid": "0000-0001-8092-1974"
+    },
+    {
+      "affiliation": "Lawrence Berkeley National Laboratory",
+      "name": "Siddani, Bhargav Sriram",
+      "orcid": "0000-0002-3535-4429"
     }
   ],
   "license": {
diff --git a/tests/test_MPMD/test_1/README.rst b/tests/test_MPMD/test_1/README.rst
new file mode 100644
index 00000000..ac627090
--- /dev/null
+++ b/tests/test_MPMD/test_1/README.rst
@@ -0,0 +1,43 @@
+AMReX-MPMD
+==========
+
+AMReX-MPMD utilizes the Multiple Program Multiple Data (MPMD) feature of MPI to provide cross-functionality for AMReX-based codes.
+
+Test
+====
+
+The current test leverages the AMReX-MPMD capability to perform a data (MultiFab) transfer between the *main.cpp* and *main.py* scripts.
+The test is based on a two-component MultiFab. The first component is populated in *main.cpp* before it is transferred to *main.py*.
+The *main.py* script then fills the second component based on the received value of the first component.
+Finally, the second component is transferred back from *main.py* to *main.cpp*.
+**This test requires MPI and mpi4py**.
+
+pyAMReX compile
+---------------
+
+.. code-block:: bash
+
+   # find dependencies & configure
+   # Include -DAMReX_GPU_BACKEND=CUDA for the GPU version
+   cmake -S . -B build -DAMReX_SPACEDIM="1;2;3" -DAMReX_MPI=ON -DpyAMReX_amrex_src=/path/to/amrex
+
+   # compile & install, here we use four threads
+   cmake --build build -j 4 --target pip_install
+
+main.cpp compile
+----------------
+
+.. code-block:: bash
+
+   # Include USE_CUDA=TRUE for the GPU version
+   make AMREX_HOME=/path/to/amrex -j 4
+
+Run
+---
+
+Please note that the MPI ranks assigned to each application/code need to be contiguous, i.e., MPI ranks 0-7 are for *main.cpp* and 8-11 are for *main.py*.
+This may be the default behaviour on several systems.
+
+.. code-block:: bash
+
+   mpirun -np 8 ./main3d.gnu.DEBUG.MPI.ex : -np 4 python main.py
diff --git a/tests/test_MPMD/test_1/main.cpp b/tests/test_MPMD/test_1/main.cpp
index 24e4b81b..01d80373 100644
--- a/tests/test_MPMD/test_1/main.cpp
+++ b/tests/test_MPMD/test_1/main.cpp
@@ -57,16 +57,34 @@ int main(int argc, char* argv[])
             mf_array(i,j,k,0) = 1.0 + std::exp(-r_squared);
         });
-    }
+    }
     // Send ONLY the first populated MultiFab component to the other app
     copr.send(mf,0,1);
     // Receive ONLY the second MultiFab component from the other app
     copr.recv(mf,1,1);

-    //Plot MultiFab Data
-    WriteSingleLevelPlotfile("plt_cpp_001", mf, {"comp0","comp1"}, geom, 0., 0);
+    // Cross-verification
+    for(amrex::MFIter mfi(mf); mfi.isValid(); ++mfi){
+        const amrex::Box& bx = mfi.validbox();
+        const amrex::Array4<amrex::Real>& mf_array = mf.array(mfi);
+
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k){
+
+            mf_array(i,j,k,1) -= (amrex::Real(10.0)*mf_array(i,j,k,0));
+        });
+    }
+    amrex::Real glb_max = amrex::Real(0.0);
+    // Local Max
+    for(amrex::MFIter mfi(mf); mfi.isValid(); ++mfi) {
+        amrex::FArrayBox& fab = mf[mfi];
+        glb_max = amrex::max(glb_max,fab.maxabs(1));
+    }
+    // Global Max
+    amrex::ParallelAllReduce::Max(glb_max,comm);
+    if (glb_max != amrex::Real(0.0)) {
+        amrex::Abort("There appears to be a mismatch in data-ordering \n");
+    }

     }
     amrex::Finalize();
     amrex::MPMD::Finalize();
-
 }
diff --git a/tests/test_MPMD/test_1/main.py b/tests/test_MPMD/test_1/main.py
index 33eb7440..fdc251ed 100644
--- a/tests/test_MPMD/test_1/main.py
+++ b/tests/test_MPMD/test_1/main.py
@@ -12,35 +12,6 @@

 import amrex.space3d as amr

-
-def load_cupy():
-    if amr.Config.have_gpu:
-        try:
-            import cupy as cp
-
-            xp = cp
-            amr.Print("Note: found and will use cupy")
-        except ImportError:
-            amr.Print(
-                "Warning: GPU found but cupy not available! Trying managed memory in numpy..."
-            )
-            import numpy as np
-
-            xp = np
-        if amr.Config.gpu_backend == "SYCL":
-            amr.Print("Warning: SYCL GPU backend not yet implemented for Python")
-            import numpy as np
-
-            xp = np
-
-    else:
-        import numpy as np
-
-        xp = np
-        amr.Print("Note: found and will use numpy")
-    return xp
-
-
 # Initialize amrex::MPMD to establish communication across the two apps
 # However, leverage MPMD_Initialize_without_split
 # so that communication split can be performed using mpi4py.MPI
@@ -50,8 +21,6 @@ def load_cupy():

 # Initialize AMReX
 amr.initialize_when_MPMD([], app_comm_py)
-# CPU/GPU logic
-xp = load_cupy()
 amr.Print(f"Hello world from pyAMReX version {amr.__version__}\n")
 # Create a MPMD Copier that gets the BoxArray information from the other (C++) app
 copr = amr.MPMD_Copier(True)
@@ -66,36 +35,13 @@ def load_cupy():
 # Fill the second MultiFab component based on the first component
 for mfi in mf:
-    # Preferred way to fill array using fast ranged operations:
-    # - xp.array is indexed in reversed order (n,z,y,x),
-    #   .T creates a view into the AMReX (x,y,z,n) order
-    # - indices are local (range from 0 to box size)
-    mf_array = xp.array(mf.array(mfi), copy=False).T
-
+    # Convert Array4 to numpy/cupy array
+    mf_array = mf.array(mfi).to_xp(copy=False, order="F")
     mf_array[:, :, :, 1] = 10.0 * mf_array[:, :, :, 0]

 # Send ONLY the second MultiFab component to the other (C++) app
 copr.send(mf, 1, 1)

-# Plot MultiFab data
-# HERE THE DOMAIN INFORMATION IS ONLY BEING UTILIZED TO SAVE A PLOTFILE
-# How many grid cells in each direction over the problem domain
-n_cell = 32
-# Integer vector indicating the lower coordinate bounds
-dom_lo = amr.IntVect(0, 0, 0)
-# Integer vector indicating the upper coordinate bounds
-dom_hi = amr.IntVect(n_cell - 1, n_cell - 1, n_cell - 1)
-# Box containing the coordinates of this domain
-domain = amr.Box(dom_lo, dom_hi)
-# Geometry: physical properties for data on our domain
-real_box = amr.RealBox([0.0, 0.0, 0.0], [1.0, 1.0, 1.0])
-coord = 0  # Cartesian
-is_per = [0, 0, 0]  # periodicity
-geom = amr.Geometry(domain, real_box, coord, is_per)
-plotfile = amr.concatenate(root="plt_py_", num=1, mindigits=3)
-varnames = amr.Vector_string(["comp0", "comp1"])
-amr.write_single_level_plotfile(plotfile, mf, varnames, geom, time=0.0, level_step=0)
-
 # Finalize AMReX
 amr.finalize()
 # Finalize AMReX::MPMD
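
The ``mpirun ... : ...`` launch above places both executables on one shared ``MPI_COMM_WORLD`` whose ranks are then divided per application, which is why the README asks for contiguous rank blocks. The following standalone mpi4py sketch is not part of the patch and only illustrates that bookkeeping: it splits a shared world communicator into per-app communicators, assuming the 8 + 4 rank layout used as the example above (in the test itself, the split is performed with mpi4py after ``MPMD_Initialize_without_split``, as the comments in *main.py* indicate).

.. code-block:: python

   # Standalone illustration (not part of this patch): split the MPI_COMM_WORLD
   # shared by an MPMD launch into one communicator per application.
   # The 8 + 4 rank layout mirrors the README example and is an assumption.
   from mpi4py import MPI

   world = MPI.COMM_WORLD
   world_rank = world.Get_rank()

   # Contiguous rank blocks per application:
   # ranks 0-7 -> app 0 (main.cpp), ranks 8-11 -> app 1 (main.py)
   color = 0 if world_rank < 8 else 1
   app_comm = world.Split(color, key=world_rank)

   print(
       f"world rank {world_rank} -> app {color}, "
       f"local rank {app_comm.Get_rank()} of {app_comm.Get_size()}"
   )

Each application then initializes AMReX on its own sub-communicator (``amr.initialize_when_MPMD([], app_comm_py)`` on the Python side), while the MPMD Copier handles the cross-code send/recv of the MultiFab components.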