Merge pull request #1211 from LLNL/feature/gunney/marching-cubes-scan

Faster MarchingCubes scan loop
LLNL · Dec 21, 2023 · e648b86 · e648b86
2 parents 22597be + 92c71fb
commit e648b86
Show file tree

Hide file tree

Showing 10 changed files with 1,411 additions and 308 deletions.
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -32,6 +32,11 @@ The Axom project release numbers follow [Semantic Versioning](http://semver.org/
   with `std::unordered_map`, but utilizes an open-addressing design.
 
 ### Changed
+- `MarchingCubes` allows user to select the underlying data-parallel implementation
+  - `fullParallel` works best on GPUs.
+  - `hybridParallel` reduces the amount of data processed and works best with
+     `MarchingCubesRuntimePolicy::seq`.
+  - `byPolicy` (the default) selects the implementation based on the runtime policy.
 - `MarchingCubes` and `DistributedClosestPoint` classes identify domains by their
   `state/domain_id` parameters if provided, or the local iteration index if not.
 - `MarchingCubes` and `DistributedClosestPoint` classes changed from requiring the Blueprint

diff --git a/src/axom/quest/CMakeLists.txt b/src/axom/quest/CMakeLists.txt
@@ -111,7 +111,7 @@ endif()
 
 blt_list_append(
     TO       quest_headers
-    ELEMENTS MarchingCubes.hpp detail/MarchingCubesImpl.hpp
+    ELEMENTS MarchingCubes.hpp detail/MarchingCubesHybridParallel.hpp detail/MarchingCubesFullParallel.hpp
     IF       CONDUIT_FOUND
                )
 

diff --git a/src/axom/quest/MarchingCubes.cpp b/src/axom/quest/MarchingCubes.cpp
@@ -6,13 +6,16 @@
 #include "axom/config.hpp"
 
 // Implementation requires Conduit.
-#ifdef AXOM_USE_CONDUIT
-  #include "conduit_blueprint.hpp"
+#ifndef AXOM_USE_CONDUIT
+  #error "MarchingCubes.cpp requires conduit"
+#endif
+#include "conduit_blueprint.hpp"
 
-  #include "axom/core/execution/execution_space.hpp"
-  #include "axom/quest/MarchingCubes.hpp"
-  #include "axom/quest/detail/MarchingCubesImpl.hpp"
-  #include "axom/fmt.hpp"
+#include "axom/core/execution/execution_space.hpp"
+#include "axom/quest/MarchingCubes.hpp"
+#include "axom/quest/detail/MarchingCubesHybridParallel.hpp"
+#include "axom/quest/detail/MarchingCubesFullParallel.hpp"
+#include "axom/fmt.hpp"
 
 namespace axom
 {
@@ -30,9 +33,8 @@ MarchingCubes::MarchingCubes(RuntimePolicy runtimePolicy,
   , m_maskFieldName(maskField)
   , m_maskPath(maskField.empty() ? std::string() : "fields/" + maskField)
 {
-  const bool isMultidomain = conduit::blueprint::mesh::is_multi_domain(bpMesh);
   SLIC_ASSERT_MSG(
-    isMultidomain,
+    conduit::blueprint::mesh::is_multi_domain(bpMesh),
     "MarchingCubes class input mesh must be in multidomain format.");
 
   m_singles.reserve(conduit::blueprint::mesh::number_of_domains(bpMesh));
@@ -63,6 +65,7 @@ void MarchingCubes::computeIsocontour(double contourVal)
   for(int dId = 0; dId < m_singles.size(); ++dId)
   {
     std::unique_ptr<MarchingCubesSingleDomain>& single = m_singles[dId];
+    single->setDataParallelism(m_dataParallelism);
     single->computeIsocontour(contourVal);
   }
 }
@@ -182,11 +185,9 @@ void MarchingCubesSingleDomain::setDomain(const conduit::Node& dom)
     dom.fetch_existing(axom::fmt::format("topologies/{}", m_topologyName)));
   SLIC_ASSERT(m_ndim >= 2 && m_ndim <= 3);
 
-  const conduit::Node& coordsValues =
-    dom.fetch_existing(coordsetPath + "/values");
-  bool isInterleaved = conduit::blueprint::mcarray::is_interleaved(coordsValues);
   SLIC_ASSERT_MSG(
-    !isInterleaved,
+    !conduit::blueprint::mcarray::is_interleaved(
+      dom.fetch_existing(coordsetPath + "/values")),
     "MarchingCubes currently requires contiguous coordinates layout.");
 }
 
@@ -215,64 +216,28 @@ void MarchingCubesSingleDomain::computeIsocontour(double contourVal)
   SLIC_ASSERT_MSG(!m_fcnFieldName.empty(),
                   "You must call setFunctionField before computeIsocontour.");
 
-  allocateImpl();
-  m_impl->initialize(*m_dom, m_topologyName, m_fcnFieldName, m_maskFieldName);
-  m_impl->setContourValue(contourVal);
-  m_impl->markCrossings();
-  m_impl->scanCrossings();
-  m_impl->computeContour();
-}
-
-void MarchingCubesSingleDomain::allocateImpl()
-{
-  using namespace detail::marching_cubes;
-  if(m_runtimePolicy == RuntimePolicy::seq)
-  {
-    m_impl = m_ndim == 2
-      ? std::unique_ptr<ImplBase>(
-          new MarchingCubesImpl<2, axom::SEQ_EXEC, axom::SEQ_EXEC>)
-      : std::unique_ptr<ImplBase>(
-          new MarchingCubesImpl<3, axom::SEQ_EXEC, axom::SEQ_EXEC>);
-  }
-  #ifdef AXOM_RUNTIME_POLICY_USE_OPENMP
-  else if(m_runtimePolicy == RuntimePolicy::omp)
+  // We have 2 implementations.  MarchingCubesHybridParallel is faster on the host
+  // and MarchingCubesFullParallel is faster on GPUs.  Both work in all cases.
+  // We can choose based on runtime policy or by user choice
+  if(m_dataParallelism ==
+       axom::quest::MarchingCubesDataParallelism::hybridParallel ||
+     (m_dataParallelism == axom::quest::MarchingCubesDataParallelism::byPolicy &&
+      m_runtimePolicy == RuntimePolicy::seq))
   {
-    m_impl = m_ndim == 2
-      ? std::unique_ptr<ImplBase>(
-          new MarchingCubesImpl<2, axom::OMP_EXEC, axom::SEQ_EXEC>)
-      : std::unique_ptr<ImplBase>(
-          new MarchingCubesImpl<3, axom::OMP_EXEC, axom::SEQ_EXEC>);
+    m_impl = axom::quest::detail::marching_cubes::newMarchingCubesHybridParallel(
+      m_runtimePolicy,
+      m_ndim);
   }
-  #endif
-  #ifdef AXOM_RUNTIME_POLICY_USE_CUDA
-  else if(m_runtimePolicy == RuntimePolicy::cuda)
-  {
-    m_impl = m_ndim == 2
-      ? std::unique_ptr<ImplBase>(
-          new MarchingCubesImpl<2, axom::CUDA_EXEC<256>, axom::CUDA_EXEC<1>>)
-      : std::unique_ptr<ImplBase>(
-          new MarchingCubesImpl<3, axom::CUDA_EXEC<256>, axom::CUDA_EXEC<1>>);
-  }
-  #endif
-  #ifdef AXOM_RUNTIME_POLICY_USE_HIP
-  else if(m_runtimePolicy == RuntimePolicy::hip)
-  {
-    m_impl = m_ndim == 2
-      ? std::unique_ptr<ImplBase>(
-          new MarchingCubesImpl<2, axom::HIP_EXEC<256>, axom::HIP_EXEC<1>>)
-      : std::unique_ptr<ImplBase>(
-          new MarchingCubesImpl<3, axom::HIP_EXEC<256>, axom::HIP_EXEC<1>>);
-  }
-  #endif
   else
   {
-    SLIC_ERROR(axom::fmt::format(
-      "MarchingCubesSingleDomain has no implementation for runtime policy {}",
-      m_runtimePolicy));
+    m_impl = axom::quest::detail::marching_cubes::newMarchingCubesFullParallel(
+      m_runtimePolicy,
+      m_ndim);
   }
+  m_impl->initialize(*m_dom, m_topologyName, m_fcnFieldName, m_maskFieldName);
+  m_impl->setContourValue(contourVal);
+  m_impl->computeContourMesh();
 }
 
-#endif  // AXOM_USE_CONDUIT
-
 }  // end namespace quest
 }  // end namespace axom
diff --git a/src/axom/quest/MarchingCubes.hpp b/src/axom/quest/MarchingCubes.hpp
@@ -38,9 +38,25 @@ namespace marching_cubes
 {
 template <int DIM, typename ExecSpace, typename SequentialLoopPolicy>
 class MarchingCubesImpl;
-}
+}  // namespace marching_cubes
 }  // namespace detail
 
+/*!
+  @brief Enum for implementation.
+
+  Partial parallel implementation uses a non-parallizable loop and
+  processes less data.  It has been shown to work well on CPUs.
+  Full parallel implementation processes more data, but parallelizes
+  fully and has been shown to work well on GPUs.  byPolicy chooses
+  based on runtime policy.
+*/
+enum class MarchingCubesDataParallelism
+{
+  byPolicy = 0,
+  hybridParallel = 1,
+  fullParallel = 2
+};
+
 class MarchingCubesSingleDomain;
 
 /*!
@@ -150,9 +166,23 @@ class MarchingCubes
     const std::string &cellIdField = {},
     const std::string &domainIdField = {});
 
+  /*!
+    @brief Set choice of data-parallel implementation.
+
+    By default, choice is MarchingCubesDataParallelism::byPolicy.
+  */
+  void setDataParallelism(MarchingCubesDataParallelism dataPar)
+  {
+    m_dataParallelism = dataPar;
+  }
+
 private:
   RuntimePolicy m_runtimePolicy;
 
+  //@brief Choice of full or partial data-parallelism, or byPolicy.
+  MarchingCubesDataParallelism m_dataParallelism =
+    MarchingCubesDataParallelism::byPolicy;
+
   //! @brief Single-domain implementations.
   axom::Array<std::unique_ptr<MarchingCubesSingleDomain>> m_singles;
   const std::string m_topologyName;
@@ -172,9 +202,6 @@ class MarchingCubes
  */
 class MarchingCubesSingleDomain
 {
-  template <int DIM, typename ExecSpace, typename SequentialLoopPolicy>
-  friend class detail::marching_cubes::MarchingCubesImpl;
-
 public:
   using RuntimePolicy = axom::runtime_policy::Policy;
   /*!
@@ -195,7 +222,6 @@ class MarchingCubesSingleDomain
    *
    * Some data from \a dom may be cached by the constructor.
    * Any change to it after the constructor leads to undefined behavior.
-   * See setDomain(const conduit::Node &) for requirements on \a dom.
    *
    * The mesh coordinates should be stored contiguously.  See
    * conduit::blueprint::is_contiguous().  In the future, this
@@ -207,6 +233,11 @@ class MarchingCubesSingleDomain
                             const std::string &topologyName,
                             const std::string &maskfield);
 
+  void setDataParallelism(MarchingCubesDataParallelism &dataPar)
+  {
+    m_dataParallelism = dataPar;
+  }
+
   /*!
     @brief Specify the field containing the nodal scalar function
     in the input mesh.
@@ -261,25 +292,6 @@ class MarchingCubesSingleDomain
     m_impl->populateContourMesh(mesh, cellIdField);
   }
 
-private:
-  RuntimePolicy m_runtimePolicy;
-  /*!
-    \brief Computational mesh as a conduit::Node.
-  */
-  const conduit::Node *m_dom;
-  int m_ndim;
-
-  //!@brief Name of Blueprint topology in m_dom.
-  const std::string m_topologyName;
-
-  std::string m_fcnFieldName;
-  //!@brief Path to nodal scalar function in m_dom.
-  std::string m_fcnPath;
-
-  const std::string m_maskFieldName;
-  //!@brief Path to mask in m_dom.
-  const std::string m_maskPath;
-
   /*!
     @brief Base class for implementations templated on dimension
     and execution space.
@@ -296,16 +308,9 @@ class MarchingCubesSingleDomain
                             const std::string &maskPath) = 0;
     //!@brief Set the contour value
     virtual void setContourValue(double contourVal) = 0;
-    //@{
-    //!@name Phases of the computation
-    //!@brief Mark domain cells that cross the contour.
-    virtual void markCrossings() = 0;
-    //!@brief Precompute some metadata for contour mesh.
-    virtual void scanCrossings() = 0;
-    //!@brief Generate the contour mesh in internal data format.
-    virtual void computeContour() = 0;
-    //!@brief Get the number of contour mesh cells generated.
-    //@}
+    //!@brief Compute the contour mesh.
+    virtual void computeContourMesh() = 0;
+    //!@brief Return number of contour mesh facets generated.
     virtual axom::IndexType getContourCellCount() const = 0;
     /*!
       @brief Populate output mesh object with generated contour.
@@ -319,9 +324,31 @@ class MarchingCubesSingleDomain
     virtual ~ImplBase() { }
   };
 
+private:
+  RuntimePolicy m_runtimePolicy;
+
+  //@brief Choice of full or partial data-parallelism, or byPolicy.
+  MarchingCubesDataParallelism m_dataParallelism =
+    MarchingCubesDataParallelism::byPolicy;
+
+  /*!
+    \brief Computational mesh as a conduit::Node.
+  */
+  const conduit::Node *m_dom;
+  int m_ndim;
+
+  //!@brief Name of Blueprint topology in m_dom.
+  const std::string m_topologyName;
+
+  std::string m_fcnFieldName;
+  //!@brief Path to nodal scalar function in m_dom.
+  std::string m_fcnPath;
+
+  const std::string m_maskFieldName;
+  //!@brief Path to mask in m_dom.
+  const std::string m_maskPath;
+
   std::unique_ptr<ImplBase> m_impl;
-  //!@brief Allocate implementation object and set m_impl.
-  void allocateImpl();
 
   /*!
    * \brief Set the blueprint single-domain mesh.

diff --git a/src/axom/quest/MeshViewUtil.hpp b/src/axom/quest/MeshViewUtil.hpp
@@ -221,6 +221,7 @@ class MeshViewUtil
     @param [in] topologyName Name of topology in the domain.
 
     If \a topologyName is omitted, use the first topology.
+
     The topology dimension must match DIM.
   */
   MeshViewUtil(conduit::Node& bpDomain, const std::string& topologyName = "")