From e9cbc045b931ae1bc2507a76e37e6089d92dc138 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Tue, 28 Sep 2021 13:54:19 -0500
Subject: [PATCH 01/35] Proper abort when users request excessive orbitals.

---
 src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
index e3e2087977..f9bcaa8e44 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
@@ -468,6 +468,13 @@ void EinsplineSetBuilder::OccupyBands_ESHDF(int spin, int sortBands, int numOrbs
         maxOrbs++;
     }
   }
+
+  app_log() << SortBands.size() << " complex-valued orbitals supplied by h5 can be expanded up to " << maxOrbs
+            << " SPOs." << std::endl;
+  if (maxOrbs < numOrbs)
+    myComm->barrier_and_abort("EinsplineSetBuilder::OccupyBands_ESHDF user input requests "
+                              "more orbitals than what the h5 file supplies.");
+
   // Now sort the bands by energy
   if (sortBands == 2)
   {
@@ -644,7 +651,7 @@ void EinsplineSetBuilder::OccupyBands_ESHDF(int spin, int sortBands, int numOrbs
     orbIndex++;
   }
   NumDistinctOrbitals = orbIndex;
-  app_log() << "We will read " << NumDistinctOrbitals << " distinct orbitals.\n";
+  app_log() << "We will read " << NumDistinctOrbitals << " distinct complex-valued orbitals from h5.\n";
   app_log() << "There are " << NumCoreOrbs << " core states and " << NumValenceOrbs << " valence states.\n";
 }
 

From d5e6abb2168feeafd5e3009433b41551feff8e26 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Tue, 28 Sep 2021 14:45:20 -0500
Subject: [PATCH 02/35] Rename UseRealOrbitals to use_real_splines_

---
 src/QMCWaveFunctions/EinsplineSetBuilder.h       |  3 ++-
 .../EinsplineSetBuilderCommon.cpp                | 16 ++++++++--------
 .../EinsplineSetBuilder_createSPOs.cpp           | 12 ++++++------
 .../EinsplineSpinorSetBuilder.cpp                |  2 +-
 4 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/src/QMCWaveFunctions/EinsplineSetBuilder.h b/src/QMCWaveFunctions/EinsplineSetBuilder.h
index daca4bacfd..f898efd2b4 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilder.h
+++ b/src/QMCWaveFunctions/EinsplineSetBuilder.h
@@ -234,7 +234,8 @@ class EinsplineSetBuilder : public SPOSetBuilder
   // clone
   std::vector<TinyVector<int, OHMMS_DIM>> UseTwists;
   std::vector<int> IncludeTwists, DistinctTwists;
-  bool UseRealOrbitals;
+  /// if false, splines are conceptually complex valued
+  bool use_real_splines_;
   int NumDistinctOrbitals, NumCoreOrbs, NumValenceOrbs;
   // This is true if the corresponding twist in DistinctTwists should
   // should be used to generate two distinct orbitals from the real and
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
index b4209afc69..b8e9fbea51 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
@@ -621,7 +621,7 @@ void EinsplineSetBuilder::AnalyzeTwists2()
     }
   }
   // Find out if we can make real orbitals
-  UseRealOrbitals = true;
+  use_real_splines_ = true;
   for (int i = 0; i < DistinctTwists.size(); i++)
   {
     int ti        = DistinctTwists[i];
@@ -629,18 +629,18 @@ void EinsplineSetBuilder::AnalyzeTwists2()
     for (int j = 0; j < OHMMS_DIM; j++)
       if (std::abs(twist[j] - 0.0) > MatchingTol && std::abs(twist[j] - 0.5) > MatchingTol &&
           std::abs(twist[j] + 0.5) > MatchingTol)
-        UseRealOrbitals = false;
+        use_real_splines_ = false;
   }
-  if (UseRealOrbitals && (DistinctTwists.size() > 1))
+  if (use_real_splines_ && (DistinctTwists.size() > 1))
   {
     app_log() << "***** Use of real orbitals is possible, but not currently implemented\n"
               << "      with more than one twist angle.\n";
-    UseRealOrbitals = false;
+    use_real_splines_ = false;
   }
-  if (UseRealOrbitals)
-    app_log() << "Using real orbitals.\n";
+  if (use_real_splines_)
+    app_log() << "Using real splines.\n";
   else
-    app_log() << "Using complex orbitals.\n";
+    app_log() << "Using complex splines.\n";
 #else
   DistinctTwists.resize(IncludeTwists.size());
   MakeTwoCopies.resize(IncludeTwists.size());
@@ -649,7 +649,7 @@ void EinsplineSetBuilder::AnalyzeTwists2()
     DistinctTwists[i] = IncludeTwists[i];
     MakeTwoCopies[i]  = false;
   }
-  UseRealOrbitals = false;
+  use_real_splines_ = false;
 #endif
 }
 
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp b/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
index c5dfaf8c83..d78903cc62 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
@@ -41,7 +41,7 @@ namespace qmcplusplus
 void EinsplineSetBuilder::set_metadata(int numOrbs, int TwistNum_inp, bool skipChecks)
 {
   // 1. set a lot of internal parameters in the EinsplineSetBuilder class
-  //  e.g. TileMatrix, UseRealOrbitals, DistinctTwists, MakeTwoCopies.
+  //  e.g. TileMatrix, use_real_splines_, DistinctTwists, MakeTwoCopies.
   // 2. this is also where metadata for the orbitals are read from the wavefunction hdf5 file
   //  and broadcast to MPI groups. Variables broadcasted are listed in
   //  EinsplineSetBuilderCommon.cpp EinsplineSetBuilder::BroadcastOrbitalInfo()
@@ -263,7 +263,7 @@ std::unique_ptr<SPOSet> EinsplineSetBuilder::createSPOSetFromXML(xmlNodePtr cur)
   // set the internal parameters
   if (spinSet == 0)
     set_metadata(numOrbs, TwistNum_inp, skipChecks);
-  //if (use_complex_orb == "yes") UseRealOrbitals = false; // override given user input
+  //if (use_complex_orb == "yes") use_real_splines_ = false; // override given user input
 
   // look for <backflow>, would be a lot easier with xpath, but I cannot get it to work
   bool has_backflow = false;
@@ -289,8 +289,8 @@ std::unique_ptr<SPOSet> EinsplineSetBuilder::createSPOSetFromXML(xmlNodePtr cur)
     kid = kid->next;
   }
 
-  if (has_backflow && use_einspline_set_extended == "yes" && UseRealOrbitals)
-    APP_ABORT("backflow optimization is broken with UseRealOrbitals");
+  if (has_backflow && use_einspline_set_extended == "yes" && use_real_splines_)
+    APP_ABORT("backflow optimization is broken with use_real_splines_");
 
   //////////////////////////////////
   // Create the OrbitalSet object
@@ -308,7 +308,7 @@ std::unique_ptr<SPOSet> EinsplineSetBuilder::createSPOSetFromXML(xmlNodePtr cur)
     APP_ABORT("The 'truncate' feature of spline SPO has been removed. Please use hybrid orbital representation.");
 
 #if !defined(QMC_COMPLEX)
-  if (UseRealOrbitals)
+  if (use_real_splines_)
   {
     //if(TargetPtcl.Lattice.SuperCellEnum != SUPERCELL_BULK && truncate=="yes")
     if (MixedSplineReader == 0)
@@ -350,7 +350,7 @@ std::unique_ptr<SPOSet> EinsplineSetBuilder::createSPOSetFromXML(xmlNodePtr cur)
 #endif
   {
     EinsplineSet* new_OrbitalSet;
-    if (UseRealOrbitals)
+    if (use_real_splines_)
     {
       EinsplineSetExtended<double>* temp_OrbitalSet;
 #if defined(QMC_CUDA)
diff --git a/src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp b/src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp
index 2ead75d793..33ec92cc89 100644
--- a/src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp
+++ b/src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp
@@ -168,7 +168,7 @@ std::unique_ptr<SPOSet> EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt
 
   std::string useGPU("no");
 #if !defined(QMC_COMPLEX)
-  if (UseRealOrbitals)
+  if (use_real_splines_)
   {
     if (MixedSplineReader == 0)
     {

From eb7b705875366ae3e7f6f051d539c57b1a634883 Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Mon, 27 Sep 2021 20:41:17 -0400
Subject: [PATCH 03/35] Refactor public functions

Single translation unit
---
 src/QMCHamiltonians/OperatorBase.cpp | 190 ++++++++++---
 src/QMCHamiltonians/OperatorBase.h   | 386 ++++++++++++++++-----------
 2 files changed, 387 insertions(+), 189 deletions(-)

diff --git a/src/QMCHamiltonians/OperatorBase.cpp b/src/QMCHamiltonians/OperatorBase.cpp
index 3004003f3f..96a64fbd4e 100644
--- a/src/QMCHamiltonians/OperatorBase.cpp
+++ b/src/QMCHamiltonians/OperatorBase.cpp
@@ -22,6 +22,7 @@
 
 namespace qmcplusplus
 {
+// PUBLIC
 OperatorBase::OperatorBase() : value_(0.0), my_index_(-1), t_walker_(0)
 {
   quantum_domain_ = NO_QUANTUM_DOMAIN;
@@ -47,15 +48,33 @@ void OperatorBase::setName(const std::string name) noexcept { name_ = name; }
 TraceRequest& OperatorBase::getRequest() noexcept { return request_; }
 #endif
 
-/** The correct behavior of this routine requires estimators with non-deterministic components
- * in their evaluate() function to override this function.
- */
+//////// VIRTUAL FUNCTIONS ////////////////
+void OperatorBase::addObservables(PropertySetType& plist, BufferType& collectables) { addValue(plist); }
+
+void OperatorBase::registerObservables(std::vector<ObservableHelper>& h5desc, hid_t gid) const
+{
+  const bool collect = update_mode_.test(COLLECTABLE);
+  //exclude collectables
+  if (!collect)
+  {
+    h5desc.emplace_back(name_);
+    auto& oh = h5desc.back();
+    std::vector<int> onedim(1, 1);
+    oh.set_dimensions(onedim, my_index_);
+    oh.open(gid);
+  }
+}
+
+void OperatorBase::registerCollectables(std::vector<ObservableHelper>& h5desc, hid_t gid) const {}
+
+void OperatorBase::setObservables(PropertySetType& plist) { plist[my_index_] = value_; }
+
+void OperatorBase::setParticlePropertyList(PropertySetType& plist, int offset) { plist[my_index_ + offset] = value_; }
+
+void OperatorBase::setHistories(Walker_t& ThisWalker) { t_walker_ = &(ThisWalker); }
+
 OperatorBase::Return_t OperatorBase::evaluateDeterministic(ParticleSet& P) { return evaluate(P); }
-/** Take o_list and p_list update evaluation result variables in o_list?
- *
- * really should reduce vector of local_energies. matching the ordering and size of o list
- * the this can be call for 1 or more QMCHamiltonians
- */
+
 void OperatorBase::mw_evaluate(const RefVectorWithLeader<OperatorBase>& o_list,
                                const RefVectorWithLeader<TrialWaveFunction>& wf_list,
                                const RefVectorWithLeader<ParticleSet>& p_list) const
@@ -64,7 +83,7 @@ void OperatorBase::mw_evaluate(const RefVectorWithLeader<OperatorBase>& o_list,
 /**  Temporary raw omp pragma for simple thread parallelism
    *   ignoring the driver level concurrency
    *   
-   *  \todo replace this with a proper abstraction. It should adequately describe the behavior
+   *  TODO: replace this with a proper abstraction. It should adequately describe the behavior
    *  and strictly limit the activation of this level concurrency to when it is intended.
    *  It is unlikely to belong in this function.
    *  
@@ -115,7 +134,131 @@ void OperatorBase::mw_evaluateWithParameterDerivatives(const RefVectorWithLeader
   }
 }
 
+OperatorBase::Return_t OperatorBase::rejectedMove(ParticleSet& P) { return 0; }
+
+OperatorBase::Return_t OperatorBase::evaluateWithToperator(ParticleSet& P) { return evaluate(P); }
+
+void OperatorBase::mw_evaluateWithToperator(const RefVectorWithLeader<OperatorBase>& o_list,
+                                            const RefVectorWithLeader<TrialWaveFunction>& wf_list,
+                                            const RefVectorWithLeader<ParticleSet>& p_list) const
+{
+  mw_evaluate(o_list, wf_list, p_list);
+}
+
+OperatorBase::Return_t OperatorBase::evaluateValueAndDerivatives(ParticleSet& P,
+                                                                 const opt_variables_type& optvars,
+                                                                 const std::vector<ValueType>& dlogpsi,
+                                                                 std::vector<ValueType>& dhpsioverpsi)
+{
+  return evaluate(P);
+}
+
+OperatorBase::Return_t OperatorBase::evaluateWithIonDerivs(ParticleSet& P,
+                                                           ParticleSet& ions,
+                                                           TrialWaveFunction& psi,
+                                                           ParticleSet::ParticlePos_t& hf_term,
+                                                           ParticleSet::ParticlePos_t& pulay_term)
+{
+  return evaluate(P);
+}
+
+OperatorBase::Return_t OperatorBase::evaluateWithIonDerivsDeterministic(ParticleSet& P,
+                                                                        ParticleSet& ions,
+                                                                        TrialWaveFunction& psi,
+                                                                        ParticleSet::ParticlePos_t& hf_term,
+                                                                        ParticleSet::ParticlePos_t& pulay_term)
+{
+  return evaluateWithIonDerivs(P, ions, psi, hf_term, pulay_term);
+}
+
+void OperatorBase::updateSource(ParticleSet& s) {}
+
+OperatorBase::Return_t OperatorBase::getEnsembleAverage() { return 0.0; }
+
+void OperatorBase::createResource(ResourceCollection& collection) const {}
+
+void OperatorBase::acquireResource(ResourceCollection& collection,
+                                   const RefVectorWithLeader<OperatorBase>& o_list) const
+{}
+
+void OperatorBase::releaseResource(ResourceCollection& collection,
+                                   const RefVectorWithLeader<OperatorBase>& o_list) const
+{}
+
+void OperatorBase::setRandomGenerator(RandomGenerator_t* rng) {}
+
+void OperatorBase::add2Hamiltonian(ParticleSet& qp, TrialWaveFunction& psi, QMCHamiltonian& targetH)
+{
+  std::unique_ptr<OperatorBase> myclone = makeClone(qp, psi);
+  if (myclone)
+  {
+    targetH.addOperator(std::move(myclone), name_, update_mode_[PHYSICAL]);
+  }
+}
+
+#if !defined(REMOVE_TRACEMANAGER)
+void OperatorBase::getRequiredTraces(TraceManager& tm) {} // no-op by default
+#endif
+
+void OperatorBase::addEnergy(MCWalkerConfiguration& W, std::vector<RealType>& LocalEnergy)
+{
+  APP_ABORT("Need specialization for " + name_ +
+            "::addEnergy(MCWalkerConfiguration &W).\n Required functionality not implemented\n");
+}
 
+void OperatorBase::addEnergy(MCWalkerConfiguration& W,
+                             std::vector<RealType>& LocalEnergy,
+                             std::vector<std::vector<NonLocalData>>& Txy)
+{
+  addEnergy(W, LocalEnergy);
+}
+
+// END VIRTUAL FUNCTIONS //
+
+bool OperatorBase::isClassical() const noexcept { return quantum_domain_ == CLASSICAL; }
+
+bool OperatorBase::isQuantum() const noexcept { return quantum_domain_ == QUANTUM; }
+
+bool OperatorBase::isClassicalClassical() const noexcept { return quantum_domain_ == CLASSICAL_CLASSICAL; }
+
+bool OperatorBase::isQuantumClassical() const noexcept { return quantum_domain_ == QUANTUM_CLASSICAL; }
+
+bool OperatorBase::isQuantumQuantum() const noexcept { return quantum_domain_ == QUANTUM_QUANTUM; }
+
+bool OperatorBase::getMode(const int i) const noexcept { return update_mode_[i]; }
+
+bool OperatorBase::isNonLocal() const noexcept { return update_mode_[NONLOCAL]; }
+
+
+#if !defined(REMOVE_TRACEMANAGER)
+
+void OperatorBase::contributeTraceQuantities()
+{
+  contributeScalarQuantities();
+  contributeParticleQuantities();
+}
+
+void OperatorBase::checkoutTraceQuantities(TraceManager& tm)
+{
+  checkoutScalarQuantities(tm);
+  checkoutParticleQuantities(tm);
+}
+
+void OperatorBase::collectScalarTraces() { collectScalarQuantities(); }
+
+void OperatorBase::deleteTraceQuantities()
+{
+  deleteScalarQuantities();
+  deleteParticleQuantities();
+  streaming_scalars_    = false;
+  streaming_particles_  = false;
+  have_required_traces_ = false;
+  request_.reset();
+}
+
+#endif
+
+////// PROTECTED FUNCTIONS
 void OperatorBase::setEnergyDomain(EnergyDomains edomain)
 {
   if (energyDomainValid(edomain))
@@ -170,33 +313,4 @@ void OperatorBase::twoBodyQuantumDomain(const ParticleSet& P1, const ParticleSet
 
 bool OperatorBase::quantumDomainValid(QuantumDomains qdomain) { return qdomain != NO_QUANTUM_DOMAIN; }
 
-void OperatorBase::add2Hamiltonian(ParticleSet& qp, TrialWaveFunction& psi, QMCHamiltonian& targetH)
-{
-  std::unique_ptr<OperatorBase> myclone = makeClone(qp, psi);
-  if (myclone)
-  {
-    targetH.addOperator(std::move(myclone), name_, update_mode_[PHYSICAL]);
-  }
-}
-
-void OperatorBase::registerObservables(std::vector<ObservableHelper>& h5desc, hid_t gid) const
-{
-  bool collect = update_mode_.test(COLLECTABLE);
-  //exclude collectables
-  if (!collect)
-  {
-    h5desc.emplace_back(name_);
-    auto& oh = h5desc.back();
-    std::vector<int> onedim(1, 1);
-    oh.set_dimensions(onedim, my_index_);
-    oh.open(gid);
-  }
-}
-
-void OperatorBase::addEnergy(MCWalkerConfiguration& W, std::vector<RealType>& LocalEnergy)
-{
-  APP_ABORT("Need specialization for " + name_ +
-            "::addEnergy(MCWalkerConfiguration &W).\n Required functionality not implemented\n");
-}
-
 } // namespace qmcplusplus
diff --git a/src/QMCHamiltonians/OperatorBase.h b/src/QMCHamiltonians/OperatorBase.h
index 1bdecd0371..5dc2dcd79b 100644
--- a/src/QMCHamiltonians/OperatorBase.h
+++ b/src/QMCHamiltonians/OperatorBase.h
@@ -110,33 +110,40 @@ class OperatorBase : public QMCTraits
     NONLOCAL    = 5,
   };
 
-  ///constructor
+  /**
+   * @brief Construct a new Operator Base object
+   * Default and unique empty constructor. Initializes with default values.
+   */
   OperatorBase();
 
-  ///virtual destructor
-  virtual ~OperatorBase() {}
+  virtual ~OperatorBase() = default;
+
+  //////// GETTER AND SETTER FUNCTIONS ////////////////
 
-  // getter for update_mode member
   /**
-   * @brief get update_mode_ 
+   * @brief get update_mode_ reference
+   * 
    * @return std::bitset<8>& reference of get_update_mode_
    */
   std::bitset<8>& getUpdateMode() noexcept;
 
   /**
    * @brief get a copy of value_
+   * 
    * @return Return_t copy of value_
    */
   Return_t getValue() const noexcept;
 
   /**
    * @brief getter a copy of my_name_, rvalue small string optimization
+   * 
    * @return std::string copy of my_name_ member
    */
   std::string getName() const noexcept;
 
   /**
    * @brief Set my_name member, uses small string optimization (pass by value)
+   * 
    * @param name input
    */
   void setName(const std::string name) noexcept;
@@ -144,233 +151,305 @@ class OperatorBase : public QMCTraits
 #if !defined(REMOVE_TRACEMANAGER)
   /**
    * @brief Get request_ member
+   * 
    * @return TraceRequest& reference to request_
    */
   TraceRequest& getRequest() noexcept;
 #endif
 
-  inline bool isClassical() { return quantum_domain_ == CLASSICAL; }
-  inline bool isQuantum() { return quantum_domain_ == QUANTUM; }
-  inline bool isClassicalClassical() { return quantum_domain_ == CLASSICAL_CLASSICAL; }
-  inline bool isQuantumClassical() { return quantum_domain_ == QUANTUM_CLASSICAL; }
-  inline bool isQuantumQuantum() { return quantum_domain_ == QUANTUM_QUANTUM; }
+  //////// PURELY VIRTUAL FUNCTIONS ////////////////
+  /** 
+   * @brief Reset the data with the target ParticleSet
+   * @param P new target ParticleSet
+   */
+  virtual void resetTargetParticleSet(ParticleSet& P) = 0;
 
-  /** return the mode i
-   * @param i index among PRIMARY, OPTIMIZABLE, RATIOUPDATE, PHYSICAL
+  /** 
+   * @brief Evaluate the local energy contribution of this component
+   * @param P input configuration containing N particles
+   * @return the value of the Hamiltonian component
    */
-  inline bool getMode(int i) { return update_mode_[i]; }
+  virtual Return_t evaluate(ParticleSet& P) = 0;
 
-  inline bool isNonLocal() const { return update_mode_[NONLOCAL]; }
+  /** write about the class */
+  virtual bool get(std::ostream& os) const = 0;
+
+  // TODO: add docs
+  virtual std::unique_ptr<OperatorBase> makeClone(ParticleSet& qp, TrialWaveFunction& psi) = 0;
 
-  /** named values to  the property list
+  //////// VIRTUAL FUNCTIONS ////////////////
+
+  /** 
+   * @brief named values to  the property list
+   * Default implementation uses addValue(plist)
+   * 
    * @param plist RecordNameProperty
    * @param collectables Observables that are accumulated by evaluate
-   *
-   * Default implementaton uses addValue(plist)
    */
-  virtual void addObservables(PropertySetType& plist, BufferType& collectables) { addValue(plist); }
+  virtual void addObservables(PropertySetType& plist, BufferType& collectables);
 
-  /*** add to observable descriptor for hdf5
+  /** 
+   * @brief add to observable descriptor for hdf5
+   * The default implementation is to register a scalar for this->value_
+   * 
    * @param h5desc contains a set of hdf5 descriptors for a scalar observable
    * @param gid hdf5 group to which the observables belong
-   *
-   * The default implementation is to register a scalar for this->Value
    */
   virtual void registerObservables(std::vector<ObservableHelper>& h5desc, hid_t gid) const;
 
-  /*** add to collectables descriptor for hdf5
-   * @param h5desc contains a set of hdf5 descriptors for a scalar observable
-   * @param gid hdf5 group to which the observables belong
-   *
+  /**
+   * @brief add to collectables descriptor for hdf5
    * The default implementation does nothing. The derived classes which compute
    * big data, e.g. density, should overwrite this function.
+   * 
+   * @param h5desc contains a set of hdf5 descriptors for a scalar observable
+   * @param gid hdf5 group to which the observables belong
    */
-  virtual void registerCollectables(std::vector<ObservableHelper>& h5desc, hid_t gid) const {}
+  virtual void registerCollectables(std::vector<ObservableHelper>& h5desc, hid_t gid) const;
 
-  /** set the values evaluated by this object to plist
-   * @param plist RecordNameProperty
-   *
+  /** 
+   * @brief Set the values evaluated by this object to plist
    * Default implementation is to assign Value which is updated
-   * by evaluate  function using myIndex.
+   * by evaluate function using my_index_.
+   *
+   * @param plist RecordNameProperty
    */
-  virtual void setObservables(PropertySetType& plist) { plist[my_index_] = value_; }
+  virtual void setObservables(PropertySetType& plist);
 
-  virtual void setParticlePropertyList(PropertySetType& plist, int offset) { plist[my_index_ + offset] = value_; }
+  // TODO: add docs
+  virtual void setParticlePropertyList(PropertySetType& plist, int offset);
 
-  //virtual void setHistories(Walker<Return_t, ParticleSet::ParticleGradient_t>& ThisWalker)
-  virtual void setHistories(Walker_t& ThisWalker) { t_walker_ = &(ThisWalker); }
+  // TODO: add docs
+  virtual void setHistories(Walker_t& ThisWalker);
 
-  /** reset the data with the target ParticleSet
-   * @param P new target ParticleSet
-   */
-  virtual void resetTargetParticleSet(ParticleSet& P) = 0;
+  /** 
+   * @brief Evaluate the local energy contribution of this component, deterministically based on current state.
+   * The correct behavior of this routine requires estimators with non-deterministic components
+   * in their evaluate() function to override this function.
 
-  /** Evaluate the local energy contribution of this component
-   *@param P input configuration containing N particles
-   *@return the value of the Hamiltonian component
-   */
-  virtual Return_t evaluate(ParticleSet& P) = 0;
-  /** Evaluate the local energy contribution of this component, deterministically based on current state.
-   *@param P input configuration containing N particles
-   *@return the value of the Hamiltonian component
+   * @param P input configuration containing N particles
+   * @return the value of the Hamiltonian component
    */
   virtual Return_t evaluateDeterministic(ParticleSet& P);
-  /** Evaluate the contribution of this component of multiple walkers */
+
+  /**
+   * @brief Evaluate the contribution of this component of multiple walkers.
+   * Takes o_list and p_list and updates the evaluation result variables in o_list?
+   * Really should reduce a vector of local_energies matching the ordering and size of o_list;
+   * then this can be called for 1 or more QMCHamiltonians.
+
+   * @param o_list 
+   * @param wf_list 
+   * @param p_list 
+   */
   virtual void mw_evaluate(const RefVectorWithLeader<OperatorBase>& o_list,
                            const RefVectorWithLeader<TrialWaveFunction>& wf_list,
                            const RefVectorWithLeader<ParticleSet>& p_list) const;
 
+  /**
+   * @brief TODO: add docs
+
+   * @param o_list 
+   * @param p_list 
+   * @param optvars 
+   * @param dlogpsi 
+   * @param dhpsioverpsi 
+   */
   virtual void mw_evaluateWithParameterDerivatives(const RefVectorWithLeader<OperatorBase>& o_list,
                                                    const RefVectorWithLeader<ParticleSet>& p_list,
                                                    const opt_variables_type& optvars,
                                                    RecordArray<ValueType>& dlogpsi,
                                                    RecordArray<ValueType>& dhpsioverpsi) const;
 
+  /**
+   * @brief TODO: add docs
+   * 
+   * @param P 
+   * @return Return_t 
+   */
+  virtual Return_t rejectedMove(ParticleSet& P);
+
+  /** 
+   * @brief Evaluate the local energy contribution of this component with Toperators updated if requested
 
-  virtual Return_t rejectedMove(ParticleSet& P) { return 0; }
-  /** Evaluate the local energy contribution of this component with Toperators updated if requested
-   *@param P input configuration containing N particles
-   *@return the value of the Hamiltonian component
+   * @param P input configuration containing N particles
+   * @return the value of the Hamiltonian component
    */
-  virtual Return_t evaluateWithToperator(ParticleSet& P) { return evaluate(P); }
+  virtual Return_t evaluateWithToperator(ParticleSet& P);
 
-  /** Evaluate the contribution of this component of multiple walkers */
+  /**
+   * @brief Evaluate the contribution of this component of multiple walkers
+
+   * @param o_list 
+   * @param wf_list 
+   * @param p_list 
+   */
   virtual void mw_evaluateWithToperator(const RefVectorWithLeader<OperatorBase>& o_list,
                                         const RefVectorWithLeader<TrialWaveFunction>& wf_list,
-                                        const RefVectorWithLeader<ParticleSet>& p_list) const
-  {
-    mw_evaluate(o_list, wf_list, p_list);
-  }
+                                        const RefVectorWithLeader<ParticleSet>& p_list) const;
 
-  /** evaluate value and derivatives wrt the optimizables
-   *
-   * Default uses evaluate
+  /**
+   * @brief Evaluate value and derivatives wrt the optimizables. Default uses evaluate.
+
+   * @param P 
+   * @param optvars 
+   * @param dlogpsi 
+   * @param dhpsioverpsi 
+   * @return Return_t 
    */
   virtual Return_t evaluateValueAndDerivatives(ParticleSet& P,
                                                const opt_variables_type& optvars,
                                                const std::vector<ValueType>& dlogpsi,
-                                               std::vector<ValueType>& dhpsioverpsi)
-  {
-    return evaluate(P);
-  }
+                                               std::vector<ValueType>& dhpsioverpsi);
+
+  /** 
+   * @brief Evaluate contribution to local energy  and derivatives w.r.t ionic coordinates from OperatorBase.  
 
-  /** evaluate contribution to local energy  and derivatives w.r.t ionic coordinates from OperatorBase.  
-  * @param P target particle set (electrons)
-  * @param ions source particle set (ions)
-  * @param psi Trial wave function
-  * @param hf_terms  Adds OperatorBase's contribution to Re [(dH)Psi]/Psi
-  * @param pulay_terms Adds OperatorBase's contribution to Re [(H-E_L)dPsi]/Psi 
-  * @return Contribution of OperatorBase to Local Energy.
-  */
+   * @param P target particle set (electrons)
+   * @param ions source particle set (ions)
+   * @param psi Trial wave function
+   * @param hf_term Adds OperatorBase's contribution to Re [(dH)Psi]/Psi
+   * @param pulay_term Adds OperatorBase's contribution to Re [(H-E_L)dPsi]/Psi
+   * @return Contribution of OperatorBase to Local Energy.
+   */
   virtual Return_t evaluateWithIonDerivs(ParticleSet& P,
                                          ParticleSet& ions,
                                          TrialWaveFunction& psi,
                                          ParticleSet::ParticlePos_t& hf_term,
-                                         ParticleSet::ParticlePos_t& pulay_term)
-  {
-    return evaluate(P);
-  }
-
-  /** evaluate contribution to local energy  and derivatives w.r.t ionic coordinates from OperatorBase.  
-  * @param P target particle set (electrons)
-  * @param ions source particle set (ions)
-  * @param psi Trial wave function
-  * @param hf_terms  Adds OperatorBase's contribution to Re [(dH)Psi]/Psi
-  * @param pulay_terms Adds OperatorBase's contribution to Re [(H-E_L)dPsi]/Psi 
-  * @return Contribution of OperatorBase to Local Energy.
-  */
+                                         ParticleSet::ParticlePos_t& pulay_term);
+
+  /** 
+   * @brief Evaluate contribution to local energy  and derivatives w.r.t ionic coordinates from OperatorBase.
+   * The default implementation (no stochastic component) forwards to evaluateWithIonDerivs,
+   * which, if not otherwise specified, itself defaults to evaluate().
+
+   * @param P target particle set (electrons)
+   * @param ions source particle set (ions)
+   * @param psi Trial wave function
+   * @param hf_term Adds OperatorBase's contribution to Re [(dH)Psi]/Psi
+   * @param pulay_term Adds OperatorBase's contribution to Re [(H-E_L)dPsi]/Psi
+   * @return Contribution of OperatorBase to Local Energy.
+   */
   virtual Return_t evaluateWithIonDerivsDeterministic(ParticleSet& P,
                                                       ParticleSet& ions,
                                                       TrialWaveFunction& psi,
                                                       ParticleSet::ParticlePos_t& hf_term,
-                                                      ParticleSet::ParticlePos_t& pulay_term)
-  {
-    //If there's no stochastic component, defaults to above defined evaluateWithIonDerivs.
-    //If not otherwise specified, this defaults to evaluate().
-    return evaluateWithIonDerivs(P, ions, psi, hf_term, pulay_term);
-  }
-  /** update data associated with a particleset
-   * @param s source particle set
-   *
+                                                      ParticleSet::ParticlePos_t& pulay_term);
+
+  /** 
+   * @brief Update data associated with a particleset.
    * Default implementation does nothing. Only A-A interactions for s needs to implement its own method.
+
+   * @param s source particle set
    */
-  virtual void updateSource(ParticleSet& s) {}
+  virtual void updateSource(ParticleSet& s);
 
-  /** return an average value by collective operation
+  /** 
+   * @brief Return an average value by collective operation
    */
-  virtual Return_t getEnsembleAverage() { return 0.0; }
+  virtual Return_t getEnsembleAverage();
 
-  /** write about the class */
-  virtual bool get(std::ostream& os) const = 0;
+  /**
+   * @brief Initialize a shared resource and hand it to a collection
 
-  /** initialize a shared resource and hand it to a collection
+   * @param collection 
    */
-  virtual void createResource(ResourceCollection& collection) const {}
+  virtual void createResource(ResourceCollection& collection) const;
 
-  /** acquire a shared resource from a collection
-   */
-  virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<OperatorBase>& o_list) const {}
+  /**
+   * @brief Acquire a shared resource from a collection
 
-  /** return a shared resource to a collection
+   * @param collection 
+   * @param o_list 
    */
-  virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<OperatorBase>& o_list) const {}
-
-  virtual std::unique_ptr<OperatorBase> makeClone(ParticleSet& qp, TrialWaveFunction& psi) = 0;
+  virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<OperatorBase>& o_list) const;
 
-  //virtual std::unique_ptr<OperatorBase> makeClone(ParticleSet& qp, TrialWaveFunction& psi, QMCHamiltonian& H);
+  /**
+   * @brief Return a shared resource to a collection
+   * 
+   * @param collection 
+   * @param o_list 
+   */
+  virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<OperatorBase>& o_list) const;
 
-  virtual void setRandomGenerator(RandomGenerator_t* rng)
-  {
-    //empty
-  }
+  /**
+   * @brief Set the Random Generator object
+   * TODO: add docs
+   * @param rng 
+   */
+  virtual void setRandomGenerator(RandomGenerator_t* rng);
 
+  /**
+   * @brief TODO: add docs
+   * 
+   * @param qp 
+   * @param psi 
+   * @param targetH 
+   */
   virtual void add2Hamiltonian(ParticleSet& qp, TrialWaveFunction& psi, QMCHamiltonian& targetH);
 
 #if !defined(REMOVE_TRACEMANAGER)
-  ///make trace quantities available
-  inline void contributeTraceQuantities()
-  {
-    contributeScalarQuantities();
-    contributeParticleQuantities();
-  }
-
-  ///checkout trace arrays
-  inline void checkoutTraceQuantities(TraceManager& tm)
-  {
-    //derived classes must guard individual checkouts using request info
-    checkoutScalarQuantities(tm);
-    checkoutParticleQuantities(tm);
-  }
-
-  ///collect scalar trace data
-  inline void collectScalarTraces()
-  {
-    //app_log()<<"OperatorBase::collectScalarTraces"<< std::endl;
-    collectScalarQuantities();
-  }
-
-  ///delete trace arrays
-  inline void deleteTraceQuantities()
-  {
-    deleteScalarQuantities();
-    deleteParticleQuantities();
-    streaming_scalars_    = false;
-    streaming_particles_  = false;
-    have_required_traces_ = false;
-    request_.reset();
-  }
-
-  virtual void getRequiredTraces(TraceManager& tm){};
+  /**
+   * @brief TODO: add docs
+   * 
+   * @param tm 
+   */
+  virtual void getRequiredTraces(TraceManager& tm);
 #endif
 
+  // TODO: add docs
+
   virtual void addEnergy(MCWalkerConfiguration& W, std::vector<RealType>& LocalEnergy);
 
   virtual void addEnergy(MCWalkerConfiguration& W,
                          std::vector<RealType>& LocalEnergy,
-                         std::vector<std::vector<NonLocalData>>& Txy)
-  {
-    addEnergy(W, LocalEnergy);
-  }
+                         std::vector<std::vector<NonLocalData>>& Txy);
+
+  bool isClassical() const noexcept;
+  bool isQuantum() const noexcept;
+  bool isClassicalClassical() const noexcept;
+  bool isQuantumClassical() const noexcept;
+  bool isQuantumQuantum() const noexcept;
+
+  /** 
+   * @brief Return the mode i
+   * @param i index among PRIMARY, OPTIMIZABLE, RATIOUPDATE, PHYSICAL
+   */
+  bool getMode(const int i) const noexcept;
+
+  /**
+   * @brief Check whether the NONLOCAL bit of update_mode_ is set
+   *
+   * @return true if update_mode_[NONLOCAL] is set
+   * @return false otherwise
+   */
+  bool isNonLocal() const noexcept;
+
+
+#if !defined(REMOVE_TRACEMANAGER)
+
+  /**
+   * @brief Make trace quantities available
+   */
+  void contributeTraceQuantities();
+
+  /**
+   * @brief Checkout trace arrays 
+   * Derived classes must guard individual checkouts using request info 
+   * @param tm 
+   */
+  void checkoutTraceQuantities(TraceManager& tm);
+
+  /**
+   * @brief Collect scalar trace data
+   */
+  void collectScalarTraces();
+
+  /**
+   * @brief delete trace arrays
+   */
+  void deleteTraceQuantities();
+#endif
 
 protected:
   ///set the current update mode
@@ -401,12 +480,17 @@ class OperatorBase : public QMCTraits
   bool have_required_traces_;
 #endif
 
+
+  /////PURELY VIRTUAL FUNCTIONS
+
   /**
    * Read the input parameter
    * @param cur xml node for a OperatorBase object
    */
   virtual bool put(xmlNodePtr cur) = 0;
 
+  //////VIRTUAL FUNCTIONS
+
 #if !defined(REMOVE_TRACEMANAGER)
   virtual void contributeScalarQuantities() { request_.contribute_scalar(name_); }
 

From ea9814a51e06d7b2340b7b5b97a56fcc29f93a82 Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Tue, 28 Sep 2021 15:19:14 -0400
Subject: [PATCH 04/35] Refactor protected functions

Into a single translation unit
---
 src/QMCHamiltonians/OperatorBase.cpp | 47 ++++++++++++++++++++++++----
 src/QMCHamiltonians/OperatorBase.h   | 46 +++++++++------------------
 2 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/src/QMCHamiltonians/OperatorBase.cpp b/src/QMCHamiltonians/OperatorBase.cpp
index 96a64fbd4e..b29002a390 100644
--- a/src/QMCHamiltonians/OperatorBase.cpp
+++ b/src/QMCHamiltonians/OperatorBase.cpp
@@ -48,7 +48,7 @@ void OperatorBase::setName(const std::string name) noexcept { name_ = name; }
 TraceRequest& OperatorBase::getRequest() noexcept { return request_; }
 #endif
 
-//////// VIRTUAL FUNCTIONS ////////////////
+////////  FUNCTIONS ////////////////
 void OperatorBase::addObservables(PropertySetType& plist, BufferType& collectables) { addValue(plist); }
 
 void OperatorBase::registerObservables(std::vector<ObservableHelper>& h5desc, hid_t gid) const
@@ -213,7 +213,7 @@ void OperatorBase::addEnergy(MCWalkerConfiguration& W,
   addEnergy(W, LocalEnergy);
 }
 
-// END VIRTUAL FUNCTIONS //
+// END  FUNCTIONS //
 
 bool OperatorBase::isClassical() const noexcept { return quantum_domain_ == CLASSICAL; }
 
@@ -259,6 +259,35 @@ void OperatorBase::deleteTraceQuantities()
 #endif
 
 ////// PROTECTED FUNCTIONS
+#if !defined(REMOVE_TRACEMANAGER)
+void OperatorBase::contributeScalarQuantities() { request_.contribute_scalar(name_); }
+
+void OperatorBase::checkoutScalarQuantities(TraceManager& tm)
+{
+  streaming_scalars_ = request_.streaming_scalar(name_);
+  if (streaming_scalars_)
+    value_sample_ = tm.checkout_real<1>(name_);
+}
+
+void OperatorBase::collectScalarQuantities()
+{
+  if (streaming_scalars_)
+    (*value_sample_)(0) = value_;
+}
+
+void OperatorBase::deleteScalarQuantities()
+{
+  if (streaming_scalars_)
+    delete value_sample_;
+}
+
+void OperatorBase::contributeParticleQuantities(){};
+void OperatorBase::checkoutParticleQuantities(TraceManager& tm){};
+void OperatorBase::deleteParticleQuantities(){};
+#endif
+
+void OperatorBase::setComputeForces(bool compute) {}
+
 void OperatorBase::setEnergyDomain(EnergyDomains edomain)
 {
   if (energyDomainValid(edomain))
@@ -297,10 +326,10 @@ void OperatorBase::twoBodyQuantumDomain(const ParticleSet& P)
 
 void OperatorBase::twoBodyQuantumDomain(const ParticleSet& P1, const ParticleSet& P2)
 {
-  bool c1 = P1.is_classical();
-  bool c2 = P2.is_classical();
-  bool q1 = P1.is_quantum();
-  bool q2 = P2.is_quantum();
+  const bool c1 = P1.is_classical();
+  const bool c2 = P2.is_classical();
+  const bool q1 = P1.is_quantum();
+  const bool q2 = P2.is_quantum();
   if (c1 && c2)
     quantum_domain_ = CLASSICAL_CLASSICAL;
   else if ((q1 && c2) || (c1 && q2))
@@ -311,6 +340,12 @@ void OperatorBase::twoBodyQuantumDomain(const ParticleSet& P1, const ParticleSet
     APP_ABORT("OperatorBase::twoBodyQuantumDomain(P1,P2)\n  quantum domain of input particles is invalid");
 }
 
+void OperatorBase::addValue(PropertySetType& plist)
+{
+  if (!update_mode_[COLLECTABLE])
+    my_index_ = plist.add(name_.c_str());
+}
+
 bool OperatorBase::quantumDomainValid(QuantumDomains qdomain) { return qdomain != NO_QUANTUM_DOMAIN; }
 
 } // namespace qmcplusplus
diff --git a/src/QMCHamiltonians/OperatorBase.h b/src/QMCHamiltonians/OperatorBase.h
index 5dc2dcd79b..8154d64e82 100644
--- a/src/QMCHamiltonians/OperatorBase.h
+++ b/src/QMCHamiltonians/OperatorBase.h
@@ -492,38 +492,26 @@ class OperatorBase : public QMCTraits
   //////VIRTUAL FUNCTIONS
 
 #if !defined(REMOVE_TRACEMANAGER)
-  virtual void contributeScalarQuantities() { request_.contribute_scalar(name_); }
+  virtual void contributeScalarQuantities();
 
-  virtual void checkoutScalarQuantities(TraceManager& tm)
-  {
-    streaming_scalars_ = request_.streaming_scalar(name_);
-    if (streaming_scalars_)
-      value_sample_ = tm.checkout_real<1>(name_);
-  }
+  virtual void checkoutScalarQuantities(TraceManager& tm);
 
-  virtual void collectScalarQuantities()
-  {
-    if (streaming_scalars_)
-      (*value_sample_)(0) = value_;
-  }
+  virtual void collectScalarQuantities();
 
-  virtual void deleteScalarQuantities()
-  {
-    if (streaming_scalars_)
-      delete value_sample_;
-  }
+  virtual void deleteScalarQuantities();
 
-  virtual void contributeParticleQuantities(){};
-  virtual void checkoutParticleQuantities(TraceManager& tm){};
-  virtual void deleteParticleQuantities(){};
+  virtual void contributeParticleQuantities();
+  virtual void checkoutParticleQuantities(TraceManager& tm);
+  virtual void deleteParticleQuantities();
 #endif
 
-  virtual void setComputeForces(bool compute)
-  {
-    // empty
-  }
+  virtual void setComputeForces(bool compute);
 
-  ///set energy domain
+  /**
+   * @brief Set the Energy Domain
+   * 
+   * @param edomain 
+   */
   void setEnergyDomain(EnergyDomains edomain);
 
   ///set quantum domain
@@ -539,16 +527,12 @@ class OperatorBase : public QMCTraits
   void twoBodyQuantumDomain(const ParticleSet& P1, const ParticleSet& P2);
 
   /**
-   * named values to  the property list
+   * @brief named values to  the property list
    * @param plist RecordNameProperty
    *
    * Previously addObservables but it is renamed and a non-virtial function.
    */
-  inline void addValue(PropertySetType& plist)
-  {
-    if (!update_mode_[COLLECTABLE])
-      my_index_ = plist.add(name_.c_str());
-  }
+  void addValue(PropertySetType& plist);
 
 private:
   ///quantum_domain_ of the (particle) operator, default = no_quantum_domain

From db5ec23927ca7202544bf3a54f1463e571562ac8 Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Tue, 28 Sep 2021 15:54:16 -0400
Subject: [PATCH 05/35] Refactor private functions

---
 src/QMCHamiltonians/OperatorBase.cpp |  5 ++++-
 src/QMCHamiltonians/OperatorBase.h   | 10 ++--------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/QMCHamiltonians/OperatorBase.cpp b/src/QMCHamiltonians/OperatorBase.cpp
index b29002a390..5bad8b7c27 100644
--- a/src/QMCHamiltonians/OperatorBase.cpp
+++ b/src/QMCHamiltonians/OperatorBase.cpp
@@ -346,6 +346,9 @@ void OperatorBase::addValue(PropertySetType& plist)
     my_index_ = plist.add(name_.c_str());
 }
 
-bool OperatorBase::quantumDomainValid(QuantumDomains qdomain) { return qdomain != NO_QUANTUM_DOMAIN; }
+////// PRIVATE FUNCTIONS
+bool OperatorBase::energyDomainValid(EnergyDomains edomain) const noexcept { return edomain != NO_ENERGY_DOMAIN; }
+
+bool OperatorBase::quantumDomainValid(QuantumDomains qdomain) const noexcept { return qdomain != NO_QUANTUM_DOMAIN; }
 
 } // namespace qmcplusplus
diff --git a/src/QMCHamiltonians/OperatorBase.h b/src/QMCHamiltonians/OperatorBase.h
index 8154d64e82..2f0afa8f26 100644
--- a/src/QMCHamiltonians/OperatorBase.h
+++ b/src/QMCHamiltonians/OperatorBase.h
@@ -548,16 +548,10 @@ class OperatorBase : public QMCTraits
 #endif
 
   ///return whether the energy domain is valid
-  inline bool energyDomainValid(EnergyDomains edomain) const { return edomain != NO_ENERGY_DOMAIN; }
-
-  ///return whether the energy domain is valid
-  inline bool energyDomainValid() const { return energyDomainValid(energy_domain_); }
-
-  ///return whether the quantum domain is valid
-  bool quantumDomainValid(QuantumDomains qdomain);
+  bool energyDomainValid(EnergyDomains edomain) const noexcept;
 
   ///return whether the quantum domain is valid
-  inline bool quantumDomainValid() { return quantumDomainValid(quantum_domain_); }
+  bool quantumDomainValid(QuantumDomains qdomain) const noexcept;
 };
 } // namespace qmcplusplus
 #endif

From 8d8c262312c9116cfdb6f639d810d22075b50b59 Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Tue, 28 Sep 2021 15:59:50 -0400
Subject: [PATCH 06/35] Apply RAII in constructor

---
 src/QMCHamiltonians/OperatorBase.cpp | 20 +++++++++++++-------
 src/QMCHamiltonians/OperatorBase.h   |  1 -
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/QMCHamiltonians/OperatorBase.cpp b/src/QMCHamiltonians/OperatorBase.cpp
index 5bad8b7c27..14ebad3b32 100644
--- a/src/QMCHamiltonians/OperatorBase.cpp
+++ b/src/QMCHamiltonians/OperatorBase.cpp
@@ -23,16 +23,22 @@
 namespace qmcplusplus
 {
 // PUBLIC
-OperatorBase::OperatorBase() : value_(0.0), my_index_(-1), t_walker_(0)
-{
-  quantum_domain_ = NO_QUANTUM_DOMAIN;
-  energy_domain_  = NO_ENERGY_DOMAIN;
 
+OperatorBase::OperatorBase()
+    : value_(0.0),
+      my_index_(-1),
+      t_walker_(0),
 #if !defined(REMOVE_TRACEMANAGER)
-  streaming_scalars_    = false;
-  streaming_particles_  = false;
-  have_required_traces_ = false;
+      streaming_particles_(false),
+      have_required_traces_(false),
 #endif
+      quantum_domain_(NO_QUANTUM_DOMAIN),
+      energy_domain_(NO_ENERGY_DOMAIN)
+#if !defined(REMOVE_TRACEMANAGER)
+      ,
+      streaming_scalars_(false)
+#endif
+{
   update_mode_.set(PRIMARY, 1);
 }
 
diff --git a/src/QMCHamiltonians/OperatorBase.h b/src/QMCHamiltonians/OperatorBase.h
index 2f0afa8f26..7baeb473f2 100644
--- a/src/QMCHamiltonians/OperatorBase.h
+++ b/src/QMCHamiltonians/OperatorBase.h
@@ -480,7 +480,6 @@ class OperatorBase : public QMCTraits
   bool have_required_traces_;
 #endif
 
-
   /////PURELY VIRTUAL FUNCTIONS
 
   /**

From decb0ead783b6ad5e0d141fe1b3af3c50ba283cc Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Tue, 28 Sep 2021 18:56:29 -0500
Subject: [PATCH 07/35] Less ugly ifdef.

---
 src/QMCHamiltonians/OperatorBase.cpp |  5 +----
 src/QMCHamiltonians/OperatorBase.h   | 10 +++++-----
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/QMCHamiltonians/OperatorBase.cpp b/src/QMCHamiltonians/OperatorBase.cpp
index 14ebad3b32..7b3e3fbba7 100644
--- a/src/QMCHamiltonians/OperatorBase.cpp
+++ b/src/QMCHamiltonians/OperatorBase.cpp
@@ -31,13 +31,10 @@ OperatorBase::OperatorBase()
 #if !defined(REMOVE_TRACEMANAGER)
       streaming_particles_(false),
       have_required_traces_(false),
+      streaming_scalars_(false),
 #endif
       quantum_domain_(NO_QUANTUM_DOMAIN),
       energy_domain_(NO_ENERGY_DOMAIN)
-#if !defined(REMOVE_TRACEMANAGER)
-      ,
-      streaming_scalars_(false)
-#endif
 {
   update_mode_.set(PRIMARY, 1);
 }
diff --git a/src/QMCHamiltonians/OperatorBase.h b/src/QMCHamiltonians/OperatorBase.h
index 7baeb473f2..6aeba48f94 100644
--- a/src/QMCHamiltonians/OperatorBase.h
+++ b/src/QMCHamiltonians/OperatorBase.h
@@ -534,11 +534,6 @@ class OperatorBase : public QMCTraits
   void addValue(PropertySetType& plist);
 
 private:
-  ///quantum_domain_ of the (particle) operator, default = no_quantum_domain
-  QuantumDomains quantum_domain_;
-  ///energy domain of the operator (kinetic/potential), default = no_energy_domain
-  EnergyDomains energy_domain_;
-
 #if !defined(REMOVE_TRACEMANAGER)
   bool streaming_scalars_;
 
@@ -546,6 +541,11 @@ class OperatorBase : public QMCTraits
   Array<RealType, 1>* value_sample_;
 #endif
 
+  ///quantum_domain_ of the (particle) operator, default = no_quantum_domain
+  QuantumDomains quantum_domain_;
+  ///energy domain of the operator (kinetic/potential), default = no_energy_domain
+  EnergyDomains energy_domain_;
+
   ///return whether the energy domain is valid
   bool energyDomainValid(EnergyDomains edomain) const noexcept;
 

From 7325f32ec8bdf2188b89516d2a36d368c18f2fab Mon Sep 17 00:00:00 2001
From: Peter Doak <doakpw@ornl.gov>
Date: Tue, 28 Sep 2021 18:43:36 -0400
Subject: [PATCH 08/35] Adding OneBodyDensityMatricesInput and tests

---
 src/Estimators/CMakeLists.txt                 |   2 +-
 src/Estimators/InputSection.cpp               |   6 +-
 src/Estimators/InputSection.h                 |   9 +-
 .../OneBodyDensityMatricesInput.cpp           |  42 ++++++
 src/Estimators/OneBodyDensityMatricesInput.h  | 120 ++++++++++++++++++
 src/Estimators/tests/CMakeLists.txt           |   3 +-
 ...ensityTesting.cpp => EstimatorTesting.cpp} |   4 +-
 ...pinDensityTesting.h => EstimatorTesting.h} |   6 +-
 .../InvalidOneBodyDensityMatricesInput.h      |  53 ++++++++
 .../tests/ValidOneBodyDensityMatricesInput.h  |  53 ++++++++
 .../test_OneBodyDensityMatricesInput.cpp      |  53 ++++++++
 .../tests/test_SpinDensityInput.cpp           |   2 +-
 src/Estimators/tests/test_SpinDensityNew.cpp  |   4 +-
 13 files changed, 342 insertions(+), 15 deletions(-)
 create mode 100644 src/Estimators/OneBodyDensityMatricesInput.cpp
 create mode 100644 src/Estimators/OneBodyDensityMatricesInput.h
 rename src/Estimators/tests/{SpinDensityTesting.cpp => EstimatorTesting.cpp} (92%)
 rename src/Estimators/tests/{SpinDensityTesting.h => EstimatorTesting.h} (84%)
 create mode 100644 src/Estimators/tests/InvalidOneBodyDensityMatricesInput.h
 create mode 100644 src/Estimators/tests/ValidOneBodyDensityMatricesInput.h
 create mode 100644 src/Estimators/tests/test_OneBodyDensityMatricesInput.cpp

diff --git a/src/Estimators/CMakeLists.txt b/src/Estimators/CMakeLists.txt
index ca025cce76..af3af2da5f 100644
--- a/src/Estimators/CMakeLists.txt
+++ b/src/Estimators/CMakeLists.txt
@@ -24,7 +24,7 @@ set(QMCEST_SRC
     OperatorEstBase.cpp
     SpinDensityNew.cpp
     MomentumDistribution.cpp
-    )
+    OneBodyDensityMatricesInput.cpp)
 
 ####################################
 # create libqmcestimators
diff --git a/src/Estimators/InputSection.cpp b/src/Estimators/InputSection.cpp
index e94e19ff74..4cee4759c5 100644
--- a/src/Estimators/InputSection.cpp
+++ b/src/Estimators/InputSection.cpp
@@ -63,7 +63,6 @@ void InputSection::readXML(xmlNodePtr cur)
 
   // check input validity
   check_valid();
-
   //report();
 }
 
@@ -79,7 +78,6 @@ void InputSection::init(const std::unordered_map<std::string, std::any>& init_va
 
   // check input validity
   check_valid();
-
   //report();
 }
 
@@ -91,7 +89,6 @@ void InputSection::set_defaults()
       set_from_value(name, default_value);
 }
 
-
 void InputSection::set_from_stream(const std::string& name, std::istringstream& svalue)
 {
   if (is_string(name))
@@ -158,7 +155,8 @@ void InputSection::check_valid()
             << " has not been assigned\n";
       throw UniformCommunicateError(error.str());
     }
-}
+  this->checkParticularValidity();
+};
 
 
 void InputSection::report() const
diff --git a/src/Estimators/InputSection.h b/src/Estimators/InputSection.h
index b68718d3f2..5b1736fadb 100644
--- a/src/Estimators/InputSection.h
+++ b/src/Estimators/InputSection.h
@@ -61,7 +61,7 @@ class InputSection
   // Enable read-only access to variable values.
   //   Needs updating to allow copy-less return.
   template<typename T>
-  T get(const std::string& name) const
+  const T get(const std::string& name) const
   {
     return std::any_cast<T>(values.at(name));
   }
@@ -74,6 +74,13 @@ class InputSection
   // Initialize from unordered_map/initializer list
   void init(const std::unordered_map<std::string, std::any>& init_values);
 
+protected:
+  /** Do validation for a particular subtype of InputSection
+   *  Called by check_valid.
+   *  Default implementation is noop
+   */
+  virtual void checkParticularValidity() {}
+  
 private:
   // Query functions
   bool is_attribute(const std::string& name) const { return attributes.find(name) != attributes.end(); }
diff --git a/src/Estimators/OneBodyDensityMatricesInput.cpp b/src/Estimators/OneBodyDensityMatricesInput.cpp
new file mode 100644
index 0000000000..6fda7243cd
--- /dev/null
+++ b/src/Estimators/OneBodyDensityMatricesInput.cpp
@@ -0,0 +1,42 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//
+// Some code refactored from: DensityMatrices1b.cpp
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "string_utils.h"
+#include "OneBodyDensityMatricesInput.h"
+
+namespace qmcplusplus
+{
+
+OneBodyDensityMatricesInput::OneBodyDensityMatricesInput(){};
+OneBodyDensityMatricesInput::OneBodyDensityMatricesInput(xmlNodePtr cur)
+{
+  // This results in checkParticularValidity being called on OneBodyDensityMatrixInputSection
+  input_section_.readXML(cur);
+}
+
+void OneBodyDensityMatricesInput::OneBodyDensityMatrixInputSection::checkParticularValidity()
+{
+  if (has("scale"))
+  {
+    Real scale = get<Real>("scale");
+    std::cout << "SCALE is :" << scale << '\n';
+    if (scale > 1.0 + 1e-10)
+      throw UniformCommunicateError("OneBodyDensityMatrices input: scale must be less than one");
+    else if (scale < 0.0 - 1e-10)
+      throw UniformCommunicateError("OneBodyDensityMatrices input: scale must be greater than zero");
+  }
+  std::string basis_str = get<std::string>("basis");
+  auto basis_set_names  = split(basis_str);
+  if (basis_set_names.size() == 0 || basis_set_names[0].size() == 0)
+    throw UniformCommunicateError("OneBodyDensityMatrices input: basis must have at least one sposet");
+}
+
+} // namespace qmcplusplus
diff --git a/src/Estimators/OneBodyDensityMatricesInput.h b/src/Estimators/OneBodyDensityMatricesInput.h
new file mode 100644
index 0000000000..3aa8855d7c
--- /dev/null
+++ b/src/Estimators/OneBodyDensityMatricesInput.h
@@ -0,0 +1,120 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
+//
+// Some code refactored from: DensityMatrices1b.h
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_ONE_BODY_DENSITY_MATRICES_INPUT_H
+#define QMCPLUSPLUS_ONE_BODY_DENSITY_MATRICES_INPUT_H
+
+#include "Configuration.h"
+#include "InputSection.h"
+
+namespace qmcplusplus
+{
+/** Native representation for DensityMatrices1B Estimator's inputs
+ */
+class OneBodyDensityMatricesInput
+{
+public:
+  enum class Integrators
+  {
+    UNIFORM_GRID,
+    UNIFORM,
+    DENSITY,
+    NO_INTEGRATOR
+  };
+
+  enum class Evaluators
+  {
+    LOOP,
+    MATRIX,
+    NO_EVALUATOR
+  };
+
+  enum class Samplings
+  {
+    VOLUME_BASED,
+    METROPOLIS,
+    NO_SAMPLING
+  };
+
+  class OneBodyDensityMatrixInputSection : public InputSection
+  {
+  public:
+    /** parse time definition of input parameters */
+    OneBodyDensityMatrixInputSection()
+    {
+      section_name = "OneBodyDensityMatrix";
+      attributes   = {"name", "type"};
+      parameters   = {"basis",      "energy_matrix", "integrator",        "evaluator",        "scale",
+                    "center",     "points",        "samples",           "warmup",           "timestep",
+                    "use_drift",  "check_overlap", "check_derivatives", "acceptance_ratio", "rstats",
+                    "normalized", "volumed_normed"};
+      bools        = {"energy_matrix", "use_drift",         "normalized", "volume_normed",
+               "check_overlap", "check_derivatives", "rstats",     "acceptance_ratio"};
+      strings      = {"name", "type", "basis", "integrator", "evaluator"};
+      integers     = {"points", "samples"};
+      reals        = {"scale", "timestep"};
+      required     = {"name", "basis"};
+      // I'd much rather see the default defined in simple native c++ as below
+    }
+
+    /** do parse time checks of input */
+    void checkParticularValidity() override;
+  };
+
+  using Position = QMCTraits::PosType;
+  using Real     = QMCTraits::FullPrecRealType;
+
+  OneBodyDensityMatricesInput();
+  OneBodyDensityMatricesInput(xmlNodePtr cur);
+
+private:
+  OneBodyDensityMatrixInputSection input_section_;
+
+  bool energy_matrix_     = false;
+  bool use_drift_         = false;
+  bool normalized_        = true;
+  bool volume_normalized_ = true;
+  bool check_overlap_     = false;
+  bool check_derivatives_ = false;
+  bool rstats_            = false;
+  bool acceptance_ratio_  = false;
+  Integrators integrator_ = Integrators::UNIFORM_GRID;
+  Samplings sampling_     = Samplings::VOLUME_BASED;
+  Evaluators evaluator_   = Evaluators::LOOP;
+  Real scale_             = 1.0;
+  Position center_        = 0.0;
+  Real timestep_          = 0.5;
+  int points_             = 10;
+  int samples_            = 10;
+  int warmup_samples_     = 30;
+public:
+  bool get_energy_matrix() const { return energy_matrix_; }
+  bool get_use_drift() const { return use_drift_; }
+  bool get_normalized() const { return normalized_; }
+  bool get_volume_normalized() const { return volume_normalized_; }
+  bool get_check_overlap() const { return check_overlap_; }
+  bool get_check_derivatives() const { return check_derivatives_; }
+  bool get_rstats() const { return rstats_; }
+  bool get_acceptance_ratio() const { return acceptance_ratio_; }
+  Integrators get_integrator() const { return integrator_; }
+  Samplings get_sampling() const { return sampling_; }
+  Evaluators get_evaluator() const { return evaluator_; }
+  Real get_scale() const { return scale_; }
+  Position get_center() const { return center_; }
+  Real get_timestep() const { return timestep_; }
+  int get_points() const { return points_; }
+  int get_samples() const { return samples_; }
+  int get_warmup_samples() const { return warmup_samples_; }
+};
+
+} // namespace qmcplusplus
+
+#endif
diff --git a/src/Estimators/tests/CMakeLists.txt b/src/Estimators/tests/CMakeLists.txt
index bd2c5f3a1f..2ee4136b3e 100644
--- a/src/Estimators/tests/CMakeLists.txt
+++ b/src/Estimators/tests/CMakeLists.txt
@@ -23,10 +23,11 @@ set(SRCS
     test_manager.cpp
     test_EstimatorManagerNew.cpp
     test_trace_manager.cpp
-    SpinDensityTesting.cpp
+    EstimatorTesting.cpp
     test_SpinDensityInput.cpp
     test_SpinDensityNew.cpp
     test_InputSection.cpp
+    test_OneBodyDensityMatricesInput.cpp
     )
 
 add_executable(${UTEST_EXE} ${SRCS})
diff --git a/src/Estimators/tests/SpinDensityTesting.cpp b/src/Estimators/tests/EstimatorTesting.cpp
similarity index 92%
rename from src/Estimators/tests/SpinDensityTesting.cpp
rename to src/Estimators/tests/EstimatorTesting.cpp
index b190f020f9..339a3e96ab 100644
--- a/src/Estimators/tests/SpinDensityTesting.cpp
+++ b/src/Estimators/tests/EstimatorTesting.cpp
@@ -2,14 +2,14 @@
 // This file is distributed under the University of Illinois/NCSA Open Source License.
 // See LICENSE file in top directory for details.
 //
-// Copyright (c) 2020 QMCPACK developers.
+// Copyright (c) 2021 QMCPACK developers.
 //
 // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
 //
 // File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
-#include "SpinDensityTesting.h"
+#include "EstimatorTesting.h"
 
 namespace qmcplusplus
 {
diff --git a/src/Estimators/tests/SpinDensityTesting.h b/src/Estimators/tests/EstimatorTesting.h
similarity index 84%
rename from src/Estimators/tests/SpinDensityTesting.h
rename to src/Estimators/tests/EstimatorTesting.h
index 324557980f..91f99e68b4 100644
--- a/src/Estimators/tests/SpinDensityTesting.h
+++ b/src/Estimators/tests/EstimatorTesting.h
@@ -2,15 +2,15 @@
 // This file is distributed under the University of Illinois/NCSA Open Source License.
 // See LICENSE file in top directory for details.
 //
-// Copyright (c) 2020 QMCPACK developers.
+// Copyright (c) 2021 QMCPACK developers.
 //
 // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
 //
 // File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
-#ifndef QMCPLUSPLUS_SPINDENSITYTESTING_H
-#define QMCPLUSPLUS_SPINDENSITYTESTING_H
+#ifndef QMCPLUSPLUS_ESTIMATOR_TESTING_H
+#define QMCPLUSPLUS_ESTIMATOR_TESTING_H
 
 #include "ParticleSet.h"
 
diff --git a/src/Estimators/tests/InvalidOneBodyDensityMatricesInput.h b/src/Estimators/tests/InvalidOneBodyDensityMatricesInput.h
new file mode 100644
index 0000000000..0a75477c63
--- /dev/null
+++ b/src/Estimators/tests/InvalidOneBodyDensityMatricesInput.h
@@ -0,0 +1,53 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
+//
+// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_INVALID_OBDM_INPUT_H
+#define QMCPLUSPLUS_INVALID_OBDM_INPUT_H
+
+#include <array>
+
+namespace qmcplusplus
+{
+namespace testing
+{
+// clang-format: off
+constexpr std::array<const char*, 2> invalid_one_body_density_matrices_input_sections{
+    R"(
+<estimator type="dm1b" name="DensityMatrices">
+  <parameter name="basis"        >  spo_u spo_uv  </parameter>
+  <parameter name="evaluator"    >  matrix        </parameter>
+  <parameter name="integrator"   >  path          </parameter>
+  <parameter name="scale"        > -0.2           </parameter>
+  <parameter name="samples"      >  64            </parameter>
+  <parameter name="timestep"     >  0.5           </parameter>
+  <parameter name="use_drift"    >  no            </parameter>
+</estimator>
+)",
+    R"(
+<estimator type="dm1b" name="DensityMatrices">
+  <parameter name="basis"        >  dm_basis      </parameter>
+  <parameter name="evaluator"    >  loop          </parameter>
+  <parameter name="integrator"   >  uniform       </parameter>
+  <parameter name="samples"      >  128           </parameter>
+  <parameter name="scale"        >  1.1           </parameter>
+  <parameter name="timestep"     >  0.5           </parameter>
+  <parameter name="use_drift"    >  yes           </parameter>
+</estimator>
+)"
+    // clang-format: on
+};
+
+constexpr int invalid_obdm_input_bad_integrator = 0;
+constexpr int invalid_obdm_input_bad_scale      = 1;
+} // namespace testing
+} // namespace qmcplusplus
+
+#endif
diff --git a/src/Estimators/tests/ValidOneBodyDensityMatricesInput.h b/src/Estimators/tests/ValidOneBodyDensityMatricesInput.h
new file mode 100644
index 0000000000..5cf5d2e7ea
--- /dev/null
+++ b/src/Estimators/tests/ValidOneBodyDensityMatricesInput.h
@@ -0,0 +1,53 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
+//
+// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_VALID_OBDM_INPUT_H
+#define QMCPLUSPLUS_VALID_OBDM_INPUT_H
+
+#include <array>
+
+namespace qmcplusplus
+{
+namespace testing
+{
+// clang-format: off
+constexpr std::array<const char*, 2> valid_one_body_density_matrices_input_sections{
+    R"(
+<estimator type="dm1b" name="DensityMatrices">
+  <parameter name="basis"        >  spo_u spo_uv  </parameter>
+  <parameter name="evaluator"    >  matrix        </parameter>
+  <parameter name="integrator"   >  density       </parameter>
+  <parameter name="samples"      >  64            </parameter>
+  <parameter name="timestep"     >  0.5           </parameter>
+  <parameter name="use_drift"    >  no            </parameter>
+</estimator>
+)",
+    R"(
+<estimator type="dm1b" name="DensityMatrices">
+  <parameter name="basis"        >  dm_basis      </parameter>
+  <parameter name="evaluator"    >  loop          </parameter>
+  <parameter name="integrator"   >  uniform       </parameter>
+  <parameter name="samples"      >  128           </parameter>
+  <parameter name="scale"        >  0.8           </parameter>
+  <parameter name="timestep"     >  0.5           </parameter>
+  <parameter name="use_drift"    >  yes           </parameter>
+</estimator>
+)"
+    // clang-format: on
+};
+
+constexpr int valid_obdm_input       = 0;
+constexpr int vlaid_obdm_input_scale = 1;
+
+} // namespace testing
+} // namespace qmcplusplus
+
+#endif
diff --git a/src/Estimators/tests/test_OneBodyDensityMatricesInput.cpp b/src/Estimators/tests/test_OneBodyDensityMatricesInput.cpp
new file mode 100644
index 0000000000..1beb4a80f6
--- /dev/null
+++ b/src/Estimators/tests/test_OneBodyDensityMatricesInput.cpp
@@ -0,0 +1,53 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2021 QMCPACK developers.
+//
+// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
+//
+// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "catch.hpp"
+
+#include "OneBodyDensityMatricesInput.h"
+#include "ValidOneBodyDensityMatricesInput.h"
+#include "InvalidOneBodyDensityMatricesInput.h"
+#include "EstimatorTesting.h"
+#include "ParticleSet.h"
+#include "OhmmsData/Libxml2Doc.h"
+#include "Message/UniformCommunicateError.h"
+
+#include <iostream>
+
+namespace qmcplusplus
+{
+TEST_CASE("OneBodyDensityMatricesInput::from_xml", "[estimators]")
+{
+  using POLT    = PtclOnLatticeTraits;
+  using Lattice = POLT::ParticleLayout_t;
+
+  for (auto input_xml : testing::valid_one_body_density_matrices_input_sections)
+  {
+    Libxml2Document doc;
+    bool okay = doc.parseFromString(input_xml);
+    REQUIRE(okay);
+    xmlNodePtr node = doc.getRoot();
+    OneBodyDensityMatricesInput obdmi(node);
+  }
+
+  for (auto input_xml : testing::invalid_one_body_density_matrices_input_sections)
+  {
+    Libxml2Document doc;
+    bool okay = doc.parseFromString(input_xml);
+    REQUIRE(okay);
+    xmlNodePtr node = doc.getRoot();
+    
+    CHECK_THROWS_AS(OneBodyDensityMatricesInput(node), UniformCommunicateError);
+  }
+
+}
+
+} // namespace qmcplusplus
diff --git a/src/Estimators/tests/test_SpinDensityInput.cpp b/src/Estimators/tests/test_SpinDensityInput.cpp
index 157906a7f9..64efa90125 100644
--- a/src/Estimators/tests/test_SpinDensityInput.cpp
+++ b/src/Estimators/tests/test_SpinDensityInput.cpp
@@ -14,7 +14,7 @@
 
 #include "SpinDensityInput.h"
 #include "ValidSpinDensityInput.h"
-#include "SpinDensityTesting.h"
+#include "EstimatorTesting.h"
 #include "ParticleSet.h"
 #include "OhmmsData/Libxml2Doc.h"
 
diff --git a/src/Estimators/tests/test_SpinDensityNew.cpp b/src/Estimators/tests/test_SpinDensityNew.cpp
index 6e4ec13d9d..ede76f9081 100644
--- a/src/Estimators/tests/test_SpinDensityNew.cpp
+++ b/src/Estimators/tests/test_SpinDensityNew.cpp
@@ -2,7 +2,7 @@
 // This file is distributed under the University of Illinois/NCSA Open Source License.
 // See LICENSE file in top directory for details.
 //
-// Copyright (c) 2020 QMCPACK developers.
+// Copyright (c) 2021 QMCPACK developers.
 //
 // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Lab
 //
@@ -18,7 +18,7 @@
 #include "RandomForTest.h"
 #include "ParticleSet.h"
 #include "TrialWaveFunction.h"
-#include "SpinDensityTesting.h"
+#include "EstimatorTesting.h"
 
 #include "OhmmsData/Libxml2Doc.h"
 

From ed13bd8918b531817b350c9a5975aa6ecebeba0a Mon Sep 17 00:00:00 2001
From: Steven Hahn <hahnse@ornl.gov>
Date: Fri, 24 Sep 2021 10:47:26 -0400
Subject: [PATCH 09/35] Eliminate deprecated find_package(CUDA) from qmcpack

Replace it with first-class language support and find_package(CUDAToolkit)

Signed-off-by: Steven Hahn <hahnse@ornl.gov>
---
 CMakeLists.txt                           | 59 +++++++++++-------------
 src/AFQMC/CMakeLists.txt                 | 11 ++---
 src/Particle/CMakeLists.txt              |  2 +-
 src/Platforms/CUDA/CMakeLists.txt        |  8 ++--
 src/Platforms/CUDA_legacy/CMakeLists.txt |  6 +--
 src/Platforms/tests/CUDA/CMakeLists.txt  |  2 +-
 src/QMCHamiltonians/CMakeLists.txt       |  2 +-
 src/QMCWaveFunctions/CMakeLists.txt      |  4 +-
 src/einspline/CMakeLists.txt             |  2 +-
 src/einspline/tests/CMakeLists.txt       |  2 +-
 10 files changed, 45 insertions(+), 53 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 075ebe69aa..8dab5013f4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 ######################################################################
 # CMake version and policies
 ######################################################################
-cmake_minimum_required(VERSION 3.15.0)
+cmake_minimum_required(VERSION 3.17.0)
 
 # CMP0074: CMake find_package will use <PackageName>_ROOT CMake variable
 # and environment variable in search path.
@@ -14,6 +14,7 @@ cmake_policy(SET CMP0075 NEW)
 ######################################################################
 # QMCPACK project
 ######################################################################
+
 project(
   qmcpack
   VERSION 3.11.9
@@ -41,6 +42,17 @@ if(NOT QMC_CXX_STANDARD EQUAL 17)
                   "Using other versions of the C++ standard is unsupported and done entirely at user's own risk.")
 endif()
 
+#--------------------------------------------------------------------
+# Set CUDA standard
+#--------------------------------------------------------------------
+set(QMC_CUDA_STANDARD
+    14
+    CACHE STRING "QMCPACK CUDA C++ language standard")
+if(NOT QMC_CUDA_STANDARD EQUAL 14)
+	message(WARNING "C++14 is the only CUDA language standard officially supported by this QMCPACK version. "
+                        "Using other versions of the CUDA C++ standard is unsupported and done entirely at user's own risk.")
+endif()
+
 #--------------------------------------------------------------------
 # Programmind model related build options
 # MPI, OpenMP, GPU acceleration
@@ -260,6 +272,8 @@ endif()
 set(CMAKE_CXX_STANDARD ${QMC_CXX_STANDARD})
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_CUDA_STANDARD ${QMC_CUDA_STANDARD})
+set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
 
 # Check that a C++ compiler is compatible with the underlying libstdc++
 include(Testlibstdc++)
@@ -662,38 +676,19 @@ if(QMC_CUDA OR ENABLE_CUDA)
   if(QMC_CUDA2HIP)
     message(STATUS "CUDA2HIP enabled") # all the HIP and ROCm settings will be handled by ENABLE_ROCM
   else(QMC_CUDA2HIP)
-    # FindCUDA default CUDA_PROPAGATE_HOST_FLAGS to ON but we prefer OFF
-    # It happened -ffast-math from host caused numerical issue in CUDA kernels.
-    option(CUDA_PROPAGATE_HOST_FLAGS "Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompile" OFF)
-    find_package(CUDA REQUIRED)
-    set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
-    #set(CUDA_NVCC_FLAGS
-    #  "-arch=sm_20;-Drestrict=__restrict__;-DNO_CUDA_MAIN;-O3;-use_fast_math")
-    if(CUDA_NVCC_FLAGS MATCHES "arch")
-      # User defined NVCC flags
-      message(STATUS "Setting CUDA FLAGS=${CUDA_NVCC_FLAGS}")
-    else(CUDA_NVCC_FLAGS MATCHES "arch")
-      # Automatically set the default NVCC flags
-      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Drestrict=__restrict__;-DNO_CUDA_MAIN;-std=c++14")
-      if(QMC_COMPLEX)
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DQMC_COMPLEX=${QMC_COMPLEX}")
-      endif()
-      if(CMAKE_BUILD_TYPE STREQUAL "DEBUG")
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-g;-G")
-      else()
-        # Temporarily disable fast_math because it causes multiple test failures
-        # SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-O3;-use_fast_math")
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-O3")
-      endif()
-      set(CUDA_ARCH
-          sm_70
-          CACHE STRING "CUDA architecture sm_XX")
-      set(CUDA_NVCC_FLAGS "-arch=${CUDA_ARCH};${CUDA_NVCC_FLAGS}")
-    endif(CUDA_NVCC_FLAGS MATCHES "arch")
-    include_directories(${CUDA_INCLUDE_DIRS})
+    enable_language(CUDA)
+    find_package(CUDAToolkit REQUIRED)
+    if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+      set(CMAKE_CUDA_ARCHITECTURES 70)
+    endif()
+    # Automatically set the default NVCC flags
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Drestrict=__restrict__ -DNO_CUDA_MAIN")
+    if(QMC_COMPLEX)
+      set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DQMC_COMPLEX=${QMC_COMPLEX}")
+    endif()
     set(HAVE_CUDA 1)
-    message("   CUDA_NVCC_FLAGS=${CUDA_NVCC_FLAGS}")
-  endif(QMC_CUDA2HIP)
+    message("   CMAKE_CUDA_FLAGS=${CMAKE_CUDA_FLAGS}")
+  endif()
 else(QMC_CUDA OR ENABLE_CUDA)
   if(QMC_CUDA2HIP)
     message(FATAL_ERROR "QMC_CUDA2HIP requires QMC_CUDA=ON or ENABLE_CUDA=ON.")
diff --git a/src/AFQMC/CMakeLists.txt b/src/AFQMC/CMakeLists.txt
index 0b8564a7e1..22e9cf7b98 100644
--- a/src/AFQMC/CMakeLists.txt
+++ b/src/AFQMC/CMakeLists.txt
@@ -97,16 +97,11 @@ elseif(ENABLE_HIP)
       Numerics/detail/HIP/Kernels/inplace_product.hip.cpp
       Numerics/detail/HIP/Kernels/get_diagonal.hip.cpp)
   set(AFQMC_SRCS ${AFQMC_SRCS} Memory/HIP/hip_utilities.cpp Memory/HIP/hip_arch.cpp Memory/HIP/hip_init.cpp)
-else(ENABLE_CUDA)
-
-endif(ENABLE_CUDA)
+endif()
 
 if(ENABLE_CUDA)
-  cuda_add_library(afqmc ${AFQMC_SRCS})
-  cuda_add_cublas_to_target(afqmc)
-  target_link_libraries(afqmc PRIVATE ${CUDA_cusparse_LIBRARY})
-  target_link_libraries(afqmc PRIVATE ${CUDA_cusolver_LIBRARY})
-  target_link_libraries(afqmc PRIVATE ${CUDA_curand_LIBRARY})
+  add_library(afqmc ${AFQMC_SRCS})
+  target_link_libraries(afqmc PRIVATE CUDA::curand CUDA::cusparse CUDA::cusolver CUDA::cublas)
 elseif(ENABLE_HIP)
   set_source_files_properties(${AFQMC_HIP_SRCS} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
   hip_add_library(afqmc_hip_lib ${AFQMC_HIP_SRCS})
diff --git a/src/Particle/CMakeLists.txt b/src/Particle/CMakeLists.txt
index bd23e31d67..a3e996bb67 100644
--- a/src/Particle/CMakeLists.txt
+++ b/src/Particle/CMakeLists.txt
@@ -63,7 +63,7 @@ target_link_libraries(qmcparticle PUBLIC qmcnumerics qmcutil platform_runtime)
 
 if(QMC_CUDA)
   if(NOT QMC_CUDA2HIP)
-    cuda_add_library(qmcparticle_cuda accept_kernel.cu)
+    add_library(qmcparticle_cuda accept_kernel.cu)
   else()
     hip_add_library(qmcparticle_cuda accept_kernel.cu)
   endif(NOT QMC_CUDA2HIP)
diff --git a/src/Platforms/CUDA/CMakeLists.txt b/src/Platforms/CUDA/CMakeLists.txt
index 11a34fc793..343016fe3d 100644
--- a/src/Platforms/CUDA/CMakeLists.txt
+++ b/src/Platforms/CUDA/CMakeLists.txt
@@ -13,9 +13,11 @@ set(CUDA_RT_SRCS CUDAfill.cpp CUDAallocator.cpp CUDAruntime.cpp)
 set(CUDA_LA_SRCS cuBLAS_missing_functions.cu)
 
 if(NOT QMC_CUDA2HIP)
-  cuda_add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
-  cuda_add_library(platform_cuda_LA ${CUDA_LA_SRCS})
-  cuda_add_cublas_to_target(platform_cuda_LA)
+  add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
+  add_library(platform_cuda_LA ${CUDA_LA_SRCS})
+  target_link_libraries(platform_cuda_LA PRIVATE CUDA::cublas)
+  target_include_directories(platform_cuda_runtime PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+  target_link_libraries(platform_cuda_runtime PRIVATE CUDA::cudart)
 else()
   hip_add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
   hip_add_library(platform_cuda_LA ${CUDA_LA_SRCS})
diff --git a/src/Platforms/CUDA_legacy/CMakeLists.txt b/src/Platforms/CUDA_legacy/CMakeLists.txt
index 1c2ecc96a3..7eb121c67a 100644
--- a/src/Platforms/CUDA_legacy/CMakeLists.txt
+++ b/src/Platforms/CUDA_legacy/CMakeLists.txt
@@ -12,9 +12,9 @@
 set(CUDA_LEGACY_SRCS cuda_inverse.cu gpu_vector.cpp gpu_misc.cpp)
 
 if(NOT QMC_CUDA2HIP)
-  cuda_add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS})
-  cuda_add_cublas_to_target(platform_cuda_legacy)
-  target_link_libraries(platform_cuda_legacy PRIVATE ${CUDA_LIBRARIES})
+  add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS})
+  target_include_directories(platform_cuda_legacy PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+  target_link_libraries(platform_cuda_legacy PRIVATE CUDA::cublas)
 else()
   hip_add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS})
   target_link_libraries(platform_cuda_legacy PUBLIC platform_rocm_runtime platform_rocm_LA)
diff --git a/src/Platforms/tests/CUDA/CMakeLists.txt b/src/Platforms/tests/CUDA/CMakeLists.txt
index 41cb8096be..82762f78ba 100644
--- a/src/Platforms/tests/CUDA/CMakeLists.txt
+++ b/src/Platforms/tests/CUDA/CMakeLists.txt
@@ -15,7 +15,7 @@ set(UTEST_EXE test_${SRC_DIR})
 set(UTEST_NAME deterministic-unit_test_${SRC_DIR})
 
 if(NOT QMC_CUDA2HIP)
-  cuda_add_library(cuda_device_value_test_kernels test_device_value_kernels.cu)
+  add_library(cuda_device_value_test_kernels test_device_value_kernels.cu)
 else()
   hip_add_library(cuda_device_value_test_kernels test_device_value_kernels.cu)
 endif()
diff --git a/src/QMCHamiltonians/CMakeLists.txt b/src/QMCHamiltonians/CMakeLists.txt
index f910b2f481..bf37f918fe 100644
--- a/src/QMCHamiltonians/CMakeLists.txt
+++ b/src/QMCHamiltonians/CMakeLists.txt
@@ -99,7 +99,7 @@ else()
 endif()
 if(QMC_CUDA)
   if(NOT QMC_CUDA2HIP)
-    cuda_add_library(qmcham_cuda ${HAMSRCS_CUDA})
+    add_library(qmcham_cuda ${HAMSRCS_CUDA})
   else()
     hip_add_library(qmcham_cuda ${HAMSRCS_CUDA})
   endif(NOT QMC_CUDA2HIP)
diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 6c83e7edb5..ee111fbb50 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -187,7 +187,7 @@ endif(USE_OBJECT_TARGET)
 
 if(QMC_CUDA OR ENABLE_CUDA)
   if(NOT QMC_CUDA2HIP)
-    cuda_add_library(qmcwfs_cuda ${WFSSRCS_CUDA})
+    add_library(qmcwfs_cuda ${WFSSRCS_CUDA})
   else()
     hip_add_library(qmcwfs_cuda ${WFSSRCS_CUDA})
     target_link_libraries(qmcwfs_cuda PUBLIC platform_LA)
@@ -208,7 +208,7 @@ target_link_libraries(qmcwfs PRIVATE einspline platform_LA Math::FFTW3)
 if(ENABLE_CUDA)
   set(DIRECT_INVERSION_SRCS detail/CUDA/cuBLAS_LU.cu)
   if(NOT QMC_CUDA2HIP)
-    cuda_add_library(qmcwfs_direct_inversion_cuda "${DIRECT_INVERSION_SRCS}")
+    add_library(qmcwfs_direct_inversion_cuda "${DIRECT_INVERSION_SRCS}")
   else()
     hip_add_library(qmcwfs_direct_inversion_cuda "${DIRECT_INVERSION_SRCS}")
   endif()
diff --git a/src/einspline/CMakeLists.txt b/src/einspline/CMakeLists.txt
index e85e6955c5..34ca21049e 100644
--- a/src/einspline/CMakeLists.txt
+++ b/src/einspline/CMakeLists.txt
@@ -37,7 +37,7 @@ set(SRCS
 if(QMC_CUDA)
   set(SRCS ${SRCS} multi_bspline_create_cuda.cu bspline_create_cuda.cu)
   if(NOT QMC_CUDA2HIP)
-    cuda_add_library(einspline ${SRCS})
+    add_library(einspline ${SRCS})
   else()
     hip_add_library(einspline ${SRCS})
   endif(NOT QMC_CUDA2HIP)
diff --git a/src/einspline/tests/CMakeLists.txt b/src/einspline/tests/CMakeLists.txt
index 1d114fe1a5..1f1400508a 100644
--- a/src/einspline/tests/CMakeLists.txt
+++ b/src/einspline/tests/CMakeLists.txt
@@ -18,7 +18,7 @@ set(SRCS test_one.cpp test_3d.cpp)
 if(QMC_CUDA)
   set(SRCS ${SRCS} test_cuda.cu)
   if(NOT QMC_CUDA2HIP)
-    cuda_add_library(cudatests test_cuda.cu)
+    add_library(cudatests test_cuda.cu)
   else()
     hip_add_library(cudatests test_cuda.cu)
   endif(NOT QMC_CUDA2HIP)

From b607510cfc4dd3ab94579b5aaa70e7e4d86df766 Mon Sep 17 00:00:00 2001
From: Steven Hahn <hahnse@ornl.gov>
Date: Wed, 29 Sep 2021 15:06:19 -0400
Subject: [PATCH 10/35] Don't change required CMake version

Signed-off-by: Steven Hahn <hahnse@ornl.gov>
---
 CMakeLists.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8dab5013f4..14ab6dbb6f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 ######################################################################
 # CMake version and policies
 ######################################################################
-cmake_minimum_required(VERSION 3.17.0)
+cmake_minimum_required(VERSION 3.15.0)
 
 # CMP0074: CMake find_package will use <PackageName>_ROOT CMake variable
 # and environment variable in search path.
@@ -676,6 +676,9 @@ if(QMC_CUDA OR ENABLE_CUDA)
   if(QMC_CUDA2HIP)
     message(STATUS "CUDA2HIP enabled") # all the HIP and ROCm settings will be handled by ENABLE_ROCM
   else(QMC_CUDA2HIP)
+    if (CMAKE_VERSION VERSION_LESS 3.17.0)
+      message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.17.0 or later")
+    endif()
     enable_language(CUDA)
     find_package(CUDAToolkit REQUIRED)
     if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)

From 6601fe903d535a70fac040b46d29e77c00955392 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Wed, 29 Sep 2021 16:23:13 -0500
Subject: [PATCH 11/35] Fix executables accidentally moved by #3467

---
 src/QMCTools/CMakeLists.txt | 1 +
 src/Sandbox/CMakeLists.txt  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/QMCTools/CMakeLists.txt b/src/QMCTools/CMakeLists.txt
index 1c48896f66..5f358da550 100644
--- a/src/QMCTools/CMakeLists.txt
+++ b/src/QMCTools/CMakeLists.txt
@@ -17,6 +17,7 @@
 
 project(qmctools)
 
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${qmcpack_BINARY_DIR}/bin)
 add_executable(convert4qmc convert4qmc.cpp QMCGaussianParserBase.cpp GaussianFCHKParser.cpp GamesAsciiParser.cpp
     LCAOHDFParser.cpp DiracParser.cpp)
 
diff --git a/src/Sandbox/CMakeLists.txt b/src/Sandbox/CMakeLists.txt
index eaea5cc545..c371a21443 100644
--- a/src/Sandbox/CMakeLists.txt
+++ b/src/Sandbox/CMakeLists.txt
@@ -1,5 +1,6 @@
 project(Sandbox)
 
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${qmcpack_BINARY_DIR}/bin)
 # add apps XYZ.cpp, e.g., qmc_particles.cpp
 set(ESTEST diff_distancetables einspline_spo einspline_spo_nested determinant restart determinant_delayed_update)
 

From d8b9ea4ffff501adeea4292534776ebe0dc0f586 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Wed, 29 Sep 2021 16:30:43 -0500
Subject: [PATCH 12/35] Need special care in src/QMCTools/tests

---
 src/QMCTools/tests/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/QMCTools/tests/CMakeLists.txt b/src/QMCTools/tests/CMakeLists.txt
index 2be1e44c11..c4d276111e 100644
--- a/src/QMCTools/tests/CMakeLists.txt
+++ b/src/QMCTools/tests/CMakeLists.txt
@@ -10,6 +10,7 @@
 #//////////////////////////////////////////////////////////////////////////////////////
 
 
+unset(CMAKE_RUNTIME_OUTPUT_DIRECTORY)
 set(SRC_DIR tools)
 set(UTEST_EXE test_${SRC_DIR})
 set(UTEST_NAME deterministic-unit_test_${SRC_DIR})

From 761745bfaeb1bcd38c012169a8c5d4a98fac2506 Mon Sep 17 00:00:00 2001
From: Steven Hahn <hahnse@ornl.gov>
Date: Wed, 29 Sep 2021 17:36:10 -0400
Subject: [PATCH 13/35] Apply changes recommended by @ye-luo.

Signed-off-by: Steven Hahn <hahnse@ornl.gov>
---
 CMakeLists.txt                           | 22 +++++++---------------
 src/Platforms/CUDA/CMakeLists.txt        |  7 +++----
 src/Platforms/CUDA_legacy/CMakeLists.txt |  3 +--
 3 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 14ab6dbb6f..24a7e9bfac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,7 +14,6 @@ cmake_policy(SET CMP0075 NEW)
 ######################################################################
 # QMCPACK project
 ######################################################################
-
 project(
   qmcpack
   VERSION 3.11.9
@@ -42,17 +41,6 @@ if(NOT QMC_CXX_STANDARD EQUAL 17)
                   "Using other versions of the C++ standard is unsupported and done entirely at user's own risk.")
 endif()
 
-#--------------------------------------------------------------------
-# Set CUDA standard
-#--------------------------------------------------------------------
-set(QMC_CUDA_STANDARD
-    14
-    CACHE STRING "QMCPACK CUDA C++ language standard")
-if(NOT QMC_CUDA_STANDARD EQUAL 14)
-	message(WARNING "C++14 is the only CUDA language standard officially supported by this QMCPACK version. "
-                        "Using other versions of the CUDA C++ standard is unsupported and done entirely at user's own risk.")
-endif()
-
 #--------------------------------------------------------------------
 # Programmind model related build options
 # MPI, OpenMP, GPU acceleration
@@ -272,8 +260,9 @@ endif()
 set(CMAKE_CXX_STANDARD ${QMC_CXX_STANDARD})
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
-set(CMAKE_CUDA_STANDARD ${QMC_CUDA_STANDARD})
+set(CMAKE_CUDA_STANDARD ${QMC_CXX_STANDARD})
 set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
+set(CMAKE_CUDA_EXTENSIONS OFF)
 
 # Check that a C++ compiler is compatible with the underlying libstdc++
 include(Testlibstdc++)
@@ -679,11 +668,14 @@ if(QMC_CUDA OR ENABLE_CUDA)
     if (CMAKE_VERSION VERSION_LESS 3.17.0)
       message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.17.0 or later")
     endif()
-    enable_language(CUDA)
-    find_package(CUDAToolkit REQUIRED)
+    if(DEFINED CUDA_ARCH)
+      message(FATAL_ERROR "Use CMAKE_CUDA_ARCHITECTURES instead of CUDA_ARCH variable")
+    endif()
     if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
       set(CMAKE_CUDA_ARCHITECTURES 70)
     endif()
+    enable_language(CUDA)
+    find_package(CUDAToolkit REQUIRED)
     # Automatically set the default NVCC flags
     set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Drestrict=__restrict__ -DNO_CUDA_MAIN")
     if(QMC_COMPLEX)
diff --git a/src/Platforms/CUDA/CMakeLists.txt b/src/Platforms/CUDA/CMakeLists.txt
index 343016fe3d..a2cadf6019 100644
--- a/src/Platforms/CUDA/CMakeLists.txt
+++ b/src/Platforms/CUDA/CMakeLists.txt
@@ -15,9 +15,8 @@ set(CUDA_LA_SRCS cuBLAS_missing_functions.cu)
 if(NOT QMC_CUDA2HIP)
   add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
   add_library(platform_cuda_LA ${CUDA_LA_SRCS})
-  target_link_libraries(platform_cuda_LA PRIVATE CUDA::cublas)
-  target_include_directories(platform_cuda_runtime PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
-  target_link_libraries(platform_cuda_runtime PRIVATE CUDA::cudart)
+  target_link_libraries(platform_cuda_LA PRIVATE CUDA::cublas CUDA::cusolver)
+  target_link_libraries(platform_cuda_runtime PUBLIC CUDA::cudart)
 else()
   hip_add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
   hip_add_library(platform_cuda_LA ${CUDA_LA_SRCS})
@@ -25,4 +24,4 @@ else()
   target_link_libraries(platform_cuda_LA PUBLIC platform_rocm_LA)
 endif()
 
-target_link_libraries(platform_cuda_LA PRIVATE ${CUDA_cusolver_LIBRARY} platform_cuda_runtime)
+target_link_libraries(platform_cuda_LA PRIVATE platform_cuda_runtime)
diff --git a/src/Platforms/CUDA_legacy/CMakeLists.txt b/src/Platforms/CUDA_legacy/CMakeLists.txt
index 7eb121c67a..af20038d91 100644
--- a/src/Platforms/CUDA_legacy/CMakeLists.txt
+++ b/src/Platforms/CUDA_legacy/CMakeLists.txt
@@ -13,8 +13,7 @@ set(CUDA_LEGACY_SRCS cuda_inverse.cu gpu_vector.cpp gpu_misc.cpp)
 
 if(NOT QMC_CUDA2HIP)
   add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS})
-  target_include_directories(platform_cuda_legacy PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
-  target_link_libraries(platform_cuda_legacy PRIVATE CUDA::cublas)
+  target_link_libraries(platform_cuda_legacy PUBLIC CUDA::cublas)
 else()
   hip_add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS})
   target_link_libraries(platform_cuda_legacy PUBLIC platform_rocm_runtime platform_rocm_LA)

From 707977ed689a145a1a1dc5d03b276255780457fb Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Wed, 29 Sep 2021 17:22:44 -0500
Subject: [PATCH 14/35] More accurate stopper message.

---
 CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 24a7e9bfac..215ada295d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -669,7 +669,8 @@ if(QMC_CUDA OR ENABLE_CUDA)
       message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.17.0 or later")
     endif()
     if(DEFINED CUDA_ARCH)
-      message(FATAL_ERROR "Use CMAKE_CUDA_ARCHITECTURES instead of CUDA_ARCH variable")
+      unset(CUDA_ARCH CACHE)
+      message(FATAL_ERROR "CUDA_ARCH option has been removed. Use -DCMAKE_CUDA_ARCHITECTURES=80 if -DCUDA_ARCH=sm_80 was used.")
     endif()
     if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
       set(CMAKE_CUDA_ARCHITECTURES 70)

From 734415c99f811275bfdd14edf608986abb746e23 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Wed, 29 Sep 2021 17:24:08 -0500
Subject: [PATCH 15/35] CMAKE_CUDA_ARCHITECTURES needs CMake 3.18.

---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 215ada295d..b8b013d4d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -665,8 +665,8 @@ if(QMC_CUDA OR ENABLE_CUDA)
   if(QMC_CUDA2HIP)
     message(STATUS "CUDA2HIP enabled") # all the HIP and ROCm settings will be handled by ENABLE_ROCM
   else(QMC_CUDA2HIP)
-    if (CMAKE_VERSION VERSION_LESS 3.17.0)
-      message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.17.0 or later")
+    if (CMAKE_VERSION VERSION_LESS 3.18.0)
+      message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.18.0 or later")
     endif()
     if(DEFINED CUDA_ARCH)
       unset(CUDA_ARCH CACHE)

From 57e55d33e21134d16ebc2d25f5d5c204f656b24f Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Wed, 29 Sep 2021 17:30:50 -0500
Subject: [PATCH 16/35] Set back CUDA default to C++14.

---
 CMakeLists.txt | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b8b013d4d9..b86fd6e287 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -260,9 +260,6 @@ endif()
 set(CMAKE_CXX_STANDARD ${QMC_CXX_STANDARD})
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
-set(CMAKE_CUDA_STANDARD ${QMC_CXX_STANDARD})
-set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
-set(CMAKE_CUDA_EXTENSIONS OFF)
 
 # Check that a C++ compiler is compatible with the underlying libstdc++
 include(Testlibstdc++)
@@ -672,6 +669,12 @@ if(QMC_CUDA OR ENABLE_CUDA)
       unset(CUDA_ARCH CACHE)
       message(FATAL_ERROR "CUDA_ARCH option has been removed. Use -DCMAKE_CUDA_ARCHITECTURES=80 if -DCUDA_ARCH=sm_80 was used.")
     endif()
+    # a few production machines use CUDA 10 which only supports C++14.
+    if(NOT DEFINED CMAKE_CUDA_STANDARD)
+      set(CMAKE_CUDA_STANDARD 14)
+    endif()
+    set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
+    set(CMAKE_CUDA_EXTENSIONS OFF)
     if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
       set(CMAKE_CUDA_ARCHITECTURES 70)
     endif()

From ebf78496ec445f5d5063288a7323c5fd2aafb667 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Wed, 29 Sep 2021 19:02:47 -0500
Subject: [PATCH 17/35] Make platform_cuda_legacy depend on CUDA::cudart

---
 src/Platforms/CUDA/CMakeLists.txt        | 4 ++--
 src/Platforms/CUDA_legacy/CMakeLists.txt | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Platforms/CUDA/CMakeLists.txt b/src/Platforms/CUDA/CMakeLists.txt
index a2cadf6019..f84c63ee1c 100644
--- a/src/Platforms/CUDA/CMakeLists.txt
+++ b/src/Platforms/CUDA/CMakeLists.txt
@@ -14,14 +14,14 @@ set(CUDA_LA_SRCS cuBLAS_missing_functions.cu)
 
 if(NOT QMC_CUDA2HIP)
   add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
+  target_link_libraries(platform_cuda_runtime PUBLIC CUDA::cudart)
   add_library(platform_cuda_LA ${CUDA_LA_SRCS})
   target_link_libraries(platform_cuda_LA PRIVATE CUDA::cublas CUDA::cusolver)
-  target_link_libraries(platform_cuda_runtime PUBLIC CUDA::cudart)
 else()
   hip_add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
+  target_link_libraries(platform_cuda_runtime PUBLIC platform_rocm_runtime)
   hip_add_library(platform_cuda_LA ${CUDA_LA_SRCS})
-  target_link_libraries(platform_cuda_runtime PUBLIC platform_rocm_runtime)
   target_link_libraries(platform_cuda_LA PUBLIC platform_rocm_LA)
 endif()
 
 target_link_libraries(platform_cuda_LA PRIVATE platform_cuda_runtime)
diff --git a/src/Platforms/CUDA_legacy/CMakeLists.txt b/src/Platforms/CUDA_legacy/CMakeLists.txt
index af20038d91..d97c90c1de 100644
--- a/src/Platforms/CUDA_legacy/CMakeLists.txt
+++ b/src/Platforms/CUDA_legacy/CMakeLists.txt
@@ -13,7 +13,7 @@ set(CUDA_LEGACY_SRCS cuda_inverse.cu gpu_vector.cpp gpu_misc.cpp)
 
 if(NOT QMC_CUDA2HIP)
   add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS})
-  target_link_libraries(platform_cuda_legacy PUBLIC CUDA::cublas)
+  target_link_libraries(platform_cuda_legacy PUBLIC CUDA::cublas CUDA::cudart)
 else()
   hip_add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS})
   target_link_libraries(platform_cuda_legacy PUBLIC platform_rocm_runtime platform_rocm_LA)

From b39aea6a7861a3779f9f641154eda250b226acd6 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Wed, 29 Sep 2021 19:06:57 -0500
Subject: [PATCH 18/35] Set CMAKE_CUDA_ARCHITECTURES early.

---
 CMake/ClangCompilers.cmake |  4 ++++
 CMake/NVHPCCompilers.cmake |  8 ++++++--
 CMakeLists.txt             | 18 +++++++++++-------
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake
index bed72d755d..7128c96c3b 100644
--- a/CMake/ClangCompilers.cmake
+++ b/CMake/ClangCompilers.cmake
@@ -19,6 +19,10 @@ if(QMC_OMP)
         CACHE STRING "Offload target architecture")
     set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-fopenmp-targets=${OFFLOAD_TARGET}")
 
+    if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "nvptx64" AND DEFINED CMAKE_CUDA_ARCHITECTURES)
+      set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES})
+    endif()
+
     if(DEFINED OFFLOAD_ARCH)
       set(OPENMP_OFFLOAD_COMPILE_OPTIONS
           "${OPENMP_OFFLOAD_COMPILE_OPTIONS} -Xopenmp-target=${OFFLOAD_TARGET} -march=${OFFLOAD_ARCH}")
diff --git a/CMake/NVHPCCompilers.cmake b/CMake/NVHPCCompilers.cmake
index 6acb60d496..9128b3c9b6 100644
--- a/CMake/NVHPCCompilers.cmake
+++ b/CMake/NVHPCCompilers.cmake
@@ -8,8 +8,12 @@ if(QMC_OMP)
   if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment")
     message(WARNING "QMCPACK OpenMP offload is not ready for NVIDIA HPC compiler.")
     if(NOT DEFINED OFFLOAD_ARCH)
-      message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! "
-                          "Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.")
+      if(DEFINED CMAKE_CUDA_ARCHITECTURES)
+        set(OFFLOAD_ARCH cc${CMAKE_CUDA_ARCHITECTURES})
+      else()
+        message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! "
+                            "Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.")
+      endif()
     endif()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=gpu")
     set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-gpu=${OFFLOAD_ARCH}")
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b86fd6e287..8c44c52e3f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,6 +72,17 @@ if(ENABLE_CUDA AND QMC_CUDA)
   message(FATAL_ERROR "ENABLE_CUDA=ON and QMC_CUDA=ON can not be set together!")
 endif(ENABLE_CUDA AND QMC_CUDA)
 
+# set CMAKE_CUDA_ARCHITECTURES early such that offload compilers may take advantage of it
+if(ENABLE_CUDA OR QMC_CUDA AND NOT QMC_CUDA2HIP)
+  if(DEFINED CUDA_ARCH)
+    unset(CUDA_ARCH CACHE)
+    message(FATAL_ERROR "CUDA_ARCH option has been removed. Use -DCMAKE_CUDA_ARCHITECTURES=80 if -DCUDA_ARCH=sm_80 was used.")
+  endif()
+  if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+    set(CMAKE_CUDA_ARCHITECTURES 70)
+  endif()
+endif()
+
 #--------------------------------------------------------------------
 # Set compiler-time parameters
 # WALKER_MAX_PROPERTIES max number of observables + 12 or so standard
@@ -665,19 +676,12 @@ if(QMC_CUDA OR ENABLE_CUDA)
     if (CMAKE_VERSION VERSION_LESS 3.18.0)
       message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.18.0 or later")
     endif()
-    if(DEFINED CUDA_ARCH)
-      unset(CUDA_ARCH CACHE)
-      message(FATAL_ERROR "CUDA_ARCH option has been removed. Use -DCMAKE_CUDA_ARCHITECTURES=80 if -DCUDA_ARCH=sm_80 was used.")
-    endif()
     # a few production machines use CUDA 10 which only supports C++14.
     if(NOT DEFINED CMAKE_CUDA_STANDARD)
       set(CMAKE_CUDA_STANDARD 14)
     endif()
     set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
     set(CMAKE_CUDA_EXTENSIONS OFF)
-    if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
-      set(CMAKE_CUDA_ARCHITECTURES 70)
-    endif()
     enable_language(CUDA)
     find_package(CUDAToolkit REQUIRED)
     # Automatically set the default NVCC flags

From adca6d172759c93d9014d9e8789d3be4f84d0635 Mon Sep 17 00:00:00 2001
From: Jaron Krogel <krogeljt@ornl.gov>
Date: Thu, 30 Sep 2021 07:48:53 -0400
Subject: [PATCH 19/35] nexus: fix convert4qmc usage

---
 nexus/lib/qmcpack_converters.py                        | 10 ++++------
 nexus/tests/unit/test_qmcpack_converter_simulations.py |  6 ++++--
 nexus/tests/unit/test_qmcpack_simulation.py            |  7 +++++--
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/nexus/lib/qmcpack_converters.py b/nexus/lib/qmcpack_converters.py
index f4c66562dc..09867ac56e 100644
--- a/nexus/lib/qmcpack_converters.py
+++ b/nexus/lib/qmcpack_converters.py
@@ -799,10 +799,8 @@ def get_result(self,result_name,sim):
         wfn_file,ptcl_file = self.list_output_files()
         if result_name=='orbitals':
             result.location = os.path.join(self.locdir,wfn_file)
-            if self.input.hdf5==True:
-                orbfile = self.get_prefix()+'.orbs.h5'
-                result.orbfile = os.path.join(self.locdir,orbfile)
-            #end if
+            orbfile = self.get_prefix()+'.orbs.h5'
+            result.orbfile = os.path.join(self.locdir,orbfile)
         elif result_name=='particles':
             result.location = os.path.join(self.locdir,ptcl_file)
         else:
@@ -841,7 +839,7 @@ def incorporate_result(self,result_name,result,sim):
             self.input_code = 'pyscf'
             if result_name=='orbitals':
                 orbpath = os.path.relpath(result.h5_file,self.locdir)
-                input.pyscf = orbpath
+                input.orbitals = orbpath
             else:
                 implemented = False
             #end if
@@ -849,7 +847,7 @@ def incorporate_result(self,result_name,result,sim):
             self.input_code = 'qp'
             if result_name=='orbitals':
                 orbpath = os.path.relpath(result.outfile,self.locdir)
-                input.qp = orbpath
+                input.orbitals = orbpath
             else:
                 implemented = False
             #end if
diff --git a/nexus/tests/unit/test_qmcpack_converter_simulations.py b/nexus/tests/unit/test_qmcpack_converter_simulations.py
index 0704ab4176..ba82d7bc1b 100644
--- a/nexus/tests/unit/test_qmcpack_converter_simulations.py
+++ b/nexus/tests/unit/test_qmcpack_converter_simulations.py
@@ -251,6 +251,7 @@ def test_convert4qmc_get_result():
 
     result_ref = obj(
         location = './runs/sample.wfj.xml',
+        orbfile  = './runs/sample.orbs.h5',
         )
 
     assert(object_eq(result,result_ref))
@@ -338,7 +339,7 @@ def test_convert4qmc_incorporate_result():
     sim.incorporate_result('orbitals',pscf_result,pscf)
 
     assert(sim.input_code=='pyscf')
-    assert(sim.input.pyscf=='../scf.h5')
+    assert(sim.input.orbitals=='../scf.h5')
     
     # incorporate orbitals from quantum package
     sim = sim_start.copy()
@@ -349,7 +350,8 @@ def test_convert4qmc_incorporate_result():
     sim.incorporate_result('orbitals',qp_result,qp)
 
     assert(sim.input_code=='qp')
-    assert(sim.input.qp=='../qp_savewf.out')
+    #assert(sim.input.qp=='../qp_savewf.out')
+    assert(sim.input.orbitals=='../qp_savewf.out')
 
     clear_all_sims()
 #end def test_convert4qmc_incorporate_result
diff --git a/nexus/tests/unit/test_qmcpack_simulation.py b/nexus/tests/unit/test_qmcpack_simulation.py
index 2a3fdaa523..4c8f7c7237 100644
--- a/nexus/tests/unit/test_qmcpack_simulation.py
+++ b/nexus/tests/unit/test_qmcpack_simulation.py
@@ -272,7 +272,8 @@ def test_incorporate_result():
 
     result = c4q_orb.get_result('orbitals',None)
 
-    wfn_file = os.path.join(tpath,'c4q_orbitals.wfj.xml')
+    wfn_file  = os.path.join(tpath,'c4q_orbitals.wfj.xml')
+    wfn_file2 = os.path.join(tpath,'c4q_orbitals.orbs.h5')
     input = sim.input.copy()
     dset = input.get('determinantset')
     dset.href = 'orbs.h5'
@@ -281,6 +282,8 @@ def test_incorporate_result():
     input.qmcsystem = qs
     input.write(wfn_file)
     assert(os.path.exists(wfn_file))
+    open(wfn_file2,'w').write('fake')
+    assert(os.path.exists(wfn_file2))
 
     from qmcpack_input import QmcpackInput
     inp = QmcpackInput(wfn_file)
@@ -291,7 +294,7 @@ def test_incorporate_result():
     sim.incorporate_result('orbitals',result,c4q_orb)
 
     dset = sim.input.get('determinantset')
-    assert(dset.href=='orbs.h5')
+    assert(dset.href=='c4q_orbitals.orbs.h5')
 
 
     # incorporate qmcpack jastrow

From 058e4465180197b00157785a2d0012ea13df7369 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 30 Sep 2021 10:06:18 -0400
Subject: [PATCH 20/35] Update recipes under config for CUDA change.

---
 CMakeLists.txt                    |  2 +-
 config/build_olcf_summit.sh       |  4 ++--
 config/build_olcf_summit_Clang.sh | 15 ++++++++-------
 config/build_tulip.sh             |  2 +-
 4 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c44c52e3f..9ff80f9b64 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -690,7 +690,7 @@ if(QMC_CUDA OR ENABLE_CUDA)
       set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DQMC_COMPLEX=${QMC_COMPLEX}")
     endif()
     set(HAVE_CUDA 1)
-    message("   CMAKE_CUDA_FLAGS=${CMAKE_CUDA_FLAGS}")
+    message("Project CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
   endif()
 else(QMC_CUDA OR ENABLE_CUDA)
   if(QMC_CUDA2HIP)
diff --git a/config/build_olcf_summit.sh b/config/build_olcf_summit.sh
index dc40f90f57..2429214ae5 100755
--- a/config/build_olcf_summit.sh
+++ b/config/build_olcf_summit.sh
@@ -12,8 +12,8 @@ echo "Either source $BUILD_MODULES or load these same modules to run QMCPACK"
 
 declare -A builds=( ["cpu"]=" -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1" \
                     ["complex_cpu"]="-DQMC_COMPLEX=1  -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1" \
-                    ["legacy_gpu"]="-DQMC_CUDA=1 -DCUDA_ARCH=sm_70 " \
-		    ["complex_legacy_gpu"]="-DQMC_CUDA=1 -DQMC_COMPLEX=1 -DCUDA_ARCH=sm_70 " )
+                    ["legacy_gpu"]="-DQMC_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=70 " \
+                    ["complex_legacy_gpu"]="-DQMC_CUDA=1 -DQMC_COMPLEX=1 -DCMAKE_CUDA_ARCHITECTURES=70 " )
 
 mkdir bin
 
diff --git a/config/build_olcf_summit_Clang.sh b/config/build_olcf_summit_Clang.sh
index 1defa4ce34..598734a300 100755
--- a/config/build_olcf_summit_Clang.sh
+++ b/config/build_olcf_summit_Clang.sh
@@ -32,28 +32,29 @@ module load llvm/main-20210811-cuda10.1
 TYPE=Release
 Compiler=Clang
 
-source_folder=..
+source_folder=~/opt/qmcpack
 
 for name in offload_cuda_real_MP offload_cuda_real offload_cuda_cplx_MP offload_cuda_cplx \
             cpu_real_MP cpu_real cpu_cplx_MP cpu_cplx
 do
 
-CMAKE_FLAGS="-D CMAKE_BUILD_TYPE=$TYPE -D QMC_MATH_VENDOR=IBM_MASS -D MASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1 -D MPIEXEC_EXECUTABLE=`which jsrun` -D MPIEXEC_NUMPROC_FLAG='-n' -D MPIEXEC_PREFLAGS='-c;16;-g;1;-b;packed:16;--smpiargs=off'"
+CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=$TYPE -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1 -DMPIEXEC_EXECUTABLE=`which jsrun` -DMPIEXEC_NUMPROC_FLAG='-n' -DMPIEXEC_PREFLAGS='-c;16;-g;1;-b;packed:16;--smpiargs=off' -DCMAKE_CXX_STANDARD_LIBRARIES=/sw/summit/gcc/9.3.0-2/lib64/libstdc++.a"
 
 if [[ $name == *"cplx"* ]]; then
-  CMAKE_FLAGS="$CMAKE_FLAGS -D QMC_COMPLEX=1"
+  CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_COMPLEX=ON"
 fi
 
 if [[ $name == *"_MP"* ]]; then
-  CMAKE_FLAGS="$CMAKE_FLAGS -D QMC_MIXED_PRECISION=1"
+  CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_MIXED_PRECISION=ON"
 fi
 
 if [[ $name == *"offload"* ]]; then
-  CMAKE_FLAGS="$CMAKE_FLAGS -D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -DOFFLOAD_ARCH=sm_70"
+  CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON -DUSE_OBJECT_TARGET=ON -DOFFLOAD_ARCH=sm_70"
 fi
 
 if [[ $name == *"cuda"* ]]; then
-  CMAKE_FLAGS="$CMAKE_FLAGS -D ENABLE_CUDA=1 -D CUDA_ARCH=sm_70 -D CUDA_HOST_COMPILER=/usr/bin/gcc -D CUDA_NVCC_FLAGS='-Xcompiler;-mno-float128'"
+  CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++"
+  CUDA_FLAGS="-Xcompiler -mno-float128"
 fi
 
 folder=build_summit_${Compiler}_${name}
@@ -64,7 +65,7 @@ echo "**********************************"
 mkdir $folder
 cd $folder
 if [ ! -f CMakeCache.txt ] ; then
-cmake $CMAKE_FLAGS -D CMAKE_C_COMPILER=mpicc -D CMAKE_CXX_COMPILER=mpicxx $source_folder
+cmake $CMAKE_FLAGS -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_CUDA_FLAGS="$CUDA_FLAGS" $source_folder
 cmake .
 fi
 make -j16
diff --git a/config/build_tulip.sh b/config/build_tulip.sh
index 717afc09bd..324c93bac3 100644
--- a/config/build_tulip.sh
+++ b/config/build_tulip.sh
@@ -47,7 +47,7 @@ elif [[ $build == *"MI60"* ]]; then
 fi
 
 if [[ $build == *"cuda"* ]]; then
-  CTEST_FLAGS="$CTEST_FLAGS -DENABLE_CUDA=ON -DCUDA_ARCH=sm_70 -DCUDA_TOOLKIT_ROOT_DIR=$CUDA_ROOT -DCUDA_HOST_COMPILER=`which gcc`"
+  CTEST_FLAGS="$CTEST_FLAGS -DENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70 -DCUDAToolkit_ROOT=$CUDA_ROOT -DCMAKE_CUDA_HOST_COMPILER=`which g++`"
 fi
 
 if [[ $build == *"cplx"* ]]; then

From d40feac6243b9837d7165f8aefaf38eee35ec4a7 Mon Sep 17 00:00:00 2001
From: Jaron Krogel <krogeljt@ornl.gov>
Date: Thu, 30 Sep 2021 14:46:40 -0400
Subject: [PATCH 21/35] nexus: fix syntax warnings

---
 nexus/lib/pwscf_analyzer.py | 6 +++---
 nexus/lib/qmcpack_input.py  | 2 +-
 nexus/lib/structure.py      | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/nexus/lib/pwscf_analyzer.py b/nexus/lib/pwscf_analyzer.py
index 6efee89286..03ee461a89 100644
--- a/nexus/lib/pwscf_analyzer.py
+++ b/nexus/lib/pwscf_analyzer.py
@@ -924,14 +924,14 @@ def plot_bandstructure(self, filename=None, filepath=None, max_min_e = None, sho
                 #end if              
             #end for
             for ln, li in enumerate(labels):
-                if li is not '':
+                if li != '':
                     axvline(x[ln], ymin=-100, ymax=100, linewidth=3, color='k')
                     if li == 'GAMMA':
                         labels[ln] = r'$\Gamma$'
-                    elif li is not '':
+                    elif li != '':
                         labels[ln] = '${0}$'.format(li)
                     #end if
-                    if labels[ln-1] is not '' and ln > 0:
+                    if labels[ln-1] != '' and ln > 0:
                         labels[ln] = labels[ln-1]+'|'+labels[ln]
                         labels[ln-1] = ''
                     #end if
diff --git a/nexus/lib/qmcpack_input.py b/nexus/lib/qmcpack_input.py
index 58ad64a71f..8cdacb669b 100644
--- a/nexus/lib/qmcpack_input.py
+++ b/nexus/lib/qmcpack_input.py
@@ -5377,7 +5377,7 @@ def generate_jastrow1(function='bspline',size=8,rcut=None,coeff=None,cusp=0.,ena
     corrs = []
     for i in range(len(elements)):
         element = elements[i]
-        if cusp is 'Z':
+        if cusp == 'Z':
             QmcpackInput.class_error('need to implement Z cusp','generate_jastrow1')
         else:
             lcusp  = cusp
diff --git a/nexus/lib/structure.py b/nexus/lib/structure.py
index 7580b06d3f..89b5dcda28 100644
--- a/nexus/lib/structure.py
+++ b/nexus/lib/structure.py
@@ -5562,7 +5562,7 @@ def _getseekpath(
         structure = structure.folded_structure
     #end if
     structure = structure.copy()
-    if structure.units is not 'A':
+    if structure.units != 'A':
         structure.change_units('A')
     #end if
     axes       = structure.axes
@@ -6783,9 +6783,9 @@ def __init__(self,
         pos  = []
         if basis_vectors is None:
             basis_vectors = axes
-        elif basis_vectors is 'primitive':
+        elif basis_vectors=='primitive':
             basis_vectors = axes_prim
-        elif basis_vectors is 'conventional':
+        elif basis_vectors=='conventional':
             basis_vectors = axes_conv
         #end if
         nbasis = len(atoms)

From 278b8e4f1523d5fd2679ed2f9e4933ed883ad157 Mon Sep 17 00:00:00 2001
From: Jaron Krogel <krogeljt@ornl.gov>
Date: Thu, 30 Sep 2021 15:08:18 -0400
Subject: [PATCH 22/35] nexus: protect equality checks

---
 nexus/lib/structure.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/nexus/lib/structure.py b/nexus/lib/structure.py
index 89b5dcda28..4c72ed4727 100644
--- a/nexus/lib/structure.py
+++ b/nexus/lib/structure.py
@@ -6783,10 +6783,12 @@ def __init__(self,
         pos  = []
         if basis_vectors is None:
             basis_vectors = axes
-        elif basis_vectors=='primitive':
-            basis_vectors = axes_prim
-        elif basis_vectors=='conventional':
-            basis_vectors = axes_conv
+        elif isinstance(basis_vectors,str):
+            if basis_vectors=='primitive':
+                basis_vectors = axes_prim
+            elif basis_vectors=='conventional':
+                basis_vectors = axes_conv
+            #end if
         #end if
         nbasis = len(atoms)
         for point in points:

From afd22943d1aaccbc43ce2c861bab4030fdd7313c Mon Sep 17 00:00:00 2001
From: Steven Hahn <hahnse@ornl.gov>
Date: Thu, 30 Sep 2021 16:06:50 -0400
Subject: [PATCH 23/35] Check LLVM offload only contains one architecture

Signed-off-by: Steven Hahn <hahnse@ornl.gov>
---
 CMake/ClangCompilers.cmake | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake
index 7128c96c3b..9638b6aef1 100644
--- a/CMake/ClangCompilers.cmake
+++ b/CMake/ClangCompilers.cmake
@@ -20,6 +20,10 @@ if(QMC_OMP)
     set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-fopenmp-targets=${OFFLOAD_TARGET}")
 
     if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "nvptx64" AND DEFINED CMAKE_CUDA_ARCHITECTURES)
+      list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
+      if(NOT NUMBER_CUDA_ARCHITECTURES EQUAL "1")
+        message(FATAL_ERROR "LLVM does not support offload to multiple architectures!")
+      endif()
       set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES})
     endif()
 

From 8f1b5af1742588d7fe0bf0ae11a34828bee138e8 Mon Sep 17 00:00:00 2001
From: Steven Hahn <hahnse@ornl.gov>
Date: Thu, 30 Sep 2021 16:30:05 -0400
Subject: [PATCH 24/35] update documentation

Signed-off-by: Steven Hahn <hahnse@ornl.gov>
---
 docs/installation.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/installation.rst b/docs/installation.rst
index b941e284c1..c218502bc2 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -289,7 +289,7 @@ the path to the source directory.
                           particularly for large electron counts.
     ENABLE_CUDA           ON/OFF(default). Enable CUDA code path for NVIDIA GPU acceleration.
                           Production quality for AFQMC. Pre-production quality for real-space.
-                          Use CUDA_ARCH, default sm_70, to set the actual GPU architecture.
+                          Use CMAKE_CUDA_ARCHITECTURES, default 70, to set the actual GPU architecture.
     ENABLE_OFFLOAD        ON/OFF(default). Enable OpenMP target offload for GPU acceleration.
     ENABLE_TIMERS         ON(default)/OFF. Enable fine-grained timers. Timers are on by default but at level coarse
                           to avoid potential slowdown in tiny systems.
@@ -448,7 +448,7 @@ For example, using Clang 11 on Summit.
 
   ::
   
-    -D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -D ENABLE_CUDA=ON -D CUDA_ARCH=sm_70 -D CUDA_HOST_COMPILER=`which gcc`
+    -D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=70 -D CUDA_HOST_COMPILER=`which gcc`
 
 
 Installation from CMake

From be7893e67aeef6964fe9bfb0229e5e09698f6a92 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 30 Sep 2021 16:58:44 -0500
Subject: [PATCH 25/35] Our header-only wrappers need the CUDA include path.

---
 src/Platforms/CUDA/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Platforms/CUDA/CMakeLists.txt b/src/Platforms/CUDA/CMakeLists.txt
index f84c63ee1c..ca1dd00921 100644
--- a/src/Platforms/CUDA/CMakeLists.txt
+++ b/src/Platforms/CUDA/CMakeLists.txt
@@ -16,7 +16,7 @@ if(NOT QMC_CUDA2HIP)
   add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
   target_link_libraries(platform_cuda_runtime PUBLIC CUDA::cudart)
   add_library(platform_cuda_LA ${CUDA_LA_SRCS})
-  target_link_libraries(platform_cuda_LA PRIVATE CUDA::cublas CUDA::cusolver)
+  target_link_libraries(platform_cuda_LA PUBLIC CUDA::cublas CUDA::cusolver)
 else()
   hip_add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
   target_link_libraries(platform_cuda_LA PUBLIC platform_rocm_LA)
@@ -24,4 +24,4 @@ else()
   target_link_libraries(platform_cuda_runtime PUBLIC platform_rocm_runtime)
 endif()
 
-target_link_libraries(platform_cuda_LA PRIVATE platform_cuda_runtime)
+target_link_libraries(platform_cuda_LA PUBLIC platform_cuda_runtime)

From a9e69a14d3634299298796a333e3ff29089f53ae Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 30 Sep 2021 18:43:55 -0500
Subject: [PATCH 26/35] Make NVHPC support CMAKE_CUDA_ARCHITECTURES as a list.

---
 CMake/ClangCompilers.cmake | 9 ++++++---
 CMake/NVHPCCompilers.cmake | 8 +++++++-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake
index 9638b6aef1..7c38a6295d 100644
--- a/CMake/ClangCompilers.cmake
+++ b/CMake/ClangCompilers.cmake
@@ -21,10 +21,13 @@ if(QMC_OMP)
 
     if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "nvptx64" AND DEFINED CMAKE_CUDA_ARCHITECTURES)
       list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
-      if(NOT NUMBER_CUDA_ARCHITECTURES EQUAL "1")
-        message(FATAL_ERROR "LLVM does not support offload to multiple architectures!")
+      if(NUMBER_CUDA_ARCHITECTURES EQUAL "1")
+        set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES})
+      else()
+        message(FATAL_ERROR "LLVM does not yet support offload to multiple architectures! "
+                            "Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. "
+                            "Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH")
       endif()
-      set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES})
     endif()
 
     if(DEFINED OFFLOAD_ARCH)
diff --git a/CMake/NVHPCCompilers.cmake b/CMake/NVHPCCompilers.cmake
index 9128b3c9b6..c02531cc5a 100644
--- a/CMake/NVHPCCompilers.cmake
+++ b/CMake/NVHPCCompilers.cmake
@@ -9,7 +9,13 @@ if(QMC_OMP)
     message(WARNING "QMCPACK OpenMP offload is not ready for NVIDIA HPC compiler.")
     if(NOT DEFINED OFFLOAD_ARCH)
       if(DEFINED CMAKE_CUDA_ARCHITECTURES)
-        set(OFFLOAD_ARCH cc${CMAKE_CUDA_ARCHITECTURES})
+        list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
+        if(NUMBER_CUDA_ARCHITECTURES EQUAL "1")
+          set(OFFLOAD_ARCH cc${CMAKE_CUDA_ARCHITECTURES})
+        else()
+          string(REPLACE ";" ",cc" OFFLOAD_ARCH "${CMAKE_CUDA_ARCHITECTURES}")
+          set(OFFLOAD_ARCH "cc${OFFLOAD_ARCH}")
+        endif()
       else()
         message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! "
                             "Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.")

From fe7fba7087f1f44cd011217e141db38444720216 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 30 Sep 2021 19:00:23 -0500
Subject: [PATCH 27/35] Allow OFFLOAD_ARCH to be left unset for NVHPC.

---
 CMake/NVHPCCompilers.cmake | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/CMake/NVHPCCompilers.cmake b/CMake/NVHPCCompilers.cmake
index c02531cc5a..f4ef80aba4 100644
--- a/CMake/NVHPCCompilers.cmake
+++ b/CMake/NVHPCCompilers.cmake
@@ -7,22 +7,24 @@ if(QMC_OMP)
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mp=allcores")
   if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment")
     message(WARNING "QMCPACK OpenMP offload is not ready for NVIDIA HPC compiler.")
-    if(NOT DEFINED OFFLOAD_ARCH)
-      if(DEFINED CMAKE_CUDA_ARCHITECTURES)
-        list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
-        if(NUMBER_CUDA_ARCHITECTURES EQUAL "1")
-          set(OFFLOAD_ARCH cc${CMAKE_CUDA_ARCHITECTURES})
-        else()
-          string(REPLACE ";" ",cc" OFFLOAD_ARCH "${CMAKE_CUDA_ARCHITECTURES}")
-          set(OFFLOAD_ARCH "cc${OFFLOAD_ARCH}")
-        endif()
+    if(NOT DEFINED OFFLOAD_ARCH AND DEFINED CMAKE_CUDA_ARCHITECTURES)
+      list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
+      if(NUMBER_CUDA_ARCHITECTURES EQUAL "1")
+        set(OFFLOAD_ARCH cc${CMAKE_CUDA_ARCHITECTURES})
       else()
+        string(REPLACE ";" ",cc" OFFLOAD_ARCH "${CMAKE_CUDA_ARCHITECTURES}")
+        set(OFFLOAD_ARCH "cc${OFFLOAD_ARCH}")
+      endif()
+    endif()
+
+    if(DEFINED OFFLOAD_ARCH)
+      if(NOT OFFLOAD_ARCH MATCHES "cc")
         message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! "
                             "Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.")
       endif()
+      set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-gpu=${OFFLOAD_ARCH}")
     endif()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=gpu")
-    set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-gpu=${OFFLOAD_ARCH}")
   else()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=allcores")
   endif()

From 587634d816b20c12836ccb321ad02ac99ac064b2 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 30 Sep 2021 19:10:31 -0500
Subject: [PATCH 28/35] Update installation.rst

---
 CMake/ClangCompilers.cmake |  2 +-
 docs/installation.rst      | 10 ++--------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake
index 7c38a6295d..eab5cf13df 100644
--- a/CMake/ClangCompilers.cmake
+++ b/CMake/ClangCompilers.cmake
@@ -26,7 +26,7 @@ if(QMC_OMP)
       else()
         message(FATAL_ERROR "LLVM does not yet support offload to multiple architectures! "
                             "Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. "
-                            "Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH")
+                            "Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH.")
       endif()
     endif()
 
diff --git a/docs/installation.rst b/docs/installation.rst
index c218502bc2..a1c46191f9 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -416,13 +416,7 @@ to be reached. The following compilers have been verified:
   ::
 
     OFFLOAD_TARGET for the offload target. default nvptx64-nvidia-cuda.
-    OFFLOAD_ARCH for the target architecture if not using the compiler default.
-
-- IBM XL 16.1. Support NVIDIA GPUs.
-  
-  ::
-
-    -D ENABLE_OFFLOAD=ON
+    OFFLOAD_ARCH for the target architecture (sm_80, gfx906, ...) if not using the compiler default.
 
 - AMD AOMP Clang 11.8. Support AMD GPUs.
   
@@ -448,7 +442,7 @@ For example, using Clang 11 on Summit.
 
   ::
   
-    -D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=70 -D CUDA_HOST_COMPILER=`which gcc`
+    -D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=70 -D CMAKE_CUDA_HOST_COMPILER=`which gcc`
 
 
 Installation from CMake

From 517dea7090ca3f75ba6e2e235fa2008d6816917f Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 30 Sep 2021 01:14:49 -0500
Subject: [PATCH 29/35] Cure non-deterministic offload J2.

Reproducer:
NiO a64 batched_driver performance test. Run 1 VMC step with 1 thread over and over.
The scalar.dat is not deterministic: the Kinetic value is different from run to run.
To diagnose, inject prints before and after the offload region in mw_updateVGL and watch walker 13, electron 741.
Sometimes the value is not updated even if a walker is accepted.
---
 src/QMCWaveFunctions/Jastrow/BsplineFunctor.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h
index 9358bbd087..d6ea578045 100644
--- a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h
+++ b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h
@@ -218,6 +218,7 @@ struct BsplineFunctor : public OptimizableFunctorBase
       PRAGMA_OFFLOAD("omp parallel for reduction(+: val_sum, grad_x, grad_y, grad_z, lapl)")
       for (int j = 0; j < n_src; j++)
       {
+        if (j == iat) continue;
         const int ig    = grp_ids[j];
         const T* coefs  = mw_coefs[ig];
         T DeltaRInv     = mw_DeltaRInv[ig];
@@ -227,7 +228,7 @@ struct BsplineFunctor : public OptimizableFunctorBase
         T u(0);
         T dudr(0);
         T d2udr2(0);
-        if (j != iat && r < cutoff_radius)
+        if (r < cutoff_radius)
         {
           u = evaluate_impl(dist[j], coefs, DeltaRInv, dudr, d2udr2);
           dudr *= T(1) / r;
@@ -541,7 +542,7 @@ struct BsplineFunctor : public OptimizableFunctorBase
       T* mw_DeltaRInv       = reinterpret_cast<T*>(transfer_buffer_ptr + sizeof(T*) * num_groups);
       T* mw_cutoff_radius   = mw_DeltaRInv + num_groups;
       int* accepted_indices = reinterpret_cast<int*>(transfer_buffer_ptr + (sizeof(T*) + sizeof(T) * 2) * num_groups);
-      int ip                = accepted_indices[iw];
+      const int ip          = accepted_indices[iw];
 
       const T* dist_new   = mw_dist + ip * dist_stride;
       const T* dipl_x_new = dist_new + n_padded;
@@ -564,6 +565,7 @@ struct BsplineFunctor : public OptimizableFunctorBase
       PRAGMA_OFFLOAD("omp parallel for")
       for (int j = 0; j < n_src; j++)
       {
+        if (j == iat) continue;
         const int ig    = grp_ids[j];
         const T* coefs  = mw_coefs[ig];
         T DeltaRInv     = mw_DeltaRInv[ig];
@@ -573,7 +575,7 @@ struct BsplineFunctor : public OptimizableFunctorBase
         T u(0);
         T dudr(0);
         T d2udr2(0);
-        if (j != iat && r < cutoff_radius)
+        if (r < cutoff_radius)
         {
           u = evaluate_impl(dist_old[j], coefs, DeltaRInv, dudr, d2udr2);
           dudr *= T(1) / r;

From d546e4e3730648aaa8a458191d79cb718a1e43fe Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 30 Sep 2021 22:19:53 -0500
Subject: [PATCH 30/35] Minimize recompute in J2.

---
 src/Containers/OhmmsSoA/VectorSoaContainer.h |  3 +-
 src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp | 41 +++++++++++++++++---
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/src/Containers/OhmmsSoA/VectorSoaContainer.h b/src/Containers/OhmmsSoA/VectorSoaContainer.h
index 40f0021139..86aad416c5 100644
--- a/src/Containers/OhmmsSoA/VectorSoaContainer.h
+++ b/src/Containers/OhmmsSoA/VectorSoaContainer.h
@@ -62,7 +62,8 @@ struct VectorSoaContainer
   {
     if (myData != in.myData)
     {
-      resize(in.nLocal);
+      if (nLocal != in.nLocal)
+        resize(in.nLocal);
       std::copy_n(in.myData, nGhosts * D, myData);
     }
     return *this;
diff --git a/src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp b/src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp
index 61fad92901..1ca950b601 100644
--- a/src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp
+++ b/src/QMCWaveFunctions/Jastrow/J2OMPTarget.cpp
@@ -69,12 +69,23 @@ void J2OMPTarget<FT>::acquireResource(ResourceCollection& collection,
   mw_allUat.resize(N_padded * (DIM + 2) * nw);
   for (size_t iw = 0; iw < nw; iw++)
   {
-    size_t offset = N_padded * (DIM + 2) * iw;
-    auto& wfc     = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
+    // copy per walker Uat, dUat, d2Uat to shared buffer and attach buffer
+    auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
+
+    Vector<valT, aligned_allocator<valT>> Uat_view(mw_allUat.data() + iw * N_padded, N);
+    Uat_view = wfc.Uat;
     wfc.Uat.free();
     wfc.Uat.attachReference(mw_allUat.data() + iw * N_padded, N);
+
+    VectorSoaContainer<valT, DIM, aligned_allocator<valT>> dUat_view(mw_allUat.data() + nw * N_padded +
+                                                                         iw * N_padded * DIM,
+                                                                     N, N_padded);
+    dUat_view = wfc.dUat;
     wfc.dUat.free();
     wfc.dUat.attachReference(N, N_padded, mw_allUat.data() + nw * N_padded + iw * N_padded * DIM);
+
+    Vector<valT, aligned_allocator<valT>> d2Uat_view(mw_allUat.data() + nw * N_padded * (DIM + 1) + iw * N_padded, N);
+    d2Uat_view = wfc.d2Uat;
     wfc.d2Uat.free();
     wfc.d2Uat.attachReference(mw_allUat.data() + nw * N_padded * (DIM + 1) + iw * N_padded, N);
   }
@@ -86,14 +97,31 @@ void J2OMPTarget<FT>::releaseResource(ResourceCollection& collection,
                                       const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
 {
   auto& wfc_leader = wfc_list.getCastedLeader<J2OMPTarget<FT>>();
-  collection.takebackResource(std::move(wfc_leader.mw_mem_));
-  for (size_t iw = 0; iw < wfc_list.size(); iw++)
+  const size_t nw  = wfc_list.size();
+  auto& mw_allUat  = wfc_leader.mw_mem_->mw_allUat;
+  for (size_t iw = 0; iw < nw; iw++)
   {
+    // detach buffer and copy per walker Uat, dUat, d2Uat from shared buffer
     auto& wfc = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
+
+    Vector<valT, aligned_allocator<valT>> Uat_view(mw_allUat.data() + iw * N_padded, N);
     wfc.Uat.free();
+    wfc.Uat.resize(N);
+    wfc.Uat = Uat_view;
+
+    VectorSoaContainer<valT, DIM, aligned_allocator<valT>> dUat_view(mw_allUat.data() + nw * N_padded +
+                                                                         iw * N_padded * DIM,
+                                                                     N, N_padded);
     wfc.dUat.free();
+    wfc.dUat.resize(N);
+    wfc.dUat = dUat_view;
+
+    Vector<valT, aligned_allocator<valT>> d2Uat_view(mw_allUat.data() + nw * N_padded * (DIM + 1) + iw * N_padded, N);
     wfc.d2Uat.free();
+    wfc.d2Uat.resize(N);
+    wfc.d2Uat = d2Uat_view;
   }
+  collection.takebackResource(std::move(wfc_leader.mw_mem_));
 }
 
 template<typename FT>
@@ -674,7 +702,8 @@ void J2OMPTarget<FT>::mw_recompute(const RefVectorWithLeader<WaveFunctionCompone
   assert(this == &wfc_leader);
 #pragma omp parallel for
   for (int iw = 0; iw < wfc_list.size(); iw++)
-    wfc_list[iw].recompute(p_list[iw]);
+    if (recompute[iw])
+      wfc_list[iw].recompute(p_list[iw]);
   wfc_leader.mw_mem_->mw_allUat.updateTo();
 }
 
@@ -738,7 +767,7 @@ void J2OMPTarget<FT>::mw_evaluateGL(const RefVectorWithLeader<WaveFunctionCompon
 
   for (int iw = 0; iw < wfc_list.size(); iw++)
   {
-    auto& wfc    = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
+    auto& wfc      = wfc_list.getCastedElement<J2OMPTarget<FT>>(iw);
     wfc.log_value_ = wfc.computeGL(G_list[iw], L_list[iw]);
   }
 }

From d947ca1ae5c58d9f6a364f1c3571c317e7cf0c78 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Fri, 1 Oct 2021 12:08:26 -0400
Subject: [PATCH 31/35] Update build_olcf_summit_Clang.sh

---
 config/build_olcf_summit_Clang.sh | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/config/build_olcf_summit_Clang.sh b/config/build_olcf_summit_Clang.sh
index 598734a300..65f254982d 100755
--- a/config/build_olcf_summit_Clang.sh
+++ b/config/build_olcf_summit_Clang.sh
@@ -32,7 +32,18 @@ module load llvm/main-20210811-cuda10.1
 TYPE=Release
 Compiler=Clang
 
-source_folder=~/opt/qmcpack
+if [[ $# -eq 0 ]]; then
+  source_folder=`pwd`
+else
+  source_folder=$1
+fi
+
+if [[ -f $source_folder/CMakeLists.txt ]]; then
+  echo Using QMCPACK source directory $source_folder
+else
+  echo "Source directory $source_folder doesn't contain CMakeLists.txt. Pass QMCPACK source directory as the first argument."
+  exit
+fi
 
 for name in offload_cuda_real_MP offload_cuda_real offload_cuda_cplx_MP offload_cuda_cplx \
             cpu_real_MP cpu_real cpu_cplx_MP cpu_cplx

From 72d4c01983f2f12f37af16a0e0712637bf4a92d4 Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Tue, 28 Sep 2021 16:39:07 -0400
Subject: [PATCH 32/35] Add macOS CI on GitHub Actions

gcc-11 real build
brew dependencies
---
 .github/workflows/ci-github-actions.yaml      | 189 ++++++++++--------
 .../github-actions/ci/run_step.sh             |   7 +
 2 files changed, 116 insertions(+), 80 deletions(-)

diff --git a/.github/workflows/ci-github-actions.yaml b/.github/workflows/ci-github-actions.yaml
index 4678d1c90f..b996d3d9a8 100644
--- a/.github/workflows/ci-github-actions.yaml
+++ b/.github/workflows/ci-github-actions.yaml
@@ -1,16 +1,14 @@
-
 name: GitHub Actions CI
 
-on: 
+on:
   push:
-    branches: 
-    - develop
+    branches:
+      - develop
   pull_request:
-    branches: 
-    - develop
+    branches:
+      - develop
 
 jobs:
-
   linux:
     runs-on: ubuntu-latest
     container: ${{ matrix.container }}
@@ -20,79 +18,110 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        jobname: [
-          gcc-openmpi-real-coverage,
-          gcc-openmpi-complex-coverage,
-          gcc11-real-werror,
-          gcc11-complex-werror,
-          clang-real-asan,
-          clang-complex-asan,
-          clang-openmpi-real-ubsan,
-          clang-latest-openmp-offload
-        ]
+        jobname:
+          [
+            gcc-openmpi-real-coverage,
+            gcc-openmpi-complex-coverage,
+            gcc11-real-werror,
+            gcc11-complex-werror,
+            clang-real-asan,
+            clang-complex-asan,
+            clang-openmpi-real-ubsan,
+            clang-latest-openmp-offload,
+          ]
         include:
-        - jobname: gcc-openmpi-real-coverage
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-        
-        - jobname: gcc-openmpi-complex-coverage
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-        
-        - jobname: gcc11-real-werror
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu2110-serial
-            options: -u 1001
-        
-        - jobname: gcc11-complex-werror
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu2110-serial
-            options: -u 1001
-            
-        - jobname: clang-real-asan
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-            
-        - jobname: clang-complex-asan
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-        
-        - jobname: clang-openmpi-real-ubsan
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-        
-        - jobname: clang-latest-openmp-offload
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-clang-latest
-            options: -u 1001
+          - jobname: gcc-openmpi-real-coverage
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: gcc-openmpi-complex-coverage
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: gcc11-real-werror
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu2110-serial
+              options: -u 1001
+
+          - jobname: gcc11-complex-werror
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu2110-serial
+              options: -u 1001
+
+          - jobname: clang-real-asan
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: clang-complex-asan
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: clang-openmpi-real-ubsan
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: clang-latest-openmp-offload
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-clang-latest
+              options: -u 1001
+
+    steps:
+      - name: Checkout Action
+        uses: actions/checkout@v1
+
+      - name: Configure
+        run: tests/test_automation/github-actions/ci/run_step.sh configure
+
+      - name: Build
+        run: tests/test_automation/github-actions/ci/run_step.sh build
+
+      - name: Test
+        run: tests/test_automation/github-actions/ci/run_step.sh test
+
+      - name: Coverage
+        if: contains(matrix.jobname, 'coverage')
+        run: tests/test_automation/github-actions/ci/run_step.sh coverage
+
+      - name: Upload Coverage
+        if: contains(matrix.jobname, 'coverage') && github.repository_owner == 'QMCPACK'
+        uses: codecov/codecov-action@v1
+        with:
+          file: ../qmcpack-build/coverage.xml
+          flags: tests-deterministic # optional
+          name: codecov-QMCPACK # optional
+          fail_ci_if_error: true # optional (default = false)
+
+  macos:
+    runs-on: macos-latest
+    env:
+      GH_JOBNAME: ${{ matrix.jobname }}
+      GH_OS: macOS
+
+    strategy:
+      fail-fast: false
+      matrix:
+        jobname: [macOS-gcc11-real]
 
     steps:
-    - name: Checkout Action
-      uses: actions/checkout@v1
-
-    - name: Configure
-      run: tests/test_automation/github-actions/ci/run_step.sh configure
-
-    - name: Build
-      run: tests/test_automation/github-actions/ci/run_step.sh build
-
-    - name: Test
-      run: tests/test_automation/github-actions/ci/run_step.sh test
-    
-    - name: Coverage
-      if: contains(matrix.jobname, 'coverage')
-      run: tests/test_automation/github-actions/ci/run_step.sh coverage
-    
-    - name: Upload Coverage
-      if: contains(matrix.jobname, 'coverage') && github.repository_owner == 'QMCPACK'
-      uses: codecov/codecov-action@v1
-      with:
-        file:  ../qmcpack-build/coverage.xml
-        flags: tests-deterministic # optional
-        name: codecov-QMCPACK # optional
-        fail_ci_if_error: true # optional (default = false)
+      - name: Checkout Action
+        uses: actions/checkout@v2
+
+      - name: Setup Dependencies
+        run: brew install ninja hdf5 libxml2 openblas lapack boost fftw
+
+      - name: Configure
+        run: tests/test_automation/github-actions/ci/run_step.sh configure
+
+      - name: Build
+        run: tests/test_automation/github-actions/ci/run_step.sh build
+
+      - name: Test
+        run: tests/test_automation/github-actions/ci/run_step.sh test
+
+      - name: Install
+        run: tests/test_automation/github-actions/ci/run_step.sh install
diff --git a/tests/test_automation/github-actions/ci/run_step.sh b/tests/test_automation/github-actions/ci/run_step.sh
index b0b2f029be..22e8ae8ea1 100755
--- a/tests/test_automation/github-actions/ci/run_step.sh
+++ b/tests/test_automation/github-actions/ci/run_step.sh
@@ -106,6 +106,13 @@ case "$1" in
                       -DQMC_COMPLEX=$IS_COMPLEX \
                       ${GITHUB_WORKSPACE}
       ;;
+      *"macOS-gcc11"*)
+        echo 'Configure for building on macOS using gcc-11'
+        cmake -GNinja -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 \
+                      -DQMC_MPI=0 \
+                      -DQMC_COMPLEX=$IS_COMPLEX \
+                      ${GITHUB_WORKSPACE}
+      ;;
       # Configure with default compilers
       *)
         echo 'Configure for default system compilers and options'

From 60724b9ff67fa3a494bea1f8290bfa5f72a94676 Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Tue, 28 Sep 2021 17:06:13 -0400
Subject: [PATCH 33/35] Reduce brew dependencies

---
 .github/workflows/ci-github-actions.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci-github-actions.yaml b/.github/workflows/ci-github-actions.yaml
index b996d3d9a8..d7b2846910 100644
--- a/.github/workflows/ci-github-actions.yaml
+++ b/.github/workflows/ci-github-actions.yaml
@@ -112,7 +112,7 @@ jobs:
         uses: actions/checkout@v2
 
       - name: Setup Dependencies
-        run: brew install ninja hdf5 libxml2 openblas lapack boost fftw
+        run: brew install ninja hdf5 fftw boost
 
       - name: Configure
         run: tests/test_automation/github-actions/ci/run_step.sh configure

From fe5aa2dddb03141997b6a064a789cc89ccd4e257 Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Wed, 29 Sep 2021 09:46:20 -0400
Subject: [PATCH 34/35] Add Python dependencies on macOS runner

---
 .github/workflows/ci-github-actions.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci-github-actions.yaml b/.github/workflows/ci-github-actions.yaml
index d7b2846910..cbe19125af 100644
--- a/.github/workflows/ci-github-actions.yaml
+++ b/.github/workflows/ci-github-actions.yaml
@@ -112,7 +112,9 @@ jobs:
         uses: actions/checkout@v2
 
       - name: Setup Dependencies
-        run: brew install ninja hdf5 fftw boost
+        run: |
+             brew install ninja hdf5 fftw boost
+             pip3 install numpy h5py pandas
 
       - name: Configure
         run: tests/test_automation/github-actions/ci/run_step.sh configure

From 91fa88841a5cd983d0e497854a8d4e53664e481f Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Wed, 29 Sep 2021 11:10:37 -0400
Subject: [PATCH 35/35] Add docs for macOS CI

---
 docs/github_actions.rst | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/docs/github_actions.rst b/docs/github_actions.rst
index 313ee5ed9f..933bc5874a 100644
--- a/docs/github_actions.rst
+++ b/docs/github_actions.rst
@@ -36,6 +36,8 @@ The following is a summary of the jobs run in the CI process required for a PR:
 +-------------------------------+--------+----------+---------------+------+----------+
 | clang-latest-openmp-offload   | GitHub | clang-12 | unit          | 35   | PR/merge |
 +-------------------------------+--------+----------+---------------+------+----------+
+| macOS-gcc11-real              | GitHub | gcc-11   | deterministic | 27   | PR/merge |
++-------------------------------+--------+----------+---------------+------+----------+
 | gcc-real-gpu-cuda-mixed       | sulfur | clang-11 | deterministic | 2    | manual   |
 +-------------------------------+--------+----------+---------------+------+----------+
 | gcc-complex-gpu-cuda-mixed    | sulfur | clang-11 | deterministic | 2    | manual   |
@@ -47,7 +49,7 @@ The following is a summary of the jobs run in the CI process required for a PR:
 
 Jobs running on GitHub hosted runners are triggered automatically. Permission from an admin is required to run jobs on self-hosted runners (e.g. sulfur) for security reasons. In addition, jobs running on GitHub hosted runners run automatically in parallel and the time each job takes may vary depending on system utilization. For information on the underlying hardware see the GitHub Actions `docs on the topic <https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners>`_.  
 
-All jobs Github Runner hosts currently use the `williamfgc/qmcpack-ci:ubuntu20-openmpi <https://hub.docker.com/r/williamfgc/qmcpack-ci>`_ docker image, if you would like to reproduce theses tests exactly using docker, please refer to `Running QMCPACK on Docker Containers <https://qmcpack.readthedocs.io/en/develop/running_docker.html>`_ section in the QMCPACK documentation.
+All Linux jobs on GitHub-hosted runners currently use the `williamfgc/qmcpack-ci:ubuntu20-openmpi <https://hub.docker.com/r/williamfgc/qmcpack-ci>`_ docker image; if you would like to reproduce these tests exactly using docker, please refer to the `Running QMCPACK on Docker Containers <https://qmcpack.readthedocs.io/en/develop/running_docker.html>`_ section in the QMCPACK documentation. The macOS job runs directly on the `macos-latest GitHub Actions VM runner <https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources>`_.
 
 
 .. note::
@@ -170,6 +172,21 @@ linux (clang-latest-openmp-offload)
 | Duration      | ~35 Minutes                                                                                                                                                                |
 +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
+macOS (macOS-gcc11-real)
+"""""""""""""""""""""""""""""""""""
++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| Compiler      | gcc-11                                                                                                                                                                      |
++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| Build Command | `cmake -GNinja -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DQMC_MPI=0 -DQMC_COMPLEX=0 ..`                                                                                       |
++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| Test Command  | `ctest --output-on-failure -L deterministic`                                                                                                                                        |
++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| Objective     | Build for macOS CI using Accelerate framework and gcc-11 for openmp                                                                                                        |
++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| Duration      | ~27 Minutes                                                                                                                                                                |
++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+
 Self-Hosted Runners
 -------------------