Skip to content

Commit

Permalink
Use dynamic bitset by default for cpu mask
Browse files · Browse the repository at this point in the history
  • Loading branch information
msimberg committed Nov 27, 2018
1 parent e880a1a commit 1b8bc6c
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 30 deletions.
12 changes: 7 additions & 5 deletions CMakeLists.txt
Expand Up @@ -454,12 +454,14 @@ endif()

## Thread Manager related build options

set(HPX_MAX_CPU_COUNT_DEFAULT "64")
set(HPX_MAX_CPU_COUNT_DEFAULT "")
hpx_option(HPX_WITH_MAX_CPU_COUNT STRING
"HPX applications will not use more that this number of OS-Threads (default: ${HPX_MAX_CPU_COUNT_DEFAULT})"
${HPX_MAX_CPU_COUNT_DEFAULT}
"HPX applications will not use more that this number of OS-Threads (empty is unlimited) (default: ${HPX_MAX_CPU_COUNT_DEFAULT})"
"${HPX_MAX_CPU_COUNT_DEFAULT}"
CATEGORY "Thread Manager" ADVANCED)
hpx_add_config_define(HPX_HAVE_MAX_CPU_COUNT ${HPX_WITH_MAX_CPU_COUNT})
if(HPX_WITH_MAX_CPU_COUNT)
hpx_add_config_define(HPX_HAVE_MAX_CPU_COUNT ${HPX_WITH_MAX_CPU_COUNT})
endif()

set(HPX_MAX_NUMA_DOMAIN_COUNT_DEFAULT "4")
hpx_option(HPX_WITH_MAX_NUMA_DOMAIN_COUNT STRING
Expand All @@ -469,7 +471,7 @@ hpx_option(HPX_WITH_MAX_NUMA_DOMAIN_COUNT STRING
hpx_add_config_define(HPX_HAVE_MAX_NUMA_DOMAIN_COUNT ${HPX_WITH_MAX_NUMA_DOMAIN_COUNT})

set(HPX_MORE_THAN_64_THREADS_DEFAULT OFF)
if(HPX_WITH_MAX_CPU_COUNT GREATER 64)
if((NOT HPX_WITH_MAX_CPU_COUNT) OR (HPX_WITH_MAX_CPU_COUNT GREATER 64))
set(HPX_MORE_THAN_64_THREADS_DEFAULT ON)
endif()
hpx_option(HPX_WITH_MORE_THAN_64_THREADS BOOL
Expand Down
1 change: 1 addition & 0 deletions hpx/runtime/serialization/dynamic_bitset.hpp
Expand Up @@ -7,6 +7,7 @@
#define HPX_SERIALIZATION_DYNAMIC_BITSET_HPP

#include <hpx/runtime/serialization/serialize.hpp>
#include <hpx/runtime/serialization/vector.hpp>

#include <cstddef>
#include <vector>
Expand Down
7 changes: 0 additions & 7 deletions hpx/runtime/threads/policies/local_queue_scheduler.hpp
Expand Up @@ -120,15 +120,8 @@ namespace hpx { namespace threads { namespace policies
steals_in_numa_domain_(),
steals_outside_numa_domain_(),
#endif
#if !defined(HPX_HAVE_MORE_THAN_64_THREADS) || defined(HPX_HAVE_MAX_CPU_COUNT)
numa_domain_masks_(init.num_queues_),
outside_numa_domain_masks_(init.num_queues_)
#else
numa_domain_masks_(init.num_queues_,
topology_.get_machine_affinity_mask()),
outside_numa_domain_masks_(init.num_queues_,
topology_.get_machine_affinity_mask())
#endif
{
#if !defined(HPX_NATIVE_MIC) // we know that the MIC has one NUMA domain only
resize(steals_in_numa_domain_, init.num_queues_);
Expand Down
46 changes: 30 additions & 16 deletions hpx/runtime/threads/policies/shared_priority_queue_scheduler.hpp
Expand Up @@ -21,14 +21,14 @@
#include <hpx/util/logging.hpp>
#include <hpx/util_fwd.hpp>

#include <array>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <memory>
#include <string>
#include <numeric>
#include <type_traits>
#include <vector>

#include <hpx/config/warnings_prefix.hpp>

Expand Down Expand Up @@ -1149,14 +1149,21 @@ namespace policies {
auto &rp = resource::get_partitioner();
auto const& topo = rp.get_topology();

d_lookup_.resize(num_workers_);
hp_lookup_.resize(num_workers_);
np_lookup_.resize(num_workers_);
lp_lookup_.resize(num_workers_);
q_lookup_.resize(num_workers_);

std::fill(d_lookup_.begin(), d_lookup_.end(), 0);
std::fill(hp_lookup_.begin(), d_lookup_.end(), 0);
std::fill(np_lookup_.begin(), d_lookup_.end(), 0);
std::fill(lp_lookup_.begin(), d_lookup_.end(), 0);
std::fill(q_lookup_.begin(), q_lookup_.end(), 0);

// For each worker thread, determine which numa domain it
// belongs to and build lists of useful indexes/refs
num_domains_ = 1;
std::array<std::size_t, HPX_HAVE_MAX_NUMA_DOMAIN_COUNT> q_counts_;
std::fill(d_lookup_.begin(), d_lookup_.end(), 0);
std::fill(q_lookup_.begin(), q_lookup_.end(), 0);
std::fill(q_counts_.begin(), q_counts_.end(), 0);
std::fill(counters_.begin(), counters_.end(), 0);

for (std::size_t local_id=0; local_id!=num_workers_; ++local_id)
{
Expand All @@ -1167,7 +1174,14 @@ namespace policies {
num_domains_ = (std::max)(num_domains_, domain+1);
}

HPX_ASSERT(num_domains_ <= HPX_HAVE_MAX_NUMA_DOMAIN_COUNT);
std::vector<std::size_t> q_counts_(num_domains_, 0);

np_queues_.resize(num_domains_);
hp_queues_.resize(num_domains_);
lp_queues_.resize(num_domains_);
counters_.resize(num_domains_);

std::fill(counters_.begin(), counters_.end(), 0);

for (std::size_t local_id=0; local_id!=num_workers_; ++local_id)
{
Expand Down Expand Up @@ -1277,21 +1291,21 @@ namespace policies {
protected:
typedef queue_holder<thread_queue_type> numa_queues;

std::array<numa_queues, HPX_HAVE_MAX_NUMA_DOMAIN_COUNT> np_queues_;
std::array<numa_queues, HPX_HAVE_MAX_NUMA_DOMAIN_COUNT> hp_queues_;
std::array<numa_queues, HPX_HAVE_MAX_NUMA_DOMAIN_COUNT> lp_queues_;
std::array<std::size_t, HPX_HAVE_MAX_NUMA_DOMAIN_COUNT> counters_;
std::vector<numa_queues> np_queues_;
std::vector<numa_queues> hp_queues_;
std::vector<numa_queues> lp_queues_;
std::vector<std::size_t> counters_;

// lookup domain from local worker index
std::array<std::size_t, HPX_HAVE_MAX_CPU_COUNT> d_lookup_;
std::vector<std::size_t> d_lookup_;

// index of queue on domain from local worker index
std::array<std::size_t, HPX_HAVE_MAX_CPU_COUNT> hp_lookup_;
std::array<std::size_t, HPX_HAVE_MAX_CPU_COUNT> np_lookup_;
std::array<std::size_t, HPX_HAVE_MAX_CPU_COUNT> lp_lookup_;
std::vector<std::size_t> hp_lookup_;
std::vector<std::size_t> np_lookup_;
std::vector<std::size_t> lp_lookup_;

// lookup sub domain queue index from local worker index
std::array<std::size_t, HPX_HAVE_MAX_CPU_COUNT> q_lookup_;
std::vector<std::size_t> q_lookup_;

// number of cores per queue for HP, NP, LP queues
core_ratios cores_per_queue_;
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/threads/policies/affinity_data.cpp
Expand Up @@ -144,7 +144,7 @@ namespace hpx { namespace threads { namespace policies { namespace detail
else if (!affinity_desc.empty())
{
affinity_masks_.clear();
affinity_masks_.resize(num_threads_, 0);
affinity_masks_.resize(num_threads_, mask_type{});

for (std::size_t i = 0; i != num_threads_; ++i)
threads::resize(affinity_masks_[i], num_system_pus);
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/threads/topology.cpp
Expand Up @@ -1339,7 +1339,7 @@ namespace hpx { namespace threads
HPX_THROW_EXCEPTION(kernel_error,
"hpx::threads::topology::get_area_membind_nodeset",
"hwloc_get_area_membind_nodeset failed");
return -1;
return bitmap_to_mask(ns, HWLOC_OBJ_MACHINE);
std::cout << "error in ";
}
return bitmap_to_mask(ns, HWLOC_OBJ_NUMANODE);
Expand Down

0 comments on commit 1b8bc6c

Please sign in to comment.