Fix use of dynamic bitset for CPU masks
msimberg committed Feb 26, 2019
1 parent 33b440a commit eff5e51
Showing 9 changed files with 66 additions and 27 deletions.
10 changes: 6 additions & 4 deletions CMakeLists.txt
@@ -474,10 +474,12 @@ endif()

set(HPX_MAX_CPU_COUNT_DEFAULT "64")
hpx_option(HPX_WITH_MAX_CPU_COUNT STRING
"HPX applications will not use more that this number of OS-Threads (default: ${HPX_MAX_CPU_COUNT_DEFAULT})"
${HPX_MAX_CPU_COUNT_DEFAULT}
"HPX applications will not use more that this number of OS-Threads (empty string means dynamic) (default: ${HPX_MAX_CPU_COUNT_DEFAULT})"
"${HPX_MAX_CPU_COUNT_DEFAULT}"
CATEGORY "Thread Manager" ADVANCED)
hpx_add_config_define(HPX_HAVE_MAX_CPU_COUNT ${HPX_WITH_MAX_CPU_COUNT})
if(HPX_WITH_MAX_CPU_COUNT)
hpx_add_config_define(HPX_HAVE_MAX_CPU_COUNT ${HPX_WITH_MAX_CPU_COUNT})
endif()

set(HPX_MAX_NUMA_DOMAIN_COUNT_DEFAULT "4")
hpx_option(HPX_WITH_MAX_NUMA_DOMAIN_COUNT STRING
@@ -487,7 +489,7 @@ hpx_option(HPX_WITH_MAX_NUMA_DOMAIN_COUNT STRING
hpx_add_config_define(HPX_HAVE_MAX_NUMA_DOMAIN_COUNT ${HPX_WITH_MAX_NUMA_DOMAIN_COUNT})

set(HPX_MORE_THAN_64_THREADS_DEFAULT OFF)
if(HPX_WITH_MAX_CPU_COUNT GREATER 64)
if((NOT HPX_WITH_MAX_CPU_COUNT) OR (HPX_WITH_MAX_CPU_COUNT GREATER 64))
set(HPX_MORE_THAN_64_THREADS_DEFAULT ON)
endif()
hpx_option(HPX_WITH_MORE_THAN_64_THREADS BOOL
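For context: leaving HPX_WITH_MAX_CPU_COUNT as an empty string now means HPX_HAVE_MAX_CPU_COUNT is never defined, and (per the change above) HPX_WITH_MORE_THAN_64_THREADS then defaults to ON. That combination is what selects a runtime-sized CPU mask. A paraphrased sketch of how the mask type is typically chosen from these two defines — the real selection lives in HPX's cpu_mask header and may differ in detail:

#include <bitset>
#include <cstdint>

#include <boost/dynamic_bitset.hpp>

#if !defined(HPX_HAVE_MORE_THAN_64_THREADS) ||                                 \
    (defined(HPX_HAVE_MAX_CPU_COUNT) && HPX_HAVE_MAX_CPU_COUNT <= 64)
typedef std::uint64_t mask_type;                          // one machine word
#elif defined(HPX_HAVE_MAX_CPU_COUNT)
typedef std::bitset<HPX_HAVE_MAX_CPU_COUNT> mask_type;    // fixed compile-time bound
#else
typedef boost::dynamic_bitset<std::uint64_t> mask_type;   // sized at runtime
#endif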
11 changes: 11 additions & 0 deletions examples/resource_partitioner/shared_priority_queue_scheduler.hpp
@@ -36,6 +36,16 @@
#include <type_traits>
#include <vector>

#if !defined(HPX_HAVE_MAX_CPU_COUNT) && defined(HPX_HAVE_MORE_THAN_64_THREADS)
static_assert(false,
"The shared_priority_scheduler does not support dynamic bitsets for CPU "
"masks, i.e. HPX_WITH_MAX_CPU_COUNT=\"\" and "
"HPX_WITH_MORE_THAN_64_THREADS=ON. Reconfigure HPX with either "
"HPX_WITH_MAX_CPU_COUNT=N, where N is an integer, or disable the "
"shared_priority_scheduler by setting HPX_WITH_THREAD_SCHEDULERS to not "
"include \"all\" or \"shared-priority\"");
#else

// #define SHARED_PRIORITY_SCHEDULER_DEBUG 1
// #define SHARED_PRIORITY_SCHEDULER_MINIMAL_DEBUG 1

@@ -1541,6 +1551,7 @@ namespace example {
hpx::lcos::local::spinlock init_mutex;
};
}}}}
#endif

#include <hpx/config/warnings_suffix.hpp>

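For context: both copies of this scheduler header (this example and the one under hpx/runtime/threads/policies) get the same guard. The static_assert sits at namespace scope inside the #if branch that the preprocessor only keeps when masks are dynamic, so in that configuration it fails unconditionally and the message becomes the compiler error; the remedy is the one the message spells out (a numeric HPX_WITH_MAX_CPU_COUNT, or dropping the scheduler from HPX_WITH_THREAD_SCHEDULERS). A stand-alone illustration of the same pattern, with hypothetical macro names:

// The EXAMPLE_* macros are hypothetical, for illustration only.
#if !defined(EXAMPLE_HAVE_FIXED_MASK_SIZE) && defined(EXAMPLE_HAVE_DYNAMIC_MASKS)
static_assert(false,
    "this component needs a compile-time CPU mask size; reconfigure or disable it");
#else
// ... the component is compiled only in supported configurations ...
#endif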
1 change: 1 addition & 0 deletions hpx/runtime/serialization/dynamic_bitset.hpp
@@ -7,6 +7,7 @@
#define HPX_SERIALIZATION_DYNAMIC_BITSET_HPP

#include <hpx/runtime/serialization/serialize.hpp>
#include <hpx/runtime/serialization/vector.hpp>

#include <cstddef>
#include <vector>
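For context: boost::dynamic_bitset has no fixed-size storage, so it is naturally serialized as its bit count plus its block representation, i.e. a std::vector of words — which is why the vector serializer is now included. A minimal sketch in that style, assuming HPX's usual split between input_archive and output_archive overloads (illustrative, not necessarily the exact body of this header):

#include <hpx/runtime/serialization/serialize.hpp>
#include <hpx/runtime/serialization/vector.hpp>

#include <boost/dynamic_bitset.hpp>

#include <cstddef>
#include <vector>

namespace hpx { namespace serialization
{
    template <typename Block, typename Alloc>
    void serialize(output_archive& ar,
        boost::dynamic_bitset<Block, Alloc> const& bs, unsigned)
    {
        std::size_t num_bits = bs.size();
        std::vector<Block> blocks(bs.num_blocks());
        boost::to_block_range(bs, blocks.begin());   // copy out the raw words
        ar << num_bits << blocks;                    // 'blocks' needs vector.hpp
    }

    template <typename Block, typename Alloc>
    void serialize(input_archive& ar,
        boost::dynamic_bitset<Block, Alloc>& bs, unsigned)
    {
        std::size_t num_bits = 0;
        std::vector<Block> blocks;
        ar >> num_bits >> blocks;
        bs.resize(num_bits);
        boost::from_block_range(blocks.begin(), blocks.end(), bs);
    }
}}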
41 changes: 21 additions & 20 deletions hpx/runtime/threads/policies/local_queue_scheduler.hpp
@@ -110,29 +110,30 @@ namespace hpx { namespace threads { namespace policies
typedef init_parameter init_parameter_type;

local_queue_scheduler(init_parameter_type const& init,
bool deferred_initialization = true)
: scheduler_base(init.num_queues_, init.description_),
max_queue_thread_count_(init.max_queue_thread_count_),
queues_(init.num_queues_),
curr_queue_(0),
numa_sensitive_(init.numa_sensitive_),
#ifndef HPX_NATIVE_MIC // we know that the MIC has one NUMA domain only
steals_in_numa_domain_(),
steals_outside_numa_domain_(),
#endif
#if !defined(HPX_HAVE_MORE_THAN_64_THREADS) || defined(HPX_HAVE_MAX_CPU_COUNT)
numa_domain_masks_(init.num_queues_),
outside_numa_domain_masks_(init.num_queues_)
#else
numa_domain_masks_(init.num_queues_,
topology_.get_machine_affinity_mask()),
outside_numa_domain_masks_(init.num_queues_,
topology_.get_machine_affinity_mask())
bool deferred_initialization = true)
: scheduler_base(init.num_queues_, init.description_)
, max_queue_thread_count_(init.max_queue_thread_count_)
, queues_(init.num_queues_)
, curr_queue_(0)
, numa_sensitive_(init.numa_sensitive_)
,
#ifndef HPX_NATIVE_MIC // we know that the MIC has one NUMA domain only
steals_in_numa_domain_()
, steals_outside_numa_domain_()
#endif
, numa_domain_masks_(init.num_queues_,
resource::get_partitioner()
.get_topology()
.get_machine_affinity_mask())
, outside_numa_domain_masks_(init.num_queues_,
resource::get_partitioner()
.get_topology()
.get_machine_affinity_mask())
{
#if !defined(HPX_NATIVE_MIC) // we know that the MIC has one NUMA domain only
resize(steals_in_numa_domain_, init.num_queues_);
resize(steals_outside_numa_domain_, init.num_queues_);
resize(steals_in_numa_domain_, threads::hardware_concurrency());
resize(
steals_outside_numa_domain_, threads::hardware_concurrency());
#endif
if (!deferred_initialization)
{
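For context: the common thread in this constructor change is that a dynamic bitset starts out with zero bits, so every mask must be resized to the number of processing units before any bit can be set — sizing the steal masks to the number of queues, as before, is no longer sufficient, and the NUMA domain masks are now seeded directly from the machine affinity mask, which already has the right width. A minimal sketch of the required order of operations, assuming the mask helpers that come with hpx/runtime/threads/topology.hpp:

#include <hpx/runtime/threads/topology.hpp>

#include <cstddef>

void mask_setup_sketch(std::size_t pu)   // hypothetical helper, illustration only
{
    hpx::threads::mask_type mask = hpx::threads::mask_type();

    // With HPX_WITH_MAX_CPU_COUNT="" this is a boost::dynamic_bitset with zero
    // bits; resize() gives it one bit per PU (and is a no-op for the
    // fixed-size mask variants).
    hpx::threads::resize(mask, hpx::threads::hardware_concurrency());

    // Only now is it safe to address an arbitrary PU index.
    hpx::threads::set(mask, pu);
}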
11 changes: 11 additions & 0 deletions hpx/runtime/threads/policies/shared_priority_queue_scheduler.hpp
@@ -32,6 +32,16 @@

#include <hpx/config/warnings_prefix.hpp>

#if !defined(HPX_HAVE_MAX_CPU_COUNT) && defined(HPX_HAVE_MORE_THAN_64_THREADS)
static_assert(false,
"The shared_priority_scheduler does not support dynamic bitsets for CPU "
"masks, i.e. HPX_WITH_MAX_CPU_COUNT=\"\" and "
"HPX_WITH_MORE_THAN_64_THREADS=ON. Reconfigure HPX with either "
"HPX_WITH_MAX_CPU_COUNT=N, where N is an integer, or disable the "
"shared_priority_scheduler by setting HPX_WITH_THREAD_SCHEDULERS to not "
"include \"all\" or \"shared-priority\"");
#else

namespace hpx {
namespace threads {
namespace policies {
@@ -1304,6 +1314,7 @@ namespace policies {
hpx::lcos::local::spinlock init_mutex;
};
}}}
#endif

#include <hpx/config/warnings_suffix.hpp>

2 changes: 1 addition & 1 deletion hpx/runtime/threads/topology.hpp
@@ -308,7 +308,7 @@ namespace hpx { namespace threads
std::size_t num_numa_node
) const;
mask_type init_core_affinity_mask_from_core(
std::size_t num_core, mask_cref_type default_mask = mask_type()
std::size_t num_core, mask_cref_type default_mask = empty_mask
) const;
mask_type init_thread_affinity_mask(std::size_t num_thread) const;
mask_type init_thread_affinity_mask(
3 changes: 3 additions & 0 deletions src/runtime/resource/detail/detail_partitioner.cpp
@@ -227,6 +227,7 @@ namespace hpx { namespace resource { namespace detail
"Cannot instantiate more than one resource partitioner");
}

#if defined(HPX_HAVE_MAX_CPU_COUNT)
if(HPX_HAVE_MAX_CPU_COUNT < topo_.get_number_of_pus())
{
throw_runtime_error("partitioner::partioner",
@@ -238,6 +239,7 @@
"HPX.",
HPX_HAVE_MAX_CPU_COUNT, topo_.get_number_of_pus()));
}
#endif

// Create the default pool
initial_thread_pools_.push_back(init_pool_data("default"));
@@ -252,6 +254,7 @@
bool partitioner::pu_exposed(std::size_t pu_num)
{
threads::mask_type pu_mask = threads::mask_type();
threads::resize(pu_mask, threads::hardware_concurrency());
threads::set(pu_mask, pu_num);
threads::topology& topo = get_topology();

5 changes: 4 additions & 1 deletion src/runtime/threads/policies/affinity_data.cpp
@@ -94,6 +94,8 @@ namespace hpx { namespace threads { namespace policies { namespace detail
throw std::runtime_error(
"Cannot instantiate more than one affinity data instance");
}

threads::resize(no_affinity_, hardware_concurrency());
}

affinity_data::~affinity_data()
@@ -144,7 +146,7 @@ namespace hpx { namespace threads { namespace policies { namespace detail
else if (!affinity_desc.empty())
{
affinity_masks_.clear();
affinity_masks_.resize(num_threads_, 0);
affinity_masks_.resize(num_threads_, mask_type{});

for (std::size_t i = 0; i != num_threads_; ++i)
threads::resize(affinity_masks_[i], num_system_pus);
@@ -204,6 +206,7 @@ namespace hpx { namespace threads { namespace policies { namespace detail
if (threads::test(no_affinity_, global_thread_num))
{
static mask_type m = mask_type();
threads::resize(m, hardware_concurrency());
return m;
}

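For context: two smaller consequences of a runtime-sized mask show up in this file. The literal 0 previously used as the fill value for affinity_masks_ relied on an implicit integer-to-mask conversion that a dynamic bitset does not provide (its sizing constructor is explicit), hence the switch to a value-initialized mask_type{}; and the shared no_affinity_ mask and the static fallback mask are resized to hardware_concurrency() before any bit is tested or set, for the same reason as in the schedulers. A small illustration (the local names are hypothetical):

#include <hpx/runtime/threads/topology.hpp>

#include <cstddef>
#include <vector>

void affinity_mask_sketch(std::size_t num_threads, std::size_t num_system_pus)
{
    std::vector<hpx::threads::mask_type> affinity_masks;

    // affinity_masks.resize(num_threads, 0);   // only compiles for the
    //                                          // integer/fixed-size masks
    affinity_masks.resize(num_threads, hpx::threads::mask_type{});

    // Each mask still needs its final width before bits can be set.
    for (std::size_t i = 0; i != num_threads; ++i)
        hpx::threads::resize(affinity_masks[i], num_system_pus);
}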
9 changes: 8 additions & 1 deletion src/runtime/threads/topology.cpp
@@ -185,7 +185,11 @@ namespace hpx { namespace threads
}

///////////////////////////////////////////////////////////////////////////
#if !defined(HPX_HAVE_MAX_CPU_COUNT)
mask_type topology::empty_mask = mask_type(hardware_concurrency());
#else
mask_type topology::empty_mask = mask_type();
#endif

topology::topology()
: topo(nullptr), machine_affinity_mask_(0)
@@ -1054,7 +1058,9 @@
) const
{ // {{{
if (std::size_t(-1) == core)
{
return default_mask;
}

hwloc_obj_t core_obj = nullptr;

@@ -1344,7 +1350,7 @@
HPX_THROW_EXCEPTION(kernel_error,
"hpx::threads::topology::get_area_membind_nodeset",
"hwloc_get_area_membind_nodeset failed");
return -1;
return bitmap_to_mask(ns, HWLOC_OBJ_MACHINE);
std::cout << "error in ";
}
return bitmap_to_mask(ns, HWLOC_OBJ_NUMANODE);
@@ -1410,6 +1416,7 @@
hwloc_obj_type_t htype) const
{
mask_type mask = mask_type();
resize(mask, get_number_of_pus());
std::size_t num = hwloc_get_nbobjs_by_type(topo, htype);
//
int const pu_depth = hwloc_get_type_or_below_depth(topo, htype);
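For context: topology::empty_mask is a shared default value (it is now also the default argument of init_core_affinity_mask_from_core earlier in this commit), so in the dynamic configuration it is constructed with hardware_concurrency() bits up front and copies of it start out correctly sized. The change in get_area_membind_nodeset follows from the same type switch: returning -1 only compiles while the mask is an integral or fixed-size bitset type, so the error path now returns a real machine-wide mask instead. A tiny illustration of that last point (the function name is hypothetical):

#include <hpx/runtime/threads/topology.hpp>

hpx::threads::mask_type error_mask_sketch()
{
    // return -1;   // fine for a std::uint64_t or std::bitset mask, but does
    //              // not compile once mask_type is a boost::dynamic_bitset
    hpx::threads::mask_type mask = hpx::threads::mask_type();
    hpx::threads::resize(mask, hpx::threads::hardware_concurrency());
    return mask;    // an explicit, correctly sized mask for the error case
}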
