Fix use of dynamic bitset for CPU masks
msimberg committed Feb 26, 2019
1 parent 33b440a commit eff5e51
Showing 9 changed files with 66 additions and 27 deletions.
10 changes: 6 additions & 4 deletions CMakeLists.txt
@@ -474,10 +474,12 @@ endif()

set(HPX_MAX_CPU_COUNT_DEFAULT "64")
hpx_option(HPX_WITH_MAX_CPU_COUNT STRING
"HPX applications will not use more that this number of OS-Threads (default: ${HPX_MAX_CPU_COUNT_DEFAULT})"
${HPX_MAX_CPU_COUNT_DEFAULT}
"HPX applications will not use more that this number of OS-Threads (empty string means dynamic) (default: ${HPX_MAX_CPU_COUNT_DEFAULT})"
"${HPX_MAX_CPU_COUNT_DEFAULT}"
CATEGORY "Thread Manager" ADVANCED)
hpx_add_config_define(HPX_HAVE_MAX_CPU_COUNT ${HPX_WITH_MAX_CPU_COUNT})
if(HPX_WITH_MAX_CPU_COUNT)
hpx_add_config_define(HPX_HAVE_MAX_CPU_COUNT ${HPX_WITH_MAX_CPU_COUNT})
endif()

set(HPX_MAX_NUMA_DOMAIN_COUNT_DEFAULT "4")
hpx_option(HPX_WITH_MAX_NUMA_DOMAIN_COUNT STRING
@@ -487,7 +489,7 @@ hpx_option(HPX_WITH_MAX_NUMA_DOMAIN_COUNT STRING
hpx_add_config_define(HPX_HAVE_MAX_NUMA_DOMAIN_COUNT ${HPX_WITH_MAX_NUMA_DOMAIN_COUNT})

set(HPX_MORE_THAN_64_THREADS_DEFAULT OFF)
if(HPX_WITH_MAX_CPU_COUNT GREATER 64)
if((NOT HPX_WITH_MAX_CPU_COUNT) OR (HPX_WITH_MAX_CPU_COUNT GREATER 64))
set(HPX_MORE_THAN_64_THREADS_DEFAULT ON)
endif()
hpx_option(HPX_WITH_MORE_THAN_64_THREADS BOOL
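For context: leaving HPX_WITH_MAX_CPU_COUNT as an empty string now means HPX_HAVE_MAX_CPU_COUNT is never defined, and (per the change above) HPX_WITH_MORE_THAN_64_THREADS then defaults to ON. That combination is what selects a runtime-sized CPU mask. A paraphrased sketch of how the mask type is typically chosen from these two defines — the real selection lives in HPX's cpu_mask header and may differ in detail:

#include <bitset>
#include <cstdint>

#include <boost/dynamic_bitset.hpp>

#if !defined(HPX_HAVE_MORE_THAN_64_THREADS) ||                                 \
    (defined(HPX_HAVE_MAX_CPU_COUNT) && HPX_HAVE_MAX_CPU_COUNT <= 64)
typedef std::uint64_t mask_type;                          // one machine word
#elif defined(HPX_HAVE_MAX_CPU_COUNT)
typedef std::bitset<HPX_HAVE_MAX_CPU_COUNT> mask_type;    // fixed compile-time bound
#else
typedef boost::dynamic_bitset<std::uint64_t> mask_type;   // sized at runtime
#endif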
11 changes: 11 additions & 0 deletions examples/resource_partitioner/shared_priority_queue_scheduler.hpp
@@ -36,6 +36,16 @@
#include <type_traits>
#include <vector>

#if !defined(HPX_HAVE_MAX_CPU_COUNT) && defined(HPX_HAVE_MORE_THAN_64_THREADS)
static_assert(false,
"The shared_priority_scheduler does not support dynamic bitsets for CPU "
"masks, i.e. HPX_WITH_MAX_CPU_COUNT=\"\" and "
"HPX_WITH_MORE_THAN_64_THREADS=ON. Reconfigure HPX with either "
"HPX_WITH_MAX_CPU_COUNT=N, where N is an integer, or disable the "
"shared_priority_scheduler by setting HPX_WITH_THREAD_SCHEDULERS to not "
"include \"all\" or \"shared-priority\"");
#else

// #define SHARED_PRIORITY_SCHEDULER_DEBUG 1
// #define SHARED_PRIORITY_SCHEDULER_MINIMAL_DEBUG 1

@@ -1541,6 +1551,7 @@ namespace example {
hpx::lcos::local::spinlock init_mutex;
};
}}}}
#endif

#include <hpx/config/warnings_suffix.hpp>

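For context: both copies of this scheduler header (this example and the one under hpx/runtime/threads/policies) get the same guard. The static_assert sits at namespace scope inside the #if branch that the preprocessor only keeps when masks are dynamic, so in that configuration it fails unconditionally and the message becomes the compiler error; the remedy is the one the message spells out (a numeric HPX_WITH_MAX_CPU_COUNT, or dropping the scheduler from HPX_WITH_THREAD_SCHEDULERS). A stand-alone illustration of the same pattern, with hypothetical macro names:

// The EXAMPLE_* macros are hypothetical, for illustration only.
#if !defined(EXAMPLE_HAVE_FIXED_MASK_SIZE) && defined(EXAMPLE_HAVE_DYNAMIC_MASKS)
static_assert(false,
    "this component needs a compile-time CPU mask size; reconfigure or disable it");
#else
// ... the component is compiled only in supported configurations ...
#endif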
1 change: 1 addition & 0 deletions hpx/runtime/serialization/dynamic_bitset.hpp
@@ -7,6 +7,7 @@
#define HPX_SERIALIZATION_DYNAMIC_BITSET_HPP

#include <hpx/runtime/serialization/serialize.hpp>
#include <hpx/runtime/serialization/vector.hpp>

#include <cstddef>
#include <vector>
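For context: boost::dynamic_bitset has no fixed-size storage, so it is naturally serialized as its bit count plus its block representation, i.e. a std::vector of words — which is why the vector serializer is now included. A minimal sketch in that style, assuming HPX's usual split between input_archive and output_archive overloads (illustrative, not necessarily the exact body of this header):

#include <hpx/runtime/serialization/serialize.hpp>
#include <hpx/runtime/serialization/vector.hpp>

#include <boost/dynamic_bitset.hpp>

#include <cstddef>
#include <vector>

namespace hpx { namespace serialization
{
    template <typename Block, typename Alloc>
    void serialize(output_archive& ar,
        boost::dynamic_bitset<Block, Alloc> const& bs, unsigned)
    {
        std::size_t num_bits = bs.size();
        std::vector<Block> blocks(bs.num_blocks());
        boost::to_block_range(bs, blocks.begin());   // copy out the raw words
        ar << num_bits << blocks;                    // 'blocks' needs vector.hpp
    }

    template <typename Block, typename Alloc>
    void serialize(input_archive& ar,
        boost::dynamic_bitset<Block, Alloc>& bs, unsigned)
    {
        std::size_t num_bits = 0;
        std::vector<Block> blocks;
        ar >> num_bits >> blocks;
        bs.resize(num_bits);
        boost::from_block_range(blocks.begin(), blocks.end(), bs);
    }
}}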
41 changes: 21 additions & 20 deletions hpx/runtime/threads/policies/local_queue_scheduler.hpp
@@ -110,29 +110,30 @@ namespace hpx { namespace threads { namespace policies
typedef init_parameter init_parameter_type;

local_queue_scheduler(init_parameter_type const& init,
bool deferred_initialization = true)
: scheduler_base(init.num_queues_, init.description_),
max_queue_thread_count_(init.max_queue_thread_count_),
queues_(init.num_queues_),
curr_queue_(0),
numa_sensitive_(init.numa_sensitive_),
#ifndef HPX_NATIVE_MIC // we know that the MIC has one NUMA domain only
steals_in_numa_domain_(),
steals_outside_numa_domain_(),
#endif
#if !defined(HPX_HAVE_MORE_THAN_64_THREADS) || defined(HPX_HAVE_MAX_CPU_COUNT)
numa_domain_masks_(init.num_queues_),
outside_numa_domain_masks_(init.num_queues_)
#else
numa_domain_masks_(init.num_queues_,
topology_.get_machine_affinity_mask()),
outside_numa_domain_masks_(init.num_queues_,
topology_.get_machine_affinity_mask())
bool deferred_initialization = true)
: scheduler_base(init.num_queues_, init.description_)
, max_queue_thread_count_(init.max_queue_thread_count_)
, queues_(init.num_queues_)
, curr_queue_(0)
, numa_sensitive_(init.numa_sensitive_)
,
#ifndef HPX_NATIVE_MIC // we know that the MIC has one NUMA domain only
steals_in_numa_domain_()
, steals_outside_numa_domain_()
#endif
, numa_domain_masks_(init.num_queues_,
resource::get_partitioner()
.get_topology()
.get_machine_affinity_mask())
, outside_numa_domain_masks_(init.num_queues_,
resource::get_partitioner()
.get_topology()
.get_machine_affinity_mask())
{
#if !defined(HPX_NATIVE_MIC) // we know that the MIC has one NUMA domain only
resize(steals_in_numa_domain_, init.num_queues_);
resize(steals_outside_numa_domain_, init.num_queues_);
resize(steals_in_numa_domain_, threads::hardware_concurrency());
resize(
steals_outside_numa_domain_, threads::hardware_concurrency());
#endif
if (!deferred_initialization)
{
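For context: the common thread in this constructor change is that a dynamic bitset starts out with zero bits, so every mask must be resized to the number of processing units before any bit can be set — sizing the steal masks to the number of queues, as before, is no longer sufficient, and the NUMA domain masks are now seeded directly from the machine affinity mask, which already has the right width. A minimal sketch of the required order of operations, assuming the mask helpers that come with hpx/runtime/threads/topology.hpp:

#include <hpx/runtime/threads/topology.hpp>

#include <cstddef>

void mask_setup_sketch(std::size_t pu)   // hypothetical helper, illustration only
{
    hpx::threads::mask_type mask = hpx::threads::mask_type();

    // With HPX_WITH_MAX_CPU_COUNT="" this is a boost::dynamic_bitset with zero
    // bits; resize() gives it one bit per PU (and is a no-op for the
    // fixed-size mask variants).
    hpx::threads::resize(mask, hpx::threads::hardware_concurrency());

    // Only now is it safe to address an arbitrary PU index.
    hpx::threads::set(mask, pu);
}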
11 changes: 11 additions & 0 deletions hpx/runtime/threads/policies/shared_priority_queue_scheduler.hpp
@@ -32,6 +32,16 @@

#include <hpx/config/warnings_prefix.hpp>

#if !defined(HPX_HAVE_MAX_CPU_COUNT) && defined(HPX_HAVE_MORE_THAN_64_THREADS)
static_assert(false,
"The shared_priority_scheduler does not support dynamic bitsets for CPU "
"masks, i.e. HPX_WITH_MAX_CPU_COUNT=\"\" and "
"HPX_WITH_MORE_THAN_64_THREADS=ON. Reconfigure HPX with either "
"HPX_WITH_MAX_CPU_COUNT=N, where N is an integer, or disable the "
"shared_priority_scheduler by setting HPX_WITH_THREAD_SCHEDULERS to not "
"include \"all\" or \"shared-priority\"");
#else

namespace hpx {
namespace threads {
namespace policies {
@@ -1304,6 +1314,7 @@ namespace policies {
hpx::lcos::local::spinlock init_mutex;
};
}}}
#endif

#include <hpx/config/warnings_suffix.hpp>

2 changes: 1 addition & 1 deletion hpx/runtime/threads/topology.hpp
@@ -308,7 +308,7 @@ namespace hpx { namespace threads
std::size_t num_numa_node
) const;
mask_type init_core_affinity_mask_from_core(
std::size_t num_core, mask_cref_type default_mask = mask_type()
std::size_t num_core, mask_cref_type default_mask = empty_mask
) const;
mask_type init_thread_affinity_mask(std::size_t num_thread) const;
mask_type init_thread_affinity_mask(
3 changes: 3 additions & 0 deletions src/runtime/resource/detail/detail_partitioner.cpp
@@ -227,6 +227,7 @@ namespace hpx { namespace resource { namespace detail
"Cannot instantiate more than one resource partitioner");
}

#if defined(HPX_HAVE_MAX_CPU_COUNT)
if(HPX_HAVE_MAX_CPU_COUNT < topo_.get_number_of_pus())
{
throw_runtime_error("partitioner::partioner",
@@ -238,6 +239,7 @@
"HPX.",
HPX_HAVE_MAX_CPU_COUNT, topo_.get_number_of_pus()));
}
#endif

// Create the default pool
initial_thread_pools_.push_back(init_pool_data("default"));
@@ -252,6 +254,7 @@
bool partitioner::pu_exposed(std::size_t pu_num)
{
threads::mask_type pu_mask = threads::mask_type();
threads::resize(pu_mask, threads::hardware_concurrency());
threads::set(pu_mask, pu_num);
threads::topology& topo = get_topology();

5 changes: 4 additions & 1 deletion src/runtime/threads/policies/affinity_data.cpp
@@ -94,6 +94,8 @@ namespace hpx { namespace threads { namespace policies { namespace detail
throw std::runtime_error(
"Cannot instantiate more than one affinity data instance");
}

threads::resize(no_affinity_, hardware_concurrency());
}

affinity_data::~affinity_data()
@@ -144,7 +146,7 @@ namespace hpx { namespace threads { namespace policies { namespace detail
else if (!affinity_desc.empty())
{
affinity_masks_.clear();
affinity_masks_.resize(num_threads_, 0);
affinity_masks_.resize(num_threads_, mask_type{});

for (std::size_t i = 0; i != num_threads_; ++i)
threads::resize(affinity_masks_[i], num_system_pus);
@@ -204,6 +206,7 @@ namespace hpx { namespace threads { namespace policies { namespace detail
if (threads::test(no_affinity_, global_thread_num))
{
static mask_type m = mask_type();
threads::resize(m, hardware_concurrency());
return m;
}

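For context: two smaller consequences of a runtime-sized mask show up in this file. The literal 0 previously used as the fill value for affinity_masks_ relied on an implicit integer-to-mask conversion that a dynamic bitset does not provide (its sizing constructor is explicit), hence the switch to a value-initialized mask_type{}; and the shared no_affinity_ mask and the static fallback mask are resized to hardware_concurrency() before any bit is tested or set, for the same reason as in the schedulers. A small illustration (the local names are hypothetical):

#include <hpx/runtime/threads/topology.hpp>

#include <cstddef>
#include <vector>

void affinity_mask_sketch(std::size_t num_threads, std::size_t num_system_pus)
{
    std::vector<hpx::threads::mask_type> affinity_masks;

    // affinity_masks.resize(num_threads, 0);   // only compiles for the
    //                                          // integer/fixed-size masks
    affinity_masks.resize(num_threads, hpx::threads::mask_type{});

    // Each mask still needs its final width before bits can be set.
    for (std::size_t i = 0; i != num_threads; ++i)
        hpx::threads::resize(affinity_masks[i], num_system_pus);
}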
9 changes: 8 additions & 1 deletion src/runtime/threads/topology.cpp
@@ -185,7 +185,11 @@ namespace hpx { namespace threads
}

///////////////////////////////////////////////////////////////////////////
#if !defined(HPX_HAVE_MAX_CPU_COUNT)
mask_type topology::empty_mask = mask_type(hardware_concurrency());
#else
mask_type topology::empty_mask = mask_type();
#endif

topology::topology()
: topo(nullptr), machine_affinity_mask_(0)
@@ -1054,7 +1058,9 @@
) const
{ // {{{
if (std::size_t(-1) == core)
{
return default_mask;
}

hwloc_obj_t core_obj = nullptr;

@@ -1344,7 +1350,7 @@
HPX_THROW_EXCEPTION(kernel_error,
"hpx::threads::topology::get_area_membind_nodeset",
"hwloc_get_area_membind_nodeset failed");
return -1;
return bitmap_to_mask(ns, HWLOC_OBJ_MACHINE);
std::cout << "error in ";
}
return bitmap_to_mask(ns, HWLOC_OBJ_NUMANODE);
@@ -1410,6 +1416,7 @@
hwloc_obj_type_t htype) const
{
mask_type mask = mask_type();
resize(mask, get_number_of_pus());
std::size_t num = hwloc_get_nbobjs_by_type(topo, htype);
//
int const pu_depth = hwloc_get_type_or_below_depth(topo, htype);
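For context: topology::empty_mask is a shared default value (it is now also the default argument of init_core_affinity_mask_from_core earlier in this commit), so in the dynamic configuration it is constructed with hardware_concurrency() bits up front and copies of it start out correctly sized. The change in get_area_membind_nodeset follows from the same type switch: returning -1 only compiles while the mask is an integral or fixed-size bitset type, so the error path now returns a real machine-wide mask instead. A tiny illustration of that last point (the function name is hypothetical):

#include <hpx/runtime/threads/topology.hpp>

hpx::threads::mask_type error_mask_sketch()
{
    // return -1;   // fine for a std::uint64_t or std::bitset mask, but does
    //              // not compile once mask_type is a boost::dynamic_bitset
    hpx::threads::mask_type mask = hpx::threads::mask_type();
    hpx::threads::resize(mask, hpx::threads::hardware_concurrency());
    return mask;    // an explicit, correctly sized mask for the error case
}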
