Skip to content

Commit

Permalink
Fixing configuration for use of more than 64 cores
Browse files Browse the repository at this point in the history
- also fixes NUMA-allocation debug tests for MIC
  • Loading branch information
hkaiser committed Nov 4, 2015
1 parent badff74 commit aca96e3
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 17 deletions.
12 changes: 6 additions & 6 deletions CMakeLists.txt
Expand Up @@ -272,12 +272,12 @@ hpx_option(HPX_WITH_MAX_CPU_COUNT STRING
CATEGORY "Thread Manager" ADVANCED)
hpx_add_config_define(HPX_HAVE_MAX_CPU_COUNT ${HPX_WITH_MAX_CPU_COUNT})

if(NOT (${HPX_WITH_MAX_CPU_COUNT} LESS 65))
set(HPX_WITH_MORE_THAN_64_THREADS_DEFAULT OFF)
hpx_option(HPX_WITH_MORE_THAN_64_THREADS BOOL
"HPX applications will be able to run on more than 64 cores"
${HPX_WITH_MORE_THAN_64_THREADS_DEFAULT}
CATEGORY "Thread Manager" ADVANCED)
hpx_option(HPX_WITH_MORE_THAN_64_THREADS BOOL
"HPX applications will be able to run on more than 64 cores"
(NOT (${HPX_WITH_MAX_CPU_COUNT} LESS 65))
CATEGORY "Thread Manager" ADVANCED)
if(HPX_WITH_MORE_THAN_64_THREADS)
hpx_add_config_define(HPX_HAVE_MORE_THAN_64_THREADS)
endif()

hpx_option(HPX_WITH_THREAD_STACK_MMAP BOOL
Expand Down
4 changes: 2 additions & 2 deletions cmake/templates/config_defines.hpp.in
Expand Up @@ -16,8 +16,8 @@
#define HPX_PREFIX "@HPX_DEFINES_PREFIX@"
#endif
@hpx_config_defines@
#if defined(HPX_HAVE_MAX_CPU_COUNT) && (HPX_HAVE_MAX_CPU_COUNT > 64) && !defined(HPX_WITH_MORE_THAN_64_THREADS)
#define HPX_WITH_MORE_THAN_64_THREADS
#if defined(HPX_HAVE_MAX_CPU_COUNT) && (HPX_HAVE_MAX_CPU_COUNT > 64) && !defined(HPX_HAVE_MORE_THAN_64_THREADS)
#define HPX_HAVE_MORE_THAN_64_THREADS
#endif

#endif
Expand Down
4 changes: 2 additions & 2 deletions hpx/config.hpp
Expand Up @@ -487,8 +487,8 @@

///////////////////////////////////////////////////////////////////////////////
// Make sure we have support for more than 64 threads for Xeon Phi
#if defined(__MIC__) && !defined(HPX_WITH_MORE_THAN_64_THREADS)
# define HPX_WITH_MORE_THAN_64_THREADS
#if defined(__MIC__) && !defined(HPX_HAVE_MORE_THAN_64_THREADS)
# define HPX_HAVE_MORE_THAN_64_THREADS
#endif
#if defined(__MIC__) && !defined(HPX_HAVE_MAX_CPU_COUNT)
# define HPX_HAVE_MAX_CPU_COUNT 256
Expand Down
6 changes: 5 additions & 1 deletion hpx/parallel/util/numa_allocator.hpp
Expand Up @@ -14,6 +14,7 @@
#include <hpx/parallel/algorithms/for_each.hpp>
#include <hpx/parallel/executors/executor_information_traits.hpp>
#include <hpx/parallel/executors/static_chunk_size.hpp>
#include <hpx/util/assert.hpp>

#include <cstddef>
#include <vector>
Expand Down Expand Up @@ -107,7 +108,10 @@ namespace hpx { namespace parallel { namespace util
hpx::threads::mask_cref_type thread_mask =
traits::get_pu_mask(executors_[i], topo_, thread_num);

HPX_ASSERT(mem_mask & thread_mask);
HPX_ASSERT(threads::mask_size(mem_mask) ==
threads::mask_size(thread_mask));
HPX_ASSERT(threads::bit_and(mem_mask, thread_mask,
threads::mask_size(mem_mask)));
#endif
})
);
Expand Down
Expand Up @@ -125,7 +125,7 @@ namespace hpx { namespace threads { namespace policies
steals_in_numa_domain_(),
steals_outside_numa_domain_(),
#endif
#if !defined(HPX_WITH_MORE_THAN_64_THREADS) || defined(HPX_HAVE_MAX_CPU_COUNT)
#if !defined(HPX_HAVE_MORE_THAN_64_THREADS) || defined(HPX_HAVE_MAX_CPU_COUNT)
numa_domain_masks_(init.num_queues_),
outside_numa_domain_masks_(init.num_queues_)
#else
Expand Down
2 changes: 1 addition & 1 deletion hpx/runtime/threads/policies/local_queue_scheduler.hpp
Expand Up @@ -113,7 +113,7 @@ namespace hpx { namespace threads { namespace policies
steals_in_numa_domain_(),
steals_outside_numa_domain_(),
#endif
#if !defined(HPX_WITH_MORE_THAN_64_THREADS) || defined(HPX_HAVE_MAX_CPU_COUNT)
#if !defined(HPX_HAVE_MORE_THAN_64_THREADS) || defined(HPX_HAVE_MAX_CPU_COUNT)
numa_domain_masks_(init.num_queues_),
outside_numa_domain_masks_(init.num_queues_)
#else
Expand Down
42 changes: 40 additions & 2 deletions hpx/runtime/threads/topology.hpp
Expand Up @@ -21,15 +21,15 @@
#include <vector>
#include <iosfwd>
#include <limits>
#if defined(HPX_WITH_MORE_THAN_64_THREADS) || (defined(HPX_HAVE_MAX_CPU_COUNT) \
#if defined(HPX_HAVE_MORE_THAN_64_THREADS) || (defined(HPX_HAVE_MAX_CPU_COUNT) \
&& HPX_HAVE_MAX_CPU_COUNT > 64)
#include <bitset>
#endif

namespace hpx { namespace threads
{
/// \cond NOINTERNAL
#if !defined(HPX_WITH_MORE_THAN_64_THREADS) || (defined(HPX_HAVE_MAX_CPU_COUNT) \
#if !defined(HPX_HAVE_MORE_THAN_64_THREADS) || (defined(HPX_HAVE_MAX_CPU_COUNT) \
&& HPX_HAVE_MAX_CPU_COUNT <= 64)
typedef boost::uint64_t mask_type;
typedef boost::uint64_t mask_cref_type;
Expand Down Expand Up @@ -92,6 +92,18 @@ namespace hpx { namespace threads
return lhs == rhs;
}

// Integer-mask variant: yields true when lhs or rhs (or both) contain at
// least one set bit. The unnamed trailing size parameter is ignored here.
inline bool bit_or(mask_cref_type lhs, mask_cref_type rhs, std::size_t)
{
    // Spell out the integer-to-bool conversion instead of relying on the
    // implicit one.
    return (lhs | rhs) != 0;
}

// Integer-mask variant: yields true when lhs and rhs share at least one
// set bit position. The unnamed trailing size parameter is ignored here.
inline bool bit_and(mask_cref_type lhs, mask_cref_type rhs, std::size_t)
{
    // Spell out the integer-to-bool conversion instead of relying on the
    // implicit one.
    return (lhs & rhs) != 0;
}

#define HPX_CPU_MASK_PREFIX "0x"

#else
Expand Down Expand Up @@ -171,6 +183,32 @@ namespace hpx { namespace threads
}
return true;
}

// Yields true when, among bit positions [0, numbits), at least one is set
// in lhs or in rhs; yields false otherwise (including numbits == 0).
inline bool bit_or(mask_cref_type lhs, mask_cref_type rhs, std::size_t numbits)
{
    std::size_t pos = 0;
    while (pos != numbits)
    {
        // lhs is probed first; rhs is only probed when lhs has the bit clear.
        if (test(lhs, pos) || test(rhs, pos))
            return true;
        ++pos;
    }
    return false;
}

// Yields true when, among bit positions [0, numbits), at least one is set
// in both lhs and rhs; yields false otherwise (including numbits == 0).
inline bool bit_and(mask_cref_type lhs, mask_cref_type rhs, std::size_t numbits)
{
    std::size_t pos = 0;
    while (pos != numbits)
    {
        // rhs is only probed when lhs already has the bit set.
        if (test(lhs, pos) && test(rhs, pos))
            return true;
        ++pos;
    }
    return false;
}
#endif
/// \endcond

Expand Down
2 changes: 1 addition & 1 deletion src/runtime/threads/detail/thread_pool.cpp
Expand Up @@ -331,7 +331,7 @@ namespace hpx { namespace threads { namespace detail
<< "thread_pool::run: " << pool_name_
<< " create OS thread " << thread_num //-V128
<< ": will run on processing units within this mask: "
#if !defined(HPX_WITH_MORE_THAN_64_THREADS) || \
#if !defined(HPX_HAVE_MORE_THAN_64_THREADS) || \
(defined(HPX_HAVE_MAX_CPU_COUNT) && HPX_HAVE_MAX_CPU_COUNT <= 64)
<< std::hex << "0x" << mask;
#else
Expand Down
5 changes: 4 additions & 1 deletion tests/performance/local/stream.cpp
Expand Up @@ -246,7 +246,10 @@ numa_domain_worker(std::size_t domain,
hpx::threads::mask_cref_type thread_mask =
traits::get_pu_mask(policy.executor(), topo, thread_num);

HPX_ASSERT(mem_mask & thread_mask);
HPX_ASSERT(hpx::threads::mask_size(mem_mask) ==
hpx::threads::mask_size(thread_mask));
HPX_ASSERT(hpx::threads::bit_and(mem_mask, thread_mask,
hpx::threads::mask_size(mem_mask)));
#endif
});
t = 1.0E6 * (mysecond() - t);
Expand Down

0 comments on commit aca96e3

Please sign in to comment.