Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Slightly modify the detection of the number of CPU cores #44973

Merged
merged 9 commits into from Feb 20, 2023
79 changes: 68 additions & 11 deletions src/Common/getNumberOfPhysicalCPUCores.cpp
Expand Up @@ -6,10 +6,16 @@
# include <fstream>
#endif

#include <boost/algorithm/string/trim.hpp>

#include <thread>
#include <set>

namespace
{

#if defined(OS_LINUX)
static int32_t readFrom(const char * filename, int default_value)
int32_t readFrom(const char * filename, int default_value)
{
std::ifstream infile(filename);
if (!infile.is_open())
Expand All @@ -22,7 +28,7 @@ static int32_t readFrom(const char * filename, int default_value)
}

/// Try to look at cgroups limit if it is available.
static uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
{
uint32_t quota_count = default_cpu_count;
/// Return the number of milliseconds per period process is guaranteed to run.
Expand All @@ -36,20 +42,69 @@ static uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
}
#endif

static unsigned getNumberOfPhysicalCPUCoresImpl()
/// Returns the number of physical cores, unlike std::thread::hardware_concurrency() which returns the logical core count. With 2-way SMT
/// (HyperThreading) enabled, physical_concurrency() returns half of std::thread::hardware_concurrency(), otherwise it returns the same value.
#if defined(__x86_64__) && defined(OS_LINUX)
/// Counts the distinct (physical id, core id) pairs listed in /proc/cpuinfo.
///
/// Falls back to std::thread::hardware_concurrency() when the file cannot be read, when it contains no
/// "core id" entry, or when a value cannot be parsed as an integer (std::stoi throws and the handler below catches it).
unsigned physical_concurrency()
try
{
    /// The CPUID instruction isn't reliable across different vendors and CPU models. The best option to get the physical core count is
    /// to parse /proc/cpuinfo. boost::thread::physical_concurrency() does the same, so use their implementation.
    ///
    /// See https://doc.callmematthi.eu/static/webArticles/Understanding%20Linux%20_proc_cpuinfo.pdf
    std::ifstream proc_cpuinfo("/proc/cpuinfo");

    /// std::stoi yields int, so the ids are stored as int: no implicit signed -> unsigned conversion.
    /// Only the number of distinct pairs matters, so the element type does not affect the result.
    using CoreEntry = std::pair<int, int>; /// physical id, core id
    using CoreEntrySet = std::set<CoreEntry>;

    CoreEntrySet core_entries;

    CoreEntry cur_core_entry{};
    std::string line;

    while (std::getline(proc_cpuinfo, line))
    {
        /// Lines look like "key<whitespace>: value"; anything without a colon (e.g. blank separator lines) is skipped.
        const size_t pos = line.find(':');
        if (pos == std::string::npos)
            continue;

        const std::string key = line.substr(0, pos);
        const std::string val = line.substr(pos + 1);

        if (key.find("physical id") != std::string::npos)
        {
            cur_core_entry.first = std::stoi(val);
            continue;
        }

        if (key.find("core id") != std::string::npos)
        {
            /// The pair is recorded when "core id" is seen; the physical id is whatever was parsed most recently.
            cur_core_entry.second = std::stoi(val);
            core_entries.insert(cur_core_entry);
            continue;
        }
    }

    if (core_entries.empty())
        return std::thread::hardware_concurrency(); /// unexpected format

    return static_cast<unsigned>(core_entries.size());
}
catch (...)
{
    return std::thread::hardware_concurrency(); /// parsing error
}
#endif

unsigned getNumberOfPhysicalCPUCoresImpl()
{
unsigned cpu_count = std::thread::hardware_concurrency();
unsigned cpu_count = std::thread::hardware_concurrency(); /// logical cores (with SMT/HyperThreading)

/// Most of x86_64 CPUs have 2-way Hyper-Threading
/// Most x86_64 CPUs have 2-way SMT (Hyper-Threading).
/// Aarch64 and RISC-V don't have SMT so far.
/// POWER has SMT and it can be multiple way (like 8-way), but we don't know how ClickHouse really behaves, so use all of them.
/// POWER has SMT and it can be multi-way (e.g. 8-way), but we don't know how ClickHouse really behaves, so use all of them.

#if defined(__x86_64__)
/// Let's limit ourself to the number of physical cores.
/// But if the number of logical cores is small - maybe it is a small machine
/// or very limited cloud instance and it is reasonable to use all the cores.
#if defined(__x86_64__) && defined(OS_LINUX)
/// On really big machines, SMT is detrimental to performance (+ ~5% overhead in ClickBench). On such machines, we limit ourselves to the physical cores.
/// Few cores indicate that it is a small machine, runs in a VM or is a limited cloud instance --> it is reasonable to use all the cores.
if (cpu_count >= 32)
cpu_count /= 2;
cpu_count = physical_concurrency();
serxa marked this conversation as resolved.
Show resolved Hide resolved
#endif

#if defined(OS_LINUX)
Expand All @@ -59,6 +114,8 @@ static unsigned getNumberOfPhysicalCPUCoresImpl()
return cpu_count;
}

}

unsigned getNumberOfPhysicalCPUCores()
{
/// Calculate once.
Expand Down