Skip to content

Commit

Permalink
add linux pressure stall metrics
Browse files Browse the repository at this point in the history
This change adds the following new metrics, which can be used to provide
feedback on where a system is currently constrained. The metrics are collected
for both EC2 instances and Titus containers, with the exception of the full:cpu
metric, which is meaningless on EC2 instances.

* name=sys.pressure.some,id=[cpu|io|memory] counter unit=seconds/second
* name=sys.pressure.full,id=[cpu|io|memory] counter unit=seconds/second

The `some` pressure stall category represents the amount of time that SOME
tasks on the system are unable to make progress due to a resource constraint.

The `full` pressure stall category represents the amount of time that ALL
runnable tasks are unable to make progress due to a resource constraint.

For example, the some:cpu category represents the case where a task is sitting
in the CPU run queue, ready to run, but it cannot make forward progress because
all CPUs are busy.

The full:io category represents a situation where every runnable task on the
system is unable to make progress because they are all stalled on IO.

The counters are derived from the `total` field of each pressure stall category,
which is the cumulative stall time in microseconds. They are reported as the
number of seconds per second that a system spends in a given stall state.
  • Loading branch information
copperlight committed Feb 22, 2024
1 parent 71d1e82 commit 2dd921d
Show file tree
Hide file tree
Showing 25 changed files with 246 additions and 21 deletions.
11 changes: 8 additions & 3 deletions bin/atlas-agent.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "ntp.h"
#include "nvml.h"
#include "perfmetrics.h"
#include "pressure_stall.h"
#include "proc.h"
#include "tagger.h"
#include "backward.hpp"
Expand All @@ -34,6 +35,7 @@ using Ethtool = atlasagent::Ethtool<>;
using GpuMetrics = atlasagent::GpuMetrics<TaggingRegistry, Nvml>;
using Ntp = atlasagent::Ntp<>;
using PerfMetrics = atlasagent::PerfMetrics<>;
using PressureStall = atlasagent::PressureStall<>;
using Proc = atlasagent::Proc<>;

#if defined(__linux__) && defined(CGROUP2_SUPER_MAGIC)
Expand Down Expand Up @@ -98,12 +100,14 @@ static void gather_peak_system_metrics(Proc* proc) { proc->peak_cpu_stats(); }

static void gather_scaling_metrics(CpuFreq* cpufreq) { cpufreq->Stats(); }

static void gather_slow_system_metrics(Proc* proc, Disk* disk, Ethtool* ethtool, Ntp* ntp, Aws* aws) {
static void gather_slow_system_metrics(Proc* proc, Disk* disk, Ethtool* ethtool, Ntp* ntp,
PressureStall* pressureStall, Aws* aws) {
Logger()->info("Gathering system metrics");
aws->update_stats();
disk->disk_stats();
ethtool->update_stats();
ntp->update_stats();
pressureStall->update_stats();
proc->arp_stats();
proc->cpu_stats();
proc->loadavg_stats();
Expand Down Expand Up @@ -223,13 +227,14 @@ void collect_system_metrics(TaggingRegistry* registry, std::unique_ptr<atlasagen
Ethtool ethtool{registry, net_tags};
Ntp ntp{registry};
PerfMetrics perf_metrics{registry, ""};
PressureStall pressureStall{registry};
Proc proc{registry, net_tags};

auto gpu = init_gpu(registry, std::move(nvidia_lib));

// the first call to this gather function takes >1 second, so it must
// be done before we start calculating times to wait for peak metrics
gather_slow_system_metrics(&proc, &disk, &ethtool, &ntp, &aws);
gather_slow_system_metrics(&proc, &disk, &ethtool, &ntp, &pressureStall, &aws);

auto now = system_clock::now();
auto next_run = now;
Expand All @@ -240,7 +245,7 @@ void collect_system_metrics(TaggingRegistry* registry, std::unique_ptr<atlasagen
gather_peak_system_metrics(&proc);
gather_scaling_metrics(&cpufreq);
if (system_clock::now() >= next_slow_run) {
gather_slow_system_metrics(&proc, &disk, &ethtool, &ntp, &aws);
gather_slow_system_metrics(&proc, &disk, &ethtool, &ntp, &pressureStall, &aws);
perf_metrics.collect();
if (gpu) {
gpu->gpu_metrics();
Expand Down
1 change: 1 addition & 0 deletions lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ add_library(sysagent
"nvml.cc"
"nvml.h"
"perfmetrics.h"
"pressure_stall.h"
"proc.h"
"sample_config.cc"
"tagger.h"
Expand Down
1 change: 1 addition & 0 deletions lib/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class CGroup {
void memory_stats_std_v1() noexcept;
void memory_stats_std_v2() noexcept;
void network_stats() noexcept;
void pressure_stall() noexcept;
void set_prefix(std::string new_prefix) noexcept { path_prefix_ = std::move(new_prefix); }

private:
Expand Down
22 changes: 22 additions & 0 deletions lib/cgroup_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,28 @@ TEST(CGroup, Net) {
EXPECT_EQ(map["cgroup.net.bandwidthBytes|gauge"], megabits2bytes(128));
}

// Verifies that CGroup::pressure_stall() publishes the some/full counters for
// cpu, io and memory once a second sample has been taken.
TEST(CGroup, PressureStall) {
  Registry registry;
  CGroupTest cGroup{&registry, "testdata/resources", absl::Seconds(30)};

  // The first sample must not produce any measurements.
  cGroup.pressure_stall();
  auto first_sample = my_measurements(&registry);
  auto first_map = measurements_to_map(first_sample, "");
  EXPECT_EQ(first_map.size(), 0);

  // Switch to the second set of fixtures and sample again.
  cGroup.set_prefix("testdata/resources2");
  cGroup.pressure_stall();
  const auto& second_sample = my_measurements(&registry);
  measurement_map actual = measurements_to_map(second_sample, "");

  expect_value(&actual, "sys.pressure.some|count|cpu", 10);
  expect_value(&actual, "sys.pressure.full|count|cpu", 20);

  expect_value(&actual, "sys.pressure.some|count|io", 10);
  expect_value(&actual, "sys.pressure.full|count|io", 20);

  expect_value(&actual, "sys.pressure.some|count|memory", 10);
  expect_value(&actual, "sys.pressure.full|count|memory", 20);

  // Nothing other than the six pressure counters may remain.
  EXPECT_TRUE(actual.empty());
}

TEST(CGroup, ParseCpuV1) {
Registry registry;
CGroupTest cGroup{&registry, "testdata/resources", absl::Seconds(30)};
Expand Down
10 changes: 5 additions & 5 deletions lib/ethtool_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ using atlasagent::Ethtool;
using Registry = spectator::TestRegistry;
using spectator::Tags;

class CT : public Ethtool<Registry> {
class EthtoolTest : public Ethtool<Registry> {
public:
explicit CT(Registry* registry) : Ethtool{registry, Tags{}} {}
explicit EthtoolTest(Registry* registry) : Ethtool{registry, Tags{}} {}

void stats(const std::vector<std::string>& nic_stats, const char* iface) noexcept {
Ethtool::ethtool_stats(nic_stats, iface);
Expand All @@ -25,7 +25,7 @@ class CT : public Ethtool<Registry> {

TEST(Ethtool, Stats) {
Registry registry;
CT ethtool{&registry};
EthtoolTest ethtool{&registry};

std::vector<std::string> first_sample = {
"NIC statistics:\n",
Expand Down Expand Up @@ -76,7 +76,7 @@ TEST(Ethtool, Stats) {

TEST(Ethtool, StatsEmpty) {
Registry registry;
CT ethtool{&registry};
EthtoolTest ethtool{&registry};

ethtool.stats({}, "");
auto ms = registry.Measurements();
Expand All @@ -89,7 +89,7 @@ TEST(Ethtool, StatsEmpty) {

TEST(Ethtool, EnumerateInterfaces) {
Registry registry;
CT ethtool{&registry};
EthtoolTest ethtool{&registry};

std::vector<std::string> ip_links = {
"1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000\n",
Expand Down
46 changes: 43 additions & 3 deletions lib/internal/cgroup.inc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@

namespace atlasagent {

using spectator::Id;
using spectator::Tags;

constexpr auto MICROS = 1000 * 1000.0;
constexpr auto NANOS = 1000 * 1000 * 1000.0;

template <typename Reg>
void CGroup<Reg>::network_stats() noexcept {
auto megabits = std::getenv("TITUS_NUM_NETWORK_BANDWIDTH");
Expand All @@ -18,6 +24,43 @@ void CGroup<Reg>::network_stats() noexcept {
}
}

// Publishes the cgroup pressure stall counters.
//
// For each of `cpu.pressure`, `io.pressure` and `memory.pressure` under
// path_prefix_, the first line feeds `sys.pressure.some` and the second line
// feeds `sys.pressure.full`, tagged with id=cpu|io|memory. A file that does not
// yield exactly two lines is skipped. The fifth field of each line is expected
// to be `total=<microseconds>`; the value is converted to seconds before it is
// set on the monotonic counter.
//
// A line with fewer than five fields, or whose fifth field does not start with
// `total=`, is ignored. Such input used to be indexed unchecked, which is
// undefined behavior for a short line, and substr(6) on a short field throws
// out of this noexcept function and terminates the process.
template <typename Reg>
void CGroup<Reg>::pressure_stall() noexcept {
  struct Source {
    const char* file;  // file name, relative to path_prefix_
    const char* id;    // value of the `id` tag on the counters
  };
  static constexpr Source kSources[] = {
      {"cpu.pressure", "cpu"},
      {"io.pressure", "io"},
      {"memory.pressure", "memory"},
  };

  // Parses the `total=` field of one line and sets it, in seconds, on the
  // monotonic counter `name` tagged with `id`. Malformed lines are skipped.
  auto record = [this](const auto& fields, const char* name, const char* id) {
    constexpr unsigned kTotalIndex = 4;
    constexpr unsigned kPrefixLength = 6;  // length of "total="
    if (fields.size() <= kTotalIndex) {
      return;
    }
    const auto& total = fields[kTotalIndex];
    if (total.compare(0, kPrefixLength, "total=") != 0) {
      return;
    }
    auto usecs = std::strtoul(total.c_str() + kPrefixLength, nullptr, 10);
    registry_->GetMonotonicCounter(Id::of(name, Tags{{"id", id}}))->Set(usecs / MICROS);
  };

  for (const auto& source : kSources) {
    auto lines = read_lines_fields(path_prefix_, source.file);
    if (lines.size() != 2) {
      continue;
    }
    record(lines[0], "sys.pressure.some", source.id);
    record(lines[1], "sys.pressure.full", source.id);
  }
}

template <typename Reg>
void CGroup<Reg>::cpu_shares_v1(absl::Time now) noexcept {
static absl::Time last_updated;
Expand Down Expand Up @@ -93,9 +136,6 @@ void CGroup<Reg>::cpu_shares_v2(absl::Time now) noexcept {
}
}

constexpr auto MICROS = 1000 * 1000.0;
constexpr auto NANOS = 1000 * 1000 * 1000.0;

template <typename Reg>
void CGroup<Reg>::cpu_processing_time_v1() noexcept {
auto time_nanos = read_num_from_file(path_prefix_, "cpuacct/cpuacct.usage");
Expand Down
18 changes: 9 additions & 9 deletions lib/ntp_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,23 @@ struct TestClock {
}
};

class CT : public Ntp<Registry, TestClock> {
class NtpTest : public Ntp<Registry, TestClock> {
public:
explicit CT(Registry* registry) : Ntp{registry} {}
explicit NtpTest(Registry* registry) : Ntp{registry} {}
void stats(const std::string& tracking, const std::vector<std::string>& sources) noexcept {
Ntp::chrony_stats(tracking, sources);
}
void ntp(int err, timex* time) { Ntp::ntp_stats(err, time); }
[[nodiscard]] absl::Time lastSample() const { return lastSampleTime_; }
};

double get_default_sample_age(const CT& ntp) {
double get_default_sample_age(const NtpTest& ntp) {
return absl::ToDoubleSeconds(TestClock::now() - ntp.lastSample());
}

TEST(Ntp, Stats) {
Registry registry;
CT ntp{&registry};
NtpTest ntp{&registry};

std::string tracking =
"A9FEA97B,169.254.169.123,4,1553630752.756016394,0.000042,-0.000048721,"
Expand All @@ -51,7 +51,7 @@ TEST(Ntp, Stats) {

TEST(Ntp, StatsEmpty) {
Registry registry;
CT ntp{&registry};
NtpTest ntp{&registry};
ntp.stats("", {});

auto ms = registry.Measurements();
Expand All @@ -60,7 +60,7 @@ TEST(Ntp, StatsEmpty) {

TEST(Ntp, StatsInvalid) {
Registry registry;
CT ntp{&registry};
NtpTest ntp{&registry};

std::string tracking =
"A9FEA97B,1.2.3.4,4,1.1,foo,-0.021,1,-2,-0.022,0.079,0.0005,0.0001,775.8,Normal\n";
Expand All @@ -83,7 +83,7 @@ TEST(Ntp, StatsInvalid) {
// (maybe a race between the commands ntpc tracking; ntpc sources)
TEST(Ntp, NoSources) {
Registry registry;
CT ntp{&registry};
NtpTest ntp{&registry};

std::string tracking =
"A9FEA97B,1.2.3.4,4,1.1,10,-0.021,1,-2,-0.022,0.079,0.0005,0.0001,775.8,Normal\n";
Expand All @@ -103,7 +103,7 @@ TEST(Ntp, NoSources) {

TEST(Ntp, adjtime) {
Registry registry;
CT ntp{&registry};
NtpTest ntp{&registry};

struct timex t {};
t.esterror = 100000;
Expand All @@ -118,7 +118,7 @@ TEST(Ntp, adjtime) {

TEST(Ntp, adjtime_err) {
Registry registry;
CT ntp{&registry};
NtpTest ntp{&registry};

struct timex t {};
t.esterror = 200000;
Expand Down
58 changes: 58 additions & 0 deletions lib/pressure_stall.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#pragma once

#include "absl/strings/str_split.h"
#include "tagging_registry.h"
#include "util.h"

namespace atlasagent {

using spectator::Id;
using spectator::IdPtr;
using spectator::Tags;

// Collects the system-wide pressure stall counters from the `cpu`, `io` and
// `memory` files found under a path prefix (default: /proc/pressure).
//
// Reported counters, all tagged with id=cpu|io|memory:
//   * sys.pressure.some - taken from the first line of each file
//   * sys.pressure.full - taken from the second line; not reported for cpu
//
// The fifth field of a line is expected to be `total=<microseconds>`; the
// value is converted to seconds before it is set on the monotonic counter.
template <typename Reg = TaggingRegistry>
class PressureStall {
 public:
  // registry: where the counters are published; stored as a raw pointer.
  // path_prefix: directory that contains the `cpu`, `io` and `memory` files.
  explicit PressureStall(Reg* registry, std::string path_prefix = "/proc/pressure") noexcept
      : registry_(registry), path_prefix_(std::move(path_prefix)) {}

  // Replaces the directory that subsequent update_stats() calls read from.
  void set_prefix(std::string new_prefix) noexcept { path_prefix_ = std::move(new_prefix); }

  // Reads the three pressure files and updates the counters. A file that does
  // not yield exactly two lines is skipped, and so is any line that has fewer
  // than five fields or whose fifth field does not start with `total=`.
  //
  // NOTE(review): kernels older than 5.13 presumably expose only the `some`
  // line in /proc/pressure/cpu, in which case the two-line check drops the cpu
  // metric entirely - confirm that this is intended.
  void update_stats() noexcept {
    struct Source {
      const char* id;    // file name under path_prefix_, also the `id` tag
      bool report_full;  // whether the second line is published
    };
    static constexpr Source kSources[] = {
        {"cpu", false},
        {"io", true},
        {"memory", true},
    };

    for (const auto& source : kSources) {
      auto lines = read_lines_fields(path_prefix_, source.id);
      if (lines.size() != 2) {
        continue;
      }
      record_total(lines[0], "sys.pressure.some", source.id);
      if (source.report_full) {
        record_total(lines[1], "sys.pressure.full", source.id);
      }
    }
  }

 private:
  // Parses the `total=` field of one line and sets it, in seconds, on the
  // monotonic counter `name` tagged with `id`.
  //
  // The field count and the `total=` prefix are verified first. Unchecked
  // indexing is undefined behavior for a short line, and substr(6) on a short
  // field would throw out of the noexcept update_stats() and terminate the
  // process.
  template <typename Fields>
  void record_total(const Fields& fields, const char* name, const char* id) noexcept {
    constexpr unsigned kTotalIndex = 4;
    constexpr unsigned kPrefixLength = 6;  // length of "total="
    if (fields.size() <= kTotalIndex) {
      return;
    }
    const auto& total = fields[kTotalIndex];
    if (total.compare(0, kPrefixLength, "total=") != 0) {
      return;
    }
    auto usecs = std::strtoul(total.c_str() + kPrefixLength, nullptr, 10);
    registry_->GetMonotonicCounter(Id::of(name, Tags{{"id", id}}))->Set(usecs / MICROS);
  }

  Reg* registry_;
  std::string path_prefix_;
  static constexpr double MICROS = 1000 * 1000.0;
};
} // namespace atlasagent
45 changes: 45 additions & 0 deletions lib/pressure_stall_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include "measurement_utils.h"
#include "pressure_stall.h"
#include <gtest/gtest.h>

namespace {

using atlasagent::Logger;
using atlasagent::PressureStall;
using Registry = spectator::TestRegistry;
using spectator::Tags;

// Test wrapper around PressureStall<Registry> that forwards its constructor
// arguments and exposes update_stats() under the shorter name stats().
class PressureStallTest : public PressureStall<Registry> {
 public:
  explicit PressureStallTest(Registry* registry, std::string path_prefix = "/proc/pressure")
      : PressureStall<Registry>(registry, std::move(path_prefix)) {}

  // Takes one sample of the pressure files.
  void stats() { this->update_stats(); }
};

// Verifies the five counters published by PressureStall::update_stats():
// `some` for cpu, io and memory, and `full` for io and memory only.
TEST(PressureStall, UpdateStats) {
  Registry registry;
  PressureStallTest pressure{&registry, "testdata/resources/proc/pressure"};

  // The first sample produces no measurements.
  pressure.stats();
  auto ms = registry.Measurements();
  EXPECT_EQ(ms.size(), 0);

  // we need two samples, because these are all monotonic counters
  pressure.set_prefix("testdata/resources/proc2/pressure");
  pressure.stats();
  ms = registry.Measurements();
  EXPECT_EQ(ms.size(), 5);

  const std::unordered_map<std::string, double> expected{
      {"sys.pressure.some|count|cpu", 10},
      {"sys.pressure.some|count|io", 10},
      {"sys.pressure.full|count|io", 20},
      {"sys.pressure.some|count|memory", 10},
      {"sys.pressure.full|count|memory", 20},
  };
  auto actual = measurements_to_map(ms, "");
  EXPECT_EQ(actual, expected);
}
} // namespace
Loading

0 comments on commit 2dd921d

Please sign in to comment.