Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ find_package(absl REQUIRED)

# libdatadog_profiling
include(Findlibdatadog)
# watcher_sample_types.hpp includes <datadog/common.h>, so all targets need this.
include_directories(${Datadog_INCLUDE_DIR})

# Event Parser
add_subdirectory(src/event_parser)
Expand Down
21 changes: 15 additions & 6 deletions cmake/Findlibdatadog.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,24 @@

# libdatadog : common profiler imported libraries https://github.com/DataDog/libdatadog/releases
set(TAG_LIBDATADOG
"v26.0.0"
"v29.0.0"
CACHE STRING "libdatadog github tag")

set(Datadog_ROOT ${VENDOR_PATH}/libdatadog-${TAG_LIBDATADOG})
# Override with a local build by passing -DDatadog_LOCAL_ROOT=/path/to/libdatadog
set(Datadog_LOCAL_ROOT
""
CACHE PATH "Path to a local libdatadog build (skips GitHub download)")

message(STATUS "${CMAKE_SOURCE_DIR}/tools/fetch_libddprof.sh ${TAG_LIBDATADOG} ${LIBDATADOG_ROOT}")
execute_process(
COMMAND "${CMAKE_SOURCE_DIR}/tools/fetch_libdatadog.sh" ${TAG_LIBDATADOG} ${Datadog_ROOT}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
# Select Datadog_ROOT: a user-supplied local build (Datadog_LOCAL_ROOT) takes
# precedence; otherwise point at the vendored directory for TAG_LIBDATADOG and
# run the fetch script for that tag at configure time.
if(Datadog_LOCAL_ROOT)
  # Fail early on a mistyped override rather than letting a bad root propagate
  # to whatever consumes Datadog_ROOT afterwards.
  if(NOT IS_DIRECTORY "${Datadog_LOCAL_ROOT}")
    message(FATAL_ERROR "Datadog_LOCAL_ROOT is not a directory: ${Datadog_LOCAL_ROOT}")
  endif()
  message(STATUS "Using local libdatadog override: ${Datadog_LOCAL_ROOT}")
  set(Datadog_ROOT "${Datadog_LOCAL_ROOT}")
else()
  set(Datadog_ROOT "${VENDOR_PATH}/libdatadog-${TAG_LIBDATADOG}")
  message(STATUS "${CMAKE_SOURCE_DIR}/tools/fetch_libdatadog.sh ${TAG_LIBDATADOG} ${Datadog_ROOT}")
  # Expansions are quoted so each one reaches the script as exactly one
  # argument, even if a value is empty or contains a semicolon.
  execute_process(
    COMMAND "${CMAKE_SOURCE_DIR}/tools/fetch_libdatadog.sh" "${TAG_LIBDATADOG}" "${Datadog_ROOT}"
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    COMMAND_ERROR_IS_FATAL ANY)
endif()

set(DataDog_DIR "${Datadog_ROOT}/cmake")

Expand Down
79 changes: 27 additions & 52 deletions include/perf_watcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@

#include "ddprof_defs.hpp"
#include "event_config.hpp"
#include <string>
#include "watcher_sample_types.hpp"

#include <cstdint>
#include <linux/perf_event.h>
#include <string>

namespace ddprof {

Expand Down Expand Up @@ -55,7 +56,9 @@ struct PerfWatcher {
int64_t sample_period;
uint64_t sample_frequency;
};
int sample_type_id; // index into the sample types defined in this header
WatcherSampleTypes sample_type_info; // pprof types for each aggregation mode
bool
pprof_active; // false = watcher does not contribute to pprof (e.g., sDUM)

EventConfValueSource value_source; // how to normalize the sample value
EventAggregationMode aggregation_mode;
Expand All @@ -76,27 +79,6 @@ struct PerfWatcher {
bool instrument_self; // do my own perf_event_open, etc
};

// The Datadog backend only understands pre-configured event types. Those
// types are defined here, and then referenced in the watcher
// The last column is a dependent type which is always aggregated as a count
// whenever the main type is aggregated.
// type, pprof, unit, live-pprof, sample_type,
// a, b, c, d, e,
#define PROFILE_TYPE_TABLE(X) \
X(NOCOUNT, "nocount", nocount, "undef", NOCOUNT) \
X(TRACEPOINT, "tracepoint", events, "undef", NOCOUNT) \
X(CPU_NANOS, "cpu-time", nanoseconds, "undef", CPU_SAMPLE) \
X(CPU_SAMPLE, "cpu-samples", count, "undef", NOCOUNT) \
X(ALLOC_SAMPLE, "alloc-samples", count, "inuse-objects", NOCOUNT) \
X(ALLOC_SPACE, "alloc-space", bytes, "inuse-space", ALLOC_SAMPLE)

// defines enum of profile types
#define X_ENUM(a, b, c, d, e) DDPROF_PWT_##a,
enum DDPROF_SAMPLE_TYPES : uint8_t {
PROFILE_TYPE_TABLE(X_ENUM) DDPROF_PWT_LENGTH,
};
#undef X_ENUM

// Define our own event type on top of perf event types
enum DDProfTypeId : uint8_t { kDDPROF_TYPE_CUSTOM = PERF_TYPE_MAX + 100 };

Expand Down Expand Up @@ -128,33 +110,33 @@ enum DDProfCustomCountId : uint8_t {
// events are marked as tracepoint unless they represent a well-known profiling
// type!
// clang-format off
// short desc perf event type perf event count type period/freq profile sample type addtl. configs
// short desc perf event type perf event count type period/freq sample types pprof_active addtl. configs
// cppcheck-suppress preprocessorErrorDirective
#define EVENT_CONFIG_TABLE(X) \
X(hCPU, "CPU Cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 99, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(hREF, "Ref. CPU Cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 1000, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(hINST, "Instr. Count", PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 1000, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(hCREF, "Cache Ref.", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, 999, DDPROF_PWT_TRACEPOINT, {}) \
X(hCMISS, "Cache Miss", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, 999, DDPROF_PWT_TRACEPOINT, {}) \
X(hBRANCH, "Branche Instr.", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, 999, DDPROF_PWT_TRACEPOINT, {}) \
X(hBMISS, "Branch Miss", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, 999, DDPROF_PWT_TRACEPOINT, {}) \
X(hBUS, "Bus Cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, 1000, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(hBSTF, "Bus Stalls(F)", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, 1000, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(hBSTB, "Bus Stalls(B)", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, 1000, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(sCPU, "CPU Time", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, 99, DDPROF_PWT_CPU_NANOS, IS_FREQ_TRY_KERNEL) \
X(sPF, "Page Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, 1, DDPROF_PWT_TRACEPOINT, USE_KERNEL) \
X(sCS, "Con. Switch", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, 1, DDPROF_PWT_TRACEPOINT, USE_KERNEL) \
X(sMig, "CPU Migrations", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, 99, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(sPFMAJ, "Major Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, 99, DDPROF_PWT_TRACEPOINT, USE_KERNEL) \
X(sPFMIN, "Minor Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, 99, DDPROF_PWT_TRACEPOINT, USE_KERNEL) \
X(sALGN, "Align. Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, 99, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(sEMU, "Emu. Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, 99, DDPROF_PWT_TRACEPOINT, IS_FREQ) \
X(sDUM, "Dummy", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY, 1, DDPROF_PWT_NOCOUNT, {}) \
X(sALLOC, "Allocations", kDDPROF_TYPE_CUSTOM, kDDPROF_COUNT_ALLOCATIONS, 524288, DDPROF_PWT_ALLOC_SPACE, SKIP_FRAMES)
X(hCPU, "CPU Cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 99, k_stype_tracepoint, true, IS_FREQ) \
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could replace pprof_active with a dummy sample type (similarly to what was done before):

inline constexpr WatcherSampleTypes k_stype_dummy = {
    {k_stype_none,   k_stype_none},
    {k_stype_none, k_stype_none}};

inline constexpr bool is_pprof_active(const WatcherSampleTypes &st) {
  return st.sample_types[0] != k_stype_none;
}

X(hREF, "Ref. CPU Cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 1000, k_stype_tracepoint, true, IS_FREQ) \
X(hINST, "Instr. Count", PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 1000, k_stype_tracepoint, true, IS_FREQ) \
X(hCREF, "Cache Ref.", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, 999, k_stype_tracepoint, true, {}) \
X(hCMISS, "Cache Miss", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, 999, k_stype_tracepoint, true, {}) \
X(hBRANCH, "Branche Instr.", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, 999, k_stype_tracepoint, true, {}) \
X(hBMISS, "Branch Miss", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, 999, k_stype_tracepoint, true, {}) \
X(hBUS, "Bus Cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, 1000, k_stype_tracepoint, true, IS_FREQ) \
X(hBSTF, "Bus Stalls(F)", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, 1000, k_stype_tracepoint, true, IS_FREQ) \
X(hBSTB, "Bus Stalls(B)", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, 1000, k_stype_tracepoint, true, IS_FREQ) \
X(sCPU, "CPU Time", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, 99, k_stype_cpu, true, IS_FREQ_TRY_KERNEL) \
X(sPF, "Page Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, 1, k_stype_tracepoint, true, USE_KERNEL) \
X(sCS, "Con. Switch", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, 1, k_stype_tracepoint, true, USE_KERNEL) \
X(sMig, "CPU Migrations", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, 99, k_stype_tracepoint, true, IS_FREQ) \
X(sPFMAJ, "Major Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, 99, k_stype_tracepoint, true, USE_KERNEL) \
X(sPFMIN, "Minor Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, 99, k_stype_tracepoint, true, USE_KERNEL) \
X(sALGN, "Align. Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, 99, k_stype_tracepoint, true, IS_FREQ) \
X(sEMU, "Emu. Faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, 99, k_stype_tracepoint, true, IS_FREQ) \
X(sDUM, "Dummy", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY, 1, k_stype_tracepoint, false, {}) \
X(sALLOC, "Allocations", kDDPROF_TYPE_CUSTOM, kDDPROF_COUNT_ALLOCATIONS, 524288, k_stype_alloc, true, SKIP_FRAMES)

// clang-format on

#define X_ENUM(a, b, c, d, e, f, g) DDPROF_PWE_##a,
#define X_ENUM(a, b, c, d, e, f, g, h) DDPROF_PWE_##a,
enum DDPROF_EVENT_NAMES : int8_t {
DDPROF_PWE_TRACEPOINT = -1,
EVENT_CONFIG_TABLE(X_ENUM) DDPROF_PWE_LENGTH,
Expand All @@ -165,16 +147,9 @@ enum DDPROF_EVENT_NAMES : int8_t {
const PerfWatcher *ewatcher_from_idx(int idx);
const PerfWatcher *ewatcher_from_str(const char *str);
const PerfWatcher *tracepoint_default_watcher();
bool watcher_has_countable_sample_type(const PerfWatcher *watcher);
bool watcher_has_tracepoint(const PerfWatcher *watcher);
int watcher_to_count_sample_type_id(const PerfWatcher *watcher);
const char *event_type_name_from_idx(int idx);

// Helper functions for sample types
const char *sample_type_name_from_idx(int idx, EventAggregationModePos pos);
const char *sample_type_unit_from_idx(int idx);
int sample_type_id_to_count_sample_type_id(int idx);

// Helper functions, mostly for tests
uint64_t perf_event_default_sample_type();
void log_watcher(const PerfWatcher *w, int idx);
Expand Down
45 changes: 45 additions & 0 deletions include/watcher_sample_types.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0. This product includes software
// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present
// Datadog, Inc.

#pragma once

// Maps a watcher's event to pprof sample types for each aggregation mode.
// k_stype_none signals "no sample/count type for this aggregation mode".
// Fields are uint32_t (not the enum) to allow k_stype_none = UINT32_MAX,
// which lies outside the valid enum range.

#include "event_config.hpp"

#include <cstdint>
#include <datadog/common.h>

namespace ddprof {

// pprof sample types associated with one watcher, with one slot per
// aggregation mode. sample_types holds the main value type and count_types the
// companion count type for the same slot; a slot set to k_stype_none has no
// type for that aggregation mode.
struct WatcherSampleTypes {
uint32_t sample_types[kNbEventAggregationModes]; // [kSumPos, kLiveSumPos]
uint32_t count_types[kNbEventAggregationModes]; // companion counts
};

// Sentinel: slot is unused for this aggregation mode.
inline constexpr uint32_t k_stype_none = UINT32_MAX;

// Tracepoints: one event = one sample, no count companion, no live mode.
// clang-format off
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nsavoire does this part look OK? What do you think?

inline constexpr WatcherSampleTypes k_stype_tracepoint = {
{DDOG_PROF_SAMPLE_TYPE_TRACEPOINT, k_stype_none},
{k_stype_none, k_stype_none}};

// CPU: nanoseconds in sum mode only — no live profile for CPU.
inline constexpr WatcherSampleTypes k_stype_cpu = {
{DDOG_PROF_SAMPLE_TYPE_CPU_TIME, k_stype_none},
{DDOG_PROF_SAMPLE_TYPE_CPU_SAMPLES, k_stype_none}};

// Allocation: bytes allocated / live bytes, with object-count companions.
inline constexpr WatcherSampleTypes k_stype_alloc = {
{DDOG_PROF_SAMPLE_TYPE_ALLOC_SPACE, DDOG_PROF_SAMPLE_TYPE_INUSE_SPACE},
{DDOG_PROF_SAMPLE_TYPE_ALLOC_SAMPLES, DDOG_PROF_SAMPLE_TYPE_INUSE_OBJECTS}};
// clang-format on

} // namespace ddprof
11 changes: 8 additions & 3 deletions src/exporter/ddprof_exporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ DDRes create_pprof_file(ddog_Timespec start, const char *dbg_pprof_prefix,
strftime(time_start, std::size(time_start), "%Y%m%dT%H%M%SZ", tm_start);

char filename[PATH_MAX];
snprintf(filename, std::size(filename), "%s%s.pprof.lz4", dbg_pprof_prefix,
snprintf(filename, std::size(filename), "%s%s.pprof.zst", dbg_pprof_prefix,
time_start);
LG_NTC("[EXPORTER] Writing pprof to file %s", filename);
constexpr int read_write_user_only = 0600;
Expand Down Expand Up @@ -261,12 +261,17 @@ DDRes ddprof_exporter_new(const UserTags *user_tags, DDProfExporter *exporter) {
fill_stable_tags(user_tags, exporter, tags_exporter);

ddog_CharSlice const base_url = to_CharSlice(exporter->_url);
// ddprof is an out-of-process profiler and does not fork during export,
// so the system DNS resolver (/etc/resolv.conf) is safe and preferred.
constexpr bool k_use_system_resolver = true;
ddog_prof_Endpoint endpoint;
if (exporter->_agent) {
endpoint = ddog_prof_Endpoint_agent(base_url, k_timeout_ms);
endpoint =
ddog_prof_Endpoint_agent(base_url, k_timeout_ms, k_use_system_resolver);
} else {
ddog_CharSlice const api_key = to_CharSlice(exporter->_input.api_key);
endpoint = ddog_prof_Endpoint_agentless(base_url, api_key, k_timeout_ms);
endpoint = ddog_prof_Endpoint_agentless(base_url, api_key, k_timeout_ms,
k_use_system_resolver);
}

ddog_prof_ProfileExporter_Result res_exporter = ddog_prof_Exporter_new(
Expand Down
Loading