From fdb6393c173c9a5903c9f529d467d3f891724b5d Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Wed, 30 Apr 2025 14:23:01 +0200 Subject: [PATCH 1/5] feat: support origin detection Origin Detection makes it possible to detect where the container traces come from, and to add container tags automatically to the local root span. --- CMakePresets.json | 10 ++ examples/http-server/Dockerfile | 2 +- examples/http-server/docker-compose.yaml | 1 + include/datadog/datadog_agent_config.h | 3 + include/datadog/environment.h | 3 +- src/datadog/datadog_agent.cpp | 44 ++++++-- src/datadog/datadog_agent.h | 3 +- src/datadog/datadog_agent_config.cpp | 7 ++ src/datadog/platform_util.cpp | 131 ++++++++++++++++++++++- src/datadog/platform_util.h | 28 +++++ test/CMakeLists.txt | 1 + test/test_datadog_agent.cpp | 6 +- test/test_platform_util.cpp | 66 ++++++++++++ test/test_tracer_config.cpp | 13 +++ 14 files changed, 300 insertions(+), 18 deletions(-) create mode 100644 test/test_platform_util.cpp diff --git a/CMakePresets.json b/CMakePresets.json index 0c2a91a2..6ebca408 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -19,6 +19,16 @@ "cacheVariables": { "CMAKE_POLICY_VERSION_MINIMUM": "3.5" } + }, + { + "name": "dev", + "displayName": "Development", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug", + "DD_TRACE_ENABLE_SANITIZE": "ON", + "DD_TRACE_BUILD_TESTING": "ON", + "DD_TRACE_BUILD_EXAMPLES": "ON" + } } ] } diff --git a/examples/http-server/Dockerfile b/examples/http-server/Dockerfile index b2e16c77..1e1181f6 100644 --- a/examples/http-server/Dockerfile +++ b/examples/http-server/Dockerfile @@ -3,7 +3,7 @@ from ubuntu:22.04 WORKDIR /dd-trace-cpp ARG DEBIAN_FRONTEND=noninteractive -ARG BRANCH=v0.2.1 +ARG BRANCH=v1.0.0 run apt update -y \ && apt install -y g++ make git wget sed \ diff --git a/examples/http-server/docker-compose.yaml b/examples/http-server/docker-compose.yaml index ac8a1908..bc9144ef 100644 --- a/examples/http-server/docker-compose.yaml +++ 
b/examples/http-server/docker-compose.yaml @@ -53,3 +53,4 @@ services: - DD_APM_ENABLED=true - DD_LOG_LEVEL=ERROR - DOCKER_HOST + - DD_SITE diff --git a/include/datadog/datadog_agent_config.h b/include/datadog/datadog_agent_config.h index 0c05d643..a8a0742b 100644 --- a/include/datadog/datadog_agent_config.h +++ b/include/datadog/datadog_agent_config.h @@ -87,6 +87,9 @@ class FinalizedDatadogAgentConfig { std::chrono::steady_clock::duration shutdown_timeout; std::chrono::steady_clock::duration remote_configuration_poll_interval; std::unordered_map metadata; + + // Origin detection + Optional admission_controller_uid; }; Expected finalize_config( diff --git a/include/datadog/environment.h b/include/datadog/environment.h index 266737dc..65d445f2 100644 --- a/include/datadog/environment.h +++ b/include/datadog/environment.h @@ -59,7 +59,8 @@ namespace environment { MACRO(DD_TELEMETRY_LOG_COLLECTION_ENABLED) \ MACRO(DD_INSTRUMENTATION_INSTALL_ID) \ MACRO(DD_INSTRUMENTATION_INSTALL_TYPE) \ - MACRO(DD_INSTRUMENTATION_INSTALL_TIME) + MACRO(DD_INSTRUMENTATION_INSTALL_TIME) \ + MACRO(DD_EXTERNAL_ENV) #define WITH_COMMA(ARG) ARG, diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index e3fb611e..9958b508 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -17,6 +17,7 @@ #include "collector_response.h" #include "json.hpp" #include "msgpack.h" +#include "platform_util.h" #include "span_data.h" #include "telemetry_metrics.h" #include "trace_sampler.h" @@ -156,10 +157,36 @@ DatadogAgent::DatadogAgent( flush_interval_(config.flush_interval), request_timeout_(config.request_timeout), shutdown_timeout_(config.shutdown_timeout), - remote_config_(tracer_signature, rc_listeners, logger), - tracer_signature_(tracer_signature) { + remote_config_(tracer_signature, rc_listeners, logger) { assert(logger_); + // Set HTTP headers + headers_.emplace("Content-Type", "application/msgpack"); + headers_.emplace("Datadog-Meta-Lang", "cpp"); + 
headers_.emplace("Datadog-Meta-Lang-Version", + tracer_signature.library_language_version); + headers_.emplace("Datadog-Meta-Tracer-Version", + tracer_signature.library_version); + + // Origin Detection headers are not necessary when Unix Domain Socket (UDS) + // is used to communicate with the Datadog Agent. + if (!contains(config.url.scheme, "unix")) { + if (auto container_id = container::get_id()) { + if (container_id->type == container::ContainerID::Type::container_id) { + headers_.emplace("Datadog-Container-ID", container_id->value); + headers_.emplace("Datadog-Entity-Id", "ci-" + container_id->value); + } else if (container_id->type == + container::ContainerID::Type::cgroup_inode) { + headers_.emplace("Datadog-Entity-Id", "in-" + container_id->value); + } + } + + if (config.admission_controller_uid) { + headers_.emplace("Datadog-External-Env", + *config.admission_controller_uid); + } + } + tasks_.emplace_back(event_scheduler_->schedule_recurring_event( config.flush_interval, [this]() { flush(); })); @@ -236,14 +263,11 @@ void DatadogAgent::flush() { // This is the callback for setting request headers. // It's invoked synchronously (before `post` returns). - auto set_request_headers = [&](DictWriter& headers) { - headers.set("Content-Type", "application/msgpack"); - headers.set("Datadog-Meta-Lang", "cpp"); - headers.set("Datadog-Meta-Lang-Version", - tracer_signature_.library_language_version); - headers.set("Datadog-Meta-Tracer-Version", - tracer_signature_.library_version); - headers.set("X-Datadog-Trace-Count", std::to_string(trace_chunks.size())); + auto set_request_headers = [&](DictWriter& writer) { + writer.set("X-Datadog-Trace-Count", std::to_string(trace_chunks.size())); + for (const auto& [key, value] : headers_) { + writer.set(key, value); + } }; // This is the callback for the HTTP response. 
It's invoked diff --git a/src/datadog/datadog_agent.h b/src/datadog/datadog_agent.h index 16e48da1..fe016c46 100644 --- a/src/datadog/datadog_agent.h +++ b/src/datadog/datadog_agent.h @@ -49,7 +49,8 @@ class DatadogAgent : public Collector { std::chrono::steady_clock::duration shutdown_timeout_; remote_config::Manager remote_config_; - TracerSignature tracer_signature_; + + std::unordered_map headers_; void flush(); diff --git a/src/datadog/datadog_agent_config.cpp b/src/datadog/datadog_agent_config.cpp index f7cdd646..9ffe6217 100644 --- a/src/datadog/datadog_agent_config.cpp +++ b/src/datadog/datadog_agent_config.cpp @@ -7,6 +7,7 @@ #include "default_http_client.h" #include "parse_util.h" +#include "platform_util.h" #include "threaded_event_scheduler.h" namespace datadog { @@ -144,6 +145,12 @@ Expected finalize_config( result.metadata[ConfigName::AGENT_URL] = ConfigMetadata(ConfigName::AGENT_URL, url, origin); + /// Starting Agent X, the admission controller injects a unique identifier + /// through `DD_EXTERNAL_ENV`. This uid is used for origin detection. + if (auto external_env = lookup(environment::DD_EXTERNAL_ENV)) { + result.admission_controller_uid = std::string(*external_env); + } + return result; } diff --git a/src/datadog/platform_util.cpp b/src/datadog/platform_util.cpp index 169094f5..ad643e1f 100644 --- a/src/datadog/platform_util.cpp +++ b/src/datadog/platform_util.cpp @@ -1,5 +1,7 @@ #include "platform_util.h" +#include + // clang-format off #if defined(__x86_64__) || defined(_M_X64) # define DD_SDK_CPU_ARCH "x86_64" @@ -24,11 +26,13 @@ # define DD_SDK_OS "GNU/Linux" # define DD_SDK_KERNEL "Linux" # include "string_util.h" +# include # include +# include # include # include -# include -# include +# include +# include # endif #elif defined(_MSC_VER) # include @@ -102,7 +106,7 @@ std::tuple get_windows_info() { // application manifest, which is the lowest version supported by the // application. 
Use `RtlGetVersion` to obtain the accurate OS version // regardless of the manifest. - using RtlGetVersion = auto(*)(LPOSVERSIONINFOEXW)->NTSTATUS; + using RtlGetVersion = auto (*)(LPOSVERSIONINFOEXW)->NTSTATUS; RtlGetVersion func = (RtlGetVersion)GetProcAddress(GetModuleHandleA("ntdll"), "RtlGetVersion"); @@ -281,5 +285,126 @@ Expected InMemoryFile::make(StringView) { } #endif +namespace container { +namespace { +#if defined(__linux__) || defined(__unix__) +/// Magic numbers from linux/magic.h: +/// +constexpr uint64_t CGROUP_SUPER_MAGIC = 0x27e0eb; +constexpr uint64_t CGROUP2_SUPER_MAGIC = 0x63677270; + +/// Magic number from linux/proc_ns.h: +/// +constexpr ino_t HOST_CGROUP_NAMESPACE_INODE = 0xeffffffb; + +/// Represents the cgroup version of the current process. +enum class Cgroup : char { v1, v2 }; + +Optional get_inode(std::string_view path) { + struct stat buf; + if (stat(path.data(), &buf) != 0) { + return nullopt; + } + + return buf.st_ino; +} + +// Host namespace inode numbers are hardcoded, which allows for detection of +// whether the binary is running in host or not. However, it does not work when +// running in a Docker in Docker environment. 
+bool is_running_in_host_namespace() { + if (auto inode = get_inode("/proc/self/ns/cgroup")) { + return *inode == HOST_CGROUP_NAMESPACE_INODE; + } + + return false; +} + +Optional get_cgroup_version() { + struct statfs buf; + + if (statfs("/sys/fs/cgroup", &buf) != 0) { + return nullopt; + } + + if (buf.f_type == CGROUP_SUPER_MAGIC) + return Cgroup::v1; + else if (buf.f_type == CGROUP2_SUPER_MAGIC) + return Cgroup::v2; + + return nullopt; +} + +Optional find_docker_container_id_from_cgroup() { + constexpr std::string_view cgroup_path = "/proc/self/cgroup"; + + auto cgroup_fd = std::ifstream(cgroup_path.data(), std::ios::in); + if (!cgroup_fd.is_open()) return nullopt; + + return find_docker_container_id(cgroup_fd); +} +#endif +} // namespace + +Optional find_docker_container_id(std::istream& source) { + constexpr std::string_view docker_str = "docker-"; + + std::string line; + while (std::getline(source, line)) { + // Example: + // `0::/system.slice/docker-abcdef0123456789abcdef0123456789.scope` + if (auto beg = line.find(docker_str); beg != std::string::npos) { + beg += docker_str.size(); + auto end = line.find(".scope", beg); + if (end == std::string::npos || end - beg <= 0) { + continue; + } + + auto container_id = line.substr(beg, end - beg); + return container_id; + } + } + + return nullopt; +} + +Optional get_id() { +#if defined(__linux__) || defined(__unix__) + if (is_running_in_host_namespace()) { + // Not in a container, no need to continue. + return nullopt; + } + + auto maybe_cgroup = get_cgroup_version(); + if (!maybe_cgroup) return nullopt; + + ContainerID id; + switch (*maybe_cgroup) { + case Cgroup::v1: { + if (auto maybe_id = find_docker_container_id_from_cgroup()) { + id.value = *maybe_id; + id.type = ContainerID::Type::container_id; + break; + } + } + // NOTE(@dmehala): failed to find the container ID, try getting the cgroup + // inode. 
+ [[fallthrough]]; + case Cgroup::v2: { + if (auto maybe_inode = get_inode("/sys/fs/cgroup")) { + id.type = ContainerID::Type::cgroup_inode; + id.value = std::to_string(*maybe_inode); + } + }; break; + } + + return id; +#else + return nullopt; +#endif +} + +} // namespace container + } // namespace tracing } // namespace datadog diff --git a/src/datadog/platform_util.h b/src/datadog/platform_util.h index 5b0acb7a..b5739345 100644 --- a/src/datadog/platform_util.h +++ b/src/datadog/platform_util.h @@ -72,5 +72,33 @@ std::string get_process_name(); int at_fork_in_child(void (*on_fork)()); +namespace container { + +struct ContainerID final { + /// Type of unique ID. + enum class Type : char { container_id, cgroup_inode } type; + /// Identifier of the container. It _mostly_ depends on the + /// cgroup version: + /// - For cgroup v1, it contains the container ID. + /// - For cgroup v2, it contains the "container" inode. + std::string value; +}; + +/// Find the docker container ID from a given source. +/// This function is exposed mainly for testing purposes. +/// +/// @param source The input from which to read the Docker container ID. +/// @return An Optional containing the Docker container ID if found, otherwise +/// nothing. +Optional find_docker_container_id(std::istream& source); + +/// Function to retrieve the container metadata. +/// +/// @return A `ContainerMetadata` object containing metadata of the container in +/// which the current process is running. 
+Optional get_id(); + +} // namespace container + } // namespace tracing } // namespace datadog diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ecc6aede..60f83671 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -30,6 +30,7 @@ add_executable(tests test_glob.cpp test_limiter.cpp test_msgpack.cpp + test_platform_util.cpp test_parse_util.cpp test_smoke.cpp test_span.cpp diff --git a/test/test_datadog_agent.cpp b/test/test_datadog_agent.cpp index 3a9bfc7f..d07fb0fb 100644 --- a/test/test_datadog_agent.cpp +++ b/test/test_datadog_agent.cpp @@ -18,7 +18,9 @@ using namespace datadog; using namespace datadog::tracing; using namespace std::chrono_literals; -TEST_CASE("CollectorResponse", "[datadog_agent]") { +#define DATADOG_AGENT_TEST(x) TEST_CASE(x, "[datadog_agent]") + +DATADOG_AGENT_TEST("CollectorResponse") { TracerConfig config; config.service = "testsvc"; const auto logger = @@ -180,7 +182,7 @@ TEST_CASE("CollectorResponse", "[datadog_agent]") { // - telemetry is disabled, no event scheduled. // - telemetry is enabled, after x sec generate metrics is called. // - send_app_started? 
-TEST_CASE("Remote Configuration", "[datadog_agent]") { +DATADOG_AGENT_TEST("Remote Configuration") { const auto logger = std::make_shared(std::cerr, MockLogger::ERRORS_ONLY); logger->echo = nullptr; diff --git a/test/test_platform_util.cpp b/test/test_platform_util.cpp new file mode 100644 index 00000000..e333e04e --- /dev/null +++ b/test/test_platform_util.cpp @@ -0,0 +1,66 @@ +#include + +#include "platform_util.h" +#include "test.h" + +using namespace datadog::tracing; + +#define PLATFORM_UTIL_TEST(x) TEST_CASE(x, "[platform_util]") + +PLATFORM_UTIL_TEST("find docker container ID") { + struct TestCase { + size_t line; + std::string_view name; + std::string input; + Optional expected_container_id; + }; + + auto test_case = GENERATE(values({ + {__LINE__, "empty inputs", "", nullopt}, + {__LINE__, "no docker container ID", "coucou", nullopt}, + {__LINE__, "one line with docker container ID", + "0::/system.slice/docker-abcdef0123456789abcdef0123456789.scope", + "abcdef0123456789abcdef0123456789"}, + {__LINE__, "multiline wihtout docker container ID", R"( +0::/ +10:memory:/user.slice/user-0.slice/session-14.scope +9:hugetlb:/ +8:cpuset:/ +7:pids:/user.slice/user-0.slice/session-14.scope +6:freezer:/ +5:net_cls,net_prio:/ +4:perf_event:/ +3:cpu,cpuacct:/user.slice/user-0.slice/session-14.scope +2:devices:/user.slice/user-0.slice/session-14.scope +1:name=systemd:/user.slice/user-0.slice/session-14.scope +)", + nullopt}, + {__LINE__, "multiline with docker container ID", R"( +11:blkio:/user.slice/user-0.slice/session-14.scope +10:memory:/user.slice/user-0.slice/session-14.scope +9:hugetlb:/ +8:cpuset:/ +7:pids:/user.slice/user-0.slice/session-14.scope +3:cpu:/system.slice/docker-abcdef0123456789abcdef0123456789.scope +6:freezer:/ +5:net_cls,net_prio:/ +4:perf_event:/ +3:cpu,cpuacct:/user.slice/user-0.slice/session-14.scope +2:devices:/user.slice/user-0.slice/session-14.scope +1:name=systemd:/user.slice/user-0.slice/session-14.scope + )", + 
"abcdef0123456789abcdef0123456789"}, + })); + + CAPTURE(test_case.name); + + std::istringstream is(test_case.input); + + auto maybe_container_id = container::find_docker_container_id(is); + if (test_case.expected_container_id.has_value()) { + REQUIRE(maybe_container_id.has_value()); + CHECK(*maybe_container_id == *test_case.expected_container_id); + } else { + CHECK(!maybe_container_id.has_value()); + } +} diff --git a/test/test_tracer_config.cpp b/test/test_tracer_config.cpp index 197ab9df..4fc4b488 100644 --- a/test/test_tracer_config.cpp +++ b/test/test_tracer_config.cpp @@ -529,6 +529,19 @@ TRACER_CONFIG_TEST("TracerConfig::agent") { REQUIRE(agent->url.authority == test_case.expected_authority); } } + + SECTION("Admission Controller UID") { + const EnvGuard env_guard{"DD_EXTERNAL_ENV", + "c8e4eba8-3287-4cc2-ae1a-30e14be6e470"}; + auto finalized = finalize_config(config); + REQUIRE(finalized); + const auto* const agent = + std::get_if(&finalized->collector); + REQUIRE(agent); + REQUIRE(agent->admission_controller_uid); + CHECK(agent->admission_controller_uid == + "c8e4eba8-3287-4cc2-ae1a-30e14be6e470"); + } } TRACER_CONFIG_TEST("TracerConfig::trace_sampler") { From 6274290ed03ff92dfd7e07936f895586334689ca Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Mon, 12 May 2025 15:54:05 +0200 Subject: [PATCH 2/5] fix compilation --- src/datadog/platform_util.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/datadog/platform_util.cpp b/src/datadog/platform_util.cpp index ad643e1f..7c4f1349 100644 --- a/src/datadog/platform_util.cpp +++ b/src/datadog/platform_util.cpp @@ -1,5 +1,6 @@ #include "platform_util.h" +#include #include // clang-format off @@ -106,7 +107,7 @@ std::tuple get_windows_info() { // application manifest, which is the lowest version supported by the // application. Use `RtlGetVersion` to obtain the accurate OS version // regardless of the manifest. 
- using RtlGetVersion = auto (*)(LPOSVERSIONINFOEXW)->NTSTATUS; + using RtlGetVersion = auto(*)(LPOSVERSIONINFOEXW)->NTSTATUS; RtlGetVersion func = (RtlGetVersion)GetProcAddress(GetModuleHandleA("ntdll"), "RtlGetVersion"); From e1463ae4c7a5c946ef3e32f71b818a8cfde69452 Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Tue, 13 May 2025 11:23:45 +0200 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: Louis Tricot <75956635+dubloom@users.noreply.github.com> --- src/datadog/platform_util.cpp | 3 ++- src/datadog/platform_util.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/datadog/platform_util.cpp b/src/datadog/platform_util.cpp index 7c4f1349..199ae179 100644 --- a/src/datadog/platform_util.cpp +++ b/src/datadog/platform_util.cpp @@ -314,6 +314,7 @@ Optional get_inode(std::string_view path) { // whether the binary is running in host or not. However, it does not work when // running in a Docker in Docker environment. bool is_running_in_host_namespace() { + // linux procfs file that represents the cgroup namespace of the current process if (auto inode = get_inode("/proc/self/ns/cgroup")) { return *inode == HOST_CGROUP_NAMESPACE_INODE; } @@ -339,7 +340,7 @@ Optional get_cgroup_version() { Optional find_docker_container_id_from_cgroup() { constexpr std::string_view cgroup_path = "/proc/self/cgroup"; - auto cgroup_fd = std::ifstream(cgroup_path.data(), std::ios::in); + auto cgroup_fd = std::ifstream("/proc/self/cgroup", std::ios::in); if (!cgroup_fd.is_open()) return nullopt; return find_docker_container_id(cgroup_fd); diff --git a/src/datadog/platform_util.h b/src/datadog/platform_util.h index b5739345..d7fd0c3b 100644 --- a/src/datadog/platform_util.h +++ b/src/datadog/platform_util.h @@ -94,7 +94,7 @@ Optional find_docker_container_id(std::istream& source); /// Function to retrieve the container metadata. 
/// -/// @return A `ContainerMetadata` object containing metadata of the container in +/// @return A `ContainerID` object containing id of the container in /// which the current process is running. Optional get_id(); From e48984d0ec472ea94e6e61c5ec9d1a47e140a924 Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Tue, 13 May 2025 11:24:40 +0200 Subject: [PATCH 4/5] Apply suggestions from code review --- src/datadog/platform_util.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/datadog/platform_util.cpp b/src/datadog/platform_util.cpp index 199ae179..4eede80d 100644 --- a/src/datadog/platform_util.cpp +++ b/src/datadog/platform_util.cpp @@ -338,8 +338,6 @@ Optional get_cgroup_version() { } Optional find_docker_container_id_from_cgroup() { - constexpr std::string_view cgroup_path = "/proc/self/cgroup"; - auto cgroup_fd = std::ifstream("/proc/self/cgroup", std::ios::in); if (!cgroup_fd.is_open()) return nullopt; From 29f6a506f924f3058966158495b1812a3769c5c2 Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Tue, 13 May 2025 11:54:22 +0200 Subject: [PATCH 5/5] fix formatting --- src/datadog/platform_util.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/datadog/platform_util.cpp b/src/datadog/platform_util.cpp index 4eede80d..381a3aba 100644 --- a/src/datadog/platform_util.cpp +++ b/src/datadog/platform_util.cpp @@ -314,7 +314,8 @@ Optional get_inode(std::string_view path) { // whether the binary is running in host or not. However, it does not work when // running in a Docker in Docker environment. bool is_running_in_host_namespace() { - // linux procfs file that represents the cgroup namespace of the current process + // linux procfs file that represents the cgroup namespace of the current + // process. if (auto inode = get_inode("/proc/self/ns/cgroup")) { return *inode == HOST_CGROUP_NAMESPACE_INODE; }