From 5b68c4dbc4aa9262e6d06e83b9c57d9cf1c070d3 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Mon, 17 Jun 2024 13:18:46 -0400 Subject: [PATCH 01/25] wip: tracing --- bottlecap/Cargo.lock | 526 +++++++++++++++++++-- bottlecap/Cargo.toml | 8 + bottlecap/src/bin/bottlecap/main.rs | 23 + bottlecap/src/config/mod.rs | 4 + bottlecap/src/lib.rs | 1 + bottlecap/src/traces/config.rs | 212 +++++++++ bottlecap/src/traces/env_verifier.rs | 602 ++++++++++++++++++++++++ bottlecap/src/traces/http_utils.rs | 171 +++++++ bottlecap/src/traces/mini_agent.rs | 196 ++++++++ bottlecap/src/traces/mod.rs | 11 + bottlecap/src/traces/stats_flusher.rs | 91 ++++ bottlecap/src/traces/stats_processor.rs | 88 ++++ bottlecap/src/traces/trace_flusher.rs | 68 +++ bottlecap/src/traces/trace_processor.rs | 293 ++++++++++++ 14 files changed, 2257 insertions(+), 37 deletions(-) create mode 100644 bottlecap/src/traces/config.rs create mode 100644 bottlecap/src/traces/env_verifier.rs create mode 100644 bottlecap/src/traces/http_utils.rs create mode 100644 bottlecap/src/traces/mini_agent.rs create mode 100644 bottlecap/src/traces/mod.rs create mode 100644 bottlecap/src/traces/stats_flusher.rs create mode 100644 bottlecap/src/traces/stats_processor.rs create mode 100644 bottlecap/src/traces/trace_flusher.rs create mode 100644 bottlecap/src/traces/trace_processor.rs diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock index 4cd702b93..92a805e4f 100644 --- a/bottlecap/Cargo.lock +++ b/bottlecap/Cargo.lock @@ -53,7 +53,7 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -138,15 +138,23 @@ dependencies = [ name = "bottlecap" version = "0.1.0" dependencies = [ + "anyhow", + "async-trait", "base64 0.22.1", "chrono", "datadog-protos", + "datadog-trace-normalization", + "datadog-trace-obfuscation", + "datadog-trace-protobuf", + "datadog-trace-utils", + "ddcommon", "ddsketch-agent", "figment", 
"fnv", "hashbrown 0.14.5", "hex", "hmac", + "hyper 0.14.29", "log", "proptest", "protobuf", @@ -211,6 +219,22 @@ dependencies = [ "serde", ] +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + [[package]] name = "cpufeatures" version = "0.2.12" @@ -220,6 +244,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + [[package]] name = "crypto-common" version = "0.1.6" @@ -236,13 +269,92 @@ version = "0.1.0" source = "git+https://github.com/DataDog/saluki/#0e55b345e6d2a215474147542f5e776d4f593af9" dependencies = [ "bytes", - "prost", + "prost 0.12.6", "protobuf", "protobuf-codegen", "tonic", "tonic-build", ] +[[package]] +name = "datadog-trace-normalization" +version = "10.0.0" +dependencies = [ + "anyhow", + "datadog-trace-protobuf", +] + +[[package]] +name = "datadog-trace-obfuscation" +version = "10.0.0" +dependencies = [ + "anyhow", + "datadog-trace-protobuf", + "datadog-trace-utils", + "ddcommon", + "log", + "percent-encoding", + "regex", + "serde", + "serde_json", + "url", +] + +[[package]] +name = "datadog-trace-protobuf" +version = "10.0.0" +dependencies = [ + "prost 0.11.9", + "serde", + "serde_bytes", +] + +[[package]] +name = "datadog-trace-utils" +version = "10.0.0" +dependencies = [ + "anyhow", + "bytes", + "datadog-trace-normalization", + "datadog-trace-protobuf", + "ddcommon", + "flate2", + "futures", + "hyper 0.14.29", + "hyper-rustls 
0.23.2", + "log", + "prost 0.11.9", + "rand", + "rmp-serde", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "ddcommon" +version = "10.0.0" +dependencies = [ + "anyhow", + "futures", + "futures-core", + "futures-util", + "hex", + "http 0.2.12", + "hyper 0.14.29", + "hyper-rustls 0.23.2", + "lazy_static", + "log", + "pin-project", + "regex", + "rustls 0.20.9", + "rustls-native-certs", + "serde", + "static_assertions", + "tokio", + "tokio-rustls 0.23.4", +] + [[package]] name = "ddsketch-agent" version = "0.1.0" @@ -314,6 +426,16 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "flate2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "float_eq" version = "1.0.1" @@ -335,6 +457,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.30" @@ -342,6 +479,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -350,6 +488,34 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +[[package]] +name = "futures-executor" +version = "0.3.30" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "futures-sink" version = "0.3.30" @@ -368,10 +534,16 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ + "futures-channel", "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "pin-utils", + "slab", ] [[package]] @@ -530,6 +702,35 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f361cde2f109281a220d4307746cdfd5ee3f410da58a70377762396775634b33" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.3.1" @@ -550,6 
+751,20 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" +dependencies = [ + "http 0.2.12", + "hyper 0.14.29", + "rustls 0.20.9", + "rustls-native-certs", + "tokio", + "tokio-rustls 0.23.4", +] + [[package]] name = "hyper-rustls" version = "0.26.0" @@ -558,12 +773,12 @@ checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" dependencies = [ "futures-util", "http 1.1.0", - "hyper", + "hyper 1.3.1", "hyper-util", - "rustls", + "rustls 0.22.4", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.25.0", "tower-service", ] @@ -578,7 +793,7 @@ dependencies = [ "futures-util", "http 1.1.0", "http-body 1.0.0", - "hyper", + "hyper 1.3.1", "pin-project-lite", "socket2", "tokio", @@ -629,6 +844,15 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.12.1" @@ -775,6 +999,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + [[package]] name = "ordered-float" version = "4.2.0" @@ -807,6 +1037,12 @@ dependencies = [ "windows-targets 0.52.5", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "pear" version = "0.2.9" @@ -827,7 +1063,7 @@ dependencies = [ "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -863,7 +1099,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -891,7 +1127,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.66", ] [[package]] @@ -911,7 +1147,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", "version_check", "yansi", ] @@ -936,6 +1172,16 @@ dependencies = [ "unarray", ] +[[package]] +name = "prost" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" +dependencies = [ + "bytes", + "prost-derive 0.11.9", +] + [[package]] name = "prost" version = "0.12.6" @@ -943,7 +1189,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.12.6", ] [[package]] @@ -954,19 +1200,32 @@ checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" dependencies = [ "bytes", "heck", - "itertools", + "itertools 0.12.1", "log", "multimap", "once_cell", "petgraph", "prettyplease", - "prost", + "prost 0.12.6", "prost-types", "regex", - "syn", + "syn 2.0.66", "tempfile", ] +[[package]] +name = "prost-derive" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" +dependencies = [ + 
"anyhow", + "itertools 0.10.5", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "prost-derive" version = "0.12.6" @@ -974,10 +1233,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" dependencies = [ "anyhow", - "itertools", + "itertools 0.12.1", "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -986,7 +1245,7 @@ version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" dependencies = [ - "prost", + "prost 0.12.6", ] [[package]] @@ -1162,8 +1421,8 @@ dependencies = [ "http 1.1.0", "http-body 1.0.0", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.3.1", + "hyper-rustls 0.26.0", "hyper-util", "ipnet", "js-sys", @@ -1172,15 +1431,15 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls", - "rustls-pemfile", + "rustls 0.22.4", + "rustls-pemfile 2.1.2", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls", + "tokio-rustls 0.25.0", "tower-service", "url", "wasm-bindgen", @@ -1190,6 +1449,21 @@ dependencies = [ "winreg", ] +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin 0.5.2", + "untrusted 0.7.1", + "web-sys", + "winapi", +] + [[package]] name = "ring" version = "0.17.8" @@ -1200,11 +1474,33 @@ dependencies = [ "cfg-if", "getrandom", "libc", - "spin", - "untrusted", + "spin 0.9.8", + "untrusted 0.9.0", "windows-sys 0.52.0", ] +[[package]] +name = "rmp" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" +dependencies = [ + "byteorder", 
+ "num-traits", + "paste", +] + +[[package]] +name = "rmp-serde" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db" +dependencies = [ + "byteorder", + "rmp", + "serde", +] + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -1224,6 +1520,18 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustls" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99" +dependencies = [ + "log", + "ring 0.16.20", + "sct", + "webpki", +] + [[package]] name = "rustls" version = "0.22.4" @@ -1231,13 +1539,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" dependencies = [ "log", - "ring", + "ring 0.17.8", "rustls-pki-types", "rustls-webpki", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile 1.0.4", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", +] + [[package]] name = "rustls-pemfile" version = "2.1.2" @@ -1260,9 +1589,9 @@ version = "0.102.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff448f7e92e913c4b7d4c6d8e4540a1724b319b4152b8aef6d4cf8339712b33e" dependencies = [ - "ring", + "ring 0.17.8", "rustls-pki-types", - "untrusted", + "untrusted 0.9.0", ] [[package]] @@ -1283,12 +1612,54 @@ version = "1.0.18" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "schannel" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring 0.17.8", + "untrusted 0.9.0", +] + +[[package]] +name = "security-framework" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "serde" version = "1.0.203" @@ -1298,6 +1669,15 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_bytes" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b8497c313fd43ab992087548117643f6fcd935cbf36f176ffda0aacf9591734" +dependencies = [ + "serde", +] + [[package]] name = "serde_derive" version = "1.0.203" @@ -1306,7 +1686,7 @@ checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ 
-1390,18 +1770,41 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + [[package]] name = "spin" version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "subtle" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.66" @@ -1448,7 +1851,7 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -1501,7 +1904,18 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", +] + +[[package]] +name = "tokio-rustls" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" +dependencies = [ + "rustls 0.20.9", + "tokio", + "webpki", ] [[package]] @@ -1510,7 +1924,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" dependencies = [ - "rustls", + "rustls 
0.22.4", "rustls-pki-types", "tokio", ] @@ -1552,7 +1966,7 @@ dependencies = [ "http-body 0.4.6", "percent-encoding", "pin-project", - "prost", + "prost 0.12.6", "tokio", "tokio-stream", "tower-layer", @@ -1570,7 +1984,7 @@ dependencies = [ "proc-macro2", "prost-build", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -1619,7 +2033,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] @@ -1712,6 +2126,12 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + [[package]] name = "untrusted" version = "0.9.0" @@ -1798,7 +2218,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.66", "wasm-bindgen-shared", ] @@ -1832,7 +2252,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1853,6 +2273,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" +dependencies = [ + "ring 0.17.8", + "untrusted 0.9.0", +] + [[package]] name = "webpki-roots" version = "0.26.1" @@ -1874,6 +2304,28 @@ dependencies = [ "rustix", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = 
"winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.48.0" @@ -2046,7 +2498,7 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.66", ] [[package]] diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index ff98a5c28..bd653c2ec 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -5,12 +5,20 @@ edition = "2021" publish = false [dependencies] +async-trait = "0.1.64" +anyhow = "1.0" chrono = { version = "0.4.38", features = ["serde", "std", "now"], default-features = false} datadog-protos = { version = "0.1.0", default-features = false, git = "https://github.com/DataDog/saluki/" } ddsketch-agent = { version = "0.1.0", default-features = false, git = "https://github.com/DataDog/saluki/" } +ddcommon = { path = "../../libdatadog/ddcommon" } +datadog-trace-protobuf = { path = "../../libdatadog/trace-protobuf" } +datadog-trace-utils = { path = "../../libdatadog/trace-utils" } +datadog-trace-normalization = { path = "../../libdatadog/trace-normalization" } +datadog-trace-obfuscation = { path = "../../libdatadog/trace-obfuscation" } figment = { version = "0.10.15", default-features = false, features = ["yaml", "env"] } fnv = { version = "1.0.7", default-features = false } hashbrown = { version = "0.14.3", default-features = false, features = ["inline-more"] } +hyper = { version = "0.14", default-features = false, features = ["server"] } log = { version = "0.4.21", default-features = false } protobuf = { version = "3.4.0", default-features = false } regex = { version = 
"1.10.4", default-features = false } diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index f5ba32652..8a0643f28 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -53,6 +53,10 @@ use bottlecap::{ events::{Status, TelemetryRecord}, listener::TelemetryListener, }, + traces::{ + config as TraceConfig, mini_agent, stats_flusher, stats_processor, trace_flusher, + trace_processor, + }, DOGSTATSD_PORT, EXTENSION_ACCEPT_FEATURE_HEADER, EXTENSION_FEATURES, EXTENSION_HOST, EXTENSION_ID_HEADER, EXTENSION_NAME, EXTENSION_NAME_HEADER, EXTENSION_ROUTE, LAMBDA_RUNTIME_SLUG, TELEMETRY_PORT, @@ -261,6 +265,25 @@ async fn extension_loop_active( Arc::clone(&metrics_aggr), config.site.clone(), ); + let trace_flusher = Arc::new(trace_flusher::ServerlessTraceFlusher {}); + let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor {}); + + let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher {}); + let stats_processor = Arc::new(stats_processor::ServerlessStatsProcessor {}); + + let trace_config = TraceConfig::Config::new().unwrap(); + + let mini_agent = Box::new(mini_agent::MiniAgent { + config: Arc::new(trace_config), + trace_processor, + trace_flusher, + stats_processor, + stats_flusher, + }); + + if let Err(e) = mini_agent.start_mini_agent() { + error!("Error when starting serverless trace mini agent: {e}"); + } let lambda_enhanced_metrics = enhanced_metrics::new(Arc::clone(&metrics_aggr)); let dogstatsd_cancel_token = start_dogstatsd(event_bus.get_sender_copy(), &metrics_aggr).await; diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index 590ec9e2e..a20bd3f05 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -33,6 +33,8 @@ pub struct Config { pub apm_enabled: bool, pub lambda_handler: String, pub serverless_flush_strategy: FlushStrategy, + pub trace_enabled: bool, + pub serverless_trace_enabled: bool, } impl Default for 
Config { @@ -57,6 +59,8 @@ impl Default for Config { // APM apm_enabled: false, lambda_handler: String::default(), + serverless_trace_enabled: true, + trace_enabled: true } } } diff --git a/bottlecap/src/lib.rs b/bottlecap/src/lib.rs index 180f81ef8..7b1c658e9 100644 --- a/bottlecap/src/lib.rs +++ b/bottlecap/src/lib.rs @@ -27,6 +27,7 @@ pub mod metrics; pub mod secrets; pub mod tags; pub mod telemetry; +pub mod traces; use std::{env, io}; diff --git a/bottlecap/src/traces/config.rs b/bottlecap/src/traces/config.rs new file mode 100644 index 000000000..4e0816eee --- /dev/null +++ b/bottlecap/src/traces/config.rs @@ -0,0 +1,212 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use ddcommon::Endpoint; +use std::borrow::Cow; +use std::env; +use std::str::FromStr; + +use datadog_trace_obfuscation::obfuscation_config; +use datadog_trace_utils::config_utils::{ + read_cloud_env, trace_intake_url, trace_intake_url_prefixed, trace_stats_url, + trace_stats_url_prefixed, +}; +use datadog_trace_utils::trace_utils; + +#[derive(Debug)] +pub struct Config { + pub dd_site: String, + pub env_type: trace_utils::EnvironmentType, + pub function_name: Option, + pub max_request_content_length: usize, + pub mini_agent_version: String, + pub obfuscation_config: obfuscation_config::ObfuscationConfig, + pub os: String, + /// how often to flush stats, in seconds + pub stats_flush_interval: u64, + /// how often to flush traces, in seconds + pub trace_flush_interval: u64, + pub trace_intake: Endpoint, + pub trace_stats_intake: Endpoint, + /// timeout for environment verification, in milliseconds + pub verify_env_timeout: u64, +} + +impl Config { + pub fn new() -> Result> { + let api_key: Cow = env::var("DD_API_KEY") + .map_err(|_| anyhow::anyhow!("DD_API_KEY environment variable is not set"))? + .into(); + + let (function_name, env_type) = read_cloud_env().ok_or_else(|| { + anyhow::anyhow!("Unable to identify environment. 
Shutting down Mini Agent.") + })?; + + let dd_site = env::var("DD_SITE").unwrap_or_else(|_| "datadoghq.com".to_string()); + + // construct the trace & trace stats intake urls based on DD_SITE env var (to flush traces & + // trace stats to) + let mut trace_intake_url = trace_intake_url(&dd_site); + let mut trace_stats_intake_url = trace_stats_url(&dd_site); + + // DD_APM_DD_URL env var will primarily be used for integration tests + // overrides the entire trace/trace stats intake url prefix + if let Ok(endpoint_prefix) = env::var("DD_APM_DD_URL") { + trace_intake_url = trace_intake_url_prefixed(&endpoint_prefix); + trace_stats_intake_url = trace_stats_url_prefixed(&endpoint_prefix); + }; + + let obfuscation_config = obfuscation_config::ObfuscationConfig::new().map_err(|err| { + anyhow::anyhow!( + "Error creating obfuscation config, Mini Agent will not start. Error: {err}", + ) + })?; + + let mini_agent_version: String = env!("CARGO_PKG_VERSION").to_string(); + + Ok(Config { + function_name: Some(function_name), + env_type, + os: env::consts::OS.to_string(), + max_request_content_length: 10 * 1024 * 1024, // 10MB in Bytes + trace_flush_interval: 3, + stats_flush_interval: 3, + verify_env_timeout: 100, + dd_site, + trace_intake: Endpoint { + url: hyper::Uri::from_str(&trace_intake_url).unwrap(), + api_key: Some(api_key.clone()), + }, + trace_stats_intake: Endpoint { + url: hyper::Uri::from_str(&trace_stats_intake_url).unwrap(), + api_key: Some(api_key), + }, + obfuscation_config, + mini_agent_version, + }) + } +} + +#[cfg(test)] +mod tests { + use duplicate::duplicate_item; + use serial_test::serial; + use std::env; + + use crate::config; + + #[test] + #[serial] + fn test_error_if_unable_to_identify_env() { + env::set_var("DD_API_KEY", "_not_a_real_key_"); + + let config = config::Config::new(); + assert!(config.is_err()); + assert_eq!( + config.unwrap_err().to_string(), + "Unable to identify environment. Shutting down Mini Agent." 
+ ); + env::remove_var("DD_API_KEY"); + } + + #[test] + #[serial] + fn test_error_if_no_api_key_env_var() { + let config = config::Config::new(); + assert!(config.is_err()); + assert_eq!( + config.unwrap_err().to_string(), + "DD_API_KEY environment variable is not set" + ); + } + + #[test] + #[serial] + fn test_default_trace_and_trace_stats_urls() { + env::set_var("DD_API_KEY", "_not_a_real_key_"); + env::set_var("K_SERVICE", "function_name"); + let config_res = config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!( + config.trace_intake.url, + "https://trace.agent.datadoghq.com/api/v0.2/traces" + ); + assert_eq!( + config.trace_stats_intake.url, + "https://trace.agent.datadoghq.com/api/v0.2/stats" + ); + env::remove_var("DD_API_KEY"); + env::remove_var("K_SERVICE"); + } + + #[duplicate_item( + test_name dd_site expected_url; + [test_us1_trace_intake_url] ["datadoghq.com"] ["https://trace.agent.datadoghq.com/api/v0.2/traces"]; + [test_us3_trace_intake_url] ["us3.datadoghq.com"] ["https://trace.agent.us3.datadoghq.com/api/v0.2/traces"]; + [test_us5_trace_intake_url] ["us5.datadoghq.com"] ["https://trace.agent.us5.datadoghq.com/api/v0.2/traces"]; + [test_eu_trace_intake_url] ["datadoghq.eu"] ["https://trace.agent.datadoghq.eu/api/v0.2/traces"]; + [test_ap1_trace_intake_url] ["ap1.datadoghq.com"] ["https://trace.agent.ap1.datadoghq.com/api/v0.2/traces"]; + [test_gov_trace_intake_url] ["ddog-gov.com"] ["https://trace.agent.ddog-gov.com/api/v0.2/traces"]; + )] + #[test] + #[serial] + fn test_name() { + env::set_var("DD_API_KEY", "_not_a_real_key_"); + env::set_var("K_SERVICE", "function_name"); + env::set_var("DD_SITE", dd_site); + let config_res = config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!(config.trace_intake.url, expected_url); + env::remove_var("DD_API_KEY"); + env::remove_var("DD_SITE"); + env::remove_var("K_SERVICE"); + } + + #[duplicate_item( + test_name 
dd_site expected_url; + [test_us1_trace_stats_intake_url] ["datadoghq.com"] ["https://trace.agent.datadoghq.com/api/v0.2/stats"]; + [test_us3_trace_stats_intake_url] ["us3.datadoghq.com"] ["https://trace.agent.us3.datadoghq.com/api/v0.2/stats"]; + [test_us5_trace_stats_intake_url] ["us5.datadoghq.com"] ["https://trace.agent.us5.datadoghq.com/api/v0.2/stats"]; + [test_eu_trace_stats_intake_url] ["datadoghq.eu"] ["https://trace.agent.datadoghq.eu/api/v0.2/stats"]; + [test_ap1_trace_stats_intake_url] ["ap1.datadoghq.com"] ["https://trace.agent.ap1.datadoghq.com/api/v0.2/stats"]; + [test_gov_trace_stats_intake_url] ["ddog-gov.com"] ["https://trace.agent.ddog-gov.com/api/v0.2/stats"]; + )] + #[test] + #[serial] + fn test_name() { + env::set_var("DD_API_KEY", "_not_a_real_key_"); + env::set_var("K_SERVICE", "function_name"); + env::set_var("DD_SITE", dd_site); + let config_res = config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!(config.trace_stats_intake.url, expected_url); + env::remove_var("DD_API_KEY"); + env::remove_var("DD_SITE"); + env::remove_var("K_SERVICE"); + } + + #[test] + #[serial] + fn test_set_custom_trace_and_trace_stats_intake_url() { + env::set_var("DD_API_KEY", "_not_a_real_key_"); + env::set_var("K_SERVICE", "function_name"); + env::set_var("DD_APM_DD_URL", "http://127.0.0.1:3333"); + let config_res = config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!( + config.trace_intake.url, + "http://127.0.0.1:3333/api/v0.2/traces" + ); + assert_eq!( + config.trace_stats_intake.url, + "http://127.0.0.1:3333/api/v0.2/stats" + ); + env::remove_var("DD_API_KEY"); + env::remove_var("DD_APM_DD_URL"); + env::remove_var("K_SERVICE"); + } +} diff --git a/bottlecap/src/traces/env_verifier.rs b/bottlecap/src/traces/env_verifier.rs new file mode 100644 index 000000000..714456de0 --- /dev/null +++ b/bottlecap/src/traces/env_verifier.rs @@ -0,0 +1,602 @@ +// Copyright 
2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use async_trait::async_trait; +use hyper::{Body, Client, Method, Request, Response}; +use log::{debug, error}; +use serde::{Deserialize, Serialize}; +use std::fs; +use std::path::Path; +use std::process; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use datadog_trace_utils::trace_utils; + +const GCP_METADATA_URL: &str = "http://metadata.google.internal/computeMetadata/v1/?recursive=true"; +const AZURE_LINUX_FUNCTION_ROOT_PATH_STR: &str = "/home/site/wwwroot"; +const AZURE_WINDOWS_FUNCTION_ROOT_PATH_STR: &str = "C:\\home\\site\\wwwroot"; +const AZURE_HOST_JSON_NAME: &str = "host.json"; +const AZURE_FUNCTION_JSON_NAME: &str = "function.json"; + +#[derive(Default, Debug, Deserialize, Serialize, Eq, PartialEq)] +pub struct GCPMetadata { + pub instance: GCPInstance, + pub project: GCPProject, +} + +#[derive(Debug, Deserialize, Serialize, Eq, PartialEq)] +pub struct GCPInstance { + pub region: String, +} +impl Default for GCPInstance { + fn default() -> Self { + Self { + region: "unknown".to_string(), + } + } +} + +#[derive(Debug, Deserialize, Serialize, Eq, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct GCPProject { + pub project_id: String, +} +impl Default for GCPProject { + fn default() -> Self { + Self { + project_id: "unknown".to_string(), + } + } +} + +#[async_trait] +pub trait EnvVerifier { + /// Verifies the mini agent is running in the intended environment. if not, exit the process. + /// Returns MiniAgentMetadata, a struct of metadata collected from the environment. 
+ async fn verify_environment( + &self, + verify_env_timeout: u64, + env_type: &trace_utils::EnvironmentType, + os: &str, + ) -> trace_utils::MiniAgentMetadata; +} + +pub struct ServerlessEnvVerifier { + gmc: Arc>, +} + +impl Default for ServerlessEnvVerifier { + fn default() -> Self { + Self::new() + } +} + +impl ServerlessEnvVerifier { + pub fn new() -> Self { + Self { + gmc: Arc::new(Box::new(GoogleMetadataClientWrapper {})), + } + } + + #[cfg(test)] + pub(crate) fn new_with_google_metadata_client( + gmc: Box, + ) -> Self { + Self { gmc: Arc::new(gmc) } + } + + async fn verify_gcp_environment_or_exit( + &self, + verify_env_timeout: u64, + ) -> trace_utils::MiniAgentMetadata { + let gcp_metadata_request = ensure_gcp_function_environment(self.gmc.as_ref().as_ref()); + let gcp_metadata = match tokio::time::timeout( + Duration::from_millis(verify_env_timeout), + gcp_metadata_request, + ) + .await + { + Ok(result) => match result { + Ok(metadata) => { + debug!("Successfully fetched Google Metadata."); + metadata + } + Err(err) => { + error!("The Mini Agent can only be run in Google Cloud Functions & Azure Functions. Verification has failed, shutting down now. Error: {err}"); + process::exit(1); + } + }, + Err(_) => { + error!("Google Metadata request timeout of {verify_env_timeout} ms exceeded. 
Using default values."); + GCPMetadata::default() + } + }; + trace_utils::MiniAgentMetadata { + gcp_project_id: Some(gcp_metadata.project.project_id), + gcp_region: Some(get_region_from_gcp_region_string( + gcp_metadata.instance.region, + )), + } + } +} + +#[async_trait] +impl EnvVerifier for ServerlessEnvVerifier { + async fn verify_environment( + &self, + verify_env_timeout: u64, + env_type: &trace_utils::EnvironmentType, + os: &str, + ) -> trace_utils::MiniAgentMetadata { + match env_type { + trace_utils::EnvironmentType::AzureFunction => { + verify_azure_environment_or_exit(os).await; + trace_utils::MiniAgentMetadata::default() + } + trace_utils::EnvironmentType::CloudFunction => { + return self + .verify_gcp_environment_or_exit(verify_env_timeout) + .await; + } + } + } +} + +/// The region found in GCP Metadata comes in the format: "projects/123123/regions/us-east1" +/// This function extracts just the region (us-east1) from this GCP region string. +/// If the string does not have 4 parts (separated by "/") or extraction fails, return "unknown" +fn get_region_from_gcp_region_string(str: String) -> String { + let split_str = str.split('/').collect::>(); + if split_str.len() != 4 { + return "unknown".to_string(); + } + match split_str.last() { + Some(res) => res.to_string(), + None => "unknown".to_string(), + } +} + +/// GoogleMetadataClient trait is used so we can mock a google metadata server response in unit +/// tests +#[async_trait] +pub(crate) trait GoogleMetadataClient { + async fn get_metadata(&self) -> anyhow::Result>; +} +struct GoogleMetadataClientWrapper {} + +#[async_trait] +impl GoogleMetadataClient for GoogleMetadataClientWrapper { + async fn get_metadata(&self) -> anyhow::Result> { + let req = Request::builder() + .method(Method::POST) + .uri(GCP_METADATA_URL) + .header("Metadata-Flavor", "Google") + .body(Body::empty()) + .map_err(|err| anyhow::anyhow!(err.to_string()))?; + + let client = Client::new(); + match client.request(req).await { + 
Ok(res) => Ok(res), + Err(err) => anyhow::bail!(err.to_string()), + } + } +} + +/// Checks if we are running in a Google Cloud Function environment. +/// If true, returns Metadata from the Google Cloud environment. +/// Otherwise, returns an error with the verification failure reason. +async fn ensure_gcp_function_environment( + metadata_client: &(dyn GoogleMetadataClient + Send + Sync), +) -> anyhow::Result { + let response = metadata_client.get_metadata().await.map_err(|err| { + anyhow::anyhow!("Can't communicate with Google Metadata Server. Error: {err}") + })?; + + let (parts, body) = response.into_parts(); + let headers = parts.headers; + match headers.get("Server") { + Some(val) => { + if val != "Metadata Server for Serverless" { + anyhow::bail!("In Google Cloud, but not in a function environment.") + } + } + None => { + anyhow::bail!("In Google Cloud, but server identifier not found.") + } + } + + let gcp_metadata = match get_gcp_metadata_from_body(body).await { + Ok(res) => res, + Err(err) => { + error!("Failed to get GCP Function Metadata. Will not enrich spans. {err}"); + return Ok(GCPMetadata::default()); + } + }; + + Ok(gcp_metadata) +} + +async fn get_gcp_metadata_from_body(body: hyper::Body) -> anyhow::Result { + let bytes = hyper::body::to_bytes(body).await?; + let body_str = String::from_utf8(bytes.to_vec())?; + let gcp_metadata: GCPMetadata = serde_json::from_str(&body_str)?; + Ok(gcp_metadata) +} + +async fn verify_azure_environment_or_exit(os: &str) { + let now = Instant::now(); + match ensure_azure_function_environment(Box::new(AzureVerificationClientWrapper {}), os).await { + Ok(_) => { + debug!("Successfully verified Azure Function Environment."); + } + Err(e) => { + error!("The Mini Agent can only be run in Google Cloud Functions & Azure Functions. Verification has failed, shutting down now. 
Error: {e}"); + process::exit(1); + } + } + debug!( + "Time taken to verify Azure Functions env: {} ms", + now.elapsed().as_millis() + ); +} + +/// AzureVerificationClient trait is used so we can mock the azure function local url response in +/// unit tests +trait AzureVerificationClient { + fn get_function_root_files(&self, path: &Path) -> anyhow::Result>; +} +struct AzureVerificationClientWrapper {} + +impl AzureVerificationClient for AzureVerificationClientWrapper { + fn get_function_root_files(&self, path: &Path) -> anyhow::Result> { + let mut file_names: Vec = Vec::new(); + + let entries = fs::read_dir(path)?; + for entry in entries { + let entry = entry.map_err(|e| anyhow::anyhow!(e))?; + let entry_name = entry.file_name(); + if entry_name == "node_modules" { + continue; + } + + file_names.push(entry_name.to_string_lossy().to_string()); + + if entry.file_type()?.is_dir() { + let sub_entries = fs::read_dir(entry.path())?; + for sub_entry in sub_entries { + let sub_entry = sub_entry.map_err(|e| anyhow::anyhow!(e))?; + let sub_entry_name = sub_entry.file_name(); + file_names.push(sub_entry_name.to_string_lossy().to_string()); + } + } + } + Ok(file_names) + } +} + +/// Checks if we are running in an Azure Function environment. +/// If true, returns MiniAgentMetadata default. +/// Otherwise, returns an error with the verification failure reason. 
+async fn ensure_azure_function_environment( + verification_client: Box, + os: &str, +) -> anyhow::Result<()> { + let azure_linux_function_root_path = Path::new(AZURE_LINUX_FUNCTION_ROOT_PATH_STR); + let azure_windows_function_root_path = Path::new(AZURE_WINDOWS_FUNCTION_ROOT_PATH_STR); + let function_files = match os { + "linux" => verification_client.get_function_root_files(azure_linux_function_root_path), + "windows" => verification_client.get_function_root_files(azure_windows_function_root_path), + _ => { + anyhow::bail!("The Serverless Mini Agent does not support this platform.") + } + }; + + let function_files = function_files.map_err(|e| anyhow::anyhow!(e))?; + + let mut host_json_exists = false; + let mut function_json_exists = false; + for file in function_files { + if file == AZURE_HOST_JSON_NAME { + host_json_exists = true; + } + if file == AZURE_FUNCTION_JSON_NAME { + function_json_exists = true; + } + } + + if !host_json_exists && !function_json_exists { + anyhow::bail!("Failed to validate an Azure Function directory system."); + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use async_trait::async_trait; + use datadog_trace_utils::trace_utils; + use hyper::{Body, Response, StatusCode}; + use serde_json::json; + use serial_test::serial; + use std::{fs, path::Path, time::Duration}; + + use crate::env_verifier::{ + ensure_azure_function_environment, ensure_gcp_function_environment, + get_region_from_gcp_region_string, AzureVerificationClient, AzureVerificationClientWrapper, + GCPInstance, GCPMetadata, GCPProject, GoogleMetadataClient, AZURE_FUNCTION_JSON_NAME, + AZURE_HOST_JSON_NAME, + }; + + use super::{EnvVerifier, ServerlessEnvVerifier}; + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_ensure_gcp_env_false_if_metadata_server_unreachable() { + struct MockGoogleMetadataClient {} + #[async_trait] + impl GoogleMetadataClient for MockGoogleMetadataClient { + async fn get_metadata(&self) -> anyhow::Result> { + anyhow::bail!("Random Error") + } + 
} + let gmc = + Box::new(MockGoogleMetadataClient {}) as Box; + let res = ensure_gcp_function_environment(gmc.as_ref()).await; + assert!(res.is_err()); + assert_eq!( + res.unwrap_err().to_string(), + "Can't communicate with Google Metadata Server. Error: Random Error" + ); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_ensure_gcp_env_false_if_no_server_in_response_headers() { + struct MockGoogleMetadataClient {} + #[async_trait] + impl GoogleMetadataClient for MockGoogleMetadataClient { + async fn get_metadata(&self) -> anyhow::Result> { + Ok(Response::builder() + .status(StatusCode::OK) + .body(Body::empty()) + .unwrap()) + } + } + let gmc = + Box::new(MockGoogleMetadataClient {}) as Box; + let res = ensure_gcp_function_environment(gmc.as_ref()).await; + assert!(res.is_err()); + assert_eq!( + res.unwrap_err().to_string(), + "In Google Cloud, but server identifier not found." + ); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_ensure_gcp_env_if_server_header_not_serverless() { + struct MockGoogleMetadataClient {} + #[async_trait] + impl GoogleMetadataClient for MockGoogleMetadataClient { + async fn get_metadata(&self) -> anyhow::Result> { + Ok(Response::builder() + .status(StatusCode::OK) + .header("Server", "Metadata Server NOT for Serverless") + .body(Body::empty()) + .unwrap()) + } + } + let gmc = + Box::new(MockGoogleMetadataClient {}) as Box; + let res = ensure_gcp_function_environment(gmc.as_ref()).await; + assert!(res.is_err()); + assert_eq!( + res.unwrap_err().to_string(), + "In Google Cloud, but not in a function environment." 
+ ); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_ensure_gcp_env_true_if_cloud_function_env() { + struct MockGoogleMetadataClient {} + #[async_trait] + impl GoogleMetadataClient for MockGoogleMetadataClient { + async fn get_metadata(&self) -> anyhow::Result> { + Ok(Response::builder() + .status(StatusCode::OK) + .header("Server", "Metadata Server for Serverless") + .body(Body::from( + json!({ + "instance": { + "region": "projects/123123/regions/us-east1", + }, + "project": { + "projectId": "my-project" + } + }) + .to_string(), + )) + .unwrap()) + } + } + let gmc = + Box::new(MockGoogleMetadataClient {}) as Box; + let res = ensure_gcp_function_environment(gmc.as_ref()).await; + assert!(res.is_ok()); + assert_eq!( + res.unwrap(), + GCPMetadata { + instance: GCPInstance { + region: "projects/123123/regions/us-east1".to_string() + }, + project: GCPProject { + project_id: "my-project".to_string() + } + } + ); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_gcp_verify_environment_timeout_exceeded_gives_unknown_values() { + struct MockGoogleMetadataClient {} + #[async_trait] + impl GoogleMetadataClient for MockGoogleMetadataClient { + async fn get_metadata(&self) -> anyhow::Result> { + // Sleep for 5 seconds to let the timeout trigger + tokio::time::sleep(Duration::from_secs(5)).await; + Ok(Response::builder() + .status(StatusCode::OK) + .body(Body::empty()) + .unwrap()) + } + } + let gmc = + Box::new(MockGoogleMetadataClient {}) as Box; + let env_verifier = ServerlessEnvVerifier::new_with_google_metadata_client(gmc); + let res = env_verifier + .verify_environment(100, &trace_utils::EnvironmentType::CloudFunction, "linux") + .await; // set the verify_env_timeout to a small value to trigger the timeout + assert_eq!( + res, + trace_utils::MiniAgentMetadata { + gcp_project_id: Some("unknown".to_string()), + gcp_region: Some("unknown".to_string()), + } + ); + } + + #[test] + fn test_gcp_region_string_extraction_valid_string() { + let 
res = get_region_from_gcp_region_string("projects/123123/regions/us-east1".to_string()); + assert_eq!(res, "us-east1"); + } + + #[test] + fn test_gcp_region_string_extraction_wrong_number_of_parts() { + let res = get_region_from_gcp_region_string("invalid/parts/count".to_string()); + assert_eq!(res, "unknown"); + } + + #[test] + fn test_gcp_region_string_extraction_empty_string() { + let res = get_region_from_gcp_region_string("".to_string()); + assert_eq!(res, "unknown"); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_ensure_azure_env_windows_true() { + struct MockAzureVerificationClient {} + #[async_trait] + impl AzureVerificationClient for MockAzureVerificationClient { + fn get_function_root_files(&self, _path: &Path) -> anyhow::Result> { + Ok(vec!["host.json".to_string(), "function.json".to_string()]) + } + } + let res = + ensure_azure_function_environment(Box::new(MockAzureVerificationClient {}), "windows") + .await; + assert!(res.is_ok()); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_ensure_azure_env_windows_false() { + struct MockAzureVerificationClient {} + #[async_trait] + impl AzureVerificationClient for MockAzureVerificationClient { + fn get_function_root_files(&self, _path: &Path) -> anyhow::Result> { + Ok(vec![ + "random_file.json".to_string(), + "random_file_1.json".to_string(), + ]) + } + } + let res = + ensure_azure_function_environment(Box::new(MockAzureVerificationClient {}), "windows") + .await; + assert!(res.is_err()); + assert_eq!( + res.unwrap_err().to_string(), + "Failed to validate an Azure Function directory system." 
+ ); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_ensure_azure_env_linux_true() { + struct MockAzureVerificationClient {} + #[async_trait] + impl AzureVerificationClient for MockAzureVerificationClient { + fn get_function_root_files(&self, _path: &Path) -> anyhow::Result> { + Ok(vec!["host.json".to_string(), "function.json".to_string()]) + } + } + let res = + ensure_azure_function_environment(Box::new(MockAzureVerificationClient {}), "linux") + .await; + assert!(res.is_ok()); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_ensure_azure_env_linux_false() { + struct MockAzureVerificationClient {} + #[async_trait] + impl AzureVerificationClient for MockAzureVerificationClient { + fn get_function_root_files(&self, _path: &Path) -> anyhow::Result> { + Ok(vec![ + "random_file.json".to_string(), + "random_file_1.json".to_string(), + ]) + } + } + let res = + ensure_azure_function_environment(Box::new(MockAzureVerificationClient {}), "linux") + .await; + assert!(res.is_err()); + assert_eq!( + res.unwrap_err().to_string(), + "Failed to validate an Azure Function directory system." 
+ ); + } + + #[test] + #[serial] + fn test_get_function_root_files_returns_correct_files() { + let temp_dir = tempfile::tempdir().unwrap(); + let temp_dir_path = temp_dir.path(); + + fs::File::create(temp_dir_path.join(AZURE_HOST_JSON_NAME)).unwrap(); + fs::create_dir(temp_dir_path.join("HttpTrigger1")).unwrap(); + fs::File::create(temp_dir_path.join(format!("HttpTrigger1/{AZURE_FUNCTION_JSON_NAME}"))) + .unwrap(); + + let client = AzureVerificationClientWrapper {}; + + let files = client.get_function_root_files(temp_dir_path).unwrap(); + + assert!(files.contains(&AZURE_HOST_JSON_NAME.to_string())); + assert!(files.contains(&AZURE_FUNCTION_JSON_NAME.to_string())); + assert!(files.contains(&"HttpTrigger1".to_string())); + } + + #[test] + #[serial] + fn test_get_function_root_files_ignores_node_modules() { + let temp_dir = tempfile::tempdir().unwrap(); + let temp_dir_path = temp_dir.path(); + + fs::File::create(temp_dir_path.join(AZURE_HOST_JSON_NAME)).unwrap(); + fs::create_dir(temp_dir_path.join("node_modules")).unwrap(); + fs::File::create(temp_dir_path.join("node_modules/random.txt")).unwrap(); + + let client = AzureVerificationClientWrapper {}; + + let files = client.get_function_root_files(temp_dir_path).unwrap(); + + assert_eq!(files, vec![AZURE_HOST_JSON_NAME]); + } +} diff --git a/bottlecap/src/traces/http_utils.rs b/bottlecap/src/traces/http_utils.rs new file mode 100644 index 000000000..701af8d2f --- /dev/null +++ b/bottlecap/src/traces/http_utils.rs @@ -0,0 +1,171 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use hyper::{ + header, + http::{self, HeaderMap}, + Body, Response, StatusCode, +}; +use log::{error, info}; +use serde_json::json; + +/// Does two things: +/// 1. Logs the given message. A success status code (within 200-299) will cause an info log to be +/// written, +/// otherwise error will be written. +/// 2. 
Returns the given message in the body of JSON response with the given status code.
+///
+/// Response body format:
+/// {
+///     "message": message
+/// }
+pub fn log_and_create_http_response(
+    message: &str,
+    status: StatusCode,
+) -> http::Result> {
+    if status.is_success() {
+        info!("{message}");
+    } else {
+        error!("{message}");
+    }
+    let body = json!({ "message": message }).to_string();
+    Response::builder().status(status).body(Body::from(body))
+}
+
+/// Takes a request's header map, and verifies that the "content-length" header is present, valid,
+/// and less than the given max_content_length.
+///
+/// Will return None if no issues are found. Otherwise logs an error (with the given prefix) and
+/// returns an HTTP Response with the appropriate error status code.
+pub fn verify_request_content_length(
+    header_map: &HeaderMap,
+    max_content_length: usize,
+    error_message_prefix: &str,
+) -> Option>> {
+    let content_length_header = match header_map.get(header::CONTENT_LENGTH) {
+        Some(res) => res,
+        None => {
+            return Some(log_and_create_http_response(
+                &format!("{error_message_prefix}: Missing Content-Length header"),
+                StatusCode::LENGTH_REQUIRED,
+            ));
+        }
+    };
+    let header_as_string = match content_length_header.to_str() {
+        Ok(res) => res,
+        Err(_) => {
+            return Some(log_and_create_http_response(
+                &format!("{error_message_prefix}: Invalid Content-Length header"),
+                StatusCode::BAD_REQUEST,
+            ));
+        }
+    };
+    let content_length = match header_as_string.to_string().parse::() {
+        Ok(res) => res,
+        Err(_) => {
+            return Some(log_and_create_http_response(
+                &format!("{error_message_prefix}: Invalid Content-Length header"),
+                StatusCode::BAD_REQUEST,
+            ));
+        }
+    };
+    if content_length > max_content_length {
+        return Some(log_and_create_http_response(
+            &format!("{error_message_prefix}: Payload too large"),
+            StatusCode::PAYLOAD_TOO_LARGE,
+        ));
+    }
+    None
+}
+
+#[cfg(test)]
+mod tests {
+    use hyper::header;
+    use hyper::Body;
+    use hyper::HeaderMap;
+    use 
hyper::Response; + use hyper::StatusCode; + + use super::verify_request_content_length; + + fn create_test_headers_with_content_length(val: &str) -> HeaderMap { + let mut map = HeaderMap::new(); + map.insert(header::CONTENT_LENGTH, val.parse().unwrap()); + map + } + + async fn get_response_body_as_string(response: Response) -> String { + let body = response.into_body(); + let bytes = hyper::body::to_bytes(body).await.unwrap(); + String::from_utf8(bytes.into_iter().collect()).unwrap() + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_request_content_length_missing() { + let verify_result = verify_request_content_length(&HeaderMap::new(), 1, "Test Prefix"); + assert!(verify_result.is_some()); + + let response = verify_result.unwrap().unwrap(); + assert_eq!(response.status(), StatusCode::LENGTH_REQUIRED); + assert_eq!( + get_response_body_as_string(response).await, + "{\"message\":\"Test Prefix: Missing Content-Length header\"}".to_string() + ); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_request_content_length_cant_convert_to_str() { + let verify_result = verify_request_content_length( + &create_test_headers_with_content_length("❤❤❤❤❤❤❤"), + 1, + "Test Prefix", + ); + assert!(verify_result.is_some()); + + let response = verify_result.unwrap().unwrap(); + assert_eq!(response.status(), StatusCode::BAD_REQUEST); + assert_eq!( + get_response_body_as_string(response).await, + "{\"message\":\"Test Prefix: Invalid Content-Length header\"}".to_string() + ); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_request_content_length_cant_convert_to_usize() { + let verify_result = verify_request_content_length( + &create_test_headers_with_content_length("not_an_int"), + 1, + "Test Prefix", + ); + assert!(verify_result.is_some()); + + let response = verify_result.unwrap().unwrap(); + assert_eq!(response.status(), StatusCode::BAD_REQUEST); + assert_eq!( + get_response_body_as_string(response).await, + "{\"message\":\"Test 
Prefix: Invalid Content-Length header\"}".to_string() + ); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_request_content_length_too_long() { + let verify_result = verify_request_content_length( + &create_test_headers_with_content_length("100"), + 1, + "Test Prefix", + ); + + assert!(verify_result.is_some()); + + let response = verify_result.unwrap().unwrap(); + assert_eq!(response.status(), StatusCode::PAYLOAD_TOO_LARGE); + assert_eq!( + get_response_body_as_string(response).await, + "{\"message\":\"Test Prefix: Payload too large\"}".to_string() + ); + } +} diff --git a/bottlecap/src/traces/mini_agent.rs b/bottlecap/src/traces/mini_agent.rs new file mode 100644 index 000000000..2aefdb29d --- /dev/null +++ b/bottlecap/src/traces/mini_agent.rs @@ -0,0 +1,196 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use hyper::service::{make_service_fn, service_fn}; +use hyper::{http, Body, Method, Request, Response, Server, StatusCode}; +use log::{debug, error, info}; +use serde_json::json; +use std::convert::Infallible; +use std::net::SocketAddr; +use std::sync::Arc; +use std::time::Instant; +use tokio::sync::mpsc::{self, Receiver, Sender}; + +use crate::traces::http_utils::log_and_create_http_response; +use crate::traces::{config as TraceConfig, stats_flusher, stats_processor, trace_flusher, trace_processor}; +use datadog_trace_protobuf::pb; +use datadog_trace_utils::trace_utils::SendData; + +const MINI_AGENT_PORT: usize = 8126; +const TRACE_ENDPOINT_PATH: &str = "/v0.4/traces"; +const STATS_ENDPOINT_PATH: &str = "/v0.6/stats"; +const INFO_ENDPOINT_PATH: &str = "/info"; +const TRACER_PAYLOAD_CHANNEL_BUFFER_SIZE: usize = 10; +const STATS_PAYLOAD_CHANNEL_BUFFER_SIZE: usize = 10; + +pub struct MiniAgent { + pub config: Arc, + pub trace_processor: Arc, + pub trace_flusher: Arc, + pub stats_processor: Arc, + pub stats_flusher: Arc, +} + +impl MiniAgent { + #[tokio::main] + pub async fn 
start_mini_agent(&self) -> Result<(), Box> {
+        let now = Instant::now();
+
+        // // verify we are in a google cloud function environment. if not, shut down the mini agent.
+        // let mini_agent_metadata = Arc::new(
+        //     self.env_verifier
+        //         .verify_environment(
+        //             self.config.verify_env_timeout,
+        //             &self.config.env_type,
+        //             &self.config.os,
+        //         )
+        //         .await,
+        // );
+
+        debug!(
+            "Time taken to fetch Mini Agent metadata: {} ms",
+            now.elapsed().as_millis()
+        );
+
+        // setup a channel to send processed traces to our flusher. tx is passed through each
+        // endpoint_handler to the trace processor, which uses it to send de-serialized
+        // processed trace payloads to our trace flusher.
+        let (trace_tx, trace_rx): (Sender, Receiver) =
+            mpsc::channel(TRACER_PAYLOAD_CHANNEL_BUFFER_SIZE);
+
+        // start our trace flusher. receives trace payloads and handles buffering + deciding when to
+        // flush to backend.
+        let trace_flusher = self.trace_flusher.clone();
+        let trace_config = self.config.clone();
+        tokio::spawn(async move {
+            let trace_flusher = trace_flusher.clone();
+            trace_flusher
+                .start_trace_flusher(trace_config.clone(), trace_rx)
+                .await;
+        });
+
+        // channels to send processed stats to our stats flusher.
+        let (stats_tx, stats_rx): (
+            Sender,
+            Receiver,
+        ) = mpsc::channel(STATS_PAYLOAD_CHANNEL_BUFFER_SIZE);
+
+        // start our stats flusher. 
+        let stats_flusher = self.stats_flusher.clone();
+        let stats_config = self.config.clone();
+        tokio::spawn(async move {
+            let stats_flusher = stats_flusher.clone();
+            stats_flusher
+                .start_stats_flusher(stats_config, stats_rx)
+                .await;
+        });
+
+        // setup our hyper http server, where the endpoint_handler handles incoming requests
+        let trace_processor = self.trace_processor.clone();
+        let stats_processor = self.stats_processor.clone();
+        let endpoint_config = self.config.clone();
+
+        let make_svc = make_service_fn(move |_| {
+            let trace_processor = trace_processor.clone();
+            let trace_tx = trace_tx.clone();
+
+            let stats_processor = stats_processor.clone();
+            let stats_tx = stats_tx.clone();
+
+            let endpoint_config = endpoint_config.clone();
+
+            let service = service_fn(move |req| {
+                MiniAgent::trace_endpoint_handler(
+                    endpoint_config.clone(),
+                    req,
+                    trace_processor.clone(),
+                    trace_tx.clone(),
+                    stats_processor.clone(),
+                    stats_tx.clone(),
+                )
+            });
+
+            async move { Ok::<_, Infallible>(service) }
+        });
+
+        let addr = SocketAddr::from(([127, 0, 0, 1], MINI_AGENT_PORT as u16));
+        let server_builder = Server::try_bind(&addr)?;
+
+        let server = server_builder.serve(make_svc);
+
+        info!("Mini Agent started: listening on port {MINI_AGENT_PORT}");
+        debug!(
+            "Time taken to start the Mini Agent: {} ms",
+            now.elapsed().as_millis()
+        );
+
+        // start hyper http server
+        if let Err(e) = server.await {
+            error!("Server error: {e}");
+            return Err(e.into());
+        }
+
+        Ok(())
+    }
+
+    async fn trace_endpoint_handler(
+        config: Arc,
+        req: Request,
+        trace_processor: Arc,
+        trace_tx: Sender,
+        stats_processor: Arc,
+        stats_tx: Sender,
+    ) -> http::Result> {
+        match (req.method(), req.uri().path()) {
+            (&Method::PUT | &Method::POST, TRACE_ENDPOINT_PATH) => {
+                match trace_processor
+                    .process_traces(config, req, trace_tx)
+                    .await
+                {
+                    Ok(res) => Ok(res),
+                    Err(err) => log_and_create_http_response(
+                        &format!("Error processing traces: {err}"),
+                        StatusCode::INTERNAL_SERVER_ERROR,
+ ), + } + } + (&Method::PUT | &Method::POST, STATS_ENDPOINT_PATH) => { + match stats_processor.process_stats(config, req, stats_tx).await { + Ok(res) => Ok(res), + Err(err) => log_and_create_http_response( + &format!("Error processing trace stats: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ), + } + } + (_, INFO_ENDPOINT_PATH) => match Self::info_handler() { + Ok(res) => Ok(res), + Err(err) => log_and_create_http_response( + &format!("Info endpoint error: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ), + }, + _ => { + let mut not_found = Response::default(); + *not_found.status_mut() = StatusCode::NOT_FOUND; + Ok(not_found) + } + } + } + + fn info_handler() -> http::Result> { + let response_json = json!( + { + "endpoints": [ + TRACE_ENDPOINT_PATH, + STATS_ENDPOINT_PATH, + INFO_ENDPOINT_PATH + ], + "client_drop_p0s": true, + } + ); + Response::builder() + .status(200) + .body(Body::from(response_json.to_string())) + } +} diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs new file mode 100644 index 000000000..ccbff24b7 --- /dev/null +++ b/bottlecap/src/traces/mod.rs @@ -0,0 +1,11 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +pub mod config; +pub mod env_verifier; +pub mod http_utils; +pub mod mini_agent; +pub mod stats_flusher; +pub mod stats_processor; +pub mod trace_flusher; +pub mod trace_processor; diff --git a/bottlecap/src/traces/stats_flusher.rs b/bottlecap/src/traces/stats_flusher.rs new file mode 100644 index 000000000..a10520ea6 --- /dev/null +++ b/bottlecap/src/traces/stats_flusher.rs @@ -0,0 +1,91 @@ +// Copyright 2023-Present Datadog, Inc. 
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use async_trait::async_trait; +use log::{debug, error, info}; +use std::{sync::Arc, time}; +use tokio::sync::{mpsc::Receiver, Mutex}; + +use datadog_trace_protobuf::pb; +use datadog_trace_utils::stats_utils; + +use crate::traces::config::Config as TraceConfig; + +#[async_trait] +pub trait StatsFlusher { + /// Starts a stats flusher that listens for stats payloads sent to the tokio mpsc Receiver, + /// implementing flushing logic that calls flush_stats. + async fn start_stats_flusher( + &self, + config: Arc, + mut rx: Receiver, + ); + /// Flushes stats to the Datadog trace stats intake. + async fn flush_stats(&self, config: Arc, traces: Vec); +} + +#[derive(Clone, Copy)] +pub struct ServerlessStatsFlusher {} + +#[async_trait] +impl StatsFlusher for ServerlessStatsFlusher { + async fn start_stats_flusher( + &self, + config: Arc, + mut rx: Receiver, + ) { + let buffer: Arc>> = Arc::new(Mutex::new(Vec::new())); + + let buffer_producer = buffer.clone(); + let buffer_consumer = buffer.clone(); + + tokio::spawn(async move { + while let Some(stats_payload) = rx.recv().await { + let mut buffer = buffer_producer.lock().await; + buffer.push(stats_payload); + } + }); + + loop { + tokio::time::sleep(time::Duration::from_secs(config.stats_flush_interval)).await; + + let mut buffer = buffer_consumer.lock().await; + if !buffer.is_empty() { + self.flush_stats(config.clone(), buffer.to_vec()).await; + buffer.clear(); + } + } + } + + async fn flush_stats(&self, config: Arc, stats: Vec) { + if stats.is_empty() { + return; + } + info!("Flushing {} stats", stats.len()); + + let stats_payload = stats_utils::construct_stats_payload(stats); + + debug!("Stats payload to be sent: {stats_payload:?}"); + + let serialized_stats_payload = match stats_utils::serialize_stats_payload(stats_payload) { + Ok(res) => res, + Err(err) => { + error!("Failed to serialize stats payload, dropping stats: {err}"); + return; + } + }; + + 
match stats_utils::send_stats_payload( + serialized_stats_payload, + &config.trace_stats_intake, + config.trace_stats_intake.api_key.as_ref().unwrap(), + ) + .await + { + Ok(_) => info!("Successfully flushed stats"), + Err(e) => { + error!("Error sending stats: {e:?}") + } + } + } +} diff --git a/bottlecap/src/traces/stats_processor.rs b/bottlecap/src/traces/stats_processor.rs new file mode 100644 index 000000000..afe6c8e53 --- /dev/null +++ b/bottlecap/src/traces/stats_processor.rs @@ -0,0 +1,88 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; + +use async_trait::async_trait; +use hyper::{http, Body, Request, Response, StatusCode}; +use log::info; +use tokio::sync::mpsc::Sender; + +use datadog_trace_protobuf::pb; +use datadog_trace_utils::stats_utils; + +use crate::traces::config::Config as TraceConfig; +use crate::traces::http_utils::{self, log_and_create_http_response}; + +#[async_trait] +pub trait StatsProcessor { + /// Deserializes trace stats from a hyper request body and sends them through + /// the provided tokio mpsc Sender. 
+ async fn process_stats( + &self, + config: Arc, + req: Request, + tx: Sender, + ) -> http::Result>; +} + +#[derive(Clone, Copy)] +pub struct ServerlessStatsProcessor {} + +#[async_trait] +impl StatsProcessor for ServerlessStatsProcessor { + async fn process_stats( + &self, + config: Arc, + req: Request, + tx: Sender, + ) -> http::Result> { + info!("Received trace stats to process"); + let (parts, body) = req.into_parts(); + + if let Some(response) = http_utils::verify_request_content_length( + &parts.headers, + config.max_request_content_length, + "Error processing trace stats", + ) { + return response; + } + + // deserialize trace stats from the request body, convert to protobuf structs (see + // trace-protobuf crate) + let mut stats: pb::ClientStatsPayload = + match stats_utils::get_stats_from_request_body(body).await { + Ok(res) => res, + Err(err) => { + return log_and_create_http_response( + &format!("Error deserializing trace stats from request body: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + + let start = SystemTime::now(); + let timestamp = start + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos(); + stats.stats[0].start = timestamp as u64; + + // send trace payload to our trace flusher + match tx.send(stats).await { + Ok(_) => { + return log_and_create_http_response( + "Successfully buffered stats to be flushed.", + StatusCode::ACCEPTED, + ); + } + Err(err) => { + return log_and_create_http_response( + &format!("Error sending stats to the stats flusher: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + } + } +} diff --git a/bottlecap/src/traces/trace_flusher.rs b/bottlecap/src/traces/trace_flusher.rs new file mode 100644 index 000000000..12717e28e --- /dev/null +++ b/bottlecap/src/traces/trace_flusher.rs @@ -0,0 +1,68 @@ +// Copyright 2023-Present Datadog, Inc.
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use async_trait::async_trait; +use log::{error, info}; +use std::{sync::Arc, time}; +use tokio::sync::{mpsc::Receiver, Mutex}; + +use datadog_trace_utils::trace_utils; +use datadog_trace_utils::trace_utils::SendData; + +use crate::traces::config::Config as TraceConfig; + +#[async_trait] +pub trait TraceFlusher { + /// Starts a trace flusher that listens for trace payloads sent to the tokio mpsc Receiver, + /// implementing flushing logic that calls flush_traces. + async fn start_trace_flusher(&self, config: Arc, mut rx: Receiver); + /// Flushes traces to the Datadog trace intake. + async fn flush_traces(&self, traces: Vec); +} + +#[derive(Clone, Copy)] +pub struct ServerlessTraceFlusher {} + +#[async_trait] +impl TraceFlusher for ServerlessTraceFlusher { + async fn start_trace_flusher(&self, config: Arc, mut rx: Receiver) { + let buffer: Arc>> = Arc::new(Mutex::new(Vec::new())); + + let buffer_producer = buffer.clone(); + let buffer_consumer = buffer.clone(); + + tokio::spawn(async move { + while let Some(tracer_payload) = rx.recv().await { + let mut buffer = buffer_producer.lock().await; + buffer.push(tracer_payload); + } + }); + + loop { + tokio::time::sleep(time::Duration::from_secs(config.trace_flush_interval)).await; + + let mut buffer = buffer_consumer.lock().await; + if !buffer.is_empty() { + self.flush_traces(buffer.to_vec()).await; + buffer.clear(); + } + } + } + + async fn flush_traces(&self, traces: Vec) { + if traces.is_empty() { + return; + } + info!("Flushing {} traces", traces.len()); + + for traces in trace_utils::coalesce_send_data(traces) { + match traces.send().await.last_result { + Ok(_) => info!("Successfully flushed traces"), + Err(e) => { + error!("Error sending trace: {e:?}") + // TODO: Retries + } + } + } + } +} diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs new file mode 100644 index 000000000..605ab4484 --- /dev/null +++ 
b/bottlecap/src/traces/trace_processor.rs @@ -0,0 +1,293 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; + +use async_trait::async_trait; +use hyper::{http, Body, Request, Response, StatusCode}; +use log::info; +use tokio::sync::mpsc::Sender; + +use datadog_trace_obfuscation::obfuscate::obfuscate_span; +use datadog_trace_utils::trace_utils::SendData; +use datadog_trace_utils::trace_utils::{self}; + +use crate::traces::{ + config::Config as TraceConfig, + http_utils::{self, log_and_create_http_response}, +}; + +#[async_trait] +pub trait TraceProcessor { + /// Deserializes traces from a hyper request body and sends them through the provided tokio mpsc + /// Sender. + async fn process_traces( + &self, + config: Arc, + req: Request, + tx: Sender, + ) -> http::Result>; +} + +#[derive(Clone, Copy)] +pub struct ServerlessTraceProcessor {} + +#[async_trait] +impl TraceProcessor for ServerlessTraceProcessor { + async fn process_traces( + &self, + config: Arc, + req: Request, + tx: Sender, + ) -> http::Result> { + info!("Recieved traces to process"); + let (parts, body) = req.into_parts(); + + if let Some(response) = http_utils::verify_request_content_length( + &parts.headers, + config.max_request_content_length, + "Error processing traces", + ) { + return response; + } + + let tracer_header_tags = (&parts.headers).into(); + + // deserialize traces from the request body, convert to protobuf structs (see trace-protobuf + // crate) + let (body_size, traces) = match trace_utils::get_traces_from_request_body(body).await { + Ok(res) => res, + Err(err) => { + return log_and_create_http_response( + &format!("Error deserializing trace from request body: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + + let payload = trace_utils::collect_trace_chunks( + traces, + &tracer_header_tags, + |chunk, root_span_index| { + trace_utils::set_serverless_root_span_tags( + &mut 
chunk.spans[root_span_index], + config.function_name.clone(), + &config.env_type, + ); + for span in chunk.spans.iter_mut() { + // trace_utils::enrich_span_with_mini_agent_metadata(span, &mini_agent_metadata); + // trace_utils::enrich_span_with_azure_metadata( + // span, + // config.mini_agent_version.as_str(), + // ); + obfuscate_span(span, &config.obfuscation_config); + } + }, + true, // In mini agent, we always send agentless + ); + + let send_data = SendData::new(body_size, payload, tracer_header_tags, &config.trace_intake); + + // send trace payload to our trace flusher + match tx.send(send_data).await { + Ok(_) => { + return log_and_create_http_response( + "Successfully buffered traces to be flushed.", + StatusCode::ACCEPTED, + ); + } + Err(err) => { + return log_and_create_http_response( + &format!("Error sending traces to the trace flusher: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + } + } +} + +#[cfg(test)] +mod tests { + use datadog_trace_obfuscation::obfuscation_config::ObfuscationConfig; + use hyper::Request; + use std::{ + collections::HashMap, + sync::Arc, + time::{SystemTime, UNIX_EPOCH}, + }; + use tokio::sync::mpsc::{self, Receiver, Sender}; + + use crate::traces::{ + config::Config, + trace_processor::{self, TraceProcessor}, + }; + use datadog_trace_protobuf::pb; + use datadog_trace_utils::{ + test_utils::{create_test_json_span, create_test_span}, + trace_utils, + }; + use ddcommon::Endpoint; + + fn get_current_timestamp_nanos() -> i64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos() as i64 + } + + fn create_test_config() -> Config { + Config { + function_name: Some("dummy_function_name".to_string()), + max_request_content_length: 10 * 1024 * 1024, + trace_flush_interval: 3, + stats_flush_interval: 3, + verify_env_timeout: 100, + trace_intake: Endpoint { + url: hyper::Uri::from_static("https://trace.agent.notdog.com/traces"), + api_key: Some("dummy_api_key".into()), + }, + trace_stats_intake: Endpoint { + 
url: hyper::Uri::from_static("https://trace.agent.notdog.com/stats"), + api_key: Some("dummy_api_key".into()), + }, + dd_site: "datadoghq.com".to_string(), + env_type: trace_utils::EnvironmentType::CloudFunction, + os: "linux".to_string(), + obfuscation_config: ObfuscationConfig::new().unwrap(), + mini_agent_version: "0.1.0".to_string(), + } + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_process_trace() { + let (tx, mut rx): ( + Sender, + Receiver, + ) = mpsc::channel(1); + + let start = get_current_timestamp_nanos(); + + let json_span = create_test_json_span(11, 222, 333, start); + + let bytes = rmp_serde::to_vec(&vec![vec![json_span]]).unwrap(); + let request = Request::builder() + .header("datadog-meta-tracer-version", "4.0.0") + .header("datadog-meta-lang", "nodejs") + .header("datadog-meta-lang-version", "v19.7.0") + .header("datadog-meta-lang-interpreter", "v8") + .header("datadog-container-id", "33") + .header("content-length", "100") + .body(hyper::body::Body::from(bytes)) + .unwrap(); + + let trace_processor = trace_processor::ServerlessTraceProcessor {}; + let res = trace_processor + .process_traces( + Arc::new(create_test_config()), + request, + tx, + Arc::new(trace_utils::MiniAgentMetadata::default()), + ) + .await; + assert!(res.is_ok()); + + let tracer_payload = rx.recv().await; + + assert!(tracer_payload.is_some()); + + let expected_tracer_payload = pb::TracerPayload { + container_id: "33".to_string(), + language_name: "nodejs".to_string(), + language_version: "v19.7.0".to_string(), + tracer_version: "4.0.0".to_string(), + runtime_id: "test-runtime-id-value".to_string(), + chunks: vec![pb::TraceChunk { + priority: i8::MIN as i32, + origin: "".to_string(), + spans: vec![create_test_span(11, 222, 333, start, true)], + tags: HashMap::new(), + dropped_trace: false, + }], + tags: HashMap::new(), + env: "test-env".to_string(), + hostname: "".to_string(), + app_version: "".to_string(), + }; + + assert_eq!( + expected_tracer_payload, + 
tracer_payload.unwrap().get_payloads()[0] + ); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn test_process_trace_top_level_span_set() { + let (tx, mut rx): ( + Sender, + Receiver, + ) = mpsc::channel(1); + + let start = get_current_timestamp_nanos(); + + let json_trace = vec![ + create_test_json_span(11, 333, 222, start), + create_test_json_span(11, 222, 0, start), + create_test_json_span(11, 444, 333, start), + ]; + + let bytes = rmp_serde::to_vec(&vec![json_trace]).unwrap(); + let request = Request::builder() + .header("datadog-meta-tracer-version", "4.0.0") + .header("datadog-meta-lang", "nodejs") + .header("datadog-meta-lang-version", "v19.7.0") + .header("datadog-meta-lang-interpreter", "v8") + .header("datadog-container-id", "33") + .header("content-length", "100") + .body(hyper::body::Body::from(bytes)) + .unwrap(); + + let trace_processor = trace_processor::ServerlessTraceProcessor {}; + let res = trace_processor + .process_traces( + Arc::new(create_test_config()), + request, + tx, + Arc::new(trace_utils::MiniAgentMetadata::default()), + ) + .await; + assert!(res.is_ok()); + + let tracer_payload = rx.recv().await; + + assert!(tracer_payload.is_some()); + + let expected_tracer_payload = pb::TracerPayload { + container_id: "33".to_string(), + language_name: "nodejs".to_string(), + language_version: "v19.7.0".to_string(), + tracer_version: "4.0.0".to_string(), + runtime_id: "test-runtime-id-value".to_string(), + chunks: vec![pb::TraceChunk { + priority: i8::MIN as i32, + origin: "".to_string(), + spans: vec![ + create_test_span(11, 333, 222, start, false), + create_test_span(11, 222, 0, start, true), + create_test_span(11, 444, 333, start, false), + ], + tags: HashMap::new(), + dropped_trace: false, + }], + tags: HashMap::new(), + env: "test-env".to_string(), + hostname: "".to_string(), + app_version: "".to_string(), + }; + assert_eq!( + expected_tracer_payload, + tracer_payload.unwrap().get_payloads()[0] + ); + } +} From 
c532836dc9df5772593fa8ffffa0977c3956dcf4 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Tue, 18 Jun 2024 13:33:07 -0400 Subject: [PATCH 02/25] feat: tracing WIP --- bottlecap/Cargo.lock | 5 +++ bottlecap/Cargo.toml | 10 +++--- bottlecap/src/bin/bottlecap/main.rs | 34 +++++++++++++------- bottlecap/src/traces/env_verifier.rs | 3 ++ bottlecap/src/traces/mini_agent.rs | 12 +++----- bottlecap/src/traces/trace_flusher.rs | 41 ++++++++++++------------- bottlecap/src/traces/trace_processor.rs | 2 +- 7 files changed, 61 insertions(+), 46 deletions(-) diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock index 92a805e4f..85ba7c8d7 100644 --- a/bottlecap/Cargo.lock +++ b/bottlecap/Cargo.lock @@ -279,6 +279,7 @@ dependencies = [ [[package]] name = "datadog-trace-normalization" version = "10.0.0" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#b92f4eb4d1966e7525281d7421a00b992e52d855" dependencies = [ "anyhow", "datadog-trace-protobuf", @@ -287,6 +288,7 @@ dependencies = [ [[package]] name = "datadog-trace-obfuscation" version = "10.0.0" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#b92f4eb4d1966e7525281d7421a00b992e52d855" dependencies = [ "anyhow", "datadog-trace-protobuf", @@ -303,6 +305,7 @@ dependencies = [ [[package]] name = "datadog-trace-protobuf" version = "10.0.0" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#b92f4eb4d1966e7525281d7421a00b992e52d855" dependencies = [ "prost 0.11.9", "serde", @@ -312,6 +315,7 @@ dependencies = [ [[package]] name = "datadog-trace-utils" version = "10.0.0" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#b92f4eb4d1966e7525281d7421a00b992e52d855" dependencies = [ "anyhow", "bytes", @@ -334,6 +338,7 @@ dependencies = [ [[package]] name = "ddcommon" version = "10.0.0" +source = 
"git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#b92f4eb4d1966e7525281d7421a00b992e52d855" dependencies = [ "anyhow", "futures", diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index bd653c2ec..3b78be698 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -10,11 +10,11 @@ anyhow = "1.0" chrono = { version = "0.4.38", features = ["serde", "std", "now"], default-features = false} datadog-protos = { version = "0.1.0", default-features = false, git = "https://github.com/DataDog/saluki/" } ddsketch-agent = { version = "0.1.0", default-features = false, git = "https://github.com/DataDog/saluki/" } -ddcommon = { path = "../../libdatadog/ddcommon" } -datadog-trace-protobuf = { path = "../../libdatadog/trace-protobuf" } -datadog-trace-utils = { path = "../../libdatadog/trace-utils" } -datadog-trace-normalization = { path = "../../libdatadog/trace-normalization" } -datadog-trace-obfuscation = { path = "../../libdatadog/trace-obfuscation" } +ddcommon = { version = "10.0", git = "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent" } +datadog-trace-protobuf = { version = "10.0.0", git = "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent" } +datadog-trace-utils = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent"} +datadog-trace-normalization = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent" } +datadog-trace-obfuscation = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent" } figment = { version = "0.10.15", default-features = false, features = ["yaml", "env"] } fnv = { version = "1.0.7", default-features = false } hashbrown = { version = "0.14.3", default-features = false, features = ["inline-more"] } diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 8a0643f28..9252467db 100644 --- 
a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -22,10 +22,11 @@ use std::{ process::Command, sync::{Arc, Mutex}, }; +use tokio::sync::Mutex as TokioMutex; use telemetry::listener::TelemetryListenerConfig; use tracing::{debug, error, info}; use tracing_subscriber::EnvFilter; - +use datadog_trace_utils::trace_utils::SendData; use bottlecap::{ base_url, config::{self, AwsConfig, Config}, @@ -54,7 +55,7 @@ use bottlecap::{ listener::TelemetryListener, }, traces::{ - config as TraceConfig, mini_agent, stats_flusher, stats_processor, trace_flusher, + config as TraceConfig, mini_agent, stats_flusher, stats_processor, trace_flusher::{self, TraceFlusher}, trace_processor, }, DOGSTATSD_PORT, EXTENSION_ACCEPT_FEATURE_HEADER, EXTENSION_FEATURES, EXTENSION_HOST, @@ -265,25 +266,36 @@ async fn extension_loop_active( Arc::clone(&metrics_aggr), config.site.clone(), ); - let trace_flusher = Arc::new(trace_flusher::ServerlessTraceFlusher {}); + + let trace_flusher = Arc::new(trace_flusher::ServerlessTraceFlusher { buffer: Arc::new(TokioMutex::new(Vec::new())) }); let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor {}); let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher {}); let stats_processor = Arc::new(stats_processor::ServerlessStatsProcessor {}); - let trace_config = TraceConfig::Config::new().unwrap(); + let trace_config = match TraceConfig::Config::new() { + Ok(config) => config, + Err(e) => { + error!("Error loading trace config: {e:?}"); + panic!("{e}"); + } + }; + + let trace_flusher_clone = trace_flusher.clone(); let mini_agent = Box::new(mini_agent::MiniAgent { config: Arc::new(trace_config), trace_processor, - trace_flusher, + trace_flusher: trace_flusher_clone, stats_processor, stats_flusher, }); - - if let Err(e) = mini_agent.start_mini_agent() { - error!("Error when starting serverless trace mini agent: {e}"); - } + tokio::spawn(async move { + let res = mini_agent.start_mini_agent().await; + if 
let Err(e) = res { + error!("Error starting mini agent: {e:?}"); + } + }); let lambda_enhanced_metrics = enhanced_metrics::new(Arc::clone(&metrics_aggr)); let dogstatsd_cancel_token = start_dogstatsd(event_bus.get_sender_copy(), &metrics_aggr).await; @@ -387,7 +399,7 @@ async fn extension_loop_active( // pass the invocation deadline to // flush tasks here, so they can // retry if we have more time - tokio::join!(logs_flusher.flush(), metrics_flusher.flush()); + tokio::join!(logs_flusher.flush(), metrics_flusher.flush(), trace_flusher.manual_flush()); break; } TelemetryRecord::PlatformReport { @@ -433,7 +445,7 @@ async fn extension_loop_active( if shutdown { dogstatsd_cancel_token.cancel(); telemetry_listener_cancel_token.cancel(); - tokio::join!(logs_flusher.flush(), metrics_flusher.flush()); + tokio::join!(logs_flusher.flush(), metrics_flusher.flush(), trace_flusher.manual_flush()); return Ok(()); } } diff --git a/bottlecap/src/traces/env_verifier.rs b/bottlecap/src/traces/env_verifier.rs index 714456de0..8363d224f 100644 --- a/bottlecap/src/traces/env_verifier.rs +++ b/bottlecap/src/traces/env_verifier.rs @@ -139,6 +139,9 @@ impl EnvVerifier for ServerlessEnvVerifier { .verify_gcp_environment_or_exit(verify_env_timeout) .await; } + trace_utils::EnvironmentType::LambdaFunction => { + trace_utils::MiniAgentMetadata::default() + } } } } diff --git a/bottlecap/src/traces/mini_agent.rs b/bottlecap/src/traces/mini_agent.rs index 2aefdb29d..30c82c2e9 100644 --- a/bottlecap/src/traces/mini_agent.rs +++ b/bottlecap/src/traces/mini_agent.rs @@ -3,7 +3,7 @@ use hyper::service::{make_service_fn, service_fn}; use hyper::{http, Body, Method, Request, Response, Server, StatusCode}; -use log::{debug, error, info}; +use tracing::{debug, error, info}; use serde_json::json; use std::convert::Infallible; use std::net::SocketAddr; @@ -32,7 +32,6 @@ pub struct MiniAgent { } impl MiniAgent { - #[tokio::main] pub async fn start_mini_agent(&self) -> Result<(), Box> { let now = 
Instant::now(); @@ -47,7 +46,7 @@ impl MiniAgent { // .await, // ); - debug!( + println!( "Time taken to fetch Mini Agent metadata: {} ms", now.elapsed().as_millis() ); @@ -61,11 +60,10 @@ impl MiniAgent { // start our trace flusher. receives trace payloads and handles buffering + deciding when to // flush to backend. let trace_flusher = self.trace_flusher.clone(); - let trace_config = self.config.clone(); tokio::spawn(async move { let trace_flusher = trace_flusher.clone(); trace_flusher - .start_trace_flusher(trace_config.clone(), trace_rx) + .start_trace_flusher(trace_rx) .await; }); @@ -118,8 +116,8 @@ impl MiniAgent { let server = server_builder.serve(make_svc); - info!("Mini Agent started: listening on port {MINI_AGENT_PORT}"); - debug!( + println!("Mini Agent started: listening on port {MINI_AGENT_PORT}"); + println!( "Time taken start the Mini Agent: {} ms", now.elapsed().as_millis() ); diff --git a/bottlecap/src/traces/trace_flusher.rs b/bottlecap/src/traces/trace_flusher.rs index 12717e28e..9371c27f5 100644 --- a/bottlecap/src/traces/trace_flusher.rs +++ b/bottlecap/src/traces/trace_flusher.rs @@ -2,53 +2,50 @@ // SPDX-License-Identifier: Apache-2.0 use async_trait::async_trait; -use log::{error, info}; -use std::{sync::Arc, time}; +use tracing::{error, info}; +use std::sync::Arc; use tokio::sync::{mpsc::Receiver, Mutex}; -use datadog_trace_utils::trace_utils; -use datadog_trace_utils::trace_utils::SendData; +use datadog_trace_utils::trace_utils::{self, SendData}; -use crate::traces::config::Config as TraceConfig; #[async_trait] pub trait TraceFlusher { /// Starts a trace flusher that listens for trace payloads sent to the tokio mpsc Receiver, /// implementing flushing logic that calls flush_traces. - async fn start_trace_flusher(&self, config: Arc, mut rx: Receiver); + async fn start_trace_flusher(&self, mut rx: Receiver); /// Flushes traces to the Datadog trace intake. 
async fn flush_traces(&self, traces: Vec); + + async fn manual_flush(&self); } -#[derive(Clone, Copy)] -pub struct ServerlessTraceFlusher {} +#[derive(Clone)] +pub struct ServerlessTraceFlusher { + pub buffer: Arc>>, +} #[async_trait] impl TraceFlusher for ServerlessTraceFlusher { - async fn start_trace_flusher(&self, config: Arc, mut rx: Receiver) { - let buffer: Arc>> = Arc::new(Mutex::new(Vec::new())); - - let buffer_producer = buffer.clone(); - let buffer_consumer = buffer.clone(); - + async fn start_trace_flusher(&self, mut rx: Receiver) { + let buffer_producer = self.buffer.clone(); tokio::spawn(async move { while let Some(tracer_payload) = rx.recv().await { let mut buffer = buffer_producer.lock().await; buffer.push(tracer_payload); } }); + } - loop { - tokio::time::sleep(time::Duration::from_secs(config.trace_flush_interval)).await; - - let mut buffer = buffer_consumer.lock().await; - if !buffer.is_empty() { - self.flush_traces(buffer.to_vec()).await; - buffer.clear(); - } + async fn manual_flush(&self) { + let mut buffer = self.buffer.lock().await; + if !buffer.is_empty() { + self.flush_traces(buffer.to_vec()).await; + buffer.clear(); } } + async fn flush_traces(&self, traces: Vec) { if traces.is_empty() { return; diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 605ab4484..0f056c8ed 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use async_trait::async_trait; use hyper::{http, Body, Request, Response, StatusCode}; -use log::info; +use tracing::info; use tokio::sync::mpsc::Sender; use datadog_trace_obfuscation::obfuscate::obfuscate_span; From 8b54c2b5872d4dacc5f8f88a689fb23031c6b130 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Tue, 18 Jun 2024 13:45:55 -0400 Subject: [PATCH 03/25] feat: rename mini agent to trace agent --- bottlecap/src/bin/bottlecap/main.rs | 7 +- bottlecap/src/traces/config.rs | 10 +- 
bottlecap/src/traces/env_verifier.rs | 605 ------------------ bottlecap/src/traces/mod.rs | 3 +- .../traces/{mini_agent.rs => trace_agent.rs} | 20 +- bottlecap/src/traces/trace_processor.rs | 1 - 6 files changed, 19 insertions(+), 627 deletions(-) delete mode 100644 bottlecap/src/traces/env_verifier.rs rename bottlecap/src/traces/{mini_agent.rs => trace_agent.rs} (92%) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 9252467db..855d5c526 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -26,7 +26,6 @@ use tokio::sync::Mutex as TokioMutex; use telemetry::listener::TelemetryListenerConfig; use tracing::{debug, error, info}; use tracing_subscriber::EnvFilter; -use datadog_trace_utils::trace_utils::SendData; use bottlecap::{ base_url, config::{self, AwsConfig, Config}, @@ -55,7 +54,7 @@ use bottlecap::{ listener::TelemetryListener, }, traces::{ - config as TraceConfig, mini_agent, stats_flusher, stats_processor, trace_flusher::{self, TraceFlusher}, + config as TraceConfig, trace_agent, stats_flusher, stats_processor, trace_flusher::{self, TraceFlusher}, trace_processor, }, DOGSTATSD_PORT, EXTENSION_ACCEPT_FEATURE_HEADER, EXTENSION_FEATURES, EXTENSION_HOST, @@ -283,7 +282,7 @@ async fn extension_loop_active( let trace_flusher_clone = trace_flusher.clone(); - let mini_agent = Box::new(mini_agent::MiniAgent { + let trace_agent = Box::new(trace_agent::TraceAgent { config: Arc::new(trace_config), trace_processor, trace_flusher: trace_flusher_clone, @@ -291,7 +290,7 @@ async fn extension_loop_active( stats_flusher, }); tokio::spawn(async move { - let res = mini_agent.start_mini_agent().await; + let res = trace_agent.start_trace_agent().await; if let Err(e) = res { error!("Error starting mini agent: {e:?}"); } diff --git a/bottlecap/src/traces/config.rs b/bottlecap/src/traces/config.rs index 4e0816eee..b5b8601fc 100644 --- a/bottlecap/src/traces/config.rs +++ 
b/bottlecap/src/traces/config.rs @@ -19,7 +19,7 @@ pub struct Config { pub env_type: trace_utils::EnvironmentType, pub function_name: Option, pub max_request_content_length: usize, - pub mini_agent_version: String, + pub trace_agent_version: String, pub obfuscation_config: obfuscation_config::ObfuscationConfig, pub os: String, /// how often to flush stats, in seconds @@ -39,7 +39,7 @@ impl Config { .into(); let (function_name, env_type) = read_cloud_env().ok_or_else(|| { - anyhow::anyhow!("Unable to identify environment. Shutting down Mini Agent.") + anyhow::anyhow!("Unable to identify environment. Shutting down Trace Agent.") })?; let dd_site = env::var("DD_SITE").unwrap_or_else(|_| "datadoghq.com".to_string()); @@ -58,11 +58,11 @@ impl Config { let obfuscation_config = obfuscation_config::ObfuscationConfig::new().map_err(|err| { anyhow::anyhow!( - "Error creating obfuscation config, Mini Agent will not start. Error: {err}", + "Error creating obfuscation config, Trace Agent will not start. Error: {err}", ) })?; - let mini_agent_version: String = env!("CARGO_PKG_VERSION").to_string(); + let trace_agent_version: String = env!("CARGO_PKG_VERSION").to_string(); Ok(Config { function_name: Some(function_name), @@ -82,7 +82,7 @@ impl Config { api_key: Some(api_key), }, obfuscation_config, - mini_agent_version, + trace_agent_version, }) } } diff --git a/bottlecap/src/traces/env_verifier.rs b/bottlecap/src/traces/env_verifier.rs deleted file mode 100644 index 8363d224f..000000000 --- a/bottlecap/src/traces/env_verifier.rs +++ /dev/null @@ -1,605 +0,0 @@ -// Copyright 2023-Present Datadog, Inc. 
https://www.datadoghq.com/ -// SPDX-License-Identifier: Apache-2.0 - -use async_trait::async_trait; -use hyper::{Body, Client, Method, Request, Response}; -use log::{debug, error}; -use serde::{Deserialize, Serialize}; -use std::fs; -use std::path::Path; -use std::process; -use std::sync::Arc; -use std::time::{Duration, Instant}; - -use datadog_trace_utils::trace_utils; - -const GCP_METADATA_URL: &str = "http://metadata.google.internal/computeMetadata/v1/?recursive=true"; -const AZURE_LINUX_FUNCTION_ROOT_PATH_STR: &str = "/home/site/wwwroot"; -const AZURE_WINDOWS_FUNCTION_ROOT_PATH_STR: &str = "C:\\home\\site\\wwwroot"; -const AZURE_HOST_JSON_NAME: &str = "host.json"; -const AZURE_FUNCTION_JSON_NAME: &str = "function.json"; - -#[derive(Default, Debug, Deserialize, Serialize, Eq, PartialEq)] -pub struct GCPMetadata { - pub instance: GCPInstance, - pub project: GCPProject, -} - -#[derive(Debug, Deserialize, Serialize, Eq, PartialEq)] -pub struct GCPInstance { - pub region: String, -} -impl Default for GCPInstance { - fn default() -> Self { - Self { - region: "unknown".to_string(), - } - } -} - -#[derive(Debug, Deserialize, Serialize, Eq, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct GCPProject { - pub project_id: String, -} -impl Default for GCPProject { - fn default() -> Self { - Self { - project_id: "unknown".to_string(), - } - } -} - -#[async_trait] -pub trait EnvVerifier { - /// Verifies the mini agent is running in the intended environment. if not, exit the process. - /// Returns MiniAgentMetadata, a struct of metadata collected from the environment. 
- async fn verify_environment( - &self, - verify_env_timeout: u64, - env_type: &trace_utils::EnvironmentType, - os: &str, - ) -> trace_utils::MiniAgentMetadata; -} - -pub struct ServerlessEnvVerifier { - gmc: Arc>, -} - -impl Default for ServerlessEnvVerifier { - fn default() -> Self { - Self::new() - } -} - -impl ServerlessEnvVerifier { - pub fn new() -> Self { - Self { - gmc: Arc::new(Box::new(GoogleMetadataClientWrapper {})), - } - } - - #[cfg(test)] - pub(crate) fn new_with_google_metadata_client( - gmc: Box, - ) -> Self { - Self { gmc: Arc::new(gmc) } - } - - async fn verify_gcp_environment_or_exit( - &self, - verify_env_timeout: u64, - ) -> trace_utils::MiniAgentMetadata { - let gcp_metadata_request = ensure_gcp_function_environment(self.gmc.as_ref().as_ref()); - let gcp_metadata = match tokio::time::timeout( - Duration::from_millis(verify_env_timeout), - gcp_metadata_request, - ) - .await - { - Ok(result) => match result { - Ok(metadata) => { - debug!("Successfully fetched Google Metadata."); - metadata - } - Err(err) => { - error!("The Mini Agent can only be run in Google Cloud Functions & Azure Functions. Verification has failed, shutting down now. Error: {err}"); - process::exit(1); - } - }, - Err(_) => { - error!("Google Metadata request timeout of {verify_env_timeout} ms exceeded. 
Using default values."); - GCPMetadata::default() - } - }; - trace_utils::MiniAgentMetadata { - gcp_project_id: Some(gcp_metadata.project.project_id), - gcp_region: Some(get_region_from_gcp_region_string( - gcp_metadata.instance.region, - )), - } - } -} - -#[async_trait] -impl EnvVerifier for ServerlessEnvVerifier { - async fn verify_environment( - &self, - verify_env_timeout: u64, - env_type: &trace_utils::EnvironmentType, - os: &str, - ) -> trace_utils::MiniAgentMetadata { - match env_type { - trace_utils::EnvironmentType::AzureFunction => { - verify_azure_environment_or_exit(os).await; - trace_utils::MiniAgentMetadata::default() - } - trace_utils::EnvironmentType::CloudFunction => { - return self - .verify_gcp_environment_or_exit(verify_env_timeout) - .await; - } - trace_utils::EnvironmentType::LambdaFunction => { - trace_utils::MiniAgentMetadata::default() - } - } - } -} - -/// The region found in GCP Metadata comes in the format: "projects/123123/regions/us-east1" -/// This function extracts just the region (us-east1) from this GCP region string. 
-/// If the string does not have 4 parts (separated by "/") or extraction fails, return "unknown" -fn get_region_from_gcp_region_string(str: String) -> String { - let split_str = str.split('/').collect::>(); - if split_str.len() != 4 { - return "unknown".to_string(); - } - match split_str.last() { - Some(res) => res.to_string(), - None => "unknown".to_string(), - } -} - -/// GoogleMetadataClient trait is used so we can mock a google metadata server response in unit -/// tests -#[async_trait] -pub(crate) trait GoogleMetadataClient { - async fn get_metadata(&self) -> anyhow::Result>; -} -struct GoogleMetadataClientWrapper {} - -#[async_trait] -impl GoogleMetadataClient for GoogleMetadataClientWrapper { - async fn get_metadata(&self) -> anyhow::Result> { - let req = Request::builder() - .method(Method::POST) - .uri(GCP_METADATA_URL) - .header("Metadata-Flavor", "Google") - .body(Body::empty()) - .map_err(|err| anyhow::anyhow!(err.to_string()))?; - - let client = Client::new(); - match client.request(req).await { - Ok(res) => Ok(res), - Err(err) => anyhow::bail!(err.to_string()), - } - } -} - -/// Checks if we are running in a Google Cloud Function environment. -/// If true, returns Metadata from the Google Cloud environment. -/// Otherwise, returns an error with the verification failure reason. -async fn ensure_gcp_function_environment( - metadata_client: &(dyn GoogleMetadataClient + Send + Sync), -) -> anyhow::Result { - let response = metadata_client.get_metadata().await.map_err(|err| { - anyhow::anyhow!("Can't communicate with Google Metadata Server. 
Error: {err}") - })?; - - let (parts, body) = response.into_parts(); - let headers = parts.headers; - match headers.get("Server") { - Some(val) => { - if val != "Metadata Server for Serverless" { - anyhow::bail!("In Google Cloud, but not in a function environment.") - } - } - None => { - anyhow::bail!("In Google Cloud, but server identifier not found.") - } - } - - let gcp_metadata = match get_gcp_metadata_from_body(body).await { - Ok(res) => res, - Err(err) => { - error!("Failed to get GCP Function Metadata. Will not enrich spans. {err}"); - return Ok(GCPMetadata::default()); - } - }; - - Ok(gcp_metadata) -} - -async fn get_gcp_metadata_from_body(body: hyper::Body) -> anyhow::Result { - let bytes = hyper::body::to_bytes(body).await?; - let body_str = String::from_utf8(bytes.to_vec())?; - let gcp_metadata: GCPMetadata = serde_json::from_str(&body_str)?; - Ok(gcp_metadata) -} - -async fn verify_azure_environment_or_exit(os: &str) { - let now = Instant::now(); - match ensure_azure_function_environment(Box::new(AzureVerificationClientWrapper {}), os).await { - Ok(_) => { - debug!("Successfully verified Azure Function Environment."); - } - Err(e) => { - error!("The Mini Agent can only be run in Google Cloud Functions & Azure Functions. Verification has failed, shutting down now. 
Error: {e}"); - process::exit(1); - } - } - debug!( - "Time taken to verify Azure Functions env: {} ms", - now.elapsed().as_millis() - ); -} - -/// AzureVerificationClient trait is used so we can mock the azure function local url response in -/// unit tests -trait AzureVerificationClient { - fn get_function_root_files(&self, path: &Path) -> anyhow::Result>; -} -struct AzureVerificationClientWrapper {} - -impl AzureVerificationClient for AzureVerificationClientWrapper { - fn get_function_root_files(&self, path: &Path) -> anyhow::Result> { - let mut file_names: Vec = Vec::new(); - - let entries = fs::read_dir(path)?; - for entry in entries { - let entry = entry.map_err(|e| anyhow::anyhow!(e))?; - let entry_name = entry.file_name(); - if entry_name == "node_modules" { - continue; - } - - file_names.push(entry_name.to_string_lossy().to_string()); - - if entry.file_type()?.is_dir() { - let sub_entries = fs::read_dir(entry.path())?; - for sub_entry in sub_entries { - let sub_entry = sub_entry.map_err(|e| anyhow::anyhow!(e))?; - let sub_entry_name = sub_entry.file_name(); - file_names.push(sub_entry_name.to_string_lossy().to_string()); - } - } - } - Ok(file_names) - } -} - -/// Checks if we are running in an Azure Function environment. -/// If true, returns MiniAgentMetadata default. -/// Otherwise, returns an error with the verification failure reason. 
-async fn ensure_azure_function_environment( - verification_client: Box, - os: &str, -) -> anyhow::Result<()> { - let azure_linux_function_root_path = Path::new(AZURE_LINUX_FUNCTION_ROOT_PATH_STR); - let azure_windows_function_root_path = Path::new(AZURE_WINDOWS_FUNCTION_ROOT_PATH_STR); - let function_files = match os { - "linux" => verification_client.get_function_root_files(azure_linux_function_root_path), - "windows" => verification_client.get_function_root_files(azure_windows_function_root_path), - _ => { - anyhow::bail!("The Serverless Mini Agent does not support this platform.") - } - }; - - let function_files = function_files.map_err(|e| anyhow::anyhow!(e))?; - - let mut host_json_exists = false; - let mut function_json_exists = false; - for file in function_files { - if file == AZURE_HOST_JSON_NAME { - host_json_exists = true; - } - if file == AZURE_FUNCTION_JSON_NAME { - function_json_exists = true; - } - } - - if !host_json_exists && !function_json_exists { - anyhow::bail!("Failed to validate an Azure Function directory system."); - } - Ok(()) -} - -#[cfg(test)] -mod tests { - use async_trait::async_trait; - use datadog_trace_utils::trace_utils; - use hyper::{Body, Response, StatusCode}; - use serde_json::json; - use serial_test::serial; - use std::{fs, path::Path, time::Duration}; - - use crate::env_verifier::{ - ensure_azure_function_environment, ensure_gcp_function_environment, - get_region_from_gcp_region_string, AzureVerificationClient, AzureVerificationClientWrapper, - GCPInstance, GCPMetadata, GCPProject, GoogleMetadataClient, AZURE_FUNCTION_JSON_NAME, - AZURE_HOST_JSON_NAME, - }; - - use super::{EnvVerifier, ServerlessEnvVerifier}; - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_ensure_gcp_env_false_if_metadata_server_unreachable() { - struct MockGoogleMetadataClient {} - #[async_trait] - impl GoogleMetadataClient for MockGoogleMetadataClient { - async fn get_metadata(&self) -> anyhow::Result> { - anyhow::bail!("Random Error") - } - 
} - let gmc = - Box::new(MockGoogleMetadataClient {}) as Box; - let res = ensure_gcp_function_environment(gmc.as_ref()).await; - assert!(res.is_err()); - assert_eq!( - res.unwrap_err().to_string(), - "Can't communicate with Google Metadata Server. Error: Random Error" - ); - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_ensure_gcp_env_false_if_no_server_in_response_headers() { - struct MockGoogleMetadataClient {} - #[async_trait] - impl GoogleMetadataClient for MockGoogleMetadataClient { - async fn get_metadata(&self) -> anyhow::Result> { - Ok(Response::builder() - .status(StatusCode::OK) - .body(Body::empty()) - .unwrap()) - } - } - let gmc = - Box::new(MockGoogleMetadataClient {}) as Box; - let res = ensure_gcp_function_environment(gmc.as_ref()).await; - assert!(res.is_err()); - assert_eq!( - res.unwrap_err().to_string(), - "In Google Cloud, but server identifier not found." - ); - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_ensure_gcp_env_if_server_header_not_serverless() { - struct MockGoogleMetadataClient {} - #[async_trait] - impl GoogleMetadataClient for MockGoogleMetadataClient { - async fn get_metadata(&self) -> anyhow::Result> { - Ok(Response::builder() - .status(StatusCode::OK) - .header("Server", "Metadata Server NOT for Serverless") - .body(Body::empty()) - .unwrap()) - } - } - let gmc = - Box::new(MockGoogleMetadataClient {}) as Box; - let res = ensure_gcp_function_environment(gmc.as_ref()).await; - assert!(res.is_err()); - assert_eq!( - res.unwrap_err().to_string(), - "In Google Cloud, but not in a function environment." 
- ); - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_ensure_gcp_env_true_if_cloud_function_env() { - struct MockGoogleMetadataClient {} - #[async_trait] - impl GoogleMetadataClient for MockGoogleMetadataClient { - async fn get_metadata(&self) -> anyhow::Result> { - Ok(Response::builder() - .status(StatusCode::OK) - .header("Server", "Metadata Server for Serverless") - .body(Body::from( - json!({ - "instance": { - "region": "projects/123123/regions/us-east1", - }, - "project": { - "projectId": "my-project" - } - }) - .to_string(), - )) - .unwrap()) - } - } - let gmc = - Box::new(MockGoogleMetadataClient {}) as Box; - let res = ensure_gcp_function_environment(gmc.as_ref()).await; - assert!(res.is_ok()); - assert_eq!( - res.unwrap(), - GCPMetadata { - instance: GCPInstance { - region: "projects/123123/regions/us-east1".to_string() - }, - project: GCPProject { - project_id: "my-project".to_string() - } - } - ); - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_gcp_verify_environment_timeout_exceeded_gives_unknown_values() { - struct MockGoogleMetadataClient {} - #[async_trait] - impl GoogleMetadataClient for MockGoogleMetadataClient { - async fn get_metadata(&self) -> anyhow::Result> { - // Sleep for 5 seconds to let the timeout trigger - tokio::time::sleep(Duration::from_secs(5)).await; - Ok(Response::builder() - .status(StatusCode::OK) - .body(Body::empty()) - .unwrap()) - } - } - let gmc = - Box::new(MockGoogleMetadataClient {}) as Box; - let env_verifier = ServerlessEnvVerifier::new_with_google_metadata_client(gmc); - let res = env_verifier - .verify_environment(100, &trace_utils::EnvironmentType::CloudFunction, "linux") - .await; // set the verify_env_timeout to a small value to trigger the timeout - assert_eq!( - res, - trace_utils::MiniAgentMetadata { - gcp_project_id: Some("unknown".to_string()), - gcp_region: Some("unknown".to_string()), - } - ); - } - - #[test] - fn test_gcp_region_string_extraction_valid_string() { - let 
res = get_region_from_gcp_region_string("projects/123123/regions/us-east1".to_string()); - assert_eq!(res, "us-east1"); - } - - #[test] - fn test_gcp_region_string_extraction_wrong_number_of_parts() { - let res = get_region_from_gcp_region_string("invalid/parts/count".to_string()); - assert_eq!(res, "unknown"); - } - - #[test] - fn test_gcp_region_string_extraction_empty_string() { - let res = get_region_from_gcp_region_string("".to_string()); - assert_eq!(res, "unknown"); - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_ensure_azure_env_windows_true() { - struct MockAzureVerificationClient {} - #[async_trait] - impl AzureVerificationClient for MockAzureVerificationClient { - fn get_function_root_files(&self, _path: &Path) -> anyhow::Result> { - Ok(vec!["host.json".to_string(), "function.json".to_string()]) - } - } - let res = - ensure_azure_function_environment(Box::new(MockAzureVerificationClient {}), "windows") - .await; - assert!(res.is_ok()); - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_ensure_azure_env_windows_false() { - struct MockAzureVerificationClient {} - #[async_trait] - impl AzureVerificationClient for MockAzureVerificationClient { - fn get_function_root_files(&self, _path: &Path) -> anyhow::Result> { - Ok(vec![ - "random_file.json".to_string(), - "random_file_1.json".to_string(), - ]) - } - } - let res = - ensure_azure_function_environment(Box::new(MockAzureVerificationClient {}), "windows") - .await; - assert!(res.is_err()); - assert_eq!( - res.unwrap_err().to_string(), - "Failed to validate an Azure Function directory system." 
- ); - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_ensure_azure_env_linux_true() { - struct MockAzureVerificationClient {} - #[async_trait] - impl AzureVerificationClient for MockAzureVerificationClient { - fn get_function_root_files(&self, _path: &Path) -> anyhow::Result> { - Ok(vec!["host.json".to_string(), "function.json".to_string()]) - } - } - let res = - ensure_azure_function_environment(Box::new(MockAzureVerificationClient {}), "linux") - .await; - assert!(res.is_ok()); - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_ensure_azure_env_linux_false() { - struct MockAzureVerificationClient {} - #[async_trait] - impl AzureVerificationClient for MockAzureVerificationClient { - fn get_function_root_files(&self, _path: &Path) -> anyhow::Result> { - Ok(vec![ - "random_file.json".to_string(), - "random_file_1.json".to_string(), - ]) - } - } - let res = - ensure_azure_function_environment(Box::new(MockAzureVerificationClient {}), "linux") - .await; - assert!(res.is_err()); - assert_eq!( - res.unwrap_err().to_string(), - "Failed to validate an Azure Function directory system." 
- ); - } - - #[test] - #[serial] - fn test_get_function_root_files_returns_correct_files() { - let temp_dir = tempfile::tempdir().unwrap(); - let temp_dir_path = temp_dir.path(); - - fs::File::create(temp_dir_path.join(AZURE_HOST_JSON_NAME)).unwrap(); - fs::create_dir(temp_dir_path.join("HttpTrigger1")).unwrap(); - fs::File::create(temp_dir_path.join(format!("HttpTrigger1/{AZURE_FUNCTION_JSON_NAME}"))) - .unwrap(); - - let client = AzureVerificationClientWrapper {}; - - let files = client.get_function_root_files(temp_dir_path).unwrap(); - - assert!(files.contains(&AZURE_HOST_JSON_NAME.to_string())); - assert!(files.contains(&AZURE_FUNCTION_JSON_NAME.to_string())); - assert!(files.contains(&"HttpTrigger1".to_string())); - } - - #[test] - #[serial] - fn test_get_function_root_files_ignores_node_modules() { - let temp_dir = tempfile::tempdir().unwrap(); - let temp_dir_path = temp_dir.path(); - - fs::File::create(temp_dir_path.join(AZURE_HOST_JSON_NAME)).unwrap(); - fs::create_dir(temp_dir_path.join("node_modules")).unwrap(); - fs::File::create(temp_dir_path.join("node_modules/random.txt")).unwrap(); - - let client = AzureVerificationClientWrapper {}; - - let files = client.get_function_root_files(temp_dir_path).unwrap(); - - assert_eq!(files, vec![AZURE_HOST_JSON_NAME]); - } -} diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index ccbff24b7..3ccb2f36d 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -2,9 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 pub mod config; -pub mod env_verifier; pub mod http_utils; -pub mod mini_agent; +pub mod trace_agent; pub mod stats_flusher; pub mod stats_processor; pub mod trace_flusher; diff --git a/bottlecap/src/traces/mini_agent.rs b/bottlecap/src/traces/trace_agent.rs similarity index 92% rename from bottlecap/src/traces/mini_agent.rs rename to bottlecap/src/traces/trace_agent.rs index 30c82c2e9..7dfcda65e 100644 --- a/bottlecap/src/traces/mini_agent.rs +++ 
b/bottlecap/src/traces/trace_agent.rs @@ -16,14 +16,14 @@ use crate::traces::{config as TraceConfig, stats_flusher, stats_processor, trace use datadog_trace_protobuf::pb; use datadog_trace_utils::trace_utils::SendData; -const MINI_AGENT_PORT: usize = 8126; +const trace_agent_PORT: usize = 8126; const TRACE_ENDPOINT_PATH: &str = "/v0.4/traces"; const STATS_ENDPOINT_PATH: &str = "/v0.6/stats"; const INFO_ENDPOINT_PATH: &str = "/info"; const TRACER_PAYLOAD_CHANNEL_BUFFER_SIZE: usize = 10; const STATS_PAYLOAD_CHANNEL_BUFFER_SIZE: usize = 10; -pub struct MiniAgent { +pub struct TraceAgent { pub config: Arc, pub trace_processor: Arc, pub trace_flusher: Arc, @@ -31,12 +31,12 @@ pub struct MiniAgent { pub stats_flusher: Arc, } -impl MiniAgent { - pub async fn start_mini_agent(&self) -> Result<(), Box> { +impl TraceAgent { + pub async fn start_trace_agent(&self) -> Result<(), Box> { let now = Instant::now(); // // verify we are in a google cloud funtion environment. if not, shut down the mini agent. 
- // let mini_agent_metadata = Arc::new( + // let trace_agent_metadata = Arc::new( // self.env_verifier // .verify_environment( // self.config.verify_env_timeout, @@ -47,7 +47,7 @@ impl MiniAgent { // ); println!( - "Time taken to fetch Mini Agent metadata: {} ms", + "Time taken to fetch Trace Agent metadata: {} ms", now.elapsed().as_millis() ); @@ -98,7 +98,7 @@ impl MiniAgent { let endpoint_config = endpoint_config.clone(); let service = service_fn(move |req| { - MiniAgent::trace_endpoint_handler( + TraceAgent::trace_endpoint_handler( endpoint_config.clone(), req, trace_processor.clone(), @@ -111,14 +111,14 @@ impl MiniAgent { async move { Ok::<_, Infallible>(service) } }); - let addr = SocketAddr::from(([127, 0, 0, 1], MINI_AGENT_PORT as u16)); + let addr = SocketAddr::from(([127, 0, 0, 1], trace_agent_PORT as u16)); let server_builder = Server::try_bind(&addr)?; let server = server_builder.serve(make_svc); - println!("Mini Agent started: listening on port {MINI_AGENT_PORT}"); + println!("Trace Agent started: listening on port {trace_agent_PORT}"); println!( - "Time taken start the Mini Agent: {} ms", + "Time taken start the Trace Agent: {} ms", now.elapsed().as_millis() ); diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 0f056c8ed..1f197a95e 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -184,7 +184,6 @@ mod tests { let trace_processor = trace_processor::ServerlessTraceProcessor {}; let res = trace_processor .process_traces( - Arc::new(create_test_config()), request, tx, Arc::new(trace_utils::MiniAgentMetadata::default()), From c58c786be822b86b2429b9b623ed0c9786500d5d Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Tue, 18 Jun 2024 15:11:02 -0400 Subject: [PATCH 04/25] feat: fmt --- bottlecap/src/bin/bottlecap/main.rs | 55 +++++++++++++++---------- bottlecap/src/config/mod.rs | 2 +- bottlecap/src/traces/mod.rs | 2 +- 
bottlecap/src/traces/stats_processor.rs | 2 +- bottlecap/src/traces/trace_agent.rs | 15 +++---- bottlecap/src/traces/trace_flusher.rs | 6 +-- bottlecap/src/traces/trace_processor.rs | 7 +++- scripts/Dockerfile.bottlecap.build | 8 ++-- 8 files changed, 54 insertions(+), 43 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 855d5c526..19c26adcc 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -10,22 +10,6 @@ #![deny(missing_copy_implementations)] #![deny(missing_debug_implementations)] -use decrypt::resolve_secrets; -use std::{ - collections::hash_map, - collections::HashMap, - env, - io::Error, - io::Result, - os::unix::process::CommandExt, - path::Path, - process::Command, - sync::{Arc, Mutex}, -}; -use tokio::sync::Mutex as TokioMutex; -use telemetry::listener::TelemetryListenerConfig; -use tracing::{debug, error, info}; -use tracing_subscriber::EnvFilter; use bottlecap::{ base_url, config::{self, AwsConfig, Config}, @@ -54,13 +38,30 @@ use bottlecap::{ listener::TelemetryListener, }, traces::{ - config as TraceConfig, trace_agent, stats_flusher, stats_processor, trace_flusher::{self, TraceFlusher}, - trace_processor, + config as TraceConfig, stats_flusher, stats_processor, trace_agent, + trace_flusher::{self, TraceFlusher}, + trace_processor, }, DOGSTATSD_PORT, EXTENSION_ACCEPT_FEATURE_HEADER, EXTENSION_FEATURES, EXTENSION_HOST, EXTENSION_ID_HEADER, EXTENSION_NAME, EXTENSION_NAME_HEADER, EXTENSION_ROUTE, LAMBDA_RUNTIME_SLUG, TELEMETRY_PORT, }; +use decrypt::resolve_secrets; +use std::{ + collections::hash_map, + collections::HashMap, + env, + io::Error, + io::Result, + os::unix::process::CommandExt, + path::Path, + process::Command, + sync::{Arc, Mutex}, +}; +use telemetry::listener::TelemetryListenerConfig; +use tokio::sync::Mutex as TokioMutex; +use tracing::{debug, error, info}; +use tracing_subscriber::EnvFilter; use reqwest::Client; use serde::Deserialize; @@ 
-266,7 +267,9 @@ async fn extension_loop_active( config.site.clone(), ); - let trace_flusher = Arc::new(trace_flusher::ServerlessTraceFlusher { buffer: Arc::new(TokioMutex::new(Vec::new())) }); + let trace_flusher = Arc::new(trace_flusher::ServerlessTraceFlusher { + buffer: Arc::new(TokioMutex::new(Vec::new())), + }); let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor {}); let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher {}); @@ -292,7 +295,7 @@ async fn extension_loop_active( tokio::spawn(async move { let res = trace_agent.start_trace_agent().await; if let Err(e) = res { - error!("Error starting mini agent: {e:?}"); + error!("Error starting trace agent: {e:?}"); } }); let lambda_enhanced_metrics = enhanced_metrics::new(Arc::clone(&metrics_aggr)); @@ -398,7 +401,11 @@ async fn extension_loop_active( // pass the invocation deadline to // flush tasks here, so they can // retry if we have more time - tokio::join!(logs_flusher.flush(), metrics_flusher.flush(), trace_flusher.manual_flush()); + tokio::join!( + logs_flusher.flush(), + metrics_flusher.flush(), + trace_flusher.manual_flush() + ); break; } TelemetryRecord::PlatformReport { @@ -444,7 +451,11 @@ async fn extension_loop_active( if shutdown { dogstatsd_cancel_token.cancel(); telemetry_listener_cancel_token.cancel(); - tokio::join!(logs_flusher.flush(), metrics_flusher.flush(), trace_flusher.manual_flush()); + tokio::join!( + logs_flusher.flush(), + metrics_flusher.flush(), + trace_flusher.manual_flush() + ); return Ok(()); } } diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index a20bd3f05..738a39bb5 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -60,7 +60,7 @@ impl Default for Config { apm_enabled: false, lambda_handler: String::default(), serverless_trace_enabled: true, - trace_enabled: true + trace_enabled: true, } } } diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index 3ccb2f36d..dd6b476eb 
100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -3,8 +3,8 @@ pub mod config; pub mod http_utils; -pub mod trace_agent; pub mod stats_flusher; pub mod stats_processor; +pub mod trace_agent; pub mod trace_flusher; pub mod trace_processor; diff --git a/bottlecap/src/traces/stats_processor.rs b/bottlecap/src/traces/stats_processor.rs index afe6c8e53..ea48d6462 100644 --- a/bottlecap/src/traces/stats_processor.rs +++ b/bottlecap/src/traces/stats_processor.rs @@ -27,7 +27,7 @@ pub trait StatsProcessor { ) -> http::Result>; } -#[derive(Clone,Copy)] +#[derive(Clone, Copy)] pub struct ServerlessStatsProcessor {} #[async_trait] diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 7dfcda65e..612cff4e5 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -3,16 +3,18 @@ use hyper::service::{make_service_fn, service_fn}; use hyper::{http, Body, Method, Request, Response, Server, StatusCode}; -use tracing::{debug, error, info}; use serde_json::json; use std::convert::Infallible; use std::net::SocketAddr; use std::sync::Arc; use std::time::Instant; use tokio::sync::mpsc::{self, Receiver, Sender}; +use tracing::{debug, error, info}; use crate::traces::http_utils::log_and_create_http_response; -use crate::traces::{config as TraceConfig, stats_flusher, stats_processor, trace_flusher, trace_processor}; +use crate::traces::{ + config as TraceConfig, stats_flusher, stats_processor, trace_flusher, trace_processor, +}; use datadog_trace_protobuf::pb; use datadog_trace_utils::trace_utils::SendData; @@ -62,9 +64,7 @@ impl TraceAgent { let trace_flusher = self.trace_flusher.clone(); tokio::spawn(async move { let trace_flusher = trace_flusher.clone(); - trace_flusher - .start_trace_flusher(trace_rx) - .await; + trace_flusher.start_trace_flusher(trace_rx).await; }); // channels to send processed stats to our stats flusher. 
@@ -141,10 +141,7 @@ impl TraceAgent { ) -> http::Result> { match (req.method(), req.uri().path()) { (&Method::PUT | &Method::POST, TRACE_ENDPOINT_PATH) => { - match trace_processor - .process_traces(config, req, trace_tx) - .await - { + match trace_processor.process_traces(config, req, trace_tx).await { Ok(res) => Ok(res), Err(err) => log_and_create_http_response( &format!("Error processing traces: {err}"), diff --git a/bottlecap/src/traces/trace_flusher.rs b/bottlecap/src/traces/trace_flusher.rs index 9371c27f5..a91cedd9d 100644 --- a/bottlecap/src/traces/trace_flusher.rs +++ b/bottlecap/src/traces/trace_flusher.rs @@ -2,13 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 use async_trait::async_trait; -use tracing::{error, info}; use std::sync::Arc; use tokio::sync::{mpsc::Receiver, Mutex}; +use tracing::{error, info}; use datadog_trace_utils::trace_utils::{self, SendData}; - #[async_trait] pub trait TraceFlusher { /// Starts a trace flusher that listens for trace payloads sent to the tokio mpsc Receiver, @@ -16,7 +15,7 @@ pub trait TraceFlusher { async fn start_trace_flusher(&self, mut rx: Receiver); /// Flushes traces to the Datadog trace intake. 
async fn flush_traces(&self, traces: Vec); - + async fn manual_flush(&self); } @@ -45,7 +44,6 @@ impl TraceFlusher for ServerlessTraceFlusher { } } - async fn flush_traces(&self, traces: Vec) { if traces.is_empty() { return; diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 1f197a95e..4e4f01801 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -5,8 +5,8 @@ use std::sync::Arc; use async_trait::async_trait; use hyper::{http, Body, Request, Response, StatusCode}; -use tracing::info; use tokio::sync::mpsc::Sender; +use tracing::{debug, info}; use datadog_trace_obfuscation::obfuscate::obfuscate_span; use datadog_trace_utils::trace_utils::SendData; @@ -74,7 +74,12 @@ impl TraceProcessor for ServerlessTraceProcessor { config.function_name.clone(), &config.env_type, ); + chunk.spans.retain(|span| { + return (span.name != "dns.lookup" && span.resource != "0.0.0.0") + || (span.name != "dns.lookup" && span.resource != "127.0.0.1"); + }); for span in chunk.spans.iter_mut() { + debug!("ASTUYVE span is {:?}", span); // trace_utils::enrich_span_with_mini_agent_metadata(span, &mini_agent_metadata); // trace_utils::enrich_span_with_azure_metadata( // span, diff --git a/scripts/Dockerfile.bottlecap.build b/scripts/Dockerfile.bottlecap.build index 41c251d25..fd1da8e00 100644 --- a/scripts/Dockerfile.bottlecap.build +++ b/scripts/Dockerfile.bottlecap.build @@ -6,9 +6,9 @@ RUN yum install -y curl gcc gcc-c++ make unzip openssl openssl-devel # Install Protocol Buffers compiler by hand, since AL2 does not have a recent enough version. 
COPY ./scripts/install-protoc.sh / RUN chmod +x /install-protoc.sh && /install-protoc.sh -RUN curl https://sh.rustup.rs -sSf | \ - sh -s -- --default-toolchain nightly-$PLATFORM-unknown-linux-gnu -y - ENV PATH=/root/.cargo/bin:$PATH +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \ + sh -s -- --default-toolchain nightly-$PLATFORM-unknown-linux-gnu -y --verbose +ENV PATH=/root/.cargo/bin:$PATH RUN rustup component add rust-src --toolchain nightly-$PLATFORM-unknown-linux-gnu RUN mkdir -p /tmp/dd COPY ./bottlecap/src /tmp/dd/bottlecap/src @@ -16,7 +16,7 @@ COPY ./bottlecap/Cargo.toml /tmp/dd/bottlecap/Cargo.toml COPY ./bottlecap/Cargo.lock /tmp/dd/bottlecap/Cargo.lock ENV RUSTFLAGS="-C panic=abort -Zlocation-detail=none" WORKDIR /tmp/dd/bottlecap -RUN --mount=type=cache,target=/usr/local/cargo/registry cargo +nightly build -Z build-std=std,panic_abort -Z build-std-features=panic_immediate_abort --release --target $PLATFORM-unknown-linux-gnu +RUN --mount=type=cache,target=/usr/local/cargo/registry cargo +nightly build --release --target $PLATFORM-unknown-linux-gnu RUN cp /tmp/dd/bottlecap/target/$PLATFORM-unknown-linux-gnu/release/bottlecap /tmp/dd/bottlecap/bottlecap # zip the extension From 7a98a1f73ec165342e3c32027702287347c092d9 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Tue, 18 Jun 2024 15:14:15 -0400 Subject: [PATCH 05/25] feat: Fix formatting after rename --- bottlecap/src/traces/trace_agent.rs | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 612cff4e5..b2ec6d544 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -18,7 +18,7 @@ use crate::traces::{ use datadog_trace_protobuf::pb; use datadog_trace_utils::trace_utils::SendData; -const trace_agent_PORT: usize = 8126; +const TRACE_AGENT_PORT: usize = 8126; const TRACE_ENDPOINT_PATH: &str = "/v0.4/traces"; const 
STATS_ENDPOINT_PATH: &str = "/v0.6/stats"; const INFO_ENDPOINT_PATH: &str = "/info"; @@ -36,19 +36,7 @@ pub struct TraceAgent { impl TraceAgent { pub async fn start_trace_agent(&self) -> Result<(), Box> { let now = Instant::now(); - - // // verify we are in a google cloud funtion environment. if not, shut down the mini agent. - // let trace_agent_metadata = Arc::new( - // self.env_verifier - // .verify_environment( - // self.config.verify_env_timeout, - // &self.config.env_type, - // &self.config.os, - // ) - // .await, - // ); - - println!( + info!( "Time taken to fetch Trace Agent metadata: {} ms", now.elapsed().as_millis() ); @@ -111,13 +99,13 @@ impl TraceAgent { async move { Ok::<_, Infallible>(service) } }); - let addr = SocketAddr::from(([127, 0, 0, 1], trace_agent_PORT as u16)); + let addr = SocketAddr::from(([127, 0, 0, 1], TRACE_AGENT_PORT as u16)); let server_builder = Server::try_bind(&addr)?; let server = server_builder.serve(make_svc); - println!("Trace Agent started: listening on port {trace_agent_PORT}"); - println!( + info!("Trace Agent started: listening on port {TRACE_AGENT_PORT}"); + info!( "Time taken start the Trace Agent: {} ms", now.elapsed().as_millis() ); From a44be55f5b4cb78178b47d6b11df1bf79de1d94e Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Tue, 18 Jun 2024 16:00:25 -0400 Subject: [PATCH 06/25] fix: remove extra tokio task --- bottlecap/src/traces/trace_agent.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index b2ec6d544..0db4980cb 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -50,10 +50,7 @@ impl TraceAgent { // start our trace flusher. receives trace payloads and handles buffering + deciding when to // flush to backend. 
let trace_flusher = self.trace_flusher.clone(); - tokio::spawn(async move { - let trace_flusher = trace_flusher.clone(); - trace_flusher.start_trace_flusher(trace_rx).await; - }); + trace_flusher.start_trace_flusher(trace_rx).await; // channels to send processed stats to our stats flusher. let (stats_tx, stats_rx): ( From 442039daa0b90be21324cf84502184bf6da1944a Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Thu, 20 Jun 2024 11:00:54 -0400 Subject: [PATCH 07/25] feat: allow tracing --- bottlecap/src/config/mod.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index 738a39bb5..79f631393 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -35,6 +35,11 @@ pub struct Config { pub serverless_flush_strategy: FlushStrategy, pub trace_enabled: bool, pub serverless_trace_enabled: bool, + pub capture_lambda_payload: bool, + // Deprecated or ignored, just here so we don't failover + pub flush_to_log: bool, + pub logs_injection: bool, + pub merge_xray_traces: bool, } impl Default for Config { @@ -61,6 +66,10 @@ impl Default for Config { lambda_handler: String::default(), serverless_trace_enabled: true, trace_enabled: true, + capture_lambda_payload: false, + flush_to_log: false, + logs_injection: false, + merge_xray_traces: false, } } } From 8a02ce8c39decf417f3ad7eabdbbb17fe89ca991 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Tue, 25 Jun 2024 09:05:12 -0400 Subject: [PATCH 08/25] feat: working v5 traces --- bottlecap/Cargo.lock | 13 + bottlecap/Cargo.toml | 3 + bottlecap/src/traces/trace_agent.rs | 18 +- bottlecap/src/traces/trace_processor.rs | 372 +++++++++++++++++++++++- 4 files changed, 399 insertions(+), 7 deletions(-) diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock index 85ba7c8d7..7a87e8f75 100644 --- a/bottlecap/Cargo.lock +++ b/bottlecap/Cargo.lock @@ -160,6 +160,9 @@ dependencies = [ "protobuf", "regex", "reqwest", + "rmp", + "rmp-serde", + "rmpv", 
"serde", "serde_json", "sha2", @@ -1506,6 +1509,16 @@ dependencies = [ "serde", ] +[[package]] +name = "rmpv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58450723cd9ee93273ce44a20b6ec4efe17f8ed2e3631474387bfdecf18bb2a9" +dependencies = [ + "num-traits", + "rmp", +] + [[package]] name = "rustc-demangle" version = "0.1.24" diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index 3b78be698..80b23953d 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -37,6 +37,9 @@ hmac = { version = "0.12.1", default-features = false } sha2 = { version = "0.10.8", default-features = false } hex = { version = "0.4.3", default-features = false, features = ["std"] } base64 = { version = "0.22.0", default-features = false } +rmp-serde = { version = "1.3.0", default-features = false } +rmpv = { version = "1.3.0", default-features = false } +rmp = { version = "0.8.14", default-features = false } [dev-dependencies] figment = { version = "0.10.15", default-features = false, features = ["yaml", "env", "test"] } diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 0db4980cb..591fb4cfe 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -19,7 +19,8 @@ use datadog_trace_protobuf::pb; use datadog_trace_utils::trace_utils::SendData; const TRACE_AGENT_PORT: usize = 8126; -const TRACE_ENDPOINT_PATH: &str = "/v0.4/traces"; +const V4_TRACE_ENDPOINT_PATH: &str = "/v0.4/traces"; +const V5_TRACE_ENDPOINT_PATH: &str = "/v0.5/traces"; const STATS_ENDPOINT_PATH: &str = "/v0.6/stats"; const INFO_ENDPOINT_PATH: &str = "/info"; const TRACER_PAYLOAD_CHANNEL_BUFFER_SIZE: usize = 10; @@ -125,8 +126,17 @@ impl TraceAgent { stats_tx: Sender, ) -> http::Result> { match (req.method(), req.uri().path()) { - (&Method::PUT | &Method::POST, TRACE_ENDPOINT_PATH) => { - match trace_processor.process_traces(config, req, trace_tx).await { + (&Method::PUT | &Method::POST, 
V4_TRACE_ENDPOINT_PATH) => { + match trace_processor.process_traces_v4(config, req, trace_tx).await { + Ok(res) => Ok(res), + Err(err) => log_and_create_http_response( + &format!("Error processing traces: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ), + } + } + (&Method::PUT | &Method::POST, V5_TRACE_ENDPOINT_PATH) => { + match trace_processor.process_traces_v5(config, req, trace_tx).await { Ok(res) => Ok(res), Err(err) => log_and_create_http_response( &format!("Error processing traces: {err}"), @@ -162,7 +172,7 @@ impl TraceAgent { let response_json = json!( { "endpoints": [ - TRACE_ENDPOINT_PATH, + V4_TRACE_ENDPOINT_PATH, STATS_ENDPOINT_PATH, INFO_ENDPOINT_PATH ], diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 4e4f01801..6a53bb79e 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -2,15 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 use std::sync::Arc; +use rmp; use async_trait::async_trait; -use hyper::{http, Body, Request, Response, StatusCode}; +use hyper::{http, Body, Request, Response, body::Buf, StatusCode}; +use rmpv::decode::read_value; +use rmpv::Value; +use rmp::decode::read_str_len; use tokio::sync::mpsc::Sender; use tracing::{debug, info}; use datadog_trace_obfuscation::obfuscate::obfuscate_span; use datadog_trace_utils::trace_utils::SendData; use datadog_trace_utils::trace_utils::{self}; +use datadog_trace_protobuf::pb::{self, Span, TraceChunk}; use crate::traces::{ config::Config as TraceConfig, @@ -21,7 +26,14 @@ use crate::traces::{ pub trait TraceProcessor { /// Deserializes traces from a hyper request body and sends them through the provided tokio mpsc /// Sender. 
- async fn process_traces( + async fn process_traces_v4( + &self, + config: Arc, + req: Request, + tx: Sender, + ) -> http::Result>; + + async fn process_traces_v5( &self, config: Arc, req: Request, @@ -34,7 +46,361 @@ pub struct ServerlessTraceProcessor {} #[async_trait] impl TraceProcessor for ServerlessTraceProcessor { - async fn process_traces( + async fn process_traces_v5( + &self, + config: Arc, + req: Request, + tx: Sender, + ) -> http::Result> { + info!("Recieved traces to process"); + let (parts, body) = req.into_parts(); + + if let Some(response) = http_utils::verify_request_content_length( + &parts.headers, + config.max_request_content_length, + "Error processing traces and verifying length", + ) { + return response; + } + + println!("astuyve no error in verifying request content length"); + + let tracer_header_tags = (&parts.headers).into(); + + // deserialize traces from the request body, convert to protobuf structs (see trace-protobuf + // crate) + let buffer = hyper::body::aggregate(body).await.unwrap(); + let body_size = buffer.remaining(); + let mut reader = buffer.reader(); + let wrapper_size = rmp::decode::read_array_len(&mut reader).unwrap(); + assert!(wrapper_size == 2); //todo conver to http error/response + + // START read dict + let dict_size = match rmp::decode::read_array_len(&mut reader) { + Ok(res) => res, + Err(err) => { + println!("ASTUYVE error reading dict size: {err}"); + return log_and_create_http_response( + &format!("ASTUYVE Error reading dict size: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + let mut dict: Vec = Default::default(); + for _ in 0..dict_size { + let val: Value = read_value(&mut reader).unwrap(); + match val { + Value::String(s) => { + // dict.push(s.to_string()); + let s = s.to_string().replace(&['\"'][..], ""); + dict.push(s); + } + _ => { + return log_and_create_http_response( + &format!("Value in string dict is not a string: {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + } + } + // 
START read traces + + let traces_size = match rmp::decode::read_array_len(&mut reader) { + Ok(res) => res, + Err(err) => { + return log_and_create_http_response( + &format!("Error reading traces size: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + let mut traces: Vec> = Default::default(); + + for _ in 0..traces_size { + let spans_size = match rmp::decode::read_array_len(&mut reader) { + Ok(res) => res, + Err(err) => { + println!("ASTUYVE error reading spans size: {err}"); + return log_and_create_http_response( + &format!("ASTUYVE Error reading spans size: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + let mut trace: Vec = Default::default(); + + for _ in 0..spans_size { + let mut span: Span = Default::default(); + let span_size = rmp::decode::read_array_len(&mut reader).unwrap(); + assert!(span_size == 12); //todo convert to http error/response + //0 - service + match read_value(&mut reader).unwrap() { + Value::Integer(s) => { + let string_id = s.as_i64().unwrap() as usize; + span.service = dict[string_id].to_string(); + }, + val => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span service is not a string {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + // 1 - name + match read_value(&mut reader).unwrap() { + Value::Integer(s) => { + let string_id = s.as_i64().unwrap() as usize; + span.name = dict[string_id].to_string(); + println!("ASTUYVE span name is {:?}", span.name); + }, + val => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span name is not a string {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + // 2 - resource + match read_value(&mut reader).unwrap() { + Value::Integer(s) => { + let string_id = s.as_i64().unwrap() as usize; + span.resource = dict[string_id].to_string(); + }, + val => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span resource is not a string {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + // 3 - 
trace_id + match read_value(&mut reader).unwrap() { + Value::Integer(i) => { + span.trace_id = i.as_u64().unwrap(); + }, + val => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span resource is not a string {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + // 4 - span_id + match read_value(&mut reader).unwrap() { + Value::Integer(i) => { + span.span_id = i.as_u64().unwrap(); + }, + val => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span span_id is not a string {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + // 5 - parent_id + match read_value(&mut reader).unwrap() { + Value::Integer(i) => { + span.parent_id = i.as_u64().unwrap() + }, + val => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span parent_id is not a string {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + //6 - start + match read_value(&mut reader).unwrap() { + Value::Integer(i) => { + span.start = i.as_i64().unwrap() + }, + val => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span start is not a string {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + //7 - duration + match read_value(&mut reader).unwrap() { + Value::Integer(i) => { + span.duration = i.as_i64().unwrap() + }, + val => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span duration is not a string {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; + //8 - error + match read_value(&mut reader).unwrap() { + Value::Integer(i) => { + span.error = i.as_i64().unwrap() as i32 + }, + val => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span error is not a string {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + } + //9 - meta + match read_value(&mut reader).unwrap() { + Value::Map(meta) => { + for (k, v) in meta.iter() { + match k { + Value::Integer(k) => { + match v { + Value::Integer(v) => { + let key_id = k.as_i64().unwrap() as usize; + 
let val_id = v.as_i64().unwrap() as usize; + let key = dict[key_id].to_string(); + let val = dict[val_id].to_string(); + span.meta.insert(key, val); + } + _ => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span meta value is not a string {v}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + } + } + _ => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span meta key is not a string {k}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + } + } + }, + val => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span meta is not a map {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + } + // 10 - metrics + match read_value(&mut reader).unwrap() { + Value::Map(metrics) => { + for (k, v) in metrics.iter() { + match k { + Value::Integer(k) => { + match v { + Value::Integer(v) => { + let key_id = k.as_i64().unwrap() as usize; + let key = dict[key_id].to_string(); + span.metrics.insert(key, v.as_f64().unwrap()); + }, + Value::F64(v) => { + let key_id = k.as_i64().unwrap() as usize; + let key = dict[key_id].to_string(); + span.metrics.insert(key, *v); + }, + _ => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span metrics value is not a float {v}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + } + } + _ => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span metrics key is not a string {k}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + } + } + }, + val => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span metrics is not a map {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + } + + // 11 - type + match read_value(&mut reader).unwrap() { + Value::Integer(s) => { + let string_id = s.as_i64().unwrap() as usize; + span.r#type = dict[string_id].to_string(); + }, + val => { + return log_and_create_http_response( + &format!("ASTUYVE Value in span type is not a string {val}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + } + 
println!("ASTUYVE span is {:?}", span); + trace.push(span); + } + traces.push(trace); + } + + + + + + // let value: Value = read_value(&mut reader).unwrap(); + // println!("ASTUYVE rest of value after two read_array_len is {:?}", value); + // let dict = &value.as_array().unwrap()[0]; + // println!("ASTUYVE string dict is {:?}", dict); + // let compressed_traces = &value.as_array().unwrap()[1]; + // println!("ASTUYVE compressed traces is {:?}", compressed_traces); + + let payload = trace_utils::collect_trace_chunks( + traces, + &tracer_header_tags, + |chunk, root_span_index| { + trace_utils::set_serverless_root_span_tags( + &mut chunk.spans[root_span_index], + config.function_name.clone(), + &config.env_type, + ); + chunk.spans.retain(|span| { + return (span.name != "dns.lookup" && span.resource != "0.0.0.0") + || (span.name != "dns.lookup" && span.resource != "127.0.0.1"); + }); + for span in chunk.spans.iter_mut() { + debug!("ASTUYVE span is {:?}", span); + // trace_utils::enrich_span_with_mini_agent_metadata(span, &mini_agent_metadata); + // trace_utils::enrich_span_with_azure_metadata( + // span, + // config.mini_agent_version.as_str(), + // ); + obfuscate_span(span, &config.obfuscation_config); + } + }, + true, // In mini agent, we always send agentless + ); + + let send_data = SendData::new(body_size, payload, tracer_header_tags, &config.trace_intake); + + // send trace payload to our trace flusher + match tx.send(send_data).await { + Ok(_) => { + return log_and_create_http_response( + "Successfully buffered traces to be flushed.", + StatusCode::ACCEPTED, + ); + } + Err(err) => { + return log_and_create_http_response( + &format!("Error sending traces to the trace flusher: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + } + } + + async fn process_traces_v4( &self, config: Arc, req: Request, From d029b5061e297be5fd9f25008bcbdf0988bf7d54 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Tue, 25 Jun 2024 16:37:45 -0400 Subject: [PATCH 09/25] feat: 
Update to use my branch of libdatadog so we have v5 support --- bottlecap/Cargo.lock | 239 ++++++++++++-------- bottlecap/Cargo.toml | 2 +- bottlecap/src/traces/trace_processor.rs | 281 +----------------------- 3 files changed, 151 insertions(+), 371 deletions(-) diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock index 7a87e8f75..b559e4c1a 100644 --- a/bottlecap/Cargo.lock +++ b/bottlecap/Cargo.lock @@ -53,7 +53,7 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -79,9 +79,9 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.72" +version = "0.3.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" dependencies = [ "addr2line", "cc", @@ -121,9 +121,9 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" [[package]] name = "bitflags" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "block-buffer" @@ -184,9 +184,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.16.0" +version = "1.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" +checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" [[package]] name = "byteorder" @@ -202,9 +202,9 @@ checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "cc" -version = 
"1.0.98" +version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +checksum = "c891175c3fb232128f48de6590095e59198bbeb8620c310be349bfc3afd12c7b" [[package]] name = "cfg-if" @@ -269,7 +269,7 @@ dependencies = [ [[package]] name = "datadog-protos" version = "0.1.0" -source = "git+https://github.com/DataDog/saluki/#0e55b345e6d2a215474147542f5e776d4f593af9" +source = "git+https://github.com/DataDog/saluki/#08fab4086401e3e5e3f1ad57013761040908870d" dependencies = [ "bytes", "prost 0.12.6", @@ -282,7 +282,7 @@ dependencies = [ [[package]] name = "datadog-trace-normalization" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#b92f4eb4d1966e7525281d7421a00b992e52d855" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#bdc97be7bc4d3d2f05e8c6d02d77cdd4aa099ca6" dependencies = [ "anyhow", "datadog-trace-protobuf", @@ -291,7 +291,7 @@ dependencies = [ [[package]] name = "datadog-trace-obfuscation" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#b92f4eb4d1966e7525281d7421a00b992e52d855" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#bdc97be7bc4d3d2f05e8c6d02d77cdd4aa099ca6" dependencies = [ "anyhow", "datadog-trace-protobuf", @@ -308,7 +308,7 @@ dependencies = [ [[package]] name = "datadog-trace-protobuf" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#b92f4eb4d1966e7525281d7421a00b992e52d855" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#bdc97be7bc4d3d2f05e8c6d02d77cdd4aa099ca6" dependencies = [ "prost 0.11.9", "serde", @@ -318,7 +318,7 @@ dependencies = [ [[package]] name = "datadog-trace-utils" version = "10.0.0" -source = 
"git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#b92f4eb4d1966e7525281d7421a00b992e52d855" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#bdc97be7bc4d3d2f05e8c6d02d77cdd4aa099ca6" dependencies = [ "anyhow", "bytes", @@ -332,7 +332,9 @@ dependencies = [ "log", "prost 0.11.9", "rand", + "rmp", "rmp-serde", + "rmpv", "serde", "serde_json", "tokio", @@ -341,7 +343,7 @@ dependencies = [ [[package]] name = "ddcommon" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#b92f4eb4d1966e7525281d7421a00b992e52d855" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#bdc97be7bc4d3d2f05e8c6d02d77cdd4aa099ca6" dependencies = [ "anyhow", "futures", @@ -366,7 +368,7 @@ dependencies = [ [[package]] name = "ddsketch-agent" version = "0.1.0" -source = "git+https://github.com/DataDog/saluki/#0e55b345e6d2a215474147542f5e776d4f593af9" +source = "git+https://github.com/DataDog/saluki/#08fab4086401e3e5e3f1ad57013761040908870d" dependencies = [ "datadog-protos", "float_eq", @@ -521,7 +523,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -693,12 +695,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", - "futures-core", + "futures-util", "http 1.1.0", "http-body 1.0.0", "pin-project-lite", @@ -706,9 +708,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.8.0" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" 
+checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" [[package]] name = "httpdate" @@ -775,19 +777,20 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.26.0" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" +checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" dependencies = [ "futures-util", "http 1.1.0", "hyper 1.3.1", "hyper-util", - "rustls 0.22.4", + "rustls 0.23.10", "rustls-pki-types", "tokio", - "tokio-rustls 0.25.0", + "tokio-rustls 0.26.0", "tower-service", + "webpki-roots", ] [[package]] @@ -887,9 +890,9 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" @@ -936,9 +939,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "mime" @@ -948,9 +951,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" dependencies = [ "adler", ] @@ -994,9 +997,9 @@ dependencies = [ [[package]] name = "object" -version = "0.35.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434" dependencies = [ "memchr", ] @@ -1071,7 +1074,7 @@ dependencies = [ "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -1107,7 +1110,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -1135,14 +1138,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] name = "proc-macro2" -version = "1.0.85" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] @@ -1155,16 +1158,16 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", "version_check", "yansi", ] [[package]] name = "proptest" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31b476131c3c86cb68032fdc5cb6d5a1045e3e42d96b69fa599fd77701e1f5bf" +checksum = "b4c2511913b88df1637da85cc8d96ec8e43a3f8bb8ccb71ee1ac240d6f3df58d" dependencies = [ "bit-set", "bit-vec", @@ -1174,7 +1177,7 @@ dependencies = [ "rand", "rand_chacha", "rand_xorshift", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", "rusty-fork", "tempfile", "unarray", @@ -1217,7 +1220,7 @@ dependencies = [ "prost 0.12.6", "prost-types", "regex", - "syn 2.0.66", + "syn 2.0.68", "tempfile", ] @@ -1244,7 +1247,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", 
] [[package]] @@ -1314,6 +1317,53 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" +[[package]] +name = "quinn" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4ceeeeabace7857413798eb1ffa1e9c905a9946a57d81fb69b4b71c4d8eb3ad" +dependencies = [ + "bytes", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls 0.23.10", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "quinn-proto" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddf517c03a109db8100448a4be38d498df8a210a99fe0e1b9eaf39e78c640efe" +dependencies = [ + "bytes", + "rand", + "ring 0.17.8", + "rustc-hash", + "rustls 0.23.10", + "slab", + "thiserror", + "tinyvec", + "tracing", +] + +[[package]] +name = "quinn-udp" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9096629c45860fc7fb143e125eb826b5e721e10be3263160c7d60ca832cf8c46" +dependencies = [ + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.52.0", +] + [[package]] name = "quote" version = "1.0.36" @@ -1364,23 +1414,23 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" dependencies = [ "bitflags", ] [[package]] name = "regex" -version = "1.10.4" +version = "1.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.6", - "regex-syntax 0.8.3", + 
"regex-automata 0.4.7", + "regex-syntax 0.8.4", ] [[package]] @@ -1394,13 +1444,13 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", ] [[package]] @@ -1411,15 +1461,15 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" +checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" dependencies = [ "base64 0.22.1", "bytes", @@ -1430,7 +1480,7 @@ dependencies = [ "http-body 1.0.0", "http-body-util", "hyper 1.3.1", - "hyper-rustls 0.26.0", + "hyper-rustls 0.27.2", "hyper-util", "ipnet", "js-sys", @@ -1439,7 +1489,8 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.22.4", + "quinn", + "rustls 0.23.10", "rustls-pemfile 2.1.2", "rustls-pki-types", "serde", @@ -1447,7 +1498,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls 0.25.0", + "tokio-rustls 0.26.0", "tower-service", "url", "wasm-bindgen", @@ -1525,6 +1576,12 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "rustc-hash" 
+version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustix" version = "0.38.34" @@ -1552,11 +1609,11 @@ dependencies = [ [[package]] name = "rustls" -version = "0.22.4" +version = "0.23.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" +checksum = "05cff451f60db80f490f3c182b77c35260baace73209e9cdbbe526bfe3a4d402" dependencies = [ - "log", + "once_cell", "ring 0.17.8", "rustls-pki-types", "rustls-webpki", @@ -1689,9 +1746,9 @@ dependencies = [ [[package]] name = "serde_bytes" -version = "0.11.14" +version = "0.11.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b8497c313fd43ab992087548117643f6fcd935cbf36f176ffda0aacf9591734" +checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" dependencies = [ "serde", ] @@ -1704,14 +1761,14 @@ checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4" dependencies = [ "itoa", "ryu", @@ -1808,9 +1865,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "subtle" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" @@ -1825,9 +1882,9 @@ dependencies = [ [[package]] name = "syn" -version = 
"2.0.66" +version = "2.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" dependencies = [ "proc-macro2", "quote", @@ -1836,9 +1893,9 @@ dependencies = [ [[package]] name = "sync_wrapper" -version = "0.1.2" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" [[package]] name = "tempfile" @@ -1869,7 +1926,7 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -1884,9 +1941,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "c55115c6fbe2d2bef26eb09ad74bde02d8255476fc0c7b515ef09fbb35742d82" dependencies = [ "tinyvec_macros", ] @@ -1922,7 +1979,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -1938,11 +1995,11 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.25.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.22.4", + "rustls 0.23.10", "rustls-pki-types", "tokio", ] @@ -2002,7 +2059,7 @@ dependencies = [ "proc-macro2", "prost-build", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -2051,7 +2108,7 @@ checksum = 
"34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -2158,9 +2215,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.0" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" dependencies = [ "form_urlencoded", "idna", @@ -2236,7 +2293,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", "wasm-bindgen-shared", ] @@ -2270,7 +2327,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2303,9 +2360,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.1" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3de34ae270483955a94f4b21bdaaeb83d508bb84a01435f393818edb0012009" +checksum = "bd7c23921eeb1713a4e851530e9b9756e4fb0e89978582942612524cf09f01cd" dependencies = [ "rustls-pki-types", ] @@ -2516,7 +2573,7 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index 80b23953d..2347fe830 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -11,7 +11,7 @@ chrono = { version = "0.4.38", features = ["serde", "std", "now"], default-featu datadog-protos = { version = "0.1.0", default-features = false, git = "https://github.com/DataDog/saluki/" } ddsketch-agent = { version = "0.1.0", default-features = false, git = "https://github.com/DataDog/saluki/" } ddcommon = { version = "10.0", git = 
"https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent" } -datadog-trace-protobuf = { version = "10.0.0", git = "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent" } +datadog-trace-protobuf = { version = "10.0.0", git = "https://github.com/DataDog/libdatadog", branch = "aj/bottlecap-mini-agent" } datadog-trace-utils = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent"} datadog-trace-normalization = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent" } datadog-trace-obfuscation = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent" } diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 6a53bb79e..5e455338e 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -69,292 +69,15 @@ impl TraceProcessor for ServerlessTraceProcessor { // deserialize traces from the request body, convert to protobuf structs (see trace-protobuf // crate) - let buffer = hyper::body::aggregate(body).await.unwrap(); - let body_size = buffer.remaining(); - let mut reader = buffer.reader(); - let wrapper_size = rmp::decode::read_array_len(&mut reader).unwrap(); - assert!(wrapper_size == 2); //todo conver to http error/response - - // START read dict - let dict_size = match rmp::decode::read_array_len(&mut reader) { + let (body_size, traces) = match trace_utils::v5_get_traces_from_request_body(body).await { Ok(res) => res, Err(err) => { - println!("ASTUYVE error reading dict size: {err}"); return log_and_create_http_response( - &format!("ASTUYVE Error reading dict size: {err}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - }; - let mut dict: Vec = Default::default(); - for _ in 0..dict_size { - let val: Value = read_value(&mut reader).unwrap(); - match val { - Value::String(s) => { - // dict.push(s.to_string()); - let 
s = s.to_string().replace(&['\"'][..], ""); - dict.push(s); - } - _ => { - return log_and_create_http_response( - &format!("Value in string dict is not a string: {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - } - } - // START read traces - - let traces_size = match rmp::decode::read_array_len(&mut reader) { - Ok(res) => res, - Err(err) => { - return log_and_create_http_response( - &format!("Error reading traces size: {err}"), + &format!("Error deserializing trace from request body: {err}"), StatusCode::INTERNAL_SERVER_ERROR, ); } }; - let mut traces: Vec> = Default::default(); - - for _ in 0..traces_size { - let spans_size = match rmp::decode::read_array_len(&mut reader) { - Ok(res) => res, - Err(err) => { - println!("ASTUYVE error reading spans size: {err}"); - return log_and_create_http_response( - &format!("ASTUYVE Error reading spans size: {err}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - }; - let mut trace: Vec = Default::default(); - - for _ in 0..spans_size { - let mut span: Span = Default::default(); - let span_size = rmp::decode::read_array_len(&mut reader).unwrap(); - assert!(span_size == 12); //todo convert to http error/response - //0 - service - match read_value(&mut reader).unwrap() { - Value::Integer(s) => { - let string_id = s.as_i64().unwrap() as usize; - span.service = dict[string_id].to_string(); - }, - val => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span service is not a string {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - }; - // 1 - name - match read_value(&mut reader).unwrap() { - Value::Integer(s) => { - let string_id = s.as_i64().unwrap() as usize; - span.name = dict[string_id].to_string(); - println!("ASTUYVE span name is {:?}", span.name); - }, - val => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span name is not a string {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - }; - // 2 - resource - match read_value(&mut reader).unwrap() { - 
Value::Integer(s) => { - let string_id = s.as_i64().unwrap() as usize; - span.resource = dict[string_id].to_string(); - }, - val => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span resource is not a string {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - }; - // 3 - trace_id - match read_value(&mut reader).unwrap() { - Value::Integer(i) => { - span.trace_id = i.as_u64().unwrap(); - }, - val => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span resource is not a string {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - }; - // 4 - span_id - match read_value(&mut reader).unwrap() { - Value::Integer(i) => { - span.span_id = i.as_u64().unwrap(); - }, - val => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span span_id is not a string {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - }; - // 5 - parent_id - match read_value(&mut reader).unwrap() { - Value::Integer(i) => { - span.parent_id = i.as_u64().unwrap() - }, - val => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span parent_id is not a string {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - }; - //6 - start - match read_value(&mut reader).unwrap() { - Value::Integer(i) => { - span.start = i.as_i64().unwrap() - }, - val => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span start is not a string {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - }; - //7 - duration - match read_value(&mut reader).unwrap() { - Value::Integer(i) => { - span.duration = i.as_i64().unwrap() - }, - val => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span duration is not a string {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - }; - //8 - error - match read_value(&mut reader).unwrap() { - Value::Integer(i) => { - span.error = i.as_i64().unwrap() as i32 - }, - val => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span error is not 
a string {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - } - //9 - meta - match read_value(&mut reader).unwrap() { - Value::Map(meta) => { - for (k, v) in meta.iter() { - match k { - Value::Integer(k) => { - match v { - Value::Integer(v) => { - let key_id = k.as_i64().unwrap() as usize; - let val_id = v.as_i64().unwrap() as usize; - let key = dict[key_id].to_string(); - let val = dict[val_id].to_string(); - span.meta.insert(key, val); - } - _ => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span meta value is not a string {v}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - } - } - _ => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span meta key is not a string {k}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - } - } - }, - val => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span meta is not a map {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - } - // 10 - metrics - match read_value(&mut reader).unwrap() { - Value::Map(metrics) => { - for (k, v) in metrics.iter() { - match k { - Value::Integer(k) => { - match v { - Value::Integer(v) => { - let key_id = k.as_i64().unwrap() as usize; - let key = dict[key_id].to_string(); - span.metrics.insert(key, v.as_f64().unwrap()); - }, - Value::F64(v) => { - let key_id = k.as_i64().unwrap() as usize; - let key = dict[key_id].to_string(); - span.metrics.insert(key, *v); - }, - _ => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span metrics value is not a float {v}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - } - } - _ => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span metrics key is not a string {k}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - } - } - }, - val => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span metrics is not a map {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - } - - // 11 - type - match read_value(&mut reader).unwrap() 
{ - Value::Integer(s) => { - let string_id = s.as_i64().unwrap() as usize; - span.r#type = dict[string_id].to_string(); - }, - val => { - return log_and_create_http_response( - &format!("ASTUYVE Value in span type is not a string {val}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - } - println!("ASTUYVE span is {:?}", span); - trace.push(span); - } - traces.push(trace); - } - - - - - - // let value: Value = read_value(&mut reader).unwrap(); - // println!("ASTUYVE rest of value after two read_array_len is {:?}", value); - // let dict = &value.as_array().unwrap()[0]; - // println!("ASTUYVE string dict is {:?}", dict); - // let compressed_traces = &value.as_array().unwrap()[1]; - // println!("ASTUYVE compressed traces is {:?}", compressed_traces); - let payload = trace_utils::collect_trace_chunks( traces, &tracer_header_tags, From e95c8eb9a7350c4e7b33a8cc2d339bec6ac1c14e Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Wed, 26 Jun 2024 16:22:46 -0400 Subject: [PATCH 10/25] feat: Update w/ libdatadog to pass trace encoding version --- bottlecap/Cargo.lock | 62 +++++++++---------------- bottlecap/src/traces/trace_processor.rs | 3 ++ 2 files changed, 26 insertions(+), 39 deletions(-) diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock index b559e4c1a..1c57f301d 100644 --- a/bottlecap/Cargo.lock +++ b/bottlecap/Cargo.lock @@ -151,7 +151,7 @@ dependencies = [ "ddsketch-agent", "figment", "fnv", - "hashbrown 0.14.5", + "hashbrown", "hex", "hmac", "hyper 0.14.29", @@ -202,9 +202,9 @@ checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "cc" -version = "1.0.100" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c891175c3fb232128f48de6590095e59198bbeb8620c310be349bfc3afd12c7b" +checksum = "ac367972e516d45567c7eafc73d24e1c193dcf200a8d94e9db7b3d38b349572d" [[package]] name = "cfg-if" @@ -282,7 +282,7 @@ dependencies = [ [[package]] name = "datadog-trace-normalization" version 
= "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#bdc97be7bc4d3d2f05e8c6d02d77cdd4aa099ca6" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#5aebb909d3766a53e25251907ca26b2d09614ea7" dependencies = [ "anyhow", "datadog-trace-protobuf", @@ -291,7 +291,7 @@ dependencies = [ [[package]] name = "datadog-trace-obfuscation" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#bdc97be7bc4d3d2f05e8c6d02d77cdd4aa099ca6" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#5aebb909d3766a53e25251907ca26b2d09614ea7" dependencies = [ "anyhow", "datadog-trace-protobuf", @@ -308,7 +308,7 @@ dependencies = [ [[package]] name = "datadog-trace-protobuf" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#bdc97be7bc4d3d2f05e8c6d02d77cdd4aa099ca6" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#5aebb909d3766a53e25251907ca26b2d09614ea7" dependencies = [ "prost 0.11.9", "serde", @@ -318,7 +318,7 @@ dependencies = [ [[package]] name = "datadog-trace-utils" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#bdc97be7bc4d3d2f05e8c6d02d77cdd4aa099ca6" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#5aebb909d3766a53e25251907ca26b2d09614ea7" dependencies = [ "anyhow", "bytes", @@ -343,7 +343,7 @@ dependencies = [ [[package]] name = "ddcommon" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#bdc97be7bc4d3d2f05e8c6d02d77cdd4aa099ca6" +source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#5aebb909d3766a53e25251907ca26b2d09614ea7" dependencies = [ "anyhow", "futures", @@ -388,9 +388,9 @@ dependencies = [ [[package]] name = "either" -version = "1.12.0" +version = "1.13.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "equivalent" @@ -595,19 +595,13 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap 2.2.6", + "indexmap", "slab", "tokio", "tokio-util", "tracing", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.14.5" @@ -823,16 +817,6 @@ dependencies = [ "unicode-normalization", ] -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - [[package]] name = "indexmap" version = "2.2.6" @@ -840,7 +824,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", - "hashbrown 0.14.5", + "hashbrown", ] [[package]] @@ -1090,7 +1074,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.2.6", + "indexmap", ] [[package]] @@ -1261,9 +1245,9 @@ dependencies = [ [[package]] name = "protobuf" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58678a64de2fced2bdec6bca052a6716a0efe692d6e3f53d1bda6a1def64cfc0" +checksum = "df67496db1a89596beaced1579212e9b7c53c22dca1d9745de00ead76573d514" dependencies = [ "bytes", "once_cell", @@ -1273,9 +1257,9 @@ dependencies = [ [[package]] name = "protobuf-codegen" -version = "3.4.0" +version = "3.5.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32777b0b3f6538d9d2e012b3fad85c7e4b9244b5958d04a6415f4333782b7a77" +checksum = "eab09155fad2d39333d3796f67845d43e29b266eea74f7bc93f153f707f126dc" dependencies = [ "anyhow", "once_cell", @@ -1288,12 +1272,12 @@ dependencies = [ [[package]] name = "protobuf-parse" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96cb37955261126624a25b5e6bda40ae34cf3989d52a783087ca6091b29b5642" +checksum = "1a16027030d4ec33e423385f73bb559821827e9ec18c50e7874e4d6de5a4e96f" dependencies = [ "anyhow", - "indexmap 1.9.3", + "indexmap", "log", "protobuf", "protobuf-support", @@ -1304,9 +1288,9 @@ dependencies = [ [[package]] name = "protobuf-support" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1ed294a835b0f30810e13616b1cd34943c6d1e84a8f3b0dcfe466d256c3e7e7" +checksum = "70e2d30ab1878b2e72d1e2fc23ff5517799c9929e2cf81a8516f9f4dcf2b9cf3" dependencies = [ "thiserror", ] @@ -1793,7 +1777,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.2.6", + "indexmap", "itoa", "ryu", "serde", diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 5e455338e..ca73dc9aa 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use std::sync::Arc; +use datadog_trace_utils::tracer_payload::TraceEncoding; use rmp; use async_trait::async_trait; @@ -102,6 +103,7 @@ impl TraceProcessor for ServerlessTraceProcessor { } }, true, // In mini agent, we always send agentless + TraceEncoding::V07 ); let send_data = SendData::new(body_size, payload, tracer_header_tags, &config.trace_intake); @@ -178,6 +180,7 @@ impl 
TraceProcessor for ServerlessTraceProcessor { } }, true, // In mini agent, we always send agentless + TraceEncoding::V07 ); let send_data = SendData::new(body_size, payload, tracer_header_tags, &config.trace_intake); From dcb19eb3446b4e1675a6321939c764e7d24c6b95 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Thu, 27 Jun 2024 11:14:19 -0400 Subject: [PATCH 11/25] feat: update w/ merged libdatadog changes --- bottlecap/Cargo.lock | 14 ++++---- bottlecap/Cargo.toml | 10 +++--- bottlecap/src/traces/stats_flusher.rs | 43 ++++++++++++------------- bottlecap/src/traces/trace_processor.rs | 2 +- 4 files changed, 33 insertions(+), 36 deletions(-) diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock index 1c57f301d..cceb50c68 100644 --- a/bottlecap/Cargo.lock +++ b/bottlecap/Cargo.lock @@ -269,7 +269,7 @@ dependencies = [ [[package]] name = "datadog-protos" version = "0.1.0" -source = "git+https://github.com/DataDog/saluki/#08fab4086401e3e5e3f1ad57013761040908870d" +source = "git+https://github.com/DataDog/saluki/#ecebc9a69134b01e9e3f2a17ae271289fc67f02d" dependencies = [ "bytes", "prost 0.12.6", @@ -282,7 +282,7 @@ dependencies = [ [[package]] name = "datadog-trace-normalization" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#5aebb909d3766a53e25251907ca26b2d09614ea7" +source = "git+https://github.com/DataDog/libdatadog#2d7534d6fbd496793418df993b97b7ec076d647b" dependencies = [ "anyhow", "datadog-trace-protobuf", @@ -291,7 +291,7 @@ dependencies = [ [[package]] name = "datadog-trace-obfuscation" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#5aebb909d3766a53e25251907ca26b2d09614ea7" +source = "git+https://github.com/DataDog/libdatadog#2d7534d6fbd496793418df993b97b7ec076d647b" dependencies = [ "anyhow", "datadog-trace-protobuf", @@ -308,7 +308,7 @@ dependencies = [ [[package]] name = "datadog-trace-protobuf" version = "10.0.0" -source = 
"git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#5aebb909d3766a53e25251907ca26b2d09614ea7" +source = "git+https://github.com/DataDog/libdatadog#2d7534d6fbd496793418df993b97b7ec076d647b" dependencies = [ "prost 0.11.9", "serde", @@ -318,7 +318,7 @@ dependencies = [ [[package]] name = "datadog-trace-utils" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#5aebb909d3766a53e25251907ca26b2d09614ea7" +source = "git+https://github.com/DataDog/libdatadog#2d7534d6fbd496793418df993b97b7ec076d647b" dependencies = [ "anyhow", "bytes", @@ -343,7 +343,7 @@ dependencies = [ [[package]] name = "ddcommon" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog?branch=aj/bottlecap-mini-agent#5aebb909d3766a53e25251907ca26b2d09614ea7" +source = "git+https://github.com/DataDog/libdatadog#2d7534d6fbd496793418df993b97b7ec076d647b" dependencies = [ "anyhow", "futures", @@ -368,7 +368,7 @@ dependencies = [ [[package]] name = "ddsketch-agent" version = "0.1.0" -source = "git+https://github.com/DataDog/saluki/#08fab4086401e3e5e3f1ad57013761040908870d" +source = "git+https://github.com/DataDog/saluki/#ecebc9a69134b01e9e3f2a17ae271289fc67f02d" dependencies = [ "datadog-protos", "float_eq", diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index 2347fe830..f38f006ff 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -10,11 +10,11 @@ anyhow = "1.0" chrono = { version = "0.4.38", features = ["serde", "std", "now"], default-features = false} datadog-protos = { version = "0.1.0", default-features = false, git = "https://github.com/DataDog/saluki/" } ddsketch-agent = { version = "0.1.0", default-features = false, git = "https://github.com/DataDog/saluki/" } -ddcommon = { version = "10.0", git = "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent" } -datadog-trace-protobuf = { version = "10.0.0", git = "https://github.com/DataDog/libdatadog", branch = 
"aj/bottlecap-mini-agent" } -datadog-trace-utils = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent"} -datadog-trace-normalization = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent" } -datadog-trace-obfuscation = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog", branch="aj/bottlecap-mini-agent" } +ddcommon = { version = "10.0", git = "https://github.com/DataDog/libdatadog" } +datadog-trace-protobuf = { version = "10.0.0", git = "https://github.com/DataDog/libdatadog" } +datadog-trace-utils = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog" } +datadog-trace-normalization = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog" } +datadog-trace-obfuscation = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog" } figment = { version = "0.10.15", default-features = false, features = ["yaml", "env"] } fnv = { version = "1.0.7", default-features = false } hashbrown = { version = "0.14.3", default-features = false, features = ["inline-more"] } diff --git a/bottlecap/src/traces/stats_flusher.rs b/bottlecap/src/traces/stats_flusher.rs index a10520ea6..a0ff215fe 100644 --- a/bottlecap/src/traces/stats_flusher.rs +++ b/bottlecap/src/traces/stats_flusher.rs @@ -8,8 +8,8 @@ use tokio::sync::{mpsc::Receiver, Mutex}; use datadog_trace_protobuf::pb; use datadog_trace_utils::stats_utils; - -use crate::traces::config::Config as TraceConfig; +use datadog_trace_utils::config_utils::trace_stats_url; +use crate::config; #[async_trait] pub trait StatsFlusher { @@ -17,27 +17,27 @@ pub trait StatsFlusher { /// implementing flushing logic that calls flush_stats. async fn start_stats_flusher( &self, - config: Arc, mut rx: Receiver, ); /// Flushes stats to the Datadog trace stats intake. 
- async fn flush_stats(&self, config: Arc, traces: Vec); + async fn flush_stats(&self, traces: Vec); + + async fn manual_flush(&self); } -#[derive(Clone, Copy)] -pub struct ServerlessStatsFlusher {} +#[derive(Clone)] +pub struct ServerlessStatsFlusher { + pub buffer: Arc>>, + pub config: Arc, +} #[async_trait] impl StatsFlusher for ServerlessStatsFlusher { async fn start_stats_flusher( &self, - config: Arc, mut rx: Receiver, ) { - let buffer: Arc>> = Arc::new(Mutex::new(Vec::new())); - - let buffer_producer = buffer.clone(); - let buffer_consumer = buffer.clone(); + let buffer_producer = self.buffer.clone(); tokio::spawn(async move { while let Some(stats_payload) = rx.recv().await { @@ -45,19 +45,16 @@ impl StatsFlusher for ServerlessStatsFlusher { buffer.push(stats_payload); } }); + } - loop { - tokio::time::sleep(time::Duration::from_secs(config.stats_flush_interval)).await; - - let mut buffer = buffer_consumer.lock().await; - if !buffer.is_empty() { - self.flush_stats(config.clone(), buffer.to_vec()).await; - buffer.clear(); - } + async fn manual_flush(&self) { + let mut buffer = self.buffer.lock().await; + if !buffer.is_empty() { + self.flush_stats(buffer.to_vec()).await; + buffer.clear(); } } - - async fn flush_stats(&self, config: Arc, stats: Vec) { + async fn flush_stats(&self, stats: Vec) { if stats.is_empty() { return; } @@ -77,8 +74,8 @@ impl StatsFlusher for ServerlessStatsFlusher { match stats_utils::send_stats_payload( serialized_stats_payload, - &config.trace_stats_intake, - config.trace_stats_intake.api_key.as_ref().unwrap(), + trace_stats_url(&self.config.site), + &self.config.api_key, ) .await { diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index ca73dc9aa..046fc2a54 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -70,7 +70,7 @@ impl TraceProcessor for ServerlessTraceProcessor { // deserialize traces from the request body, convert to protobuf 
structs (see trace-protobuf // crate) - let (body_size, traces) = match trace_utils::v5_get_traces_from_request_body(body).await { + let (body_size, traces) = match trace_utils::get_v05_traces_from_request_body(body).await { Ok(res) => res, Err(err) => { return log_and_create_http_response( From d53eb852e26ec157a479c87dfee07cda5b2bce5b Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Thu, 27 Jun 2024 13:09:54 -0400 Subject: [PATCH 12/25] feat: Refactor trace agent, reduce code duplication, enum for trace version. Pass trace provider. Manual stats flushing. Custom create endpoint until we clean up that code in libdatadog. --- bottlecap/src/bin/bottlecap/main.rs | 18 +++- bottlecap/src/tags/lambda/tags.rs | 5 + bottlecap/src/tags/provider.rs | 12 +++ bottlecap/src/traces/stats_flusher.rs | 13 ++- bottlecap/src/traces/trace_agent.rs | 20 +++- bottlecap/src/traces/trace_processor.rs | 132 ++++++------------------ 6 files changed, 88 insertions(+), 112 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 19c26adcc..3a0b089d0 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -38,8 +38,9 @@ use bottlecap::{ listener::TelemetryListener, }, traces::{ - config as TraceConfig, stats_flusher, stats_processor, trace_agent, + config as TraceConfig, stats_processor, trace_agent, trace_flusher::{self, TraceFlusher}, + stats_flusher::{self, StatsFlusher}, trace_processor, }, DOGSTATSD_PORT, EXTENSION_ACCEPT_FEATURE_HEADER, EXTENSION_FEATURES, EXTENSION_HOST, @@ -272,7 +273,10 @@ async fn extension_loop_active( }); let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor {}); - let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher {}); + let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher { + buffer: Arc::new(TokioMutex::new(Vec::new())), + config: Arc::clone(config), + }); let stats_processor = 
Arc::new(stats_processor::ServerlessStatsProcessor {}); let trace_config = match TraceConfig::Config::new() { @@ -284,13 +288,15 @@ async fn extension_loop_active( }; let trace_flusher_clone = trace_flusher.clone(); + let stats_flusher_clone = stats_flusher.clone(); let trace_agent = Box::new(trace_agent::TraceAgent { config: Arc::new(trace_config), trace_processor, trace_flusher: trace_flusher_clone, stats_processor, - stats_flusher, + stats_flusher: stats_flusher_clone, + tags_provider, }); tokio::spawn(async move { let res = trace_agent.start_trace_agent().await; @@ -404,7 +410,8 @@ async fn extension_loop_active( tokio::join!( logs_flusher.flush(), metrics_flusher.flush(), - trace_flusher.manual_flush() + trace_flusher.manual_flush(), + stats_flusher.manual_flush() ); break; } @@ -454,7 +461,8 @@ async fn extension_loop_active( tokio::join!( logs_flusher.flush(), metrics_flusher.flush(), - trace_flusher.manual_flush() + trace_flusher.manual_flush(), + stats_flusher.manual_flush() ); return Ok(()); } diff --git a/bottlecap/src/tags/lambda/tags.rs b/bottlecap/src/tags/lambda/tags.rs index 2739b6345..eaf820482 100644 --- a/bottlecap/src/tags/lambda/tags.rs +++ b/bottlecap/src/tags/lambda/tags.rs @@ -155,6 +155,11 @@ impl Lambda { pub fn get_function_arn(&self) -> Option<&String> { self.tags_map.get(FUNCTION_ARN_KEY) } + + #[must_use] + pub fn get_tags_map(&self) -> &hash_map::HashMap { + &self.tags_map + } } #[cfg(test)] diff --git a/bottlecap/src/tags/provider.rs b/bottlecap/src/tags/provider.rs index c1b15ed7a..c5ac6c6b6 100644 --- a/bottlecap/src/tags/provider.rs +++ b/bottlecap/src/tags/provider.rs @@ -46,11 +46,17 @@ impl Provider { pub fn get_canonical_id(&self) -> Option { self.tag_provider.get_canonical_id() } + + #[must_use] + pub fn get_tags_map(&self) -> &hash_map::HashMap { + self.tag_provider.get_tags_map() + } } trait GetTags { fn get_tags_vec(&self) -> Vec; fn get_canonical_id(&self) -> Option; + fn get_tags_map(&self) -> &hash_map::HashMap; } impl 
GetTags for TagProvider { @@ -65,6 +71,12 @@ impl GetTags for TagProvider { TagProvider::Lambda(lambda_tags) => lambda_tags.get_function_arn().cloned(), } } + + fn get_tags_map(&self) -> &hash_map::HashMap { + match self { + TagProvider::Lambda(lambda_tags) => lambda_tags.get_tags_map(), + } + } } #[cfg(test)] diff --git a/bottlecap/src/traces/stats_flusher.rs b/bottlecap/src/traces/stats_flusher.rs index a0ff215fe..90399942a 100644 --- a/bottlecap/src/traces/stats_flusher.rs +++ b/bottlecap/src/traces/stats_flusher.rs @@ -3,12 +3,14 @@ use async_trait::async_trait; use log::{debug, error, info}; -use std::{sync::Arc, time}; +use std::sync::Arc; +use std::str::FromStr; use tokio::sync::{mpsc::Receiver, Mutex}; use datadog_trace_protobuf::pb; use datadog_trace_utils::stats_utils; use datadog_trace_utils::config_utils::trace_stats_url; +use ddcommon::Endpoint; use crate::config; #[async_trait] @@ -72,9 +74,16 @@ impl StatsFlusher for ServerlessStatsFlusher { } }; + let stats_url = trace_stats_url(&self.config.site); + + let endpoint = Endpoint { + url: hyper::Uri::from_str(&stats_url).unwrap(), + api_key: Some(self.config.api_key.clone().into()), + }; + match stats_utils::send_stats_payload( serialized_stats_payload, - trace_stats_url(&self.config.site), + &endpoint, &self.config.api_key, ) .await diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 591fb4cfe..2d5cc4d04 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -15,6 +15,7 @@ use crate::traces::http_utils::log_and_create_http_response; use crate::traces::{ config as TraceConfig, stats_flusher, stats_processor, trace_flusher, trace_processor, }; +use crate::tags::provider; use datadog_trace_protobuf::pb; use datadog_trace_utils::trace_utils::SendData; @@ -32,6 +33,13 @@ pub struct TraceAgent { pub trace_flusher: Arc, pub stats_processor: Arc, pub stats_flusher: Arc, + pub tags_provider: Arc, +} + +#[derive(Clone, Copy)] +pub 
enum ApiVersion { + V04, + V05, } impl TraceAgent { @@ -61,11 +69,11 @@ impl TraceAgent { // start our stats flusher. let stats_flusher = self.stats_flusher.clone(); - let stats_config = self.config.clone(); + // let stats_config = self.config.clone(); tokio::spawn(async move { let stats_flusher = stats_flusher.clone(); stats_flusher - .start_stats_flusher(stats_config, stats_rx) + .start_stats_flusher(stats_rx) .await; }); @@ -73,6 +81,7 @@ impl TraceAgent { let trace_processor = self.trace_processor.clone(); let stats_processor = self.stats_processor.clone(); let endpoint_config = self.config.clone(); + let tags_provider = self.tags_provider.clone(); let make_svc = make_service_fn(move |_| { let trace_processor = trace_processor.clone(); @@ -82,6 +91,7 @@ impl TraceAgent { let stats_tx = stats_tx.clone(); let endpoint_config = endpoint_config.clone(); + let tags_provider = tags_provider.clone(); let service = service_fn(move |req| { TraceAgent::trace_endpoint_handler( @@ -91,6 +101,7 @@ impl TraceAgent { trace_tx.clone(), stats_processor.clone(), stats_tx.clone(), + tags_provider.clone() ) }); @@ -124,10 +135,11 @@ impl TraceAgent { trace_tx: Sender, stats_processor: Arc, stats_tx: Sender, + tags_provider: Arc ) -> http::Result> { match (req.method(), req.uri().path()) { (&Method::PUT | &Method::POST, V4_TRACE_ENDPOINT_PATH) => { - match trace_processor.process_traces_v4(config, req, trace_tx).await { + match trace_processor.process_traces(config, req, trace_tx, tags_provider, ApiVersion::V04).await { Ok(res) => Ok(res), Err(err) => log_and_create_http_response( &format!("Error processing traces: {err}"), @@ -136,7 +148,7 @@ impl TraceAgent { } } (&Method::PUT | &Method::POST, V5_TRACE_ENDPOINT_PATH) => { - match trace_processor.process_traces_v5(config, req, trace_tx).await { + match trace_processor.process_traces(config, req, trace_tx, tags_provider, ApiVersion::V05).await { Ok(res) => Ok(res), Err(err) => log_and_create_http_response( &format!("Error 
processing traces: {err}"), diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 046fc2a54..ee13fb7f1 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use std::sync::Arc; +use crate::tags::provider; use datadog_trace_utils::tracer_payload::TraceEncoding; use rmp; @@ -23,22 +24,19 @@ use crate::traces::{ http_utils::{self, log_and_create_http_response}, }; +use super::trace_agent::ApiVersion; + #[async_trait] pub trait TraceProcessor { /// Deserializes traces from a hyper request body and sends them through the provided tokio mpsc /// Sender. - async fn process_traces_v4( - &self, - config: Arc, - req: Request, - tx: Sender, - ) -> http::Result>; - - async fn process_traces_v5( + async fn process_traces( &self, config: Arc, req: Request, tx: Sender, + tags_provider: Arc, + version: ApiVersion ) -> http::Result>; } @@ -47,11 +45,13 @@ pub struct ServerlessTraceProcessor {} #[async_trait] impl TraceProcessor for ServerlessTraceProcessor { - async fn process_traces_v5( + async fn process_traces( &self, config: Arc, req: Request, tx: Sender, + tags_provider: Arc, + version: ApiVersion, ) -> http::Result> { info!("Recieved traces to process"); let (parts, body) = req.into_parts(); @@ -59,103 +59,36 @@ impl TraceProcessor for ServerlessTraceProcessor { if let Some(response) = http_utils::verify_request_content_length( &parts.headers, config.max_request_content_length, - "Error processing traces and verifying length", + "Error processing traces", ) { return response; } - println!("astuyve no error in verifying request content length"); - let tracer_header_tags = (&parts.headers).into(); // deserialize traces from the request body, convert to protobuf structs (see trace-protobuf // crate) - let (body_size, traces) = match trace_utils::get_v05_traces_from_request_body(body).await { - Ok(res) => res, - Err(err) => { - return 
log_and_create_http_response( - &format!("Error deserializing trace from request body: {err}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - }; - let payload = trace_utils::collect_trace_chunks( - traces, - &tracer_header_tags, - |chunk, root_span_index| { - trace_utils::set_serverless_root_span_tags( - &mut chunk.spans[root_span_index], - config.function_name.clone(), - &config.env_type, - ); - chunk.spans.retain(|span| { - return (span.name != "dns.lookup" && span.resource != "0.0.0.0") - || (span.name != "dns.lookup" && span.resource != "127.0.0.1"); - }); - for span in chunk.spans.iter_mut() { - debug!("ASTUYVE span is {:?}", span); - // trace_utils::enrich_span_with_mini_agent_metadata(span, &mini_agent_metadata); - // trace_utils::enrich_span_with_azure_metadata( - // span, - // config.mini_agent_version.as_str(), - // ); - obfuscate_span(span, &config.obfuscation_config); + let (body_size, traces) = match version { + ApiVersion::V04 => match trace_utils::get_traces_from_request_body(body).await { + Ok(res) => res, + Err(err) => { + return log_and_create_http_response( + &format!("Error deserializing trace from request body: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); } }, - true, // In mini agent, we always send agentless - TraceEncoding::V07 - ); - - let send_data = SendData::new(body_size, payload, tracer_header_tags, &config.trace_intake); - - // send trace payload to our trace flusher - match tx.send(send_data).await { - Ok(_) => { - return log_and_create_http_response( - "Successfully buffered traces to be flushed.", - StatusCode::ACCEPTED, - ); - } - Err(err) => { - return log_and_create_http_response( - &format!("Error sending traces to the trace flusher: {err}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); - } - } - } - - async fn process_traces_v4( - &self, - config: Arc, - req: Request, - tx: Sender, - ) -> http::Result> { - info!("Recieved traces to process"); - let (parts, body) = req.into_parts(); - - if let Some(response) = 
http_utils::verify_request_content_length( - &parts.headers, - config.max_request_content_length, - "Error processing traces", - ) { - return response; - } - - let tracer_header_tags = (&parts.headers).into(); - - // deserialize traces from the request body, convert to protobuf structs (see trace-protobuf - // crate) - let (body_size, traces) = match trace_utils::get_traces_from_request_body(body).await { - Ok(res) => res, - Err(err) => { - return log_and_create_http_response( - &format!("Error deserializing trace from request body: {err}"), - StatusCode::INTERNAL_SERVER_ERROR, - ); + ApiVersion::V05 => match trace_utils::get_v05_traces_from_request_body(body).await { + Ok(res) => res, + Err(err) => { + return log_and_create_http_response( + &format!("Error deserializing trace from request body: {err}"), + StatusCode::INTERNAL_SERVER_ERROR, + ); + } } }; - + let payload = trace_utils::collect_trace_chunks( traces, &tracer_header_tags, @@ -170,16 +103,13 @@ impl TraceProcessor for ServerlessTraceProcessor { || (span.name != "dns.lookup" && span.resource != "127.0.0.1"); }); for span in chunk.spans.iter_mut() { - debug!("ASTUYVE span is {:?}", span); - // trace_utils::enrich_span_with_mini_agent_metadata(span, &mini_agent_metadata); - // trace_utils::enrich_span_with_azure_metadata( - // span, - // config.mini_agent_version.as_str(), - // ); + tags_provider.get_tags_map().iter().for_each(|(k, v)| { + span.meta.insert(k.clone(), v.clone()); + }); obfuscate_span(span, &config.obfuscation_config); } }, - true, // In mini agent, we always send agentless + true, TraceEncoding::V07 ); From 7171b61dcd139f04864b3c52161f78b362a98861 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Thu, 27 Jun 2024 15:58:16 -0400 Subject: [PATCH 13/25] feat: Unify config, remove trace config. 
Tests pass --- bottlecap/src/bin/bottlecap/main.rs | 19 +- bottlecap/src/traces/config.rs | 212 ------------------- bottlecap/src/traces/mod.rs | 1 - bottlecap/src/traces/stats_processor.rs | 7 +- bottlecap/src/traces/trace_agent.rs | 12 +- bottlecap/src/traces/trace_processor.rs | 266 ++++++++++++------------ 6 files changed, 154 insertions(+), 363 deletions(-) delete mode 100644 bottlecap/src/traces/config.rs diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 3a0b089d0..db74f75ee 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -38,7 +38,7 @@ use bottlecap::{ listener::TelemetryListener, }, traces::{ - config as TraceConfig, stats_processor, trace_agent, + stats_processor, trace_agent, trace_flusher::{self, TraceFlusher}, stats_flusher::{self, StatsFlusher}, trace_processor, @@ -47,6 +47,7 @@ use bottlecap::{ EXTENSION_ID_HEADER, EXTENSION_NAME, EXTENSION_NAME_HEADER, EXTENSION_ROUTE, LAMBDA_RUNTIME_SLUG, TELEMETRY_PORT, }; +use datadog_trace_obfuscation::obfuscation_config; use decrypt::resolve_secrets; use std::{ collections::hash_map, @@ -271,7 +272,11 @@ async fn extension_loop_active( let trace_flusher = Arc::new(trace_flusher::ServerlessTraceFlusher { buffer: Arc::new(TokioMutex::new(Vec::new())), }); - let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor {}); + let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor { + obfuscation_config: Arc::new(obfuscation_config::ObfuscationConfig::new().map_err( + |e| Error::new(std::io::ErrorKind::InvalidData, e.to_string()), + )?), + }); let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher { buffer: Arc::new(TokioMutex::new(Vec::new())), @@ -279,19 +284,11 @@ async fn extension_loop_active( }); let stats_processor = Arc::new(stats_processor::ServerlessStatsProcessor {}); - let trace_config = match TraceConfig::Config::new() { - Ok(config) => config, - Err(e) => { - error!("Error 
loading trace config: {e:?}"); - panic!("{e}"); - } - }; - let trace_flusher_clone = trace_flusher.clone(); let stats_flusher_clone = stats_flusher.clone(); let trace_agent = Box::new(trace_agent::TraceAgent { - config: Arc::new(trace_config), + config: Arc::clone(config), trace_processor, trace_flusher: trace_flusher_clone, stats_processor, diff --git a/bottlecap/src/traces/config.rs b/bottlecap/src/traces/config.rs deleted file mode 100644 index b5b8601fc..000000000 --- a/bottlecap/src/traces/config.rs +++ /dev/null @@ -1,212 +0,0 @@ -// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ -// SPDX-License-Identifier: Apache-2.0 - -use ddcommon::Endpoint; -use std::borrow::Cow; -use std::env; -use std::str::FromStr; - -use datadog_trace_obfuscation::obfuscation_config; -use datadog_trace_utils::config_utils::{ - read_cloud_env, trace_intake_url, trace_intake_url_prefixed, trace_stats_url, - trace_stats_url_prefixed, -}; -use datadog_trace_utils::trace_utils; - -#[derive(Debug)] -pub struct Config { - pub dd_site: String, - pub env_type: trace_utils::EnvironmentType, - pub function_name: Option, - pub max_request_content_length: usize, - pub trace_agent_version: String, - pub obfuscation_config: obfuscation_config::ObfuscationConfig, - pub os: String, - /// how often to flush stats, in seconds - pub stats_flush_interval: u64, - /// how often to flush traces, in seconds - pub trace_flush_interval: u64, - pub trace_intake: Endpoint, - pub trace_stats_intake: Endpoint, - /// timeout for environment verification, in milliseconds - pub verify_env_timeout: u64, -} - -impl Config { - pub fn new() -> Result> { - let api_key: Cow = env::var("DD_API_KEY") - .map_err(|_| anyhow::anyhow!("DD_API_KEY environment variable is not set"))? - .into(); - - let (function_name, env_type) = read_cloud_env().ok_or_else(|| { - anyhow::anyhow!("Unable to identify environment. 
Shutting down Trace Agent.") - })?; - - let dd_site = env::var("DD_SITE").unwrap_or_else(|_| "datadoghq.com".to_string()); - - // construct the trace & trace stats intake urls based on DD_SITE env var (to flush traces & - // trace stats to) - let mut trace_intake_url = trace_intake_url(&dd_site); - let mut trace_stats_intake_url = trace_stats_url(&dd_site); - - // DD_APM_DD_URL env var will primarily be used for integration tests - // overrides the entire trace/trace stats intake url prefix - if let Ok(endpoint_prefix) = env::var("DD_APM_DD_URL") { - trace_intake_url = trace_intake_url_prefixed(&endpoint_prefix); - trace_stats_intake_url = trace_stats_url_prefixed(&endpoint_prefix); - }; - - let obfuscation_config = obfuscation_config::ObfuscationConfig::new().map_err(|err| { - anyhow::anyhow!( - "Error creating obfuscation config, Trace Agent will not start. Error: {err}", - ) - })?; - - let trace_agent_version: String = env!("CARGO_PKG_VERSION").to_string(); - - Ok(Config { - function_name: Some(function_name), - env_type, - os: env::consts::OS.to_string(), - max_request_content_length: 10 * 1024 * 1024, // 10MB in Bytes - trace_flush_interval: 3, - stats_flush_interval: 3, - verify_env_timeout: 100, - dd_site, - trace_intake: Endpoint { - url: hyper::Uri::from_str(&trace_intake_url).unwrap(), - api_key: Some(api_key.clone()), - }, - trace_stats_intake: Endpoint { - url: hyper::Uri::from_str(&trace_stats_intake_url).unwrap(), - api_key: Some(api_key), - }, - obfuscation_config, - trace_agent_version, - }) - } -} - -#[cfg(test)] -mod tests { - use duplicate::duplicate_item; - use serial_test::serial; - use std::env; - - use crate::config; - - #[test] - #[serial] - fn test_error_if_unable_to_identify_env() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - - let config = config::Config::new(); - assert!(config.is_err()); - assert_eq!( - config.unwrap_err().to_string(), - "Unable to identify environment. Shutting down Mini Agent." 
- ); - env::remove_var("DD_API_KEY"); - } - - #[test] - #[serial] - fn test_error_if_no_api_key_env_var() { - let config = config::Config::new(); - assert!(config.is_err()); - assert_eq!( - config.unwrap_err().to_string(), - "DD_API_KEY environment variable is not set" - ); - } - - #[test] - #[serial] - fn test_default_trace_and_trace_stats_urls() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("K_SERVICE", "function_name"); - let config_res = config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!( - config.trace_intake.url, - "https://trace.agent.datadoghq.com/api/v0.2/traces" - ); - assert_eq!( - config.trace_stats_intake.url, - "https://trace.agent.datadoghq.com/api/v0.2/stats" - ); - env::remove_var("DD_API_KEY"); - env::remove_var("K_SERVICE"); - } - - #[duplicate_item( - test_name dd_site expected_url; - [test_us1_trace_intake_url] ["datadoghq.com"] ["https://trace.agent.datadoghq.com/api/v0.2/traces"]; - [test_us3_trace_intake_url] ["us3.datadoghq.com"] ["https://trace.agent.us3.datadoghq.com/api/v0.2/traces"]; - [test_us5_trace_intake_url] ["us5.datadoghq.com"] ["https://trace.agent.us5.datadoghq.com/api/v0.2/traces"]; - [test_eu_trace_intake_url] ["datadoghq.eu"] ["https://trace.agent.datadoghq.eu/api/v0.2/traces"]; - [test_ap1_trace_intake_url] ["ap1.datadoghq.com"] ["https://trace.agent.ap1.datadoghq.com/api/v0.2/traces"]; - [test_gov_trace_intake_url] ["ddog-gov.com"] ["https://trace.agent.ddog-gov.com/api/v0.2/traces"]; - )] - #[test] - #[serial] - fn test_name() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("K_SERVICE", "function_name"); - env::set_var("DD_SITE", dd_site); - let config_res = config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!(config.trace_intake.url, expected_url); - env::remove_var("DD_API_KEY"); - env::remove_var("DD_SITE"); - env::remove_var("K_SERVICE"); - } - - #[duplicate_item( - test_name 
dd_site expected_url; - [test_us1_trace_stats_intake_url] ["datadoghq.com"] ["https://trace.agent.datadoghq.com/api/v0.2/stats"]; - [test_us3_trace_stats_intake_url] ["us3.datadoghq.com"] ["https://trace.agent.us3.datadoghq.com/api/v0.2/stats"]; - [test_us5_trace_stats_intake_url] ["us5.datadoghq.com"] ["https://trace.agent.us5.datadoghq.com/api/v0.2/stats"]; - [test_eu_trace_stats_intake_url] ["datadoghq.eu"] ["https://trace.agent.datadoghq.eu/api/v0.2/stats"]; - [test_ap1_trace_stats_intake_url] ["ap1.datadoghq.com"] ["https://trace.agent.ap1.datadoghq.com/api/v0.2/stats"]; - [test_gov_trace_stats_intake_url] ["ddog-gov.com"] ["https://trace.agent.ddog-gov.com/api/v0.2/stats"]; - )] - #[test] - #[serial] - fn test_name() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("K_SERVICE", "function_name"); - env::set_var("DD_SITE", dd_site); - let config_res = config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!(config.trace_stats_intake.url, expected_url); - env::remove_var("DD_API_KEY"); - env::remove_var("DD_SITE"); - env::remove_var("K_SERVICE"); - } - - #[test] - #[serial] - fn test_set_custom_trace_and_trace_stats_intake_url() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("K_SERVICE", "function_name"); - env::set_var("DD_APM_DD_URL", "http://127.0.0.1:3333"); - let config_res = config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!( - config.trace_intake.url, - "http://127.0.0.1:3333/api/v0.2/traces" - ); - assert_eq!( - config.trace_stats_intake.url, - "http://127.0.0.1:3333/api/v0.2/stats" - ); - env::remove_var("DD_API_KEY"); - env::remove_var("DD_APM_DD_URL"); - env::remove_var("K_SERVICE"); - } -} diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index dd6b476eb..479303f28 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -1,7 +1,6 @@ // Copyright 2023-Present Datadog, Inc. 
https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -pub mod config; pub mod http_utils; pub mod stats_flusher; pub mod stats_processor; diff --git a/bottlecap/src/traces/stats_processor.rs b/bottlecap/src/traces/stats_processor.rs index ea48d6462..d5c02ec60 100644 --- a/bottlecap/src/traces/stats_processor.rs +++ b/bottlecap/src/traces/stats_processor.rs @@ -1,7 +1,6 @@ // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use async_trait::async_trait; @@ -12,8 +11,8 @@ use tokio::sync::mpsc::Sender; use datadog_trace_protobuf::pb; use datadog_trace_utils::stats_utils; -use crate::traces::config::Config as TraceConfig; use crate::traces::http_utils::{self, log_and_create_http_response}; +use super::trace_agent::MAX_CONTENT_LENGTH; #[async_trait] pub trait StatsProcessor { @@ -21,7 +20,6 @@ pub trait StatsProcessor { /// the provided tokio mpsc Sender. async fn process_stats( &self, - config: Arc, req: Request, tx: Sender, ) -> http::Result>; @@ -34,7 +32,6 @@ pub struct ServerlessStatsProcessor {} impl StatsProcessor for ServerlessStatsProcessor { async fn process_stats( &self, - config: Arc, req: Request, tx: Sender, ) -> http::Result> { @@ -43,7 +40,7 @@ impl StatsProcessor for ServerlessStatsProcessor { if let Some(response) = http_utils::verify_request_content_length( &parts.headers, - config.max_request_content_length, + MAX_CONTENT_LENGTH, "Error processing trace stats", ) { return response; diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 2d5cc4d04..9996a3049 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -12,10 +12,9 @@ use tokio::sync::mpsc::{self, Receiver, Sender}; use tracing::{debug, error, info}; use crate::traces::http_utils::log_and_create_http_response; -use crate::traces::{ - config as TraceConfig, stats_flusher, stats_processor, 
trace_flusher, trace_processor, -}; +use crate::traces::{stats_flusher, stats_processor, trace_flusher, trace_processor}; use crate::tags::provider; +use crate::config; use datadog_trace_protobuf::pb; use datadog_trace_utils::trace_utils::SendData; @@ -26,9 +25,10 @@ const STATS_ENDPOINT_PATH: &str = "/v0.6/stats"; const INFO_ENDPOINT_PATH: &str = "/info"; const TRACER_PAYLOAD_CHANNEL_BUFFER_SIZE: usize = 10; const STATS_PAYLOAD_CHANNEL_BUFFER_SIZE: usize = 10; +pub const MAX_CONTENT_LENGTH: usize = 10 * 1024 * 1024; pub struct TraceAgent { - pub config: Arc, + pub config: Arc, pub trace_processor: Arc, pub trace_flusher: Arc, pub stats_processor: Arc, @@ -129,7 +129,7 @@ impl TraceAgent { } async fn trace_endpoint_handler( - config: Arc, + config: Arc, req: Request, trace_processor: Arc, trace_tx: Sender, @@ -157,7 +157,7 @@ impl TraceAgent { } } (&Method::PUT | &Method::POST, STATS_ENDPOINT_PATH) => { - match stats_processor.process_stats(config, req, stats_tx).await { + match stats_processor.process_stats(req, stats_tx).await { Ok(res) => Ok(res), Err(err) => log_and_create_http_response( &format!("Error processing trace stats: {err}"), diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index ee13fb7f1..d95dfa2b9 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -2,29 +2,25 @@ // SPDX-License-Identifier: Apache-2.0 use std::sync::Arc; +use std::str::FromStr; +use ddcommon::Endpoint; use crate::tags::provider; +use datadog_trace_obfuscation::obfuscation_config; +use datadog_trace_utils::config_utils::trace_intake_url; use datadog_trace_utils::tracer_payload::TraceEncoding; -use rmp; use async_trait::async_trait; -use hyper::{http, Body, Request, Response, body::Buf, StatusCode}; -use rmpv::decode::read_value; -use rmpv::Value; -use rmp::decode::read_str_len; +use hyper::{http, Body, Request, Response, StatusCode}; use tokio::sync::mpsc::Sender; -use tracing::{debug, 
info}; +use tracing::info; use datadog_trace_obfuscation::obfuscate::obfuscate_span; use datadog_trace_utils::trace_utils::SendData; use datadog_trace_utils::trace_utils::{self}; -use datadog_trace_protobuf::pb::{self, Span, TraceChunk}; +use crate::config; +use crate::traces::http_utils::{self, log_and_create_http_response}; -use crate::traces::{ - config::Config as TraceConfig, - http_utils::{self, log_and_create_http_response}, -}; - -use super::trace_agent::ApiVersion; +use super::trace_agent::{ApiVersion, MAX_CONTENT_LENGTH}; #[async_trait] pub trait TraceProcessor { @@ -32,7 +28,7 @@ pub trait TraceProcessor { /// Sender. async fn process_traces( &self, - config: Arc, + config: Arc, req: Request, tx: Sender, tags_provider: Arc, @@ -40,14 +36,16 @@ pub trait TraceProcessor { ) -> http::Result>; } -#[derive(Clone, Copy)] -pub struct ServerlessTraceProcessor {} +#[derive(Clone)] +pub struct ServerlessTraceProcessor { + pub obfuscation_config: Arc, +} #[async_trait] impl TraceProcessor for ServerlessTraceProcessor { async fn process_traces( &self, - config: Arc, + config: Arc, req: Request, tx: Sender, tags_provider: Arc, @@ -58,7 +56,7 @@ impl TraceProcessor for ServerlessTraceProcessor { if let Some(response) = http_utils::verify_request_content_length( &parts.headers, - config.max_request_content_length, + MAX_CONTENT_LENGTH, "Error processing traces", ) { return response; @@ -92,12 +90,12 @@ impl TraceProcessor for ServerlessTraceProcessor { let payload = trace_utils::collect_trace_chunks( traces, &tracer_header_tags, - |chunk, root_span_index| { - trace_utils::set_serverless_root_span_tags( - &mut chunk.spans[root_span_index], - config.function_name.clone(), - &config.env_type, - ); + |chunk, _root_span_index| { + // trace_utils::set_serverless_root_span_tags( + // &mut chunk.spans[root_span_index], + // config.function_name.clone(), + // &config.env_type, + // ); chunk.spans.retain(|span| { return (span.name != "dns.lookup" && span.resource != "0.0.0.0") || 
(span.name != "dns.lookup" && span.resource != "127.0.0.1"); @@ -106,14 +104,22 @@ impl TraceProcessor for ServerlessTraceProcessor { tags_provider.get_tags_map().iter().for_each(|(k, v)| { span.meta.insert(k.clone(), v.clone()); }); - obfuscate_span(span, &config.obfuscation_config); + // TODO(astuyve) generalize this and delegate to an enum + span.meta.insert("origin".to_string(), "lambda".to_string()); + span.meta.insert("_dd.origin".to_string(), "lambda".to_string()); + obfuscate_span(span, &self.obfuscation_config); } }, true, TraceEncoding::V07 ); + let intake_url = trace_intake_url(&config.site); + let endpoint = Endpoint { + url: hyper::Uri::from_str(&intake_url).unwrap(), + api_key: Some(config.api_key.clone().into()), + }; - let send_data = SendData::new(body_size, payload, tracer_header_tags, &config.trace_intake); + let send_data = SendData::new(body_size, payload, tracer_header_tags, &endpoint); // send trace payload to our trace flusher match tx.send(send_data).await { @@ -143,17 +149,17 @@ mod tests { time::{SystemTime, UNIX_EPOCH}, }; use tokio::sync::mpsc::{self, Receiver, Sender}; + use serde_json::json; - use crate::traces::{ - config::Config, - trace_processor::{self, TraceProcessor}, - }; + use crate::traces::trace_processor::{self, TraceProcessor}; + use crate::config::Config; + use crate::LAMBDA_RUNTIME_SLUG; + use crate::tags::provider::Provider; use datadog_trace_protobuf::pb; use datadog_trace_utils::{ - test_utils::{create_test_json_span, create_test_span}, trace_utils, + tracer_payload::TracerPayloadCollection }; - use ddcommon::Endpoint; fn get_current_timestamp_nanos() -> i64 { SystemTime::now() @@ -162,29 +168,93 @@ mod tests { .as_nanos() as i64 } - fn create_test_config() -> Config { - Config { - function_name: Some("dummy_function_name".to_string()), - max_request_content_length: 10 * 1024 * 1024, - trace_flush_interval: 3, - stats_flush_interval: 3, - verify_env_timeout: 100, - trace_intake: Endpoint { - url: 
hyper::Uri::from_static("https://trace.agent.notdog.com/traces"), - api_key: Some("dummy_api_key".into()), - }, - trace_stats_intake: Endpoint { - url: hyper::Uri::from_static("https://trace.agent.notdog.com/stats"), - api_key: Some("dummy_api_key".into()), - }, - dd_site: "datadoghq.com".to_string(), - env_type: trace_utils::EnvironmentType::CloudFunction, - os: "linux".to_string(), - obfuscation_config: ObfuscationConfig::new().unwrap(), - mini_agent_version: "0.1.0".to_string(), + fn create_test_config() -> Arc { + let config = Arc::new(Config { + service: Some("test-service".to_string()), + tags: Some("test:tag,env:test".to_string()), + ..Config::default() + }); + config + } + + fn create_tags_provider(config: Arc) -> Arc { + let mut metadata = HashMap::new(); + metadata.insert( + "function_arn".to_string(), + "arn:aws:lambda:us-west-2:123456789012:function:my-function".to_string(), + ); + let provider = Provider::new(config, LAMBDA_RUNTIME_SLUG.to_string(), &metadata); + Arc::new(provider) + } + fn create_test_span( + trace_id: u64, + span_id: u64, + parent_id: u64, + start: i64, + is_top_level: bool, + tags_provider: Arc, + ) -> pb::Span { + let mut meta: HashMap = tags_provider.get_tags_map().clone(); + meta.insert("runtime-id".to_string(), "test-runtime-id-value".to_string()); + + let mut span = pb::Span { + trace_id, + span_id, + service: "test-service".to_string(), + name: "test_name".to_string(), + resource: "test-resource".to_string(), + parent_id, + start, + duration: 5, + error: 0, + meta: meta.clone(), + metrics: HashMap::new(), + r#type: "".to_string(), + meta_struct: HashMap::new(), + span_links: vec![], + }; + if is_top_level { + span.metrics.insert("_top_level".to_string(), 1.0); + span.meta + .insert("_dd.origin".to_string(), "lambda".to_string()); + span.meta + .insert("origin".to_string(), "lambda".to_string()); + span.meta.insert( + "functionname".to_string(), + "my-function".to_string(), + ); + span.r#type = "".to_string(); } + span } + fn 
create_test_json_span( + trace_id: u64, + span_id: u64, + parent_id: u64, + start: i64, + ) -> serde_json::Value { + json!( + { + "trace_id": trace_id, + "span_id": span_id, + "service": "test-service", + "name": "test_name", + "resource": "test-resource", + "parent_id": parent_id, + "start": start, + "duration": 5, + "error": 0, + "meta": { + "service": "test-service", + "env": "test-env", + "runtime-id": "test-runtime-id-value", + }, + "metrics": {}, + "meta_struct": {}, + } + ) + } #[tokio::test] #[cfg_attr(miri, ignore)] async fn test_process_trace() { @@ -208,12 +278,18 @@ mod tests { .body(hyper::body::Body::from(bytes)) .unwrap(); - let trace_processor = trace_processor::ServerlessTraceProcessor {}; + let trace_processor = trace_processor::ServerlessTraceProcessor { + obfuscation_config: Arc::new(ObfuscationConfig::new().unwrap()) + }; + let config = create_test_config(); + let tags_provider = create_tags_provider(config.clone()); let res = trace_processor .process_traces( + config, request, tx, - Arc::new(trace_utils::MiniAgentMetadata::default()), + tags_provider.clone(), + crate::traces::trace_agent::ApiVersion::V04, ) .await; assert!(res.is_ok()); @@ -231,7 +307,7 @@ mod tests { chunks: vec![pb::TraceChunk { priority: i8::MIN as i32, origin: "".to_string(), - spans: vec![create_test_span(11, 222, 333, start, true)], + spans: vec![create_test_span(11, 222, 333, start, true, tags_provider)], tags: HashMap::new(), dropped_trace: false, }], @@ -241,79 +317,13 @@ mod tests { app_version: "".to_string(), }; - assert_eq!( - expected_tracer_payload, - tracer_payload.unwrap().get_payloads()[0] - ); - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_process_trace_top_level_span_set() { - let (tx, mut rx): ( - Sender, - Receiver, - ) = mpsc::channel(1); - - let start = get_current_timestamp_nanos(); + let received_payload = + if let TracerPayloadCollection::V07(payload) = tracer_payload.unwrap().get_payloads() { + Some(payload[0].clone()) + } else { 
+ None + }; - let json_trace = vec![ - create_test_json_span(11, 333, 222, start), - create_test_json_span(11, 222, 0, start), - create_test_json_span(11, 444, 333, start), - ]; - - let bytes = rmp_serde::to_vec(&vec![json_trace]).unwrap(); - let request = Request::builder() - .header("datadog-meta-tracer-version", "4.0.0") - .header("datadog-meta-lang", "nodejs") - .header("datadog-meta-lang-version", "v19.7.0") - .header("datadog-meta-lang-interpreter", "v8") - .header("datadog-container-id", "33") - .header("content-length", "100") - .body(hyper::body::Body::from(bytes)) - .unwrap(); - - let trace_processor = trace_processor::ServerlessTraceProcessor {}; - let res = trace_processor - .process_traces( - Arc::new(create_test_config()), - request, - tx, - Arc::new(trace_utils::MiniAgentMetadata::default()), - ) - .await; - assert!(res.is_ok()); - - let tracer_payload = rx.recv().await; - - assert!(tracer_payload.is_some()); - - let expected_tracer_payload = pb::TracerPayload { - container_id: "33".to_string(), - language_name: "nodejs".to_string(), - language_version: "v19.7.0".to_string(), - tracer_version: "4.0.0".to_string(), - runtime_id: "test-runtime-id-value".to_string(), - chunks: vec![pb::TraceChunk { - priority: i8::MIN as i32, - origin: "".to_string(), - spans: vec![ - create_test_span(11, 333, 222, start, false), - create_test_span(11, 222, 0, start, true), - create_test_span(11, 444, 333, start, false), - ], - tags: HashMap::new(), - dropped_trace: false, - }], - tags: HashMap::new(), - env: "test-env".to_string(), - hostname: "".to_string(), - app_version: "".to_string(), - }; - assert_eq!( - expected_tracer_payload, - tracer_payload.unwrap().get_payloads()[0] - ); + assert_eq!(expected_tracer_payload, received_payload.unwrap()); } } From ed76cf3e66d8c57712b6619961dce5c8267dbba8 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Thu, 27 Jun 2024 15:58:31 -0400 Subject: [PATCH 14/25] feat: fmt --- bottlecap/src/bin/bottlecap/main.rs | 9 ++--- 
bottlecap/src/tags/provider.rs | 4 +-- bottlecap/src/traces/stats_flusher.rs | 22 +++++------- bottlecap/src/traces/stats_processor.rs | 2 +- bottlecap/src/traces/trace_agent.rs | 22 +++++++----- bottlecap/src/traces/trace_processor.rs | 46 ++++++++++++------------- 6 files changed, 51 insertions(+), 54 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index db74f75ee..4657e2f93 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -38,9 +38,9 @@ use bottlecap::{ listener::TelemetryListener, }, traces::{ + stats_flusher::{self, StatsFlusher}, stats_processor, trace_agent, trace_flusher::{self, TraceFlusher}, - stats_flusher::{self, StatsFlusher}, trace_processor, }, DOGSTATSD_PORT, EXTENSION_ACCEPT_FEATURE_HEADER, EXTENSION_FEATURES, EXTENSION_HOST, @@ -273,9 +273,10 @@ async fn extension_loop_active( buffer: Arc::new(TokioMutex::new(Vec::new())), }); let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor { - obfuscation_config: Arc::new(obfuscation_config::ObfuscationConfig::new().map_err( - |e| Error::new(std::io::ErrorKind::InvalidData, e.to_string()), - )?), + obfuscation_config: Arc::new( + obfuscation_config::ObfuscationConfig::new() + .map_err(|e| Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?, + ), }); let stats_flusher = Arc::new(stats_flusher::ServerlessStatsFlusher { diff --git a/bottlecap/src/tags/provider.rs b/bottlecap/src/tags/provider.rs index c5ac6c6b6..a3a6881df 100644 --- a/bottlecap/src/tags/provider.rs +++ b/bottlecap/src/tags/provider.rs @@ -46,7 +46,7 @@ impl Provider { pub fn get_canonical_id(&self) -> Option { self.tag_provider.get_canonical_id() } - + #[must_use] pub fn get_tags_map(&self) -> &hash_map::HashMap { self.tag_provider.get_tags_map() @@ -71,7 +71,7 @@ impl GetTags for TagProvider { TagProvider::Lambda(lambda_tags) => lambda_tags.get_function_arn().cloned(), } } - + fn get_tags_map(&self) -> 
&hash_map::HashMap { match self { TagProvider::Lambda(lambda_tags) => lambda_tags.get_tags_map(), diff --git a/bottlecap/src/traces/stats_flusher.rs b/bottlecap/src/traces/stats_flusher.rs index 90399942a..a0783ca83 100644 --- a/bottlecap/src/traces/stats_flusher.rs +++ b/bottlecap/src/traces/stats_flusher.rs @@ -3,24 +3,21 @@ use async_trait::async_trait; use log::{debug, error, info}; -use std::sync::Arc; use std::str::FromStr; +use std::sync::Arc; use tokio::sync::{mpsc::Receiver, Mutex}; +use crate::config; use datadog_trace_protobuf::pb; -use datadog_trace_utils::stats_utils; use datadog_trace_utils::config_utils::trace_stats_url; +use datadog_trace_utils::stats_utils; use ddcommon::Endpoint; -use crate::config; #[async_trait] pub trait StatsFlusher { /// Starts a stats flusher that listens for stats payloads sent to the tokio mpsc Receiver, /// implementing flushing logic that calls flush_stats. - async fn start_stats_flusher( - &self, - mut rx: Receiver, - ); + async fn start_stats_flusher(&self, mut rx: Receiver); /// Flushes stats to the Datadog trace stats intake. 
async fn flush_stats(&self, traces: Vec); @@ -35,10 +32,7 @@ pub struct ServerlessStatsFlusher { #[async_trait] impl StatsFlusher for ServerlessStatsFlusher { - async fn start_stats_flusher( - &self, - mut rx: Receiver, - ) { + async fn start_stats_flusher(&self, mut rx: Receiver) { let buffer_producer = self.buffer.clone(); tokio::spawn(async move { @@ -77,9 +71,9 @@ impl StatsFlusher for ServerlessStatsFlusher { let stats_url = trace_stats_url(&self.config.site); let endpoint = Endpoint { - url: hyper::Uri::from_str(&stats_url).unwrap(), - api_key: Some(self.config.api_key.clone().into()), - }; + url: hyper::Uri::from_str(&stats_url).unwrap(), + api_key: Some(self.config.api_key.clone().into()), + }; match stats_utils::send_stats_payload( serialized_stats_payload, diff --git a/bottlecap/src/traces/stats_processor.rs b/bottlecap/src/traces/stats_processor.rs index d5c02ec60..94ee05c87 100644 --- a/bottlecap/src/traces/stats_processor.rs +++ b/bottlecap/src/traces/stats_processor.rs @@ -11,8 +11,8 @@ use tokio::sync::mpsc::Sender; use datadog_trace_protobuf::pb; use datadog_trace_utils::stats_utils; -use crate::traces::http_utils::{self, log_and_create_http_response}; use super::trace_agent::MAX_CONTENT_LENGTH; +use crate::traces::http_utils::{self, log_and_create_http_response}; #[async_trait] pub trait StatsProcessor { diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 9996a3049..fdba8c109 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -11,10 +11,10 @@ use std::time::Instant; use tokio::sync::mpsc::{self, Receiver, Sender}; use tracing::{debug, error, info}; +use crate::config; +use crate::tags::provider; use crate::traces::http_utils::log_and_create_http_response; use crate::traces::{stats_flusher, stats_processor, trace_flusher, trace_processor}; -use crate::tags::provider; -use crate::config; use datadog_trace_protobuf::pb; use datadog_trace_utils::trace_utils::SendData; 
@@ -72,9 +72,7 @@ impl TraceAgent { // let stats_config = self.config.clone(); tokio::spawn(async move { let stats_flusher = stats_flusher.clone(); - stats_flusher - .start_stats_flusher(stats_rx) - .await; + stats_flusher.start_stats_flusher(stats_rx).await; }); // setup our hyper http server, where the endpoint_handler handles incoming requests @@ -101,7 +99,7 @@ impl TraceAgent { trace_tx.clone(), stats_processor.clone(), stats_tx.clone(), - tags_provider.clone() + tags_provider.clone(), ) }); @@ -135,11 +133,14 @@ impl TraceAgent { trace_tx: Sender, stats_processor: Arc, stats_tx: Sender, - tags_provider: Arc + tags_provider: Arc, ) -> http::Result> { match (req.method(), req.uri().path()) { (&Method::PUT | &Method::POST, V4_TRACE_ENDPOINT_PATH) => { - match trace_processor.process_traces(config, req, trace_tx, tags_provider, ApiVersion::V04).await { + match trace_processor + .process_traces(config, req, trace_tx, tags_provider, ApiVersion::V04) + .await + { Ok(res) => Ok(res), Err(err) => log_and_create_http_response( &format!("Error processing traces: {err}"), @@ -148,7 +149,10 @@ impl TraceAgent { } } (&Method::PUT | &Method::POST, V5_TRACE_ENDPOINT_PATH) => { - match trace_processor.process_traces(config, req, trace_tx, tags_provider, ApiVersion::V05).await { + match trace_processor + .process_traces(config, req, trace_tx, tags_provider, ApiVersion::V05) + .await + { Ok(res) => Ok(res), Err(err) => log_and_create_http_response( &format!("Error processing traces: {err}"), diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index d95dfa2b9..c9738b91c 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -1,24 +1,24 @@ // Copyright 2023-Present Datadog, Inc. 
https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use std::sync::Arc; -use std::str::FromStr; -use ddcommon::Endpoint; use crate::tags::provider; use datadog_trace_obfuscation::obfuscation_config; use datadog_trace_utils::config_utils::trace_intake_url; use datadog_trace_utils::tracer_payload::TraceEncoding; +use ddcommon::Endpoint; +use std::str::FromStr; +use std::sync::Arc; use async_trait::async_trait; use hyper::{http, Body, Request, Response, StatusCode}; use tokio::sync::mpsc::Sender; use tracing::info; +use crate::config; +use crate::traces::http_utils::{self, log_and_create_http_response}; use datadog_trace_obfuscation::obfuscate::obfuscate_span; use datadog_trace_utils::trace_utils::SendData; use datadog_trace_utils::trace_utils::{self}; -use crate::config; -use crate::traces::http_utils::{self, log_and_create_http_response}; use super::trace_agent::{ApiVersion, MAX_CONTENT_LENGTH}; @@ -32,7 +32,7 @@ pub trait TraceProcessor { req: Request, tx: Sender, tags_provider: Arc, - version: ApiVersion + version: ApiVersion, ) -> http::Result>; } @@ -84,9 +84,9 @@ impl TraceProcessor for ServerlessTraceProcessor { StatusCode::INTERNAL_SERVER_ERROR, ); } - } + }, }; - + let payload = trace_utils::collect_trace_chunks( traces, &tracer_header_tags, @@ -106,12 +106,13 @@ impl TraceProcessor for ServerlessTraceProcessor { }); // TODO(astuyve) generalize this and delegate to an enum span.meta.insert("origin".to_string(), "lambda".to_string()); - span.meta.insert("_dd.origin".to_string(), "lambda".to_string()); + span.meta + .insert("_dd.origin".to_string(), "lambda".to_string()); obfuscate_span(span, &self.obfuscation_config); } }, true, - TraceEncoding::V07 + TraceEncoding::V07, ); let intake_url = trace_intake_url(&config.site); let endpoint = Endpoint { @@ -143,23 +144,20 @@ impl TraceProcessor for ServerlessTraceProcessor { mod tests { use datadog_trace_obfuscation::obfuscation_config::ObfuscationConfig; use hyper::Request; + use serde_json::json; 
use std::{ collections::HashMap, sync::Arc, time::{SystemTime, UNIX_EPOCH}, }; use tokio::sync::mpsc::{self, Receiver, Sender}; - use serde_json::json; - use crate::traces::trace_processor::{self, TraceProcessor}; use crate::config::Config; - use crate::LAMBDA_RUNTIME_SLUG; use crate::tags::provider::Provider; + use crate::traces::trace_processor::{self, TraceProcessor}; + use crate::LAMBDA_RUNTIME_SLUG; use datadog_trace_protobuf::pb; - use datadog_trace_utils::{ - trace_utils, - tracer_payload::TracerPayloadCollection - }; + use datadog_trace_utils::{trace_utils, tracer_payload::TracerPayloadCollection}; fn get_current_timestamp_nanos() -> i64 { SystemTime::now() @@ -195,7 +193,10 @@ mod tests { tags_provider: Arc, ) -> pb::Span { let mut meta: HashMap = tags_provider.get_tags_map().clone(); - meta.insert("runtime-id".to_string(), "test-runtime-id-value".to_string()); + meta.insert( + "runtime-id".to_string(), + "test-runtime-id-value".to_string(), + ); let mut span = pb::Span { trace_id, @@ -217,12 +218,9 @@ mod tests { span.metrics.insert("_top_level".to_string(), 1.0); span.meta .insert("_dd.origin".to_string(), "lambda".to_string()); + span.meta.insert("origin".to_string(), "lambda".to_string()); span.meta - .insert("origin".to_string(), "lambda".to_string()); - span.meta.insert( - "functionname".to_string(), - "my-function".to_string(), - ); + .insert("functionname".to_string(), "my-function".to_string()); span.r#type = "".to_string(); } span @@ -279,7 +277,7 @@ mod tests { .unwrap(); let trace_processor = trace_processor::ServerlessTraceProcessor { - obfuscation_config: Arc::new(ObfuscationConfig::new().unwrap()) + obfuscation_config: Arc::new(ObfuscationConfig::new().unwrap()), }; let config = create_test_config(); let tags_provider = create_tags_provider(config.clone()); From e435e8924d48dfe09f27e1854f1d4c3366e3985a Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Thu, 27 Jun 2024 16:08:40 -0400 Subject: [PATCH 15/25] feat: fmt --- 
bottlecap/src/traces/http_utils.rs | 2 +- bottlecap/src/traces/stats_flusher.rs | 4 ++-- bottlecap/src/traces/stats_processor.rs | 2 +- bottlecap/src/traces/trace_agent.rs | 2 +- bottlecap/src/traces/trace_flusher.rs | 2 +- bottlecap/src/traces/trace_processor.rs | 25 ++++++++++++------------- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/bottlecap/src/traces/http_utils.rs b/bottlecap/src/traces/http_utils.rs index 701af8d2f..ada58ccc8 100644 --- a/bottlecap/src/traces/http_utils.rs +++ b/bottlecap/src/traces/http_utils.rs @@ -33,7 +33,7 @@ pub fn log_and_create_http_response( } /// Takes a request's header map, and verifies that the "content-length" header is present, valid, -/// and less than the given max_content_length. +/// and less than the given `max_content_length`. /// /// Will return None if no issues are found. Otherwise logs an error (with the given prefix) and /// returns and HTTP Response with the appropriate error status code. diff --git a/bottlecap/src/traces/stats_flusher.rs b/bottlecap/src/traces/stats_flusher.rs index a0783ca83..58ed5a8e2 100644 --- a/bottlecap/src/traces/stats_flusher.rs +++ b/bottlecap/src/traces/stats_flusher.rs @@ -82,9 +82,9 @@ impl StatsFlusher for ServerlessStatsFlusher { ) .await { - Ok(_) => info!("Successfully flushed stats"), + Ok(()) => info!("Successfully flushed stats"), Err(e) => { - error!("Error sending stats: {e:?}") + error!("Error sending stats: {e:?}"); } } } diff --git a/bottlecap/src/traces/stats_processor.rs b/bottlecap/src/traces/stats_processor.rs index 94ee05c87..977c62304 100644 --- a/bottlecap/src/traces/stats_processor.rs +++ b/bottlecap/src/traces/stats_processor.rs @@ -68,7 +68,7 @@ impl StatsProcessor for ServerlessStatsProcessor { // send trace payload to our trace flusher match tx.send(stats).await { - Ok(_) => { + Ok(()) => { return log_and_create_http_response( "Successfully buffered stats to be flushed.", StatusCode::ACCEPTED, diff --git a/bottlecap/src/traces/trace_agent.rs 
b/bottlecap/src/traces/trace_agent.rs index fdba8c109..d35567bb3 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -9,7 +9,7 @@ use std::net::SocketAddr; use std::sync::Arc; use std::time::Instant; use tokio::sync::mpsc::{self, Receiver, Sender}; -use tracing::{debug, error, info}; +use tracing::{error, info}; use crate::config; use crate::tags::provider; diff --git a/bottlecap/src/traces/trace_flusher.rs b/bottlecap/src/traces/trace_flusher.rs index a91cedd9d..7fd5510b2 100644 --- a/bottlecap/src/traces/trace_flusher.rs +++ b/bottlecap/src/traces/trace_flusher.rs @@ -54,7 +54,7 @@ impl TraceFlusher for ServerlessTraceFlusher { match traces.send().await.last_result { Ok(_) => info!("Successfully flushed traces"), Err(e) => { - error!("Error sending trace: {e:?}") + error!("Error sending trace: {e:?}"); // TODO: Retries } } diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index c9738b91c..4d3165ec1 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -97,10 +97,10 @@ impl TraceProcessor for ServerlessTraceProcessor { // &config.env_type, // ); chunk.spans.retain(|span| { - return (span.name != "dns.lookup" && span.resource != "0.0.0.0") - || (span.name != "dns.lookup" && span.resource != "127.0.0.1"); + (span.name != "dns.lookup" && span.resource != "0.0.0.0") + || (span.name != "dns.lookup" && span.resource != "127.0.0.1") }); - for span in chunk.spans.iter_mut() { + for span in &mut chunk.spans { tags_provider.get_tags_map().iter().for_each(|(k, v)| { span.meta.insert(k.clone(), v.clone()); }); @@ -124,7 +124,7 @@ impl TraceProcessor for ServerlessTraceProcessor { // send trace payload to our trace flusher match tx.send(send_data).await { - Ok(_) => { + Ok(()) => { return log_and_create_http_response( "Successfully buffered traces to be flushed.", StatusCode::ACCEPTED, @@ -167,12 +167,11 @@ mod tests { } fn create_test_config() 
-> Arc { - let config = Arc::new(Config { + Arc::new(Config { service: Some("test-service".to_string()), tags: Some("test:tag,env:test".to_string()), ..Config::default() - }); - config + }) } fn create_tags_provider(config: Arc) -> Arc { @@ -210,7 +209,7 @@ mod tests { error: 0, meta: meta.clone(), metrics: HashMap::new(), - r#type: "".to_string(), + r#type: String::new(), meta_struct: HashMap::new(), span_links: vec![], }; @@ -221,7 +220,7 @@ mod tests { span.meta.insert("origin".to_string(), "lambda".to_string()); span.meta .insert("functionname".to_string(), "my-function".to_string()); - span.r#type = "".to_string(); + span.r#type = String::new(); } span } @@ -303,16 +302,16 @@ mod tests { tracer_version: "4.0.0".to_string(), runtime_id: "test-runtime-id-value".to_string(), chunks: vec![pb::TraceChunk { - priority: i8::MIN as i32, - origin: "".to_string(), + priority: i32::from(i8::MIN), + origin: String::new(), spans: vec![create_test_span(11, 222, 333, start, true, tags_provider)], tags: HashMap::new(), dropped_trace: false, }], tags: HashMap::new(), env: "test-env".to_string(), - hostname: "".to_string(), - app_version: "".to_string(), + hostname: String::new(), + app_version: String::new(), }; let received_payload = From 2ce64fd4aa4998a243a6dc5f18629d276b8f732d Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Thu, 27 Jun 2024 16:27:46 -0400 Subject: [PATCH 16/25] clippy fixes --- bottlecap/src/config/mod.rs | 1 + bottlecap/src/traces/http_utils.rs | 39 ++++++++++--------------- bottlecap/src/traces/stats_flusher.rs | 2 +- bottlecap/src/traces/stats_processor.rs | 12 ++++++-- bottlecap/src/traces/trace_agent.rs | 8 ++--- bottlecap/src/traces/trace_processor.rs | 6 ++-- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index 79f631393..aafb0b39a 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -17,6 +17,7 @@ use 
crate::config::processing_rule::{deserialize_processing_rules, ProcessingRul #[derive(Debug, PartialEq, Deserialize, Clone)] #[serde(deny_unknown_fields)] #[serde(default)] +#[allow(clippy::struct_excessive_bools)] pub struct Config { pub site: String, pub api_key: String, diff --git a/bottlecap/src/traces/http_utils.rs b/bottlecap/src/traces/http_utils.rs index ada58ccc8..155fbf2c7 100644 --- a/bottlecap/src/traces/http_utils.rs +++ b/bottlecap/src/traces/http_utils.rs @@ -42,32 +42,23 @@ pub fn verify_request_content_length( max_content_length: usize, error_message_prefix: &str, ) -> Option>> { - let content_length_header = match header_map.get(header::CONTENT_LENGTH) { - Some(res) => res, - None => { - return Some(log_and_create_http_response( - &format!("{error_message_prefix}: Missing Content-Length header"), - StatusCode::LENGTH_REQUIRED, - )); - } + let Some(content_length_header) = header_map.get(header::CONTENT_LENGTH) else { + return Some(log_and_create_http_response( + &format!("{error_message_prefix}: Missing Content-Length header"), + StatusCode::LENGTH_REQUIRED, + )); }; - let header_as_string = match content_length_header.to_str() { - Ok(res) => res, - Err(_) => { - return Some(log_and_create_http_response( - &format!("{error_message_prefix}: Invalid Content-Length header"), - StatusCode::BAD_REQUEST, - )); - } + let Ok(header_as_string) = content_length_header.to_str() else { + return Some(log_and_create_http_response( + &format!("{error_message_prefix}: Invalid Content-Length header"), + StatusCode::BAD_REQUEST, + )); }; - let content_length = match header_as_string.to_string().parse::() { - Ok(res) => res, - Err(_) => { - return Some(log_and_create_http_response( - &format!("{error_message_prefix}: Invalid Content-Length header"), - StatusCode::BAD_REQUEST, - )); - } + let Ok(content_length) = header_as_string.to_string().parse::() else { + return Some(log_and_create_http_response( + &format!("{error_message_prefix}: Invalid Content-Length 
header"), + StatusCode::BAD_REQUEST, + )); }; if content_length > max_content_length { return Some(log_and_create_http_response( diff --git a/bottlecap/src/traces/stats_flusher.rs b/bottlecap/src/traces/stats_flusher.rs index 58ed5a8e2..4b2e943ae 100644 --- a/bottlecap/src/traces/stats_flusher.rs +++ b/bottlecap/src/traces/stats_flusher.rs @@ -71,7 +71,7 @@ impl StatsFlusher for ServerlessStatsFlusher { let stats_url = trace_stats_url(&self.config.site); let endpoint = Endpoint { - url: hyper::Uri::from_str(&stats_url).unwrap(), + url: hyper::Uri::from_str(&stats_url).expect("can't make URI from stats url, exiting"), api_key: Some(self.config.api_key.clone().into()), }; diff --git a/bottlecap/src/traces/stats_processor.rs b/bottlecap/src/traces/stats_processor.rs index 977c62304..769bf87fb 100644 --- a/bottlecap/src/traces/stats_processor.rs +++ b/bottlecap/src/traces/stats_processor.rs @@ -50,7 +50,7 @@ impl StatsProcessor for ServerlessStatsProcessor { // trace-protobuf crate) let mut stats: pb::ClientStatsPayload = match stats_utils::get_stats_from_request_body(body).await { - Ok(res) => res, + Ok(result) => result, Err(err) => { return log_and_create_http_response( &format!("Error deserializing trace stats from request body: {err}"), @@ -64,7 +64,15 @@ impl StatsProcessor for ServerlessStatsProcessor { .duration_since(UNIX_EPOCH) .unwrap_or_default() .as_nanos(); - stats.stats[0].start = timestamp as u64; + stats.stats[0].start = match u64::try_from(timestamp) { + Ok(result) => result, + Err(_) => { + return log_and_create_http_response( + "Error converting timestamp to u64", + StatusCode::INTERNAL_SERVER_ERROR, + ); + } + }; // send trace payload to our trace flusher match tx.send(stats).await { diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index d35567bb3..360d1ac63 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -141,7 +141,7 @@ impl TraceAgent { .process_traces(config, 
req, trace_tx, tags_provider, ApiVersion::V04) .await { - Ok(res) => Ok(res), + Ok(result) => Ok(result), Err(err) => log_and_create_http_response( &format!("Error processing traces: {err}"), StatusCode::INTERNAL_SERVER_ERROR, @@ -153,7 +153,7 @@ impl TraceAgent { .process_traces(config, req, trace_tx, tags_provider, ApiVersion::V05) .await { - Ok(res) => Ok(res), + Ok(result) => Ok(result), Err(err) => log_and_create_http_response( &format!("Error processing traces: {err}"), StatusCode::INTERNAL_SERVER_ERROR, @@ -162,7 +162,7 @@ impl TraceAgent { } (&Method::PUT | &Method::POST, STATS_ENDPOINT_PATH) => { match stats_processor.process_stats(req, stats_tx).await { - Ok(res) => Ok(res), + Ok(result) => Ok(result), Err(err) => log_and_create_http_response( &format!("Error processing trace stats: {err}"), StatusCode::INTERNAL_SERVER_ERROR, @@ -170,7 +170,7 @@ impl TraceAgent { } } (_, INFO_ENDPOINT_PATH) => match Self::info_handler() { - Ok(res) => Ok(res), + Ok(result) => Ok(result), Err(err) => log_and_create_http_response( &format!("Info endpoint error: {err}"), StatusCode::INTERNAL_SERVER_ERROR, diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 4d3165ec1..b5257618d 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -35,7 +35,7 @@ pub trait TraceProcessor { version: ApiVersion, ) -> http::Result>; } - +#[allow(clippy::module_name_repetitions)] #[derive(Clone)] pub struct ServerlessTraceProcessor { pub obfuscation_config: Arc, @@ -68,7 +68,7 @@ impl TraceProcessor for ServerlessTraceProcessor { // crate) let (body_size, traces) = match version { ApiVersion::V04 => match trace_utils::get_traces_from_request_body(body).await { - Ok(res) => res, + Ok(result) => result, Err(err) => { return log_and_create_http_response( &format!("Error deserializing trace from request body: {err}"), @@ -77,7 +77,7 @@ impl TraceProcessor for ServerlessTraceProcessor { } }, ApiVersion::V05 
=> match trace_utils::get_v05_traces_from_request_body(body).await { - Ok(res) => res, + Ok(result) => result, Err(err) => { return log_and_create_http_response( &format!("Error deserializing trace from request body: {err}"), From c06c5a2d986fe0ee7940b2b23e5ce18172a4bb28 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Thu, 27 Jun 2024 16:30:36 -0400 Subject: [PATCH 17/25] parse time --- bottlecap/src/traces/trace_processor.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index b5257618d..bd454cd29 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -116,7 +116,7 @@ impl TraceProcessor for ServerlessTraceProcessor { ); let intake_url = trace_intake_url(&config.site); let endpoint = Endpoint { - url: hyper::Uri::from_str(&intake_url).unwrap(), + url: hyper::Uri::from_str(&intake_url).expect("can't parse trace intake URL, exiting"), api_key: Some(config.api_key.clone().into()), }; @@ -160,10 +160,10 @@ mod tests { use datadog_trace_utils::{trace_utils, tracer_payload::TracerPayloadCollection}; fn get_current_timestamp_nanos() -> i64 { - SystemTime::now() + i64::try_from(SystemTime::now() .duration_since(UNIX_EPOCH) .unwrap() - .as_nanos() as i64 + .as_nanos()).expect("can't parse time") } fn create_test_config() -> Arc { From aeb64cb1bf6b4ac60ea2124eeb1dd947ae49f40f Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Thu, 27 Jun 2024 16:39:21 -0400 Subject: [PATCH 18/25] feat: clippy again --- bottlecap/src/traces/stats_flusher.rs | 1 + bottlecap/src/traces/stats_processor.rs | 1 + bottlecap/src/traces/trace_agent.rs | 3 ++- bottlecap/src/traces/trace_flusher.rs | 1 + bottlecap/src/traces/trace_processor.rs | 17 ++++++++++------- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/bottlecap/src/traces/stats_flusher.rs b/bottlecap/src/traces/stats_flusher.rs index 4b2e943ae..0310d5bf5 100644 --- 
a/bottlecap/src/traces/stats_flusher.rs +++ b/bottlecap/src/traces/stats_flusher.rs @@ -24,6 +24,7 @@ pub trait StatsFlusher { async fn manual_flush(&self); } +#[allow(clippy::module_name_repetitions)] #[derive(Clone)] pub struct ServerlessStatsFlusher { pub buffer: Arc>>, diff --git a/bottlecap/src/traces/stats_processor.rs b/bottlecap/src/traces/stats_processor.rs index 769bf87fb..88eefb3dd 100644 --- a/bottlecap/src/traces/stats_processor.rs +++ b/bottlecap/src/traces/stats_processor.rs @@ -26,6 +26,7 @@ pub trait StatsProcessor { } #[derive(Clone, Copy)] +#[allow(clippy::module_name_repetitions)] pub struct ServerlessStatsProcessor {} #[async_trait] diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 360d1ac63..18d1b7dbf 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -106,7 +106,8 @@ impl TraceAgent { async move { Ok::<_, Infallible>(service) } }); - let addr = SocketAddr::from(([127, 0, 0, 1], TRACE_AGENT_PORT as u16)); + let port = u16::try_from(TRACE_AGENT_PORT).expect("TRACE_AGENT_PORT is too large"); + let addr = SocketAddr::from(([127, 0, 0, 1], port)); let server_builder = Server::try_bind(&addr)?; let server = server_builder.serve(make_svc); diff --git a/bottlecap/src/traces/trace_flusher.rs b/bottlecap/src/traces/trace_flusher.rs index 7fd5510b2..7b5f56997 100644 --- a/bottlecap/src/traces/trace_flusher.rs +++ b/bottlecap/src/traces/trace_flusher.rs @@ -20,6 +20,7 @@ pub trait TraceFlusher { } #[derive(Clone)] +#[allow(clippy::module_name_repetitions)] pub struct ServerlessTraceFlusher { pub buffer: Arc>>, } diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index bd454cd29..a27902a79 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -35,8 +35,8 @@ pub trait TraceProcessor { version: ApiVersion, ) -> http::Result>; } -#[allow(clippy::module_name_repetitions)] 
#[derive(Clone)] +#[allow(clippy::module_name_repetitions)] pub struct ServerlessTraceProcessor { pub obfuscation_config: Arc, } @@ -97,8 +97,8 @@ impl TraceProcessor for ServerlessTraceProcessor { // &config.env_type, // ); chunk.spans.retain(|span| { - (span.name != "dns.lookup" && span.resource != "0.0.0.0") - || (span.name != "dns.lookup" && span.resource != "127.0.0.1") + (span.resource != "127.0.0.1" || span.resource != "0.0.0.0") + && span.name != "dns.lookup" }); for span in &mut chunk.spans { tags_provider.get_tags_map().iter().for_each(|(k, v)| { @@ -160,10 +160,13 @@ mod tests { use datadog_trace_utils::{trace_utils, tracer_payload::TracerPayloadCollection}; fn get_current_timestamp_nanos() -> i64 { - i64::try_from(SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_nanos()).expect("can't parse time") + i64::try_from( + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(), + ) + .expect("can't parse time") } fn create_test_config() -> Arc { From e90cc9fbbb065090dbb173e968bd4bb49c8471a6 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Fri, 28 Jun 2024 10:16:31 -0400 Subject: [PATCH 19/25] feat: revert dockerfile --- scripts/Dockerfile.bottlecap.build | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/Dockerfile.bottlecap.build b/scripts/Dockerfile.bottlecap.build index 70d40b471..78d7d9400 100644 --- a/scripts/Dockerfile.bottlecap.build +++ b/scripts/Dockerfile.bottlecap.build @@ -6,7 +6,6 @@ RUN yum install -y curl gcc gcc-c++ make unzip openssl openssl-devel # Install Protocol Buffers compiler by hand, since AL2 does not have a recent enough version. 
COPY ./scripts/install-protoc.sh / RUN chmod +x /install-protoc.sh && /install-protoc.sh - RUN curl https://sh.rustup.rs -sSf | \ sh -s -- --profile minimal --default-toolchain nightly-$PLATFORM-unknown-linux-gnu -y ENV PATH=/root/.cargo/bin:$PATH @@ -17,7 +16,7 @@ COPY ./bottlecap/Cargo.toml /tmp/dd/bottlecap/Cargo.toml COPY ./bottlecap/Cargo.lock /tmp/dd/bottlecap/Cargo.lock ENV RUSTFLAGS="-C panic=abort -Zlocation-detail=none" WORKDIR /tmp/dd/bottlecap -RUN --mount=type=cache,target=/usr/local/cargo/registry cargo +nightly build --release --target $PLATFORM-unknown-linux-gnu +RUN --mount=type=cache,target=/usr/local/cargo/registry cargo +nightly build -Z build-std=std,panic_abort -Z build-std-features=panic_immediate_abort --release --target $PLATFORM-unknown-linux-gnu RUN cp /tmp/dd/bottlecap/target/$PLATFORM-unknown-linux-gnu/release/bottlecap /tmp/dd/bottlecap/bottlecap # zip the extension From 63cfeccd6f3bf922c846e0ba83ba9e850853055c Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Fri, 28 Jun 2024 10:19:35 -0400 Subject: [PATCH 20/25] feat: no-default-features --- bottlecap/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index f38f006ff..6504f4237 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -5,8 +5,8 @@ edition = "2021" publish = false [dependencies] -async-trait = "0.1.64" -anyhow = "1.0" +async-trait = { version = "0.1.64", default-features = false } +anyhow = { version = "1.0", default-features = false } chrono = { version = "0.4.38", features = ["serde", "std", "now"], default-features = false} datadog-protos = { version = "0.1.0", default-features = false, git = "https://github.com/DataDog/saluki/" } ddsketch-agent = { version = "0.1.0", default-features = false, git = "https://github.com/DataDog/saluki/" } From 165c7981d1bad516c615058ffdd7aea1b03828b7 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Fri, 28 Jun 2024 10:32:31 -0400 Subject: [PATCH 
21/25] feat: Remove utils, take only what we need --- bottlecap/Cargo.lock | 30 ++++- bottlecap/Cargo.toml | 1 + bottlecap/src/traces/http_utils.rs | 162 ------------------------ bottlecap/src/traces/mod.rs | 1 - bottlecap/src/traces/stats_processor.rs | 2 +- bottlecap/src/traces/trace_agent.rs | 2 +- bottlecap/src/traces/trace_processor.rs | 2 +- 7 files changed, 29 insertions(+), 171 deletions(-) delete mode 100644 bottlecap/src/traces/http_utils.rs diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock index cceb50c68..6e2a70315 100644 --- a/bottlecap/Cargo.lock +++ b/bottlecap/Cargo.lock @@ -143,6 +143,7 @@ dependencies = [ "base64 0.22.1", "chrono", "datadog-protos", + "datadog-trace-mini-agent", "datadog-trace-normalization", "datadog-trace-obfuscation", "datadog-trace-protobuf", @@ -279,10 +280,29 @@ dependencies = [ "tonic-build", ] +[[package]] +name = "datadog-trace-mini-agent" +version = "0.4.2" +source = "git+https://github.com/DataDog/libdatadog#15aa48dae5f53b853cee7ba0a0f4860a48d7970f" +dependencies = [ + "anyhow", + "async-trait", + "datadog-trace-normalization", + "datadog-trace-obfuscation", + "datadog-trace-protobuf", + "datadog-trace-utils", + "ddcommon", + "hyper 0.14.29", + "log", + "serde", + "serde_json", + "tokio", +] + [[package]] name = "datadog-trace-normalization" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog#2d7534d6fbd496793418df993b97b7ec076d647b" +source = "git+https://github.com/DataDog/libdatadog#15aa48dae5f53b853cee7ba0a0f4860a48d7970f" dependencies = [ "anyhow", "datadog-trace-protobuf", @@ -291,7 +311,7 @@ dependencies = [ [[package]] name = "datadog-trace-obfuscation" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog#2d7534d6fbd496793418df993b97b7ec076d647b" +source = "git+https://github.com/DataDog/libdatadog#15aa48dae5f53b853cee7ba0a0f4860a48d7970f" dependencies = [ "anyhow", "datadog-trace-protobuf", @@ -308,7 +328,7 @@ dependencies = [ [[package]] name = 
"datadog-trace-protobuf" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog#2d7534d6fbd496793418df993b97b7ec076d647b" +source = "git+https://github.com/DataDog/libdatadog#15aa48dae5f53b853cee7ba0a0f4860a48d7970f" dependencies = [ "prost 0.11.9", "serde", @@ -318,7 +338,7 @@ dependencies = [ [[package]] name = "datadog-trace-utils" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog#2d7534d6fbd496793418df993b97b7ec076d647b" +source = "git+https://github.com/DataDog/libdatadog#15aa48dae5f53b853cee7ba0a0f4860a48d7970f" dependencies = [ "anyhow", "bytes", @@ -343,7 +363,7 @@ dependencies = [ [[package]] name = "ddcommon" version = "10.0.0" -source = "git+https://github.com/DataDog/libdatadog#2d7534d6fbd496793418df993b97b7ec076d647b" +source = "git+https://github.com/DataDog/libdatadog#15aa48dae5f53b853cee7ba0a0f4860a48d7970f" dependencies = [ "anyhow", "futures", diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index 6504f4237..e8f500a01 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -13,6 +13,7 @@ ddsketch-agent = { version = "0.1.0", default-features = false, git = "https://g ddcommon = { version = "10.0", git = "https://github.com/DataDog/libdatadog" } datadog-trace-protobuf = { version = "10.0.0", git = "https://github.com/DataDog/libdatadog" } datadog-trace-utils = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog" } +datadog-trace-mini-agent = { version = "0.4.2", git= "https://github.com/DataDog/libdatadog" } datadog-trace-normalization = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog" } datadog-trace-obfuscation = { version = "10.0.0", git= "https://github.com/DataDog/libdatadog" } figment = { version = "0.10.15", default-features = false, features = ["yaml", "env"] } diff --git a/bottlecap/src/traces/http_utils.rs b/bottlecap/src/traces/http_utils.rs deleted file mode 100644 index 155fbf2c7..000000000 --- a/bottlecap/src/traces/http_utils.rs +++ /dev/null @@ 
-1,162 +0,0 @@ -// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ -// SPDX-License-Identifier: Apache-2.0 - -use hyper::{ - header, - http::{self, HeaderMap}, - Body, Response, StatusCode, -}; -use log::{error, info}; -use serde_json::json; - -/// Does two things: -/// 1. Logs the given message. A success status code (within 200-299) will cause an info log to be -/// written, -/// otherwise error will be written. -/// 2. Returns the given message in the body of JSON response with the given status code. -/// -/// Response body format: -/// { -/// "message": message -/// } -pub fn log_and_create_http_response( - message: &str, - status: StatusCode, -) -> http::Result> { - if status.is_success() { - info!("{message}"); - } else { - error!("{message}"); - } - let body = json!({ "message": message }).to_string(); - Response::builder().status(status).body(Body::from(body)) -} - -/// Takes a request's header map, and verifies that the "content-length" header is present, valid, -/// and less than the given `max_content_length`. -/// -/// Will return None if no issues are found. Otherwise logs an error (with the given prefix) and -/// returns and HTTP Response with the appropriate error status code. 
-pub fn verify_request_content_length( - header_map: &HeaderMap, - max_content_length: usize, - error_message_prefix: &str, -) -> Option>> { - let Some(content_length_header) = header_map.get(header::CONTENT_LENGTH) else { - return Some(log_and_create_http_response( - &format!("{error_message_prefix}: Missing Content-Length header"), - StatusCode::LENGTH_REQUIRED, - )); - }; - let Ok(header_as_string) = content_length_header.to_str() else { - return Some(log_and_create_http_response( - &format!("{error_message_prefix}: Invalid Content-Length header"), - StatusCode::BAD_REQUEST, - )); - }; - let Ok(content_length) = header_as_string.to_string().parse::() else { - return Some(log_and_create_http_response( - &format!("{error_message_prefix}: Invalid Content-Length header"), - StatusCode::BAD_REQUEST, - )); - }; - if content_length > max_content_length { - return Some(log_and_create_http_response( - &format!("{error_message_prefix}: Payload too large"), - StatusCode::PAYLOAD_TOO_LARGE, - )); - } - None -} - -#[cfg(test)] -mod tests { - use hyper::header; - use hyper::Body; - use hyper::HeaderMap; - use hyper::Response; - use hyper::StatusCode; - - use super::verify_request_content_length; - - fn create_test_headers_with_content_length(val: &str) -> HeaderMap { - let mut map = HeaderMap::new(); - map.insert(header::CONTENT_LENGTH, val.parse().unwrap()); - map - } - - async fn get_response_body_as_string(response: Response) -> String { - let body = response.into_body(); - let bytes = hyper::body::to_bytes(body).await.unwrap(); - String::from_utf8(bytes.into_iter().collect()).unwrap() - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_request_content_length_missing() { - let verify_result = verify_request_content_length(&HeaderMap::new(), 1, "Test Prefix"); - assert!(verify_result.is_some()); - - let response = verify_result.unwrap().unwrap(); - assert_eq!(response.status(), StatusCode::LENGTH_REQUIRED); - assert_eq!( - 
get_response_body_as_string(response).await, - "{\"message\":\"Test Prefix: Missing Content-Length header\"}".to_string() - ); - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_request_content_length_cant_convert_to_str() { - let verify_result = verify_request_content_length( - &create_test_headers_with_content_length("❤❤❤❤❤❤❤"), - 1, - "Test Prefix", - ); - assert!(verify_result.is_some()); - - let response = verify_result.unwrap().unwrap(); - assert_eq!(response.status(), StatusCode::BAD_REQUEST); - assert_eq!( - get_response_body_as_string(response).await, - "{\"message\":\"Test Prefix: Invalid Content-Length header\"}".to_string() - ); - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_request_content_length_cant_convert_to_usize() { - let verify_result = verify_request_content_length( - &create_test_headers_with_content_length("not_an_int"), - 1, - "Test Prefix", - ); - assert!(verify_result.is_some()); - - let response = verify_result.unwrap().unwrap(); - assert_eq!(response.status(), StatusCode::BAD_REQUEST); - assert_eq!( - get_response_body_as_string(response).await, - "{\"message\":\"Test Prefix: Invalid Content-Length header\"}".to_string() - ); - } - - #[tokio::test] - #[cfg_attr(miri, ignore)] - async fn test_request_content_length_too_long() { - let verify_result = verify_request_content_length( - &create_test_headers_with_content_length("100"), - 1, - "Test Prefix", - ); - - assert!(verify_result.is_some()); - - let response = verify_result.unwrap().unwrap(); - assert_eq!(response.status(), StatusCode::PAYLOAD_TOO_LARGE); - assert_eq!( - get_response_body_as_string(response).await, - "{\"message\":\"Test Prefix: Payload too large\"}".to_string() - ); - } -} diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index 479303f28..8545fbe40 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -1,7 +1,6 @@ // Copyright 2023-Present Datadog, Inc. 
https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -pub mod http_utils; pub mod stats_flusher; pub mod stats_processor; pub mod trace_agent; diff --git a/bottlecap/src/traces/stats_processor.rs b/bottlecap/src/traces/stats_processor.rs index 88eefb3dd..224b4ee14 100644 --- a/bottlecap/src/traces/stats_processor.rs +++ b/bottlecap/src/traces/stats_processor.rs @@ -12,7 +12,7 @@ use datadog_trace_protobuf::pb; use datadog_trace_utils::stats_utils; use super::trace_agent::MAX_CONTENT_LENGTH; -use crate::traces::http_utils::{self, log_and_create_http_response}; +use datadog_trace_mini_agent::http_utils::{self, log_and_create_http_response}; #[async_trait] pub trait StatsProcessor { diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 18d1b7dbf..9216f5f58 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -13,7 +13,7 @@ use tracing::{error, info}; use crate::config; use crate::tags::provider; -use crate::traces::http_utils::log_and_create_http_response; +use datadog_trace_mini_agent::http_utils::log_and_create_http_response; use crate::traces::{stats_flusher, stats_processor, trace_flusher, trace_processor}; use datadog_trace_protobuf::pb; use datadog_trace_utils::trace_utils::SendData; diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index a27902a79..b8eb7dec0 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -15,7 +15,7 @@ use tokio::sync::mpsc::Sender; use tracing::info; use crate::config; -use crate::traces::http_utils::{self, log_and_create_http_response}; +use datadog_trace_mini_agent::http_utils::{self, log_and_create_http_response}; use datadog_trace_obfuscation::obfuscate::obfuscate_span; use datadog_trace_utils::trace_utils::SendData; use datadog_trace_utils::trace_utils::{self}; From d49402a6d9f408ca5e49811b98d1c74d36bcc284 Mon Sep 17 00:00:00 2001 From: AJ 
Stuyvenberg Date: Fri, 28 Jun 2024 10:36:57 -0400 Subject: [PATCH 22/25] feat: fmt moves the import --- bottlecap/src/traces/trace_agent.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 9216f5f58..8d34cd5c8 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -13,8 +13,8 @@ use tracing::{error, info}; use crate::config; use crate::tags::provider; -use datadog_trace_mini_agent::http_utils::log_and_create_http_response; use crate::traces::{stats_flusher, stats_processor, trace_flusher, trace_processor}; +use datadog_trace_mini_agent::http_utils::log_and_create_http_response; use datadog_trace_protobuf::pb; use datadog_trace_utils::trace_utils::SendData; From 86836e0417c4d9bb43595e440c0878584afd57b7 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Fri, 28 Jun 2024 10:44:53 -0400 Subject: [PATCH 23/25] feat: replace info with debug. Replace log with tracing lib --- bottlecap/src/secrets/decrypt.rs | 2 +- bottlecap/src/traces/stats_flusher.rs | 2 +- bottlecap/src/traces/stats_processor.rs | 4 ++-- bottlecap/src/traces/trace_processor.rs | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bottlecap/src/secrets/decrypt.rs b/bottlecap/src/secrets/decrypt.rs index 139a8a8cc..ae0db7a98 100644 --- a/bottlecap/src/secrets/decrypt.rs +++ b/bottlecap/src/secrets/decrypt.rs @@ -2,7 +2,7 @@ use crate::config::{AwsConfig, Config}; use base64::prelude::*; use chrono::{DateTime, Utc}; use hmac::{Hmac, Mac}; -use log::error; +use tracing::error; use reqwest::header::{HeaderMap, HeaderValue}; use reqwest::Client; use serde_json::Value; diff --git a/bottlecap/src/traces/stats_flusher.rs b/bottlecap/src/traces/stats_flusher.rs index 0310d5bf5..1fb00647f 100644 --- a/bottlecap/src/traces/stats_flusher.rs +++ b/bottlecap/src/traces/stats_flusher.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use 
async_trait::async_trait; -use log::{debug, error, info}; +use tracing::{debug, error, info}; use std::str::FromStr; use std::sync::Arc; use tokio::sync::{mpsc::Receiver, Mutex}; diff --git a/bottlecap/src/traces/stats_processor.rs b/bottlecap/src/traces/stats_processor.rs index 224b4ee14..1305d7817 100644 --- a/bottlecap/src/traces/stats_processor.rs +++ b/bottlecap/src/traces/stats_processor.rs @@ -5,7 +5,7 @@ use std::time::{SystemTime, UNIX_EPOCH}; use async_trait::async_trait; use hyper::{http, Body, Request, Response, StatusCode}; -use log::info; +use tracing::debug; use tokio::sync::mpsc::Sender; use datadog_trace_protobuf::pb; @@ -36,7 +36,7 @@ impl StatsProcessor for ServerlessStatsProcessor { req: Request, tx: Sender, ) -> http::Result> { - info!("Recieved trace stats to process"); + debug!("Recieved trace stats to process"); let (parts, body) = req.into_parts(); if let Some(response) = http_utils::verify_request_content_length( diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index b8eb7dec0..3a2d200f8 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -12,7 +12,7 @@ use std::sync::Arc; use async_trait::async_trait; use hyper::{http, Body, Request, Response, StatusCode}; use tokio::sync::mpsc::Sender; -use tracing::info; +use tracing::debug; use crate::config; use datadog_trace_mini_agent::http_utils::{self, log_and_create_http_response}; @@ -51,7 +51,7 @@ impl TraceProcessor for ServerlessTraceProcessor { tags_provider: Arc, version: ApiVersion, ) -> http::Result> { - info!("Recieved traces to process"); + debug!("Recieved traces to process"); let (parts, body) = req.into_parts(); if let Some(response) = http_utils::verify_request_content_length( From bf040c34f907f6be875f6cb40d5a4606b2ae1f3a Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Fri, 28 Jun 2024 10:46:46 -0400 Subject: [PATCH 24/25] feat: more debug --- bottlecap/src/secrets/decrypt.rs | 2 +- 
bottlecap/src/traces/stats_flusher.rs | 6 +++--- bottlecap/src/traces/stats_processor.rs | 2 +- bottlecap/src/traces/trace_flusher.rs | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/bottlecap/src/secrets/decrypt.rs b/bottlecap/src/secrets/decrypt.rs index ae0db7a98..a00677a2d 100644 --- a/bottlecap/src/secrets/decrypt.rs +++ b/bottlecap/src/secrets/decrypt.rs @@ -2,7 +2,6 @@ use crate::config::{AwsConfig, Config}; use base64::prelude::*; use chrono::{DateTime, Utc}; use hmac::{Hmac, Mac}; -use tracing::error; use reqwest::header::{HeaderMap, HeaderValue}; use reqwest::Client; use serde_json::Value; @@ -11,6 +10,7 @@ use std::io::Error; use std::sync::Arc; use std::time::Instant; use tracing::debug; +use tracing::error; pub async fn resolve_secrets(config: Arc, aws_config: &AwsConfig) -> Option { if !config.api_key.is_empty() { diff --git a/bottlecap/src/traces/stats_flusher.rs b/bottlecap/src/traces/stats_flusher.rs index 1fb00647f..d373d2fc0 100644 --- a/bottlecap/src/traces/stats_flusher.rs +++ b/bottlecap/src/traces/stats_flusher.rs @@ -2,10 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 use async_trait::async_trait; -use tracing::{debug, error, info}; use std::str::FromStr; use std::sync::Arc; use tokio::sync::{mpsc::Receiver, Mutex}; +use tracing::{debug, error}; use crate::config; use datadog_trace_protobuf::pb; @@ -55,7 +55,7 @@ impl StatsFlusher for ServerlessStatsFlusher { if stats.is_empty() { return; } - info!("Flushing {} stats", stats.len()); + debug!("Flushing {} stats", stats.len()); let stats_payload = stats_utils::construct_stats_payload(stats); @@ -83,7 +83,7 @@ impl StatsFlusher for ServerlessStatsFlusher { ) .await { - Ok(()) => info!("Successfully flushed stats"), + Ok(()) => debug!("Successfully flushed stats"), Err(e) => { error!("Error sending stats: {e:?}"); } diff --git a/bottlecap/src/traces/stats_processor.rs b/bottlecap/src/traces/stats_processor.rs index 1305d7817..60c8019c8 100644 --- 
a/bottlecap/src/traces/stats_processor.rs +++ b/bottlecap/src/traces/stats_processor.rs @@ -5,8 +5,8 @@ use std::time::{SystemTime, UNIX_EPOCH}; use async_trait::async_trait; use hyper::{http, Body, Request, Response, StatusCode}; -use tracing::debug; use tokio::sync::mpsc::Sender; +use tracing::debug; use datadog_trace_protobuf::pb; use datadog_trace_utils::stats_utils; diff --git a/bottlecap/src/traces/trace_flusher.rs b/bottlecap/src/traces/trace_flusher.rs index 7b5f56997..aec839e8b 100644 --- a/bottlecap/src/traces/trace_flusher.rs +++ b/bottlecap/src/traces/trace_flusher.rs @@ -4,7 +4,7 @@ use async_trait::async_trait; use std::sync::Arc; use tokio::sync::{mpsc::Receiver, Mutex}; -use tracing::{error, info}; +use tracing::{debug, error}; use datadog_trace_utils::trace_utils::{self, SendData}; @@ -49,11 +49,11 @@ impl TraceFlusher for ServerlessTraceFlusher { if traces.is_empty() { return; } - info!("Flushing {} traces", traces.len()); + debug!("Flushing {} traces", traces.len()); for traces in trace_utils::coalesce_send_data(traces) { match traces.send().await.last_result { - Ok(_) => info!("Successfully flushed traces"), + Ok(_) => debug!("Successfully flushed traces"), Err(e) => { error!("Error sending trace: {e:?}"); // TODO: Retries From 83da5288f14331442b71445a7f837dcfd6cc85c9 Mon Sep 17 00:00:00 2001 From: AJ Stuyvenberg Date: Fri, 28 Jun 2024 11:26:53 -0400 Subject: [PATCH 25/25] feat: Remove call to trace utils --- bottlecap/src/traces/trace_processor.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 3a2d200f8..3a2a20cf3 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -91,11 +91,6 @@ impl TraceProcessor for ServerlessTraceProcessor { traces, &tracer_header_tags, |chunk, _root_span_index| { - // trace_utils::set_serverless_root_span_tags( - // &mut chunk.spans[root_span_index], - // 
config.function_name.clone(), - // &config.env_type, - // ); chunk.spans.retain(|span| { (span.resource != "127.0.0.1" || span.resource != "0.0.0.0") && span.name != "dns.lookup"