From 747a4dd4f36c0ae29ac8fa6eb3698a98314a819b Mon Sep 17 00:00:00 2001 From: RA <70325462+RAprogramm@users.noreply.github.com> Date: Tue, 23 Sep 2025 12:11:39 +0700 Subject: [PATCH] Add telemetry hook and metrics integration --- CHANGELOG.md | 21 ++++ Cargo.lock | 99 ++++++++++++++- Cargo.toml | 23 +++- README.md | 31 +++-- src/app_error.rs | 11 +- src/app_error/constructors.rs | 4 +- src/app_error/context.rs | 11 +- src/app_error/core.rs | 229 +++++++++++++++++++++++++++------- src/app_error/tests.rs | 209 +++++++++++++++++++++++++++++++ src/convert/actix.rs | 1 + src/convert/axum.rs | 18 +-- src/response/axum_impl.rs | 5 +- 12 files changed, 578 insertions(+), 84 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da23ee8..a2526e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,27 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +## [0.14.0] - 2025-09-24 + +### Added +- Introduced optional `tracing`, `metrics` and `backtrace` features. When + enabled they emit structured `tracing` events, increment the + `error_total{code,category}` counter and capture lazy [`Backtrace`] snapshots + from a new `AppError::emit_telemetry` hook. + +### Changed +- Reworked the `AppError` core to emit telemetry exactly once, track dirty + mutations and expose a crate-private `new_raw` constructor for contexts that + enrich errors before flushing instrumentation. +- Updated Axum and Actix integrations to rely on the telemetry hook instead of + manually logging errors while preserving backward-compatible APIs. + +### Tests +- Added tracing dispatcher coverage to assert a single telemetry event with MDC + propagated `trace_id` values. +- Installed a deterministic metrics recorder in unit tests to confirm + `error_total` increments once per error. + ## [0.13.1] - 2025-09-23 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index a743fc1..0400bbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -193,6 +193,18 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -1585,6 +1597,12 @@ version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +[[package]] +name = "log-mdc" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a94d21414c1f4a51209ad204c1776a3d0765002c76c6abcb602a6f09f1e881c7" + [[package]] name = "lru-slab" version = "0.1.2" @@ -1606,15 +1624,18 @@ dependencies = [ [[package]] name = "masterror" -version = "0.13.1" +version = "0.14.0" dependencies = [ "actix-web", "axum", "config", "http 1.3.1", "js-sys", + "log", + "log-mdc", "masterror-derive", "masterror-template", + "metrics", "redis", "reqwest", "serde", @@ -1628,6 +1649,7 @@ dependencies = [ "tokio", "toml", "tracing", + "tracing-subscriber", "trybuild", "utoipa", "uuid", @@ -1671,6 +1693,16 @@ version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +[[package]] +name = "metrics" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25dea7ac8057892855ec285c440160265225438c3c45072613c25a4b26e98ef5" +dependencies = [ + "ahash", + "portable-atomic", +] + [[package]] name = "mime" version = "0.3.17" @@ -1725,6 +1757,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -1972,6 +2013,12 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + [[package]] name = "potential_utf" version = "0.1.3" @@ -2674,6 +2721,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -3125,6 +3181,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "time" version = "0.3.44" @@ -3358,6 +3423,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", ] [[package]] @@ -3532,6 +3623,12 @@ dependencies = [ "syn", ] +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index dfc4e1d..0bfdc16 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "masterror" -version = "0.13.1" +version = "0.14.0" rust-version = "1.90" edition = "2024" license = "MIT OR Apache-2.0" @@ -50,6 +50,9 @@ readme = "README.md" [features] default = [] +tracing = ["dep:tracing", "dep:log", "dep:log-mdc"] +metrics = ["dep:metrics"] +backtrace = [] axum = ["dep:axum", "dep:serde_json"] actix = ["dep:actix-web", "dep:serde_json"] @@ -77,7 +80,10 @@ masterror-template = { version = "0.3.6" } [dependencies] masterror-derive = { version = "0.7" } masterror-template = { workspace = true } -tracing = "0.1" +tracing = { version = "0.1", optional = true } +log = { version = "0.4", optional = true } +log-mdc = { version = "0.1", optional = true } +metrics = { version = "0.24", optional = true } serde = { version = "1", features = ["derive"] } serde_json = { version = "1", optional = true } @@ -125,6 +131,7 @@ tokio = { version = "1", features = [ trybuild = "1" toml = "0.9" tempfile = "3" +tracing-subscriber = { version = "0.3", features = ["registry"] } [build-dependencies] serde = { version = "1", features = ["derive"] } @@ -136,6 +143,9 @@ feature_order = [ "actix", "openapi", "serde_json", + "tracing", + "metrics", + "backtrace", "sqlx", "sqlx-migrate", "reqwest", @@ -176,6 +186,15 @@ description = "Generate utoipa OpenAPI schema for ErrorResponse" [package.metadata.masterror.readme.features.serde_json] description = "Attach structured JSON details to AppError" +[package.metadata.masterror.readme.features.tracing] +description = "Emit structured tracing events when errors are constructed" + +[package.metadata.masterror.readme.features.metrics] +description = "Increment `error_total{code,category}` counter for each AppError" + +[package.metadata.masterror.readme.features.backtrace] +description = "Capture lazy `Backtrace` snapshots when telemetry is flushed" + [package.metadata.masterror.readme.features.sqlx] description = "Classify sqlx_core::Error variants into AppError kinds" diff --git a/README.md b/README.md index 8459c40..3b8adc6 100644 --- a/README.md +++ b/README.md @@ -38,13 +38,14 @@ guides, comparisons with `thiserror`/`anyhow`, and troubleshooting recipes. ~~~toml [dependencies] -masterror = { version = "0.13.1", default-features = false } +masterror = { version = "0.14.0", default-features = false } # or with features: -# masterror = { version = "0.13.1", features = [ +# masterror = { version = "0.14.0", features = [ # "axum", "actix", "openapi", "serde_json", -# "sqlx", "sqlx-migrate", "reqwest", "redis", -# "validator", "config", "tokio", "multipart", -# "teloxide", "telegram-webapp-sdk", "frontend", "turnkey" +# "tracing", "metrics", "backtrace", "sqlx", +# "sqlx-migrate", "reqwest", "redis", "validator", +# "config", "tokio", "multipart", "teloxide", +# "telegram-webapp-sdk", "frontend", "turnkey" # ] } ~~~ @@ -76,14 +77,15 @@ masterror = { version = "0.13.1", default-features = false } ~~~toml [dependencies] # lean core -masterror = { version = "0.13.1", default-features = false } +masterror = { version = "0.14.0", default-features = false } # with Axum/Actix + JSON + integrations -# masterror = { version = "0.13.1", features = [ +# masterror = { version = "0.14.0", features = [ # "axum", "actix", "openapi", "serde_json", -# "sqlx", "sqlx-migrate", "reqwest", "redis", -# "validator", "config", "tokio", "multipart", -# "teloxide", "telegram-webapp-sdk", "frontend", "turnkey" +# "tracing", "metrics", "backtrace", "sqlx", +# "sqlx-migrate", "reqwest", "redis", "validator", +# "config", "tokio", "multipart", "teloxide", +# "telegram-webapp-sdk", "frontend", "turnkey" # ] } ~~~ @@ -671,6 +673,9 @@ assert_eq!(resp.status, 401); - `actix` — Actix Web ResponseError and Responder implementations - `openapi` — Generate utoipa OpenAPI schema for ErrorResponse - `serde_json` — Attach structured JSON details to AppError +- `tracing` — Emit structured tracing events when errors are constructed +- `metrics` — Increment `error_total{code,category}` counter for each AppError +- `backtrace` — Capture lazy `Backtrace` snapshots when telemetry is flushed - `sqlx` — Classify sqlx_core::Error variants into AppError kinds - `sqlx-migrate` — Map sqlx::migrate::MigrateError into AppError (Database) - `reqwest` — Classify reqwest::Error as timeout/network/external API @@ -709,13 +714,13 @@ assert_eq!(resp.status, 401); Minimal core: ~~~toml -masterror = { version = "0.13.1", default-features = false } +masterror = { version = "0.14.0", default-features = false } ~~~ API (Axum + JSON + deps): ~~~toml -masterror = { version = "0.13.1", features = [ +masterror = { version = "0.14.0", features = [ "axum", "serde_json", "openapi", "sqlx", "reqwest", "redis", "validator", "config", "tokio" ] } @@ -724,7 +729,7 @@ masterror = { version = "0.13.1", features = [ API (Actix + JSON + deps): ~~~toml -masterror = { version = "0.13.1", features = [ +masterror = { version = "0.14.0", features = [ "actix", "serde_json", "openapi", "sqlx", "reqwest", "redis", "validator", "config", "tokio" ] } diff --git a/src/app_error.rs b/src/app_error.rs index c34406c..c206df3 100644 --- a/src/app_error.rs +++ b/src/app_error.rs @@ -53,11 +53,14 @@ //! } //! ``` //! -//! ## Logging +//! ## Telemetry //! -//! [`AppError::log`] emits a single structured `tracing::error!` event with -//! `kind`, `code` and optional `message` fields. Prefer calling it at the -//! transport boundary (e.g. in `IntoResponse`) to avoid duplicate logs. +//! [`AppError::log`] flushes telemetry once: it emits a structured `tracing` +//! event (when the `tracing` feature is enabled), increments the +//! `error_total{code,category}` counter (with the `metrics` feature) and +//! captures a lazy [`Backtrace`] snapshot (with the `backtrace` feature). +//! Constructors and framework integrations call it automatically, so manual +//! usage is rarely required. mod constructors; mod context; diff --git a/src/app_error/constructors.rs b/src/app_error/constructors.rs index 20f7521..30abece 100644 --- a/src/app_error/constructors.rs +++ b/src/app_error/constructors.rs @@ -62,8 +62,8 @@ impl AppError { /// assert!(err.message.is_none()); /// ``` pub fn database(msg: Option>) -> Self { - let mut err = Self::bare(AppErrorKind::Database); - err.message = msg; + let err = Self::new_raw(AppErrorKind::Database, msg); + err.emit_telemetry(); err } diff --git a/src/app_error/context.rs b/src/app_error/context.rs index 61b1901..57cb5cc 100644 --- a/src/app_error/context.rs +++ b/src/app_error/context.rs @@ -129,13 +129,16 @@ impl Context { )); } - let mut error = AppError::bare(self.category).with_code(self.code); + let mut error = AppError::new_raw(self.category, None); + error.code = self.code; if !self.fields.is_empty() { - error = error.with_fields(self.fields); + error.metadata.extend(self.fields); } if matches!(self.edit_policy, MessageEditPolicy::Redact) { - error = error.redactable(); + error.edit_policy = MessageEditPolicy::Redact; } - error.with_source(source) + let error = error.with_source(source); + error.emit_telemetry(); + error } } diff --git a/src/app_error/core.rs b/src/app_error/core.rs index 3787d9e..3625383 100644 --- a/src/app_error/core.rs +++ b/src/app_error/core.rs @@ -1,11 +1,15 @@ +#[cfg(feature = "backtrace")] +use std::sync::OnceLock; use std::{ backtrace::Backtrace, borrow::Cow, error::Error as StdError, - fmt::{Display, Formatter, Result as FmtResult} + fmt::{Display, Formatter, Result as FmtResult}, + sync::atomic::{AtomicBool, Ordering} }; -use tracing::error; +#[cfg(feature = "tracing")] +use tracing::{Level, event}; use super::metadata::{Field, Metadata}; use crate::{AppCode, AppErrorKind, RetryAdvice}; @@ -20,6 +24,51 @@ pub enum MessageEditPolicy { Redact } +#[cfg(feature = "backtrace")] +#[derive(Debug)] +struct BacktraceSlot { + cell: OnceLock> +} + +#[cfg(feature = "backtrace")] +impl BacktraceSlot { + const fn new() -> Self { + Self { + cell: OnceLock::new() + } + } + + fn with(backtrace: Backtrace) -> Self { + let slot = Self::new(); + let _ = slot.cell.set(Some(backtrace)); + slot + } + + fn set(&mut self, backtrace: Backtrace) { + *self = Self::with(backtrace); + } + + fn get(&self) -> Option<&Backtrace> { + self.cell.get().and_then(|value| value.as_ref()) + } + + fn capture_if_absent(&self) -> Option<&Backtrace> { + self.cell + .get_or_init(|| Some(Backtrace::capture())) + .as_ref() + } +} + +#[cfg(feature = "backtrace")] +impl Default for BacktraceSlot { + fn default() -> Self { + Self::new() + } +} + +#[cfg(not(feature = "backtrace"))] +type BacktraceSlot = Option; + /// Rich application error preserving domain code, taxonomy and metadata. #[derive(Debug)] pub struct Error { @@ -38,7 +87,8 @@ pub struct Error { /// Optional authentication challenge for `WWW-Authenticate`. pub www_authenticate: Option, source: Option>, - backtrace: Option + backtrace: BacktraceSlot, + telemetry_dirty: AtomicBool } impl Display for Error { @@ -81,6 +131,86 @@ impl StdError for Error { pub type AppResult = Result; impl Error { + pub(crate) fn new_raw(kind: AppErrorKind, message: Option>) -> Self { + Self { + code: AppCode::from(kind), + kind, + message, + metadata: Metadata::new(), + edit_policy: MessageEditPolicy::Preserve, + retry: None, + www_authenticate: None, + source: None, + backtrace: BacktraceSlot::default(), + telemetry_dirty: AtomicBool::new(true) + } + } + + fn mark_dirty(&self) { + self.telemetry_dirty.store(true, Ordering::Release); + } + + fn take_dirty(&self) -> bool { + self.telemetry_dirty.swap(false, Ordering::AcqRel) + } + + #[cfg(feature = "backtrace")] + fn capture_backtrace(&self) -> Option<&Backtrace> { + self.backtrace.capture_if_absent() + } + + #[cfg(not(feature = "backtrace"))] + fn capture_backtrace(&self) -> Option<&Backtrace> { + self.backtrace.as_ref() + } + + #[cfg(feature = "backtrace")] + fn set_backtrace_slot(&mut self, backtrace: Backtrace) { + self.backtrace.set(backtrace); + } + + #[cfg(not(feature = "backtrace"))] + fn set_backtrace_slot(&mut self, backtrace: Backtrace) { + self.backtrace = Some(backtrace); + } + + pub(crate) fn emit_telemetry(&self) { + if self.take_dirty() { + #[cfg(feature = "backtrace")] + let _ = self.capture_backtrace(); + + #[cfg(feature = "metrics")] + { + metrics::counter!( + "error_total", + "code" => self.code.as_str(), + "category" => kind_label(self.kind) + ) + .increment(1); + } + + #[cfg(feature = "tracing")] + { + let message = self.message.as_deref(); + let retry_seconds = self.retry.map(|value| value.after_seconds); + let trace_id = log_mdc::get("trace_id", |value| value.map(str::to_owned)); + event!( + target: "masterror::error", + Level::ERROR, + code = self.code.as_str(), + category = kind_label(self.kind), + message = message, + retry_seconds, + redactable = matches!(self.edit_policy, MessageEditPolicy::Redact), + metadata_len = self.metadata.len() as u64, + www_authenticate = self.www_authenticate.as_deref(), + trace_id = trace_id.as_deref(), + "app error constructed" + ); + } + } + } + /// Create a new [`Error`] with a kind and message. /// /// This is equivalent to [`Error::with`], provided for API symmetry and to @@ -104,17 +234,9 @@ impl Error { /// intent. #[must_use] pub fn with(kind: AppErrorKind, msg: impl Into>) -> Self { - Self { - code: AppCode::from(kind), - kind, - message: Some(msg.into()), - metadata: Metadata::new(), - edit_policy: MessageEditPolicy::Preserve, - retry: None, - www_authenticate: None, - source: None, - backtrace: None - } + let err = Self::new_raw(kind, Some(msg.into())); + err.emit_telemetry(); + err } /// Create a message-less error with the given kind. @@ -122,23 +244,16 @@ impl Error { /// Useful when the kind alone conveys sufficient information to the client. #[must_use] pub fn bare(kind: AppErrorKind) -> Self { - Self { - code: AppCode::from(kind), - kind, - message: None, - metadata: Metadata::new(), - edit_policy: MessageEditPolicy::Preserve, - retry: None, - www_authenticate: None, - source: None, - backtrace: None - } + let err = Self::new_raw(kind, None); + err.emit_telemetry(); + err } /// Override the machine-readable [`AppCode`]. #[must_use] pub fn with_code(mut self, code: AppCode) -> Self { self.code = code; + self.mark_dirty(); self } @@ -150,6 +265,7 @@ impl Error { self.retry = Some(RetryAdvice { after_seconds: secs }); + self.mark_dirty(); self } @@ -157,6 +273,7 @@ impl Error { #[must_use] pub fn with_www_authenticate(mut self, value: impl Into) -> Self { self.www_authenticate = Some(value.into()); + self.mark_dirty(); self } @@ -164,6 +281,7 @@ impl Error { #[must_use] pub fn with_field(mut self, field: Field) -> Self { self.metadata.insert(field); + self.mark_dirty(); self } @@ -171,6 +289,7 @@ impl Error { #[must_use] pub fn with_fields(mut self, fields: impl IntoIterator) -> Self { self.metadata.extend(fields); + self.mark_dirty(); self } @@ -178,6 +297,7 @@ impl Error { #[must_use] pub fn with_metadata(mut self, metadata: Metadata) -> Self { self.metadata = metadata; + self.mark_dirty(); self } @@ -185,6 +305,7 @@ impl Error { #[must_use] pub fn redactable(mut self) -> Self { self.edit_policy = MessageEditPolicy::Redact; + self.mark_dirty(); self } @@ -192,13 +313,15 @@ impl Error { #[must_use] pub fn with_source(mut self, source: impl StdError + Send + Sync + 'static) -> Self { self.source = Some(Box::new(source)); + self.mark_dirty(); self } /// Attach a captured backtrace. #[must_use] pub fn with_backtrace(mut self, backtrace: Backtrace) -> Self { - self.backtrace = Some(backtrace); + self.set_backtrace_slot(backtrace); + self.mark_dirty(); self } @@ -208,10 +331,11 @@ impl Error { &self.metadata } - /// Borrow the backtrace if present. + /// Borrow the backtrace, capturing it lazily when the `backtrace` feature + /// is enabled. #[must_use] pub fn backtrace(&self) -> Option<&Backtrace> { - self.backtrace.as_ref() + self.capture_backtrace() } /// Borrow the source if present. @@ -229,26 +353,43 @@ impl Error { } } - /// Log the error once at the boundary with stable fields. + /// Emit telemetry (`tracing` event, metrics counter, backtrace capture). /// - /// Emits a `tracing::error!` with `kind`, `code`, optional `message` and - /// metadata length. No internals or sources are leaked. + /// Downstream code can call this to guarantee telemetry after mutating the + /// error. It is automatically invoked by constructors and conversions. pub fn log(&self) { - match &self.message { - Some(m) => error!( - kind = ?self.kind, - code = %self.code, - message = %m, - metadata_len = self.metadata.len() - ), - None => error!( - kind = ?self.kind, - code = %self.code, - metadata_len = self.metadata.len() - ) - } + self.emit_telemetry(); } } /// Backwards-compatible export using the historical name. pub use Error as AppError; + +#[cfg(any(feature = "metrics", feature = "tracing"))] +fn kind_label(kind: AppErrorKind) -> &'static str { + match kind { + AppErrorKind::NotFound => "NotFound", + AppErrorKind::Validation => "Validation", + AppErrorKind::Conflict => "Conflict", + AppErrorKind::Unauthorized => "Unauthorized", + AppErrorKind::Forbidden => "Forbidden", + AppErrorKind::NotImplemented => "NotImplemented", + AppErrorKind::Internal => "Internal", + AppErrorKind::BadRequest => "BadRequest", + AppErrorKind::TelegramAuth => "TelegramAuth", + AppErrorKind::InvalidJwt => "InvalidJwt", + AppErrorKind::Database => "Database", + AppErrorKind::Service => "Service", + AppErrorKind::Config => "Config", + AppErrorKind::Turnkey => "Turnkey", + AppErrorKind::Timeout => "Timeout", + AppErrorKind::Network => "Network", + AppErrorKind::RateLimited => "RateLimited", + AppErrorKind::DependencyUnavailable => "DependencyUnavailable", + AppErrorKind::Serialization => "Serialization", + AppErrorKind::Deserialization => "Deserialization", + AppErrorKind::ExternalApi => "ExternalApi", + AppErrorKind::Queue => "Queue", + AppErrorKind::Cache => "Cache" + } +} diff --git a/src/app_error/tests.rs b/src/app_error/tests.rs index b0cfa9a..df607ca 100644 --- a/src/app_error/tests.rs +++ b/src/app_error/tests.rs @@ -201,6 +201,215 @@ fn log_uses_kind_and_code() { err.log(); } +#[cfg(feature = "tracing")] +#[test] +fn telemetry_emits_single_tracing_event_with_trace_id() { + use std::{ + fmt, + sync::{Arc, Mutex} + }; + + use tracing::{ + Dispatch, Event, Subscriber, dispatcher, + field::{Field, Visit} + }; + use tracing_subscriber::{ + Registry, + layer::{Context, Layer, SubscriberExt} + }; + + #[derive(Default, Clone)] + struct RecordedEvent { + trace_id: Option, + code: Option, + category: Option + } + + struct RecordingLayer { + events: Arc>> + } + + impl Layer for RecordingLayer + where + S: Subscriber + { + fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { + if event.metadata().target() != "masterror::error" { + return; + } + + let mut record = RecordedEvent::default(); + event.record(&mut EventVisitor { + record: &mut record + }); + self.events.lock().expect("events lock").push(record); + } + } + + struct EventVisitor<'a> { + record: &'a mut RecordedEvent + } + + impl<'a> Visit for EventVisitor<'a> { + fn record_debug(&mut self, field: &Field, value: &dyn fmt::Debug) { + let normalized = normalize_debug(value); + match field.name() { + "trace_id" => self.record.trace_id = Some(normalized), + "code" => self.record.code = Some(normalized), + "category" => self.record.category = Some(normalized), + _ => {} + } + } + } + + fn normalize_debug(value: &dyn fmt::Debug) -> String { + let mut rendered = format!("{value:?}"); + while let Some(stripped) = rendered + .strip_prefix("Some(") + .and_then(|s| s.strip_suffix(')')) + { + rendered = stripped.to_owned(); + } + rendered.trim_matches('"').to_owned() + } + + let events = Arc::new(Mutex::new(Vec::new())); + let layer = RecordingLayer { + events: events.clone() + }; + let subscriber = Registry::default().with(layer); + let dispatch = Dispatch::new(subscriber); + + dispatcher::with_default(&dispatch, || { + log_mdc::insert("trace_id", "trace-123"); + let err = AppError::internal("boom"); + err.log(); + log_mdc::remove("trace_id"); + + let events = events.lock().expect("events lock"); + assert_eq!(events.len(), 1, "expected exactly one tracing event"); + + let event = &events[0]; + assert_eq!(event.code.as_deref(), Some(AppCode::Internal.as_str())); + assert_eq!(event.category.as_deref(), Some("Internal")); + assert!( + event + .trace_id + .as_deref() + .is_some_and(|value| value.contains("trace-123")) + ); + }); +} + +#[cfg(feature = "metrics")] +#[test] +fn metrics_counter_is_incremented_once() { + use std::{ + collections::HashMap, + sync::{Arc, Mutex} + }; + + use metrics::{ + Counter, CounterFn, Gauge, Histogram, Key, KeyName, Metadata, Recorder, SharedString, Unit + }; + + #[derive(Clone)] + struct MetricsCounterHandle { + name: String, + labels: Vec<(String, String)>, + counts: Arc), u64>>> + } + + impl CounterFn for MetricsCounterHandle { + fn increment(&self, value: u64) { + let mut map = self.counts.lock().expect("counter map"); + *map.entry((self.name.clone(), self.labels.clone())) + .or_default() += value; + } + + fn absolute(&self, value: u64) { + let mut map = self.counts.lock().expect("counter map"); + map.insert((self.name.clone(), self.labels.clone()), value); + } + } + + struct CountingRecorder { + counts: Arc), u64>>> + } + + impl Recorder for CountingRecorder { + fn describe_counter( + &self, + _key: KeyName, + _unit: Option, + _description: SharedString + ) { + } + + fn describe_gauge(&self, _key: KeyName, _unit: Option, _description: SharedString) {} + + fn describe_histogram( + &self, + _key: KeyName, + _unit: Option, + _description: SharedString + ) { + } + + fn register_counter(&self, key: &Key, _metadata: &Metadata<'_>) -> Counter { + let labels = key + .labels() + .map(|label| (label.key().to_owned(), label.value().to_owned())) + .collect::>(); + Counter::from_arc(Arc::new(MetricsCounterHandle { + name: key.name().to_owned(), + labels, + counts: self.counts.clone() + })) + } + + fn register_gauge(&self, _key: &Key, _metadata: &Metadata<'_>) -> Gauge { + Gauge::noop() + } + + fn register_histogram(&self, _key: &Key, _metadata: &Metadata<'_>) -> Histogram { + Histogram::noop() + } + } + + use std::sync::OnceLock; + + static RECORDER_COUNTS: OnceLock), u64>>>> = + OnceLock::new(); + + let counts = RECORDER_COUNTS + .get_or_init(|| { + let counts = Arc::new(Mutex::new(HashMap::new())); + metrics::set_global_recorder(CountingRecorder { + counts: counts.clone() + }) + .expect("install recorder"); + counts + }) + .clone(); + + counts.lock().expect("counter map").clear(); + + let err = AppError::forbidden("denied"); + err.log(); + + let key = ( + "error_total".to_owned(), + vec![ + ("code".to_owned(), AppCode::Forbidden.as_str().to_owned()), + ("category".to_owned(), "Forbidden".to_owned()), + ] + ); + + let counts = counts.lock().expect("counter map"); + assert_eq!(counts.get(&key).copied(), Some(1)); +} + #[test] fn result_alias_is_generic() { // The alias intentionally preserves the full AppError payload size. diff --git a/src/convert/actix.rs b/src/convert/actix.rs index 268e60a..e91edc9 100644 --- a/src/convert/actix.rs +++ b/src/convert/actix.rs @@ -85,6 +85,7 @@ impl ResponseError for AppError { /// Produce JSON body with `ErrorResponse`. Does not leak sources. fn error_response(&self) -> HttpResponse { + self.emit_telemetry(); let body = ErrorResponse::from(self); let mut builder = HttpResponse::build(self.status_code()); if let Some(retry) = body.retry { diff --git a/src/convert/axum.rs b/src/convert/axum.rs index 5c65a07..6abcf1b 100644 --- a/src/convert/axum.rs +++ b/src/convert/axum.rs @@ -9,7 +9,8 @@ //! Err(...)` or directly `return AppError::...(...)` and get a JSON error //! body (when the `serde_json` feature is enabled) or an empty body //! otherwise. -//! - Logs each error once at the HTTP boundary using `tracing::error`. +//! - Flushes [`AppError`] telemetry at the HTTP boundary (tracing event, +//! metrics counter, lazy backtrace). //! //! ## Wire payload //! @@ -44,7 +45,6 @@ use axum::{ http::StatusCode, response::{IntoResponse, Response} }; -use tracing::error; use crate::AppError; #[cfg(feature = "serde_json")] @@ -64,20 +64,14 @@ impl AppError { impl IntoResponse for AppError { fn into_response(self) -> Response { - let status = self.http_status(); - - // Log once at the boundary with stable fields. - error!( - status = status.as_u16(), - kind = ?self.kind, - msg = self.message.as_deref().unwrap_or(""), - "AppError -> HTTP response" - ); + let err = self; + err.emit_telemetry(); + let status = err.http_status(); #[cfg(feature = "serde_json")] { // Build the stable wire contract (includes `code`). - let body: ErrorResponse = self.into(); + let body: ErrorResponse = err.into(); return body.into_response(); } diff --git a/src/response/axum_impl.rs b/src/response/axum_impl.rs index 2dacae7..70b2bf8 100644 --- a/src/response/axum_impl.rs +++ b/src/response/axum_impl.rs @@ -42,8 +42,9 @@ impl IntoResponse for ErrorResponse { /// Convert `AppError` into the stable wire model and reuse its `IntoResponse`. impl IntoResponse for AppError { fn into_response(self) -> Response { - // Use the canonical mapping defined in `From<&AppError> for ErrorResponse` - let wire: ErrorResponse = (&self).into(); + let err = self; + err.emit_telemetry(); + let wire: ErrorResponse = err.into(); wire.into_response() } }