From 6f7c045d6f8fac9f985642f14b6480f9b6df5893 Mon Sep 17 00:00:00 2001 From: limityan Date: Wed, 13 May 2026 15:38:44 +0800 Subject: [PATCH] feat(logging): add backend diagnostic log redactor --- src/crates/core/src/service/mod.rs | 2 +- src/crates/services-core/Cargo.toml | 1 + .../services-core/src/diagnostics/mod.rs | 5 + .../src/diagnostics/redaction.rs | 177 ++++++++++++++++++ src/crates/services-core/src/lib.rs | 1 + .../tests/diagnostic_log_redaction.rs | 68 +++++++ 6 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 src/crates/services-core/src/diagnostics/mod.rs create mode 100644 src/crates/services-core/src/diagnostics/redaction.rs create mode 100644 src/crates/services-core/tests/diagnostic_log_redaction.rs diff --git a/src/crates/core/src/service/mod.rs b/src/crates/core/src/service/mod.rs index b19da6b7d..98657b44f 100644 --- a/src/crates/core/src/service/mod.rs +++ b/src/crates/core/src/service/mod.rs @@ -32,7 +32,7 @@ pub use terminal_core as terminal; // Re-export main components. pub use announcement::{AnnouncementCard, AnnouncementScheduler, AnnouncementSchedulerRef}; -pub use bitfun_services_core::{diff, system}; +pub use bitfun_services_core::{diagnostics, diff, system}; pub use bitfun_services_integrations::file_watch; pub use bootstrap::reset_workspace_persona_files_to_default; pub use config::{ConfigManager, ConfigProvider, ConfigService}; diff --git a/src/crates/services-core/Cargo.toml b/src/crates/services-core/Cargo.toml index 7961cda58..2c8da44ca 100644 --- a/src/crates/services-core/Cargo.toml +++ b/src/crates/services-core/Cargo.toml @@ -19,6 +19,7 @@ thiserror = { workspace = true } log = { workspace = true } which = { workspace = true } similar = { workspace = true } +regex = { workspace = true } [target.'cfg(windows)'.dependencies] win32job = { workspace = true } diff --git a/src/crates/services-core/src/diagnostics/mod.rs b/src/crates/services-core/src/diagnostics/mod.rs new file mode 100644 index 000000000..38479bbde --- /dev/null +++ b/src/crates/services-core/src/diagnostics/mod.rs @@ -0,0 +1,5 @@ +pub mod redaction; + +pub use redaction::{ + redact_diagnostic_log_text, redact_diagnostic_log_text_with_report, RedactedDiagnosticLog, +}; diff --git a/src/crates/services-core/src/diagnostics/redaction.rs b/src/crates/services-core/src/diagnostics/redaction.rs new file mode 100644 index 000000000..c63a409e3 --- /dev/null +++ b/src/crates/services-core/src/diagnostics/redaction.rs @@ -0,0 +1,177 @@ +use regex::{Captures, Regex}; +use std::sync::OnceLock; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RedactedDiagnosticLog { + /// Redacted diagnostic log text. + pub text: String, + /// Number of replacement operations applied while redacting the text. + pub redaction_count: usize, +} + +/// Redact sensitive values from diagnostic log text and return the text only. +/// +/// This function is intentionally line-oriented so callers can use the same +/// project-level redaction rules for large local log exports without parsing +/// the entire log as one structured payload. +pub fn redact_diagnostic_log_text(input: &str) -> String { + redact_diagnostic_log_text_with_report(input).text +} + +/// Redact sensitive values from diagnostic log text and include a replacement count. +pub fn redact_diagnostic_log_text_with_report(input: &str) -> RedactedDiagnosticLog { + let mut text = String::with_capacity(input.len()); + let mut redaction_count = 0; + + for segment in input.split_inclusive('\n') { + let (redacted, count) = redact_diagnostic_log_segment(segment); + text.push_str(&redacted); + redaction_count += count; + } + + RedactedDiagnosticLog { + text, + redaction_count, + } +} + +fn redact_diagnostic_log_segment(segment: &str) -> (String, usize) { + let (segment, quoted_count) = redact_quoted_sensitive_values(segment); + let (segment, bearer_count) = redact_bearer_tokens(&segment); + let (segment, token_count) = redact_secret_tokens(&segment); + let (segment, bare_count) = redact_bare_sensitive_values(&segment); + let (segment, path_count) = redact_absolute_paths(&segment); + + ( + segment, + quoted_count + bearer_count + token_count + bare_count + path_count, + ) +} + +fn redact_quoted_sensitive_values(input: &str) -> (String, usize) { + let mut count = 0; + let output = sensitive_quoted_value_re() + .replace_all(input, |captures: &Captures<'_>| { + count += 1; + let prefix = captures.name("prefix").map_or("", |m| m.as_str()); + let value = captures.name("value").map_or("", |m| m.as_str()); + let quote = value.chars().next().unwrap_or('"'); + let value_chars = value.chars().count().saturating_sub(2); + format!("{prefix}{quote}{quote}") + }) + .into_owned(); + + (output, count) +} + +fn redact_bare_sensitive_values(input: &str) -> (String, usize) { + let mut count = 0; + let output = sensitive_bare_value_re() + .replace_all(input, |captures: &Captures<'_>| { + let value = captures.name("value").map_or("", |m| m.as_str()); + if value.starts_with('{') || value.starts_with('[') { + return captures.get(0).map_or("", |m| m.as_str()).to_string(); + } + + count += 1; + let prefix = captures.name("prefix").map_or("", |m| m.as_str()); + format!("{prefix}", value.chars().count()) + }) + .into_owned(); + + (output, count) +} + +fn redact_bearer_tokens(input: &str) -> (String, usize) { + replace_all_count(input, bearer_token_re(), "Bearer ") +} + +fn redact_secret_tokens(input: &str) -> (String, usize) { + replace_all_count(input, secret_token_re(), "") +} + +fn redact_absolute_paths(input: &str) -> (String, usize) { + let (input, windows_escaped_count) = + replace_all_count(input, windows_escaped_path_re(), ""); + let (input, windows_count) = replace_all_count(&input, windows_path_re(), ""); + let (input, unix_count) = replace_all_count(&input, unix_path_re(), ""); + + (input, windows_escaped_count + windows_count + unix_count) +} + +fn replace_all_count(input: &str, regex: &Regex, replacement: &str) -> (String, usize) { + let mut count = 0; + let output = regex + .replace_all(input, |_captures: &Captures<'_>| { + count += 1; + replacement.to_string() + }) + .into_owned(); + + (output, count) +} + +fn sensitive_quoted_value_re() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| { + Regex::new(&format!( + r#"(?i)(?P(?:"(?:{keys})"|'(?:{keys})'|(?:{keys}))\s*[:=]\s*(?:Some\()?)(?P"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*')"#, + keys = sensitive_key_pattern(), + )) + .expect("sensitive quoted value regex must compile") + }) +} + +fn sensitive_bare_value_re() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| { + Regex::new(&format!( + r#"(?i)(?P\b(?:{keys})\b\s*[:=]\s*)(?P[^\s,}})]+)"#, + keys = sensitive_key_pattern(), + )) + .expect("sensitive bare value regex must compile") + }) +} + +fn bearer_token_re() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| { + Regex::new(r#"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]{8,}"#) + .expect("bearer token regex must compile") + }) +} + +fn secret_token_re() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| { + Regex::new(r#"\b(?:sk|sk-ant|sk-proj|ghp|gho|github_pat)_[A-Za-z0-9_\-]{8,}|\bsk-[A-Za-z0-9_\-]{8,}"#) + .expect("secret token regex must compile") + }) +} + +fn windows_escaped_path_re() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| { + Regex::new(r#"\b[A-Za-z]:\\\\[^"'\r\n,}\]]+"#) + .expect("escaped Windows path regex must compile") + }) +} + +fn windows_path_re() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| { + Regex::new(r#"\b[A-Za-z]:\\[^"'\r\n,}\]]+"#).expect("Windows path regex must compile") + }) +} + +fn unix_path_re() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| { + Regex::new(r#"\b/(?:Users|home|workspace|tmp|var|private)/[^"'\s,}\]]+"#) + .expect("Unix path regex must compile") + }) +} + +fn sensitive_key_pattern() -> &'static str { + r#"api[_-]?key|apikey|authorization|x-api-key|token|access[_-]?token|refresh[_-]?token|session[_-]?key|password|secret|prompt|system_prompt|original_prompt|suggested_prompt|copyable_prompt|content|text|partial_json|arguments|payload|raw_message|rawMessage|raw_error|outer_html|text_content|command|path|file|files|data"# +} diff --git a/src/crates/services-core/src/lib.rs b/src/crates/services-core/src/lib.rs index 81e91dd1c..7bac737df 100644 --- a/src/crates/services-core/src/lib.rs +++ b/src/crates/services-core/src/lib.rs @@ -3,6 +3,7 @@ //! This crate owns platform-agnostic service building blocks that can be //! tested without compiling the full BitFun product runtime. +pub mod diagnostics; pub mod diff; pub mod process_manager; pub mod session; diff --git a/src/crates/services-core/tests/diagnostic_log_redaction.rs b/src/crates/services-core/tests/diagnostic_log_redaction.rs new file mode 100644 index 000000000..e911e917f --- /dev/null +++ b/src/crates/services-core/tests/diagnostic_log_redaction.rs @@ -0,0 +1,68 @@ +use bitfun_services_core::diagnostics::{ + redact_diagnostic_log_text, redact_diagnostic_log_text_with_report, +}; + +#[test] +fn redacts_model_payloads_without_removing_routing_metadata() { + let input = r#"[2026-05-13T10:38:21.837][DEBUG][ai::openai] Request body: +{ + "model": "kimi-k2.6", + "api_key": "sk-secret-token", + "messages": [ + {"role": "user", "content": "please review C:\\Users\\limit\\private\\file.rs"} + ], + "tools": [{"name": "Read"}], + "tool_call": {"name": "Read", "arguments": "{\"path\":\"C:\\Users\\limit\\private\\file.rs\"}"} +} +Authorization: Bearer live-provider-token +"#; + + let report = redact_diagnostic_log_text_with_report(input); + + assert!(report.redaction_count >= 4); + assert!(report.text.contains("[ai::openai]")); + assert!(report.text.contains("\"model\": \"kimi-k2.6\"")); + assert!(report.text.contains("\"role\": \"user\"")); + assert!(report.text.contains("\"name\": \"Read\"")); + assert!(!report.text.contains("sk-secret-token")); + assert!(!report.text.contains("live-provider-token")); + assert!(!report.text.contains("please review")); + assert!(!report.text.contains("C:\\Users\\limit")); + assert!(report.text.contains("", tool_call: Some(UnifiedToolCall { tool_call_index: Some(0), id: Some("toolu_1"), name: Some("Read"), arguments: Some("{\"path\":\"D:\\workspace\\secret\\main.rs\"}"), arguments_is_snapshot: false }), usage: None, finish_reason: None, provider_metadata: "" } +"#; + + let redacted = redact_diagnostic_log_text(input); + + assert!(redacted.contains("Anthropic SSE")); + assert!(redacted.contains("event: \"content_block_delta\"")); + assert!(redacted.contains("tool_call_index: Some(0)")); + assert!(redacted.contains("name: Some(\"Read\")")); + assert!(!redacted.contains("-US secret argument")); + assert!(!redacted.contains("private answer")); + assert!(!redacted.contains("hidden reasoning")); + assert!(!redacted.contains("D:\\workspace\\secret")); +} + +#[test] +fn handles_large_log_text_without_dropping_lines() { + let mut input = String::new(); + for index in 0..2_000 { + input.push_str(&format!( + "[TRACE][webview] event={index} payload={{\"prompt\":\"secret prompt {index}\",\"path\":\"C:\\\\Users\\\\limit\\\\secret-{index}.txt\"}}\n" + )); + } + + let report = redact_diagnostic_log_text_with_report(&input); + + assert_eq!(report.text.lines().count(), 2_000); + assert!(report.redaction_count >= 4_000); + assert!(report.text.contains("[TRACE][webview] event=1999")); + assert!(!report.text.contains("secret prompt")); + assert!(!report.text.contains("C:\\\\Users\\\\limit")); +}