Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/crates/core/src/service/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub use terminal_core as terminal;

// Re-export main components.
pub use announcement::{AnnouncementCard, AnnouncementScheduler, AnnouncementSchedulerRef};
pub use bitfun_services_core::{diff, system};
pub use bitfun_services_core::{diagnostics, diff, system};
pub use bitfun_services_integrations::file_watch;
pub use bootstrap::reset_workspace_persona_files_to_default;
pub use config::{ConfigManager, ConfigProvider, ConfigService};
Expand Down
1 change: 1 addition & 0 deletions src/crates/services-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ thiserror = { workspace = true }
log = { workspace = true }
which = { workspace = true }
similar = { workspace = true }
regex = { workspace = true }

[target.'cfg(windows)'.dependencies]
win32job = { workspace = true }
5 changes: 5 additions & 0 deletions src/crates/services-core/src/diagnostics/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pub mod redaction;

pub use redaction::{
redact_diagnostic_log_text, redact_diagnostic_log_text_with_report, RedactedDiagnosticLog,
};
177 changes: 177 additions & 0 deletions src/crates/services-core/src/diagnostics/redaction.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
use regex::{Captures, Regex};
use std::sync::OnceLock;

/// Result of a diagnostic-log redaction run: the rewritten text together with
/// a tally of the replacements that produced it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RedactedDiagnosticLog {
    /// Log text after sensitive values were swapped for placeholders.
    pub text: String,
    /// How many individual replacements were performed to produce `text`.
    pub redaction_count: usize,
}

/// Redact sensitive values from diagnostic log text and return the text only.
///
/// This function is intentionally line-oriented so callers can use the same
/// project-level redaction rules for large local log exports without parsing
/// the entire log as one structured payload.
pub fn redact_diagnostic_log_text(input: &str) -> String {
redact_diagnostic_log_text_with_report(input).text
}

/// Redact sensitive values from diagnostic log text and include a replacement count.
pub fn redact_diagnostic_log_text_with_report(input: &str) -> RedactedDiagnosticLog {
let mut text = String::with_capacity(input.len());
let mut redaction_count = 0;

for segment in input.split_inclusive('\n') {
let (redacted, count) = redact_diagnostic_log_segment(segment);
text.push_str(&redacted);
redaction_count += count;
}

RedactedDiagnosticLog {
text,
redaction_count,
}
}

fn redact_diagnostic_log_segment(segment: &str) -> (String, usize) {
let (segment, quoted_count) = redact_quoted_sensitive_values(segment);
let (segment, bearer_count) = redact_bearer_tokens(&segment);
let (segment, token_count) = redact_secret_tokens(&segment);
let (segment, bare_count) = redact_bare_sensitive_values(&segment);
let (segment, path_count) = redact_absolute_paths(&segment);

(
segment,
quoted_count + bearer_count + token_count + bare_count + path_count,
)
}

/// Replaces quoted values of sensitive keys with a length-only placeholder.
///
/// Everything captured as `prefix` (key, separator and an optional `Some(`) is
/// kept verbatim. The quoted value becomes `<redacted chars=N>` wrapped in the
/// same quote character it originally used, where `N` is the number of
/// characters between the quotes.
///
/// Returns the rewritten text and the number of values replaced.
fn redact_quoted_sensitive_values(input: &str) -> (String, usize) {
    let mut hits = 0;
    let rewritten = sensitive_quoted_value_re().replace_all(input, |found: &Captures<'_>| {
        hits += 1;

        let prefix = found.name("prefix").map(|m| m.as_str()).unwrap_or_default();
        let quoted = found.name("value").map(|m| m.as_str()).unwrap_or_default();

        // The value's first character is its opening quote.
        let quote = quoted.chars().next().unwrap_or('"');
        // Exclude the two enclosing quote characters from the reported length.
        let inner_len = quoted.chars().count().saturating_sub(2);

        format!("{prefix}{quote}<redacted chars={inner_len}>{quote}")
    });

    (rewritten.into_owned(), hits)
}

/// Replaces unquoted values of sensitive keys with a length-only placeholder.
///
/// A value is the run of characters following `key:` / `key=` up to the next
/// whitespace, `,`, `}` or `)`. It is rewritten to `<redacted chars=N>`, where
/// `N` is the value's character count. Two kinds of match are left untouched
/// and are not counted:
///
/// * values opening with `{` or `[`, i.e. the start of a nested structure
///   (presumably so its inner fields can be matched on their own);
/// * values that are the opening of a placeholder written by an earlier pass —
///   `<redacted`, optionally preceded by `Some(` and/or a quote. Without this
///   guard, `text: Some("<redacted chars=14>")` would be rewritten a second
///   time into the garbled `text: <redacted chars=15> chars=14>")` and the
///   replacement would be counted twice.
///
/// Returns the rewritten text and the number of values replaced.
fn redact_bare_sensitive_values(input: &str) -> (String, usize) {
    let mut count = 0;
    let output = sensitive_bare_value_re()
        .replace_all(input, |captures: &Captures<'_>| {
            let value = captures.name("value").map_or("", |m| m.as_str());

            // Peel an optional `Some(` (any case: the key patterns are
            // case-insensitive) and an optional opening quote, then check
            // whether what remains is exactly the start of a placeholder. The
            // placeholder's first space ends the bare value, so an
            // already-redacted value always shows up here as `<redacted`.
            let unwrapped = match value.get(..5) {
                Some(head) if head.eq_ignore_ascii_case("Some(") => &value[5..],
                _ => value,
            };
            let unquoted = unwrapped.strip_prefix(['"', '\'']).unwrap_or(unwrapped);
            let already_redacted = unquoted == "<redacted";

            if value.starts_with('{') || value.starts_with('[') || already_redacted {
                // Emit the whole match unchanged.
                return captures.get(0).map_or("", |m| m.as_str()).to_string();
            }

            count += 1;
            let prefix = captures.name("prefix").map_or("", |m| m.as_str());
            format!("{prefix}<redacted chars={}>", value.chars().count())
        })
        .into_owned();

    (output, count)
}

/// Replaces every bearer credential in `input` with `Bearer <redacted>` and
/// returns the rewritten text along with the number of replacements.
fn redact_bearer_tokens(input: &str) -> (String, usize) {
    const PLACEHOLDER: &str = "Bearer <redacted>";
    let pattern = bearer_token_re();
    replace_all_count(input, pattern, PLACEHOLDER)
}

/// Replaces every standalone secret token in `input` with `<redacted token>`
/// and returns the rewritten text along with the number of replacements.
fn redact_secret_tokens(input: &str) -> (String, usize) {
    const PLACEHOLDER: &str = "<redacted token>";
    let pattern = secret_token_re();
    replace_all_count(input, pattern, PLACEHOLDER)
}

fn redact_absolute_paths(input: &str) -> (String, usize) {
let (input, windows_escaped_count) =
replace_all_count(input, windows_escaped_path_re(), "<redacted path>");
let (input, windows_count) = replace_all_count(&input, windows_path_re(), "<redacted path>");
let (input, unix_count) = replace_all_count(&input, unix_path_re(), "<redacted path>");

(input, windows_escaped_count + windows_count + unix_count)
}

fn replace_all_count(input: &str, regex: &Regex, replacement: &str) -> (String, usize) {
let mut count = 0;
let output = regex
.replace_all(input, |_captures: &Captures<'_>| {
count += 1;
replacement.to_string()
})
.into_owned();

(output, count)
}

/// Lazily compiled, case-insensitive pattern for `key: "value"` pairs whose key
/// is sensitive and whose value is a quoted string.
///
/// * `prefix` captures the key (double-quoted, single-quoted or unquoted), the
///   `:` / `=` separator with surrounding whitespace, and an optional `Some(`.
/// * `value` captures the double- or single-quoted string, including its
///   quotes; backslash escapes inside the string are honoured.
///
/// # Panics
///
/// Panics on first use if the assembled pattern fails to compile.
fn sensitive_quoted_value_re() -> &'static Regex {
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| {
        let keys = sensitive_key_pattern();
        let key = format!(r#"(?:"(?:{keys})"|'(?:{keys})'|(?:{keys}))"#);
        let value = r#""(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'"#;
        let pattern =
            format!(r#"(?i)(?P<prefix>{key}\s*[:=]\s*(?:Some\()?)(?P<value>{value})"#);

        Regex::new(&pattern).expect("sensitive quoted value regex must compile")
    })
}

/// Lazily compiled, case-insensitive pattern for `key: value` pairs whose key
/// is sensitive and whose value is unquoted.
///
/// * `prefix` captures the key (as a whole word) and the `:` / `=` separator
///   with surrounding whitespace.
/// * `value` captures the following run of characters up to the next
///   whitespace, `,`, `}` or `)`.
///
/// # Panics
///
/// Panics on first use if the assembled pattern fails to compile.
fn sensitive_bare_value_re() -> &'static Regex {
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| {
        let keys = sensitive_key_pattern();
        // `}}` below is a literal `}` escaped for `format!`.
        let pattern =
            format!(r#"(?i)(?P<prefix>\b(?:{keys})\b\s*[:=]\s*)(?P<value>[^\s,}})]+)"#);

        Regex::new(&pattern).expect("sensitive bare value regex must compile")
    })
}

/// Lazily compiled pattern for `Bearer <token>` credentials.
///
/// Matches the word `Bearer` (any case), whitespace, then at least eight
/// characters drawn from letters, digits and `._~+/=-`.
///
/// # Panics
///
/// Panics on first use if the pattern fails to compile.
fn bearer_token_re() -> &'static Regex {
    const PATTERN: &str = r#"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]{8,}"#;
    static CELL: OnceLock<Regex> = OnceLock::new();

    CELL.get_or_init(|| Regex::new(PATTERN).expect("bearer token regex must compile"))
}

/// Lazily compiled pattern for standalone secret tokens recognised by prefix.
///
/// Matches either one of the prefixes `sk`, `sk-ant`, `sk-proj`, `ghp`, `gho`
/// or `github_pat` followed by `_`, or the prefix `sk-`; in both cases the
/// prefix must be followed by at least eight letters, digits, `_` or `-`.
///
/// # Panics
///
/// Panics on first use if the pattern fails to compile.
fn secret_token_re() -> &'static Regex {
    const PATTERN: &str = r#"\b(?:sk|sk-ant|sk-proj|ghp|gho|github_pat)_[A-Za-z0-9_\-]{8,}|\bsk-[A-Za-z0-9_\-]{8,}"#;
    static CELL: OnceLock<Regex> = OnceLock::new();

    CELL.get_or_init(|| Regex::new(PATTERN).expect("secret token regex must compile"))
}

/// Lazily compiled pattern for Windows paths whose first separator is a
/// doubled backslash (as written inside an escaped string, e.g. `C:\\Users`).
///
/// The match starts at a drive letter and runs until a quote, comma, `}`, `]`
/// or line break.
///
/// # Panics
///
/// Panics on first use if the pattern fails to compile.
fn windows_escaped_path_re() -> &'static Regex {
    const PATTERN: &str = r#"\b[A-Za-z]:\\\\[^"'\r\n,}\]]+"#;
    static CELL: OnceLock<Regex> = OnceLock::new();

    CELL.get_or_init(|| Regex::new(PATTERN).expect("escaped Windows path regex must compile"))
}

/// Lazily compiled pattern for Windows paths with a single backslash after the
/// drive letter (e.g. `C:\Users`).
///
/// The match starts at a drive letter and runs until a quote, comma, `}`, `]`
/// or line break.
///
/// # Panics
///
/// Panics on first use if the pattern fails to compile.
fn windows_path_re() -> &'static Regex {
    const PATTERN: &str = r#"\b[A-Za-z]:\\[^"'\r\n,}\]]+"#;
    static CELL: OnceLock<Regex> = OnceLock::new();

    CELL.get_or_init(|| Regex::new(PATTERN).expect("Windows path regex must compile"))
}

/// Lazily compiled pattern for absolute Unix paths rooted at `/Users`, `/home`,
/// `/workspace`, `/tmp`, `/var` or `/private`.
///
/// The match starts at the leading `/` and runs until whitespace, a quote, a
/// comma, `}` or `]`.
///
/// The pattern deliberately has no leading `\b`. `/` is not a word character,
/// so `\b/` only matches when the character *before* the slash is a word
/// character. That excluded the common cases — a path at the start of a line
/// or following a space, quote, `=` or `:` — and left them unredacted.
///
/// # Panics
///
/// Panics on first use if the pattern fails to compile.
fn unix_path_re() -> &'static Regex {
    static RE: OnceLock<Regex> = OnceLock::new();
    RE.get_or_init(|| {
        Regex::new(r#"/(?:Users|home|workspace|tmp|var|private)/[^"'\s,}\]]+"#)
            .expect("Unix path regex must compile")
    })
}

/// Regex alternation (without surrounding group) of the key names whose values
/// are treated as sensitive.
///
/// The fragment is spliced into the key/value patterns; the order of the
/// alternatives is kept exactly as listed here.
fn sensitive_key_pattern() -> &'static str {
    concat!(
        // Credentials and session secrets.
        "api[_-]?key|apikey|authorization|x-api-key|token|access[_-]?token|",
        "refresh[_-]?token|session[_-]?key|password|secret|",
        // Prompt fields.
        "prompt|system_prompt|original_prompt|suggested_prompt|copyable_prompt|",
        // Message and payload bodies.
        "content|text|partial_json|arguments|payload|",
        "raw_message|rawMessage|raw_error|outer_html|text_content|",
        // Commands, file references and generic data.
        "command|path|file|files|data",
    )
}
1 change: 1 addition & 0 deletions src/crates/services-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//! This crate owns platform-agnostic service building blocks that can be
//! tested without compiling the full BitFun product runtime.

pub mod diagnostics;
pub mod diff;
pub mod process_manager;
pub mod session;
Expand Down
68 changes: 68 additions & 0 deletions src/crates/services-core/tests/diagnostic_log_redaction.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use bitfun_services_core::diagnostics::{
redact_diagnostic_log_text, redact_diagnostic_log_text_with_report,
};

#[test]
fn redacts_model_payloads_without_removing_routing_metadata() {
    let input = r#"[2026-05-13T10:38:21.837][DEBUG][ai::openai] Request body:
{
"model": "kimi-k2.6",
"api_key": "sk-secret-token",
"messages": [
{"role": "user", "content": "please review C:\\Users\\limit\\private\\file.rs"}
],
"tools": [{"name": "Read"}],
"tool_call": {"name": "Read", "arguments": "{\"path\":\"C:\\Users\\limit\\private\\file.rs\"}"}
}
Authorization: Bearer live-provider-token
"#;

    let report = redact_diagnostic_log_text_with_report(input);
    let text = report.text.as_str();

    assert!(report.redaction_count >= 4);

    // Routing metadata (log target, model, role, tool name) must survive.
    for kept in [
        "[ai::openai]",
        "\"model\": \"kimi-k2.6\"",
        "\"role\": \"user\"",
        "\"name\": \"Read\"",
    ] {
        assert!(text.contains(kept));
    }

    // Credentials, message content and local paths must be gone.
    for removed in [
        "sk-secret-token",
        "live-provider-token",
        "please review",
        "C:\\Users\\limit",
    ] {
        assert!(!text.contains(removed));
    }

    assert!(text.contains("<redacted"));
}

#[test]
fn redacts_anthropic_stream_payloads_but_keeps_event_shape() {
    let input = r#"[TRACE][ai::anthropic_stream_response] Anthropic SSE: Event { event: "content_block_delta", data: "{\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"-US secret argument\"}}", id: "", retry: None }
[TRACE][ai::anthropic_stream_response] Anthropic unified response: UnifiedResponse { text: Some("private answer"), reasoning_content: Some("hidden reasoning"), thinking_signature: "<omitted>", tool_call: Some(UnifiedToolCall { tool_call_index: Some(0), id: Some("toolu_1"), name: Some("Read"), arguments: Some("{\"path\":\"D:\\workspace\\secret\\main.rs\"}"), arguments_is_snapshot: false }), usage: None, finish_reason: None, provider_metadata: "<omitted>" }
"#;

    let redacted = redact_diagnostic_log_text(input);

    // The structural parts of the events stay readable.
    for kept in [
        "Anthropic SSE",
        "event: \"content_block_delta\"",
        "tool_call_index: Some(0)",
        "name: Some(\"Read\")",
    ] {
        assert!(redacted.contains(kept));
    }

    // Model output, tool arguments and local paths are removed.
    for removed in [
        "-US secret argument",
        "private answer",
        "hidden reasoning",
        "D:\\workspace\\secret",
    ] {
        assert!(!redacted.contains(removed));
    }
}

#[test]
fn handles_large_log_text_without_dropping_lines() {
    // 2,000 lines, each carrying one prompt and one path to redact.
    let input: String = (0..2_000)
        .map(|index| {
            format!(
                "[TRACE][webview] event={index} payload={{\"prompt\":\"secret prompt {index}\",\"path\":\"C:\\\\Users\\\\limit\\\\secret-{index}.txt\"}}\n"
            )
        })
        .collect();

    let report = redact_diagnostic_log_text_with_report(&input);
    let text = report.text.as_str();

    assert_eq!(text.lines().count(), 2_000);
    assert!(report.redaction_count >= 4_000);
    assert!(text.contains("[TRACE][webview] event=1999"));
    assert!(!text.contains("secret prompt"));
    assert!(!text.contains("C:\\\\Users\\\\limit"));
}
Loading