diff --git a/src/crates/agent-stream/src/lib.rs b/src/crates/agent-stream/src/lib.rs index 5256f844b..2e4682eb9 100644 --- a/src/crates/agent-stream/src/lib.rs +++ b/src/crates/agent-stream/src/lib.rs @@ -609,6 +609,7 @@ impl StreamProcessor { total_token_count: response_usage.total_token_count, reasoning_token_count: response_usage.reasoning_token_count, cached_content_token_count: response_usage.cached_content_token_count, + cache_creation_token_count: response_usage.cache_creation_token_count, }); debug!( "Received token usage stats: input={}, output={}, total={}", diff --git a/src/crates/ai-adapters/src/client/response_aggregator.rs b/src/crates/ai-adapters/src/client/response_aggregator.rs index 6ae2e8b19..24ac03c50 100644 --- a/src/crates/ai-adapters/src/client/response_aggregator.rs +++ b/src/crates/ai-adapters/src/client/response_aggregator.rs @@ -166,5 +166,6 @@ pub(crate) fn unified_usage_to_gemini_usage( total_token_count: usage.total_token_count, reasoning_token_count: usage.reasoning_token_count, cached_content_token_count: usage.cached_content_token_count, + cache_creation_token_count: usage.cache_creation_token_count, } } diff --git a/src/crates/ai-adapters/src/stream/types/responses.rs b/src/crates/ai-adapters/src/stream/types/responses.rs index a58da6366..7a093e3e5 100644 --- a/src/crates/ai-adapters/src/stream/types/responses.rs +++ b/src/crates/ai-adapters/src/stream/types/responses.rs @@ -129,9 +129,33 @@ pub fn parse_responses_output_item( #[cfg(test)] mod tests { - use super::{parse_responses_output_item, ResponsesCompleted, ResponsesStreamEvent}; + use super::{parse_responses_output_item, ResponsesCompleted, ResponsesStreamEvent, ResponsesUsage}; + use crate::stream::types::unified::UnifiedTokenUsage; use serde_json::json; + #[test] + fn responses_cached_tokens_maps_to_cached_content() { + let raw = r#"{ + "input_tokens": 200, + "input_tokens_details": { "cached_tokens": 80 }, + "output_tokens": 40, + "total_tokens": 240 + }"#; + let usage: ResponsesUsage = serde_json::from_str(raw).expect("valid responses usage"); + let unified: UnifiedTokenUsage = usage.into(); + assert_eq!(unified.cached_content_token_count, Some(80)); + assert_eq!(unified.cache_creation_token_count, None); + } + + #[test] + fn responses_absent_cache_stays_none() { + let raw = r#"{ "input_tokens": 200, "output_tokens": 40, "total_tokens": 240 }"#; + let usage: ResponsesUsage = serde_json::from_str(raw).expect("valid responses usage"); + let unified: UnifiedTokenUsage = usage.into(); + assert_eq!(unified.cached_content_token_count, None); + assert_eq!(unified.cache_creation_token_count, None); + } + #[test] fn parses_output_text_message_item() { let response = parse_responses_output_item( diff --git a/src/crates/ai-adapters/src/types/ai.rs b/src/crates/ai-adapters/src/types/ai.rs index 72a76d9a5..ee8982611 100644 --- a/src/crates/ai-adapters/src/types/ai.rs +++ b/src/crates/ai-adapters/src/types/ai.rs @@ -32,6 +32,9 @@ pub struct GeminiUsage { #[serde(rename = "cachedContentTokenCount")] #[serde(skip_serializing_if = "Option::is_none")] pub cached_content_token_count: Option, + #[serde(rename = "cacheCreationTokenCount")] + #[serde(default, skip_serializing_if = "Option::is_none")] + pub cache_creation_token_count: Option, } /// Structured message codes for localized connection test messaging. @@ -69,3 +72,40 @@ pub struct RemoteModelInfo { #[serde(skip_serializing_if = "Option::is_none")] pub display_name: Option, } + +#[cfg(test)] +mod tests { + use super::GeminiUsage; + + #[test] + fn gemini_usage_roundtrips_cache_creation_field() { + let usage = GeminiUsage { + prompt_token_count: 100, + candidates_token_count: 20, + total_token_count: 120, + reasoning_token_count: None, + cached_content_token_count: Some(30), + cache_creation_token_count: Some(20), + }; + let json = serde_json::to_string(&usage).expect("serialize"); + assert!(json.contains("\"cacheCreationTokenCount\":20")); + + let parsed: GeminiUsage = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(parsed.cache_creation_token_count, Some(20)); + } + + #[test] + fn gemini_usage_legacy_payload_parses_with_new_field_absent() { + // Records persisted before this plan don't have cacheCreationTokenCount; + // they must still parse, with the new field defaulting to None. + let raw = r#"{ + "promptTokenCount": 10, + "candidatesTokenCount": 5, + "totalTokenCount": 15, + "cachedContentTokenCount": 3 + }"#; + let parsed: GeminiUsage = serde_json::from_str(raw).expect("legacy payload"); + assert_eq!(parsed.cached_content_token_count, Some(3)); + assert_eq!(parsed.cache_creation_token_count, None); + } +} diff --git a/src/crates/core/src/agentic/execution/round_executor.rs b/src/crates/core/src/agentic/execution/round_executor.rs index d5083f895..b357c913e 100644 --- a/src/crates/core/src/agentic/execution/round_executor.rs +++ b/src/crates/core/src/agentic/execution/round_executor.rs @@ -1356,6 +1356,13 @@ fn token_details_from_usage( serde_json::json!(cached_tokens), ); } + // Cache writes (Anthropic only at the moment). Disjoint from reads. + if let Some(creation_tokens) = usage.cache_creation_token_count { + details.insert( + "cacheCreationTokenCount".to_string(), + serde_json::json!(creation_tokens), + ); + } (!details.is_empty()).then_some(serde_json::Value::Object(details)) } @@ -1973,4 +1980,50 @@ mod tests { use super::detect_placeholder_patterns; assert!(detect_placeholder_patterns("").is_none()); } + + #[test] + fn token_details_emits_both_cache_keys_when_present() { + use crate::util::types::ai::GeminiUsage; + let usage = GeminiUsage { + prompt_token_count: 100, + candidates_token_count: 20, + total_token_count: 120, + reasoning_token_count: None, + cached_content_token_count: Some(30), + cache_creation_token_count: Some(20), + }; + let details = super::token_details_from_usage(&usage).expect("details"); + assert_eq!(details.get("cachedContentTokenCount").and_then(|v| v.as_u64()), Some(30)); + assert_eq!(details.get("cacheCreationTokenCount").and_then(|v| v.as_u64()), Some(20)); + } + + #[test] + fn token_details_emits_only_read_when_creation_absent() { + use crate::util::types::ai::GeminiUsage; + let usage = GeminiUsage { + prompt_token_count: 100, + candidates_token_count: 20, + total_token_count: 120, + reasoning_token_count: None, + cached_content_token_count: Some(30), + cache_creation_token_count: None, + }; + let details = super::token_details_from_usage(&usage).expect("details"); + assert_eq!(details.get("cachedContentTokenCount").and_then(|v| v.as_u64()), Some(30)); + assert!(details.get("cacheCreationTokenCount").is_none()); + } + + #[test] + fn token_details_is_none_when_no_cache_info() { + use crate::util::types::ai::GeminiUsage; + let usage = GeminiUsage { + prompt_token_count: 100, + candidates_token_count: 20, + total_token_count: 120, + reasoning_token_count: None, + cached_content_token_count: None, + cache_creation_token_count: None, + }; + assert!(super::token_details_from_usage(&usage).is_none()); + } }