From 0c61c79bc052079d2aeb1a8e85d5d0b6a090a5b3 Mon Sep 17 00:00:00 2001 From: Radu Raicea Date: Tue, 28 Oct 2025 16:36:04 -0400 Subject: [PATCH 1/9] feat(llma): send number of web searches --- posthog/ai/anthropic/anthropic_converter.py | 40 ++++++++ posthog/ai/gemini/gemini_converter.py | 54 +++++++++- posthog/ai/openai/openai_converter.py | 90 ++++++++++++++++ posthog/ai/types.py | 1 + posthog/ai/utils.py | 30 ++++++ posthog/test/ai/anthropic/test_anthropic.py | 45 +++++++- posthog/test/ai/gemini/test_gemini.py | 61 +++++++++++ posthog/test/ai/openai/test_openai.py | 107 ++++++++++++++++++++ 8 files changed, 426 insertions(+), 2 deletions(-) diff --git a/posthog/ai/anthropic/anthropic_converter.py b/posthog/ai/anthropic/anthropic_converter.py index 7d2e615f..663fc00d 100644 --- a/posthog/ai/anthropic/anthropic_converter.py +++ b/posthog/ai/anthropic/anthropic_converter.py @@ -163,6 +163,32 @@ def format_anthropic_streaming_content( return formatted +def extract_anthropic_web_search_count(response: Any) -> int: + """ + Extract web search count from Anthropic response. + + Anthropic provides exact web search counts via usage.server_tool_use.web_search_requests. + + Args: + response: The response from Anthropic API + + Returns: + Number of web search requests (0 if none) + """ + if not hasattr(response, "usage"): + return 0 + + if not hasattr(response.usage, "server_tool_use"): + return 0 + + server_tool_use = response.usage.server_tool_use + + if hasattr(server_tool_use, "web_search_requests"): + return int(getattr(server_tool_use, "web_search_requests", 0)) + + return 0 + + def extract_anthropic_usage_from_response(response: Any) -> TokenUsage: """ Extract usage from a full Anthropic response (non-streaming). @@ -191,6 +217,10 @@ def extract_anthropic_usage_from_response(response: Any) -> TokenUsage: if cache_creation and cache_creation > 0: result["cache_creation_input_tokens"] = cache_creation + web_search_count = extract_anthropic_web_search_count(response) + if web_search_count > 0: + result["web_search_count"] = web_search_count + return result @@ -222,6 +252,16 @@ def extract_anthropic_usage_from_event(event: Any) -> TokenUsage: if hasattr(event, "usage") and event.usage: usage["output_tokens"] = getattr(event.usage, "output_tokens", 0) + # Extract web search count from usage + if hasattr(event.usage, "server_tool_use"): + server_tool_use = event.usage.server_tool_use + if hasattr(server_tool_use, "web_search_requests"): + web_search_count = int( + getattr(server_tool_use, "web_search_requests", 0) + ) + if web_search_count > 0: + usage["web_search_count"] = web_search_count + return usage diff --git a/posthog/ai/gemini/gemini_converter.py b/posthog/ai/gemini/gemini_converter.py index 7a8cd0ca..3d509598 100644 --- a/posthog/ai/gemini/gemini_converter.py +++ b/posthog/ai/gemini/gemini_converter.py @@ -338,6 +338,46 @@ def format_gemini_input(contents: Any) -> List[FormattedMessage]: return [_format_object_message(contents)] +def extract_gemini_web_search_count(response: Any) -> int: + """ + Extract web search count from Gemini response. + + Gemini bills per request that uses grounding, not per query. + Returns 1 if grounding_metadata is present, 0 otherwise. + + Args: + response: The response from Gemini API + + Returns: + 1 if web search/grounding was used, 0 otherwise + """ + + # Check for grounding_metadata in candidates + if hasattr(response, "candidates"): + for candidate in response.candidates: + if ( + hasattr(candidate, "grounding_metadata") + and candidate.grounding_metadata + ): + return 1 + + # Also check for google_search or grounding in function call names + if hasattr(candidate, "content") and candidate.content: + if hasattr(candidate.content, "parts") and candidate.content.parts: + for part in candidate.content.parts: + if hasattr(part, "function_call") and part.function_call: + function_name = getattr( + part.function_call, "name", "" + ).lower() + if ( + "google_search" in function_name + or "grounding" in function_name + ): + return 1 + + return 0 + + def _extract_usage_from_metadata(metadata: Any) -> TokenUsage: """ Common logic to extract usage from Gemini metadata. @@ -382,7 +422,14 @@ def extract_gemini_usage_from_response(response: Any) -> TokenUsage: if not hasattr(response, "usage_metadata") or not response.usage_metadata: return TokenUsage(input_tokens=0, output_tokens=0) - return _extract_usage_from_metadata(response.usage_metadata) + usage = _extract_usage_from_metadata(response.usage_metadata) + + # Add web search count if present + web_search_count = extract_gemini_web_search_count(response) + if web_search_count > 0: + usage["web_search_count"] = web_search_count + + return usage def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage: @@ -404,6 +451,11 @@ def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage: # Use the shared helper to extract usage usage = _extract_usage_from_metadata(chunk.usage_metadata) + # Add web search count if present + web_search_count = extract_gemini_web_search_count(chunk) + if web_search_count > 0: + usage["web_search_count"] = web_search_count + return usage diff --git a/posthog/ai/openai/openai_converter.py b/posthog/ai/openai/openai_converter.py index 778478ff..69491a48 100644 --- a/posthog/ai/openai/openai_converter.py +++ b/posthog/ai/openai/openai_converter.py @@ -255,6 +255,81 @@ def format_openai_streaming_content( return formatted +def extract_openai_web_search_count(response: Any) -> int: + """ + Extract web search count from OpenAI response. + + Uses a two-tier detection strategy: + 1. Priority 1 (exact count): Check for output[].type == "web_search_call" (Responses API) + 2. Priority 2 (binary detection): Check for various web search indicators: + - Root-level citations, search_results, or usage.search_context_size (Perplexity) + - Annotations with type "url_citation" in choices/output + + Args: + response: The response from OpenAI API + + Returns: + Number of web search requests (exact count or binary 1/0) + """ + + # Priority 1: Check for exact count in Responses API output + if hasattr(response, "output"): + web_search_count = 0 + for item in response.output: + if hasattr(item, "type") and item.type == "web_search_call": + web_search_count += 1 + + if web_search_count > 0: + return web_search_count + + # Priority 2: Binary detection (returns 1 or 0) + + # Check root-level indicators (Perplexity) + if hasattr(response, "citations"): + citations = getattr(response, "citations") + if citations and len(citations) > 0: + return 1 + + if hasattr(response, "search_results"): + search_results = getattr(response, "search_results") + if search_results and len(search_results) > 0: + return 1 + + if hasattr(response, "usage") and hasattr(response.usage, "search_context_size"): + if response.usage.search_context_size: + return 1 + + # Check for url_citation annotations in choices (Chat Completions) + if hasattr(response, "choices"): + for choice in response.choices: + if hasattr(choice, "message") and hasattr(choice.message, "annotations"): + annotations = choice.message.annotations + if annotations: + for annotation in annotations: + if ( + hasattr(annotation, "type") + and annotation.type == "url_citation" + ): + return 1 + + # Check for url_citation annotations in output (Responses API) + if hasattr(response, "output"): + for item in response.output: + if hasattr(item, "content") and isinstance(item.content, list): + for content_item in item.content: + if hasattr(content_item, "annotations"): + annotations = content_item.annotations + if annotations: + for annotation in annotations: + if ( + hasattr(annotation, "type") + and annotation.type == "url_citation" + ): + return 1 + + return 0 + + def extract_openai_usage_from_response(response: Any) -> TokenUsage: """ Extract usage statistics from a full OpenAI response (non-streaming). @@ -312,6 +387,10 @@ def extract_openai_usage_from_response(response: Any) -> TokenUsage: if reasoning_tokens > 0: result["reasoning_tokens"] = reasoning_tokens + web_search_count = extract_openai_web_search_count(response) + if web_search_count > 0: + result["web_search_count"] = web_search_count + return result @@ -358,6 +437,11 @@ def extract_openai_usage_from_chunk( chunk.usage.completion_tokens_details.reasoning_tokens ) + # Extract web search count from the chunk (available in final streaming chunks) + web_search_count = extract_openai_web_search_count(chunk) + if web_search_count > 0: + usage["web_search_count"] = web_search_count + elif provider_type == "responses": # For Responses API, usage is only in chunk.response.usage for completed events if hasattr(chunk, "type") and chunk.type == "response.completed": @@ -386,6 +470,12 @@ def extract_openai_usage_from_chunk( response_usage.output_tokens_details.reasoning_tokens ) + # Extract web search count from the complete response + if hasattr(chunk, "response"): + web_search_count = extract_openai_web_search_count(chunk.response) + if web_search_count > 0: + usage["web_search_count"] = web_search_count + return usage diff --git a/posthog/ai/types.py b/posthog/ai/types.py index d90a0df8..c549cadc 100644 --- a/posthog/ai/types.py +++ b/posthog/ai/types.py @@ -63,6 +63,7 @@ class TokenUsage(TypedDict, total=False): cache_read_input_tokens: Optional[int] cache_creation_input_tokens: Optional[int] reasoning_tokens: Optional[int] + web_search_count: Optional[int] class ProviderResponse(TypedDict, total=False): diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py index d6afd1db..03b37600 100644 --- a/posthog/ai/utils.py +++ b/posthog/ai/utils.py @@ -53,6 +53,12 @@ def merge_usage_stats( if source_reasoning is not None: current = target.get("reasoning_tokens") or 0 target["reasoning_tokens"] = current + source_reasoning + + source_web_search = source.get("web_search_count") + if source_web_search is not None: + current = target.get("web_search_count") or 0 + target["web_search_count"] = current + source_web_search + elif mode == "cumulative": # Replace with latest values (already cumulative) if source.get("input_tokens") is not None: @@ -67,6 +73,9 @@ def merge_usage_stats( ] if source.get("reasoning_tokens") is not None: target["reasoning_tokens"] = source["reasoning_tokens"] + if source.get("web_search_count") is not None: + target["web_search_count"] = source["web_search_count"] + else: raise ValueError(f"Invalid mode: {mode}. Must be 'incremental' or 'cumulative'") @@ -311,6 +320,10 @@ def call_llm_and_track_usage( if reasoning is not None and reasoning > 0: event_properties["$ai_reasoning_tokens"] = reasoning + web_search_count = usage.get("web_search_count") + if web_search_count is not None and web_search_count > 0: + event_properties["$ai_web_search_count"] = web_search_count + if posthog_distinct_id is None: event_properties["$process_person_profile"] = False @@ -414,6 +427,14 @@ async def call_llm_and_track_usage_async( if cache_creation is not None and cache_creation > 0: event_properties["$ai_cache_creation_input_tokens"] = cache_creation + reasoning = usage.get("reasoning_tokens") + if reasoning is not None and reasoning > 0: + event_properties["$ai_reasoning_tokens"] = reasoning + + web_search_count = usage.get("web_search_count") + if web_search_count is not None and web_search_count > 0: + event_properties["$ai_web_search_count"] = web_search_count + if posthog_distinct_id is None: event_properties["$process_person_profile"] = False @@ -535,6 +556,15 @@ def capture_streaming_event( if value is not None and isinstance(value, int) and value > 0: event_properties[f"$ai_{field}"] = value + # Add web search count if present (all providers) + web_search_count = event_data["usage_stats"].get("web_search_count") + if ( + web_search_count is not None + and isinstance(web_search_count, int) + and web_search_count > 0 + ): + event_properties["$ai_web_search_count"] = web_search_count + # Handle provider-specific fields if ( event_data["provider"] == "openai" diff --git a/posthog/test/ai/anthropic/test_anthropic.py b/posthog/test/ai/anthropic/test_anthropic.py index 5f65a99e..3cd77e0c 100644 --- a/posthog/test/ai/anthropic/test_anthropic.py +++ b/posthog/test/ai/anthropic/test_anthropic.py @@ -1,4 +1,3 @@ -import os from unittest.mock import patch import pytest @@ -1034,3 +1033,47 @@ async def run_test(): assert props["$ai_output_tokens"] == 25 assert props["$ai_cache_read_input_tokens"] == 5 assert props["$ai_cache_creation_input_tokens"] == 0 + + +def test_web_search_count(mock_client): + """Test that web search count is properly tracked from Anthropic responses.""" + + # Create a mock usage with web search + class MockServerToolUse: + def __init__(self): + self.web_search_requests = 3 + + class MockUsageWithWebSearch: + def __init__(self): + self.input_tokens = 100 + self.output_tokens = 50 + self.cache_read_input_tokens = 0 + self.cache_creation_input_tokens = 0 + self.server_tool_use = MockServerToolUse() + + class MockResponseWithWebSearch: + def __init__(self): + self.content = [MockContent(text="Search results show...")] + self.model = "claude-3-opus-20240229" + self.usage = MockUsageWithWebSearch() + + mock_response = MockResponseWithWebSearch() + + with patch("anthropic.resources.Messages.create", return_value=mock_response): + client = Anthropic(api_key="test-key", posthog_client=mock_client) + response = client.messages.create( + model="claude-3-opus-20240229", + messages=[{"role": "user", "content": "Search for recent news"}], + posthog_distinct_id="test-id", + ) + + assert response == mock_response + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is captured + assert props["$ai_web_search_count"] == 3 + assert props["$ai_input_tokens"] == 100 + assert props["$ai_output_tokens"] == 50 diff --git a/posthog/test/ai/gemini/test_gemini.py b/posthog/test/ai/gemini/test_gemini.py index e7571bba..954ed06d 100644 --- a/posthog/test/ai/gemini/test_gemini.py +++ b/posthog/test/ai/gemini/test_gemini.py @@ -837,3 +837,64 @@ def test_streaming_cache_and_reasoning_tokens(mock_client, mock_google_genai_cli assert props["$ai_output_tokens"] == 10 assert props["$ai_cache_read_input_tokens"] == 30 assert props["$ai_reasoning_tokens"] == 5 + + +def test_web_search_grounding(mock_client, mock_google_genai_client): + """Test web search detection via grounding_metadata.""" + + # Create mock response with grounding metadata + mock_response = MagicMock() + + # Mock usage metadata + mock_usage = MagicMock() + mock_usage.prompt_token_count = 60 + mock_usage.candidates_token_count = 40 + mock_usage.cached_content_token_count = 0 + mock_usage.thoughts_token_count = 0 + mock_response.usage_metadata = mock_usage + + # Mock grounding metadata + mock_grounding_chunk = MagicMock() + mock_grounding_chunk.uri = "https://example.com" + + mock_grounding_metadata = MagicMock() + mock_grounding_metadata.grounding_chunks = [mock_grounding_chunk] + + # Mock text part + mock_text_part = MagicMock() + mock_text_part.text = "According to search results..." + type(mock_text_part).text = mock_text_part.text + + # Mock content with parts + mock_content = MagicMock() + mock_content.parts = [mock_text_part] + + # Mock candidate with grounding metadata + mock_candidate = MagicMock() + mock_candidate.content = mock_content + mock_candidate.grounding_metadata = mock_grounding_metadata + type(mock_candidate).grounding_metadata = mock_candidate.grounding_metadata + + mock_response.candidates = [mock_candidate] + mock_response.text = "According to search results..." + + # Mock the generate_content method + mock_google_genai_client.models.generate_content.return_value = mock_response + + client = Client(api_key="test-key", posthog_client=mock_client) + response = client.models.generate_content( + model="gemini-2.5-flash", + contents="What's the latest news?", + posthog_distinct_id="test-id", + ) + + assert response == mock_response + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is detected (binary for grounding) + assert props["$ai_web_search_count"] == 1 + assert props["$ai_input_tokens"] == 60 + assert props["$ai_output_tokens"] == 40 diff --git a/posthog/test/ai/openai/test_openai.py b/posthog/test/ai/openai/test_openai.py index 0e380b07..95f10336 100644 --- a/posthog/test/ai/openai/test_openai.py +++ b/posthog/test/ai/openai/test_openai.py @@ -1339,3 +1339,110 @@ def test_tool_definition(mock_client, mock_openai_response): assert isinstance(props["$ai_latency"], float) # Verify that tools are captured in the $ai_tools property assert props["$ai_tools"] == tools + + +def test_web_search_perplexity_style(mock_client): + """Test web search detection via annotations (Perplexity-style).""" + + class MockAnnotation: + def __init__(self): + self.type = "url_citation" + + class MockMessage: + def __init__(self): + self.role = "assistant" + self.content = "Based on recent search results..." + self.annotations = [MockAnnotation(), MockAnnotation()] + + class MockChoice: + def __init__(self): + self.message = MockMessage() + + class MockUsage: + def __init__(self): + self.prompt_tokens = 50 + self.completion_tokens = 30 + + class MockResponseWithAnnotations: + def __init__(self): + self.choices = [MockChoice()] + self.usage = MockUsage() + self.model = "gpt-4-turbo" + + mock_response = MockResponseWithAnnotations() + + with patch("openai.resources.chat.Completions.create", return_value=mock_response): + client = OpenAI(api_key="test-key", posthog_client=mock_client) + response = client.chat.completions.create( + model="gpt-4-turbo", + messages=[{"role": "user", "content": "What's happening in tech?"}], + posthog_distinct_id="test-id", + ) + + assert response == mock_response + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is detected (binary detection) + assert props["$ai_web_search_count"] == 1 + + +def test_web_search_responses_api(mock_client): + """Test exact web search count from Responses API.""" + + class MockWebSearchItem: + def __init__(self): + self.type = "web_search_call" + + class MockMessageItem: + def __init__(self): + self.type = "message" + self.role = "assistant" + self.content = "Here are the results..." + + class MockUsage: + def __init__(self): + self.input_tokens = 100 + self.output_tokens = 75 + + class MockResponsesAPIResponse: + def __init__(self): + self.output = [MockWebSearchItem(), MockWebSearchItem(), MockMessageItem()] + self.usage = MockUsage() + self.model = "gpt-4o" + + mock_response = MockResponsesAPIResponse() + + with patch( + "openai.resources.responses.Responses.create", return_value=mock_response + ): + # Manually call the tracking since we're testing the converter logic + from posthog.ai.utils import call_llm_and_track_usage + + def mock_create_call(**kwargs): + return mock_response + + result = call_llm_and_track_usage( + posthog_distinct_id="test-id", + ph_client=mock_client, + provider="openai", + posthog_trace_id=None, + posthog_properties=None, + posthog_privacy_mode=False, + posthog_groups=None, + base_url="https://api.openai.com/v1", + call_method=mock_create_call, + model="gpt-4o", + messages=[{"role": "user", "content": "Search query"}], + ) + + assert result == mock_response + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify exact web search count + assert props["$ai_web_search_count"] == 2 From 5fd75c7d67c6b8a8e22d5be5ec76bcfaac227ad6 Mon Sep 17 00:00:00 2001 From: Radu Raicea Date: Thu, 30 Oct 2025 16:21:26 -0400 Subject: [PATCH 2/9] feat(llma): add more tests --- posthog/ai/anthropic/anthropic_converter.py | 2 +- posthog/ai/openai/openai_converter.py | 2 + posthog/test/ai/anthropic/test_anthropic.py | 186 +++++++++++++++++ posthog/test/ai/gemini/test_gemini.py | 216 ++++++++++++++++++++ posthog/test/ai/openai/test_openai.py | 194 ++++++++++++++++++ 5 files changed, 599 insertions(+), 1 deletion(-) diff --git a/posthog/ai/anthropic/anthropic_converter.py b/posthog/ai/anthropic/anthropic_converter.py index 663fc00d..24f20b9f 100644 --- a/posthog/ai/anthropic/anthropic_converter.py +++ b/posthog/ai/anthropic/anthropic_converter.py @@ -184,7 +184,7 @@ def extract_anthropic_web_search_count(response: Any) -> int: server_tool_use = response.usage.server_tool_use if hasattr(server_tool_use, "web_search_requests"): - return int(getattr(server_tool_use, "web_search_requests", 0)) + return max(0, int(getattr(server_tool_use, "web_search_requests", 0))) return 0 diff --git a/posthog/ai/openai/openai_converter.py b/posthog/ai/openai/openai_converter.py index 69491a48..320c0e51 100644 --- a/posthog/ai/openai/openai_converter.py +++ b/posthog/ai/openai/openai_converter.py @@ -279,6 +279,8 @@ def extract_openai_web_search_count(response: Any) -> int: if hasattr(item, "type") and item.type == "web_search_call": web_search_count += 1 + web_search_count = max(0, web_search_count) + if web_search_count > 0: return web_search_count diff --git a/posthog/test/ai/anthropic/test_anthropic.py b/posthog/test/ai/anthropic/test_anthropic.py index 3cd77e0c..4617f801 100644 --- a/posthog/test/ai/anthropic/test_anthropic.py +++ b/posthog/test/ai/anthropic/test_anthropic.py @@ -1077,3 +1077,189 @@ def __init__(self): assert props["$ai_web_search_count"] == 3 assert props["$ai_input_tokens"] == 100 assert props["$ai_output_tokens"] == 50 + + +@pytest.fixture +def mock_anthropic_stream_with_web_search(): + """Mock stream events for web search.""" + + class MockServerToolUse: + def __init__(self): + self.web_search_requests = 2 + + class MockMessage: + def __init__(self): + self.usage = MockUsage( + input_tokens=50, + cache_creation_input_tokens=0, + cache_read_input_tokens=5, + ) + + def stream_generator(): + # Message start with usage + event = MockStreamEvent("message_start") + event.message = MockMessage() + yield event + + # Text block start + event = MockStreamEvent("content_block_start") + event.content_block = MockContentBlock("text") + event.index = 0 + yield event + + # Text delta + event = MockStreamEvent("content_block_delta") + event.delta = MockDelta(text="Here are the search results...") + event.index = 0 + yield event + + # Text block stop + event = MockStreamEvent("content_block_stop") + event.index = 0 + yield event + + # Message delta with final usage including web search + event = MockStreamEvent("message_delta") + usage = MockUsage(output_tokens=25) + usage.server_tool_use = MockServerToolUse() + event.usage = usage + yield event + + # Message stop + event = MockStreamEvent("message_stop") + yield event + + return stream_generator() + + +def test_streaming_with_web_search(mock_client, mock_anthropic_stream_with_web_search): + """Test that web search count is properly captured in streaming mode.""" + with patch( + "anthropic.resources.Messages.create", + return_value=mock_anthropic_stream_with_web_search, + ): + client = Anthropic(api_key="test-key", posthog_client=mock_client) + response = client.messages.create( + model="claude-3-opus-20240229", + messages=[{"role": "user", "content": "Search for recent news"}], + stream=True, + posthog_distinct_id="test-id", + ) + + # Consume the stream - this triggers the finally block synchronously + list(response) + + # Capture happens synchronously when generator is exhausted + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is captured + assert props["$ai_web_search_count"] == 2 + assert props["$ai_input_tokens"] == 50 + assert props["$ai_output_tokens"] == 25 + + +def test_async_with_web_search(mock_client): + """Test that web search count is properly tracked in async non-streaming mode.""" + import asyncio + + # Create a mock usage with web search + class MockServerToolUse: + def __init__(self): + self.web_search_requests = 3 + + class MockUsageWithWebSearch: + def __init__(self): + self.input_tokens = 100 + self.output_tokens = 50 + self.cache_read_input_tokens = 0 + self.cache_creation_input_tokens = 0 + self.server_tool_use = MockServerToolUse() + + class MockResponseWithWebSearch: + def __init__(self): + self.content = [MockContent(text="Search results show...")] + self.model = "claude-3-opus-20240229" + self.usage = MockUsageWithWebSearch() + + mock_response = MockResponseWithWebSearch() + + async def mock_async_create(**kwargs): + return mock_response + + with patch( + "anthropic.resources.AsyncMessages.create", + side_effect=mock_async_create, + ): + async_client = AsyncAnthropic(api_key="test-key", posthog_client=mock_client) + + async def run_test(): + response = await async_client.messages.create( + model="claude-3-opus-20240229", + messages=[{"role": "user", "content": "Search for recent news"}], + posthog_distinct_id="test-id", + ) + return response + + # asyncio.run() waits for all async operations to complete + response = asyncio.run(run_test()) + + assert response == mock_response + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is captured + assert props["$ai_web_search_count"] == 3 + assert props["$ai_input_tokens"] == 100 + assert props["$ai_output_tokens"] == 50 + + +def test_async_streaming_with_web_search( + mock_client, mock_anthropic_stream_with_web_search +): + """Test that web search count is properly captured in async streaming mode.""" + import asyncio + + async def mock_async_generator(): + # Convert regular generator to async generator + for event in mock_anthropic_stream_with_web_search: + yield event + + async def mock_async_create(**kwargs): + # Return the async generator (to be awaited by the implementation) + return mock_async_generator() + + with patch( + "anthropic.resources.AsyncMessages.create", + side_effect=mock_async_create, + ): + async_client = AsyncAnthropic(api_key="test-key", posthog_client=mock_client) + + async def run_test(): + response = await async_client.messages.create( + model="claude-3-opus-20240229", + messages=[{"role": "user", "content": "Search for recent news"}], + stream=True, + posthog_distinct_id="test-id", + ) + + # Consume the async stream + [event async for event in response] + + # asyncio.run() waits for all async operations to complete + asyncio.run(run_test()) + + # Capture completes before asyncio.run() returns + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is captured + assert props["$ai_web_search_count"] == 2 + assert props["$ai_input_tokens"] == 50 + assert props["$ai_output_tokens"] == 25 diff --git a/posthog/test/ai/gemini/test_gemini.py b/posthog/test/ai/gemini/test_gemini.py index 954ed06d..581bdcdf 100644 --- a/posthog/test/ai/gemini/test_gemini.py +++ b/posthog/test/ai/gemini/test_gemini.py @@ -898,3 +898,219 @@ def test_web_search_grounding(mock_client, mock_google_genai_client): assert props["$ai_web_search_count"] == 1 assert props["$ai_input_tokens"] == 60 assert props["$ai_output_tokens"] == 40 + + +def test_streaming_with_web_search(mock_client, mock_google_genai_client): + """Test that web search count is properly captured in streaming mode.""" + + def mock_streaming_response(): + # Create chunk 1 with grounding metadata + mock_chunk1 = MagicMock() + mock_chunk1.text = "According to " + + mock_usage1 = MagicMock() + mock_usage1.prompt_token_count = 30 + mock_usage1.candidates_token_count = 5 + mock_usage1.cached_content_token_count = 0 + mock_usage1.thoughts_token_count = 0 + mock_chunk1.usage_metadata = mock_usage1 + + # Add grounding metadata to first chunk + mock_grounding_chunk = MagicMock() + mock_grounding_chunk.uri = "https://example.com" + + mock_grounding_metadata = MagicMock() + mock_grounding_metadata.grounding_chunks = [mock_grounding_chunk] + + mock_candidate1 = MagicMock() + mock_candidate1.grounding_metadata = mock_grounding_metadata + type(mock_candidate1).grounding_metadata = mock_candidate1.grounding_metadata + + mock_chunk1.candidates = [mock_candidate1] + + # Create chunk 2 + mock_chunk2 = MagicMock() + mock_chunk2.text = "search results..." + + mock_usage2 = MagicMock() + mock_usage2.prompt_token_count = 30 + mock_usage2.candidates_token_count = 15 + mock_usage2.cached_content_token_count = 0 + mock_usage2.thoughts_token_count = 0 + mock_chunk2.usage_metadata = mock_usage2 + + mock_candidate2 = MagicMock() + mock_chunk2.candidates = [mock_candidate2] + + yield mock_chunk1 + yield mock_chunk2 + + # Mock the generate_content_stream method + mock_google_genai_client.models.generate_content_stream.return_value = ( + mock_streaming_response() + ) + + client = Client(api_key="test-key", posthog_client=mock_client) + + response = client.models.generate_content_stream( + model="gemini-2.5-flash", + contents="What's the latest news?", + posthog_distinct_id="test-id", + ) + + chunks = list(response) + assert len(chunks) == 2 + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is detected (binary for grounding) + assert props["$ai_web_search_count"] == 1 + assert props["$ai_input_tokens"] == 30 + assert props["$ai_output_tokens"] == 15 + + +@pytest.mark.asyncio +async def test_async_with_web_search(mock_client, mock_google_genai_client): + """Test that web search count is properly tracked in async non-streaming mode.""" + + # Create mock response with grounding metadata + mock_response = MagicMock() + + # Mock usage metadata + mock_usage = MagicMock() + mock_usage.prompt_token_count = 60 + mock_usage.candidates_token_count = 40 + mock_usage.cached_content_token_count = 0 + mock_usage.thoughts_token_count = 0 + mock_response.usage_metadata = mock_usage + + # Mock grounding metadata + mock_grounding_chunk = MagicMock() + mock_grounding_chunk.uri = "https://example.com" + + mock_grounding_metadata = MagicMock() + mock_grounding_metadata.grounding_chunks = [mock_grounding_chunk] + + # Mock text part + mock_text_part = MagicMock() + mock_text_part.text = "According to search results..." + type(mock_text_part).text = mock_text_part.text + + # Mock content with parts + mock_content = MagicMock() + mock_content.parts = [mock_text_part] + + # Mock candidate with grounding metadata + mock_candidate = MagicMock() + mock_candidate.content = mock_content + mock_candidate.grounding_metadata = mock_grounding_metadata + type(mock_candidate).grounding_metadata = mock_candidate.grounding_metadata + + mock_response.candidates = [mock_candidate] + mock_response.text = "According to search results..." + + # Mock the async generate_content method + async def mock_async_generate_content(*args, **kwargs): + return mock_response + + mock_google_genai_client.models.generate_content_async = mock_async_generate_content + + client = Client(api_key="test-key", posthog_client=mock_client) + + async def run_test(): + response = await client.models.generate_content_async( + model="gemini-2.5-flash", + contents="What's the latest news?", + posthog_distinct_id="test-id", + ) + return response + + response = await run_test() + + assert response == mock_response + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is detected (binary for grounding) + assert props["$ai_web_search_count"] == 1 + assert props["$ai_input_tokens"] == 60 + assert props["$ai_output_tokens"] == 40 + + +@pytest.mark.asyncio +async def test_async_streaming_with_web_search(mock_client, mock_google_genai_client): + """Test that web search count is properly captured in async streaming mode.""" + + async def mock_async_streaming_response(): + # Create chunk 1 with grounding metadata + mock_chunk1 = MagicMock() + mock_chunk1.text = "According to " + + mock_usage1 = MagicMock() + mock_usage1.prompt_token_count = 30 + mock_usage1.candidates_token_count = 5 + mock_usage1.cached_content_token_count = 0 + mock_usage1.thoughts_token_count = 0 + mock_chunk1.usage_metadata = mock_usage1 + + # Add grounding metadata to first chunk + mock_grounding_chunk = MagicMock() + mock_grounding_chunk.uri = "https://example.com" + + mock_grounding_metadata = MagicMock() + mock_grounding_metadata.grounding_chunks = [mock_grounding_chunk] + + mock_candidate1 = MagicMock() + mock_candidate1.grounding_metadata = mock_grounding_metadata + type(mock_candidate1).grounding_metadata = mock_candidate1.grounding_metadata + + mock_chunk1.candidates = [mock_candidate1] + + # Create chunk 2 + mock_chunk2 = MagicMock() + mock_chunk2.text = "search results..." + + mock_usage2 = MagicMock() + mock_usage2.prompt_token_count = 30 + mock_usage2.candidates_token_count = 15 + mock_usage2.cached_content_token_count = 0 + mock_usage2.thoughts_token_count = 0 + mock_chunk2.usage_metadata = mock_usage2 + + mock_candidate2 = MagicMock() + mock_chunk2.candidates = [mock_candidate2] + + yield mock_chunk1 + yield mock_chunk2 + + # Mock the async generate_content_stream method + mock_google_genai_client.models.generate_content_stream_async = ( + mock_async_streaming_response + ) + + client = Client(api_key="test-key", posthog_client=mock_client) + + response = await client.models.generate_content_stream_async( + model="gemini-2.5-flash", + contents="What's the latest news?", + posthog_distinct_id="test-id", + ) + + chunks = [] + async for chunk in response: + chunks.append(chunk) + + assert len(chunks) == 2 + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is detected (binary for grounding) + assert props["$ai_web_search_count"] == 1 + assert props["$ai_input_tokens"] == 30 + assert props["$ai_output_tokens"] == 15 diff --git a/posthog/test/ai/openai/test_openai.py b/posthog/test/ai/openai/test_openai.py index 95f10336..a281f8a9 100644 --- a/posthog/test/ai/openai/test_openai.py +++ b/posthog/test/ai/openai/test_openai.py @@ -1446,3 +1446,197 @@ def mock_create_call(**kwargs): # Verify exact web search count assert props["$ai_web_search_count"] == 2 + + +@pytest.fixture +def streaming_web_search_chunks(): + """Streaming chunks with web search indicators (Perplexity-style).""" + return [ + ChatCompletionChunk( + id="chunk1", + model="gpt-4", + object="chat.completion.chunk", + created=1234567890, + choices=[ + ChoiceChunk( + index=0, + delta=ChoiceDelta( + role="assistant", + content="Based on my search, ", + ), + finish_reason=None, + ) + ], + ), + ChatCompletionChunk( + id="chunk2", + model="gpt-4", + object="chat.completion.chunk", + created=1234567891, + choices=[ + ChoiceChunk( + index=0, + delta=ChoiceDelta( + content="here are the latest news...", + ), + finish_reason=None, + ) + ], + ), + ChatCompletionChunk( + id="chunk3", + model="gpt-4", + object="chat.completion.chunk", + created=1234567892, + choices=[ + ChoiceChunk( + index=0, + delta=ChoiceDelta(), + finish_reason="stop", + ) + ], + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=15, + total_tokens=35, + ), + ), + ] + + +def test_streaming_with_web_search(mock_client, streaming_web_search_chunks): + """Test that web search count is properly captured in streaming mode.""" + + # Add citations attribute to the last chunk to indicate web search was used + streaming_web_search_chunks[-1].citations = ["https://example.com/news"] + + with patch("openai.resources.chat.completions.Completions.create") as mock_create: + mock_create.return_value = streaming_web_search_chunks + + client = OpenAI(api_key="test-key", posthog_client=mock_client) + response_generator = client.chat.completions.create( + model="gpt-4", + messages=[{"role": "user", "content": "Search for recent news"}], + stream=True, + posthog_distinct_id="test-id", + ) + + # Consume the generator to trigger the event capture + chunks = list(response_generator) + + # Verify the chunks were returned correctly + assert len(chunks) == 3 + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is captured (binary detection = 1) + assert props["$ai_web_search_count"] == 1 + assert props["$ai_input_tokens"] == 20 + assert props["$ai_output_tokens"] == 15 + + +@pytest.mark.asyncio +async def test_async_chat_with_web_search(mock_client): + """Test that web search count is properly tracked in async non-streaming mode.""" + + # Create mock response with citations (Perplexity-style) + mock_response = ChatCompletion( + id="chatcmpl-test", + model="gpt-4", + object="chat.completion", + created=1234567890, + choices=[ + Choice( + index=0, + message=ChatCompletionMessage( + role="assistant", + content="Here are the search results...", + ), + finish_reason="stop", + ) + ], + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=15, + total_tokens=35, + ), + ) + + # Add citations attribute to indicate web search + mock_response.citations = ["https://example.com/result1"] + + async def mock_create(self, **kwargs): + return mock_response + + with patch( + "openai.resources.chat.completions.AsyncCompletions.create", new=mock_create + ): + client = AsyncOpenAI(api_key="test-key", posthog_client=mock_client) + + response = await client.chat.completions.create( + model="gpt-4", + messages=[{"role": "user", "content": "Search for recent news"}], + posthog_distinct_id="test-id", + ) + + assert response == mock_response + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is captured (binary detection = 1) + assert props["$ai_web_search_count"] == 1 + assert props["$ai_input_tokens"] == 20 + assert props["$ai_output_tokens"] == 15 + + +@pytest.mark.asyncio +async def test_async_chat_streaming_with_web_search( + mock_client, streaming_web_search_chunks +): + """Test that web search count is properly captured in async streaming mode.""" + + # Add citations attribute to the last chunk to indicate web search was used + streaming_web_search_chunks[-1].citations = ["https://example.com/news"] + + captured_kwargs = {} + + async def mock_create(self, **kwargs): + captured_kwargs["kwargs"] = kwargs + + async def chunk_iterable(): + for chunk in streaming_web_search_chunks: + yield chunk + + return chunk_iterable() + + with patch( + "openai.resources.chat.completions.AsyncCompletions.create", new=mock_create + ): + client = AsyncOpenAI(api_key="test-key", posthog_client=mock_client) + + response_stream = await client.chat.completions.create( + model="gpt-4", + messages=[{"role": "user", "content": "Search for recent news"}], + stream=True, + posthog_distinct_id="test-id", + ) + + chunks = [] + async for chunk in response_stream: + chunks.append(chunk) + + # Verify the chunks were returned correctly + assert len(chunks) == 3 + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is captured (binary detection = 1) + assert props["$ai_web_search_count"] == 1 + assert props["$ai_input_tokens"] == 20 + assert props["$ai_output_tokens"] == 15 From 312b9c8e8044d375fc6678647a7fd9616fe24dde Mon Sep 17 00:00:00 2001 From: Radu Raicea Date: Thu, 30 Oct 2025 16:23:04 -0400 Subject: [PATCH 3/9] chore(llma): bump version --- CHANGELOG.md | 4 ++++ posthog/version.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index efe00781..367530e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 6.8.0 - 2025-10-28 + +- feat(llma): send web search calls to be used for LLM cost calculations + # 6.7.11 - 2025-10-28 - feat(ai): Add `$ai_framework` property for framework integrations (e.g. LangChain) diff --git a/posthog/version.py b/posthog/version.py index 1fe91ee0..c3dd9e0b 100644 --- a/posthog/version.py +++ b/posthog/version.py @@ -1,4 +1,4 @@ -VERSION = "6.7.11" +VERSION = "6.8.0" if __name__ == "__main__": print(VERSION, end="") # noqa: T201 From 73ab99af4a866b73080db1a5375c17cdcc9cf254 Mon Sep 17 00:00:00 2001 From: Radu Raicea Date: Fri, 31 Oct 2025 13:22:20 -0400 Subject: [PATCH 4/9] fix(llma): feedback --- posthog/ai/gemini/gemini_converter.py | 16 +++++++----- posthog/ai/openai/openai_async.py | 19 ++++++++++++++ posthog/ai/openai/openai_converter.py | 12 +++++---- posthog/test/ai/openai/test_openai.py | 36 +++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 11 deletions(-) diff --git a/posthog/ai/gemini/gemini_converter.py b/posthog/ai/gemini/gemini_converter.py index 3d509598..4fc2b2ba 100644 --- a/posthog/ai/gemini/gemini_converter.py +++ b/posthog/ai/gemini/gemini_converter.py @@ -445,16 +445,20 @@ def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage: usage: TokenUsage = TokenUsage() + # Extract web search count from the chunk before checking for usage_metadata + # Web search indicators can appear on any chunk, not just those with usage data + web_search_count = extract_gemini_web_search_count(chunk) + if web_search_count > 0: + usage["web_search_count"] = web_search_count + if not hasattr(chunk, "usage_metadata") or not chunk.usage_metadata: return usage - # Use the shared helper to extract usage - usage = _extract_usage_from_metadata(chunk.usage_metadata) + usage_from_metadata = _extract_usage_from_metadata(chunk.usage_metadata) - # Add web search count if present - web_search_count = extract_gemini_web_search_count(chunk) - if web_search_count > 0: - usage["web_search_count"] = web_search_count + # Merge the usage from metadata with any web search count we found + for key, value in usage_from_metadata.items(): + usage[key] = value return usage diff --git a/posthog/ai/openai/openai_async.py b/posthog/ai/openai/openai_async.py index 54ded707..404895fc 100644 --- a/posthog/ai/openai/openai_async.py +++ b/posthog/ai/openai/openai_async.py @@ -213,6 +213,15 @@ async def _capture_streaming_event( **(posthog_properties or {}), } + # Add web search count if present + web_search_count = usage_stats.get("web_search_count") + if ( + web_search_count is not None + and isinstance(web_search_count, int) + and web_search_count > 0 + ): + event_properties["$ai_web_search_count"] = web_search_count + if available_tool_calls: event_properties["$ai_tools"] = available_tool_calls @@ -444,6 +453,16 @@ async def _capture_streaming_event( **(posthog_properties or {}), } + # Add web search count if present + web_search_count = usage_stats.get("web_search_count") + + if ( + web_search_count is not None + and isinstance(web_search_count, int) + and web_search_count > 0 + ): + event_properties["$ai_web_search_count"] = web_search_count + if available_tool_calls: event_properties["$ai_tools"] = available_tool_calls diff --git a/posthog/ai/openai/openai_converter.py b/posthog/ai/openai/openai_converter.py index 320c0e51..76ee0162 100644 --- a/posthog/ai/openai/openai_converter.py +++ b/posthog/ai/openai/openai_converter.py @@ -415,6 +415,13 @@ def extract_openai_usage_from_chunk( usage: TokenUsage = TokenUsage() if provider_type == "chat": + # Extract web search count from the chunk before checking for usage + # Web search indicators (citations, annotations) can appear on any chunk, + # not just those with usage data + web_search_count = extract_openai_web_search_count(chunk) + if web_search_count > 0: + usage["web_search_count"] = web_search_count + if not hasattr(chunk, "usage") or not chunk.usage: return usage @@ -439,11 +446,6 @@ def extract_openai_usage_from_chunk( chunk.usage.completion_tokens_details.reasoning_tokens ) - # Extract web search count from the chunk (available in final streaming chunks) - web_search_count = extract_openai_web_search_count(chunk) - if web_search_count > 0: - usage["web_search_count"] = web_search_count - elif provider_type == "responses": # For Responses API, usage is only in chunk.response.usage for completed events if hasattr(chunk, "type") and chunk.type == "response.completed": diff --git a/posthog/test/ai/openai/test_openai.py b/posthog/test/ai/openai/test_openai.py index a281f8a9..07d45753 100644 --- a/posthog/test/ai/openai/test_openai.py +++ b/posthog/test/ai/openai/test_openai.py @@ -1537,6 +1537,42 @@ def test_streaming_with_web_search(mock_client, streaming_web_search_chunks): assert props["$ai_output_tokens"] == 15 +def test_streaming_with_web_search_on_non_usage_chunk( + mock_client, streaming_web_search_chunks +): + """Test that web search count is captured even when citations appear on chunks without usage data.""" + + # Add citations attribute to the FIRST chunk (which has no usage data) + # This tests the fix for the bug where web search indicators on non-usage chunks were ignored + streaming_web_search_chunks[0].citations = ["https://example.com/news"] + + with patch("openai.resources.chat.completions.Completions.create") as mock_create: + mock_create.return_value = streaming_web_search_chunks + + client = OpenAI(api_key="test-key", posthog_client=mock_client) + response_generator = client.chat.completions.create( + model="gpt-4", + messages=[{"role": "user", "content": "Search for recent news"}], + stream=True, + posthog_distinct_id="test-id", + ) + + # Consume the generator to trigger the event capture + chunks = list(response_generator) + + # Verify the chunks were returned correctly + assert len(chunks) == 3 + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is captured even though citations were on first chunk + assert props["$ai_web_search_count"] == 1 + assert props["$ai_input_tokens"] == 20 + assert props["$ai_output_tokens"] == 15 + + @pytest.mark.asyncio async def test_async_chat_with_web_search(mock_client): """Test that web search count is properly tracked in async non-streaming mode.""" From 5c9b6fb49be3d26e800785bd33f3368c3f5325bf Mon Sep 17 00:00:00 2001 From: Radu Raicea Date: Mon, 3 Nov 2025 15:30:19 -0500 Subject: [PATCH 5/9] fix(llma): fix Gemini --- posthog/ai/gemini/gemini_converter.py | 19 +++- posthog/test/ai/gemini/test_gemini.py | 127 ++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 2 deletions(-) diff --git a/posthog/ai/gemini/gemini_converter.py b/posthog/ai/gemini/gemini_converter.py index 4fc2b2ba..b7996fa8 100644 --- a/posthog/ai/gemini/gemini_converter.py +++ b/posthog/ai/gemini/gemini_converter.py @@ -343,7 +343,7 @@ def extract_gemini_web_search_count(response: Any) -> int: Extract web search count from Gemini response. Gemini bills per request that uses grounding, not per query. - Returns 1 if grounding_metadata is present, 0 otherwise. + Returns 1 if grounding_metadata is present with actual search data, 0 otherwise. Args: response: The response from Gemini API @@ -359,7 +359,21 @@ def extract_gemini_web_search_count(response: Any) -> int: hasattr(candidate, "grounding_metadata") and candidate.grounding_metadata ): - return 1 + grounding_metadata = candidate.grounding_metadata + + # Check if web_search_queries exists and is non-empty + if hasattr(grounding_metadata, "web_search_queries"): + queries = grounding_metadata.web_search_queries + + if queries is not None and len(queries) > 0: + return 1 + + # Check if grounding_chunks exists and is non-empty + if hasattr(grounding_metadata, "grounding_chunks"): + chunks = grounding_metadata.grounding_chunks + + if chunks is not None and len(chunks) > 0: + return 1 # Also check for google_search or grounding in function call names if hasattr(candidate, "content") and candidate.content: @@ -369,6 +383,7 @@ def extract_gemini_web_search_count(response: Any) -> int: function_name = getattr( part.function_call, "name", "" ).lower() + if ( "google_search" in function_name or "grounding" in function_name diff --git a/posthog/test/ai/gemini/test_gemini.py b/posthog/test/ai/gemini/test_gemini.py index 581bdcdf..1379d4e2 100644 --- a/posthog/test/ai/gemini/test_gemini.py +++ b/posthog/test/ai/gemini/test_gemini.py @@ -1114,3 +1114,130 @@ async def mock_async_streaming_response(): assert props["$ai_web_search_count"] == 1 assert props["$ai_input_tokens"] == 30 assert props["$ai_output_tokens"] == 15 + + +def test_empty_grounding_metadata_no_web_search(mock_client, mock_google_genai_client): + """Test that empty grounding_metadata (all null fields) does not count as web search.""" + + # Create mock response with empty grounding metadata (all null fields) + mock_response = MagicMock() + + # Mock usage metadata + mock_usage = MagicMock() + mock_usage.prompt_token_count = 10 + mock_usage.candidates_token_count = 10 + mock_usage.cached_content_token_count = 0 + mock_usage.thoughts_token_count = 0 + mock_response.usage_metadata = mock_usage + + # Mock empty grounding metadata (all fields are None) + mock_grounding_metadata = MagicMock() + mock_grounding_metadata.web_search_queries = None + mock_grounding_metadata.grounding_chunks = None + mock_grounding_metadata.grounding_supports = None + mock_grounding_metadata.retrieval_metadata = None + mock_grounding_metadata.retrieval_queries = None + mock_grounding_metadata.search_entry_point = None + + # Mock text part + mock_text_part = MagicMock() + mock_text_part.text = "Hey there! How can I help you today?" + type(mock_text_part).text = mock_text_part.text + + # Mock content with parts + mock_content = MagicMock() + mock_content.parts = [mock_text_part] + + # Mock candidate with empty grounding metadata + mock_candidate = MagicMock() + mock_candidate.content = mock_content + mock_candidate.grounding_metadata = mock_grounding_metadata + type(mock_candidate).grounding_metadata = mock_candidate.grounding_metadata + + mock_response.candidates = [mock_candidate] + mock_response.text = "Hey there! How can I help you today?" + + # Mock the generate_content method + mock_google_genai_client.models.generate_content.return_value = mock_response + + client = Client(api_key="test-key", posthog_client=mock_client) + + response = client.models.generate_content( + model="gemini-2.5-flash", + contents="Hello", + posthog_distinct_id="test-id", + ) + + assert response == mock_response + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is 0 (not present in properties when 0) + assert "$ai_web_search_count" not in props + assert props["$ai_input_tokens"] == 10 + assert props["$ai_output_tokens"] == 10 + + +def test_empty_array_grounding_metadata_no_web_search( + mock_client, mock_google_genai_client +): + """Test that grounding_metadata with empty arrays does not count as web search.""" + + # Create mock response with grounding metadata having empty arrays + mock_response = MagicMock() + + # Mock usage metadata + mock_usage = MagicMock() + mock_usage.prompt_token_count = 15 + mock_usage.candidates_token_count = 12 + mock_usage.cached_content_token_count = 0 + mock_usage.thoughts_token_count = 0 + mock_response.usage_metadata = mock_usage + + # Mock grounding metadata with empty arrays + mock_grounding_metadata = MagicMock() + mock_grounding_metadata.web_search_queries = [] + mock_grounding_metadata.grounding_chunks = [] + mock_grounding_metadata.grounding_supports = [] + + # Mock text part + mock_text_part = MagicMock() + mock_text_part.text = "I can help with that." + type(mock_text_part).text = mock_text_part.text + + # Mock content with parts + mock_content = MagicMock() + mock_content.parts = [mock_text_part] + + # Mock candidate with grounding metadata containing empty arrays + mock_candidate = MagicMock() + mock_candidate.content = mock_content + mock_candidate.grounding_metadata = mock_grounding_metadata + type(mock_candidate).grounding_metadata = mock_candidate.grounding_metadata + + mock_response.candidates = [mock_candidate] + mock_response.text = "I can help with that." + + # Mock the generate_content method + mock_google_genai_client.models.generate_content.return_value = mock_response + + client = Client(api_key="test-key", posthog_client=mock_client) + + response = client.models.generate_content( + model="gemini-2.5-flash", + contents="What can you do?", + posthog_distinct_id="test-id", + ) + + assert response == mock_response + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify web search count is 0 (not present in properties when 0) + assert "$ai_web_search_count" not in props + assert props["$ai_input_tokens"] == 15 + assert props["$ai_output_tokens"] == 12 From e6e417c29dc31c6861d26e3f54be1293f3147341 Mon Sep 17 00:00:00 2001 From: Radu Raicea Date: Mon, 3 Nov 2025 17:07:10 -0500 Subject: [PATCH 6/9] fix(llma): fix OpenAI's Chat Completions streaming --- posthog/ai/openai/openai_converter.py | 48 ++++++++++++++++++++++----- posthog/ai/utils.py | 2 +- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/posthog/ai/openai/openai_converter.py b/posthog/ai/openai/openai_converter.py index 76ee0162..e7b8ce5e 100644 --- a/posthog/ai/openai/openai_converter.py +++ b/posthog/ai/openai/openai_converter.py @@ -263,7 +263,7 @@ def extract_openai_web_search_count(response: Any) -> int: 1. Priority 1 (exact count): Check for output[].type == "web_search_call" (Responses API) 2. Priority 2 (binary detection): Check for various web search indicators: - Root-level citations, search_results, or usage.search_context_size (Perplexity) - - Annotations with type "url_citation" in choices/output + - Annotations with type "url_citation" in choices/output (including delta for streaming) Args: response: The response from OpenAI API @@ -275,6 +275,7 @@ def extract_openai_web_search_count(response: Any) -> int: # Priority 1: Check for exact count in Responses API output if hasattr(response, "output"): web_search_count = 0 + for item in response.output: if hasattr(item, "type") and item.type == "web_search_call": web_search_count += 1 @@ -289,11 +290,13 @@ def extract_openai_web_search_count(response: Any) -> int: # Check root-level indicators (Perplexity) if hasattr(response, "citations"): citations = getattr(response, "citations") + if citations and len(citations) > 0: return 1 if hasattr(response, "search_results"): search_results = getattr(response, "search_results") + if search_results and len(search_results) > 0: return 1 @@ -304,14 +307,36 @@ def extract_openai_web_search_count(response: Any) -> int: # Check for url_citation annotations in choices (Chat Completions) if hasattr(response, "choices"): for choice in response.choices: + # Check message.annotations (non-streaming or final chunk) if hasattr(choice, "message") and hasattr(choice.message, "annotations"): annotations = choice.message.annotations + if annotations: for annotation in annotations: - if ( - hasattr(annotation, "type") - and annotation.type == "url_citation" - ): + # Support both dict and object formats + annotation_type = ( + annotation.get("type") + if isinstance(annotation, dict) + else getattr(annotation, "type", None) + ) + + if annotation_type == "url_citation": + return 1 + + # Check delta.annotations (streaming chunks) + if hasattr(choice, "delta") and hasattr(choice.delta, "annotations"): + annotations = choice.delta.annotations + + if annotations: + for annotation in annotations: + # Support both dict and object formats + annotation_type = ( + annotation.get("type") + if isinstance(annotation, dict) + else getattr(annotation, "type", None) + ) + + if annotation_type == "url_citation": return 1 # Check for url_citation annotations in output (Responses API) @@ -321,12 +346,17 @@ def extract_openai_web_search_count(response: Any) -> int: for content_item in item.content: if hasattr(content_item, "annotations"): annotations = content_item.annotations + if annotations: for annotation in annotations: - if ( - hasattr(annotation, "type") - and annotation.type == "url_citation" - ): + # Support both dict and object formats + annotation_type = ( + annotation.get("type") + if isinstance(annotation, dict) + else getattr(annotation, "type", None) + ) + + if annotation_type == "url_citation": return 1 return 0 diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py index 03b37600..559860cc 100644 --- a/posthog/ai/utils.py +++ b/posthog/ai/utils.py @@ -57,7 +57,7 @@ def merge_usage_stats( source_web_search = source.get("web_search_count") if source_web_search is not None: current = target.get("web_search_count") or 0 - target["web_search_count"] = current + source_web_search + target["web_search_count"] = max(current, source_web_search) elif mode == "cumulative": # Replace with latest values (already cumulative) From 1bcec76b57a780963719db2400387998197fca69 Mon Sep 17 00:00:00 2001 From: Radu Raicea Date: Mon, 3 Nov 2025 17:16:49 -0500 Subject: [PATCH 7/9] fix(llma): fix mypy issue --- posthog/ai/gemini/gemini_converter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/posthog/ai/gemini/gemini_converter.py b/posthog/ai/gemini/gemini_converter.py index b7996fa8..bfa3625a 100644 --- a/posthog/ai/gemini/gemini_converter.py +++ b/posthog/ai/gemini/gemini_converter.py @@ -472,8 +472,7 @@ def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage: usage_from_metadata = _extract_usage_from_metadata(chunk.usage_metadata) # Merge the usage from metadata with any web search count we found - for key, value in usage_from_metadata.items(): - usage[key] = value + usage.update(usage_from_metadata) return usage From 84ab551a80d851232d87f13f113adcbd170e309c Mon Sep 17 00:00:00 2001 From: Radu Raicea Date: Tue, 4 Nov 2025 11:47:10 -0500 Subject: [PATCH 8/9] fix(llma): anthropic streaming --- posthog/ai/anthropic/anthropic_async.py | 97 ++++++++----------------- 1 file changed, 30 insertions(+), 67 deletions(-) diff --git a/posthog/ai/anthropic/anthropic_async.py b/posthog/ai/anthropic/anthropic_async.py index 527b73f9..d83a9828 100644 --- a/posthog/ai/anthropic/anthropic_async.py +++ b/posthog/ai/anthropic/anthropic_async.py @@ -14,14 +14,9 @@ from posthog.ai.types import StreamingContentBlock, TokenUsage, ToolInProgress from posthog.ai.utils import ( call_llm_and_track_usage_async, - extract_available_tool_calls, - get_model_params, - merge_system_prompt, merge_usage_stats, - with_privacy_mode, ) from posthog.ai.anthropic.anthropic_converter import ( - format_anthropic_streaming_content, extract_anthropic_usage_from_event, handle_anthropic_content_block_start, handle_anthropic_text_delta, @@ -220,66 +215,34 @@ async def _capture_streaming_event( content_blocks: List[StreamingContentBlock], accumulated_content: str, ): - if posthog_trace_id is None: - posthog_trace_id = str(uuid.uuid4()) - - # Format output using converter - formatted_content = format_anthropic_streaming_content(content_blocks) - formatted_output = [] - - if formatted_content: - formatted_output = [{"role": "assistant", "content": formatted_content}] - else: - # Fallback to accumulated content if no blocks - formatted_output = [ - { - "role": "assistant", - "content": [{"type": "text", "text": accumulated_content}], - } - ] - - event_properties = { - "$ai_provider": "anthropic", - "$ai_model": kwargs.get("model"), - "$ai_model_parameters": get_model_params(kwargs), - "$ai_input": with_privacy_mode( - self._client._ph_client, - posthog_privacy_mode, - sanitize_anthropic(merge_system_prompt(kwargs, "anthropic")), - ), - "$ai_output_choices": with_privacy_mode( - self._client._ph_client, - posthog_privacy_mode, - formatted_output, - ), - "$ai_http_status": 200, - "$ai_input_tokens": usage_stats.get("input_tokens", 0), - "$ai_output_tokens": usage_stats.get("output_tokens", 0), - "$ai_cache_read_input_tokens": usage_stats.get( - "cache_read_input_tokens", 0 - ), - "$ai_cache_creation_input_tokens": usage_stats.get( - "cache_creation_input_tokens", 0 + from posthog.ai.types import StreamingEventData + from posthog.ai.anthropic.anthropic_converter import ( + format_anthropic_streaming_input, + format_anthropic_streaming_output_complete, + ) + from posthog.ai.utils import capture_streaming_event + + # Prepare standardized event data + formatted_input = format_anthropic_streaming_input(kwargs) + sanitized_input = sanitize_anthropic(formatted_input) + + event_data = StreamingEventData( + provider="anthropic", + model=kwargs.get("model", "unknown"), + base_url=str(self._client.base_url), + kwargs=kwargs, + formatted_input=sanitized_input, + formatted_output=format_anthropic_streaming_output_complete( + content_blocks, accumulated_content ), - "$ai_latency": latency, - "$ai_trace_id": posthog_trace_id, - "$ai_base_url": str(self._client.base_url), - **(posthog_properties or {}), - } - - # Add tools if available - available_tools = extract_available_tool_calls("anthropic", kwargs) - - if available_tools: - event_properties["$ai_tools"] = available_tools - - if posthog_distinct_id is None: - event_properties["$process_person_profile"] = False - - if hasattr(self._client._ph_client, "capture"): - self._client._ph_client.capture( - distinct_id=posthog_distinct_id or posthog_trace_id, - event="$ai_generation", - properties=event_properties, - groups=posthog_groups, - ) + usage_stats=usage_stats, + latency=latency, + distinct_id=posthog_distinct_id, + trace_id=posthog_trace_id, + properties=posthog_properties, + privacy_mode=posthog_privacy_mode, + groups=posthog_groups, + ) + + # Use the common capture function + capture_streaming_event(self._client._ph_client, event_data) From cc0cfc7b38330d9dccd84d5289ba6d40bd230677 Mon Sep 17 00:00:00 2001 From: Radu Raicea Date: Tue, 4 Nov 2025 14:40:46 -0500 Subject: [PATCH 9/9] fix(llma): remove async gemini tests --- posthog/test/ai/gemini/test_gemini.py | 145 -------------------------- 1 file changed, 145 deletions(-) diff --git a/posthog/test/ai/gemini/test_gemini.py b/posthog/test/ai/gemini/test_gemini.py index 1379d4e2..e0c216a0 100644 --- a/posthog/test/ai/gemini/test_gemini.py +++ b/posthog/test/ai/gemini/test_gemini.py @@ -971,151 +971,6 @@ def mock_streaming_response(): assert props["$ai_output_tokens"] == 15 -@pytest.mark.asyncio -async def test_async_with_web_search(mock_client, mock_google_genai_client): - """Test that web search count is properly tracked in async non-streaming mode.""" - - # Create mock response with grounding metadata - mock_response = MagicMock() - - # Mock usage metadata - mock_usage = MagicMock() - mock_usage.prompt_token_count = 60 - mock_usage.candidates_token_count = 40 - mock_usage.cached_content_token_count = 0 - mock_usage.thoughts_token_count = 0 - mock_response.usage_metadata = mock_usage - - # Mock grounding metadata - mock_grounding_chunk = MagicMock() - mock_grounding_chunk.uri = "https://example.com" - - mock_grounding_metadata = MagicMock() - mock_grounding_metadata.grounding_chunks = [mock_grounding_chunk] - - # Mock text part - mock_text_part = MagicMock() - mock_text_part.text = "According to search results..." - type(mock_text_part).text = mock_text_part.text - - # Mock content with parts - mock_content = MagicMock() - mock_content.parts = [mock_text_part] - - # Mock candidate with grounding metadata - mock_candidate = MagicMock() - mock_candidate.content = mock_content - mock_candidate.grounding_metadata = mock_grounding_metadata - type(mock_candidate).grounding_metadata = mock_candidate.grounding_metadata - - mock_response.candidates = [mock_candidate] - mock_response.text = "According to search results..." - - # Mock the async generate_content method - async def mock_async_generate_content(*args, **kwargs): - return mock_response - - mock_google_genai_client.models.generate_content_async = mock_async_generate_content - - client = Client(api_key="test-key", posthog_client=mock_client) - - async def run_test(): - response = await client.models.generate_content_async( - model="gemini-2.5-flash", - contents="What's the latest news?", - posthog_distinct_id="test-id", - ) - return response - - response = await run_test() - - assert response == mock_response - assert mock_client.capture.call_count == 1 - - call_args = mock_client.capture.call_args[1] - props = call_args["properties"] - - # Verify web search count is detected (binary for grounding) - assert props["$ai_web_search_count"] == 1 - assert props["$ai_input_tokens"] == 60 - assert props["$ai_output_tokens"] == 40 - - -@pytest.mark.asyncio -async def test_async_streaming_with_web_search(mock_client, mock_google_genai_client): - """Test that web search count is properly captured in async streaming mode.""" - - async def mock_async_streaming_response(): - # Create chunk 1 with grounding metadata - mock_chunk1 = MagicMock() - mock_chunk1.text = "According to " - - mock_usage1 = MagicMock() - mock_usage1.prompt_token_count = 30 - mock_usage1.candidates_token_count = 5 - mock_usage1.cached_content_token_count = 0 - mock_usage1.thoughts_token_count = 0 - mock_chunk1.usage_metadata = mock_usage1 - - # Add grounding metadata to first chunk - mock_grounding_chunk = MagicMock() - mock_grounding_chunk.uri = "https://example.com" - - mock_grounding_metadata = MagicMock() - mock_grounding_metadata.grounding_chunks = [mock_grounding_chunk] - - mock_candidate1 = MagicMock() - mock_candidate1.grounding_metadata = mock_grounding_metadata - type(mock_candidate1).grounding_metadata = mock_candidate1.grounding_metadata - - mock_chunk1.candidates = [mock_candidate1] - - # Create chunk 2 - mock_chunk2 = MagicMock() - mock_chunk2.text = "search results..." - - mock_usage2 = MagicMock() - mock_usage2.prompt_token_count = 30 - mock_usage2.candidates_token_count = 15 - mock_usage2.cached_content_token_count = 0 - mock_usage2.thoughts_token_count = 0 - mock_chunk2.usage_metadata = mock_usage2 - - mock_candidate2 = MagicMock() - mock_chunk2.candidates = [mock_candidate2] - - yield mock_chunk1 - yield mock_chunk2 - - # Mock the async generate_content_stream method - mock_google_genai_client.models.generate_content_stream_async = ( - mock_async_streaming_response - ) - - client = Client(api_key="test-key", posthog_client=mock_client) - - response = await client.models.generate_content_stream_async( - model="gemini-2.5-flash", - contents="What's the latest news?", - posthog_distinct_id="test-id", - ) - - chunks = [] - async for chunk in response: - chunks.append(chunk) - - assert len(chunks) == 2 - assert mock_client.capture.call_count == 1 - - call_args = mock_client.capture.call_args[1] - props = call_args["properties"] - - # Verify web search count is detected (binary for grounding) - assert props["$ai_web_search_count"] == 1 - assert props["$ai_input_tokens"] == 30 - assert props["$ai_output_tokens"] == 15 - - def test_empty_grounding_metadata_no_web_search(mock_client, mock_google_genai_client): """Test that empty grounding_metadata (all null fields) does not count as web search."""