From 0c61c79bc052079d2aeb1a8e85d5d0b6a090a5b3 Mon Sep 17 00:00:00 2001
From: Radu Raicea <radu@raicea.com>
Date: Tue, 28 Oct 2025 16:36:04 -0400
Subject: [PATCH 1/9] feat(llma): send number of web searches

---
 posthog/ai/anthropic/anthropic_converter.py |  40 ++++++++
 posthog/ai/gemini/gemini_converter.py       |  54 +++++++++-
 posthog/ai/openai/openai_converter.py       |  90 ++++++++++++++++
 posthog/ai/types.py                         |   1 +
 posthog/ai/utils.py                         |  30 ++++++
 posthog/test/ai/anthropic/test_anthropic.py |  45 +++++++-
 posthog/test/ai/gemini/test_gemini.py       |  61 +++++++++++
 posthog/test/ai/openai/test_openai.py       | 107 ++++++++++++++++++++
 8 files changed, 426 insertions(+), 2 deletions(-)

diff --git a/posthog/ai/anthropic/anthropic_converter.py b/posthog/ai/anthropic/anthropic_converter.py
index 7d2e615f..663fc00d 100644
--- a/posthog/ai/anthropic/anthropic_converter.py
+++ b/posthog/ai/anthropic/anthropic_converter.py
@@ -163,6 +163,32 @@ def format_anthropic_streaming_content(
     return formatted
 
 
+def extract_anthropic_web_search_count(response: Any) -> int:
+    """
+    Extract web search count from Anthropic response.
+
+    Anthropic provides exact web search counts via usage.server_tool_use.web_search_requests.
+
+    Args:
+        response: The response from Anthropic API
+
+    Returns:
+        Number of web search requests (0 if none)
+    """
+    if not hasattr(response, "usage"):
+        return 0
+
+    if not hasattr(response.usage, "server_tool_use"):
+        return 0
+
+    server_tool_use = response.usage.server_tool_use
+
+    if hasattr(server_tool_use, "web_search_requests"):
+        return int(getattr(server_tool_use, "web_search_requests", 0))
+
+    return 0
+
+
 def extract_anthropic_usage_from_response(response: Any) -> TokenUsage:
     """
     Extract usage from a full Anthropic response (non-streaming).
@@ -191,6 +217,10 @@ def extract_anthropic_usage_from_response(response: Any) -> TokenUsage:
         if cache_creation and cache_creation > 0:
             result["cache_creation_input_tokens"] = cache_creation
 
+    web_search_count = extract_anthropic_web_search_count(response)
+    if web_search_count > 0:
+        result["web_search_count"] = web_search_count
+
     return result
 
 
@@ -222,6 +252,16 @@ def extract_anthropic_usage_from_event(event: Any) -> TokenUsage:
     if hasattr(event, "usage") and event.usage:
         usage["output_tokens"] = getattr(event.usage, "output_tokens", 0)
 
+        # Extract web search count from usage
+        if hasattr(event.usage, "server_tool_use"):
+            server_tool_use = event.usage.server_tool_use
+            if hasattr(server_tool_use, "web_search_requests"):
+                web_search_count = int(
+                    getattr(server_tool_use, "web_search_requests", 0)
+                )
+                if web_search_count > 0:
+                    usage["web_search_count"] = web_search_count
+
     return usage
 
 
diff --git a/posthog/ai/gemini/gemini_converter.py b/posthog/ai/gemini/gemini_converter.py
index 7a8cd0ca..3d509598 100644
--- a/posthog/ai/gemini/gemini_converter.py
+++ b/posthog/ai/gemini/gemini_converter.py
@@ -338,6 +338,46 @@ def format_gemini_input(contents: Any) -> List[FormattedMessage]:
     return [_format_object_message(contents)]
 
 
+def extract_gemini_web_search_count(response: Any) -> int:
+    """
+    Extract web search count from Gemini response.
+
+    Gemini bills per request that uses grounding, not per query.
+    Returns 1 if grounding_metadata is present, 0 otherwise.
+
+    Args:
+        response: The response from Gemini API
+
+    Returns:
+        1 if web search/grounding was used, 0 otherwise
+    """
+
+    # Check for grounding_metadata in candidates
+    if hasattr(response, "candidates"):
+        for candidate in response.candidates:
+            if (
+                hasattr(candidate, "grounding_metadata")
+                and candidate.grounding_metadata
+            ):
+                return 1
+
+            # Also check for google_search or grounding in function call names
+            if hasattr(candidate, "content") and candidate.content:
+                if hasattr(candidate.content, "parts") and candidate.content.parts:
+                    for part in candidate.content.parts:
+                        if hasattr(part, "function_call") and part.function_call:
+                            function_name = getattr(
+                                part.function_call, "name", ""
+                            ).lower()
+                            if (
+                                "google_search" in function_name
+                                or "grounding" in function_name
+                            ):
+                                return 1
+
+    return 0
+
+
 def _extract_usage_from_metadata(metadata: Any) -> TokenUsage:
     """
     Common logic to extract usage from Gemini metadata.
@@ -382,7 +422,14 @@ def extract_gemini_usage_from_response(response: Any) -> TokenUsage:
     if not hasattr(response, "usage_metadata") or not response.usage_metadata:
         return TokenUsage(input_tokens=0, output_tokens=0)
 
-    return _extract_usage_from_metadata(response.usage_metadata)
+    usage = _extract_usage_from_metadata(response.usage_metadata)
+
+    # Add web search count if present
+    web_search_count = extract_gemini_web_search_count(response)
+    if web_search_count > 0:
+        usage["web_search_count"] = web_search_count
+
+    return usage
 
 
 def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
@@ -404,6 +451,11 @@ def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
     # Use the shared helper to extract usage
     usage = _extract_usage_from_metadata(chunk.usage_metadata)
 
+    # Add web search count if present
+    web_search_count = extract_gemini_web_search_count(chunk)
+    if web_search_count > 0:
+        usage["web_search_count"] = web_search_count
+
     return usage
 
 
diff --git a/posthog/ai/openai/openai_converter.py b/posthog/ai/openai/openai_converter.py
index 778478ff..69491a48 100644
--- a/posthog/ai/openai/openai_converter.py
+++ b/posthog/ai/openai/openai_converter.py
@@ -255,6 +255,81 @@ def format_openai_streaming_content(
     return formatted
 
 
+def extract_openai_web_search_count(response: Any) -> int:
+    """
+    Extract web search count from OpenAI response.
+
+    Uses a two-tier detection strategy:
+    1. Priority 1 (exact count): Check for output[].type == "web_search_call" (Responses API)
+    2. Priority 2 (binary detection): Check for various web search indicators:
+       - Root-level citations, search_results, or usage.search_context_size (Perplexity)
+       - Annotations with type "url_citation" in choices/output
+
+    Args:
+        response: The response from OpenAI API
+
+    Returns:
+        Number of web search requests (exact count or binary 1/0)
+    """
+
+    # Priority 1: Check for exact count in Responses API output
+    if hasattr(response, "output"):
+        web_search_count = 0
+        for item in response.output:
+            if hasattr(item, "type") and item.type == "web_search_call":
+                web_search_count += 1
+
+        if web_search_count > 0:
+            return web_search_count
+
+    # Priority 2: Binary detection (returns 1 or 0)
+
+    # Check root-level indicators (Perplexity)
+    if hasattr(response, "citations"):
+        citations = getattr(response, "citations")
+        if citations and len(citations) > 0:
+            return 1
+
+    if hasattr(response, "search_results"):
+        search_results = getattr(response, "search_results")
+        if search_results and len(search_results) > 0:
+            return 1
+
+    if hasattr(response, "usage") and hasattr(response.usage, "search_context_size"):
+        if response.usage.search_context_size:
+            return 1
+
+    # Check for url_citation annotations in choices (Chat Completions)
+    if hasattr(response, "choices"):
+        for choice in response.choices:
+            if hasattr(choice, "message") and hasattr(choice.message, "annotations"):
+                annotations = choice.message.annotations
+                if annotations:
+                    for annotation in annotations:
+                        if (
+                            hasattr(annotation, "type")
+                            and annotation.type == "url_citation"
+                        ):
+                            return 1
+
+    # Check for url_citation annotations in output (Responses API)
+    if hasattr(response, "output"):
+        for item in response.output:
+            if hasattr(item, "content") and isinstance(item.content, list):
+                for content_item in item.content:
+                    if hasattr(content_item, "annotations"):
+                        annotations = content_item.annotations
+                        if annotations:
+                            for annotation in annotations:
+                                if (
+                                    hasattr(annotation, "type")
+                                    and annotation.type == "url_citation"
+                                ):
+                                    return 1
+
+    return 0
+
+
 def extract_openai_usage_from_response(response: Any) -> TokenUsage:
     """
     Extract usage statistics from a full OpenAI response (non-streaming).
@@ -312,6 +387,10 @@ def extract_openai_usage_from_response(response: Any) -> TokenUsage:
     if reasoning_tokens > 0:
         result["reasoning_tokens"] = reasoning_tokens
 
+    web_search_count = extract_openai_web_search_count(response)
+    if web_search_count > 0:
+        result["web_search_count"] = web_search_count
+
     return result
 
 
@@ -358,6 +437,11 @@ def extract_openai_usage_from_chunk(
                 chunk.usage.completion_tokens_details.reasoning_tokens
             )
 
+        # Extract web search count from the chunk (available in final streaming chunks)
+        web_search_count = extract_openai_web_search_count(chunk)
+        if web_search_count > 0:
+            usage["web_search_count"] = web_search_count
+
     elif provider_type == "responses":
         # For Responses API, usage is only in chunk.response.usage for completed events
         if hasattr(chunk, "type") and chunk.type == "response.completed":
@@ -386,6 +470,12 @@ def extract_openai_usage_from_chunk(
                         response_usage.output_tokens_details.reasoning_tokens
                     )
 
+                # Extract web search count from the complete response
+                if hasattr(chunk, "response"):
+                    web_search_count = extract_openai_web_search_count(chunk.response)
+                    if web_search_count > 0:
+                        usage["web_search_count"] = web_search_count
+
     return usage
 
 
diff --git a/posthog/ai/types.py b/posthog/ai/types.py
index d90a0df8..c549cadc 100644
--- a/posthog/ai/types.py
+++ b/posthog/ai/types.py
@@ -63,6 +63,7 @@ class TokenUsage(TypedDict, total=False):
     cache_read_input_tokens: Optional[int]
     cache_creation_input_tokens: Optional[int]
     reasoning_tokens: Optional[int]
+    web_search_count: Optional[int]
 
 
 class ProviderResponse(TypedDict, total=False):
diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py
index d6afd1db..03b37600 100644
--- a/posthog/ai/utils.py
+++ b/posthog/ai/utils.py
@@ -53,6 +53,12 @@ def merge_usage_stats(
         if source_reasoning is not None:
             current = target.get("reasoning_tokens") or 0
             target["reasoning_tokens"] = current + source_reasoning
+
+        source_web_search = source.get("web_search_count")
+        if source_web_search is not None:
+            current = target.get("web_search_count") or 0
+            target["web_search_count"] = current + source_web_search
+
     elif mode == "cumulative":
         # Replace with latest values (already cumulative)
         if source.get("input_tokens") is not None:
@@ -67,6 +73,9 @@ def merge_usage_stats(
             ]
         if source.get("reasoning_tokens") is not None:
             target["reasoning_tokens"] = source["reasoning_tokens"]
+        if source.get("web_search_count") is not None:
+            target["web_search_count"] = source["web_search_count"]
+
     else:
         raise ValueError(f"Invalid mode: {mode}. Must be 'incremental' or 'cumulative'")
 
@@ -311,6 +320,10 @@ def call_llm_and_track_usage(
         if reasoning is not None and reasoning > 0:
             event_properties["$ai_reasoning_tokens"] = reasoning
 
+        web_search_count = usage.get("web_search_count")
+        if web_search_count is not None and web_search_count > 0:
+            event_properties["$ai_web_search_count"] = web_search_count
+
         if posthog_distinct_id is None:
             event_properties["$process_person_profile"] = False
 
@@ -414,6 +427,14 @@ async def call_llm_and_track_usage_async(
         if cache_creation is not None and cache_creation > 0:
             event_properties["$ai_cache_creation_input_tokens"] = cache_creation
 
+        reasoning = usage.get("reasoning_tokens")
+        if reasoning is not None and reasoning > 0:
+            event_properties["$ai_reasoning_tokens"] = reasoning
+
+        web_search_count = usage.get("web_search_count")
+        if web_search_count is not None and web_search_count > 0:
+            event_properties["$ai_web_search_count"] = web_search_count
+
         if posthog_distinct_id is None:
             event_properties["$process_person_profile"] = False
 
@@ -535,6 +556,15 @@ def capture_streaming_event(
             if value is not None and isinstance(value, int) and value > 0:
                 event_properties[f"$ai_{field}"] = value
 
+    # Add web search count if present (all providers)
+    web_search_count = event_data["usage_stats"].get("web_search_count")
+    if (
+        web_search_count is not None
+        and isinstance(web_search_count, int)
+        and web_search_count > 0
+    ):
+        event_properties["$ai_web_search_count"] = web_search_count
+
     # Handle provider-specific fields
     if (
         event_data["provider"] == "openai"
diff --git a/posthog/test/ai/anthropic/test_anthropic.py b/posthog/test/ai/anthropic/test_anthropic.py
index 5f65a99e..3cd77e0c 100644
--- a/posthog/test/ai/anthropic/test_anthropic.py
+++ b/posthog/test/ai/anthropic/test_anthropic.py
@@ -1,4 +1,3 @@
-import os
 from unittest.mock import patch
 
 import pytest
@@ -1034,3 +1033,47 @@ async def run_test():
         assert props["$ai_output_tokens"] == 25
         assert props["$ai_cache_read_input_tokens"] == 5
         assert props["$ai_cache_creation_input_tokens"] == 0
+
+
+def test_web_search_count(mock_client):
+    """Test that web search count is properly tracked from Anthropic responses."""
+
+    # Create a mock usage with web search
+    class MockServerToolUse:
+        def __init__(self):
+            self.web_search_requests = 3
+
+    class MockUsageWithWebSearch:
+        def __init__(self):
+            self.input_tokens = 100
+            self.output_tokens = 50
+            self.cache_read_input_tokens = 0
+            self.cache_creation_input_tokens = 0
+            self.server_tool_use = MockServerToolUse()
+
+    class MockResponseWithWebSearch:
+        def __init__(self):
+            self.content = [MockContent(text="Search results show...")]
+            self.model = "claude-3-opus-20240229"
+            self.usage = MockUsageWithWebSearch()
+
+    mock_response = MockResponseWithWebSearch()
+
+    with patch("anthropic.resources.Messages.create", return_value=mock_response):
+        client = Anthropic(api_key="test-key", posthog_client=mock_client)
+        response = client.messages.create(
+            model="claude-3-opus-20240229",
+            messages=[{"role": "user", "content": "Search for recent news"}],
+            posthog_distinct_id="test-id",
+        )
+
+        assert response == mock_response
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Verify web search count is captured
+        assert props["$ai_web_search_count"] == 3
+        assert props["$ai_input_tokens"] == 100
+        assert props["$ai_output_tokens"] == 50
diff --git a/posthog/test/ai/gemini/test_gemini.py b/posthog/test/ai/gemini/test_gemini.py
index e7571bba..954ed06d 100644
--- a/posthog/test/ai/gemini/test_gemini.py
+++ b/posthog/test/ai/gemini/test_gemini.py
@@ -837,3 +837,64 @@ def test_streaming_cache_and_reasoning_tokens(mock_client, mock_google_genai_cli
     assert props["$ai_output_tokens"] == 10
     assert props["$ai_cache_read_input_tokens"] == 30
     assert props["$ai_reasoning_tokens"] == 5
+
+
+def test_web_search_grounding(mock_client, mock_google_genai_client):
+    """Test web search detection via grounding_metadata."""
+
+    # Create mock response with grounding metadata
+    mock_response = MagicMock()
+
+    # Mock usage metadata
+    mock_usage = MagicMock()
+    mock_usage.prompt_token_count = 60
+    mock_usage.candidates_token_count = 40
+    mock_usage.cached_content_token_count = 0
+    mock_usage.thoughts_token_count = 0
+    mock_response.usage_metadata = mock_usage
+
+    # Mock grounding metadata
+    mock_grounding_chunk = MagicMock()
+    mock_grounding_chunk.uri = "https://example.com"
+
+    mock_grounding_metadata = MagicMock()
+    mock_grounding_metadata.grounding_chunks = [mock_grounding_chunk]
+
+    # Mock text part
+    mock_text_part = MagicMock()
+    mock_text_part.text = "According to search results..."
+    type(mock_text_part).text = mock_text_part.text
+
+    # Mock content with parts
+    mock_content = MagicMock()
+    mock_content.parts = [mock_text_part]
+
+    # Mock candidate with grounding metadata
+    mock_candidate = MagicMock()
+    mock_candidate.content = mock_content
+    mock_candidate.grounding_metadata = mock_grounding_metadata
+    type(mock_candidate).grounding_metadata = mock_candidate.grounding_metadata
+
+    mock_response.candidates = [mock_candidate]
+    mock_response.text = "According to search results..."
+
+    # Mock the generate_content method
+    mock_google_genai_client.models.generate_content.return_value = mock_response
+
+    client = Client(api_key="test-key", posthog_client=mock_client)
+    response = client.models.generate_content(
+        model="gemini-2.5-flash",
+        contents="What's the latest news?",
+        posthog_distinct_id="test-id",
+    )
+
+    assert response == mock_response
+    assert mock_client.capture.call_count == 1
+
+    call_args = mock_client.capture.call_args[1]
+    props = call_args["properties"]
+
+    # Verify web search count is detected (binary for grounding)
+    assert props["$ai_web_search_count"] == 1
+    assert props["$ai_input_tokens"] == 60
+    assert props["$ai_output_tokens"] == 40
diff --git a/posthog/test/ai/openai/test_openai.py b/posthog/test/ai/openai/test_openai.py
index 0e380b07..95f10336 100644
--- a/posthog/test/ai/openai/test_openai.py
+++ b/posthog/test/ai/openai/test_openai.py
@@ -1339,3 +1339,110 @@ def test_tool_definition(mock_client, mock_openai_response):
         assert isinstance(props["$ai_latency"], float)
         # Verify that tools are captured in the $ai_tools property
         assert props["$ai_tools"] == tools
+
+
+def test_web_search_perplexity_style(mock_client):
+    """Test web search detection via annotations (Perplexity-style)."""
+
+    class MockAnnotation:
+        def __init__(self):
+            self.type = "url_citation"
+
+    class MockMessage:
+        def __init__(self):
+            self.role = "assistant"
+            self.content = "Based on recent search results..."
+            self.annotations = [MockAnnotation(), MockAnnotation()]
+
+    class MockChoice:
+        def __init__(self):
+            self.message = MockMessage()
+
+    class MockUsage:
+        def __init__(self):
+            self.prompt_tokens = 50
+            self.completion_tokens = 30
+
+    class MockResponseWithAnnotations:
+        def __init__(self):
+            self.choices = [MockChoice()]
+            self.usage = MockUsage()
+            self.model = "gpt-4-turbo"
+
+    mock_response = MockResponseWithAnnotations()
+
+    with patch("openai.resources.chat.Completions.create", return_value=mock_response):
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+        response = client.chat.completions.create(
+            model="gpt-4-turbo",
+            messages=[{"role": "user", "content": "What's happening in tech?"}],
+            posthog_distinct_id="test-id",
+        )
+
+        assert response == mock_response
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Verify web search count is detected (binary detection)
+        assert props["$ai_web_search_count"] == 1
+
+
+def test_web_search_responses_api(mock_client):
+    """Test exact web search count from Responses API."""
+
+    class MockWebSearchItem:
+        def __init__(self):
+            self.type = "web_search_call"
+
+    class MockMessageItem:
+        def __init__(self):
+            self.type = "message"
+            self.role = "assistant"
+            self.content = "Here are the results..."
+
+    class MockUsage:
+        def __init__(self):
+            self.input_tokens = 100
+            self.output_tokens = 75
+
+    class MockResponsesAPIResponse:
+        def __init__(self):
+            self.output = [MockWebSearchItem(), MockWebSearchItem(), MockMessageItem()]
+            self.usage = MockUsage()
+            self.model = "gpt-4o"
+
+    mock_response = MockResponsesAPIResponse()
+
+    with patch(
+        "openai.resources.responses.Responses.create", return_value=mock_response
+    ):
+        # Manually call the tracking since we're testing the converter logic
+        from posthog.ai.utils import call_llm_and_track_usage
+
+        def mock_create_call(**kwargs):
+            return mock_response
+
+        result = call_llm_and_track_usage(
+            posthog_distinct_id="test-id",
+            ph_client=mock_client,
+            provider="openai",
+            posthog_trace_id=None,
+            posthog_properties=None,
+            posthog_privacy_mode=False,
+            posthog_groups=None,
+            base_url="https://api.openai.com/v1",
+            call_method=mock_create_call,
+            model="gpt-4o",
+            messages=[{"role": "user", "content": "Search query"}],
+        )
+
+        assert result == mock_response
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Verify exact web search count
+        assert props["$ai_web_search_count"] == 2

From 5fd75c7d67c6b8a8e22d5be5ec76bcfaac227ad6 Mon Sep 17 00:00:00 2001
From: Radu Raicea <radu@raicea.com>
Date: Thu, 30 Oct 2025 16:21:26 -0400
Subject: [PATCH 2/9] feat(llma): add more tests

---
 posthog/ai/anthropic/anthropic_converter.py |   2 +-
 posthog/ai/openai/openai_converter.py       |   2 +
 posthog/test/ai/anthropic/test_anthropic.py | 186 +++++++++++++++++
 posthog/test/ai/gemini/test_gemini.py       | 216 ++++++++++++++++++++
 posthog/test/ai/openai/test_openai.py       | 194 ++++++++++++++++++
 5 files changed, 599 insertions(+), 1 deletion(-)

diff --git a/posthog/ai/anthropic/anthropic_converter.py b/posthog/ai/anthropic/anthropic_converter.py
index 663fc00d..24f20b9f 100644
--- a/posthog/ai/anthropic/anthropic_converter.py
+++ b/posthog/ai/anthropic/anthropic_converter.py
@@ -184,7 +184,7 @@ def extract_anthropic_web_search_count(response: Any) -> int:
     server_tool_use = response.usage.server_tool_use
 
     if hasattr(server_tool_use, "web_search_requests"):
-        return int(getattr(server_tool_use, "web_search_requests", 0))
+        return max(0, int(getattr(server_tool_use, "web_search_requests", 0)))
 
     return 0
 
diff --git a/posthog/ai/openai/openai_converter.py b/posthog/ai/openai/openai_converter.py
index 69491a48..320c0e51 100644
--- a/posthog/ai/openai/openai_converter.py
+++ b/posthog/ai/openai/openai_converter.py
@@ -279,6 +279,8 @@ def extract_openai_web_search_count(response: Any) -> int:
             if hasattr(item, "type") and item.type == "web_search_call":
                 web_search_count += 1
 
+        web_search_count = max(0, web_search_count)
+
         if web_search_count > 0:
             return web_search_count
 
diff --git a/posthog/test/ai/anthropic/test_anthropic.py b/posthog/test/ai/anthropic/test_anthropic.py
index 3cd77e0c..4617f801 100644
--- a/posthog/test/ai/anthropic/test_anthropic.py
+++ b/posthog/test/ai/anthropic/test_anthropic.py
@@ -1077,3 +1077,189 @@ def __init__(self):
         assert props["$ai_web_search_count"] == 3
         assert props["$ai_input_tokens"] == 100
         assert props["$ai_output_tokens"] == 50
+
+
+@pytest.fixture
+def mock_anthropic_stream_with_web_search():
+    """Mock stream events for web search."""
+
+    class MockServerToolUse:
+        def __init__(self):
+            self.web_search_requests = 2
+
+    class MockMessage:
+        def __init__(self):
+            self.usage = MockUsage(
+                input_tokens=50,
+                cache_creation_input_tokens=0,
+                cache_read_input_tokens=5,
+            )
+
+    def stream_generator():
+        # Message start with usage
+        event = MockStreamEvent("message_start")
+        event.message = MockMessage()
+        yield event
+
+        # Text block start
+        event = MockStreamEvent("content_block_start")
+        event.content_block = MockContentBlock("text")
+        event.index = 0
+        yield event
+
+        # Text delta
+        event = MockStreamEvent("content_block_delta")
+        event.delta = MockDelta(text="Here are the search results...")
+        event.index = 0
+        yield event
+
+        # Text block stop
+        event = MockStreamEvent("content_block_stop")
+        event.index = 0
+        yield event
+
+        # Message delta with final usage including web search
+        event = MockStreamEvent("message_delta")
+        usage = MockUsage(output_tokens=25)
+        usage.server_tool_use = MockServerToolUse()
+        event.usage = usage
+        yield event
+
+        # Message stop
+        event = MockStreamEvent("message_stop")
+        yield event
+
+    return stream_generator()
+
+
+def test_streaming_with_web_search(mock_client, mock_anthropic_stream_with_web_search):
+    """Test that web search count is properly captured in streaming mode."""
+    with patch(
+        "anthropic.resources.Messages.create",
+        return_value=mock_anthropic_stream_with_web_search,
+    ):
+        client = Anthropic(api_key="test-key", posthog_client=mock_client)
+        response = client.messages.create(
+            model="claude-3-opus-20240229",
+            messages=[{"role": "user", "content": "Search for recent news"}],
+            stream=True,
+            posthog_distinct_id="test-id",
+        )
+
+        # Consume the stream - this triggers the finally block synchronously
+        list(response)
+
+        # Capture happens synchronously when generator is exhausted
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Verify web search count is captured
+        assert props["$ai_web_search_count"] == 2
+        assert props["$ai_input_tokens"] == 50
+        assert props["$ai_output_tokens"] == 25
+
+
+def test_async_with_web_search(mock_client):
+    """Test that web search count is properly tracked in async non-streaming mode."""
+    import asyncio
+
+    # Create a mock usage with web search
+    class MockServerToolUse:
+        def __init__(self):
+            self.web_search_requests = 3
+
+    class MockUsageWithWebSearch:
+        def __init__(self):
+            self.input_tokens = 100
+            self.output_tokens = 50
+            self.cache_read_input_tokens = 0
+            self.cache_creation_input_tokens = 0
+            self.server_tool_use = MockServerToolUse()
+
+    class MockResponseWithWebSearch:
+        def __init__(self):
+            self.content = [MockContent(text="Search results show...")]
+            self.model = "claude-3-opus-20240229"
+            self.usage = MockUsageWithWebSearch()
+
+    mock_response = MockResponseWithWebSearch()
+
+    async def mock_async_create(**kwargs):
+        return mock_response
+
+    with patch(
+        "anthropic.resources.AsyncMessages.create",
+        side_effect=mock_async_create,
+    ):
+        async_client = AsyncAnthropic(api_key="test-key", posthog_client=mock_client)
+
+        async def run_test():
+            response = await async_client.messages.create(
+                model="claude-3-opus-20240229",
+                messages=[{"role": "user", "content": "Search for recent news"}],
+                posthog_distinct_id="test-id",
+            )
+            return response
+
+        # asyncio.run() waits for all async operations to complete
+        response = asyncio.run(run_test())
+
+        assert response == mock_response
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Verify web search count is captured
+        assert props["$ai_web_search_count"] == 3
+        assert props["$ai_input_tokens"] == 100
+        assert props["$ai_output_tokens"] == 50
+
+
+def test_async_streaming_with_web_search(
+    mock_client, mock_anthropic_stream_with_web_search
+):
+    """Test that web search count is properly captured in async streaming mode."""
+    import asyncio
+
+    async def mock_async_generator():
+        # Convert regular generator to async generator
+        for event in mock_anthropic_stream_with_web_search:
+            yield event
+
+    async def mock_async_create(**kwargs):
+        # Return the async generator (to be awaited by the implementation)
+        return mock_async_generator()
+
+    with patch(
+        "anthropic.resources.AsyncMessages.create",
+        side_effect=mock_async_create,
+    ):
+        async_client = AsyncAnthropic(api_key="test-key", posthog_client=mock_client)
+
+        async def run_test():
+            response = await async_client.messages.create(
+                model="claude-3-opus-20240229",
+                messages=[{"role": "user", "content": "Search for recent news"}],
+                stream=True,
+                posthog_distinct_id="test-id",
+            )
+
+            # Consume the async stream
+            [event async for event in response]
+
+        # asyncio.run() waits for all async operations to complete
+        asyncio.run(run_test())
+
+        # Capture completes before asyncio.run() returns
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Verify web search count is captured
+        assert props["$ai_web_search_count"] == 2
+        assert props["$ai_input_tokens"] == 50
+        assert props["$ai_output_tokens"] == 25
diff --git a/posthog/test/ai/gemini/test_gemini.py b/posthog/test/ai/gemini/test_gemini.py
index 954ed06d..581bdcdf 100644
--- a/posthog/test/ai/gemini/test_gemini.py
+++ b/posthog/test/ai/gemini/test_gemini.py
@@ -898,3 +898,219 @@ def test_web_search_grounding(mock_client, mock_google_genai_client):
     assert props["$ai_web_search_count"] == 1
     assert props["$ai_input_tokens"] == 60
     assert props["$ai_output_tokens"] == 40
+
+
+def test_streaming_with_web_search(mock_client, mock_google_genai_client):
+    """Test that web search count is properly captured in streaming mode."""
+
+    def mock_streaming_response():
+        # Create chunk 1 with grounding metadata
+        mock_chunk1 = MagicMock()
+        mock_chunk1.text = "According to "
+
+        mock_usage1 = MagicMock()
+        mock_usage1.prompt_token_count = 30
+        mock_usage1.candidates_token_count = 5
+        mock_usage1.cached_content_token_count = 0
+        mock_usage1.thoughts_token_count = 0
+        mock_chunk1.usage_metadata = mock_usage1
+
+        # Add grounding metadata to first chunk
+        mock_grounding_chunk = MagicMock()
+        mock_grounding_chunk.uri = "https://example.com"
+
+        mock_grounding_metadata = MagicMock()
+        mock_grounding_metadata.grounding_chunks = [mock_grounding_chunk]
+
+        mock_candidate1 = MagicMock()
+        mock_candidate1.grounding_metadata = mock_grounding_metadata
+        type(mock_candidate1).grounding_metadata = mock_candidate1.grounding_metadata
+
+        mock_chunk1.candidates = [mock_candidate1]
+
+        # Create chunk 2
+        mock_chunk2 = MagicMock()
+        mock_chunk2.text = "search results..."
+
+        mock_usage2 = MagicMock()
+        mock_usage2.prompt_token_count = 30
+        mock_usage2.candidates_token_count = 15
+        mock_usage2.cached_content_token_count = 0
+        mock_usage2.thoughts_token_count = 0
+        mock_chunk2.usage_metadata = mock_usage2
+
+        mock_candidate2 = MagicMock()
+        mock_chunk2.candidates = [mock_candidate2]
+
+        yield mock_chunk1
+        yield mock_chunk2
+
+    # Mock the generate_content_stream method
+    mock_google_genai_client.models.generate_content_stream.return_value = (
+        mock_streaming_response()
+    )
+
+    client = Client(api_key="test-key", posthog_client=mock_client)
+
+    response = client.models.generate_content_stream(
+        model="gemini-2.5-flash",
+        contents="What's the latest news?",
+        posthog_distinct_id="test-id",
+    )
+
+    chunks = list(response)
+    assert len(chunks) == 2
+    assert mock_client.capture.call_count == 1
+
+    call_args = mock_client.capture.call_args[1]
+    props = call_args["properties"]
+
+    # Verify web search count is detected (binary for grounding)
+    assert props["$ai_web_search_count"] == 1
+    assert props["$ai_input_tokens"] == 30
+    assert props["$ai_output_tokens"] == 15
+
+
+@pytest.mark.asyncio
+async def test_async_with_web_search(mock_client, mock_google_genai_client):
+    """Test that web search count is properly tracked in async non-streaming mode."""
+
+    # Create mock response with grounding metadata
+    mock_response = MagicMock()
+
+    # Mock usage metadata
+    mock_usage = MagicMock()
+    mock_usage.prompt_token_count = 60
+    mock_usage.candidates_token_count = 40
+    mock_usage.cached_content_token_count = 0
+    mock_usage.thoughts_token_count = 0
+    mock_response.usage_metadata = mock_usage
+
+    # Mock grounding metadata
+    mock_grounding_chunk = MagicMock()
+    mock_grounding_chunk.uri = "https://example.com"
+
+    mock_grounding_metadata = MagicMock()
+    mock_grounding_metadata.grounding_chunks = [mock_grounding_chunk]
+
+    # Mock text part
+    mock_text_part = MagicMock()
+    mock_text_part.text = "According to search results..."
+    type(mock_text_part).text = mock_text_part.text
+
+    # Mock content with parts
+    mock_content = MagicMock()
+    mock_content.parts = [mock_text_part]
+
+    # Mock candidate with grounding metadata
+    mock_candidate = MagicMock()
+    mock_candidate.content = mock_content
+    mock_candidate.grounding_metadata = mock_grounding_metadata
+    type(mock_candidate).grounding_metadata = mock_candidate.grounding_metadata
+
+    mock_response.candidates = [mock_candidate]
+    mock_response.text = "According to search results..."
+
+    # Mock the async generate_content method
+    async def mock_async_generate_content(*args, **kwargs):
+        return mock_response
+
+    mock_google_genai_client.models.generate_content_async = mock_async_generate_content
+
+    client = Client(api_key="test-key", posthog_client=mock_client)
+
+    async def run_test():
+        response = await client.models.generate_content_async(
+            model="gemini-2.5-flash",
+            contents="What's the latest news?",
+            posthog_distinct_id="test-id",
+        )
+        return response
+
+    response = await run_test()
+
+    assert response == mock_response
+    assert mock_client.capture.call_count == 1
+
+    call_args = mock_client.capture.call_args[1]
+    props = call_args["properties"]
+
+    # Verify web search count is detected (binary for grounding)
+    assert props["$ai_web_search_count"] == 1
+    assert props["$ai_input_tokens"] == 60
+    assert props["$ai_output_tokens"] == 40
+
+
+@pytest.mark.asyncio
+async def test_async_streaming_with_web_search(mock_client, mock_google_genai_client):
+    """Test that web search count is properly captured in async streaming mode."""
+
+    async def mock_async_streaming_response():
+        # Create chunk 1 with grounding metadata
+        mock_chunk1 = MagicMock()
+        mock_chunk1.text = "According to "
+
+        mock_usage1 = MagicMock()
+        mock_usage1.prompt_token_count = 30
+        mock_usage1.candidates_token_count = 5
+        mock_usage1.cached_content_token_count = 0
+        mock_usage1.thoughts_token_count = 0
+        mock_chunk1.usage_metadata = mock_usage1
+
+        # Add grounding metadata to first chunk
+        mock_grounding_chunk = MagicMock()
+        mock_grounding_chunk.uri = "https://example.com"
+
+        mock_grounding_metadata = MagicMock()
+        mock_grounding_metadata.grounding_chunks = [mock_grounding_chunk]
+
+        mock_candidate1 = MagicMock()
+        mock_candidate1.grounding_metadata = mock_grounding_metadata
+        type(mock_candidate1).grounding_metadata = mock_candidate1.grounding_metadata
+
+        mock_chunk1.candidates = [mock_candidate1]
+
+        # Create chunk 2
+        mock_chunk2 = MagicMock()
+        mock_chunk2.text = "search results..."
+
+        mock_usage2 = MagicMock()
+        mock_usage2.prompt_token_count = 30
+        mock_usage2.candidates_token_count = 15
+        mock_usage2.cached_content_token_count = 0
+        mock_usage2.thoughts_token_count = 0
+        mock_chunk2.usage_metadata = mock_usage2
+
+        mock_candidate2 = MagicMock()
+        mock_chunk2.candidates = [mock_candidate2]
+
+        yield mock_chunk1
+        yield mock_chunk2
+
+    # Mock the async generate_content_stream method
+    mock_google_genai_client.models.generate_content_stream_async = (
+        mock_async_streaming_response
+    )
+
+    client = Client(api_key="test-key", posthog_client=mock_client)
+
+    response = await client.models.generate_content_stream_async(
+        model="gemini-2.5-flash",
+        contents="What's the latest news?",
+        posthog_distinct_id="test-id",
+    )
+
+    chunks = []
+    async for chunk in response:
+        chunks.append(chunk)
+
+    assert len(chunks) == 2
+    assert mock_client.capture.call_count == 1
+
+    call_args = mock_client.capture.call_args[1]
+    props = call_args["properties"]
+
+    # Verify web search count is detected (binary for grounding)
+    assert props["$ai_web_search_count"] == 1
+    assert props["$ai_input_tokens"] == 30
+    assert props["$ai_output_tokens"] == 15
diff --git a/posthog/test/ai/openai/test_openai.py b/posthog/test/ai/openai/test_openai.py
index 95f10336..a281f8a9 100644
--- a/posthog/test/ai/openai/test_openai.py
+++ b/posthog/test/ai/openai/test_openai.py
@@ -1446,3 +1446,197 @@ def mock_create_call(**kwargs):
 
         # Verify exact web search count
         assert props["$ai_web_search_count"] == 2
+
+
+@pytest.fixture
+def streaming_web_search_chunks():
+    """Streaming chunks with web search indicators (Perplexity-style)."""
+    return [
+        ChatCompletionChunk(
+            id="chunk1",
+            model="gpt-4",
+            object="chat.completion.chunk",
+            created=1234567890,
+            choices=[
+                ChoiceChunk(
+                    index=0,
+                    delta=ChoiceDelta(
+                        role="assistant",
+                        content="Based on my search, ",
+                    ),
+                    finish_reason=None,
+                )
+            ],
+        ),
+        ChatCompletionChunk(
+            id="chunk2",
+            model="gpt-4",
+            object="chat.completion.chunk",
+            created=1234567891,
+            choices=[
+                ChoiceChunk(
+                    index=0,
+                    delta=ChoiceDelta(
+                        content="here are the latest news...",
+                    ),
+                    finish_reason=None,
+                )
+            ],
+        ),
+        ChatCompletionChunk(
+            id="chunk3",
+            model="gpt-4",
+            object="chat.completion.chunk",
+            created=1234567892,
+            choices=[
+                ChoiceChunk(
+                    index=0,
+                    delta=ChoiceDelta(),
+                    finish_reason="stop",
+                )
+            ],
+            usage=CompletionUsage(
+                prompt_tokens=20,
+                completion_tokens=15,
+                total_tokens=35,
+            ),
+        ),
+    ]
+
+
+def test_streaming_with_web_search(mock_client, streaming_web_search_chunks):
+    """Test that web search count is properly captured in streaming mode."""
+
+    # Add citations attribute to the last chunk to indicate web search was used
+    streaming_web_search_chunks[-1].citations = ["https://example.com/news"]
+
+    with patch("openai.resources.chat.completions.Completions.create") as mock_create:
+        mock_create.return_value = streaming_web_search_chunks
+
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+        response_generator = client.chat.completions.create(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Search for recent news"}],
+            stream=True,
+            posthog_distinct_id="test-id",
+        )
+
+        # Consume the generator to trigger the event capture
+        chunks = list(response_generator)
+
+        # Verify the chunks were returned correctly
+        assert len(chunks) == 3
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Verify web search count is captured (binary detection = 1)
+        assert props["$ai_web_search_count"] == 1
+        assert props["$ai_input_tokens"] == 20
+        assert props["$ai_output_tokens"] == 15
+
+
+@pytest.mark.asyncio
+async def test_async_chat_with_web_search(mock_client):
+    """Test that web search count is properly tracked in async non-streaming mode."""
+
+    # Create mock response with citations (Perplexity-style)
+    mock_response = ChatCompletion(
+        id="chatcmpl-test",
+        model="gpt-4",
+        object="chat.completion",
+        created=1234567890,
+        choices=[
+            Choice(
+                index=0,
+                message=ChatCompletionMessage(
+                    role="assistant",
+                    content="Here are the search results...",
+                ),
+                finish_reason="stop",
+            )
+        ],
+        usage=CompletionUsage(
+            prompt_tokens=20,
+            completion_tokens=15,
+            total_tokens=35,
+        ),
+    )
+
+    # Add citations attribute to indicate web search
+    mock_response.citations = ["https://example.com/result1"]
+
+    async def mock_create(self, **kwargs):
+        return mock_response
+
+    with patch(
+        "openai.resources.chat.completions.AsyncCompletions.create", new=mock_create
+    ):
+        client = AsyncOpenAI(api_key="test-key", posthog_client=mock_client)
+
+        response = await client.chat.completions.create(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Search for recent news"}],
+            posthog_distinct_id="test-id",
+        )
+
+    assert response == mock_response
+    assert mock_client.capture.call_count == 1
+
+    call_args = mock_client.capture.call_args[1]
+    props = call_args["properties"]
+
+    # Verify web search count is captured (binary detection = 1)
+    assert props["$ai_web_search_count"] == 1
+    assert props["$ai_input_tokens"] == 20
+    assert props["$ai_output_tokens"] == 15
+
+
+@pytest.mark.asyncio
+async def test_async_chat_streaming_with_web_search(
+    mock_client, streaming_web_search_chunks
+):
+    """Test that web search count is properly captured in async streaming mode."""
+
+    # Add citations attribute to the last chunk to indicate web search was used
+    streaming_web_search_chunks[-1].citations = ["https://example.com/news"]
+
+    captured_kwargs = {}
+
+    async def mock_create(self, **kwargs):
+        captured_kwargs["kwargs"] = kwargs
+
+        async def chunk_iterable():
+            for chunk in streaming_web_search_chunks:
+                yield chunk
+
+        return chunk_iterable()
+
+    with patch(
+        "openai.resources.chat.completions.AsyncCompletions.create", new=mock_create
+    ):
+        client = AsyncOpenAI(api_key="test-key", posthog_client=mock_client)
+
+        response_stream = await client.chat.completions.create(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Search for recent news"}],
+            stream=True,
+            posthog_distinct_id="test-id",
+        )
+
+        chunks = []
+        async for chunk in response_stream:
+            chunks.append(chunk)
+
+    # Verify the chunks were returned correctly
+    assert len(chunks) == 3
+    assert mock_client.capture.call_count == 1
+
+    call_args = mock_client.capture.call_args[1]
+    props = call_args["properties"]
+
+    # Verify web search count is captured (binary detection = 1)
+    assert props["$ai_web_search_count"] == 1
+    assert props["$ai_input_tokens"] == 20
+    assert props["$ai_output_tokens"] == 15

From 312b9c8e8044d375fc6678647a7fd9616fe24dde Mon Sep 17 00:00:00 2001
From: Radu Raicea <radu@raicea.com>
Date: Thu, 30 Oct 2025 16:23:04 -0400
Subject: [PATCH 3/9] chore(llma): bump version

---
 CHANGELOG.md       | 4 ++++
 posthog/version.py | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index efe00781..367530e2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+# 6.8.0 - 2025-10-28
+
+- feat(llma): send web search calls to be used for LLM cost calculations
+
 # 6.7.11 - 2025-10-28
 
 - feat(ai): Add `$ai_framework` property for framework integrations (e.g. LangChain)
diff --git a/posthog/version.py b/posthog/version.py
index 1fe91ee0..c3dd9e0b 100644
--- a/posthog/version.py
+++ b/posthog/version.py
@@ -1,4 +1,4 @@
-VERSION = "6.7.11"
+VERSION = "6.8.0"
 
 if __name__ == "__main__":
     print(VERSION, end="")  # noqa: T201

From 73ab99af4a866b73080db1a5375c17cdcc9cf254 Mon Sep 17 00:00:00 2001
From: Radu Raicea <radu@raicea.com>
Date: Fri, 31 Oct 2025 13:22:20 -0400
Subject: [PATCH 4/9] fix(llma): feedback

---
 posthog/ai/gemini/gemini_converter.py | 16 +++++++-----
 posthog/ai/openai/openai_async.py     | 19 ++++++++++++++
 posthog/ai/openai/openai_converter.py | 12 +++++----
 posthog/test/ai/openai/test_openai.py | 36 +++++++++++++++++++++++++++
 4 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/posthog/ai/gemini/gemini_converter.py b/posthog/ai/gemini/gemini_converter.py
index 3d509598..4fc2b2ba 100644
--- a/posthog/ai/gemini/gemini_converter.py
+++ b/posthog/ai/gemini/gemini_converter.py
@@ -445,16 +445,20 @@ def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
 
     usage: TokenUsage = TokenUsage()
 
+    # Extract web search count from the chunk before checking for usage_metadata
+    # Web search indicators can appear on any chunk, not just those with usage data
+    web_search_count = extract_gemini_web_search_count(chunk)
+    if web_search_count > 0:
+        usage["web_search_count"] = web_search_count
+
     if not hasattr(chunk, "usage_metadata") or not chunk.usage_metadata:
         return usage
 
-    # Use the shared helper to extract usage
-    usage = _extract_usage_from_metadata(chunk.usage_metadata)
+    usage_from_metadata = _extract_usage_from_metadata(chunk.usage_metadata)
 
-    # Add web search count if present
-    web_search_count = extract_gemini_web_search_count(chunk)
-    if web_search_count > 0:
-        usage["web_search_count"] = web_search_count
+    # Merge the usage from metadata with any web search count we found
+    for key, value in usage_from_metadata.items():
+        usage[key] = value
 
     return usage
 
diff --git a/posthog/ai/openai/openai_async.py b/posthog/ai/openai/openai_async.py
index 54ded707..404895fc 100644
--- a/posthog/ai/openai/openai_async.py
+++ b/posthog/ai/openai/openai_async.py
@@ -213,6 +213,15 @@ async def _capture_streaming_event(
             **(posthog_properties or {}),
         }
 
+        # Add web search count if present
+        web_search_count = usage_stats.get("web_search_count")
+        if (
+            web_search_count is not None
+            and isinstance(web_search_count, int)
+            and web_search_count > 0
+        ):
+            event_properties["$ai_web_search_count"] = web_search_count
+
         if available_tool_calls:
             event_properties["$ai_tools"] = available_tool_calls
 
@@ -444,6 +453,16 @@ async def _capture_streaming_event(
             **(posthog_properties or {}),
         }
 
+        # Add web search count if present
+        web_search_count = usage_stats.get("web_search_count")
+
+        if (
+            web_search_count is not None
+            and isinstance(web_search_count, int)
+            and web_search_count > 0
+        ):
+            event_properties["$ai_web_search_count"] = web_search_count
+
         if available_tool_calls:
             event_properties["$ai_tools"] = available_tool_calls
 
diff --git a/posthog/ai/openai/openai_converter.py b/posthog/ai/openai/openai_converter.py
index 320c0e51..76ee0162 100644
--- a/posthog/ai/openai/openai_converter.py
+++ b/posthog/ai/openai/openai_converter.py
@@ -415,6 +415,13 @@ def extract_openai_usage_from_chunk(
     usage: TokenUsage = TokenUsage()
 
     if provider_type == "chat":
+        # Extract web search count from the chunk before checking for usage
+        # Web search indicators (citations, annotations) can appear on any chunk,
+        # not just those with usage data
+        web_search_count = extract_openai_web_search_count(chunk)
+        if web_search_count > 0:
+            usage["web_search_count"] = web_search_count
+
         if not hasattr(chunk, "usage") or not chunk.usage:
             return usage
 
@@ -439,11 +446,6 @@ def extract_openai_usage_from_chunk(
                 chunk.usage.completion_tokens_details.reasoning_tokens
             )
 
-        # Extract web search count from the chunk (available in final streaming chunks)
-        web_search_count = extract_openai_web_search_count(chunk)
-        if web_search_count > 0:
-            usage["web_search_count"] = web_search_count
-
     elif provider_type == "responses":
         # For Responses API, usage is only in chunk.response.usage for completed events
         if hasattr(chunk, "type") and chunk.type == "response.completed":
diff --git a/posthog/test/ai/openai/test_openai.py b/posthog/test/ai/openai/test_openai.py
index a281f8a9..07d45753 100644
--- a/posthog/test/ai/openai/test_openai.py
+++ b/posthog/test/ai/openai/test_openai.py
@@ -1537,6 +1537,42 @@ def test_streaming_with_web_search(mock_client, streaming_web_search_chunks):
         assert props["$ai_output_tokens"] == 15
 
 
+def test_streaming_with_web_search_on_non_usage_chunk(
+    mock_client, streaming_web_search_chunks
+):
+    """Test that web search count is captured even when citations appear on chunks without usage data."""
+
+    # Add citations attribute to the FIRST chunk (which has no usage data)
+    # This tests the fix for the bug where web search indicators on non-usage chunks were ignored
+    streaming_web_search_chunks[0].citations = ["https://example.com/news"]
+
+    with patch("openai.resources.chat.completions.Completions.create") as mock_create:
+        mock_create.return_value = streaming_web_search_chunks
+
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+        response_generator = client.chat.completions.create(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Search for recent news"}],
+            stream=True,
+            posthog_distinct_id="test-id",
+        )
+
+        # Consume the generator to trigger the event capture
+        chunks = list(response_generator)
+
+        # Verify the chunks were returned correctly
+        assert len(chunks) == 3
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Verify web search count is captured even though citations were on first chunk
+        assert props["$ai_web_search_count"] == 1
+        assert props["$ai_input_tokens"] == 20
+        assert props["$ai_output_tokens"] == 15
+
+
 @pytest.mark.asyncio
 async def test_async_chat_with_web_search(mock_client):
     """Test that web search count is properly tracked in async non-streaming mode."""

From 5c9b6fb49be3d26e800785bd33f3368c3f5325bf Mon Sep 17 00:00:00 2001
From: Radu Raicea <radu@raicea.com>
Date: Mon, 3 Nov 2025 15:30:19 -0500
Subject: [PATCH 5/9] fix(llma): fix Gemini

---
 posthog/ai/gemini/gemini_converter.py |  19 +++-
 posthog/test/ai/gemini/test_gemini.py | 127 ++++++++++++++++++++++++++
 2 files changed, 144 insertions(+), 2 deletions(-)

diff --git a/posthog/ai/gemini/gemini_converter.py b/posthog/ai/gemini/gemini_converter.py
index 4fc2b2ba..b7996fa8 100644
--- a/posthog/ai/gemini/gemini_converter.py
+++ b/posthog/ai/gemini/gemini_converter.py
@@ -343,7 +343,7 @@ def extract_gemini_web_search_count(response: Any) -> int:
     Extract web search count from Gemini response.
 
     Gemini bills per request that uses grounding, not per query.
-    Returns 1 if grounding_metadata is present, 0 otherwise.
+    Returns 1 if grounding_metadata is present with actual search data, 0 otherwise.
 
     Args:
         response: The response from Gemini API
@@ -359,7 +359,21 @@ def extract_gemini_web_search_count(response: Any) -> int:
                 hasattr(candidate, "grounding_metadata")
                 and candidate.grounding_metadata
             ):
-                return 1
+                grounding_metadata = candidate.grounding_metadata
+
+                # Check if web_search_queries exists and is non-empty
+                if hasattr(grounding_metadata, "web_search_queries"):
+                    queries = grounding_metadata.web_search_queries
+
+                    if queries is not None and len(queries) > 0:
+                        return 1
+
+                # Check if grounding_chunks exists and is non-empty
+                if hasattr(grounding_metadata, "grounding_chunks"):
+                    chunks = grounding_metadata.grounding_chunks
+
+                    if chunks is not None and len(chunks) > 0:
+                        return 1
 
             # Also check for google_search or grounding in function call names
             if hasattr(candidate, "content") and candidate.content:
@@ -369,6 +383,7 @@ def extract_gemini_web_search_count(response: Any) -> int:
                             function_name = getattr(
                                 part.function_call, "name", ""
                             ).lower()
+
                             if (
                                 "google_search" in function_name
                                 or "grounding" in function_name
diff --git a/posthog/test/ai/gemini/test_gemini.py b/posthog/test/ai/gemini/test_gemini.py
index 581bdcdf..1379d4e2 100644
--- a/posthog/test/ai/gemini/test_gemini.py
+++ b/posthog/test/ai/gemini/test_gemini.py
@@ -1114,3 +1114,130 @@ async def mock_async_streaming_response():
     assert props["$ai_web_search_count"] == 1
     assert props["$ai_input_tokens"] == 30
     assert props["$ai_output_tokens"] == 15
+
+
+def test_empty_grounding_metadata_no_web_search(mock_client, mock_google_genai_client):
+    """Test that empty grounding_metadata (all null fields) does not count as web search."""
+
+    # Create mock response with empty grounding metadata (all null fields)
+    mock_response = MagicMock()
+
+    # Mock usage metadata
+    mock_usage = MagicMock()
+    mock_usage.prompt_token_count = 10
+    mock_usage.candidates_token_count = 10
+    mock_usage.cached_content_token_count = 0
+    mock_usage.thoughts_token_count = 0
+    mock_response.usage_metadata = mock_usage
+
+    # Mock empty grounding metadata (all fields are None)
+    mock_grounding_metadata = MagicMock()
+    mock_grounding_metadata.web_search_queries = None
+    mock_grounding_metadata.grounding_chunks = None
+    mock_grounding_metadata.grounding_supports = None
+    mock_grounding_metadata.retrieval_metadata = None
+    mock_grounding_metadata.retrieval_queries = None
+    mock_grounding_metadata.search_entry_point = None
+
+    # Mock text part
+    mock_text_part = MagicMock()
+    mock_text_part.text = "Hey there! How can I help you today?"
+    type(mock_text_part).text = mock_text_part.text
+
+    # Mock content with parts
+    mock_content = MagicMock()
+    mock_content.parts = [mock_text_part]
+
+    # Mock candidate with empty grounding metadata
+    mock_candidate = MagicMock()
+    mock_candidate.content = mock_content
+    mock_candidate.grounding_metadata = mock_grounding_metadata
+    type(mock_candidate).grounding_metadata = mock_candidate.grounding_metadata
+
+    mock_response.candidates = [mock_candidate]
+    mock_response.text = "Hey there! How can I help you today?"
+
+    # Mock the generate_content method
+    mock_google_genai_client.models.generate_content.return_value = mock_response
+
+    client = Client(api_key="test-key", posthog_client=mock_client)
+
+    response = client.models.generate_content(
+        model="gemini-2.5-flash",
+        contents="Hello",
+        posthog_distinct_id="test-id",
+    )
+
+    assert response == mock_response
+    assert mock_client.capture.call_count == 1
+
+    call_args = mock_client.capture.call_args[1]
+    props = call_args["properties"]
+
+    # Verify web search count is 0 (not present in properties when 0)
+    assert "$ai_web_search_count" not in props
+    assert props["$ai_input_tokens"] == 10
+    assert props["$ai_output_tokens"] == 10
+
+
+def test_empty_array_grounding_metadata_no_web_search(
+    mock_client, mock_google_genai_client
+):
+    """Test that grounding_metadata with empty arrays does not count as web search."""
+
+    # Create mock response with grounding metadata having empty arrays
+    mock_response = MagicMock()
+
+    # Mock usage metadata
+    mock_usage = MagicMock()
+    mock_usage.prompt_token_count = 15
+    mock_usage.candidates_token_count = 12
+    mock_usage.cached_content_token_count = 0
+    mock_usage.thoughts_token_count = 0
+    mock_response.usage_metadata = mock_usage
+
+    # Mock grounding metadata with empty arrays
+    mock_grounding_metadata = MagicMock()
+    mock_grounding_metadata.web_search_queries = []
+    mock_grounding_metadata.grounding_chunks = []
+    mock_grounding_metadata.grounding_supports = []
+
+    # Mock text part
+    mock_text_part = MagicMock()
+    mock_text_part.text = "I can help with that."
+    type(mock_text_part).text = mock_text_part.text
+
+    # Mock content with parts
+    mock_content = MagicMock()
+    mock_content.parts = [mock_text_part]
+
+    # Mock candidate with grounding metadata containing empty arrays
+    mock_candidate = MagicMock()
+    mock_candidate.content = mock_content
+    mock_candidate.grounding_metadata = mock_grounding_metadata
+    type(mock_candidate).grounding_metadata = mock_candidate.grounding_metadata
+
+    mock_response.candidates = [mock_candidate]
+    mock_response.text = "I can help with that."
+
+    # Mock the generate_content method
+    mock_google_genai_client.models.generate_content.return_value = mock_response
+
+    client = Client(api_key="test-key", posthog_client=mock_client)
+
+    response = client.models.generate_content(
+        model="gemini-2.5-flash",
+        contents="What can you do?",
+        posthog_distinct_id="test-id",
+    )
+
+    assert response == mock_response
+    assert mock_client.capture.call_count == 1
+
+    call_args = mock_client.capture.call_args[1]
+    props = call_args["properties"]
+
+    # Verify web search count is 0 (not present in properties when 0)
+    assert "$ai_web_search_count" not in props
+    assert props["$ai_input_tokens"] == 15
+    assert props["$ai_output_tokens"] == 12

From e6e417c29dc31c6861d26e3f54be1293f3147341 Mon Sep 17 00:00:00 2001
From: Radu Raicea <radu@raicea.com>
Date: Mon, 3 Nov 2025 17:07:10 -0500
Subject: [PATCH 6/9] fix(llma): fix OpenAI's Chat Completions streaming

---
 posthog/ai/openai/openai_converter.py | 48 ++++++++++++++++++++++-----
 posthog/ai/utils.py                   |  2 +-
 2 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/posthog/ai/openai/openai_converter.py b/posthog/ai/openai/openai_converter.py
index 76ee0162..e7b8ce5e 100644
--- a/posthog/ai/openai/openai_converter.py
+++ b/posthog/ai/openai/openai_converter.py
@@ -263,7 +263,7 @@ def extract_openai_web_search_count(response: Any) -> int:
     1. Priority 1 (exact count): Check for output[].type == "web_search_call" (Responses API)
     2. Priority 2 (binary detection): Check for various web search indicators:
        - Root-level citations, search_results, or usage.search_context_size (Perplexity)
-       - Annotations with type "url_citation" in choices/output
+       - Annotations with type "url_citation" in choices/output (including delta for streaming)
 
     Args:
         response: The response from OpenAI API
@@ -275,6 +275,7 @@ def extract_openai_web_search_count(response: Any) -> int:
     # Priority 1: Check for exact count in Responses API output
     if hasattr(response, "output"):
         web_search_count = 0
+
         for item in response.output:
             if hasattr(item, "type") and item.type == "web_search_call":
                 web_search_count += 1
@@ -289,11 +290,13 @@ def extract_openai_web_search_count(response: Any) -> int:
     # Check root-level indicators (Perplexity)
     if hasattr(response, "citations"):
         citations = getattr(response, "citations")
+
         if citations and len(citations) > 0:
             return 1
 
     if hasattr(response, "search_results"):
         search_results = getattr(response, "search_results")
+
         if search_results and len(search_results) > 0:
             return 1
 
@@ -304,14 +307,36 @@ def extract_openai_web_search_count(response: Any) -> int:
     # Check for url_citation annotations in choices (Chat Completions)
     if hasattr(response, "choices"):
         for choice in response.choices:
+            # Check message.annotations (non-streaming or final chunk)
             if hasattr(choice, "message") and hasattr(choice.message, "annotations"):
                 annotations = choice.message.annotations
+
                 if annotations:
                     for annotation in annotations:
-                        if (
-                            hasattr(annotation, "type")
-                            and annotation.type == "url_citation"
-                        ):
+                        # Support both dict and object formats
+                        annotation_type = (
+                            annotation.get("type")
+                            if isinstance(annotation, dict)
+                            else getattr(annotation, "type", None)
+                        )
+
+                        if annotation_type == "url_citation":
+                            return 1
+
+            # Check delta.annotations (streaming chunks)
+            if hasattr(choice, "delta") and hasattr(choice.delta, "annotations"):
+                annotations = choice.delta.annotations
+
+                if annotations:
+                    for annotation in annotations:
+                        # Support both dict and object formats
+                        annotation_type = (
+                            annotation.get("type")
+                            if isinstance(annotation, dict)
+                            else getattr(annotation, "type", None)
+                        )
+
+                        if annotation_type == "url_citation":
                             return 1
 
     # Check for url_citation annotations in output (Responses API)
@@ -321,12 +346,17 @@ def extract_openai_web_search_count(response: Any) -> int:
                 for content_item in item.content:
                     if hasattr(content_item, "annotations"):
                         annotations = content_item.annotations
+
                         if annotations:
                             for annotation in annotations:
-                                if (
-                                    hasattr(annotation, "type")
-                                    and annotation.type == "url_citation"
-                                ):
+                                # Support both dict and object formats
+                                annotation_type = (
+                                    annotation.get("type")
+                                    if isinstance(annotation, dict)
+                                    else getattr(annotation, "type", None)
+                                )
+
+                                if annotation_type == "url_citation":
                                     return 1
 
     return 0
diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py
index 03b37600..559860cc 100644
--- a/posthog/ai/utils.py
+++ b/posthog/ai/utils.py
@@ -57,7 +57,7 @@ def merge_usage_stats(
         source_web_search = source.get("web_search_count")
         if source_web_search is not None:
             current = target.get("web_search_count") or 0
-            target["web_search_count"] = current + source_web_search
+            target["web_search_count"] = max(current, source_web_search)
 
     elif mode == "cumulative":
         # Replace with latest values (already cumulative)

From 1bcec76b57a780963719db2400387998197fca69 Mon Sep 17 00:00:00 2001
From: Radu Raicea <radu@raicea.com>
Date: Mon, 3 Nov 2025 17:16:49 -0500
Subject: [PATCH 7/9] fix(llma): fix mypy issue

---
 posthog/ai/gemini/gemini_converter.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/posthog/ai/gemini/gemini_converter.py b/posthog/ai/gemini/gemini_converter.py
index b7996fa8..bfa3625a 100644
--- a/posthog/ai/gemini/gemini_converter.py
+++ b/posthog/ai/gemini/gemini_converter.py
@@ -472,8 +472,7 @@ def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
     usage_from_metadata = _extract_usage_from_metadata(chunk.usage_metadata)
 
     # Merge the usage from metadata with any web search count we found
-    for key, value in usage_from_metadata.items():
-        usage[key] = value
+    usage.update(usage_from_metadata)
 
     return usage
 

From 84ab551a80d851232d87f13f113adcbd170e309c Mon Sep 17 00:00:00 2001
From: Radu Raicea <radu@raicea.com>
Date: Tue, 4 Nov 2025 11:47:10 -0500
Subject: [PATCH 8/9] fix(llma): anthropic streaming

---
 posthog/ai/anthropic/anthropic_async.py | 97 ++++++++-----------------
 1 file changed, 30 insertions(+), 67 deletions(-)

diff --git a/posthog/ai/anthropic/anthropic_async.py b/posthog/ai/anthropic/anthropic_async.py
index 527b73f9..d83a9828 100644
--- a/posthog/ai/anthropic/anthropic_async.py
+++ b/posthog/ai/anthropic/anthropic_async.py
@@ -14,14 +14,9 @@
 from posthog.ai.types import StreamingContentBlock, TokenUsage, ToolInProgress
 from posthog.ai.utils import (
     call_llm_and_track_usage_async,
-    extract_available_tool_calls,
-    get_model_params,
-    merge_system_prompt,
     merge_usage_stats,
-    with_privacy_mode,
 )
 from posthog.ai.anthropic.anthropic_converter import (
-    format_anthropic_streaming_content,
     extract_anthropic_usage_from_event,
     handle_anthropic_content_block_start,
     handle_anthropic_text_delta,
@@ -220,66 +215,34 @@ async def _capture_streaming_event(
         content_blocks: List[StreamingContentBlock],
         accumulated_content: str,
     ):
-        if posthog_trace_id is None:
-            posthog_trace_id = str(uuid.uuid4())
-
-        # Format output using converter
-        formatted_content = format_anthropic_streaming_content(content_blocks)
-        formatted_output = []
-
-        if formatted_content:
-            formatted_output = [{"role": "assistant", "content": formatted_content}]
-        else:
-            # Fallback to accumulated content if no blocks
-            formatted_output = [
-                {
-                    "role": "assistant",
-                    "content": [{"type": "text", "text": accumulated_content}],
-                }
-            ]
-
-        event_properties = {
-            "$ai_provider": "anthropic",
-            "$ai_model": kwargs.get("model"),
-            "$ai_model_parameters": get_model_params(kwargs),
-            "$ai_input": with_privacy_mode(
-                self._client._ph_client,
-                posthog_privacy_mode,
-                sanitize_anthropic(merge_system_prompt(kwargs, "anthropic")),
-            ),
-            "$ai_output_choices": with_privacy_mode(
-                self._client._ph_client,
-                posthog_privacy_mode,
-                formatted_output,
-            ),
-            "$ai_http_status": 200,
-            "$ai_input_tokens": usage_stats.get("input_tokens", 0),
-            "$ai_output_tokens": usage_stats.get("output_tokens", 0),
-            "$ai_cache_read_input_tokens": usage_stats.get(
-                "cache_read_input_tokens", 0
-            ),
-            "$ai_cache_creation_input_tokens": usage_stats.get(
-                "cache_creation_input_tokens", 0
+        from posthog.ai.types import StreamingEventData
+        from posthog.ai.anthropic.anthropic_converter import (
+            format_anthropic_streaming_input,
+            format_anthropic_streaming_output_complete,
+        )
+        from posthog.ai.utils import capture_streaming_event
+
+        # Prepare standardized event data
+        formatted_input = format_anthropic_streaming_input(kwargs)
+        sanitized_input = sanitize_anthropic(formatted_input)
+
+        event_data = StreamingEventData(
+            provider="anthropic",
+            model=kwargs.get("model", "unknown"),
+            base_url=str(self._client.base_url),
+            kwargs=kwargs,
+            formatted_input=sanitized_input,
+            formatted_output=format_anthropic_streaming_output_complete(
+                content_blocks, accumulated_content
             ),
-            "$ai_latency": latency,
-            "$ai_trace_id": posthog_trace_id,
-            "$ai_base_url": str(self._client.base_url),
-            **(posthog_properties or {}),
-        }
-
-        # Add tools if available
-        available_tools = extract_available_tool_calls("anthropic", kwargs)
-
-        if available_tools:
-            event_properties["$ai_tools"] = available_tools
-
-        if posthog_distinct_id is None:
-            event_properties["$process_person_profile"] = False
-
-        if hasattr(self._client._ph_client, "capture"):
-            self._client._ph_client.capture(
-                distinct_id=posthog_distinct_id or posthog_trace_id,
-                event="$ai_generation",
-                properties=event_properties,
-                groups=posthog_groups,
-            )
+            usage_stats=usage_stats,
+            latency=latency,
+            distinct_id=posthog_distinct_id,
+            trace_id=posthog_trace_id,
+            properties=posthog_properties,
+            privacy_mode=posthog_privacy_mode,
+            groups=posthog_groups,
+        )
+
+        # Use the common capture function
+        capture_streaming_event(self._client._ph_client, event_data)

From cc0cfc7b38330d9dccd84d5289ba6d40bd230677 Mon Sep 17 00:00:00 2001
From: Radu Raicea <radu@raicea.com>
Date: Tue, 4 Nov 2025 14:40:46 -0500
Subject: [PATCH 9/9] fix(llma): remove async gemini tests

---
 posthog/test/ai/gemini/test_gemini.py | 145 --------------------------
 1 file changed, 145 deletions(-)

diff --git a/posthog/test/ai/gemini/test_gemini.py b/posthog/test/ai/gemini/test_gemini.py
index 1379d4e2..e0c216a0 100644
--- a/posthog/test/ai/gemini/test_gemini.py
+++ b/posthog/test/ai/gemini/test_gemini.py
@@ -971,151 +971,6 @@ def mock_streaming_response():
     assert props["$ai_output_tokens"] == 15
 
 
-@pytest.mark.asyncio
-async def test_async_with_web_search(mock_client, mock_google_genai_client):
-    """Test that web search count is properly tracked in async non-streaming mode."""
-
-    # Create mock response with grounding metadata
-    mock_response = MagicMock()
-
-    # Mock usage metadata
-    mock_usage = MagicMock()
-    mock_usage.prompt_token_count = 60
-    mock_usage.candidates_token_count = 40
-    mock_usage.cached_content_token_count = 0
-    mock_usage.thoughts_token_count = 0
-    mock_response.usage_metadata = mock_usage
-
-    # Mock grounding metadata
-    mock_grounding_chunk = MagicMock()
-    mock_grounding_chunk.uri = "https://example.com"
-
-    mock_grounding_metadata = MagicMock()
-    mock_grounding_metadata.grounding_chunks = [mock_grounding_chunk]
-
-    # Mock text part
-    mock_text_part = MagicMock()
-    mock_text_part.text = "According to search results..."
-    type(mock_text_part).text = mock_text_part.text
-
-    # Mock content with parts
-    mock_content = MagicMock()
-    mock_content.parts = [mock_text_part]
-
-    # Mock candidate with grounding metadata
-    mock_candidate = MagicMock()
-    mock_candidate.content = mock_content
-    mock_candidate.grounding_metadata = mock_grounding_metadata
-    type(mock_candidate).grounding_metadata = mock_candidate.grounding_metadata
-
-    mock_response.candidates = [mock_candidate]
-    mock_response.text = "According to search results..."
-
-    # Mock the async generate_content method
-    async def mock_async_generate_content(*args, **kwargs):
-        return mock_response
-
-    mock_google_genai_client.models.generate_content_async = mock_async_generate_content
-
-    client = Client(api_key="test-key", posthog_client=mock_client)
-
-    async def run_test():
-        response = await client.models.generate_content_async(
-            model="gemini-2.5-flash",
-            contents="What's the latest news?",
-            posthog_distinct_id="test-id",
-        )
-        return response
-
-    response = await run_test()
-
-    assert response == mock_response
-    assert mock_client.capture.call_count == 1
-
-    call_args = mock_client.capture.call_args[1]
-    props = call_args["properties"]
-
-    # Verify web search count is detected (binary for grounding)
-    assert props["$ai_web_search_count"] == 1
-    assert props["$ai_input_tokens"] == 60
-    assert props["$ai_output_tokens"] == 40
-
-
-@pytest.mark.asyncio
-async def test_async_streaming_with_web_search(mock_client, mock_google_genai_client):
-    """Test that web search count is properly captured in async streaming mode."""
-
-    async def mock_async_streaming_response():
-        # Create chunk 1 with grounding metadata
-        mock_chunk1 = MagicMock()
-        mock_chunk1.text = "According to "
-
-        mock_usage1 = MagicMock()
-        mock_usage1.prompt_token_count = 30
-        mock_usage1.candidates_token_count = 5
-        mock_usage1.cached_content_token_count = 0
-        mock_usage1.thoughts_token_count = 0
-        mock_chunk1.usage_metadata = mock_usage1
-
-        # Add grounding metadata to first chunk
-        mock_grounding_chunk = MagicMock()
-        mock_grounding_chunk.uri = "https://example.com"
-
-        mock_grounding_metadata = MagicMock()
-        mock_grounding_metadata.grounding_chunks = [mock_grounding_chunk]
-
-        mock_candidate1 = MagicMock()
-        mock_candidate1.grounding_metadata = mock_grounding_metadata
-        type(mock_candidate1).grounding_metadata = mock_candidate1.grounding_metadata
-
-        mock_chunk1.candidates = [mock_candidate1]
-
-        # Create chunk 2
-        mock_chunk2 = MagicMock()
-        mock_chunk2.text = "search results..."
-
-        mock_usage2 = MagicMock()
-        mock_usage2.prompt_token_count = 30
-        mock_usage2.candidates_token_count = 15
-        mock_usage2.cached_content_token_count = 0
-        mock_usage2.thoughts_token_count = 0
-        mock_chunk2.usage_metadata = mock_usage2
-
-        mock_candidate2 = MagicMock()
-        mock_chunk2.candidates = [mock_candidate2]
-
-        yield mock_chunk1
-        yield mock_chunk2
-
-    # Mock the async generate_content_stream method
-    mock_google_genai_client.models.generate_content_stream_async = (
-        mock_async_streaming_response
-    )
-
-    client = Client(api_key="test-key", posthog_client=mock_client)
-
-    response = await client.models.generate_content_stream_async(
-        model="gemini-2.5-flash",
-        contents="What's the latest news?",
-        posthog_distinct_id="test-id",
-    )
-
-    chunks = []
-    async for chunk in response:
-        chunks.append(chunk)
-
-    assert len(chunks) == 2
-    assert mock_client.capture.call_count == 1
-
-    call_args = mock_client.capture.call_args[1]
-    props = call_args["properties"]
-
-    # Verify web search count is detected (binary for grounding)
-    assert props["$ai_web_search_count"] == 1
-    assert props["$ai_input_tokens"] == 30
-    assert props["$ai_output_tokens"] == 15
-
-
 def test_empty_grounding_metadata_no_web_search(mock_client, mock_google_genai_client):
     """Test that empty grounding_metadata (all null fields) does not count as web search."""