In [None]:
%pip install google-cloud-aiplatform google-auth pydantic python-dotenv

In [39]:
import os
import json
from typing import Iterator, List, Union, Dict, Any, Optional
import base64
import re

from pydantic import BaseModel, Field

# Google Auth
from google.oauth2 import service_account
from google.oauth2.service_account import Credentials

# Google Vertex SDK deps
import vertexai
from vertexai.generative_models import (
    Content,
    GenerativeModel,
    GenerationResponse,
    HarmBlockThreshold,
    HarmCategory,
    Part,
    Image,
)
from vertexai.preview.generative_models import Tool, grounding, GenerationConfig

# Google GenAI SDK deps
from google import genai
from google.genai.types import (
    GenerateContentConfig,
    GoogleSearch,
    HttpOptions,
    Tool,
)

# local env mgmt, don't use in production
from dotenv import load_dotenv

load_dotenv()

True

In [None]:
# Load the service-account key (a JSON document) from the environment.
SERVICE_ACCOUNT_JSON = os.getenv("VERTEX_API_KEY_JSON", "").strip()
if not SERVICE_ACCOUNT_JSON:
    # Fail with an actionable message instead of json's "Expecting value" error.
    raise ValueError(
        "VERTEX_API_KEY_JSON is not set or empty; "
        "it must contain the service-account key as a JSON string."
    )

# strict=False makes the decoder accept raw control characters (literal
# newlines / carriage returns) inside JSON strings. That covers a key whose
# string values contain real newlines, and -- unlike blanket-escaping every
# newline in the document -- also keeps pretty-printed (multi-line) JSON
# parseable.
key_info = json.loads(SERVICE_ACCOUNT_JSON, strict=False)

creds = Credentials.from_service_account_info(
    key_info, scopes=["https://www.googleapis.com/auth/cloud-platform"]
)

In [53]:
def get_content_from_message(message: dict) -> Optional[str]:
    """Return the textual content of a chat message.

    Args:
        message: A message dict with a ``"content"`` key. The content is
            either returned as-is (when it is not a list) or treated as a
            list of part dicts carrying ``"type"`` and, for text parts,
            ``"text"``.

    Returns:
        The non-list content unchanged, or the ``"text"`` of the first part
        whose ``"type"`` equals ``"text"``; ``None`` when a list content has
        no text part.

    Raises:
        KeyError: If ``"content"`` is missing, or an inspected part lacks
            ``"type"`` (or a text part lacks ``"text"``).
    """
    content = message["content"]
    if not isinstance(content, list):
        return content

    # Lazily scan the parts so we stop at the first text part.
    first_text = (part["text"] for part in content if part["type"] == "text")
    return next(first_text, None)

def get_last_assistant_message(messages: list[dict]) -> Optional[str]:
    """Return the text of the most recent assistant message.

    Args:
        messages: Chat messages in chronological order; each is a dict with
            a ``"role"`` key.

    Returns:
        Whatever ``get_content_from_message`` yields for the last message
        whose role is ``"assistant"``, or ``None`` when there is no such
        message.
    """
    # Walk newest-to-oldest and stop at the first assistant turn.
    assistant_turns = (
        turn for turn in reversed(messages) if turn["role"] == "assistant"
    )
    latest = next(assistant_turns, None)
    if latest is None:
        return None
    return get_content_from_message(latest)


def _render_ws_citations(assistant_text: str) -> str:
    """Turn ``<ws_text>…<ws_url>…</ws_url></ws_text>`` markers into inline citations.

    Each marker pairs a cited text segment with a ``", "``-separated list of
    links. The markers are stripped, every occurrence of a cited segment in
    the remaining text gets `` [links]`` appended, and a numbered "Sources"
    list of the distinct links (in first-seen order) is added at the end.

    Args:
        assistant_text: Assistant message that may contain citation markers.

    Returns:
        The rendered message; the input unchanged if no complete marker is found.
    """
    pattern = re.compile(
        r"<ws_text>(?P<text>.*?)<ws_url>(?P<url>.*?)</ws_url></ws_text>",
        re.DOTALL,
    )
    pairs = [(m.group("text"), m.group("url")) for m in pattern.finditer(assistant_text)]
    if not pairs:
        return assistant_text

    rendered = pattern.sub("", assistant_text)

    # A dict keeps insertion order, so the numbered list is stable across runs
    # (a set would reorder it under string hash randomisation).
    sources: dict[str, None] = {}
    for cited_text, links in pairs:
        # str.replace("", x) would insert x between every character, and an
        # empty link list would only add a dangling " []".
        if cited_text and links:
            rendered = rendered.replace(cited_text, f"{cited_text} [{links}]")
        for link in links.split(", "):
            if link:
                sources[link] = None

    if sources:
        rendered += "\nSources:\n"
        for number, link in enumerate(sources, start=1):
            rendered += f"- {number}. {link}\n"
    return rendered


def outlet(body: dict, user: Optional[dict] = None) -> dict:
    """Post-process a chat body, rendering citation markers in the last assistant message.

    If the most recent assistant message contains ``<ws_text>`` markers, its
    content is replaced by the rendered text (inline citations plus a
    "Sources" list). Messages without markers, or bodies without an assistant
    message, are left untouched.

    Args:
        body: Request/response body holding a ``"messages"`` list of message
            dicts (each with ``"role"`` and ``"content"``).
        user: Unused; accepted for interface compatibility.

    Returns:
        A shallow copy of ``body`` referencing the same ``messages`` list.
        Note that the affected message dict is updated in place.
    """
    print(f"outlet:{__name__}")

    messages = body["messages"]

    # Locate the message once so the one we read is the one we write back to.
    last_assistant = next(
        (m for m in reversed(messages) if m["role"] == "assistant"), None
    )
    if last_assistant is None:
        return {**body, "messages": messages}

    assistant_message = get_content_from_message(last_assistant)

    # The content may be None (list content without a text part) or contain
    # no markers at all; in both cases there is nothing to rewrite.
    if isinstance(assistant_message, str) and "<ws_text>" in assistant_message:
        new_assistant_message = _render_ws_citations(assistant_message)
        print(f"New assistant message: \n{new_assistant_message}")
        last_assistant["content"] = new_assistant_message

    return {**body, "messages": messages}

# Initialize the GenAI client against Vertex AI with the explicit
# service-account credentials built earlier in the notebook.
client = genai.Client(
    vertexai=True,
    project="tts-datagov",
    location="us-central1",
    credentials=creds,
    http_options=HttpOptions(api_version="v1"),
)

# Stream a Google-Search-grounded answer. For every grounded text segment a
# "<ws_text>segment<ws_url>links</ws_url></ws_text>" marker is appended to the
# accumulated text; outlet() later turns those markers into inline citations.
# NOTE: `Tool` here is google.genai.types.Tool -- that import comes after, and
# therefore shadows, the `Tool` imported from vertexai.preview.generative_models.
response_text = ""
for chunk in client.models.generate_content_stream(
    model="gemini-2.0-flash-001",
    contents="What is the most recent google gemini model?",
    config=GenerateContentConfig(
        tools=[
            Tool(google_search=GoogleSearch()),  # Use Google Search Tool
        ],
        temperature=0.0,
        top_p=1.0
    ),
):
    urls = []
    text_index_pairs = []

    # The SDK types text / candidates / web / segment as optional, so guard
    # each of them instead of assuming every chunk carries all fields.
    chunk_text = chunk.text or ""

    for candidate in chunk.candidates or []:
        print(chunk_text)
        grounding_metadata = candidate.grounding_metadata
        if not grounding_metadata:
            continue

        for grounding_chunk in grounding_metadata.grounding_chunks or []:
            web = grounding_chunk.web
            if web is None:
                # Placeholder keeps list positions aligned with the
                # grounding_chunk_indices used by the supports below.
                urls.append("")
                continue
            urls.append(f"[{web.domain}]({web.uri}) 🔗")

        for grounding_support in grounding_metadata.grounding_supports or []:
            segment = grounding_support.segment
            if segment is None or not segment.text:
                continue
            text_index_pairs.append(
                [segment.text, grounding_support.grounding_chunk_indices or []]
            )

    response_text += chunk_text

    for text, indices in text_index_pairs:
        # Skip out-of-range indices and the empty non-web placeholders.
        md_links = ", ".join(
            urls[index] for index in indices if index < len(urls) and urls[index]
        )
        response_text += f"\n<ws_text>{text}<ws_url>{md_links}</ws_url></ws_text>"


from IPython.display import clear_output
# Drop the streamed progress output so only the final rendered message remains.
clear_output()
new_body = outlet(body={"messages": [{"role": "assistant", "content": response_text}]})
# print(new_body['messages'][0]['content'])

outlet:__main__
New assistant message: 
The most recent Google Gemini models are:

*   **Gemini 2.5 Pro Preview:** Launched on May 6, 2025, this model is Google's most advanced for complex tasks, leading on benchmarks for reasoning and code capabilities [[googleblog.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGUctdDzP874B2d0C4Dyk9FPuNk1eQbiBScnM74AXL2A746ZU-H194Q5QajpA80KFinvOvFoo5a-Wl8ge-yw98KtB1-3iIsQ61JxwDu5hfMFAn8p5IdjjtEOM-RMSuHPMC5iKh9YjNlPb15gwNdXJYJXVhJrgMwbgD_NNo_RCQ=) 🔗, [blog.google](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHir41lrwj6I3mjn097743bI73wUpjsTGKcLEnOVmyneN1AF_ftUxdHGRY-gXsg-Emm7NTCAbEHTc0k-3fqiIEJaOpbq9Ag39KPPbk6esONB87TnP3Mp065mb2KuqD7sj1AjBeBE12quFLA571PbfxRyUfHGd8775f9qkP25Da9QzG2-nb_5aX2QZyG1xoU-ifNMw==) 🔗]. It is available in Google AI Studio and the Gemini app for Gemini Advanced users [[blog.google](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHir41lrwj6I3mjn097743bI73wUpjsTG

As of May 10, 2025, the most recent Google Gemini models are:

*   **Gemini 2.5 Pro Preview:** This model is described as Google's most advanced model for complex tasks, leading on benchmarks for reasoning and code capabilities [[blog.google](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFb-r7nVjicBd8cNzKz2qhdjyvppxvm2h9uAZO9To-PutW01bZI41uCYWLkcn-tytPthuZirOTDcjVQHAnYVujnKd_VFMbE3f1YW454-RZfqxm-TrT8J-QB_Bf8erTG4s_6kKLhGaAhC5eB2AD8JFuTthqtT-UKQCmAo3yZ0UpsDJZ90i1W6NaxsauwTxpDd-1YxfM=) 🔗, [google.dev](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEkHJQG-bGilRq9-94akRz4lzd_jz9Fx_tMR3MnAI4Xz8LUGsdvWK2I0-O5B8ppOI35DF36IdQxKsvIcnNqS644uWW5L65ukjL87AsCLciqpEZSAvtmzZ8nUeU1wMkCc2mPrhcNqX_M) 🔗]. It is available in Google AI Studio and the Gemini app for Gemini Advanced users [[blog.google](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFb-r7nVjicBd8cNzKz2qhdjyvppxvm2h9uAZO9To-PutW01bZI41uCYWLkcn-tytPthuZirOTDcjVQHAnYVujnKd_VFMbE3f1YW454-RZfqxm-TrT8J-QB_Bf8erTG4s_6kKLhGaAhC5eB2AD8JFuTthqtT-UKQCmAo3yZ0UpsDJZ90i1W6NaxsauwTxpDd-1YxfM=) 🔗].
*   **Gemini 2.5 Flash:** This model is a highly competitive alternative for cost-sensitive applications [[googleblog.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHnPBbDVOfBxAYtOXyboxZKYDfuOMegOurRp8qL71eQ_gOero2T_4jyiszX1zIqybnFVXK9YHG3D3Cm4vFNTQJ9wPHt66zu-JkfMWtXnr-0C7sT1l59rdq6hZOrBb77usqA4BI0S3fm5f7S7muV8b5b2IN380KOuubAqirG8Dha) 🔗].
*   **Gemini 2.0 Flash:** This model is designed for fast responses and strong performance in tasks like brainstorming, learning, and writing [[google.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXF2cVnvym1Y5LaF_mL2xKzCmcFFpQl1z5PiWvCwBU-ZdBlfs1Q5AlJDeq5ZD9-arLbNRWjJUN6aKi-pumnI6dCeCdnGiLX8KfTcD_6Rt-gJAxZHvqCWXgSCK5UVwg==) 🔗]. An enhanced version with a more natural conversational style was released on April 19, 2025 [[google.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXF2cVnvym1Y5LaF_mL2xKzCmcFFpQl1z5PiWvCwBU-ZdBlfs1Q5AlJDeq5ZD9-arLbNRWjJUN6aKi-pumnI6dCeCdnGiLX8KfTcD_6Rt-gJAxZHvqCWXgSCK5UVwg==) 🔗].






Sources:
- 1. [googleblog.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHnPBbDVOfBxAYtOXyboxZKYDfuOMegOurRp8qL71eQ_gOero2T_4jyiszX1zIqybnFVXK9YHG3D3Cm4vFNTQJ9wPHt66zu-JkfMWtXnr-0C7sT1l59rdq6hZOrBb77usqA4BI0S3fm5f7S7muV8b5b2IN380KOuubAqirG8Dha) 🔗
- 2. [blog.google](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFb-r7nVjicBd8cNzKz2qhdjyvppxvm2h9uAZO9To-PutW01bZI41uCYWLkcn-tytPthuZirOTDcjVQHAnYVujnKd_VFMbE3f1YW454-RZfqxm-TrT8J-QB_Bf8erTG4s_6kKLhGaAhC5eB2AD8JFuTthqtT-UKQCmAo3yZ0UpsDJZ90i1W6NaxsauwTxpDd-1YxfM=) 🔗
- 3. [google.dev](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEkHJQG-bGilRq9-94akRz4lzd_jz9Fx_tMR3MnAI4Xz8LUGsdvWK2I0-O5B8ppOI35DF36IdQxKsvIcnNqS644uWW5L65ukjL87AsCLciqpEZSAvtmzZ8nUeU1wMkCc2mPrhcNqX_M) 🔗
- 4. [google.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXF2cVnvym1Y5LaF_mL2xKzCmcFFpQl1z5PiWvCwBU-ZdBlfs1Q5AlJDeq5ZD9-arLbNRWjJUN6aKi-pumnI6dCeCdnGiLX8KfTcD_6Rt-gJAxZHvqCWXgSCK5UVwg==) 🔗