In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Get Started with Vertex AI Prompt Optimizer - Tool usage

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/get_started_with_vertex_ai_prompt_optimizer_tool_usage.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fprompts%2Fprompt_optimizer%2Fget_started_with_vertex_ai_prompt_optimizer_tool_usage.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/prompts/prompt_optimizer/get_started_with_vertex_ai_prompt_optimizer_tool_usage.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/get_started_with_vertex_ai_prompt_optimizer_tool_usage.ipynb">
      <img width="32px" src="https://www.svgrepo.com/download/217753/github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/get_started_with_vertex_ai_prompt_optimizer_tool_usage.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/get_started_with_vertex_ai_prompt_optimizer_tool_usage.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/get_started_with_vertex_ai_prompt_optimizer_tool_usage.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/get_started_with_vertex_ai_prompt_optimizer_tool_usage.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/prompt_optimizer/get_started_with_vertex_ai_prompt_optimizer_tool_usage.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>  

| Author(s) |
| --- |
| [Ivan Nardini](https://github.com/inardini) |

## Overview

When developing with large language models, crafting the perfect prompt—a process known as prompt engineering—is both an art and a science. It can be time-consuming and challenging to write prompts that consistently produce the desired results. Furthermore, as new and improved models are released, prompts that worked well before may need to be updated.

To address these challenges, Vertex AI offers the **Prompt Optimizer**, a prompt optimization tool to help you refine and enhance your prompts automatically. This notebook serves as a comprehensive guide to both of its approaches: the **Zero-Shot Optimizer** and the **Data-Driven Optimizer**.

### The two approaches to prompt optimization

#### 1\. Zero-Shot Optimizer

This is your go-to tool for rapid prompt refinement and generation *without* needing an evaluation dataset.

  * **Generate from Scratch**: Simply describe a task in plain language, and it will generate a complete, well-structured system instruction for you.
  * **Refine Existing Prompts**: Provide an existing prompt, and it will rewrite it based on established best practices for clarity, structure, and effectiveness.

#### 2\. Data-Driven Optimizer

This tool performs a deep, performance-based optimization that uses your data to measure success.

  * **Tune for Performance**: You provide a dataset of sample inputs and expected outputs, and it systematically tests and rewrites your system instructions to find the version that scores highest on the evaluation metrics you define.
  * **Task-Specific**: It's the ideal choice when you want to fine-tune a prompt for a specific task and have data to prove what "better" looks like.

In this tutorial, we'll show how to leverage the **Data-Driven Optimizer** to optimize for tool usage with a Gemini model. The goal is to use the Vertex AI Prompt Optimizer to find a new prompt template that improves the model's ability to predict valid tool (function) calls given a user's request.


## Get started

### Install required packages

This command installs the necessary Python libraries.


In [None]:
# Install the Vertex AI SDK and the helper packages listed below into the kernel's environment.
# --force-reinstall reinstalls the packages even when they are already present; --quiet trims pip's output.
%pip install "google-cloud-aiplatform>=1.108.0" "pydantic" "etils" "protobuf==4.25.3" --force-reinstall --quiet

### Authenticate your notebook environment (Colab only)

If you are running this notebook in Google Colab, this cell handles authentication, allowing the notebook to securely access your Google Cloud resources.

In [None]:
import sys

# Authenticate only when the Colab runtime module has been loaded; in any
# other environment this cell is a no-op.
if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

### Set Google Cloud project information

Here, we define essential variables for our Google Cloud project. The Prompt Optimizer job will run within a Google Cloud project. You need to [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com) and use the specified Cloud Storage bucket to read input data and write results.

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Use the environment variable if the user doesn't provide Project ID.
import os

PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
# NOTE(review): if GOOGLE_CLOUD_PROJECT is unset, str(None) makes PROJECT_ID the
# literal string "None", which the commands below would then use as the project.
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    PROJECT_ID = str(os.environ.get("GOOGLE_CLOUD_PROJECT"))

# The shell capture yields the command's output lines; the first line is kept as
# the project number.
# NOTE(review): the trailing [0] on the next line is part of the shell command
# (it ends up in the --format value), not Python indexing - confirm gcloud
# accepts that format expression.
PROJECT_NUMBER = !gcloud projects describe {PROJECT_ID} --format="get(projectNumber)"[0]
PROJECT_NUMBER = PROJECT_NUMBER[0]

# Region for the bucket and the Vertex AI client; defaults to us-central1 when
# GOOGLE_CLOUD_REGION is not set.
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")

# Replace the placeholder with a real bucket name before running this cell.
BUCKET_NAME = "[your-bucket-name]"  # @param {type: "string", placeholder: "[your-bucket-name]", isTemplate: true}
BUCKET_URI = f"gs://{BUCKET_NAME}"

# Create the bucket in LOCATION under PROJECT_ID.
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

import vertexai

# Vertex AI client bound to the project and location chosen above.
client = vertexai.Client(project=PROJECT_ID, location=LOCATION)

### Service account and permissions

The Prompt Optimizer runs as a backend job that needs permission to perform actions on your behalf. We grant the necessary IAM roles to the default Compute Engine service account, which the job uses to operate.

  * `Vertex AI User`: Allows the job to call Vertex AI models.
  * `Storage Object Admin`: Allows the job to read your dataset from and write results to your GCS bucket.
  * `Artifact Registry Reader`: Allows the job to download necessary components.

[Check out the documentation](https://cloud.google.com/iam/docs/manage-access-service-accounts#iam-view-access-sa-gcloud) to learn how to grant those permissions to a single service account.

In [None]:
# Default Compute Engine service account address, derived from the project number.
SERVICE_ACCOUNT = f"{PROJECT_NUMBER}-compute@developer.gserviceaccount.com"

# Add one project-level IAM binding per role for that service account.
# --condition=None requests an unconditional binding.
for role in ['aiplatform.user', 'storage.objectAdmin', 'artifactregistry.reader']:

    ! gcloud projects add-iam-policy-binding {PROJECT_ID} \
      --member=serviceAccount:{SERVICE_ACCOUNT} \
      --role=roles/{role} --condition=None

### Import libraries

In [None]:
import json
import logging
from typing import Any, Dict, List, Optional, Tuple

from jsonschema import ValidationError, validate
import pandas as pd
from etils import epath
from google.cloud import storage
from vertexai.generative_models import FunctionDeclaration, Tool, ToolConfig
from pydantic import BaseModel, Field

# Log at INFO level. force=True makes basicConfig replace any handlers already
# attached to the root logger, so the setting takes effect even if logging was
# configured earlier in the session.
logging.basicConfig(level=logging.INFO, force=True)

### Helpers

In [None]:
def get_company_information_api(content: dict[str, Any]) -> str:
    """Simulate an API call that returns a short financial overview of a company.

    Args:
        content: Request payload; must contain a "ticker" key holding the
            company's ticker symbol.

    Returns:
        The canned overview for the ticker, or "No company overview found"
        when the ticker is not in the table.

    Raises:
        KeyError: If `content` has no "ticker" key.
    """

    company_overviews = {
        "AAPL": "Apple maintains a robust financial position with substantial cash reserves and consistent profitability, fueled by its strong brand and loyal customer base. However, growth is slowing and the company faces competition.",
        "ADBE": "Adobe financials are robust, driven by its successful transition to a subscription-based model for its creative and document cloud software.  Profitability and revenue growth are strong.",
        "AMD": "AMD exhibits strong financial performance, gaining market share in the CPU and GPU markets.  Revenue growth and profitability are healthy, driven by strong product offerings.",
        "AMZN": "Amazon financials are mixed, with its e-commerce business facing margin pressure while its cloud computing division (AWS) delivers strong profitability and growth. Its overall revenue remains high but profitability is a concern.",
        "ASML": "ASML boasts a strong financial position due to its monopoly in the extreme ultraviolet lithography market, essential for advanced semiconductor manufacturing.  High profitability and growth are key strengths.",
        "AVGO": "Broadcom maintains healthy financials, driven by its semiconductor and infrastructure software solutions. Acquisitions have played a role in its growth strategy, with consistent profitability and cash flow.",
        "BABA": "Alibaba financials are substantial but facing challenges from regulatory scrutiny in China and increased competition.  E-commerce revenue remains strong but growth is slowing.",
        "BKNG": "Booking Holdings financials are closely tied to the travel industry.  Revenue growth is recovering post-pandemic but profitability can fluctuate based on global travel trends.",
        "CRM": "Salesforce shows robust revenue growth from its cloud-based CRM solutions.  Profitability is improving but competition remains strong.",
        "CSCO": "Cisco financials show moderate growth, transitioning from hardware to software and services.  Profitability is stable but the company faces competition in the networking market.",
        "GOOGL": "Alphabet exhibits strong financials driven by advertising revenue, though facing regulatory scrutiny.  Diversification into other ventures provides growth opportunities but profitability varies.",
        "IBM": "IBM financials are in a state of transformation, shifting focus to hybrid cloud and AI.  Revenue growth is modest, with profitability impacted by legacy businesses.",
        "INTU": "Intuit showcases healthy financials, benefiting from its strong position in tax and financial management software.  Revenue growth and profitability are consistent, fueled by recurring subscription revenue.",
        "META": "Meta Platforms financial performance is tied closely to advertising revenue, facing headwinds from competition and changing privacy regulations.  Investments in the metaverse represent a long-term, high-risk bet.",
        "MSFT": "Microsoft demonstrates healthy financials, benefiting from diversified revenue streams including cloud computing (Azure), software, and hardware.  The company exhibits consistent growth and profitability.",
        "NFLX": "Netflix exhibits strong revenue but faces challenges in maintaining subscriber growth and managing content costs. Profitability varies, and competition in the streaming market is intense.",
        "NOW": "ServiceNow demonstrates strong financials, fueled by its cloud-based workflow automation platform.  Revenue growth and profitability are high, reflecting increased enterprise adoption.",
        "NVDA": "NVIDIA boasts strong financials, driven by its dominance in the GPU market for gaming, AI, and data centers.  High revenue growth and profitability are key strengths.",
        "ORCL": "Oracle financials are in transition, shifting towards cloud-based services. Revenue growth is moderate, and profitability remains stable.  Legacy businesses still contribute significantly.",
        "QCOM": "QUALCOMM financials show strong performance driven by its leadership in mobile chipsets and licensing.  Profitability is high, and growth is tied to the mobile market and 5G adoption.",
        "SAP": "SAP demonstrates steady financials with its enterprise software solutions.  Transition to the cloud is ongoing and impacting revenue growth and profitability.",
        "SMSN": "Samsung financials are diverse, reflecting its presence in various sectors including mobile phones, consumer electronics, and semiconductors. Profitability varies across divisions but the company holds significant cash reserves.",
        "TCEHY": "Tencent financials are driven by its dominant position in the Chinese gaming and social media market. Revenue growth is strong but regulatory risks in China impact its performance.",
        "TSLA": "Tesla financials show strong revenue growth driven by electric vehicle demand, but profitability remains volatile due to production and investment costs. The company high valuation reflects market optimism for future growth.",
        "TSM": "TSMC, a dominant player in semiconductor manufacturing, showcases robust financials fueled by high demand for its advanced chips. Profitability is strong and the company enjoys a technologically advanced position.",
    }
    # The fallback text previously misspelled "overview".
    return company_overviews.get(content["ticker"], "No company overview found")


def get_stock_price_api(content: dict[str, Any]) -> str:
    "A function to simulate an API call to collect most recent stock price for a given company."
    stock_prices = {
        "AAPL": 225,
        "ADBE": 503,
        "AMD": 134,
        "AMZN": 202,
        "ASML": 658,
        "AVGO": 164,
        "BABA": 88,
        "BKNG": 4000,
        "CRM": 325,
        "CSCO": 57,
        "GOOGL": 173,
        "IBM": 201,
        "INTU": 607,
        "META": 553,
        "MSFT": 415,
        "NFLX": 823,
        "NOW": 1000,
        "NVDA": 141,
        "ORCL": 183,
        "QCOM": 160,
        "SAP": 228,
        "SMSN": 38,
        "TCEHY": 51,
        "TSLA": 302,
        "TSM": 186,
    }
    return stock_prices.get(str(content["ticker"]), "No stock price found")


def get_company_news_api(content: dict[str, Any]) -> str:
    """Simulate an API call that returns a recent news headline for a company.

    Args:
        content: Request payload; must contain a "ticker" key.

    Returns:
        The canned headline for the ticker, or "No news available" when the
        ticker is not in the table.
    """
    headlines_by_ticker = {
        "AAPL": "Apple unveils new iPhone, market reaction muted amid concerns about slowing growth.",
        "ADBE": "Adobe integrates AI features into Creative Suite, attracting creative professionals.",
        "AMD": "AMD gains market share in server CPUs, competing with Intel.",
        "AMZN": "Amazon stock dips after reporting lower-than-expected Q3 profits due to increased shipping costs.",
        "ASML": "ASML benefits from high demand for advanced chip manufacturing equipment.",
        "AVGO": "Broadcom announces new acquisition in the semiconductor space.",
        "BABA": "Alibaba stock faces uncertainty amid ongoing regulatory scrutiny in China.",
        "BKNG": "Booking Holdings stock recovers as travel demand rebounds post-pandemic.",
        "CRM": "Salesforce launches new AI-powered CRM tools for enterprise customers.",
        "CSCO": "Cisco stock rises after positive earnings report, focus on networking solutions.",
        "GOOGL": "Alphabet announces new AI-powered search features, aiming to compete with Microsoft.",
        "IBM": "IBM focuses on hybrid cloud solutions, showing steady growth in enterprise segment.",
        "INTU": "Intuit stock dips after announcing price increases for its tax software.",
        "META": "Meta shares rise after positive user growth figures in emerging markets.",
        "MSFT": "Microsoft expands AI integration across its product suite, boosting investor confidence.",
        "NFLX": "Netflix subscriber growth slows, competition heats up in streaming landscape.",
        "NOW": "ServiceNow sees strong growth in its cloud-based workflow automation platform.",
        "NVDA": "Nvidia stock jumps on strong earnings forecast, driven by AI demand.",
        "ORCL": "Oracle cloud revenue continues strong growth, exceeding market expectations.",
        "QCOM": "Qualcomm expands its 5G modem business, partnering with major smartphone manufacturers.",
        "SAP": "SAP cloud transition continues, but faces challenges in attracting new clients.",
        "SMSN": "Samsung unveils new foldable phones, looking to gain market share.",
        "TCEHY": "Tencent faces regulatory pressure in China, impacting investor sentiment.",
        "TSLA": "Tesla stock volatile after price cuts and production increases announced.",
        "TSM": "TSMC reports record chip demand but warns of potential supply chain disruptions.",
    }
    ticker = content["ticker"]
    if ticker in headlines_by_ticker:
        return headlines_by_ticker[ticker]
    return "No news available"


def get_company_sentiment_api(content: dict[str, Any]) -> str:
    """Simulate an API call that returns the market sentiment label for a company.

    Args:
        content: Request payload; must contain a "ticker" key.

    Returns:
        The canned sentiment label for the ticker, or "No sentiment available"
        when the ticker is not in the table.
    """
    sentiment_by_ticker = {
        "AAPL": "Neutral",
        "ADBE": "Neutral",
        "AMD": "Neutral",
        "AMZN": "Neutral",
        "ASML": "Bearish/Undervalued",
        "AVGO": "Neutral",
        "BABA": "Neutral",
        "BKNG": "Neutral",
        "CRM": "Neutral",
        "CSCO": "Neutral",
        "GOOGL": "Neutral",
        "IBM": "Neutral",
        "INTU": "Mixed/Bullish",
        "META": "Neutral",
        "MSFT": "Neutral",
        "NFLX": "Neutral",
        "NOW": "Bullish/Overvalued",
        "NVDA": "Neutral",
        "ORCL": "Neutral",
        "QCOM": "Neutral",
        "SAP": "Neutral",
        "SMSN": "Neutral",
        "TCEHY": "Neutral",
        "TSLA": "Slightly Overvalued",
        "TSM": "Neutral",
    }
    ticker = content["ticker"]
    if ticker in sentiment_by_ticker:
        return sentiment_by_ticker[ticker]
    return "No sentiment available"

def replace_type_key(data: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of `data` with "type_" keys renamed to "type" in its function declarations.

    Only the value stored under the top-level "function_declarations" key is
    rewritten, and only when that value is a list; every nested dict and list
    inside it is walked. All other top-level entries are carried over as-is.
    The input mapping itself is not modified.
    """

    def _rename(node: Any) -> Any:
        # Walk lists and dicts; anything else is a leaf and is returned unchanged.
        if isinstance(node, list):
            return [_rename(child) for child in node]
        if isinstance(node, dict):
            return {
                ("type" if name == "type_" else name): _rename(child)
                for name, child in node.items()
            }
        return node

    return {
        key: (
            [_rename(declaration) for declaration in value]
            if key == "function_declarations" and isinstance(value, list)
            else value
        )
        for key, value in data.items()
    }

def tool_config_to_dict(tool_config: ToolConfig | None) -> dict[str, Any] | None:
    """Turn a ToolConfig into a plain dictionary, passing `None` through unchanged.

    The result has a single "function_calling_config" entry holding the mode's
    name and the allowed function names as a list.
    """
    if tool_config is None:
        return None

    # The function-calling settings are read from the wrapped config object,
    # which is only reachable through a private attribute.
    # pylint: disable=protected-access
    calling_config = tool_config._gapic_tool_config.function_calling_config
    mode_name = calling_config.mode.name
    allowed_names = list(calling_config.allowed_function_names)
    return {
        "function_calling_config": {
            "mode": mode_name,
            "allowed_function_names": allowed_names,
        }
    }


def validate_tools(spec: str) -> None:
    """Check that a JSON tools specification has the expected structure.

    Args:
        spec: JSON text. It must decode to an object with a non-empty "tools"
            array; each tool needs a non-empty "function_declarations" array
            whose items carry "name", "description" and "parameters", and each
            "parameters" object needs "type" and "properties".

    Raises:
        ValueError: If the decoded specification does not match the schema.
    """
    # The schema is assembled from the innermost object outwards.
    parameters_schema = {
        "type": "object",
        "properties": {
            "type": {"type": "string"},
            "properties": {"type": "object"},
            "required": {
                "type": "array",
                "items": {"type": "string"},
            },
        },
        "required": ["type", "properties"],
    }
    declaration_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "description": {"type": "string"},
            "parameters": parameters_schema,
        },
        "required": ["name", "description", "parameters"],
    }
    tool_schema = {
        "type": "object",
        "properties": {
            "function_declarations": {
                "type": "array",
                # At least one declaration per tool.
                "minItems": 1,
                "items": declaration_schema,
            }
        },
        "required": ["function_declarations"],
    }
    tools_schema = {
        "type": "object",
        "properties": {
            "tools": {
                "type": "array",
                # At least one tool overall.
                "minItems": 1,
                "items": tool_schema,
            }
        },
        "required": ["tools"],
    }

    # Decoding happens outside the try block, so a JSON decoding error is not
    # rewrapped with the "Invalid Tools specification" message.
    parsed_spec = json.loads(spec)
    try:
        validate(instance=parsed_spec, schema=tools_schema)
    except ValidationError as e:
        raise ValueError(f"Invalid Tools specification: {e}") from e


def validate_tool_config(tool_config: str) -> None:
    """Check that a JSON tool_config has the expected structure.

    Args:
        tool_config: JSON text. It must decode to an object with a
            "function_calling_config" object whose required "mode" is one of
            "AUTO", "ANY" or "NONE"; "allowed_function_names", when present,
            must be an array of strings.

    Raises:
        ValueError: If the decoded config does not match the schema.
    """
    function_calling_schema = {
        "type": "object",
        "properties": {
            "mode": {"type": "string", "enum": ["AUTO", "ANY", "NONE"]},
            "allowed_function_names": {
                "type": "array",
                "items": {"type": "string"},
            },
        },
        "required": ["mode"],
    }
    config_schema = {
        "type": "object",
        "properties": {"function_calling_config": function_calling_schema},
        "required": ["function_calling_config"],
    }

    try:
        parsed_config = json.loads(tool_config)
        validate(instance=parsed_config, schema=config_schema)
    except ValidationError as e:
        # The message echoes the raw input; the schema error is kept as the cause.
        raise ValueError(f"Invalid tool_config: {tool_config}") from e

def format_demonstrations(demos: Any) -> List[str]:
    """Render demonstrations as a list of human-readable strings.

    Args:
        demos: Either a list of demonstrations or a JSON string encoding one.

    Returns:
        One string per demonstration: dict entries become "key: value" lines
        joined by newlines, anything else is passed through `str()`. An empty
        list is returned when the input is not (or does not decode to) a list.
    """
    if isinstance(demos, str):
        try:
            demos = json.loads(demos)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return []

    if not isinstance(demos, list):
        return []

    return [
        "\n".join(f"{k}: {v}" for k, v in entry.items())
        if isinstance(entry, dict)
        else str(entry)
        for entry in demos
    ]


def split_gcs_path(gcs_path: str) -> Tuple[str, str]:
    """Break a gs:// URI into its bucket name and object prefix.

    The prefix is everything after the first "/" that follows the bucket name,
    or an empty string when there is none.

    Raises:
        ValueError: If `gcs_path` does not start with "gs://".
    """
    scheme = "gs://"
    if not gcs_path.startswith(scheme):
        raise ValueError(f"Invalid GCS path. Must start with gs://: {gcs_path}")

    bucket, _, prefix = gcs_path[len(scheme):].partition("/")
    return bucket, prefix


def list_gcs_objects(gcs_path: str) -> List[str]:
    """Return the names of all objects whose name starts with the path's prefix.

    Args:
        gcs_path: A gs:// URI; its bucket and prefix select the objects.

    Returns:
        The object names as reported by the bucket listing (without the
        "gs://<bucket>/" part).
    """
    bucket_name, prefix = parse_gcs_path(gcs_path)

    storage_client = storage.Client()
    listing = storage_client.bucket(bucket_name).list_blobs(prefix=prefix)

    object_names = []
    for blob in listing:
        object_names.append(blob.name)
    return object_names


def find_directories_with_files(
    base_path: str, required_files: List[str]
) -> List[str]:
    """Return the gs:// directories under `base_path` that hold every required file.

    Args:
        base_path: A gs:// URI to search under.
        required_files: File names that must all be present directly inside a
            directory for it to be reported.

    Returns:
        Matching directories as "gs://<bucket>/<directory>" strings, in the
        order their first object appears in the listing.
    """
    # Only the bucket name is needed here; the listing call parses the prefix itself.
    bucket_name, _ = split_gcs_path(base_path)
    object_names = list_gcs_objects(base_path)

    # Map each directory (text before the last "/") to the file names found in it.
    files_by_dir: Dict[str, set] = {}
    for object_name in object_names:
        parent, _, leaf = object_name.rpartition("/")
        files_by_dir.setdefault(parent, set()).add(leaf)

    return [
        f"gs://{bucket_name}/{dir_path}"
        for dir_path, present in files_by_dir.items()
        if all(req_file in present for req_file in required_files)
    ]

def parse_gcs_path(gcs_path: str) -> Tuple[str, str]:
    """Break a gs:// URI into its bucket name and object prefix.

    Performs the same split as `split_gcs_path`; only the error message
    differs (it does not include the offending path).

    Raises:
        ValueError: If `gcs_path` does not start with "gs://".
    """
    scheme = "gs://"
    if not gcs_path.startswith(scheme):
        raise ValueError("Invalid GCS path. Must start with gs://")

    # Everything before the first "/" is the bucket; the remainder (possibly
    # empty) is the prefix.
    bucket_name, _, prefix = gcs_path[len(scheme):].partition("/")
    return bucket_name, prefix

def get_best_vapo_results(
    base_path: str, metric_name: Optional[str] = None
) -> Tuple[str, List[str]]:
    """Get the best system instruction and demonstrations across all VAPO runs."""
    # Find all valid runs
    required_files = ["eval_results.json", "templates.json"]
    runs = find_directories_with_files(base_path, required_files)

    if not runs:
        raise ValueError(f"No valid runs found in {base_path}")

    best_score = float("-inf")
    best_instruction = ""
    best_demonstrations: List[str] = []

    for run_path in runs:
        try:
            # Check main templates.json first
            templates_path = f"{run_path}/templates.json"
            with epath.Path(templates_path).open("r") as f:
                templates_data = json.load(f)

            if templates_data:
                df = pd.json_normalize(templates_data)

                # Find metric column
                metric_columns = [
                    col for col in df.columns
                    if "metric" in col and "mean" in col
                ]

                if metric_columns:
                    # Select appropriate metric
                    if metric_name:
                        metric_col = next(
                            (col for col in metric_columns if metric_name in col),
                            None
                        )
                    else:
                        composite_cols = [
                            col for col in metric_columns
                            if "composite_metric" in col
                        ]
                        metric_col = (
                            composite_cols[0] if composite_cols else metric_columns[0]
                        )

                    if metric_col and metric_col in df.columns:
                        best_idx = df[metric_col].argmax()
                        score = float(df.iloc[best_idx][metric_col])

                        if score > best_score:
                            best_score = score
                            best_row = df.iloc[best_idx]

                            # Extract instruction if present
                            if "prompt" in best_row or "instruction" in best_row:
                                instruction = best_row.get(
                                    "prompt", best_row.get("instruction", "")
                                )
                                if instruction:
                                    instruction = instruction.replace(
                                        "store('answer', llm())", "{{llm()}}"
                                    )
                                    best_instruction = instruction

                            # Extract demonstrations if present
                            if "demonstrations" in best_row or "demo_set" in best_row:
                                demos = best_row.get(
                                    "demonstrations", best_row.get("demo_set", [])
                                )
                                best_demonstrations = format_demonstrations(demos)

            # Check instruction-specific optimization
            instruction_path = f"{run_path}/instruction/templates.json"
            try:
                with epath.Path(instruction_path).open("r") as f:
                    instruction_data = json.load(f)

                if instruction_data:
                    inst_df = pd.json_normalize(instruction_data)
                    metric_columns = [
                        col for col in inst_df.columns
                        if "metric" in col and "mean" in col
                    ]

                    if metric_columns:
                        if metric_name:
                            metric_col = next(
                                (col for col in metric_columns if metric_name in col),
                                None,
                            )
                        else:
                            composite_cols = [
                                col for col in metric_columns
                                if "composite_metric" in col
                            ]
                            metric_col = (
                                composite_cols[0] if composite_cols else metric_columns[0]
                            )

                        if metric_col and metric_col in inst_df.columns:
                            inst_best_idx = inst_df[metric_col].argmax()
                            inst_score = float(inst_df.iloc[inst_best_idx][metric_col])

                            if inst_score > best_score:
                                best_score = inst_score
                                best_row = inst_df.iloc[inst_best_idx]

                                instruction = best_row.get(
                                    "prompt", best_row.get("instruction", "")
                                )
                                if instruction:
                                    instruction = instruction.replace(
                                        "store('answer', llm())", "{{llm()}}"
                                    )
                                    best_instruction = instruction
                                # In instruction-only mode, there might not be demonstrations
                                if "demonstrations" not in best_row and "demo_set" not in best_row:
                                    best_demonstrations = []
            except FileNotFoundError:
                pass

            # Check demonstration-specific optimization
            demo_path = f"{run_path}/demonstration/templates.json"
            try:
                with epath.Path(demo_path).open("r") as f:
                    demo_data = json.load(f)

                if demo_data:
                    demo_df = pd.json_normalize(demo_data)
                    metric_columns = [
                        col for col in demo_df.columns
                        if "metric" in col and "mean" in col
                    ]

                    if metric_columns:
                        if metric_name:
                            metric_col = next(
                                (col for col in metric_columns if metric_name in col),
                                None,
                            )
                        else:
                            composite_cols = [
                                col for col in metric_columns
                                if "composite_metric" in col
                            ]
                            metric_col = (
                                composite_cols[0] if composite_cols else metric_columns[0]
                            )

                        if metric_col and metric_col in demo_df.columns:
                            demo_best_idx = demo_df[metric_col].argmax()
                            demo_score = float(demo_df.iloc[demo_best_idx][metric_col])

                            if demo_score > best_score:
                                best_score = demo_score
                                best_row = demo_df.iloc[demo_best_idx]

                                demos = best_row.get(
                                    "demonstrations", best_row.get("demo_set", [])
                                )
                                best_demonstrations = format_demonstrations(demos)
                                # In demo-only mode, there might not be an instruction
                                if "prompt" not in best_row and "instruction" not in best_row:
                                    best_instruction = ""
                                else:
                                    instruction = best_row.get(
                                        "prompt", best_row.get("instruction", "")
                                    )
                                    if instruction:
                                        instruction = instruction.replace(
                                            "store('answer', llm())", "{{llm()}}"
                                        )
                                        best_instruction = instruction
            except (FileNotFoundError, json.JSONDecodeError):
                pass

        except Exception as e:
            logging.warning(f"Error processing run {run_path}: {e}")
            continue

    if best_score == float("-inf"):
        raise ValueError("Could not find any valid results")

    return best_instruction, best_demonstrations

## Using the Data-Driven Optimizer for long prompt optimization

The following sections will guide you through setting up your environment, preparing your data, and running an optimization job to find a better prompt using the data-driven optimizer.

### Preparing the Data and Running the Job

#### The prompt template to optimize

A prompt consists of two key parts:

* **System Instruction Template** which is a fixed part of the prompt that controls or alters the model's behavior across all queries for a given task.

* **Prompt Template** which is a dynamic part of the prompt that changes based on the task. Prompt template includes context, task and more. To learn more, see [components of a prompt](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-design-strategies#components-of-a-prompt) in the official documentation.

In this scenario, you use Vertex AI prompt optimizer to optimize a simple system instruction template, and you use some examples in the remaining prompt template to evaluate different instruction templates during the optimization process.


In [None]:
# System instruction to optimize. The leading and trailing newlines are part
# of the value.
system_instruction = "\nAnswer the question using correct tools.\n"

# Prompt template used with the examples. The {question} and {target}
# placeholders are named after the fields of the optimization dataset.
prompt_template = (
    "\n"
    "Some examples of correct tools associated to a question are:\n"
    "Question: {question}\n"
    "Target tools: {target}\n"
)

#### The optimization dataset

The optimizer's performance depends heavily on the quality of your sample data.

For this example, we use a question-answering dataset where each row contains a `question` and a ground-truth `target` representing a JSON string of expected tool calls. The representation is aligned with the JSON serialized string expected by the Gen AI Evaluation service to evaluate [tool use and function calling](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval#tool-use).


In [None]:
# Cloud Storage URI of the JSON Lines optimization dataset.
input_data_path = (
    "gs://github-repo/prompts/prompt_optimizer/qa_tool_calls_opt_dataset.jsonl"
)

# Read one JSON record per line into a DataFrame, then preview the first rows.
prompt_optimization_df = pd.read_json(path_or_buf=input_data_path, lines=True)
prompt_optimization_df.head()

#### The optimization configuration

Now, we'll create a dictionary with our specific settings and use it to instantiate our `OptimizationConfig` class.

The `OptimizationConfig` class, built using `pydantic`, acts as a structured and validated blueprint for our optimization task. It ensures all necessary parameters are defined before we submit the job.

In this scenario, you set two additional parameters:

* `tools` parameter to pass tool definitions
* `tool_config` parameter to pass tool configuration

For more advanced control, you can learn and explore more about all the parameters and how to best use them in the [detailed documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/prompt-optimizer).


In [None]:
class OptimizationConfig(BaseModel):
    """
    A comprehensive prompt optimization configuration model.

    Each attribute maps to one key of the configuration that is dumped with
    `model_dump()` and uploaded for the optimization job. Fields declared with
    `...` (or without a default) are required; all others fall back to the
    defaults declared here.

    The `tools` and `tool_config` fields are declared explicitly so the values
    passed for tool-usage optimization end up in the dumped configuration
    instead of being discarded as undeclared keys.
    """

    # Basic Configuration
    system_instruction: str = Field(
        ...,
        description="System instructions for the target model. String. This field is required.",
    )
    prompt_template: str = Field(
        ..., description="Template for prompts. String. This field is required."
    )
    target_model: str = Field(
        "gemini-2.5-flash",
        description='Target model for optimization. Supported models: "gemini-2.5-flash", "gemini-2.5-pro"',
    )
    thinking_budget: int = Field(
        -1,
        description="Thinking budget for thinking models. -1 means auto/no thinking. Integer.",
    )
    optimization_mode: str = Field(
        "instruction",
        description='Optimization mode. Supported modes: "instruction", "demonstration", "instruction_and_demo".',
    )
    project: str = Field(
        ..., description="Google Cloud project ID. This field is required."
    )

    # Tool Usage (optional; both are JSON-serialized strings)
    tools: str = Field(
        "",
        description='Tool definitions as a JSON string, e.g. \'{"tools": [{"function_declarations": [...]}]}\'. String.',
    )
    tool_config: str = Field(
        "",
        description='Tool configuration as a JSON string, e.g. \'{"function_calling_config": {...}}\'. String.',
    )

    # Evaluation Settings
    eval_metrics_types: List[str] = Field(
        description='List of evaluation metrics. E.g., "bleu", "rouge_l", "safety".'
    )
    eval_metrics_weights: List[float] = Field(
        description="Weights for evaluation metrics. Length must match eval_metrics_types and should sum to 1."
    )
    aggregation_type: str = Field(
        "weighted_sum",
        description='Aggregation type for metrics. Supported: "weighted_sum", "weighted_average".',
    )
    custom_metric_name: str = Field(
        "",
        description="Metric name, as defined by the key that corresponds in the dictionary returned from Cloud function. String.",
    )
    custom_metric_cloud_function_name: str = Field(
        "",
        description="Cloud Run function name you previously deployed. String.",
    )

    # Data and I/O Paths
    input_data_path: str = Field(
        ...,
        description="Cloud Storage URI to input optimization data. This field is required.",
    )
    output_path: str = Field(
        ...,
        description="Cloud Storage URI to save optimization results. This field is required.",
    )

    # (Optional) Advanced Configuration
    num_steps: int = Field(
        10,
        ge=10,
        le=20,
        description="Number of iterations in instruction optimization mode. Integer between 10 and 20.",
    )
    num_demo_set_candidates: int = Field(
        10,
        ge=10,
        le=30,
        description="Number of demonstrations evaluated. Integer between 10 and 30.",
    )
    demo_set_size: int = Field(
        3,
        ge=3,
        le=6,
        description="Number of demonstrations generated per prompt. Integer between 3 and 6.",
    )

    # (Optional) Model Locations and QPS
    target_model_location: str = Field(
        "us-central1", description="Location of the target model. Default us-central1."
    )
    target_model_qps: int = Field(
        1,
        ge=1,
        description="QPS for the target model. Integer >= 1, based on your quota.",
    )
    optimizer_model_location: str = Field(
        "us-central1",
        description="Location of the optimizer model. Default us-central1.",
    )
    optimizer_model_qps: int = Field(
        1,
        ge=1,
        description="QPS for the optimization model. Integer >= 1, based on your quota.",
    )
    source_model: str = Field(
        "",
        description="Google model previously used with these prompts. Not needed if providing a target column.",
    )
    source_model_location: str = Field(
        "us-central1", description="Location of the source model. Default us-central1."
    )
    source_model_qps: Optional[int] = Field(
        None, ge=1, description="Optional QPS for the source model. Integer >= 1."
    )
    eval_qps: int = Field(
        1,
        ge=1,
        description="QPS for the eval model. Integer >= 1, based on your quota.",
    )

    # (Optional) Response, Language, and Data Handling
    response_mime_type: str = Field(
        "text/plain",
        description="MIME response type from the target model. E.g., 'text/plain', 'application/json'.",
    )
    response_schema: str = Field(
        "", description="The Vertex AI Controlled Generation response schema."
    )
    language: str = Field(
        "English",
        description='Language of the system instructions. E.g., "English", "Japanese".',
    )
    # default_factory gives every instance its own empty dict.
    placeholder_to_content: Dict[str, Any] = Field(
        default_factory=dict,
        description="Dictionary of placeholders to replace parameters in the system instruction.",
    )
    data_limit: int = Field(
        10,
        ge=5,
        le=100,
        description="Amount of data used for validation. Integer between 5 and 100.",
    )
    translation_source_field_name: str = Field(
        "",
        description="Field name for source text if using translation metrics (Comet, MetricX).",
    )
    has_multimodal_inputs: bool = Field(
        False, description="Whether the input data is multimodal."
    )

##### Set tools and tools configuration

To optimize prompts for using external tools with the Vertex AI SDK, define the tools' functionalities using the `FunctionDeclaration` class. This class uses an OpenAPI-compatible schema to structure the tool definitions.  Your system prompt should be designed to effectively leverage these defined functions.  See the [Introduction to function calling](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/function-calling) for more information.  

Example function definitions for a financial assistant are provided below.


In [None]:
# Function declarations for the example financial assistant. Every function
# takes a single required `ticker` argument holding the company's stock ticker.

get_company_information = FunctionDeclaration(
    name="get_company_information",
    description="Retrieves financial performance to provide an overview for a company.",
    parameters={
        "type": "object",
        "properties": {
            "ticker": {
                "type": "string",
                "description": "Stock ticker for a given company",
            }
        },
        "required": ["ticker"],
    },
)

get_stock_price = FunctionDeclaration(
    name="get_stock_price",
    description="Only returns the current stock price (in dollars) for a company.",
    parameters={
        "type": "object",
        "properties": {
            "ticker": {
                # A stock ticker is text, so it is declared as a string like in
                # the other declarations (not as an integer).
                "type": "string",
                "description": "Stock ticker for a company",
            }
        },
        "required": ["ticker"],
    },
)

get_company_news = FunctionDeclaration(
    name="get_company_news",
    description="Get the latest news headlines for a given company.",
    parameters={
        "type": "object",
        "properties": {
            "ticker": {
                "type": "string",
                "description": "Stock ticker for a company.",
            }
        },
        "required": ["ticker"],
    },
)

get_company_sentiment = FunctionDeclaration(
    name="get_company_sentiment",
    description="Returns the overall market sentiment for a company.",
    parameters={
        "type": "object",
        "properties": {
            "ticker": {
                "type": "string",
                "description": "Stock ticker for a company",
            },
        },
        "required": ["ticker"],
    },
)

After declaring your functions, wrap them together in a `Tool` object. This allows the Gemini model to discover and execute these functions.  `ToolConfig` provides additional parameters to control how the model interacts with the tools and chooses which function to call.  

Further information can be found in the [Introduction to function calling](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/function-calling).


In [None]:
# One Tool that bundles all four function declarations.
tools = Tool(
    function_declarations=[
        get_company_information,
        get_stock_price,
        get_company_news,
        get_company_sentiment,
    ],
)

# Function-calling settings: mode ANY, limited to the four declared functions.
_function_calling_config = ToolConfig.FunctionCallingConfig(
    mode=ToolConfig.FunctionCallingConfig.Mode.ANY,
    allowed_function_names=[
        "get_company_information",
        "get_stock_price",
        "get_company_news",
        "get_company_sentiment",
    ],
)

tool_config = ToolConfig(function_calling_config=_function_calling_config)

To use Vertex AI Prompt Optimizer for tool calling optimization, provide `FunctionDeclaration` and `ToolConfig` as JSON structures (see example below). Vertex AI Prompt Optimizer uses those structures along the optimization process.

Tool Calls json:

```json
{"tools": [{"function_declarations": [{"name": "function_1", "description": "My function 1", "parameters": {"type": "OBJECT", "properties": {"argument_1": {"type": "STRING", "description": "My argument 1"}}, "required": ["argument_1"], "property_ordering": ["argument_1"]}}, ...]}]}
```
Function Calling Configuration json:

```json
{"function_calling_config": {"mode": "your_mode", "allowed_function_names": ["tool_name_1", ...]}}
```

The cell below uses helper functions to build those structures and validate them.


In [None]:
# Build the {"tools": [...]} payload from the Tool object and serialize it.
_tools_payload = {"tools": [replace_type_key(tools.to_dict())]}
vapo_tools = json.dumps(_tools_payload)

# Convert the ToolConfig to a dictionary and serialize it as well.
_tool_config_payload = tool_config_to_dict(tool_config)
vapo_tool_config = json.dumps(_tool_config_payload)

# Run the validators on both JSON strings.
validate_tools(vapo_tools)
validate_tool_config(vapo_tool_config)

##### Set the optimization configuration


In [None]:
# Cloud Storage folder that receives the optimization results.
output_path = f"{BUCKET_URI}/optimization_results/"

vapo_data_settings = dict(
    # Prompt and target model
    system_instruction=system_instruction,
    prompt_template=prompt_template,
    target_model="gemini-2.5-flash",
    thinking_budget=-1,
    optimization_mode="instruction",
    # Tool definitions and configuration, as JSON strings
    tools=vapo_tools,
    tool_config=vapo_tool_config,
    # Tool-call evaluation metrics and their weights
    eval_metrics_types=[
        "tool_name_match",
        "tool_parameter_key_match",
        "tool_parameter_kv_match",
    ],
    eval_metrics_weights=[0.4, 0.3, 0.3],
    aggregation_type="weighted_sum",
    # Data locations and project
    input_data_path=input_data_path,
    output_path=output_path,
    project=PROJECT_ID,
)

# Instantiate the configuration model, then dump it to a plain dictionary.
vapo_data_config = OptimizationConfig(**vapo_data_settings)
vapo_data_config_json = vapo_data_config.model_dump()

#### Upload configuration to Cloud Storage

Write the Prompt Optimizer configuration to a file in your Cloud Storage bucket.


In [None]:
# Destination of the configuration file inside the bucket.
config_path = f"{BUCKET_URI}/config.json"

# The `with` block closes the file on exit, so no explicit close() is needed.
with epath.Path(config_path).open("w") as config_file:
    json.dump(vapo_data_config_json, config_file)

#### Run the prompt optimization job

This is the final step. We pass the path to our configuration file and the service account to the Vertex AI client. The `optimize` method starts the custom job on the Vertex AI backend. We set `wait_for_completion` to `True` so the script will pause until the job is finished.


In [None]:
# Run settings: where the uploaded config lives, whether to wait for the job
# to finish, and the service account passed to the client.
vapo_data_run_config = dict(
    config_path=config_path,
    wait_for_completion=True,
    service_account=SERVICE_ACCOUNT,
)

# Start the optimization job with the "vapo" method.
result = client.prompt_optimizer.optimize(
    method="vapo",
    config=vapo_data_run_config,
)

### Get and use the best prompt programmatically

For use in an application, you can programmatically retrieve the top-performing instruction from the output files stored in GCS.


In [None]:
# Keep only the instruction; the second returned value (demonstrations) is
# discarded.
best_instruction, _ = get_best_vapo_results(output_path)
print(f"The optimized instruction is:\n {best_instruction}")

## Cleaning up

In [None]:
delete_job = True
delete_bucket = True

if delete_job:
    from google.cloud import aiplatform
    aiplatform.init(project=PROJECT_ID, location=LOCATION)
    custom_job_list = aiplatform.CustomJob.list()
    latest_job = custom_job_list[0]
    latest_job.delete()

if delete_bucket:
    ! gsutil -m rm -r $BUCKET_URI