In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Create & Deploy Agent and Run Gen AI Agent Evaluation

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/create_agent_and_run_evaluation.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fevaluation%2Fcreate_agent_and_run_evaluation.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/evaluation/create_agent_and_run_evaluation.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/create_agent_and_run_evaluation.ipynb">
      <img width="32px" src="https://storage.googleapis.com/github-repo/generative-ai/logos/GitHub_Invertocat_Dark.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<p>
<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/create_agent_and_run_evaluation.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/create_agent_and_run_evaluation.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/create_agent_and_run_evaluation.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/create_agent_and_run_evaluation.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/create_agent_and_run_evaluation.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>
</p>

| Author(s) |
| --- |
| [Kelsi Lakey](https://github.com/lakeyk) |
| [Bo Zheng](https://github.com/coolalexzb) |

## Overview

This Colab notebook demonstrates how to create and deploy an Agent and then use the Gen AI Eval SDK to evaluate it.

- **Define an Agent:** Define a 'Hello World' Agent Development Kit agent with a few basic tool functions.
- **Deploy an Agent to Agent Engine:** Deploy the agent to Agent Engine.
- **Run Agent Inference:** Define an Evaluation Dataset and run agent inference to retrieve real responses.
- **Create Evaluation Run:** Create an Evaluation Run to perform Gen AI Agent Evaluation. This Evaluation Run will be persisted and accessible later.

If you already have a deployed Agent, please see:
- [Create a Gen AI Agent Evaluation for a Deployed Agent](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/create_genai_agent_evaluation.ipynb)

## Get started

### Install Vertex AI SDK and other required packages


In [None]:
%pip install --upgrade --force-reinstall -q google-cloud-aiplatform[evaluation]

### Authenticate your notebook environment

If you are running this notebook in **Google Colab**, run the cell below to authenticate your account.

In [None]:
import sys

# Authenticate only when the Colab module is already loaded in this kernel;
# in any other environment this cell does nothing.
if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

### Set Google Cloud project information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
import os
from typing import Optional
from urllib.parse import urlparse

import vertexai
from google.cloud import storage
from google.genai import types as genai_types


def get_config_value(
    initial_value: Optional[str],
    placeholder: Optional[str],
    env_var_name: str,
) -> Optional[str]:
    """
    Gets a configuration value or environment variable if unspecified.

    Args:
        initial_value: Value entered by the user; may be empty or None.
        placeholder: Template text that counts as "not specified", or None
            when there is no placeholder to compare against.
        env_var_name: Environment variable to read when no value was given.

    Returns:
        The user value with surrounding whitespace removed, or the value of
        `env_var_name` when the user value is empty, whitespace-only, or equal
        to the placeholder. Returns None when the fallback environment
        variable is not set.
    """
    # Normalise first so a stray space pasted into the form is not mistaken
    # for a real value (or carried into resource names).
    value = (initial_value or "").strip()
    if not value or value == placeholder:
        return os.environ.get(env_var_name)
    return value

def get_or_create_gcs_bucket(project_id: str, gcs_dest: str) -> str:
    """
    Ensures the bucket behind a GCS destination exists and returns the destination.

    Args:
        project_id: Project used for the storage client; also used to build
            the default destination.
        gcs_dest: Destination with or without the "gs://" scheme. When falsy,
            "<project_id>/agent-evaluation" is used.

    Returns:
        The destination, prefixed with "gs://" if it did not already start with it.
    """
    destination = gcs_dest if gcs_dest else f"{project_id}/agent-evaluation"
    gcs_client = storage.Client(project=project_id)

    # The bucket name is the first path component once the scheme is removed.
    bucket_name = destination.replace("gs://", "").partition("/")[0]
    if not gcs_client.lookup_bucket(bucket_name):
        print(f"Creating bucket: {bucket_name}")
        gcs_client.create_bucket(bucket_name)

    return destination if destination.startswith("gs://") else f"gs://{destination}"

# Configuration
# Each setting is taken from the form field below; when the field is empty (or
# still holds its placeholder text) the matching environment variable is used.
PROJECT_ID = ""  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
PROJECT_ID = get_config_value(PROJECT_ID, "[your-project-id]", "GOOGLE_CLOUD_PROJECT")
assert PROJECT_ID, "Please specify a valid project"

LOCATION = ""  # @param {type: "string", placeholder: "[us-central1]", isTemplate: true}
# Pass the same placeholder text as the form field, consistent with the other settings.
LOCATION = get_config_value(LOCATION, "[us-central1]", "GOOGLE_CLOUD_REGION")
assert LOCATION, "Please specify a valid location"

GCS_DEST = ""  # @param {type: "string", placeholder: "[your-gcs-bucket]", isTemplate: true}
GCS_DEST = get_config_value(GCS_DEST, "[your-gcs-bucket]", "GOOGLE_CLOUD_STORAGE_BUCKET")
# Falls back to a default destination under the project ID and creates the bucket if missing.
GCS_DEST = get_or_create_gcs_bucket(PROJECT_ID, GCS_DEST)
assert GCS_DEST, "Please specify a valid GCS destination"
# Bucket root only (no object path); passed as the staging bucket when deploying the agent.
STAGING_BUCKET = f"gs://{urlparse(GCS_DEST).netloc}"

# Initialize SDK
vertexai.init(project=PROJECT_ID, location=LOCATION)
client = vertexai.Client(project=PROJECT_ID, location=LOCATION)

### Import libraries

In [None]:
import pandas as pd
import time
import inspect
from google.adk import Agent, tools
from vertexai import types

## Helper Functions

In [None]:
def deploy_adk_agent(root_agent):
    """Wrap an ADK agent in an AdkApp and create it on Agent Engine.

    Uses the notebook-level `client` and `STAGING_BUCKET` defined in the
    configuration cell.

    Args:
        root_agent: The ADK agent to deploy.

    Returns:
        The object returned by `client.agent_engines.create`.
    """
    adk_app = vertexai.agent_engines.AdkApp(agent=root_agent)
    print("Deploying the agent to Agent Engine. This can take up to 10 mins.", flush=True)

    deployment_config = {
        "staging_bucket": STAGING_BUCKET,
        "requirements": ["google-cloud-aiplatform[adk,agent_engines]"],
        "env_vars": {"GOOGLE_CLOUD_AGENT_ENGINE_ENABLE_TELEMETRY": "true"},
    }
    return client.agent_engines.create(agent=adk_app, config=deployment_config)

def get_tool_declarations_from_agent(agent):
    """
    Builds one tool declaration per tool attached to an agent.

    Args:
      agent: The ADK agent.

    Returns:
      A list of dicts, each holding a single-element "function_declarations" list.
    """
    # NOTE(review): relies on an underscore-prefixed (private) ADK module;
    # confirm it is still available when upgrading google-adk.
    return [
        {
            "function_declarations": [
                tools._automatic_function_calling_util.build_function_declaration(agent_tool)
            ]
        }
        for agent_tool in agent.tools
    ]

def get_agent_info_from_agent(agent, agent_resource_name=None):
    """
    Builds an AgentInfo describing an agent.

    Args:
        agent: The ADK agent.
        agent_resource_name: Optional resource name of the deployed agent. Used for connecting Evaluation Runs to agents in the UI.

    Returns:
        A `types.evals.AgentInfo` populated from the agent's name, instruction,
        description and tool declarations.
    """
    return types.evals.AgentInfo(
        agent=agent_resource_name,
        name=agent.name,
        instruction=agent.instruction,
        description=agent.description,
        tool_declarations=get_tool_declarations_from_agent(agent),
    )

# Step 1: Create and Deploy an Agent

## Define Agent
Develop an Agent Development Kit agent by defining the model, instruction, and set of tools. \
For more information see [Develop an Agent Development Kit agent](https://cloud.google.com/agent-builder/agent-engine/develop/adk).

In [None]:
# Define Agent Tools
def search_products(query: str):
    """
    Searches for products based on a query.
    Args:
        query: The search query.
    Returns:
        A list of products that match the query.
    """
    # Mock response for demonstration
    mentions_headphones = "headphones" in query.lower()
    matching_products = (
        [{"name": "Wireless Headphones", "id": "B08H8H8H8H"}] if mentions_headphones else []
    )
    return {"products": matching_products}

def get_product_details(product_id: str):
    """
    Gets the details for a given product ID.
    Args:
        product_id: The ID of the product.
    Returns:
        The details of the product.
    """
    # Guard clause: only the single mock product ID is known.
    if product_id != "B08H8H8H8H":
        return {"error": "Product not found."}
    return {"details": "Noise-cancelling, 20-hour battery life."}

def add_to_cart(product_id: str, quantity: int):
    """
    Adds a specified quantity of a product to the cart.
    Args:
        product_id: The ID of the product.
        quantity: The quantity to add to the cart.
    Returns:
        A status message indicating the addition to the cart.
    """
    # Mock implementation: nothing is stored, only a confirmation is returned.
    confirmation = f"Added {quantity} of {product_id} to cart."
    return {"status": confirmation}

# Define Agent
# The three mock tool functions defined above are passed as plain callables.
# NOTE(review): no `description` is passed here, although
# `get_agent_info_from_agent` reads `agent.description` — confirm the default is acceptable.
ecommerce_agent = Agent(
    model="gemini-2.5-flash",
    name="ecommerce_agent",
    instruction="You are an ecommerce expert",
    tools=[search_products, get_product_details, add_to_cart],
)

## Deploy Agent to Agent Engine
Create an Agent Development Kit agent and deploy it to Agent Engine. \
For more information on deploying an agent, see [Deploy an Agent](https://cloud.google.com/agent-builder/agent-engine/deploy). \
This process may take up to 10 minutes.

In [None]:
# Deploy Agent
agent_engine = deploy_adk_agent(ecommerce_agent)
# Resource name of the deployed agent; reused below for inference and when building the agent info.
agent_engine_resource_name = agent_engine.api_resource.name

# Step 2: Run Agent Inference

## Define Agent Dataset
Define a dataset that is specific to your agent. \
`agent_prompts` should consist of prompts or requests to be made to your agent. A few example prompts are shown below. \
`session_inputs` are required for traces. For more information see [Session](https://google.github.io/adk-docs/sessions/session/).

In [None]:
# Example requests to send to the agent; each becomes one dataset row.
ecommerce_prompts = [
    "Search for 'noise-cancelling headphones'.",
    "Show me the details for product 'B08H8H8H8H'.",
    "Add one pair of 'B08H8H8H8H' to my shopping cart.",
    "Find 'wireless ear buds' and then add the first result to my cart.",
    "I need a new laptop for work, can you find one with at least 16GB of RAM?",
]

# A single session input object, attached to every prompt.
session_inputs = types.evals.SessionInput(
    user_id="user_123",
    state={"my_key": "my_value"},
)

ecommerce_dataset = pd.DataFrame(
    {
        "prompt": ecommerce_prompts,
        "session_inputs": [session_inputs for _ in ecommerce_prompts],
    }
)

## Run Agent Inference

Run inference using your deployed agent. This will add `intermediate_events` and `response` columns to your dataset to be evaluated in the next step.

In [None]:
# Run inference
# Sends the prompts dataset to the deployed agent, identified by its resource name.
agent_dataset_with_inference = client.evals.run_inference(
    agent=agent_engine_resource_name,
    src=ecommerce_dataset,
)
# Display inference results
agent_dataset_with_inference.show()

# Step 3: Run Gen AI Agent Evaluation

## Option 1: Run Gen AI Evaluation with Evaluation Management Service

Run Gen AI Agent Evaluation using the Evaluation Management Service. \
This will persist your dataset and evaluation results which can be retrieved via the Agent Engine UI.

In [None]:
# Create agent_info from Agent definition and deployed resource name
ecommerce_agent_info = get_agent_info_from_agent(ecommerce_agent, agent_engine_resource_name)

# Evaluate Agent Dataset
# Requests an evaluation run over the inference results using four rubric
# metrics; `dest` is the GCS destination from the configuration cell.
evaluation_run = client.evals.create_evaluation_run(
    dataset=agent_dataset_with_inference,
    agent_info=ecommerce_agent_info,
    metrics=[
        types.RubricMetric.FINAL_RESPONSE_QUALITY,
        types.RubricMetric.TOOL_USE_QUALITY,
        types.RubricMetric.HALLUCINATION,
        types.RubricMetric.SAFETY,
    ],
    dest=GCS_DEST,
)

# Display status and results
evaluation_run.show()

### Poll Evaluation Run for Completion and Display Results
Retrieve the Evaluation Run and directly display the results using the `.show()` method. If the Evaluation Run failed, the error message will be displayed. Otherwise, the following results data will be displayed in an embedded report.

- **Summary metrics:** An aggregated view of all metrics, showing the mean score and standard deviation across the entire dataset.
- **Agent info:** Information describing the evaluated agent, including developer instruction, agent description, tool definitions, etc. Applies to agent evaluation only.
- **Detailed results:** A case-by-case breakdown, allowing you to inspect the prompt, reference, candidate response, and the specific score and explanation for each metric. For agent evaluation, detailed results will also include traces showing the agent interactions.

For more information, see [Visualizing Evaluation Reports](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/view-evaluation#visualizing-evaluation-reports).

In [None]:
# States in which polling stops.
completed_states = {"SUCCEEDED", "FAILED", "CANCELLED"}
# Seconds to wait between status checks.
POLL_INTERVAL_SECONDS = 5

# Poll until the run reaches one of the completed states, showing the current
# status on every pass.
while evaluation_run.state not in completed_states:
    evaluation_run.show()
    # Sleep before refreshing so the loop exits as soon as a completed state is
    # observed, instead of waiting one more interval after the run has finished.
    time.sleep(POLL_INTERVAL_SECONDS)
    evaluation_run = client.evals.get_evaluation_run(name=evaluation_run.name)

# Fetch the finished run once more, this time including the evaluation items.
evaluation_run = client.evals.get_evaluation_run(name=evaluation_run.name, include_evaluation_items=True)

evaluation_run.show()

## [Optional] Option 2: Run Gen AI Evaluation Locally

Run Gen AI Agent Evaluation locally. \
This will run the same evaluation as `Option 1`, but results will not be available outside of this Colab instance.

In [None]:
# Create agent_info from Agent definition and deployed resource name
# Rebuilt here so this optional cell does not depend on the Option 1 cell having been run.
ecommerce_agent_info = get_agent_info_from_agent(ecommerce_agent, agent_engine_resource_name)

# Evaluate Agent Dataset
# Same dataset, agent info and metrics as Option 1, but no `dest` is passed.
eval_result = client.evals.evaluate(
    dataset=agent_dataset_with_inference,
    agent_info=ecommerce_agent_info,
    metrics=[
        types.RubricMetric.FINAL_RESPONSE_QUALITY,
        types.RubricMetric.TOOL_USE_QUALITY,
        types.RubricMetric.HALLUCINATION,
        types.RubricMetric.SAFETY,
    ],
)

# Display results
eval_result.show()