In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden + Reasoning Engine - Build, Deploy and Test Agents using a Self-deployed Endpoint

<table><tbody><tr>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_pytorch_deployed_model_reasoning_engine.ipynb">
      <img alt="Google Cloud Colab Enterprise logo" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" width="32px"><br> Run in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_deployed_model_reasoning_engine.ipynb">
      <img alt="GitHub logo" src="https://github.githubassets.com/assets/GitHub-Mark-ea2971cee799.png" width="32px"><br> View on GitHub
    </a>
  </td>
</tr></tbody></table>

## Overview

This notebook demonstrates how to build, deploy and test three types of agents using [Reasoning Engine](https://cloud.google.com/vertex-ai/generative-ai/docs/reasoning-engine/overview) with a self-deployed model in Vertex AI.

[Reasoning Engine](https://cloud.google.com/vertex-ai/generative-ai/docs/reasoning-engine/overview) (LangChain on Vertex AI) is a managed service in Vertex AI that helps you build and deploy model-based agents. It gives you the flexibility to choose how much reasoning you want to delegate to the LLM and how much you want to handle with custom code.

A previous [notebook](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb) demonstrates how to use Llama 3.1 models as Model-as-a-service (MaaS) to build `chatbot` and `translator` agents.


### Objective
    
- Integrate with Reasoning Engine: Use the Vertex AI SDK to build three simple agents with the deployed endpoint:
    - A Chatbot Agent
    - A Translator Agent
    - An Agent that uses [an Exchange Rate Tool](https://cloud.google.com/vertex-ai/generative-ai/docs/reasoning-engine/develop#define-function)
- Test your agent locally.
- Deploy and test your agent on the Reasoning Engine.


### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Before you begin

In [None]:
# @title Setup Google Cloud project

# @markdown 1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

# @markdown 2. Create a Cloud Storage bucket for Reasoning Engine staging and enter its name below (without the `gs://` prefix).

BUCKET_NAME = ""  # @param {type:"string", placeholder: "[your-bucket-name]"}
# The `gs://` scheme is prepended here, so BUCKET_NAME must be the bare bucket name.
STAGING_BUCKET = f"gs://{BUCKET_NAME}"

# @markdown 3. You can find the deployed model endpoint in the [Vertex AI console](https://console.cloud.google.com/vertex-ai/endpoints).
DEPLOYED_MODEL_ENDPOINT = ""  # @param {type:"string", placeholder: "[your-deployed-model-endpoint]"}

# Install the required packages.

# Upgrade the Vertex AI SDK and install the libraries used by the agents below.
! pip3 install --upgrade --quiet \
    "google-cloud-aiplatform>=1.64.0" \
    cloudpickle==3.0.0 \
    pydantic==2.7.4 \
    requests \
    langchain-openai
! git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git

import os
import requests
from typing import Tuple
from google.cloud import aiplatform

# Get the default cloud project id.
# Raises KeyError if GOOGLE_CLOUD_PROJECT is not set in the environment.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

# Get the default region for launching jobs.
# Raises KeyError if GOOGLE_CLOUD_REGION is not set in the environment.
REGION = os.environ["GOOGLE_CLOUD_REGION"]

# Enable the Vertex AI API and Compute Engine API, if not already.
print("Enabling Vertex AI API and Compute Engine API.")
! gcloud services enable aiplatform.googleapis.com compute.googleapis.com

### Initialization

In [None]:
# @title Authenticate your notebook environment (Colab only)
import sys

# `google.colab` is only present in sys.modules when running inside Colab;
# in any other environment this cell does nothing.
if "google.colab" in sys.modules:

    # Imported lazily because the package is only checked for, not assumed, above.
    from google.colab import auth

    auth.authenticate_user()

In [None]:
# @title Initialize Vertex AI SDK for Python
import vertexai

# PROJECT_ID, REGION and STAGING_BUCKET come from the "Setup Google Cloud project"
# cell, which must be run first.
vertexai.init(project=PROJECT_ID, location=REGION, staging_bucket=STAGING_BUCKET)

In [None]:
# @title Import libraries

# @markdown Import libraries to use in this tutorial.

import google.auth
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from vertexai.preview import reasoning_engines

### Chat with `Reasoning Engine`

In [None]:
# @title `Reasoning Engine` use self-deployed API endpoint with different configuration

# @markdown To use the self-deployed API endpoint with Reasoning Engine capabilities, you need to request the access token and configure the langchain ChatOpenAI to point to the API endpoint.

# @markdown In a previous [notebook](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb), we demonstrated how to `Ask Llama 3.1 using different model configuration`.

# @markdown In this colab, we will show you how to use the `Reasoning Agent` to send a request to the self-deployed API endpoint with different model configuration.


def model_builder(
    *,
    model_name: str,
    model_kwargs=None,
    project: str,  # Specified via vertexai.init
    location: str,  # Specified via vertexai.init
    **kwargs,
):
    """Build a `ChatOpenAI` client that targets the self-deployed endpoint.

    Intended to be passed as `model_builder` to
    `reasoning_engines.LangchainAgent`.

    Args:
        model_name: Accepted for interface compatibility; not used. The
            client is always created with an empty model name.
        model_kwargs: Optional keyword arguments forwarded verbatim to
            `ChatOpenAI` (e.g. `temperature`, `top_p`). `None` means none.
        project: Accepted for interface compatibility; not used here.
        location: Accepted for interface compatibility; not used here.
        **kwargs: Any additional keyword arguments are ignored.

    Returns:
        A `ChatOpenAI` instance whose `base_url` is `DEPLOYED_MODEL_ENDPOINT`
        and whose `api_key` is a freshly refreshed Google access token.
    """
    # Import the transport submodule explicitly. `import google.auth` on its
    # own does not load `google.auth.transport.requests`, so relying on it
    # only works when some other library happens to have imported it already.
    # Function-scope imports also run wherever this function is executed.
    import google.auth
    import google.auth.transport.requests

    # Note: the credential lives for 1 hour by default.
    # After expiration, it must be refreshed.
    creds, _ = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    creds.refresh(google.auth.transport.requests.Request())

    return ChatOpenAI(
        model="",
        base_url=DEPLOYED_MODEL_ENDPOINT,
        api_key=creds.token,
        **(model_kwargs or {}),
    )


# @markdown Use the following parameters to generate different answers:
# @markdown *   `temperature` to control the randomness of the response
# @markdown *   `top_p` to control the diversity of the response (nucleus sampling)

temperature = 1.0  # @param {type:"number"}
top_p = 1.0  # @param {type:"number"}

# `model_kwargs` is handed to `model_builder`, which forwards every entry to
# the ChatOpenAI constructor.
agent = reasoning_engines.LangchainAgent(
    model="",  # Required.
    model_builder=model_builder,  # Required.
    model_kwargs={
        "temperature": temperature,  # Optional.
        "top_p": top_p,  # Optional.
        "extra_body": {},
    },
)

# @markdown Now we can test the model and agent behavior to ensure that it's working as expected before we deploy it:

# Local query: runs in this notebook session, before anything is deployed.
response = agent.query(input="Hello, how are you!")
print(response)

In [None]:
# @title Deploy your agent on Vertex AI

# @markdown Now that you've specified a model, and reasoning for your agent and tested it out, you're ready to deploy your agent as a remote service in Vertex AI!

# The pinned versions below mirror the ones installed locally in the setup cell.
# NOTE: `remote_agent` is reassigned by every deploy cell in this notebook, so
# it always refers to the most recently deployed engine.
remote_agent = reasoning_engines.ReasoningEngine.create(
    agent,
    requirements=[
        "google-cloud-aiplatform[langchain,reasoningengine]",
        "cloudpickle==3.0.0",
        "pydantic==2.7.4",
        "requests",
        "langchain-openai",
    ],
)

# Same prompt as the local test, this time sent to the deployed engine.
response = remote_agent.query(input="Hello, how are you!")
print(response)

In [None]:
# @title Reusing your deployed agent from other applications or SDKs

# @markdown The remotely deployed `Reasoning Engine` is now available for import and use. You can access it within your current notebook session, a different notebook, or a Python script.

# Keep the resource name: it is what the sample code below uses to look up
# the deployed engine.
REASONING_ENGINE_RESOURCE_NAME = remote_agent.resource_name
print(REASONING_ENGINE_RESOURCE_NAME)

# Afterwards, you can use the code below (`query` stands for your own input):

# from vertexai.preview import reasoning_engines

# remote_agent = reasoning_engines.ReasoningEngine(REASONING_ENGINE_RESOURCE_NAME)
# response = remote_agent.query(input=query)

# @markdown Alternatively, you can query your agent from other programming languages using any of the [available client libraries in Vertex AI](https://cloud.google.com/vertex-ai/docs/start/client-libraries), including C#, Java, Node.js, Python, Go, or REST API.

### Simple Translator Agent

In [None]:
# @title Use Reasoning Engine to build a simple translator agent

# @markdown In a previous [notebook](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb), we demonstrated how to use `LangChain Expression Language` (LCEL) to build a simple chain which translates some `text_to_translate` to the specified `target_language`.

# @markdown In this colab, we will show you how to use the `Reasoning Agent` to build and deploy the agent.


def lcel_builder(*, model, **kwargs):
    """Compose the translation chain: prompt -> model -> string output parser.

    Args:
        model: The model object piped after the prompt.
        **kwargs: Accepted and ignored.

    Returns:
        The runnable produced by `prompt | model | StrOutputParser()`. It
        expects an input with the keys `text` and `target_language`.
    """
    translation_prompt = PromptTemplate(
        template="""Translate the following {text} to {target_language}:""",
        input_variables=["text", "target_language"],
    )
    prompt_and_model = translation_prompt | model
    return prompt_and_model | StrOutputParser()


# Translator agent: same model builder as the chatbot, but the runnable is the
# chain returned by `lcel_builder`.
agent = reasoning_engines.LangchainAgent(
    model="",
    model_builder=model_builder,
    runnable_builder=lcel_builder,
)

# Both fields default to empty strings; fill them in before running the query.
text_to_translate = ""  # @param {type:"string", placeholder:"Hello, how are you!"}
target_language = ""  # @param {type:"string", placeholder:"Italian"}

# The input keys must match the prompt variables declared in `lcel_builder`.
response = agent.query(
    input={"text": text_to_translate, "target_language": target_language}
)
print(response)

In [None]:
# @title Deploy your agent on Vertex AI

# @markdown Now that you've specified a model, and reasoning for your agent and tested it out, you're ready to deploy your agent as a remote service in Vertex AI!

# The pinned versions below mirror the ones installed locally in the setup cell.
# NOTE: this reassigns `remote_agent`; an engine deployed by an earlier cell is
# no longer reachable through that name.
remote_agent = reasoning_engines.ReasoningEngine.create(
    agent,
    requirements=[
        "google-cloud-aiplatform[langchain,reasoningengine]",
        "cloudpickle==3.0.0",
        "pydantic==2.7.4",
        "requests",
        "langchain-openai",
    ],
)

# Same input as the local test, this time sent to the deployed engine.
response = remote_agent.query(
    input={"text": text_to_translate, "target_language": target_language}
)
print(response)

In [None]:
# @title Reusing your deployed agent from other applications or SDKs

# @markdown The remotely deployed `Reasoning Engine` is now available for import and use. You can access it within your current notebook session, a different notebook, or a Python script.

# Keep the resource name: it is what the sample code below uses to look up
# the deployed engine.
REASONING_ENGINE_RESOURCE_NAME = remote_agent.resource_name
print(REASONING_ENGINE_RESOURCE_NAME)

# Afterwards, you can use the code below (`query` stands for your own input):

# from vertexai.preview import reasoning_engines

# remote_agent = reasoning_engines.ReasoningEngine(REASONING_ENGINE_RESOURCE_NAME)
# response = remote_agent.query(input=query)

# @markdown Alternatively, you can query your agent from other programming languages using any of the [available client libraries in Vertex AI](https://cloud.google.com/vertex-ai/docs/start/client-libraries), including C#, Java, Node.js, Python, Go, or REST API.

### Exchange Rate Tool

[Function calling](https://cloud.google.com/vertex-ai/docs/generative-ai/multimodal/function-calling) lets developers create a description of a function in their code, then pass that description to a language model in a request. The response from the model includes the name of a function that matches the description and the arguments to call it with.

In this example, we will use an Exchange Rate tool in the Reasoning Engine.

In [None]:
# @title Agent that uses an Exchange Rate Tool

# @markdown Tools and functions enable the generative model to interact with external systems, databases, document stores, and other APIs so that the model can get the most up-to-date information or take action with those systems.

# @markdown In this example, you'll define a function called get_exchange_rate that uses the requests library to retrieve real-time currency exchange information from an API:


def get_exchange_rate(
    currency_from: str = "USD",
    currency_to: str = "EUR",
    currency_date: str = "latest",
):
    """Retrieves the exchange rate between two currencies on a specified date.

    Args:
        currency_from: The source currency code, e.g. "USD".
        currency_to: The target currency code, e.g. "EUR".
        currency_date: The date to retrieve the exchange rate for, e.g.
            "2024-07-26", or "latest" for the most recent rate.

    Returns:
        The decoded JSON body of the Frankfurter API response for the
        requested currency pair and date.
    """
    response = requests.get(
        f"https://api.frankfurter.app/{currency_date}",
        params={"from": currency_from, "to": currency_to},
        # Without a timeout, a stalled connection would block the caller
        # (here, the agent's tool call) indefinitely.
        timeout=30,
    )
    return response.json()


# Sanity-check the tool on its own first. The result is printed explicitly
# because a bare expression that is not the last statement of a cell is not
# displayed.
print(get_exchange_rate(currency_from="USD", currency_to="SEK"))


# Register the function as a tool so the agent can call it while answering.
agent = reasoning_engines.LangchainAgent(
    model="",  # Required.
    model_builder=model_builder,  # Required.
    tools=[get_exchange_rate],  # Optional.
    agent_executor_kwargs={
        "return_intermediate_steps": True,
        "stream_runnable": False,
    },  # Optional.
)

# @markdown Test the agent with a sample query to ensure that it's working as expected:
response = agent.query(
    input="What's the exchange rate from US dollars to Swedish currency at 2024-07-26?"
)
print(response)

In [None]:
# @title Deploy your agent on Vertex AI

# @markdown Now that you've specified a model, and reasoning for your agent and tested it out, you're ready to deploy your agent as a remote service in Vertex AI!

# The pinned versions below mirror the ones installed locally in the setup cell.
# NOTE: this reassigns `remote_agent`; an engine deployed by an earlier cell is
# no longer reachable through that name.
remote_agent = reasoning_engines.ReasoningEngine.create(
    agent,
    requirements=[
        "google-cloud-aiplatform[langchain,reasoningengine]",
        "cloudpickle==3.0.0",
        "pydantic==2.7.4",
        "requests",
        "langchain-openai",
    ],
)

# Same question as the local test, this time sent to the deployed engine.
response = remote_agent.query(
    input="What's the exchange rate from US dollars to Swedish currency at 2024-07-26?"
)
print(response)

In [None]:
# @title Reusing your deployed agent from other applications or SDKs

# @markdown The remotely deployed `Reasoning Engine` is now available for import and use. You can access it within your current notebook session, a different notebook, or a Python script.

# Keep the resource name: it is what the sample code below uses to look up
# the deployed engine.
REASONING_ENGINE_RESOURCE_NAME = remote_agent.resource_name
print(REASONING_ENGINE_RESOURCE_NAME)

# Afterwards, you can use the code below (`query` stands for your own input):

# from vertexai.preview import reasoning_engines

# remote_agent = reasoning_engines.ReasoningEngine(REASONING_ENGINE_RESOURCE_NAME)
# response = remote_agent.query(input=query)

# @markdown Alternatively, you can query your agent from other programming languages using any of the [available client libraries in Vertex AI](https://cloud.google.com/vertex-ai/docs/start/client-libraries), including C#, Java, Node.js, Python, Go, or REST API.

## Clean up resources

In [None]:
# @title Delete the buckets and reasoning engines

delete_bucket = False  # @param {type:"boolean"}
if delete_bucket:
    ! gsutil -m rm -r $BUCKET_NAME

delete_reasoning_engine = False  # @param {type:"boolean"}

if delete_reasoning_engine:
    remote_agent.delete()