In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden + Reasoning Engine - Build, Deploy and Test Agents Using Llama 3.1 Models

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_reasoning_engine_llama3_1.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_reasoning_engine_llama3_1.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_reasoning_engine_llama3_1.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_reasoning_engine_llama3_1.ipynb">
      <img src="https://github.githubassets.com/assets/GitHub-Mark-ea2971cee799.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

## Overview

[Reasoning Engine](https://cloud.google.com/vertex-ai/generative-ai/docs/reasoning-engine/overview) (LangChain on Vertex AI) is a managed service in Vertex AI that helps you build and deploy agent-based reasoning frameworks. It gives you the flexibility to choose how much reasoning you want to delegate to the LLM and how much you want to handle with custom code.

A previous [notebook](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb) demonstrates how to use Llama 3.1 models as Model-as-a-service (MaaS) to build `chatbot` and `translator` agents.

This notebook demonstrates how to build, deploy and test these agents using [Reasoning Engine](https://cloud.google.com/vertex-ai/generative-ai/docs/reasoning-engine/overview) in Vertex AI.

### Objective

- Use the Vertex AI SDK to build three simple agents with the Llama 3.1 Completions API:
  - A Chatbot Agent
  - A Translator Agent
  - An Agent that uses an Exchange Rate Tool
- Test your agent locally.
- Deploy and test your agent on the Reasoning Engine.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Get started

### Install Vertex AI SDK for Python and other required packages

Install the latest version of the Vertex AI SDK for Python as well as extra dependencies related to Reasoning Engine and LangChain:

In [None]:
! pip3 install --upgrade --quiet \
    "google-cloud-aiplatform[langchain,reasoningengine]" \
    cloudpickle==3.0.0 \
    pydantic==2.7.4 \
    requests \
    langchain-openai

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [2]:
import sys

if "google.colab" in sys.modules:

    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

If you are running this notebook on Google Colab, run the following cell to authenticate your environment. This step is not required if you are using [Vertex AI Workbench](https://cloud.google.com/vertex-ai-workbench).


In [1]:
import sys

if "google.colab" in sys.modules:

    from google.colab import auth

    auth.authenticate_user()

### Set Google Cloud project information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [2]:
PROJECT_ID = ""  # @param {type:"string", placeholder: "[your-project-id]"}
LOCATION = ""  # @param {type:"string", placeholder: "us-central1"}
BUCKET_NAME = ""  # @param {type:"string", placeholder: "[your-bucket-name]"}
STAGING_BUCKET = f"gs://{BUCKET_NAME}"

### Initialize Vertex AI SDK for Python

In [3]:
import vertexai

vertexai.init(project=PROJECT_ID, location=LOCATION, staging_bucket=STAGING_BUCKET)

### Import libraries

Import libraries to use in this tutorial.

In [4]:
from vertexai.preview import reasoning_engines

### Configure the Llama 3.1 Chat Completions API for the `Reasoning Engine`.

To use the Llama 3.1 Chat Completions API with `Reasoning Engine` capabilities, you need to request the access token and configure the langchain `ChatOpenAI` to point to the Llama 3.1 Chat Completions API endpoint.

Note: Llama 3.1 models deployed as Model-as-a-Service (MaaS) are currently only supported in the `us-central1` region.

In [5]:
def model_builder(
    *,
    model_name: str,
    model_kwargs=None,
    project: str,  # Specified via vertexai.init
    location: str,  # Specified via vertexai.init
    **kwargs,
):
    """Builds a LangChain `ChatOpenAI` client pointed at the Vertex AI OpenAI-compatible endpoint.

    Args:
        model_name: Model identifier passed to `ChatOpenAI` as `model`.
        model_kwargs: Optional dict of extra keyword arguments forwarded to
            `ChatOpenAI`. `None` is treated as an empty dict.
        project: Google Cloud project ID used to build the endpoint URL.
        location: Region used for both the API host name and the endpoint path.
        **kwargs: Accepted for compatibility with the caller; not used here.

    Returns:
        A `ChatOpenAI` instance authenticated with a freshly refreshed
        access token from the application default credentials.
    """
    import google.auth

    # `import google.auth` alone does not guarantee that the
    # `google.auth.transport.requests` submodule is loaded; import it
    # explicitly so `Request()` below resolves in a fresh process.
    import google.auth.transport.requests
    from langchain_openai import ChatOpenAI

    # Note: the credential lives for 1 hour by default.
    # After expiration, it must be refreshed.
    creds, _ = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    auth_req = google.auth.transport.requests.Request()
    creds.refresh(auth_req)

    if model_kwargs is None:
        model_kwargs = {}

    endpoint = f"https://{location}-aiplatform.googleapis.com"
    base_url = (
        f"{endpoint}/v1/projects/{project}/locations/{location}/endpoints/openapi"
    )

    return ChatOpenAI(
        model=model_name,
        base_url=base_url,
        api_key=creds.token,
        **model_kwargs,
    )

#### Llama 3.1 models

You can experiment with various supported Llama 3.1 models.

This tutorial uses Llama 3.1 8B Instruct, 70B Instruct, and 405B Instruct using Model-as-a-Service (MaaS). Using Model-as-a-Service (MaaS), you can access Llama 3.1 models in just a few clicks without any setup or infrastructure hassles.

You can also access Llama models for self-service in Vertex AI Model Garden, allowing you to choose your preferred infrastructure. [Check out Llama 3.1 model card](https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama3_1?_ga=2.31261500.2048242469.1721714335-1107467625.1721655511) to learn how to deploy Llama 3.1 models on Vertex AI.

In [7]:
MODEL_ID = "meta/llama-3.1-405b-instruct-maas"  # @param {type:"string"} ["meta/llama-3.1-8b-instruct-maas", "meta/llama-3.1-70b-instruct-maas", "meta/llama-3.1-405b-instruct-maas"]

### Chat with `Reasoning Agent`

In the previous [notebook](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb), we demonstrated how to `Ask Llama 3.1 using different model configuration`.

In this colab, we will show you how to use the `Reasoning Agent` to send a request to the Llama 3.1 model with different model configurations.

#### `Reasoning Engine` use Llama 3.1 with different configuration

Use the following parameters to generate different answers:

*   `temperature` to control the randomness of the response
*   `max_tokens` to limit the response length
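*   `top_p` to control the diversity of the response (nucleus sampling: only the most likely tokens whose cumulative probability reaches `top_p` are considered)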
*   `apply_llama_guard` Model-as-a-Service (MaaS) integrates [Llama Guard](https://huggingface.co/meta-llama/Llama-Guard-3-8B) as a safety filter. It is switched on by default and can be switched off. Llama Guard enables us to safeguard model inputs and outputs. If a response is filtered, it will be populated with a `finish_reason` field (with value `content_filtered`) and a `refusal` field (stating the filtering reason).


In [25]:
temperature = 1.0  # @param {type:"number"}
max_tokens = 50  # @param {type:"integer"}
top_p = 1.0  # @param {type:"number"}
apply_llama_guard = True  # @param {type:"boolean"}

In [27]:
agent = reasoning_engines.LangchainAgent(
    model=MODEL_ID,  # Required.
    model_builder=model_builder,  # Required.
    model_kwargs={
        "temperature": temperature,  # Optional.
        "max_tokens": max_tokens,  # Optional.
        "top_p": top_p,  # Optional.
        "extra_body": {  # Optional.
            "google": {
                "model_safety_settings": {
                    "enabled": apply_llama_guard,
                    "llama_guard_settings": {},
                }
            }
        },
    },
)

Now we can test the model and agent behavior to ensure that it's working as expected before we deploy it:

In [28]:
response = agent.query(input="Hello, Llama 3.1!")
print(response)

{'input': 'Hello, Llama 3.1!', 'output': "Hello! I'm Llama 3.1, an AI developed by Meta."}


#### Deploy your agent on Vertex AI

Now that you've specified a model and reasoning for your agent and tested it out, you're ready to deploy your agent as a remote service in Vertex AI!

In [None]:
remote_agent = reasoning_engines.ReasoningEngine.create(
    agent,
    requirements=[
        "google-cloud-aiplatform[langchain,reasoningengine]",
        "cloudpickle==3.0.0",
        "pydantic==2.7.4",
        "requests",
        "langchain-openai",
    ],
)

In [42]:
response = remote_agent.query(input="Hello, Llama 3.1!")
print(response)

{'output': "Hello! I'm Llama 3.1, an artificial intelligence model developed by Meta, designed to process and generate human-like language. I can provide information, answer questions, and even create text based on a given prompt. How can I assist", 'input': 'Hello, Llama 3.1!'}


#### Reusing your deployed agent from other applications or SDKs

You can now import and use the remotely deployed Reasoning Engine in this notebook session or in a different notebook or Python script. First you need to get its resource_name by calling:

In [None]:
REASONING_ENGINE_RESOURCE_NAME = remote_agent.resource_name
print(REASONING_ENGINE_RESOURCE_NAME)

Afterwards you can use it by uncommenting and adapting the following code:

In [None]:
# from vertexai.preview import reasoning_engines

# remote_agent = reasoning_engines.ReasoningEngine(REASONING_ENGINE_RESOURCE_NAME)
# response = remote_agent.query(input=query)

Or, you can query your agent from other programming languages using any of the [available client libraries in Vertex AI](https://cloud.google.com/vertex-ai/docs/start/client-libraries), including C#, Java, Node.js, Python, Go, or REST API.

### Use `Reasoning Engine` to build a simple translator agent:

In the previous [notebook](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb), we demonstrated how to use `LangChain Expression Language` (LCEL) to build a simple chain which translates some `text_to_translate` to the specified `target_language`.

In this colab, we will show you how to use the `Reasoning Agent` to build and deploy the agent.

In [19]:
def lcel_builder(*, model, **kwargs):
    """Assembles an LCEL translation chain: prompt -> model -> string output.

    Args:
        model: The chat model runnable that the formatted prompt is piped into.
        **kwargs: Accepted for compatibility with the caller; not used here.

    Returns:
        A runnable that takes `text` and `target_language` inputs and yields
        the model's reply parsed to a plain string.
    """
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import PromptTemplate

    # The two placeholders must match the keys supplied at query time.
    translation_prompt = PromptTemplate(
        template="""Translate the following {text} to {target_language}:""",
        input_variables=["text", "target_language"],
    )

    chain = translation_prompt | model | StrOutputParser()
    return chain


agent = reasoning_engines.LangchainAgent(
    model=MODEL_ID,
    model_builder=model_builder,
    runnable_builder=lcel_builder,
)

##### Translate a text

In [13]:
text_to_translate = ""  # @param {type:"string", placeholder:"Hello Llama 3.1!"}
target_language = ""  # @param {type:"string", placeholder:"Italian"}

In [20]:
response = agent.query(
    input={"text": text_to_translate, "target_language": target_language}
)
print(response)

The translation of "Hello Llama 3.1!" to Italian is:

Ciao Llama 3.1!

Here's a breakdown of the translation:

- "Hello" is translated to "Ciao", which is an informal way of saying "hello" in Italian.
- "Llama" remains the same, as it's a proper noun.
- "3.1" is a version number, so it remains the same.


#### Deploy your agent on Vertex AI

Now that you've specified a model and reasoning for your agent and tested it out, you're ready to deploy your agent as a remote service in Vertex AI!

In [None]:
remote_agent = reasoning_engines.ReasoningEngine.create(
    agent,
    requirements=[
        "google-cloud-aiplatform[langchain,reasoningengine]",
        "cloudpickle==3.0.0",
        "pydantic==2.7.4",
        "requests",
        "langchain-openai",
    ],
)

In [22]:
response = remote_agent.query(
    input={"text": text_to_translate, "target_language": target_language}
)
print(response)

The translation of "Hello Llama 3.1!" to Italian is:

Ciao Llama 3.1!


#### Reusing your deployed agent from other applications or SDKs

You can now import and use the remotely deployed Reasoning Engine in this notebook session or in a different notebook or Python script. First you need to get its resource_name by calling:

In [None]:
REASONING_ENGINE_RESOURCE_NAME = remote_agent.resource_name
print(REASONING_ENGINE_RESOURCE_NAME)

Afterwards you can use it by uncommenting and adapting the following code:

In [30]:
# from vertexai.preview import reasoning_engines

# remote_agent = reasoning_engines.ReasoningEngine(REASONING_ENGINE_RESOURCE_NAME)
# response = remote_agent.query(input=query)

Or, you can query your agent from other programming languages using any of the [available client libraries in Vertex AI](https://cloud.google.com/vertex-ai/docs/start/client-libraries), including C#, Java, Node.js, Python, Go, or REST API.

### Agent uses Exchange Rate Tool

[Function calling](https://cloud.google.com/vertex-ai/docs/generative-ai/multimodal/function-calling) lets developers create a description of a function in their code, then pass that description to a language model in a request. The response from the model includes the name of a function that matches the description and the arguments to call it with.

In this example, we will use an Exchange Rate tool in the Reasoning Engine.

#### Define Python functions (tools)
Tools and functions enable the generative model to interact with external systems, databases, document stores, and other APIs so that the model can get the most up-to-date information or take action with those systems.

In this example, you'll define a function called `get_exchange_rate` that uses the requests library to retrieve real-time currency exchange information from an API:

In [None]:
def get_exchange_rate(
    currency_from: str = "USD",
    currency_to: str = "EUR",
    currency_date: str = "latest",
):
    """Retrieves the exchange rate between two currencies on a specified date.

    Uses the Frankfurter API (https://api.frankfurter.app/) to obtain
    exchange rate data.

    Args:
        currency_from: The base currency code, e.g. "USD". Defaults to "USD".
        currency_to: The target currency code, e.g. "SEK". Defaults to "EUR".
        currency_date: The date to look up, e.g. "2024-07-26". Defaults to
            "latest", which is passed to the API as-is.

    Returns:
        The decoded JSON body of the API response.
    """
    import requests

    response = requests.get(
        f"https://api.frankfurter.app/{currency_date}",
        params={"from": currency_from, "to": currency_to},
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would hang the agent's tool call.
        timeout=30,
    )
    return response.json()

Test the function with sample inputs to ensure that it's working as expected:

In [None]:
get_exchange_rate(currency_from="USD", currency_to="SEK")

Here, you'll use the LangChain agent template provided in the Vertex AI SDK for Reasoning Engine, which brings together the model, tools, and reasoning that you've built up so far:

In [None]:
agent = reasoning_engines.LangchainAgent(
    model=MODEL_ID,                                            # Required.
    model_builder=model_builder,                               # Required.
    tools=[get_exchange_rate],                                 # Optional.
    agent_executor_kwargs={
        "return_intermediate_steps": True,
        "stream_runnable": False,
    },                                                         # Optional.
)

Now we can test the model and agent behavior to ensure that it's working as expected before we deploy it:

In [None]:
response = agent.query(input="What's the exchange rate from US dollars to Swedish currency at 2024-07-26?")
print(response)

{'input': "What's the exchange rate from US dollars to Swedish currency at 2024-07-26?", 'output': 'The exchange rate from US dollars to Swedish currency at 2024-07-26 is 10.8034 SEK per USD.', 'intermediate_steps': [[{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'agent', 'ToolAgentAction'], 'kwargs': {'tool': 'get_exchange_rate', 'tool_input': {'currency_date': '2024-07-26', 'currency_from': 'USD', 'currency_to': 'SEK'}, 'log': "\nInvoking: `get_exchange_rate` with `{'currency_date': '2024-07-26', 'currency_from': 'USD', 'currency_to': 'SEK'}`\n\n\n", 'type': 'AgentActionMessageLog', 'message_log': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'id': 'get_exchange_rate', 'function': {'arguments': '{"currency_date":"2024-07-26","currency_from":"USD","currency_to":"SEK"}', 'name': 'get_exchange_rate'}, 'type': 'function'}], 'refusal': None}, 'response_metadata': {'

#### Deploy your agent on Vertex AI

Now that you've specified a model and reasoning for your agent and tested it out, you're ready to deploy your agent as a remote service in Vertex AI!

In [None]:
remote_agent = reasoning_engines.ReasoningEngine.create(
    agent,
    requirements=[
        "google-cloud-aiplatform[langchain,reasoningengine]",
        "cloudpickle==3.0.0",
        "pydantic==2.7.4",
        "requests",
        "langchain-openai",
    ],
)

In [None]:
response = remote_agent.query(input="What's the exchange rate from US dollars to Swedish currency at 2024-07-26?")
print(response)

{'input': "What's the exchange rate from US dollars to Swedish currency at 2024-07-26?", 'output': 'The exchange rate from US dollars to Swedish currency at 2024-07-26 is 1 USD = 10.767 SEK.', 'intermediate_steps': [[{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'agent', 'ToolAgentAction'], 'kwargs': {'tool': 'get_exchange_rate', 'tool_input': {'currency_date': '2024-07-26', 'currency_from': 'USD', 'currency_to': 'SEK'}, 'log': "\nInvoking: `get_exchange_rate` with `{'currency_date': '2024-07-26', 'currency_from': 'USD', 'currency_to': 'SEK'}`\n\n\n", 'type': 'AgentActionMessageLog', 'message_log': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'id': 'get_exchange_rate', 'function': {'arguments': '{"currency_date":"2024-07-26","currency_from":"USD","currency_to":"SEK"}', 'name': 'get_exchange_rate'}, 'type': 'function'}], 'refusal': None}, 'response_metadata': {'t

#### Reusing your deployed agent from other applications or SDKs

You can now import and use the remotely deployed Reasoning Engine in this notebook session or in a different notebook or Python script. First you need to get its resource_name by calling:

In [None]:
REASONING_ENGINE_RESOURCE_NAME = remote_agent.resource_name
print(REASONING_ENGINE_RESOURCE_NAME)

Afterwards you can use it by uncommenting and adapting the following code:

In [None]:
# from vertexai.preview import reasoning_engines

# remote_agent = reasoning_engines.ReasoningEngine(REASONING_ENGINE_RESOURCE_NAME)
# response = remote_agent.query(input=query)

Or, you can query your agent from other programming languages using any of the [available client libraries in Vertex AI](https://cloud.google.com/vertex-ai/docs/start/client-libraries), including C#, Java, Node.js, Python, Go, or REST API.

## Cleaning up

If you created a new project for this tutorial, delete the project. If you used an existing project and wish to keep it without the changes added in this tutorial, delete resources created for the tutorial.

### Deleting tutorial resources

In [40]:
delete_bucket = False  # @param {type:"boolean"}
delete_reasoning_engine = False  # @param {type:"boolean"}

if delete_bucket:
    ! gsutil -m rm -r $BUCKET_NAME

if delete_reasoning_engine:
    remote_agent.delete()