In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

| Author |
| --- |
| [Nardos Alemu](https://github.com/nardosalemu)|

# Getting Started with Model Optimizer

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/model-optimizer/intro_model_optimizer.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fmodel-optimizer%2Fintro_model_optimizer.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/model-optimizer/intro_model_optimizer.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/model-optimizer/intro_model_optimizer.ipynb">
      <img width="32px" src="https://www.svgrepo.com/download/217753/github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>



<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/model-optimizer/intro_model_optimizer.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/model-optimizer/intro_model_optimizer.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/model-optimizer/intro_model_optimizer.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/model-optimizer/intro_model_optimizer.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/model-optimizer/intro_model_optimizer.ipynb">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>

## Overview

Model Optimizer intelligently routes user queries across models (Pro, Flash, etc.) for cost and quality optimization. This Colab notebook introduces how to use Model Optimizer with the Google Gen AI SDK and demonstrates how to send different types of queries (single-turn, multi-turn, and function-call) to the model.

Learn more about [Model Optimizer](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/model-optimizer).

### Objectives

This tutorial shows how to:

-   Send prompt queries to Model Optimizer.
-   Utilize Model Optimizer for single-turn and multi-turn conversations.
-   Execute function calls through Model Optimizer.


### Costs
This tutorial uses billable components of Google Cloud:

- Vertex AI

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Getting Started

### Install Google Gen AI SDK

Learn more about [Google Gen AI SDK](https://cloud.google.com/vertex-ai/generative-ai/docs/sdks/overview).

In [None]:
# Install the Google Gen AI SDK (upgrading any existing copy) into the user site-packages.
%pip install --upgrade --user google-genai

### Import libraries

In [None]:
import os

from google import genai
from google.colab import auth
from google.genai.types import (
    Content,
    FunctionDeclaration,
    GenerateContentConfig,
    Optional,
    Part,
    Tool,
)

### Authenticate User

In [None]:
# Authenticate the current user via the `auth` helper imported from `google.colab`.
# NOTE(review): `google.colab` is presumably only importable in Colab runtimes;
# confirm before running this notebook elsewhere (e.g. Vertex AI Workbench).
auth.authenticate_user()

### Define Google Cloud project information and initialize Vertex AI

If you are running this notebook on Google Colab, make sure you have run the **Authenticate User** cell above before continuing.

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
LOCATION = "us-central1"  # @param

# Fall back to the environment when the form field is empty or still holds the
# template placeholder. An empty default (instead of `str(None)`) keeps a
# missing variable from silently becoming the literal project ID "None".
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "")

# Fail fast with an actionable message rather than creating a client that can
# only fail later, on the first request.
if not PROJECT_ID:
    raise ValueError(
        "No Google Cloud project configured: set PROJECT_ID in this cell or "
        "define the GOOGLE_CLOUD_PROJECT environment variable."
    )

# Gen AI SDK client configured to use Vertex AI with the project and location
# chosen above; every request in this notebook goes through it.
client = genai.Client(
    vertexai=True,
    project=PROJECT_ID,
    location=LOCATION,
)

### Load Model & set System Instruction

`model-optimizer-exp-04-09` is the model used for Model Optimizer.

In [None]:
# System instruction forwarded with every request sent through
# `generate_content_with_stream_option`; empty by default.
system_instruction = ""  # @param {'type': 'string'}
# Model ID that all requests in this notebook are addressed to.
model = "model-optimizer-exp-04-09"

### Send a request
The following function sends a request to the model. It uses `generate_content` to return the complete response at once, or `generate_content_stream` to stream the response as it is being generated by the model.

In [None]:
def generate_content_with_stream_option(prompt, is_streaming: bool = False) -> None:
    """Send ``prompt`` to the model and print the generated text.

    Args:
        prompt: The request contents, forwarded unchanged as ``contents``.
            This notebook passes a string, a list of strings, or a list of
            ``Content`` objects.
        is_streaming: When truthy, call ``generate_content_stream`` and print
            each chunk as it arrives; otherwise call ``generate_content`` and
            print the complete response once.

    Returns:
        None. The response text is written to standard output.
    """
    # Both code paths use the same request configuration, so build it once.
    config = GenerateContentConfig(system_instruction=system_instruction)

    if not is_streaming:
        response = client.models.generate_content(
            model=model,
            contents=prompt,
            config=config,
        )
        print(response.text)
        return

    for chunk in client.models.generate_content_stream(
        model=model,
        contents=prompt,
        config=config,
    ):
        chunk_text = chunk.text
        # Skip chunks without text instead of printing a literal "None".
        if chunk_text:
            # No newline between chunks: they are consecutive fragments of
            # one response, so a line break would split sentences.
            print(chunk_text, end="", flush=True)
    # Terminate the streamed output with a single newline.
    print()

#### Single-turn query

A single-turn query is the most basic type of interaction with the model, consisting of a single user prompt and a single model response. It's suitable for simple requests where context is not essential.

In [None]:
prompt = "Write a poem"  # @param {'type': 'string'}
is_streaming = False  # @param {'type': 'boolean'}

# The helper branches on the flag itself, so forward it directly.
generate_content_with_stream_option(prompt, is_streaming)

#### Multi-turn queries

There are two ways to structure multi-turn queries:

- **List of text prompts:** A simple list of strings, where each string represents a turn in the conversation. This format is suitable for basic multi-turn scenarios.
- **List of Content objects:** A more structured format using Content objects, where you can explicitly define the role (user or model) and parts of each turn. This format provides greater control and clarity, especially when dealing with complex conversations.


The helper function defined above supports streaming: when `is_streaming` is `True`, it calls `generate_content_stream` instead of `generate_content`. Each streamed chunk has the same type as the non-streaming response, and you can iterate over the chunks as they become available.

In [None]:
# Plain-text prompts sent together in one request.
prompts = ["What is x multiplied by 2?", "x = 42"]

is_streaming = False  # @param {'type': 'boolean'}

# The helper branches on the flag itself, so forward it directly.
generate_content_with_stream_option(prompts, is_streaming)

#### Multi-turn queries 2: list of `Content` objects

In [None]:
# Wrap each user message in a Content object with an explicit "user" role.
prompts = [
    Content(role="user", parts=[Part.from_text(text=message)])
    for message in ("Hello", "How are you?", "Why is the sky blue?")
]

is_streaming = False  # @param {'type': 'boolean'}

# The helper branches on the flag itself, so forward it directly.
generate_content_with_stream_option(prompts, is_streaming)

#### Function-call queries

Model Optimizer also supports calls to functions to perform actions and retrieve information. You define the function's signature (name and parameters), and the model can generate a call to that function with appropriate arguments when it determines that doing so would be helpful. In this example, we define a simple function `get_current_weather` and demonstrate how the model can call it.

In [None]:
def get_current_weather(location: str, unit: Optional[str] = "centigrade"):
    """Return a canned weather report for the given location.

    No weather service is contacted: the function echoes its arguments back
    together with a fixed description, which is enough to demonstrate
    function calling.

    Args:
        location: The location for which to report the weather.
        unit: Temperature unit label to echo back. Defaults to "centigrade".

    Returns:
        A dict with the keys "location", "unit" and "weather".
    """
    # NOTE(review): the default "centigrade" is not one of the `unit` enum
    # values ("celsius", "fahrenheit") declared in this notebook's function
    # schema — confirm which spelling is intended.
    report = {"location": location, "unit": unit}
    report["weather"] = "Super nice, but maybe a bit hot."
    return report


# Schema of the arguments the model may supply when calling the function:
# a required free-text location and an optional temperature unit.
_REQUEST_FUNCTION_PARAMETER_SCHEMA_STRUCT = dict(
    type="object",
    properties=dict(
        location=dict(
            type="string",
            description="The city and state, e.g. San Francisco, CA",
        ),
        unit=dict(type="string", enum=["celsius", "fahrenheit"]),
    ),
    required=["location"],
)

# Schema of the value the function returns: the request fields plus a
# free-text weather description.
_REQUEST_FUNCTION_RESPONSE_SCHEMA_STRUCT = dict(
    type="object",
    properties=dict(
        location=dict(
            type="string",
            description="The city and state, e.g. San Francisco, CA",
        ),
        unit=dict(type="string", enum=["celsius", "fahrenheit"]),
        weather=dict(type="string"),
    ),
)

# Declaration handed to the model so it knows the function's name, purpose,
# and argument/response schemas.
get_current_weather_func = FunctionDeclaration(
    name="get_current_weather",
    description="Get the current weather in a given location",
    parameters=_REQUEST_FUNCTION_PARAMETER_SCHEMA_STRUCT,
    response=_REQUEST_FUNCTION_RESPONSE_SCHEMA_STRUCT,
)


prompt = "What is the weather like in Boston?"

# Expose the declared function to the model as a tool for this request.
weather_tool = Tool(function_declarations=[get_current_weather_func])

response = client.models.generate_content(
    model=model, contents=prompt, config=GenerateContentConfig(tools=[weather_tool])
)

# Collect the names of all function calls in the first candidate. The model
# may answer with plain text (no function call) or return no candidate or
# parts at all, so none of those attributes is assumed to be populated.
function_call_names = [
    part.function_call.name
    for candidate in (response.candidates or [])[:1]
    if candidate.content and candidate.content.parts
    for part in candidate.content.parts
    if part.function_call
]

if function_call_names:
    print("Called function: ", function_call_names[0])
else:
    print("The model did not request a function call.")

To run the requested function locally and build the corresponding function response parts:


In [None]:
# Maps the function names the model can request to the local Python callables
# that implement them.
function_map = {"get_current_weather": get_current_weather}

function_response_parts = []
for part in response.candidates[0].content.parts:
    function_call = part.function_call
    if not function_call:
        # Parts without a function call have nothing to execute; skip them
        # instead of failing on `None.name`.
        continue
    # An unknown name raises KeyError on purpose: only declared functions are
    # expected here.
    function = function_map[function_call.name]
    # `args` may be unset when the model supplies no arguments.
    function_result = function(**(function_call.args or {}))
    function_response_part = Part.from_function_response(
        name=function_call.name,
        response=function_result,
    )
    function_response_parts.append(function_response_part)

print(function_response_parts)