In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Call Gemini by using the OpenAI Library

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/chat-completions/intro_chat_completions_api.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fchat-completions%2Fintro_chat_completions_api.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/chat-completions/intro_chat_completions_api.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/chat-completions/intro_chat_completions_api.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://goo.gle/4jeQztq">
      <img width="32px" src="https://cdn.qwiklabs.com/assets/gcp_cloud-e3a77215f0b8bfa9b3f611c0d2208c7e8708ed31.svg" alt="Google Cloud logo"><br> Open in  Cloud Skills Boost
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/chat-completions/intro_chat_completions_api.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/chat-completions/intro_chat_completions_api.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/chat-completions/intro_chat_completions_api.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/chat-completions/intro_chat_completions_api.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/chat-completions/intro_chat_completions_api.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>            

| Author |
| --- |
| [Eric Dong](https://github.com/gericdong) |

## Overview

Developers already working with OpenAI's libraries can easily tap into the power of Gemini by leveraging the Chat Completions API. The Chat Completions API offers a streamlined way to experiment with and incorporate Gemini's capabilities into your existing AI applications.

If you are not already using the OpenAI libraries, we recommend using the Google Gen AI SDK. Learn more about [calling Vertex AI models by using the OpenAI library](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/call-gemini-using-openai-library).

In this tutorial, you learn to call Gemini using the OpenAI library. You will complete the following tasks:

- Configure OpenAI SDK for the Chat Completions API
- Send a chat completions request
- Stream chat completions response
- Send a multimodal request
- Send a function calling request
- Send a function calling request with the `tool_choice` parameter
- Use controlled generation
- Control thinking budget


## Get started

### Install required packages


In [None]:
# Install or upgrade the packages imported by this notebook: the OpenAI client,
# google-auth, and requests (google-auth's `transport.requests` module is imported below).
%pip install --upgrade --quiet openai google-auth requests

### Authenticate your notebook environment (Colab only)

If you're running this notebook on Google Colab, run the cell below to authenticate your environment.

In [None]:
import sys

# Only attempt Colab authentication when the `google.colab` module is already
# loaded in this interpreter; in any other environment this cell does nothing.
if "google.colab" in sys.modules:
    from google.colab import auth

    # Authenticate the user for this Colab runtime.
    auth.authenticate_user()

### Set Google Cloud project information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
import os

PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}

# When the form field is empty or still holds the placeholder, fall back to the
# GOOGLE_CLOUD_PROJECT environment variable. An empty-string default keeps a
# missing variable falsy instead of turning it into the literal string "None",
# which would otherwise be silently embedded in the endpoint URL.
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "")

# Region used to build the endpoint; "global" is used when no region is set.
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "global")

if PROJECT_ID:
    print(f"Using Vertex AI with project: {PROJECT_ID} in location: {LOCATION}")
else:
    # Surface the misconfiguration here rather than as an opaque API error later.
    print(
        "WARNING: No Google Cloud project is configured. Set PROJECT_ID above or "
        "the GOOGLE_CLOUD_PROJECT environment variable before sending requests."
    )

## Chat Completions API Examples

### Configure OpenAI SDK for the Chat Completions API

#### Import libraries

The `google-auth` library is used to programmatically get Google credentials. Colab already has this library pre-installed.

In [None]:
from IPython.display import Markdown, display
from google.auth import default
from google.auth.transport.requests import Request
import openai

#### Authentication

You can request an access token from the default credentials for the current environment. Note that the access token lives for [1 hour by default](https://cloud.google.com/docs/authentication/token-types#at-lifetime); after expiration, it must be refreshed.


In [None]:
# Programmatically get an access token
# `default()` yields two values; only the credentials object is kept, the
# second value is discarded.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
# Refresh the credentials; `credentials.token` is handed to the OpenAI client
# in a later cell.
credentials.refresh(Request())

Then configure the OpenAI SDK to point to the Chat Completions API endpoint.

In [None]:
# The "global" location uses the bare hostname; every other location uses a
# hostname prefixed with the location name.
api_host = (
    "aiplatform.googleapis.com"
    if LOCATION == "global"
    else f"{LOCATION}-aiplatform.googleapis.com"
)

# Point the OpenAI client at the project's Chat Completions endpoint and
# authenticate with the access token obtained above.
client = openai.OpenAI(
    api_key=credentials.token,
    base_url=f"https://{api_host}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/openapi",
)

### Supported models

The Chat Completions API supports both Gemini models and select self-deployed models from Model Garden. Learn more about [supported models](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/call-vertex-using-openai-library#supported_models).

In [None]:
# Model identifier passed as `model=` in the requests that follow.
MODEL_ID = "google/gemini-2.0-flash-001"  # @param {type:"string"}

### **Example**: Send a chat completions request

The Chat Completions API takes a list of messages as input and returns a generated message as output. Although the message format is designed to make multi-turn conversations easy, it's just as useful for single-turn tasks without any conversation.

In this example, you use the Chat Completions API to send a request to the Gemini model.

In [None]:
# Single-turn request: one user message, no system prompt or history.
response = client.chat.completions.create(
    model=MODEL_ID,
    messages=[
        {"role": "user", "content": "Why is the sky blue?"},
    ],
)

An example Chat Completions API response looks as follows:

In [None]:
# Print the whole response object so every returned field can be inspected.
print(response)

The generated content can be extracted with:

In [None]:
# Text of the first choice (index 0) in the response.
response.choices[0].message.content

You can use `Markdown` to display the formatted text.

In [None]:
# Wrap the same text in `Markdown` so the notebook renders it as formatted output.
Markdown(response.choices[0].message.content)

### **Example**: Stream chat completions response

By default, the model returns a response after completing the entire generation process. You can also stream the response as it is being generated, and the model will return chunks of the response as soon as they are generated.

In [None]:
# Accumulate streamed text and re-render it in a single, updatable output area.
output_text = ""
markdown_display_area = display(Markdown(output_text), display_id=True)

for chunk in client.chat.completions.create(
    model=MODEL_ID,
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    stream=True,
):
    # A streamed chunk is not guaranteed to carry text: `choices` can be empty
    # and `delta.content` can be None (for example on a role-only or final
    # chunk). Skip those instead of failing on `str += None` or `choices[0]`.
    if not chunk.choices:
        continue
    delta_text = chunk.choices[0].delta.content
    if not delta_text:
        continue

    output_text += delta_text
    markdown_display_area.update(Markdown(output_text))

### **Example**: Send a multimodal request

You can send a multimodal prompt in a request to Gemini and get a text output.

In this example, you ask the model to create a blog post for [this image](https://storage.googleapis.com/cloud-samples-data/generative-ai/image/meal.png) stored in a Cloud Storage bucket.


In [None]:
# One user message whose content has two parts: a text instruction and an
# image referenced by its Cloud Storage URI.
response = client.chat.completions.create(
    model=MODEL_ID,
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Write a short, engaging blog post based on this picture."},
                {"type": "image_url", "image_url": "gs://cloud-samples-data/generative-ai/image/meal.png"},
            ],
        }
    ],
)

# Render the generated post as Markdown.
Markdown(response.choices[0].message.content)

### **Example**: Send a function calling request

You can use the Chat Completions API for function calling with the Gemini models. The `tools` parameter in the API is used to provide function specifications. This is to enable models to generate function arguments which adhere to the provided specifications.

In this example, you create function specifications to interface with a hypothetical weather API, then pass these function specifications to the Chat Completions API to generate function arguments that adhere to the specification.

**Note** that in this example, the API will not actually execute any function calls. It is up to developers to execute function calls using model outputs.

In [None]:
# Conversation: a system instruction followed by the user's question.
messages = [
    {
        "role": "system",
        "content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.",
    },
    {"role": "user", "content": "What is the weather in Boston?"},
]

# Specification of the single function the model may ask to call. It takes one
# required string argument, `location`.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "required": ["location"],
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616",
                    },
                },
            },
        },
    }
]

response = client.chat.completions.create(model=MODEL_ID, messages=messages, tools=tools)

# Show the tool calls carried by the first choice's message; nothing is
# executed here.
print(response.choices[0].message.tool_calls)

### **Example**: Send a function calling request with the `tool_choice` parameter

When function specifications are provided through the `tools` parameter, the model decides by default when it is appropriate to call one of the functions.

By default, `tool_choice` is set to `auto`. This lets the model decide whether to call functions and, if so, which functions to call. To disable function calling and force the model to only generate a user-facing message, you can set `tool_choice` to `none`. The example below passes `tool_choice="auto"` explicitly.

In [None]:
# Same weather function specification as in the previous example, repeated so
# this cell can be read on its own.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616",
                        "type": "string",
                    },
                },
                "required": ["location"],
            },
        },
    }
]

# System instruction plus the user's question.
messages = [
    {
        "content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.",
        "role": "system",
    },
    {"content": "What is the weather in Boston?", "role": "user"},
]

# `tool_choice` is passed explicitly here, with the value "auto".
response = client.chat.completions.create(
    tool_choice="auto",
    tools=tools,
    messages=messages,
    model=MODEL_ID,
)

print(response.choices[0].message.tool_calls)

### **Example**: Use controlled generation

The Gemini models allow you to define a response schema to specify the structure of a model's output, the field names, and the expected data type for each field. The response schema is specified in the `response_format` parameter, and the model output will strictly follow that schema.

In [None]:
from pydantic import BaseModel


# Response schema handed to the API as `response_format`: two string fields and
# a list of strings. Described in a comment rather than a docstring so the
# model class itself stays exactly as it was.
class Recipe(BaseModel):
    name: str
    description: str
    ingredients: list[str]


# `parse` takes the pydantic class as the response format.
response = client.beta.chat.completions.parse(
    model=MODEL_ID,
    response_format=Recipe,
    messages=[
        {"role": "user", "content": "List a few popular cookie recipes and their ingredients."},
    ],
)

# Print the whole message object of the first choice.
print(response.choices[0].message)

### **Example**: Control thinking budget

Gemini 2.5 models are trained to think through complex problems, leading to significantly improved reasoning. The Gemini API comes with a [`thinking_budget`](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking#budget) parameter, which gives you control over how much the model thinks when generating a response.

See the following mapping between the OpenAI API parameter `reasoning_effort` and the Gemini API parameter `thinking_budget`:

| `reasoning_effort` | `thinking_budget` |
|--------------------|-------------------|
| `none`             | `0`               |
| `low`              | `1024`            |
| `medium`           | `8192`            |
| `high`             | `24576`           |

In [None]:
prompt = """
Write a bash script that takes a matrix represented as a string with 
format '[1,2],[3,4],[5,6]' and prints the transpose in the same format.
"""

# Thinking requires a Gemini 2.5 model, so this example does not reuse MODEL_ID.
# The stable model ID is used instead of a dated preview ID, because dated
# preview versions are retired after a limited period.
# NOTE(review): confirm this model is available in the configured location.
THINKING_MODEL_ID = "google/gemini-2.5-flash"  # @param {type:"string"}

# `reasoning_effort` is the OpenAI-side parameter; see the table above for the
# `thinking_budget` each value maps to.
response = client.chat.completions.create(
    model=THINKING_MODEL_ID,
    reasoning_effort="medium",
    messages=[{"role": "user", "content": prompt}],
)

Markdown(response.choices[0].message.content)