In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - Get started with DeepSeek-V3.2 models

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_openai_api_deepseek3_2.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_openai_api_deepseek3_2.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_openai_api_deepseek3_2.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_openai_api_deepseek3_2.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

## Overview

This notebook demonstrates how to get started with the OpenAI library on Vertex AI and how to use DeepSeek-V3.2 models as Model-as-a-Service (MaaS) to build a translation chain.

### Objective

- Configure OpenAI SDK for the DeepSeek-V3.2 Chat Completions API
- Chat with DeepSeek-V3.2 models with different prompts and model parameters, and apply Llama Guard for safeguarding
- Build with DeepSeek-V3.2 models
  - Translation Chain.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Get started

### Install Vertex AI SDK for Python and other required packages


In [None]:
! pip3 install --upgrade --quiet google-cloud-aiplatform[langchain] openai
! pip3 install --upgrade --quiet langchain-openai

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
import sys

# Restart the kernel only when running inside Colab, so the packages installed
# above are picked up by the new session.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.


In [None]:
import sys

# The interactive sign-in is only performed when the Colab runtime is loaded.
colab_runtime_loaded = "google.colab" in sys.modules
if colab_runtime_loaded:
    from google.colab import auth

    auth.authenticate_user()

### Set Google Cloud project information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Google Cloud project ID; it is interpolated into the endpoint URL used by the
# OpenAI-compatible clients below. Replace the placeholder before running.
PROJECT_ID = "<YOUR PROJECT ID>"  # @param {type:"string"}

# NOTE(review): LOCATION is not referenced by the cells visible in this notebook
# (the clients use MODEL_LOCATION instead) — confirm whether it is still needed.
LOCATION = "global"  # @param {type:"string"}

### Import libraries

Import libraries to use in this tutorial.

In [None]:
# Chat completions API
import openai
from google.auth import default, transport
from langchain import PromptTemplate
# Build
from langchain_openai import ChatOpenAI

### Configure OpenAI SDK for the DeepSeek-V3.2 Chat Completions API

To configure the OpenAI SDK for the DeepSeek-V3.2 Chat Completions API, you need to request the access token and initialize the client pointing to the DeepSeek-V3.2 endpoint.


#### Authentication

You can request an access token from the default credentials for the current environment. Note that the access token lives for [1 hour by default](https://cloud.google.com/docs/authentication/token-types#at-lifetime); after expiration, it must be refreshed.


In [None]:
# `from google.auth import transport` does not load the `requests` submodule, so
# import it explicitly instead of relying on another package having done so.
import google.auth.transport.requests

# Resolve the default credentials for the current environment.
credentials, _ = default()

# Refresh to obtain an access token; `credentials.token` is later passed as the
# API key of the OpenAI-compatible clients.
auth_request = google.auth.transport.requests.Request()
credentials.refresh(auth_request)

Then configure the OpenAI SDK to point to the DeepSeek-V3.2 Chat Completions API endpoint.

Note: `global` is the only supported region for DeepSeek-V3.2 models using Model-as-a-Service (MaaS).

In [None]:
MODEL_LOCATION = "global"

# Base URL of the Vertex AI OpenAI-compatible endpoint. The OpenAI SDK appends
# the route (for example `/chat/completions`) itself, so the base URL stops at
# `/endpoints/openapi` rather than embedding the route followed by a `?`.
client = openai.OpenAI(
    base_url=f"https://aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{MODEL_LOCATION}/endpoints/openapi",
    api_key=credentials.token,
)

#### DeepSeek-V3.2 Model

This tutorial uses DeepSeek-V3.2 using Model-as-a-Service (MaaS). Using Model-as-a-Service (MaaS), you can access DeepSeek-V3.2 model in just a few clicks without any setup or infrastructure hassles. Model-as-a-Service (MaaS) integrates [Llama Guard](https://huggingface.co/meta-llama/Llama-Guard-3-8B) as a safety filter. It is switched on by default and can be switched off. Llama Guard enables us to safeguard model inputs and outputs. If a response is filtered, it will be populated with a `finish_reason` field (with value `content_filtered`) and a `refusal` field (stating the filtering reason).

In [None]:
# Model identifier passed as `model=` to every chat completions request below.
MODEL_ID = "deepseek-ai/deepseek-v3.2-maas"  # @param {type:"string"} ["deepseek-ai/deepseek-v3.2-maas"]

### Chat with DeepSeek-V3.2

Use the Chat Completions API to send a request to the DeepSeek-V3.2 model.

#### Hello, DeepSeek-V3.2!

In [None]:
apply_llama_guard = True  # @param {type:"boolean"}

# Google-specific request extension that switches the Llama Guard filter on or off.
safety_settings = {
    "enabled": apply_llama_guard,
    "llama_guard_settings": {},
}
greeting_messages = [{"role": "user", "content": "Hello, Deepseek!"}]

response = client.chat.completions.create(
    model=MODEL_ID,
    messages=greeting_messages,
    extra_body={"extra_body": {"google": {"model_safety_settings": safety_settings}}},
)

In [None]:
# Show the text of the first completion choice.
first_choice = response.choices[0]
print(first_choice.message.content)

#### Ask DeepSeek-V3.2 using different model configuration

Use the following parameters to generate different answers:

*   `temperature` to control the randomness of the response
*   `max_tokens` to limit the response length
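*   `top_p` to control the diversity of the response via nucleus sampling (only the most likely tokens whose cumulative probability reaches `top_p` are considered)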
*   `stream` to stream the response back or not


In [None]:
# Generation options forwarded to the chat completions request in the next code cell.
temperature = 1.0  # @param {type:"number"}
max_tokens = 256  # @param {type:"integer"}
top_p = 1.0  # @param {type:"number"}
# When True, the response is consumed chunk by chunk instead of as one message.
stream = True  # @param {type:"boolean"}

Get the answer.

In [None]:
apply_llama_guard = True  # @param {type:"boolean"}

# The trailing assistant message seeds the beginning of the model's answer.
conversation = [
    {"role": "user", "content": "What is Vertex AI?"},
    {"role": "assistant", "content": "Sure, Vertex AI is:"},
]

# Google-specific request extension that switches the Llama Guard filter on or off.
safety_settings = {
    "enabled": apply_llama_guard,
    "llama_guard_settings": {},
}

response = client.chat.completions.create(
    model=MODEL_ID,
    messages=conversation,
    temperature=temperature,
    max_tokens=max_tokens,
    top_p=top_p,
    stream=stream,
    extra_body={"extra_body": {"google": {"model_safety_settings": safety_settings}}},
)

Depending on whether the `stream` parameter is enabled, you can print the response chunk by chunk or all at once.

In [None]:
if stream:
    # Streaming mode: print each text fragment as soon as it arrives.
    for chunk in response:
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        # Some chunks (for example role-only or final chunks) carry no text;
        # skip them so the literal string "None" is never printed.
        if delta_text:
            print(delta_text, end="", flush=True)
    # Terminate the line that the streamed fragments were written on.
    print()
else:
    # Non-streaming mode: the whole answer is available in a single message.
    print(response.choices[0].message.content)

#### Use DeepSeek-V3.2 with different tasks

In this section, you will use DeepSeek-V3.2 to perform different tasks including text generation, text summarization, and code generation.

For each task, you'll define a different prompt and submit a request to the model as you did before.

##### Text Generation

In [None]:
# User prompt for the text generation example.
prompt: str = "Write a poem about a cat who loves to code"

In [None]:
apply_llama_guard = True  # @param {type:"boolean"}

# Google-specific request extension that switches the Llama Guard filter on or off.
safety_settings = {
    "enabled": apply_llama_guard,
    "llama_guard_settings": {},
}
user_messages = [{"role": "user", "content": prompt}]

response = client.chat.completions.create(
    model=MODEL_ID,
    messages=user_messages,
    extra_body={"extra_body": {"google": {"model_safety_settings": safety_settings}}},
)

In [None]:
# Show the generated text from the first completion choice.
generated_message = response.choices[0].message
print(generated_message.content)

##### Text summarization

In [None]:
# Sample article used as the input of the summarization example.
article = """
Vertex AI: Google's Unified Platform for Machine Learning

Google Cloud's Vertex AI is a comprehensive platform that simplifies the process of building, deploying, and managing machine learning (ML) models and AI applications. It provides a single environment for all your AI needs, from data preparation to model deployment and monitoring.

Vertex AI offers a range of features to cater to various user levels, including:

AutoML: This feature allows you to train models on tabular, image, text, or video data without writing code. It's ideal for users without extensive ML expertise.
Custom Training: For advanced users, Vertex AI provides custom training options, allowing you to use your preferred ML framework and write your own code.
Model Garden: This feature lets you discover, test, and deploy pre-trained models from Vertex AI and open-source sources.
Generative AI: Access Google's powerful large language models (LLMs) to generate text, code, images, and speech, which can be customized and deployed for your applications.
Vertex AI seamlessly integrates with other Google Cloud services like BigQuery for data warehousing, Cloud Storage for data management, and Cloud AI Platform for custom model training. It provides managed infrastructure that can be tailored to your performance and budget needs.

Whether you're a seasoned data scientist or just starting out with AI, Vertex AI simplifies the entire ML lifecycle and empowers you to build and deploy AI solutions effectively.
"""

# Collapse every run of whitespace (including the newlines of the triple-quoted
# literal) into a single space. Deleting the newlines outright would fuse
# neighbouring lines together, e.g. "...Machine LearningGoogle Cloud's...".
article_single_line = " ".join(article.split())

prompt = "Summarize the following article in one sentence: " + article_single_line

In [None]:
apply_llama_guard = True  # @param {type:"boolean"}

# Google-specific request extension that switches the Llama Guard filter on or off.
safety_settings = {
    "enabled": apply_llama_guard,
    "llama_guard_settings": {},
}
user_messages = [{"role": "user", "content": prompt}]

response = client.chat.completions.create(
    model=MODEL_ID,
    messages=user_messages,
    extra_body={"extra_body": {"google": {"model_safety_settings": safety_settings}}},
)

In [None]:
# Show the summary text from the first completion choice.
summary_message = response.choices[0].message
print(summary_message.content)

##### Code generation

In [None]:
# User prompt for the code generation example.
prompt: str = (
    "Write a Python function that takes a list of numbers and returns the average. Include error handling for empty lists."
)

In [None]:
apply_llama_guard = True  # @param {type:"boolean"}

# Google-specific request extension that switches the Llama Guard filter on or off.
safety_settings = {
    "enabled": apply_llama_guard,
    "llama_guard_settings": {},
}
user_messages = [{"role": "user", "content": prompt}]

response = client.chat.completions.create(
    model=MODEL_ID,
    messages=user_messages,
    extra_body={"extra_body": {"google": {"model_safety_settings": safety_settings}}},
)

In [None]:
# Show the generated code from the first completion choice.
code_message = response.choices[0].message
print(code_message.content)

### Build with DeepSeek-V3.2

In this section, you use DeepSeek-V3.2 to build a simple translation application.

**Translation Chain** to translate text across multiple languages using DeepSeek-V3.2 and LangChain Expression Language (LCEL).


#### Translation chain

In this scenario, you use LangChain Expression Language (LCEL) to build a simple chain which translates some `text_to_translate` to the specified `target_language`.

##### Initialize the chat interface and the translation prompt template using LangChain

In [None]:
# LangChain chat wrapper pointed at the Vertex AI OpenAI-compatible endpoint.
# The underlying OpenAI client appends `/chat/completions` itself, so the base
# URL stops at `/endpoints/openapi` rather than embedding the route plus a `?`.
llm = ChatOpenAI(
    model=MODEL_ID,
    base_url=f"https://aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{MODEL_LOCATION}/endpoints/openapi",
    api_key=credentials.token,
)

# State the instruction first and put the text to translate on its own line, so
# the prompt does not splice the text mid-sentence and end on a dangling colon.
template = """Translate the following text to {target_language}:

{text}"""

# Both placeholders of the template must be supplied when the prompt is invoked.
prompt = PromptTemplate(input_variables=["text", "target_language"], template=template)

##### Initialize the chain

In [None]:
# Compose the prompt template and the chat model with the LCEL pipe operator;
# the resulting chain is invoked with the template's input variables.
chain = prompt | llm

##### Translate a text

In [None]:
text_to_translate = "Hello Deepseek!"  # @param {type:"string"}
target_language = "Italian"  # @param {type:"string"}

# The keys correspond to the input variables declared on the prompt template.
chain_inputs = {
    "text": text_to_translate,
    "target_language": target_language,
}
response = chain.invoke(chain_inputs)

In [None]:
# Show the content of the chain's response.
translated_text = response.content
print(translated_text)