In [None]:
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Getting Started with Camb AI's Models

## Overview

### Camb AI on Vertex AI

You can deploy the Camb AI models in your own endpoint.

### Available Camb AI models

#### MARS8

MARS8 is CAMB.AI's latest speech synthesis model. It is designed for high-quality, multilingual, low-latency TTS output.
It features voice cloning and fine-grained pronunciation control in over 30 languages.
Deployed on Google Cloud’s Vertex AI Launchpad, it brings latencies of less than 400ms with Blackwell 6000 GPUs, with an architectural focus on speed and voice cloning capabilities.

#### MARS7
MARS7 (Multilingual AutoRegressive Speech 7) is the prior generation in CAMB.AI’s MARS series of speech synthesis models. MARS7 creates hyper-realistic, prosodic, multilingual text-to-speech (TTS) outputs, featuring optional voice cloning and fine-grained emotional control. Deployed on Google Cloud’s Vertex AI Launchpad, it brings near real-time latency, with an architectural focus on parameter efficiency and global context understanding.

## Objective
This notebook shows how to use the **Vertex AI API** to deploy the Camb AI models and generate speech with them.


## Vertex AI API

## Get Started


### Install required packages


In [1]:
! pip3 install -U -q httpx soundfile
! command -v jq >/dev/null 2>&1 || { echo >&2 "jq is not installed. Installing jq..."; sudo apt-get update -y && sudo apt-get install -y jq; }

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [2]:
import sys

# Only restart when the google.colab module is loaded, i.e. when running on Colab.
if "google.colab" in sys.modules:
    import IPython

    # Ask the running kernel application to shut down; the True argument is
    # presumably the "restart" flag, matching the section title — confirm.
    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

In [16]:
import sys
import tempfile

import IPython.display as ipd
import soundfile as sf
from IPython.display import Audio


def display_flac_in_notebook(path_to_flac):
    """Return an IPython ``Audio`` player for a FLAC file, transcoded to WAV.

    The file is decoded with ``soundfile`` and re-encoded as WAV into an
    in-memory buffer, so no temporary file is left behind on disk.

    Args:
        path_to_flac: Path of the FLAC file to play.

    Returns:
        An ``IPython.display.Audio`` object embedding the WAV bytes; make it the
        last expression of a cell (or pass it to ``display``) to render it.
    """
    import io

    data, samplerate = sf.read(path_to_flac)
    wav_buffer = io.BytesIO()
    # format must be given explicitly because a buffer has no file extension.
    sf.write(wav_buffer, data, samplerate, format="WAV")
    return Audio(data=wav_buffer.getvalue())

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.


In [1]:
import sys

# Interactive authentication is only attempted when the google.colab module is
# loaded, i.e. when the notebook runs on Colab.
if "google.colab" in sys.modules:

    from google.colab import auth

    # Presumably makes the user's Google credentials available to the later
    # gcloud / SDK calls in this notebook — confirm against the Colab docs.
    auth.authenticate_user()

#### Select one of Camb AI models

In [2]:
# Publisher and model identifiers; the upload cell combines them into
# publishers/{PUBLISHER_NAME}/models/{PUBLISHER_MODEL_NAME}.
PUBLISHER_NAME = "cambai"  # @param {type:"string"}
PUBLISHER_MODEL_NAME = "mars8"
# Regions offered as options in the location dropdown.
available_regions = ["us-central1"]

#### Select a location from the dropdown

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Dropdown whose options are the regions listed in `available_regions`.
dropdown_loc = widgets.Dropdown(
    options=available_regions,
    description="Select a location:",
    # NOTE(review): font_weight does not look like a Dropdown trait — confirm it has any effect.
    font_weight="bold",
    # "initial" keeps the full description text visible instead of truncating it.
    style={"description_width": "initial"},
)


def dropdown_loc_eventhandler(change):
    """Store the newly selected region in the global ``LOCATION`` and echo it.

    Args:
        change: Change notification from the dropdown. It is indexed by
            ``"type"`` and ``"name"`` and exposes the new value as ``.new``.
            Anything other than a ``"change"`` of ``"value"`` is ignored.
    """
    global LOCATION
    # Guard clause: bail out on notifications that are not a value change.
    if change["type"] != "change" or change["name"] != "value":
        return
    LOCATION = change.new
    print("Selected:", change.new)


# Seed LOCATION from the dropdown's current value so it is defined even if the
# selection is never changed; the observer then updates it on later changes.
LOCATION = dropdown_loc.value
dropdown_loc.observe(dropdown_loc_eventhandler, names="value")
display(dropdown_loc)

#### Set Google Cloud project and model information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [4]:
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
# Regional API base URL. LOCATION is read once here, so re-run this cell if the
# location dropdown is changed afterwards.
ENDPOINT = f"https://{LOCATION}-aiplatform.googleapis.com"

# Fail fast while PROJECT_ID is empty or still the placeholder value.
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    raise ValueError("Please set your PROJECT_ID")

#### Import required libraries

In [5]:
import json
import time

### Upload Model

In [None]:
UPLOAD_MODEL_PAYLOAD = {
    "model": {
        "displayName": "ModelGarden_LaunchPad_Model_" + time.strftime("%Y%m%d-%H%M%S"),
        "baseModelSource": {
            "modelGardenSource": {
                "publicModelName": f"publishers/{PUBLISHER_NAME}/models/{PUBLISHER_MODEL_NAME}",
            }
        },
    }
}

request = json.dumps(UPLOAD_MODEL_PAYLOAD)

! curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION}/models:upload -d '{request}'

#### Get Model

In [None]:
# -1 is a placeholder: copy the model id from the upload in the previous cell
# into MODEL_ID before running this cell.
MODEL_ID = -1  # @param {type: "number"}
# Fetch the model resource with that id.
! curl -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/models/{MODEL_ID}

### Create the endpoint

In [None]:
CREATE_ENDPOINT_PAYLOAD = {
    "displayName": "ModelGarden_LaunchPad_Endpoint_" + time.strftime("%Y%m%d-%H%M%S"),
}

request = json.dumps(CREATE_ENDPOINT_PAYLOAD)

! curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints -d '{request}'

#### Get Endpoint

In [None]:
# -1 is a placeholder: copy the endpoint id from the create-endpoint call in the
# previous cell into ENDPOINT_ID before running this cell.
ENDPOINT_ID = -1  # @param {type: "number"}
# Fetch the endpoint resource with that id.
! curl -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}

### Deploy Model

In [10]:
# Hardware settings; the deploy cell passes them as the machineSpec of the
# deployModel request.
MACHINE_TYPE = "g4-standard-48"  # @param {type: "string"}
ACCELERATOR_TYPE = "NVIDIA_RTX_PRO_6000"  # @param {type: "string"}
ACCELERATOR_COUNT = 1  # @param {type: "number"}

In [None]:
# Deploy the uploaded model (MODEL_ID) to the endpoint (ENDPOINT_ID) on the
# hardware chosen in the previous cell, pinned to exactly one replica.
DEPLOY_PAYLOAD = {
    "deployedModel": {
        "model": f"projects/{PROJECT_ID}/locations/{LOCATION}/models/{MODEL_ID}",
        "displayName": "ModelGarden_LaunchPad_DeployedModel_"
        + time.strftime("%Y%m%d-%H%M%S"),
        "dedicatedResources": {
            "machineSpec": {
                "machineType": MACHINE_TYPE,
                "acceleratorType": ACCELERATOR_TYPE,
                "acceleratorCount": ACCELERATOR_COUNT,
            },
            # min == max: no autoscaling, a single replica.
            "minReplicaCount": 1,
            "maxReplicaCount": 1,
        },
    },
    # NOTE(review): key "0" presumably refers to the model deployed by this very
    # request, which then receives 100% of the traffic — confirm against the API docs.
    "trafficSplit": {"0": 100},
}

# Compact JSON for the request body; the indented copy is printed for the reader.
request = json.dumps(DEPLOY_PAYLOAD)
print("Request payload to Deploy Model:")
print(json.dumps(DEPLOY_PAYLOAD, indent=2))
print("\nResult:")
! curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" {ENDPOINT}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}:deployModel -d '{request}'

### Audio generation

#### Unary call

Sends a POST request to the endpoint's `rawPredict` method with the text to synthesize and a base64-encoded reference audio clip, and saves the audio returned by the model to `output.flac`.

In [13]:
# Export the notebook settings as environment variables so that shell cells
# can read them as ${ENDPOINT}, ${PROJECT_ID}, ${LOCATION} and ${ENDPOINT_ID}.
import os

os.environ.update(
    {
        "ENDPOINT": ENDPOINT,
        "PROJECT_ID": PROJECT_ID,
        "LOCATION": LOCATION,
        # Environment values must be strings; ENDPOINT_ID is entered as a number.
        "ENDPOINT_ID": str(ENDPOINT_ID),
    }
)

In [None]:
%%bash
# Stop at the first failing command, unset variable, or failing pipeline stage,
# so a broken step is reported instead of silently producing a bad output.flac.
set -euo pipefail

# download the ref file (-f: fail on an HTTP error instead of saving the error page)
curl -fsS -o ref.flac "https://storage.googleapis.com/cambai-prod-public/public/ref.flac"
# base64 encode the ref (stripping line wraps) and save the payload:
b64=$(base64 -i ref.flac | tr -d '\n')
cat > body.json <<EOF
{
  "text": "The quick brown fox jumps over the lazy dog.",
  "reference_audio": "$b64",
  "reference_language": "en-us",
  "output_duration": null,
  "language": "en-us"
}
EOF

# send the request, saving the output to a .flac file and capturing the HTTP status:
http_status=$(curl -sS -X POST "${ENDPOINT}/v1/projects/${PROJECT_ID}/locations/${LOCATION}/endpoints/${ENDPOINT_ID}:rawPredict" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $(gcloud auth print-access-token)" \
    -d @body.json -o output.flac -w '%{http_code}')
# On failure the body is an error message, not audio: show it and discard the file.
if [ "${http_status}" != "200" ]; then
    echo "Request failed with HTTP status ${http_status}:" >&2
    cat output.flac >&2 || true
    rm -f output.flac
    exit 1
fi
# NOTE: the first request to a model endpoint pod might be slow as the model compiles kernels

In [None]:
# Play output.flac, the file the previous cell saved the model response to.
# The Audio object is the cell's last expression, so it is shown as the cell output.
ipd.Audio("output.flac")

#### Unary Call with Vertex SDK

In [22]:
import base64
from pathlib import Path

from google.cloud import aiplatform

aiplatform.init(project=PROJECT_ID, location=LOCATION)
endpoint = aiplatform.Endpoint(endpoint_name=str(ENDPOINT_ID))

# encode reference audio as base64 string:
file_path = "ref.flac"
encoded_str = base64.b64encode(Path(file_path).read_bytes()).decode("utf-8")

# define payload:
data = {
    "text": "The quick brown fox jumps over the lazy dog.",
    "language": "en-us",
    "output_duration": None,
    "reference_language": "en-us",
    "reference_audio": encoded_str,
}

# raw_predict returns the HTTP response object of the rawPredict call.
response = endpoint.raw_predict(
    body=json.dumps(data).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    use_dedicated_endpoint=True,
)
# On failure the body is an error message, not audio: raise instead of saving it.
if not response.ok:
    raise RuntimeError(
        f"rawPredict failed with HTTP {response.status_code}: {response.text}"
    )
full_audio_bytes = response.content
# save output to a local file
with open("output.flac", "wb") as file:
    file.write(full_audio_bytes)

In [None]:
# Play output.flac, the file the previous cell saved the model response to.
# The Audio object is the cell's last expression, so it is shown as the cell output.
ipd.Audio("output.flac")

#### Unary call with streaming

In [None]:
import base64
import subprocess
import time
from pathlib import Path

import httpx

url = f"https://{LOCATION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}:rawPredict"
# authenticate; check=True surfaces a gcloud failure here instead of sending an
# empty bearer token with the request
res = subprocess.run(
    ["gcloud", "auth", "print-access-token"], capture_output=True, check=True
)
access_token = res.stdout.decode("utf-8").strip()
headers = {"Authorization": f"Bearer {access_token}", "Accept": "text/event-stream"}
# Define reference audio to use for cloning (base64 encoded)
file_path = "ref.flac"
encoded_str = base64.b64encode(Path(file_path).read_bytes()).decode("utf-8")

data = {
    "text": "そしてカールソンは、あの軽やかなスケーティングで、まるで幽霊のようにニュートラルゾーンを縫うように駆け抜ける.",  # text to synthesize
    "language": "ja-jp",
    "output_duration": None,
    "reference_language": "en-us",
    "reference_audio": encoded_str,
}

st = time.time()

prediction = []
with httpx.stream("POST", url, headers=headers, json=data, timeout=300) as r:
    print(r.status_code, r.headers)
    # Check the response status code before consuming the body: on failure the
    # body is an error message, so it must not be collected and saved as audio.
    if r.status_code != 200:
        error_body = r.read().decode("utf-8", errors="replace")
        raise RuntimeError(
            f"Request failed with status code {r.status_code}: {error_body}"
        )
    # dt marks the arrival of the response headers, so times measured from it
    # exclude the initial request round trip.
    dt = time.time()
    for chunk in r.iter_bytes(4096 * 16):
        if chunk:
            # each chunk is a bytes object of the next audio chunk.
            # If you want you can render the output piece by piece, or use an async receiver.
            print(
                f"Received chunk of size {len(chunk)} at {time.time() - st:.2f}s. w/o network delay: {time.time() - dt:.2f}s"
            )
            prediction.append(chunk)
et = time.time()
full_audio_bytes = b"".join(prediction)
print(f"Request successful! Took {et-st:.2f}s")
# display or save full output
with open("output.flac", "wb") as file:
    file.write(full_audio_bytes)
ipd.Audio(full_audio_bytes)