In [None]:
# Copyright 2026 MongoDB, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Voyage Multimodal 3.5

This notebook demonstrates how to deploy and use the Voyage Multimodal 3.5 embedding model.

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/generative_ai/voyage-multimodal-3.5.ipynb">
      <img src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fofficial%2Fgenerative_ai%2Fvoyage-multimodal-3.5.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/generative_ai/voyage-multimodal-3.5.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/generative_ai/voyage-multimodal-3.5.ipynb">
      <img src="https://github.githubassets.com/assets/GitHub-Mark-ea2971cee799.png" alt="GitHub logo" width="32px"><br> View on GitHub
    </a>
  </td>
</table>

## Overview

**Voyage Multimodal 3.5** is a state-of-the-art multimodal embedding model designed for cross-modal semantic search, retrieval-augmented generation (RAG), and intelligent AI applications. This model provides:

* **Multimodal Understanding**: Vectorize text, images, and video individually or interleaved together
* **Cross-Modal Search**: Excellent performance for mixed-modality searches involving text and visual content
* **Flexible Dimensions**: Support for 256, 512, 1024, and 2048 dimensions via Matryoshka learning
* **Quantization Options**: Multiple quantization formats for optimal storage and performance
* **Maximum 32K tokens input**: Support for long documents and multiple media items

### What you'll learn

In this notebook, you will:

* Deploy the Voyage Multimodal 3.5 model to a Vertex AI endpoint
* Generate embeddings for text, images, and video
* Create multimodal embeddings combining text, images, and video
* Use embeddings for cross-modal semantic similarity
* Clean up resources after use

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI Model Garden
* Vertex AI Prediction endpoints

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Get started

### Install Vertex AI SDK for Python and other required packages


In [None]:
! pip3 install --upgrade --quiet google-cloud-aiplatform numpy

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
import sys

# Only Colab needs the restart; other environments skip this cell's body entirely.
if "google.colab" in sys.modules:
    import IPython

    # Passing True asks the kernel to restart rather than just shut down.
    IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.


In [None]:
import sys

# The google.colab module is only loaded when running on Colab, so this
# authentication step is skipped in every other environment.
if "google.colab" in sys.modules:

    from google.colab import auth

    # Authenticate the current Colab user for this runtime.
    auth.authenticate_user()

### Set Google Cloud project information and initialize Vertex AI SDK for Python

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# @title Setup Google Cloud project

# Set your Google Cloud project ID and region below:

import os

import vertexai

# @markdown Enter your project ID if not auto-detected:
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
# Fall back to the environment when the form field is empty or still the placeholder.
if PROJECT_ID == "[your-project-id]" or not PROJECT_ID:
    PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")

# @markdown Select your region:
LOCATION = "us-central1"  # @param ["us-central1", "us-east1", "us-west1", "europe-west1", "europe-west4", "asia-east1", "asia-southeast1"]

# Echo the resolved settings so a wrong project/region is noticed before deploying.
for setting_line in (f"Project ID: {PROJECT_ID}", f"Location: {LOCATION}"):
    print(setting_line)

vertexai.init(location=LOCATION, project=PROJECT_ID)

## Deploy model

### Initialize the Model

Initialize the Voyage Multimodal 3.5 model from Model Garden.

Use the `list_deploy_options()` method to view the verified deployment configurations for your selected model. This helps ensure you have sufficient resources (e.g., GPU quota) available to deploy it.

In [None]:
from vertexai import model_garden

# Model Garden identifier used to look up the Voyage Multimodal 3.5 model.
MODEL_NAME = "mongodb/voyage-multimodal-3.5@latest"
model = model_garden.OpenModel(MODEL_NAME)

# Show the verified deployment configurations (concise form) so resource
# requirements can be checked before calling deploy().
deploy_options = model.list_deploy_options(concise=True)
print(deploy_options)

### Deploy the Model

Now that you've reviewed the deployment options, use the `deploy()` method to serve the Voyage Multimodal 3.5 model to a Vertex AI endpoint. Deployment time may vary depending on infrastructure requirements.

You can either deploy a new model or use an existing endpoint. Set `use_dedicated_endpoint` to `True` as voyage-multimodal-3.5 requires a [dedicated endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment#create-dedicated-endpoint).

In [None]:
# @title Deploy or connect to endpoint
# @markdown Choose whether to deploy a new model or use an existing endpoint:

deployment_option = "deploy_new"  # @param ["deploy_new", "use_existing"]

# @markdown ---
# @markdown If using existing endpoint, provide the endpoint ID:
ENDPOINT_ID = ""  # @param {type:"string"}

if deployment_option == "deploy_new":
    print("Deploying new model...")
    # The model requires a dedicated endpoint; accept_eula acknowledges the model's EULA.
    endpoint = model.deploy(
        accept_eula=True,
        use_dedicated_endpoint=True,
    )
    print(f"Endpoint deployed: {endpoint.display_name}")
    print(f"Endpoint resource name: {endpoint.resource_name}")
else:
    # Tolerate whitespace pasted into the form field.
    ENDPOINT_ID = ENDPOINT_ID.strip()
    if not ENDPOINT_ID:
        raise ValueError("Please provide an ENDPOINT_ID when using existing endpoint")

    from google.cloud import aiplatform

    print(f"Connecting to existing endpoint: {ENDPOINT_ID}")
    # Let the SDK assemble the resource name instead of formatting it by hand:
    # a hand-built name turns into "projects/None/..." when PROJECT_ID could not
    # be resolved, and breaks when a full resource name is pasted. The SDK
    # accepts either a bare endpoint ID or a fully-qualified resource name and
    # falls back to the project set in vertexai.init() when project is None.
    endpoint = aiplatform.Endpoint(
        endpoint_name=ENDPOINT_ID,
        project=PROJECT_ID,
        location=LOCATION,
    )
    print(f"Using endpoint: {endpoint.display_name}")
    print(f"Endpoint resource name: {endpoint.resource_name}")

### Advanced Deployment Configuration (Optional)

To further customize your deployment, you can configure:

- **Compute Resources**: Machine type, replica count (min/max), accelerator type and quantity.
- **Infrastructure**: Use Spot VMs, reservation affinity, or dedicated endpoints.
- **Serving Container**: Customize container image, ports, health checks, and environment variables.

See the [Model Garden SDK README](https://github.com/googleapis/python-aiplatform/blob/main/vertexai/model_garden/README.md) for advanced configuration options.

## Generate embeddings with Voyage Multimodal 3.5

Now that the model is deployed, you can generate embeddings for text, images, video, or any combination of these modalities.

The multimodal API uses a different input format than text-only models. Each input is an object with a `content` array containing typed elements:

- **Text**: `{"type": "text", "text": "your text here"}`
- **Image URL**: `{"type": "image_url", "image_url": "https://..."}`
- **Image Base64**: `{"type": "image_base64", "image_base64": "data:image/jpeg;base64,..."}`
- **Video URL**: `{"type": "video_url", "video_url": "https://..."}`
- **Video Base64**: `{"type": "video_base64", "video_base64": "data:video/mp4;base64,..."}`

### Text embeddings

Generate embeddings for text inputs:

In [None]:
import json

# Sample sentences to embed
texts = [
    "A photo of a golden retriever playing in the park.",
    "Machine learning enables computers to learn from data.",
    "A beautiful sunset over the ocean with orange and purple skies.",
    "The quarterly financial report shows strong revenue growth.",
]

# Wrap every sentence in the {"content": [...]} envelope used by the multimodal API
inputs = [{"content": [dict(type="text", text=sentence)]} for sentence in texts]

# Build the JSON request and serialize it once
body = dict(model="voyage-multimodal-3.5", inputs=inputs, input_type="document")
payload = json.dumps(body).encode("utf-8")

response = endpoint.invoke(
    headers={"Content-Type": "application/json"},
    request_path="/multimodalembeddings",
    body=payload,
)

# Collect the embedding of every item listed under result["data"]
result = response.json()
embeddings = [entry["embedding"] for entry in result["data"]]

print(f"Number of texts embedded: {len(embeddings)}")
print(f"Embedding dimension: {len(embeddings[0])}")
print(f"\nFirst embedding (first 5 values): {embeddings[0][:5]}")
print(f"\nUsage: {result.get('usage', {})}")

### Image embeddings

Generate embeddings for images. You can provide images via URL or base64-encoded data.

#### Using image URLs

In [None]:
import json

# Sample picture taken from Voyage AI's documentation
image_url = "https://raw.githubusercontent.com/voyage-ai/voyage-multimodal-3/refs/heads/main/images/banana.jpg"

# One input whose content is a single image referenced by URL
image_item = {"type": "image_url", "image_url": image_url}
inputs = [{"content": [image_item]}]

body = dict(model="voyage-multimodal-3.5", inputs=inputs, input_type="document")

response = endpoint.invoke(
    headers={"Content-Type": "application/json"},
    request_path="/multimodalembeddings",
    body=json.dumps(body).encode("utf-8"),
)

# The request carried one input, so read the first entry of "data"
result = response.json()
image_embedding = result["data"][0]["embedding"]

print(f"Embedding dimension: {len(image_embedding)}")
print(f"Embedding (first 5 values): {image_embedding[:5]}")
print(f"\nUsage: {result.get('usage', {})}")

#### Using base64-encoded images

For local images, use Google Colab's file upload interface:

In [None]:
import base64
import json
import sys


def encode_image_base64(image_bytes: bytes, filename: str) -> str:
    """Return the image as a base64 ``data:`` URI.

    Args:
        image_bytes: Raw bytes of the image file.
        filename: Name of the file; only the text after its last dot is used,
            case-insensitively, to choose the MIME type.

    Returns:
        A string of the form ``data:<mime type>;base64,<payload>``. Extensions
        other than jpg, jpeg, png, gif and webp are labelled ``image/jpeg``.
    """
    # Text after the last dot (the whole name when there is no dot)
    suffix = filename.lower().rpartition(".")[2]

    # png/gif/webp map onto a MIME subtype of the same name; jpg, jpeg and
    # anything unrecognised are reported as JPEG.
    if suffix in ("png", "gif", "webp"):
        mime_type = "image/" + suffix
    else:
        mime_type = "image/jpeg"

    payload = base64.b64encode(image_bytes).decode("ascii")
    return "data:" + mime_type + ";base64," + payload


# The upload widget exists only on Colab; elsewhere just point to the helper
if "google.colab" not in sys.modules:
    print("File upload is only available in Google Colab.")
    print(
        "For other environments, use the encode_image_base64() helper function with file bytes."
    )
else:
    from google.colab import files

    print("Please upload an image file (JPG, PNG, etc.):")
    uploaded = files.upload()

    if uploaded:
        # Only the first uploaded file is embedded
        filename = next(iter(uploaded))
        image_bytes = uploaded[filename]

        # Turn the raw bytes into a data URI and send it as an image_base64 item
        image_base64 = encode_image_base64(image_bytes, filename)
        image_item = {"type": "image_base64", "image_base64": image_base64}

        body = {
            "model": "voyage-multimodal-3.5",
            "inputs": [{"content": [image_item]}],
            "input_type": "document",
        }

        response = endpoint.invoke(
            headers={"Content-Type": "application/json"},
            request_path="/multimodalembeddings",
            body=json.dumps(body).encode("utf-8"),
        )

        result = response.json()
        embedding = result["data"][0]["embedding"]

        print(f"\nEmbedding dimension: {len(embedding)}")
        print(f"Embedding (first 5 values): {embedding[:5]}")
        print(f"\nUsage: {result.get('usage', {})}")

### Video embeddings

Generate embeddings for video content. Videos must be:
- **Format**: MP4 container
- **Size**: Maximum 20 MB
- **Frames**: At least 2 frames

#### Using video URLs

In [None]:
import json

# Sample clip (cooking video, ~500 KB)
video_url = "https://file.garden/aTiKu4GB_i5vfop6/example_video_01.mp4"

# One input whose content is a single video referenced by URL
video_item = {"type": "video_url", "video_url": video_url}
inputs = [{"content": [video_item]}]

body = dict(model="voyage-multimodal-3.5", inputs=inputs, input_type="document")

response = endpoint.invoke(
    headers={"Content-Type": "application/json"},
    request_path="/multimodalembeddings",
    body=json.dumps(body).encode("utf-8"),
)

result = response.json()
# "usage" may be missing from the response, hence the empty-dict fallback
usage = result.get("usage", {})
video_embedding = result["data"][0]["embedding"]

print(f"Embedding dimension: {len(video_embedding)}")
print(f"Embedding (first 5 values): {video_embedding[:5]}")
print("\nUsage:")
print(f"  Total tokens: {usage.get('total_tokens')}")
print(f"  Video pixels: {usage.get('video_pixels')}")

#### Using base64-encoded videos

For local videos, use Google Colab's file upload interface:

In [None]:
# base64 is imported here so this cell does not depend on an earlier cell
# having been run first.
import base64
import json
import sys


def encode_video_base64(video_bytes: bytes) -> str:
    """Encode video bytes as a base64 data URI.

    Args:
        video_bytes: Raw bytes of the video file.

    Returns:
        A string of the form ``data:video/mp4;base64,<payload>``. The MIME
        type is always ``video/mp4``; the bytes themselves are not inspected.
    """
    b64_str = base64.b64encode(video_bytes).decode("ascii")
    return f"data:video/mp4;base64,{b64_str}"


# The upload widget exists only on Colab; elsewhere just point to the helper
if "google.colab" not in sys.modules:
    print("File upload is only available in Google Colab.")
    print(
        "For other environments, use the encode_video_base64() helper function with video bytes."
    )
else:
    from google.colab import files

    print("Please upload an MP4 video file (max 20 MB):")
    uploaded = files.upload()

    if uploaded:
        # Only the first uploaded file is embedded
        filename = next(iter(uploaded))
        video_bytes = uploaded[filename]

        # Report the size in MiB and warn (without aborting) above the 20 MB limit
        file_size_mb = len(video_bytes) / (1024 * 1024)
        print(f"\nUploaded: {filename} ({file_size_mb:.2f} MB)")
        if file_size_mb > 20:
            print("Warning: File exceeds 20 MB limit and may be rejected by the API.")

        # Turn the raw bytes into a data URI and send it as a video_base64 item
        video_base64 = encode_video_base64(video_bytes)
        video_item = {"type": "video_base64", "video_base64": video_base64}

        body = {
            "model": "voyage-multimodal-3.5",
            "inputs": [{"content": [video_item]}],
            "input_type": "document",
        }

        response = endpoint.invoke(
            headers={"Content-Type": "application/json"},
            request_path="/multimodalembeddings",
            body=json.dumps(body).encode("utf-8"),
        )

        result = response.json()
        usage = result.get("usage", {})
        embedding = result["data"][0]["embedding"]

        print(f"\nEmbedding dimension: {len(embedding)}")
        print(f"Embedding (first 5 values): {embedding[:5]}")
        print(f"Total tokens: {usage.get('total_tokens')}")
        print(f"Video pixels: {usage.get('video_pixels')}")

### Multimodal embeddings (text + images + video)

A key feature of Voyage Multimodal 3.5 is the ability to create embeddings from interleaved text, images, and video. This is useful for rich documents that combine multiple modalities.

In [None]:
import json

# The three pieces of one interleaved document: a caption, a picture and a clip
caption_item = {"type": "text", "text": "This is a banana."}
picture_item = {
    "type": "image_url",
    "image_url": "https://raw.githubusercontent.com/voyage-ai/voyage-multimodal-3/refs/heads/main/images/banana.jpg",
}
clip_item = {
    "type": "video_url",
    "video_url": "https://file.garden/aTiKu4GB_i5vfop6/example_video_01.mp4",
}

# All three items share one "content" list, so they are sent as a single input
multimodal_input = {"content": [caption_item, picture_item, clip_item]}

body = dict(
    model="voyage-multimodal-3.5",
    inputs=[multimodal_input],
    input_type="document",
)

response = endpoint.invoke(
    headers={"Content-Type": "application/json"},
    request_path="/multimodalembeddings",
    body=json.dumps(body).encode("utf-8"),
)

result = response.json()
usage = result.get("usage", {})
multimodal_embedding = result["data"][0]["embedding"]

print(f"Multimodal embedding dimension: {len(multimodal_embedding)}")
print(f"Embedding (first 5 values): {multimodal_embedding[:5]}")
print("\nUsage:")
print(f"  Text tokens: {usage.get('text_tokens')}")
print(f"  Image pixels: {usage.get('image_pixels')}")
print(f"  Video pixels: {usage.get('video_pixels')}")
print(f"  Total tokens: {usage.get('total_tokens')}")

### Cross-modal semantic similarity

One of the most powerful features of multimodal embeddings is the ability to search across modalities. You can use a text query to find relevant images or videos.

In [None]:
import json

import numpy as np


def cosine_similarity(vec1, vec2):
    """Calculate cosine similarity between two vectors.

    Args:
        vec1: First vector (any 1-D sequence of numbers).
        vec2: Second vector, of the same length as ``vec1``.

    Returns:
        The dot product of the vectors divided by the product of their
        Euclidean norms. If either vector has zero norm the similarity is
        undefined; ``0.0`` is returned instead of dividing by zero.

    Raises:
        ValueError: Raised by NumPy when the vectors have different lengths.
    """
    # Work in floating point so integer inputs (e.g. int8 embeddings) cannot overflow
    vec1 = np.asarray(vec1, dtype=float)
    vec2 = np.asarray(vec2, dtype=float)

    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denominator == 0:
        # Avoid a nan result and a RuntimeWarning for all-zero vectors
        return 0.0
    return np.dot(vec1, vec2) / denominator


# Text queries to run against the media documents
queries = ["A yellow fruit", "A green vegetable"]

# Media to search over; each entry stores its URL under the key named by "type"
documents = [
    {
        "type": "image_url",
        "image_url": "https://raw.githubusercontent.com/voyage-ai/voyage-multimodal-3/refs/heads/main/images/banana.jpg",
        "description": "Banana image",
    },
    {
        "type": "video_url",
        "video_url": "https://file.garden/aTiKu4GB_i5vfop6/example_video_01.mp4",
        "description": "Cooking video",
    },
]

# Embed all documents in one request, tagged input_type="document".
# The "description" field is for display only and is not sent to the API.
doc_inputs = [
    {"content": [{"type": doc["type"], doc["type"]: doc[doc["type"]]}]}
    for doc in documents
]

doc_body = dict(
    model="voyage-multimodal-3.5",
    inputs=doc_inputs,
    input_type="document",
)
doc_response = endpoint.invoke(
    headers={"Content-Type": "application/json"},
    request_path="/multimodalembeddings",
    body=json.dumps(doc_body).encode("utf-8"),
)
doc_embeddings = [entry["embedding"] for entry in doc_response.json()["data"]]

# Score every query against every document
for query_text in queries:
    # Queries are embedded one at a time, tagged input_type="query"
    query_body = dict(
        model="voyage-multimodal-3.5",
        inputs=[{"content": [{"type": "text", "text": query_text}]}],
        input_type="query",
    )
    query_response = endpoint.invoke(
        headers={"Content-Type": "application/json"},
        request_path="/multimodalembeddings",
        body=json.dumps(query_body).encode("utf-8"),
    )
    query_embedding = query_response.json()["data"][0]["embedding"]

    print(f'Query: "{query_text}"')
    print("Cross-modal similarity scores:")
    # Embeddings are paired with documents by position
    for embedding, doc in zip(doc_embeddings, documents):
        similarity = cosine_similarity(query_embedding, embedding)
        print(f"  {similarity:.4f} - {doc['description']}")
    print()

## Advanced parameters

Voyage Multimodal 3.5 supports several parameters to customize embedding generation.

### Understanding input_type: Query vs Document

The `input_type` parameter optimizes embeddings for retrieval tasks:

* **`query`**: Use this when the input represents a search query. The model prepends "Represent the query for retrieving supporting documents: " to optimize for retrieval.
* **`document`**: Use this when the input represents content to be indexed. The model prepends "Represent the document for retrieval: " to optimize for indexing.
* **`null`** (default): No special prompt is added. Use for general-purpose embeddings.

**Best Practice**: For retrieval applications, use `input_type="query"` for search queries and `input_type="document"` for the content you're indexing. Embeddings generated with and without the `input_type` argument are compatible.

In [None]:
import json

# The same sentence is embedded three times, varying only input_type
search_query = "What does a banana look like?"

# Build the three request bodies up front; they differ only in "input_type"
query_body = {
    "model": "voyage-multimodal-3.5",
    "inputs": [{"content": [{"type": "text", "text": search_query}]}],
    "input_type": "query",  # Optimized for search queries
}
doc_body = {
    "model": "voyage-multimodal-3.5",
    "inputs": [{"content": [{"type": "text", "text": search_query}]}],
    "input_type": "document",  # Optimized for documents
}
general_body = {
    "model": "voyage-multimodal-3.5",
    "inputs": [{"content": [{"type": "text", "text": search_query}]}],
    # input_type is omitted, so the API default (null) applies
}

# Query embedding (for search)
query_response = endpoint.invoke(
    headers={"Content-Type": "application/json"},
    request_path="/multimodalembeddings",
    body=json.dumps(query_body).encode("utf-8"),
)
query_result = query_response.json()

# Document embedding (for indexing)
doc_response = endpoint.invoke(
    headers={"Content-Type": "application/json"},
    request_path="/multimodalembeddings",
    body=json.dumps(doc_body).encode("utf-8"),
)
doc_result = doc_response.json()

# General-purpose embedding
general_response = endpoint.invoke(
    headers={"Content-Type": "application/json"},
    request_path="/multimodalembeddings",
    body=json.dumps(general_body).encode("utf-8"),
)
general_result = general_response.json()

# Show the leading values side by side so the effect of input_type is visible
print(f'Text: "{search_query}"\n')
print(f"Query embedding (first 5): {query_result['data'][0]['embedding'][:5]}")
print(f"Document embedding (first 5): {doc_result['data'][0]['embedding'][:5]}")
print(f"General embedding (first 5): {general_result['data'][0]['embedding'][:5]}")

### Truncation

The `truncation` parameter controls how the model handles inputs that exceed the context window (32,000 tokens):

* **`true`** (default): Automatically truncate inputs that exceed the context limit. If truncation happens in the middle of an image, the entire image will be discarded.
* **`false`**: Return an error if any input exceeds the context limit.

When truncation occurs, you may see a warning in the response headers.

In [None]:
import json

# Build one oversized input by listing the same video several times, with the
# aim of going past the 32k-token context limit so truncation kicks in
video_url = "https://file.garden/aTiKu4GB_i5vfop6/example_video_01.mp4"

# Four copies of the clip inside a single input
truncation_input = {
    "content": [{"type": "video_url", "video_url": video_url} for _ in range(4)]
}

body = dict(
    model="voyage-multimodal-3.5",
    inputs=[truncation_input],
    input_type="document",
    truncation=True,  # Automatic truncation; this is also the API default
)

response = endpoint.invoke(
    headers={"Content-Type": "application/json"},
    request_path="/multimodalembeddings",
    body=json.dumps(body).encode("utf-8"),
)

result = response.json()
usage = result.get("usage", {})

print("Embedding generated with truncation enabled")
print(f"Dimension: {len(result['data'][0]['embedding'])}")
print("\nUsage:")
print(f"  Total tokens: {usage.get('total_tokens')}")
print(f"  Video pixels: {usage.get('video_pixels')}")

# Look for a truncation notice in the response headers, preferring
# "x-api-warning" and falling back to "warning"
if hasattr(response, "headers"):
    fallback_warning = response.headers.get("warning")
    warning = response.headers.get("x-api-warning", fallback_warning)
    if not warning:
        print("\nNo truncation warning detected (may have fit within limit)")
    else:
        print(f"\nTruncation warning: {warning}")

### Output encoding

The `output_encoding` parameter controls the format of the embedding output:

* **`null`** (default): Embeddings are returned as a list of floating-point numbers.
* **`base64`**: Embeddings are returned as a Base64-encoded string representing a NumPy array of single-precision floats. This can be more efficient for large batch operations.

In [None]:
# base64 and numpy are imported here so this cell does not depend on earlier
# cells having been run first.
import base64
import json

import numpy as np

text = "A beautiful landscape photo."

# Default output (list of floats)
default_body = {
    "model": "voyage-multimodal-3.5",
    "inputs": [{"content": [{"type": "text", "text": text}]}],
    "input_type": "document",
}
default_response = endpoint.invoke(
    request_path="/multimodalembeddings",
    body=json.dumps(default_body).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
default_embedding = default_response.json()["data"][0]["embedding"]

# Base64-encoded output: same request plus output_encoding="base64"
base64_body = {
    "model": "voyage-multimodal-3.5",
    "inputs": [{"content": [{"type": "text", "text": text}]}],
    "input_type": "document",
    "output_encoding": "base64",
}
base64_response = endpoint.invoke(
    request_path="/multimodalembeddings",
    body=json.dumps(base64_body).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
base64_embedding = base64_response.json()["data"][0]["embedding"]

# Decode the base64 string back into a float32 vector
decoded_embedding = np.frombuffer(base64.b64decode(base64_embedding), dtype=np.float32)

print("Default output (list of floats):")
print(f"  Type: {type(default_embedding)}")
print(f"  Length: {len(default_embedding)}")
print(f"  First 5 values: {default_embedding[:5]}")

print("\nBase64 output:")
print(f"  Type: {type(base64_embedding)}")
print(f"  Length: {len(base64_embedding)} characters")
print(f"  Decoded length: {len(decoded_embedding)}")
print(f"  Decoded first 5 values: {decoded_embedding[:5].tolist()}")

# Verify that both encodings carry the same values (within float tolerance)
print(f"\nEmbeddings match: {np.allclose(default_embedding, decoded_embedding)}")

### Using different output dimensions

Voyage Multimodal 3.5 supports multiple output dimensions: 256, 512, 1024 (default), and 2048. Smaller dimensions reduce storage and computation costs, while larger dimensions may provide better accuracy.

In [None]:
import json

# Picture used in every request of this comparison
image_url = "https://raw.githubusercontent.com/voyage-ai/voyage-multimodal-3/refs/heads/main/images/banana.jpg"

# Output sizes to compare
dimensions = [256, 512, 1024, 2048]

# The input (caption + picture) is the same for every dimension, so build it once
banana_inputs = [
    {
        "content": [
            {
                "type": "text",
                "text": "A photo of a banana on a white background.",
            },
            {"type": "image_url", "image_url": image_url},
        ]
    }
]

print("Comparing different output dimensions:\n")
for dim in dimensions:
    # Only "output_dimension" changes between iterations
    body = dict(
        model="voyage-multimodal-3.5",
        inputs=banana_inputs,
        output_dimension=dim,
        input_type="document",
    )
    response = endpoint.invoke(
        headers={"Content-Type": "application/json"},
        request_path="/multimodalembeddings",
        body=json.dumps(body).encode("utf-8"),
    )
    result = response.json()
    embedding = result["data"][0]["embedding"]

    print(f"Dimension {dim}:")
    print(f"  Length: {len(embedding)}")
    print(f"  First 5 values: {embedding[:5]}")
    # Estimated at 4 bytes per value (float32)
    print(f"  Storage size: ~{len(embedding) * 4} bytes (float32)\n")

### Using different output data types

Voyage Multimodal 3.5 supports multiple output data types to optimize for storage and performance:

* **`float`** (default): 32-bit floating-point numbers, highest precision
* **`int8`**: 8-bit signed integers (-128 to 127), 4x smaller than float
* **`uint8`**: 8-bit unsigned integers (0 to 255), 4x smaller than float
* **`binary`**: Bit-packed signed integers (int8), 32x smaller than float
* **`ubinary`**: Bit-packed unsigned integers (uint8), 32x smaller than float

Quantized formats (int8, uint8, binary, ubinary) trade some precision for significant storage savings.

In [None]:
import json

# Use an image URL for testing different data types
image_url = "https://raw.githubusercontent.com/voyage-ai/voyage-multimodal-3/refs/heads/main/images/banana.jpg"

# Dimension requested for every dtype. It is also the basis of the float32
# baseline used for the compression ratio, so it is defined once here.
OUTPUT_DIMENSION = 1024
FLOAT32_BYTES = OUTPUT_DIMENSION * 4

# Bytes taken by each element of the returned list, per output dtype.
# binary/ubinary are bit-packed: the returned list is shorter (1/8 of the
# dimension) and each element is one byte.
BYTES_PER_ELEMENT = {
    "float": 4,  # float32
    "int8": 1,
    "uint8": 1,
    "binary": 1,
    "ubinary": 1,
}

# Test different output data types
output_dtypes = ["float", "int8", "uint8", "binary", "ubinary"]

print("Comparing different output data types:\n")
for dtype in output_dtypes:
    body = {
        "model": "voyage-multimodal-3.5",
        "inputs": [
            {
                "content": [
                    {
                        "type": "text",
                        "text": "A photo of a banana on a white background.",
                    },
                    {"type": "image_url", "image_url": image_url},
                ]
            }
        ],
        "output_dimension": OUTPUT_DIMENSION,
        "output_dtype": dtype,
        "input_type": "document",
    }
    response = endpoint.invoke(
        request_path="/multimodalembeddings",
        body=json.dumps(body).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    result = response.json()
    embedding = result["data"][0]["embedding"]

    # Calculate actual storage size. A dtype missing from the table raises
    # KeyError instead of silently reusing the previous iteration's value.
    storage_bytes = len(embedding) * BYTES_PER_ELEMENT[dtype]

    print(f"Output dtype: {dtype}")
    print(f"  Length: {len(embedding)}")
    print(f"  Value type: {type(embedding[0]).__name__}")
    print(f"  First 5 values: {embedding[:5]}")
    print(f"  Storage size: ~{storage_bytes} bytes")

    # Calculate compression ratio vs float
    if dtype != "float":
        compression_ratio = FLOAT32_BYTES / storage_bytes
        print(f"  Compression: {compression_ratio:.1f}x smaller than float")
    print()

### Combining output_dimension and output_dtype

You can combine different dimensions and data types to optimize for your use case.

Please refer to our guide for details on [offset binary](https://docs.voyageai.com/docs/flexible-dimensions-and-quantization#offset-binary) and [binary embeddings](https://docs.voyageai.com/docs/flexible-dimensions-and-quantization#quantization).

In [None]:
import json

# Picture used by both requests of this comparison
image_url = "https://raw.githubusercontent.com/voyage-ai/voyage-multimodal-3/refs/heads/main/images/banana.jpg"

# Both requests embed the same caption + picture; only the output settings differ
banana_inputs = [
    {
        "content": [
            {"type": "text", "text": "A photo of a banana on a white background."},
            {"type": "image_url", "image_url": image_url},
        ]
    }
]

# Smallest footprint: 256 dimensions, bit-packed unsigned output
compact_body = dict(
    model="voyage-multimodal-3.5",
    inputs=banana_inputs,
    output_dimension=256,
    output_dtype="ubinary",  # Most compact format
    input_type="document",
)
compact_response = endpoint.invoke(
    headers={"Content-Type": "application/json"},
    request_path="/multimodalembeddings",
    body=json.dumps(compact_body).encode("utf-8"),
)
compact_result = compact_response.json()
compact_embedding = compact_result["data"][0]["embedding"]

# Largest footprint: 2048 dimensions, float output
precise_body = dict(
    model="voyage-multimodal-3.5",
    inputs=banana_inputs,
    output_dimension=2048,
    output_dtype="float",  # Highest precision
    input_type="document",
)
precise_response = endpoint.invoke(
    headers={"Content-Type": "application/json"},
    request_path="/multimodalembeddings",
    body=json.dumps(precise_body).encode("utf-8"),
)
precise_result = precise_response.json()
precise_embedding = precise_result["data"][0]["embedding"]

# Per-vector storage: one byte per returned element for the bit-packed output,
# four bytes per element for float32
compact_storage = 1 * len(compact_embedding)
precise_storage = 4 * len(precise_embedding)

print("Storage comparison:\n")
print("Ultra-compact (256-dim ubinary):")
print("  Dimension: 256")
print(f"  Storage: ~{compact_storage} bytes")
print(f"  First 5 values: {compact_embedding[:5]}\n")

print("High-precision (2048-dim float):")
print(f"  Dimension: {len(precise_embedding)}")
print(f"  Storage: ~{precise_storage} bytes")
print(f"  First 5 values: {precise_embedding[:5]}\n")

# Scale the per-vector figures up to a million vectors (reported in units of 1024**2 bytes)
print(f"Storage ratio: {precise_storage / compact_storage:.1f}x")
print("\nFor 1 million vectors:")
print(f"  Ultra-compact: ~{compact_storage * 1_000_000 / (1024**2):.1f} MB")
print(f"  High-precision: ~{precise_storage * 1_000_000 / (1024**2):.1f} MB")

## Cleaning up

To avoid incurring charges to your Google Cloud account for the resources used in this tutorial, delete the endpoint; deleting it with `force=True` also undeploys the model.

In [None]:
# Delete the endpoint (this will also undeploy all models).
# force=True is what allows deletion while a model is still deployed.
# NOTE(review): when an existing endpoint was reused above, this removes that
# endpoint as well; skip this cell if it should be kept.
print(f"Deleting endpoint: {endpoint.display_name}")
endpoint.delete(force=True)
print("Endpoint deleted successfully!")