In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - ImageBind

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_imagebind.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_imagebind.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_pytorch_imagebind.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
Open in Vertex AI Workbench
    </a> (A Python-3 CPU notebook is recommended)
  </td>
</table>

## Overview

This notebook demonstrates deploying prebuilt ImageBind models for online prediction in Vertex AI.

### Objective

- Upload the ImageBind model to [Vertex AI Model Registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction).
- Deploy the ImageBind model to a [Vertex AI Endpoint resource](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints).
- Run online prediction for feature embedding generation and zero-shot classification.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Before you begin

**NOTE**: Jupyter runs lines prefixed with `!` as shell commands, and it interpolates Python variables prefixed with `$` into these commands.

### Colab only
Run the following commands for Colab and skip this section if you are using Workbench.

In [None]:
import sys

# Guard: the body below only runs when the `google.colab` module is already
# loaded in this kernel, so nothing happens outside Colab.
if "google.colab" in sys.modules:
    # Upgrade the Vertex AI SDK package used by the rest of the notebook.
    ! pip3 install --upgrade google-cloud-aiplatform
    from google.colab import auth as google_auth

    # Authenticate the current Colab user.
    google_auth.authenticate_user()

    # Restart the notebook kernel after installs.
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

### Setup Google Cloud project

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.

1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

1. [Enable the Vertex AI API and Compute Engine API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com,compute_component.googleapis.com).

1. [Create a Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets) for storing experiment outputs.

1. [Create a service account](https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console) with `Vertex AI User` and `Storage Object Admin` roles for deploying the prebuilt model to a Vertex AI endpoint.

Set the following variables for the experiment environment. The specified Cloud Storage bucket (BUCKET_URI) should be located in the specified region (REGION). Note that a multi-region bucket (e.g. "us") is not considered a match for a single region covered by the multi-region range (e.g. "us-central1").

In [None]:
# Google Cloud project ID that owns the Vertex AI resources created below.
PROJECT_ID = ""  # @param {type:"string"}

# The region you want to launch jobs in.
REGION = ""  # @param {type:"string"}

# The Cloud Storage bucket for storing experiment outputs.
# Must start with the gs:// prefix, e.g. gs://foo_bucket.
BUCKET_URI = "gs://"  # @param {type:"string"}

# Make PROJECT_ID the active project for subsequent gcloud commands.
! gcloud config set project $PROJECT_ID

import os

# STAGING_BUCKET is passed to `aiplatform.init` as the staging location.
# DATA_BUCKET receives the example assets and is the prefix of the file paths
# sent in prediction requests.
STAGING_BUCKET = os.path.join(BUCKET_URI, "temporal")
DATA_BUCKET = os.path.join(BUCKET_URI, "data")

# The service account looks like:
# '<service-account-name>@<project-id>.iam.gserviceaccount.com'
# Please go to https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console
# and create a service account with `Vertex AI User` and `Storage Object Admin` roles.
# This service account is passed to `deploy_model` and used for the model deployment.
SERVICE_ACCOUNT = ""  # @param {type:"string"}

### Initialize Vertex AI API

In [None]:
from google.cloud import aiplatform

# Configure the Vertex AI SDK with the project, region and staging bucket set above.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=STAGING_BUCKET,
)

### Define constants

In [None]:
# Image URI of the pre-built ImageBind serving container; `deploy_model` uses
# it as the serving container image when uploading the model.
PREDICTION_DOCKER_URI = (
    "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/"
    "pytorch-imagebind-serve"
)

### Define common functions

In [None]:
import os
from datetime import datetime

import numpy as np


def get_job_name_with_datetime(prefix: str) -> str:
    """Builds a job name by appending the current local timestamp to `prefix`.

    Args:
        prefix: Leading part of the job name.

    Returns:
        `prefix` followed by `_YYYYMMDD_HHMMSS` taken from `datetime.now()`.
    """
    timestamp_suffix = datetime.now().strftime("_%Y%m%d_%H%M%S")
    return "".join((prefix, timestamp_suffix))


def deploy_model(
    model_name: str,
    service_account: str,
    task: str,
    machine_type: str = "n1-standard-8",
    accelerator_type: str = "NVIDIA_TESLA_V100",
    accelerator_count: int = 1,
) -> tuple[aiplatform.Model, aiplatform.Endpoint]:
    """Uploads the prebuilt ImageBind serving container as a model and deploys it.

    Creates a new endpoint, uploads a model backed by `PREDICTION_DOCKER_URI`
    with the `TASK` environment variable set to `task`, and deploys that model
    to the new endpoint.

    Args:
        model_name: Base name used to build the endpoint and model display names.
        service_account: Service account passed to the deployment.
        task: Task name; used in the display names and exposed to the serving
            container through the `TASK` environment variable.
        machine_type: Machine type used for the deployment.
        accelerator_type: Accelerator type attached to the deployment.
        accelerator_count: Number of accelerators attached to the deployment.

    Returns:
        A `(model, endpoint)` tuple with the uploaded model and the endpoint it
        was deployed to.
    """
    endpoint = aiplatform.Endpoint.create(display_name=f"{model_name}-{task}-endpoint")
    # Environment variables handed to the serving container.
    serving_env = {
        "TASK": task,
    }
    model = aiplatform.Model.upload(
        display_name=f"{model_name}-{task}",
        serving_container_image_uri=PREDICTION_DOCKER_URI,
        serving_container_ports=[7080],
        serving_container_predict_route="/predictions/imagebind_serving",
        serving_container_health_route="/ping",
        serving_container_environment_variables=serving_env,
    )
    model.deploy(
        endpoint=endpoint,
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        # Timeout for the deploy request (in seconds, per the Vertex AI SDK).
        deploy_request_timeout=1800,
        service_account=service_account,
    )
    return model, endpoint

## Deploy prebuilt ImageBind model

This section deploys the prebuilt ImageBind model on Vertex AI endpoints for the tasks of feature embedding generation and zero-shot classification. The model deployment step will take ~15 minutes to complete.

In [None]:
# Prepares example input data.
# Clone the ImageBind repository and change into its `.assets` directory.
! git clone https://github.com/facebookresearch/ImageBind.git
%cd ImageBind/.assets
# Pin the checkout to a fixed commit.
! git reset --hard 95d27c7fd5a8362f3527e176c3a80ae5a4d880c0

# Recursively copy the current directory (`.assets`) to the DATA_BUCKET path.
! gsutil cp -r . $DATA_BUCKET

# Go back up two levels, to the directory this cell started in.
%cd ../..

### Deploy prebuilt ImageBind model for feature embedding generation

In this section, we deploy an ImageBind model that generates feature embeddings for different data modalities.

The peak GPU memory usage for the ImageBind model is ~8G. Please adjust the machine type, accelerator type and accelerator count accordingly. We use one V100 (16G) in deployments as an example.

In [None]:
# Task for this deployment; `deploy_model` exposes it to the serving container
# through the `TASK` environment variable.
task = "feature-embedding-generation"

In [None]:
# Accelerators and regions supported by Vertex AI prediction are listed at
# https://cloud.google.com/vertex-ai/docs/predictions/configure-compute.

# Deploy ImageBind on one V100 GPU attached to an n1-standard-8 machine.
machine_type = "n1-standard-8"
accelerator_type = "NVIDIA_TESLA_V100"
accelerator_count = 1

# Create the endpoint, upload the model and deploy it for the `task` set above.
model, endpoint = deploy_model(
    model_name=get_job_name_with_datetime(prefix="ImageBind-serve"),
    service_account=SERVICE_ACCOUNT,
    task=task,
    machine_type=machine_type,
    accelerator_type=accelerator_type,
    accelerator_count=accelerator_count,
)
# The printed name can be used later to load this endpoint again.
print(f"Endpoint name: {endpoint.name}")

NOTE: The prebuilt model weights will be downloaded on the fly after deployment succeeds. Thus, an additional 5 minutes of waiting time is needed **after** the above model deployment step succeeds and before you can run the next step below. Otherwise you might see a `ServiceUnavailable: 503 502:Bad Gateway` error when you send requests to the endpoint.

Once deployment succeeds, you can send requests to the endpoint with text prompts, GCS paths to image, depth, thermal, video and audio data files, and IMU data.

The input specification is:
- **Text**: A list of text prompts, indexed by the key "text"
- **Image**: A list of GCS paths to 3-channel RGB images, indexed by the key "vision"
- **Depth image**: A list of GCS paths to 1-channel depth images, indexed by the key "depth"
- **Thermal image**: A list of GCS paths to 1-channel thermal images, indexed by the key "thermal"
- **Video**: A list of GCS paths to 3-channel RGB videos, indexed by the key "video"
- **Audio**: A list of GCS paths to waveform files, indexed by the key "audio"
- **IMU**: A list of tensors of the shape [6, 2000] (the first dimension corresponds to accelerometer and gyroscope measurements along the X, Y, Z axes; the second dimension corresponds to 10-second clips with sample rate 200Hz), indexed by the key "imu"

See the example below.

In [None]:
# Loads an existing endpoint instance using the endpoint name:
# - Using `endpoint_name = endpoint.name` allows us to get the endpoint name of
#   the endpoint `endpoint` created in the cell above.
# - Alternatively, you can set `endpoint_name = "1234567890123456789"` to load
#   an existing endpoint with the ID 1234567890123456789.
# You may uncomment the code below to load an existing endpoint.

# endpoint_name = endpoint.name
# # endpoint_name = ""  # @param {type:"string"}
# aip_endpoint_name = (
#     f"projects/{PROJECT_ID}/locations/{REGION}/endpoints/{endpoint_name}"
# )
# endpoint = aiplatform.Endpoint(aip_endpoint_name)


# If the request fails with `ServiceUnavailable: 503 Took too long to respond when processing`,
# send less input data.
image_files = ["dog_image.jpg", "car_image.jpg", "bird_image.jpg"]
audio_files = ["dog_audio.wav", "car_audio.wav", "bird_audio.wav"]

# A single instance holding three text prompts, three images and three audio clips.
instances = [
    {
        "text": ["A dog.", "A car", "A bird"],
        "vision": [os.path.join(DATA_BUCKET, file_name) for file_name in image_files],
        "audio": [os.path.join(DATA_BUCKET, file_name) for file_name in audio_files],
    },
]
response = endpoint.predict(instances=instances)

# Print the embedding shape returned for each modality of the first instance.
first_prediction = response.predictions[0]
for modality, embedding in first_prediction.items():
    print(f"Modality {modality}: embedding shape {np.array(embedding).shape}")

#### Clean up resources

In [None]:
# Remove the resources returned by `deploy_model`.
# Delete the endpoint; `force=True` undeploys the model from it first.
endpoint.delete(force=True)

# Delete the uploaded model.
model.delete()

### Deploy prebuilt ImageBind model for zero-shot classification

In this section, we deploy an ImageBind model that performs zero-shot classification between pairs of data modalities.

The peak GPU memory usage for the ImageBind model is ~8G. Please adjust the machine type, accelerator type and accelerator count accordingly. We use one V100 (16G) in deployments as an example.

In [None]:
# Task for this deployment; `deploy_model` exposes it to the serving container
# through the `TASK` environment variable.
task = "zero-shot-classification"

In [None]:
# Accelerators and regions supported by Vertex AI prediction are listed at
# https://cloud.google.com/vertex-ai/docs/predictions/configure-compute.

# Deploy ImageBind on one V100 GPU attached to an n1-standard-8 machine.
machine_type = "n1-standard-8"
accelerator_type = "NVIDIA_TESLA_V100"
accelerator_count = 1

# Create the endpoint, upload the model and deploy it for the `task` set above.
model, endpoint = deploy_model(
    model_name=get_job_name_with_datetime(prefix="ImageBind-serve"),
    service_account=SERVICE_ACCOUNT,
    task=task,
    machine_type=machine_type,
    accelerator_type=accelerator_type,
    accelerator_count=accelerator_count,
)
# The printed name can be used later to load this endpoint again.
print(f"Endpoint name: {endpoint.name}")

NOTE: The prebuilt model weights will be downloaded on the fly after deployment succeeds. Thus, an additional 5 minutes of waiting time is needed **after** the above model deployment step succeeds and before you can run the next step below. Otherwise you might see a `ServiceUnavailable: 503 502:Bad Gateway` error when you send requests to the endpoint.

Once deployment succeeds, you can send requests to the endpoint with text prompts, GCS paths to image, depth, thermal, video and audio data files, and IMU data.

The input specification is:
- **Text**: A list of text prompts, indexed by the key "text"
- **Image**: A list of GCS paths to 3-channel RGB images, indexed by the key "vision"
- **Depth image**: A list of GCS paths to 1-channel depth images, indexed by the key "depth"
- **Thermal image**: A list of GCS paths to 1-channel thermal images, indexed by the key "thermal"
- **Video**: A list of GCS paths to 3-channel RGB videos, indexed by the key "video"
- **Audio**: A list of GCS paths to waveform files, indexed by the key "audio"
- **IMU**: A list of tensors of the shape [6, 2000] (the first dimension corresponds to accelerometer and gyroscope measurements along the X, Y, Z axes; the second dimension corresponds to 10-second clips with sample rate 200Hz), indexed by the key "imu"

See the example below.

In [None]:
# Loads an existing endpoint instance using the endpoint name:
# - Using `endpoint_name = endpoint.name` allows us to get the endpoint name of
#   the endpoint `endpoint` created in the cell above.
# - Alternatively, you can set `endpoint_name = "1234567890123456789"` to load
#   an existing endpoint with the ID 1234567890123456789.
# You may uncomment the code below to load an existing endpoint.

# endpoint_name = endpoint.name
# # endpoint_name = ""  # @param {type:"string"}
# aip_endpoint_name = (
#     f"projects/{PROJECT_ID}/locations/{REGION}/endpoints/{endpoint_name}"
# )
# endpoint = aiplatform.Endpoint(aip_endpoint_name)


# If the request fails with `ServiceUnavailable: 503 Took too long to respond when processing`,
# send less input data.
image_files = ["dog_image.jpg", "car_image.jpg", "bird_image.jpg"]
audio_files = ["dog_audio.wav", "car_audio.wav", "bird_audio.wav"]

# A single instance holding three text prompts, three images and three audio clips.
instances = [
    {
        "text": ["A dog.", "A car", "A bird"],
        "vision": [os.path.join(DATA_BUCKET, file_name) for file_name in image_files],
        "audio": [os.path.join(DATA_BUCKET, file_name) for file_name in audio_files],
    },
]
response = endpoint.predict(instances=instances)

# Print the values returned for each modality pair of the first instance.
first_prediction = response.predictions[0]
for modality_pair, probs in first_prediction.items():
    print(f"{modality_pair}:\n{np.array(probs)}\n")

#### Clean up resources

In [None]:
# Remove the resources returned by `deploy_model`.
# Delete the endpoint; `force=True` undeploys the model from it first.
endpoint.delete(force=True)

# Delete the uploaded model.
model.delete()