In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [None]:
# @title Setup Google Cloud project
# @markdown 1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

# @markdown 2. **[Optional]** Set region. If not set, the region will be set automatically according to Colab Enterprise environment.

REGION = ""  # @param {type:"string"}

# @markdown 3. If you want to run predictions with A100 80GB or H100 GPUs, we recommend using the regions listed below. **NOTE:** Make sure you have associated quota in selected regions. Click the links to see your current quota for each GPU type: [Nvidia A100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_a100_80gb_gpus), [Nvidia H100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_h100_gpus). You can request for quota following the instructions at ["Request a higher quota"](https://cloud.google.com/docs/quota/view-manage#requesting_higher_quota).

# @markdown | Machine Type | Accelerator Type | Recommended Regions |
# @markdown | ----------- | ----------- | ----------- |
# @markdown | a2-ultragpu-1g | 1 NVIDIA_A100_80GB | us-central1, us-east4, europe-west4, asia-southeast1 |
# @markdown | a3-highgpu-2g | 2 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |
# @markdown | a3-highgpu-4g | 4 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |
# @markdown | a3-highgpu-8g | 8 NVIDIA_H100_80GB | us-central1, europe-west4, us-west1, asia-southeast1 |

import importlib
import os

from google.cloud import aiplatform

# Import common utils
if os.environ.get("VERTEX_PRODUCT") != "COLAB_ENTERPRISE":
    ! pip install --upgrade tensorflow
! git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git

common_util = importlib.import_module(
    "vertex-ai-samples.notebooks.community.model_garden.docker_source_codes.notebook_util.common_util"
)

# Setup GCP & VertexAI

# Get the default cloud project id.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

# Get the default region for launching jobs.
if not REGION:
    REGION = os.environ["GOOGLE_CLOUD_REGION"]

# Enable the Vertex AI API and Compute Engine API, if not already.
print("Enabling Vertex AI API and Compute Engine API.")
! gcloud services enable aiplatform.googleapis.com compute.googleapis.com

# Initialize Vertex AI API.
print("Initializing Vertex AI API.")
aiplatform.init(project=PROJECT_ID, location=REGION)

# Gets the default SERVICE_ACCOUNT.
shell_output = ! gcloud projects describe $PROJECT_ID
project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"
print("Using this default Service Account:", SERVICE_ACCOUNT)

! gcloud config set project $PROJECT_ID
import vertexai

vertexai.init(
    project=PROJECT_ID,
    location=REGION,
)

# Model configuration & utils
# Serving container image used by deploy() for every model type below.
SERVE_DOCKER_URI = "us-docker.pkg.dev/vertex-ai-restricted/vertex-vision-model-garden-dockers/remote-sensing-serve-tf-gpu:latest"
# Maps a model type to a (model id, Model Garden publisher model name, GCS model
# path) tuple. deploy() unpacks the tuple in exactly this order: the id and path
# are passed to the serving container as the MODEL_ID / MODEL_PATH environment
# variables and the publisher model name is attached to the uploaded model.
MODEL_CONFIGS = {
    "OWLVIT": (
        "earth-ai-imagery-owlvit-eap-10-2025",
        "publishers/google/models/remote_sensing_owlvit",
        "gs://vertex-model-garden-restricted-us/remote-sensing/OVD_OWL-ViT_So400M_RGB1008_V1",
    ),
    "MAMMUT": (
        "earth-ai-imagery-mammut-eap-10-2025",
        "publishers/google/models/remote_sensing_mammut",
        "gs://vertex-model-garden-restricted-us/remote-sensing/MaMMUT_So400M_RGB224_V1",
    ),
}


def _get_platform_config(accelerator: str):
    """Returns the platform config for the given accelerator type."""
    if accelerator == "CPU":
        return "cpu", "e2-standard-8", None, None
    if accelerator == "NVIDIA_L4":
        return "gpu", "g2-standard-8", "NVIDIA_L4", 1
    if accelerator == "NVIDIA_A100_80GB":
        return "gpu", "a2-ultragpu-1g", "NVIDIA_A100_80GB", 1
    raise f"Accelerator config is not supported {accelerator}"


def deploy(
    name: str,
    model_type: str,
    model_mode: str,
    platform: str,
    machine_type: str,
    accelerator_type: str,
    accelerator_count,
    service_account: str = None,
    use_dedicated_endpoint: bool = False,
    min_replica_count: int = 1,
    max_replica_count: int = 1,
) -> tuple[aiplatform.Endpoint, aiplatform.Model]:
    """Uploads the model and deploys it to a newly created endpoint.

    Args:
      name: The endpoint name to use for deployment; the uploaded model is
        displayed as "<name>-model".
      model_type: The model type to deploy, a key of MODEL_CONFIGS (MAMMUT or
        OWLVIT).
      model_mode: The model mode to deploy, e.g. COMBINED, IMAGE_ONLY or
        TEXT_ONLY.
      platform: The platform string forwarded to the serving container as the
        PLATFORM environment variable ("cpu" or "gpu" as returned by
        _get_platform_config).
      machine_type: The instance machine type to use, see
        https://cloud.google.com/compute/docs/machine-resource
      accelerator_type: The GPU type to deploy (e.g. NVIDIA_L4), or None for a
        CPU-only deployment, see https://cloud.google.com/compute/docs/gpus
      accelerator_count: The number of GPUs (Accelerators) to use, or None for
        a CPU-only deployment.
      service_account: Optional service account passed to the deployment.
      use_dedicated_endpoint: Whether to create a dedicated endpoint.
      min_replica_count: Minimum number of replicas for the deployment.
      max_replica_count: Maximum number of replicas for the deployment.

    Returns:
      The (endpoint, model) pair that was created, so that the caller can run
      predictions and later delete both resources.

    Raises:
      KeyError: If model_type is not a key of MODEL_CONFIGS.
    """
    model_id, model_name, model_path = MODEL_CONFIGS[model_type]

    # The quota check is keyed on an accelerator type. A CPU-only deployment
    # has accelerator_type=None, so there is no accelerator quota to validate.
    if accelerator_type:
        common_util.check_quota(
            project_id=PROJECT_ID,
            region=REGION,
            accelerator_type=accelerator_type,
            accelerator_count=accelerator_count,
            is_for_training=False,
        )

    # The serving container is configured entirely through these environment
    # variables.
    model = aiplatform.Model.upload(
        display_name=f"{name}-model",
        serving_container_image_uri=SERVE_DOCKER_URI,
        serving_container_ports=[8080],
        serving_container_predict_route="/predict",
        serving_container_health_route="/health",
        serving_container_environment_variables={
            "DEPLOY_SOURCE": "notebook",
            "MODEL_ID": model_id,
            "MODEL_PATH": model_path,
            "MODEL_TYPE": model_type,
            "MODEL_MODE": model_mode,
            "PLATFORM": platform,
        },
        model_garden_source_model_name=model_name,
    )
    endpoint = aiplatform.Endpoint.create(
        name, dedicated_endpoint_enabled=use_dedicated_endpoint
    )
    # sync=True blocks until the deployment has finished (or failed).
    model.deploy(
        endpoint=endpoint,
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        service_account=service_account,
        deploy_request_timeout=1800,
        enable_access_logging=True,
        min_replica_count=min_replica_count,
        max_replica_count=max_replica_count,
        sync=True,
        system_labels={"NOTEBOOK_NAME": "model_garden_remote_sensing_deployment.ipynb"},
    )
    return endpoint, model

In [None]:
# @title Deploy model

# @markdown **Choose an endpoint name (to be deployed)**
ENDPOINT_NAME = "mammut-combined-test-l4"  # @param { 'type' : 'string' }
# @markdown **Specify the model type, variant mode and accelerator (platform) config.**
MODEL_TYPE = "MAMMUT"  # @param ["MAMMUT", "OWLVIT"]
MODEL_MODE = "COMBINED"  # @param ["IMAGE_ONLY", "TEXT_ONLY", "COMBINED"]
ACCELERATOR = "NVIDIA_L4"  # @param ["CPU", "NVIDIA_L4", "NVIDIA_A100_80GB"]
# @markdown **Note:** For OWLVIT it is recommended to use a dedicated endpoint
# @markdown as it increases the input size from 1.5 MB to 10 MB.
use_dedicated_endpoint = True  # @param { 'type' : 'boolean' }
# Translate the accelerator choice into the container platform, machine type,
# accelerator type and accelerator count that deploy() expects.
platform, machine_type, acc_type, num_gpus = _get_platform_config(ACCELERATOR)

# Upload the model and deploy it to a new endpoint. Both handles are kept so
# that the cleanup cell can delete them afterwards.
endpoint, model = deploy(
    name=ENDPOINT_NAME,
    model_type=MODEL_TYPE,
    model_mode=MODEL_MODE,
    platform=platform,
    machine_type=machine_type,
    accelerator_type=acc_type,
    accelerator_count=num_gpus,
    use_dedicated_endpoint=use_dedicated_endpoint,
)

In [None]:
# @title Cleanup Resources
# @markdown  Delete the experiment's model and endpoint to release the resources
# @markdown  and avoid incurring unnecessary ongoing charges.

# Delete the endpoint before the model. Per the Vertex AI SDK documentation,
# force=True undeploys any models still deployed to the endpoint first.
endpoint.delete(force=True)
model.delete()

## Inference examples

* Below are two sets of samples: Object Detection (OWL-ViT) and Classification (MaMMUT). Make sure that the deployed endpoint has the correct model type; otherwise you can override it below.

* The samples are designed to work with the COMBINED mode, i.e. a variant
of the model that can accept text, image or both as input.

* Make sure you **clean up unused resources** (endpoint) at the end. You can use
the cleanup section above.

* To get the best performance, it is advised to use at least an **NVIDIA_L4 GPU**.

In [None]:
# @title Inference setup & utils.

# @markdown If you've just deployed a new endpoint you can use it directly,
# @markdown otherwise specify an endpoint id to override it.

import base64
import io

from PIL import Image


def _b64_png(image: Image.Image) -> str:
    """Encodes the image as PNG and returns the bytes as a base64 string."""
    with io.BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")


# Override the endpoint, if kept empty uses the recently deployed endpoint.
# NOTE(review): if ENDPOINT_ID is left empty and the deploy cell has not been
# run in this kernel, `endpoint` is undefined and the inference cells below
# will fail with a NameError.
ENDPOINT_ID = ""  # @param { 'type': 'string' }
# Forwarded to every endpoint.predict() call in the inference cells below; it
# should match how the target endpoint was created.
use_dedicated_endpoint = True  # @param { 'type' : 'boolean' }

if ENDPOINT_ID:
    endpoint = aiplatform.Endpoint(ENDPOINT_ID)

# Download sample images
# The two images are saved to the current working directory and then opened
# with PIL; they are reused by all inference examples below.
!wget -O harbor.jpg https://mrsg.aegean.gr/images/uploads/it2zi0eidej4ql33llj.jpg
!wget -O palace.jpeg https://www.spaceintelreport.com/wp-content/uploads/2021/05/Pleiades-NEO-US-Capitol-30cm.jpeg
harbor_img = Image.open("harbor.jpg")
palace_img = Image.open("palace.jpeg")

In [None]:
# @title Classification (MaMMUT) Inference Examples
# Make sure that the deployed endpoint above is a MaMMUT model.

# Image encoder: one instance per image. batch_size is raised to 2 here (it is
# 1 by default).
result = endpoint.predict(
    instances=[{"image": _b64_png(img)} for img in (harbor_img, palace_img)],
    parameters=dict(batch_size=2),
    use_dedicated_endpoint=use_dedicated_endpoint,
)
print(result)

# Text encoder: one instance per input text.
result = endpoint.predict(
    instances=[
        {"text": sentence}
        for sentence in (
            "text",
            "second text",
            "this is a longer sentence",
            "this is a another long sentence, longer than the previous",
        )
    ],
    parameters=dict(batch_size=2),
    use_dedicated_endpoint=use_dedicated_endpoint,
)
print(result)

# Zero-shot classification of the harbor & palace images: every image is sent
# together with the candidate labels, and similarity scores are returned for
# each image/text pair.
labels = ["airport", "palace", "harbor", "shipyard", "park"]
result = endpoint.predict(
    instances=[
        {"image": _b64_png(img), "texts": labels}
        for img in (harbor_img, palace_img)
    ],
    parameters=dict(batch_size=2),
    use_dedicated_endpoint=use_dedicated_endpoint,
)
print(result)

In [None]:
# @title Object Detection (OWL-ViT) Inference Examples

# Make sure that the deployed endpoint above is OWL-ViT. It is advised to deploy
# a dedicated endpoint for OWL-ViT as the input size is relatively large.

# Image detection on a single image: returns a list of object detections with
# bounding boxes, scores & embeddings.
result = endpoint.predict(
    instances=[{"image": _b64_png(harbor_img)}],
    parameters=dict(batch_size=1),
    use_dedicated_endpoint=use_dedicated_endpoint,
)
print(result)

# Text encoder with multiple texts: returns a text embedding for each input.
result = endpoint.predict(
    instances=[
        {"text": sentence}
        for sentence in (
            "text",
            "another text",
            "this is a longer sentence",
            "this is a very long sentence, even longer than above.",
        )
    ],
    parameters=dict(batch_size=4),
    use_dedicated_endpoint=use_dedicated_endpoint,
)
print(result)

# Open Vocabulary Detection with image/texts pairs: returns object detections
# and labels, including bounding boxes, scores & embeddings.
labels = ["ship", "harbor", "dome", "building", "bridge"]
result = endpoint.predict(
    instances=[
        {"image": _b64_png(img), "texts": labels}
        for img in (harbor_img, palace_img)
    ],
    parameters=dict(
        batch_size=4,
        # Keep only the top 100 detections, ranked by objectness_score.
        top_k_objects=100,
        # Drop the object/text embeddings to reduce the size of the response.
        keep_embeddings=False,
    ),
    use_dedicated_endpoint=use_dedicated_endpoint,
)
print(result)