In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - PaliGemma (Finetuning)

<table><tbody><tr>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_jax_paligemma_finetuning.ipynb">
      <img alt="Google Cloud Colab Enterprise logo" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" width="32px"><br> Run in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_jax_paligemma_finetuning.ipynb">
      <img alt="GitHub logo" src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" width="32px"><br> View on GitHub
    </a>
  </td>
</tr></tbody></table>

## Overview

This notebook demonstrates finetuning PaliGemma with a Vertex AI Custom Training Job, deploying the finetuned model to a Vertex AI Endpoint, and making online predictions.


### Objective
- Prepare data for finetuning.
- Launch a Vertex AI Custom Training Job to finetune PaliGemma, storing the resulting model to a GCS bucket.
- Deploy the finetuned PaliGemma model to a Vertex AI Endpoint.
- Make predictions to the endpoint.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Before you begin

In [None]:
# @title Setup Google Cloud project

# @markdown ### Prerequisites
# @markdown 1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

# @markdown 2. [Optional] [Create a Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets) for storing experiment outputs. Set the BUCKET_URI for the experiment environment. The specified Cloud Storage bucket (`BUCKET_URI`) should be located in the same region as where the notebook was launched. Note that a multi-region bucket (e.g. "us") is not considered a match for a single region covered by the multi-region range (e.g. "us-central1"). If not set, a unique GCS bucket will be created instead.

# Import the necessary packages
import base64
import json
import os
from datetime import datetime
from io import BytesIO
from typing import Tuple

import matplotlib.pyplot as plt
import requests
import tensorflow as tf
from google.cloud import aiplatform
from PIL import Image

# Get the default cloud project id.
# Read from the GOOGLE_CLOUD_PROJECT environment variable; a KeyError is raised
# if the variable is not set in the notebook runtime.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

# Get the default region for launching jobs.
# Read from the GOOGLE_CLOUD_REGION environment variable (KeyError if unset).
REGION = os.environ["GOOGLE_CLOUD_REGION"]

# Enable the Vertex AI API and Compute Engine API, if not already.
print("Enabling Vertex AI API and Compute Engine API.")
! gcloud services enable aiplatform.googleapis.com compute.googleapis.com

# Cloud Storage bucket for storing the experiment artifacts.
# A unique GCS bucket will be created for the purpose of this notebook. If you
# prefer using your own GCS bucket, please change the value yourself below.
now = datetime.now().strftime("%Y%m%d%H%M%S")
BUCKET_URI = "gs://"  # @param {type:"string"}
assert BUCKET_URI.startswith("gs://"), "BUCKET_URI must start with `gs://`."
# Create a unique GCS bucket for this notebook, if not specified by the user
if BUCKET_URI is None or BUCKET_URI.strip() == "" or BUCKET_URI == "gs://":
    BUCKET_URI = f"gs://{PROJECT_ID}-tmp-{now}"
    ! gsutil mb -l {REGION} {BUCKET_URI}
else:
    BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])
    shell_output = ! gsutil ls -Lb {BUCKET_NAME} | grep "Location constraint:" | sed "s/Location constraint://"
    bucket_region = shell_output[0].strip().lower()
    if bucket_region != REGION:
        raise ValueError(
            f"Bucket region {bucket_region} is different from notebook region {REGION}"
        )
print(f"Using this GCS Bucket: {BUCKET_URI}")

STAGING_BUCKET = os.path.join(BUCKET_URI, "temporal")
MODEL_BUCKET = os.path.join(BUCKET_URI, "paligemma")

# Point the Vertex AI SDK at this project/region and at the staging folder
# inside the experiment bucket.
print("Initializing Vertex AI API.")
aiplatform.init(
    staging_bucket=STAGING_BUCKET,
    location=REGION,
    project=PROJECT_ID,
)

# Set up default SERVICE_ACCOUNT
SERVICE_ACCOUNT = None
shell_output = ! gcloud projects describe $PROJECT_ID
project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"
print("Using this default Service Account:", SERVICE_ACCOUNT)

# Provision permissions to the SERVICE_ACCOUNT with the GCS bucket
BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.admin $BUCKET_NAME

# The pre-built training and serving docker images.
TRAIN_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/jax-paligemma-train-gpu:latest"
SERVE_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/jax-paligemma-serve-gpu:latest"

# Maps "paligemma-<resolution>-<precision>" to the checkpoint file name of the
# matching pretrained model. Each resolution has one file stem and each
# precision adds its own suffix before the ".npz" extension.
# NOTE(review): only "paligemma-..." keys are produced here, while the finetune
# cell also builds "paligemma-mix-..." names -- confirm whether mix checkpoints
# should be listed as well.
pretrained_filename_lookup = {
    f"paligemma-{resolution}-{precision}": f"{stem}.params{suffix}.npz"
    for resolution, stem in (
        (224, "stage1_224_128"),
        (448, "stage2_448_512"),
        (896, "stage2_896_512"),
    )
    for precision, suffix in (
        ("float32", ""),
        ("float16", ".f16"),
        ("bfloat16", ".bf16"),
    )
}


def get_job_name_with_datetime(prefix: str) -> str:
    """Builds a name for a Vertex AI training or deployment job.

    Args:
        prefix: Text placed in front of the timestamp.

    Returns:
        `prefix` followed by the current local time formatted as
        `_YYYYMMDD_HHMMSS`.
    """
    timestamp = datetime.now().strftime("_%Y%m%d_%H%M%S")
    return "".join((prefix, timestamp))


def deploy_model(
    model_name: str,
    checkpoint_path: str,
    machine_type: str = "g2-standard-32",
    accelerator_type: str = "NVIDIA_L4",
    accelerator_count: int = 1,
    resolution: int = 224,
) -> Tuple[aiplatform.Model, aiplatform.Endpoint]:
    """Uploads a model backed by the serving container and deploys it.

    A new endpoint is created first, then the model is uploaded with the
    serving container configuration, and finally the model is deployed to the
    endpoint synchronously.

    Args:
        model_name: Base display name; a timestamp is appended to it. Also
            passed to the container as `MODEL_ID`.
        checkpoint_path: Passed to the container as `CKPT_PATH`.
        machine_type: Machine type used for the deployment.
        accelerator_type: Accelerator type used for the deployment.
        accelerator_count: Number of accelerators used for the deployment.
        resolution: Passed to the container as `RESOLUTION`.

    Returns:
        The uploaded model and the endpoint it was deployed to.
    """
    timestamped_name = get_job_name_with_datetime(model_name)

    endpoint = aiplatform.Endpoint.create(display_name=f"{timestamped_name}-endpoint")

    # Settings handed to the serving container through environment variables.
    container_env = {
        "CKPT_PATH": checkpoint_path,
        "RESOLUTION": resolution,
        "MODEL_ID": model_name,
    }
    model = aiplatform.Model.upload(
        display_name=timestamped_name,
        serving_container_image_uri=SERVE_DOCKER_URI,
        serving_container_ports=[8080],
        serving_container_predict_route="/predict",
        serving_container_health_route="/health",
        serving_container_environment_variables=container_env,
    )

    print(
        f"Deploying {timestamped_name} on {machine_type} with {accelerator_count} {accelerator_type} GPU(s)."
    )
    deployment_options = dict(
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        deploy_request_timeout=1800,
        service_account=SERVICE_ACCOUNT,
        enable_access_logging=True,
        min_replica_count=1,
        sync=True,
    )
    model.deploy(endpoint=endpoint, **deployment_options)
    return model, endpoint


def download_image(url: str, timeout: float = 60.0) -> Image.Image:
    """Downloads an image from the specified URL.

    Args:
        url: Location of the image to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The downloaded image opened with PIL.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within `timeout`.
    """
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors here; otherwise the body of an error page would be
    # handed to PIL and fail with an unrelated "cannot identify image" error.
    response.raise_for_status()
    return Image.open(BytesIO(response.content))


def resize_image(image: Image.Image, new_width: int = 1000) -> Image.Image:
    """Resizes an image to `new_width`, keeping its aspect ratio.

    The original and the resized dimensions are printed.

    Args:
        image: Image to resize.
        new_width: Width of the returned image in pixels.

    Returns:
        A resized copy of `image`.
    """
    width, height = image.size
    print(f"original input image size: {width}, {height}")
    # Clamp to one pixel: for very wide images the scaled height truncates to
    # 0, which is not a valid image size.
    new_height = max(1, int(height * new_width / width))
    new_img = image.resize((new_width, new_height))
    print(f"resized input image size: {new_width}, {new_height}")
    return new_img


def image_to_base64(image: Image.Image, format="JPEG") -> str:
    """Converts an image to a base64 string.

    Args:
        image: Image to encode.
        format: Image file format used to serialize the image.

    Returns:
        The serialized image bytes, base64-encoded and decoded as UTF-8 text.
    """
    # The JPEG writer only accepts a few pixel modes. Images with an alpha
    # channel or a palette (e.g. many PNGs) must be converted first, otherwise
    # saving fails.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if (format or "").upper() == "JPEG" and image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buffer = BytesIO()
    image.save(buffer, format=format)
    image_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return image_str


def caption_predict(
    endpoint: aiplatform.Endpoint,
    image: Image.Image = None,
    language_code: str = "en",
    new_width: int = 1000,
) -> str:
    """Predicts a caption for a given image using an Endpoint.

    Args:
        endpoint: Endpoint that serves the model.
        image: Image to caption. Required, although it defaults to None.
        language_code: Language of the requested caption, e.g. "en".
        new_width: Width the image is resized to before it is sent.

    Returns:
        The value stored under "response" in the first prediction, or None if
        that key is absent.

    Raises:
        ValueError: If no image is given.
    """
    if image is None:
        raise ValueError("An `image` is required to predict a caption.")

    # Resize and convert image to base64 string.
    resized_image = resize_image(image, new_width)
    resized_image_base64 = image_to_base64(resized_image)

    # Format caption prompt. The captioning task is `caption <lang>`; the
    # language code must appear only once (previously a fixed "en" was always
    # inserted before it, producing e.g. "caption en es").
    caption_prompt = f"caption {language_code}\n"

    instances = [
        {
            "prompt": caption_prompt,
            "image": resized_image_base64,
        },
    ]
    response = endpoint.predict(instances=instances)
    return response.predictions[0].get("response")

In [None]:
# @title Access PaliGemma models on Vertex AI for GPU based serving


# @markdown Accept the model agreement to access the models:
# @markdown 1. Open the [PaliGemma model card](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/363) from [Vertex AI Model Garden](https://cloud.google.com/model-garden).
# @markdown 1. Review and accept the agreement in the pop-up window on the model card page. If you have previously accepted the model agreement, there will not be a pop-up window on the model card page and this step is not needed.
# @markdown 1. After accepting the agreement of PaliGemma, a `gs://` URI containing PaliGemma pretrained models will be shared.
# @markdown 1. Paste the link in the `VERTEX_AI_MODEL_GARDEN_PALIGEMMA` field below.
# @markdown 1. The PaliGemma models will be copied into `BUCKET_URI`.
VERTEX_AI_MODEL_GARDEN_PALIGEMMA = "gs://"  # @param {type:"string", isTemplate:true}
assert (
    VERTEX_AI_MODEL_GARDEN_PALIGEMMA
), "Please click the agreement of PaliGemma in Vertex AI Model Garden, and get the GCS path of PaliGemma model artifacts."
print(
    "Copying PaliGemma model artifacts from",
    VERTEX_AI_MODEL_GARDEN_PALIGEMMA,
    "to ",
    MODEL_BUCKET,
)

! gsutil -m cp -R $VERTEX_AI_MODEL_GARDEN_PALIGEMMA/* $MODEL_BUCKET

model_path_prefix = MODEL_BUCKET

## Finetune with Vertex AI Custom Training Jobs

In [None]:
# @title Data preparation

# @markdown The dataset must be a JSONL file in which each line pairs an image with a text prompt (`prefix`) and the expected answer (`suffix`).
# @markdown The value for `"image"` can be a GCS path or a URL.

# @markdown ```
# @markdown {"image": "gs://bucket-name/image.jpg", "prefix": "what animal is this?", "suffix": "cat"}
# @markdown {"image": "https://website.com/image.jpg", "prefix": "what drink is this?", "suffix": "soda"}
# @markdown ```

dataset_gcs_uri = "gs://"  # @param {type: "string"}
num_train_example = 90  # @param {type:"integer"}

# @markdown [Optional] The `"image"` fields in the JSONL file may instead hold just the image
# @markdown filename; in that case set `dataset_image_dir` to the location where the images are stored.
dataset_image_dir = ""  # @param {type:"string"}

In [None]:
# @title Finetune
# @markdown Use the Vertex AI SDK to create and run the custom training jobs. It takes around X minutes to finetune PaliGemma for X steps on 1 NVIDIA_L4.

model_variant = "mix"  # @param ["mix", "pt"]
model_resolution = 224  # @param [224, 448, 896]
model_precision_type = "float32"  # @param ["float32", "float16", "bfloat16"]

if model_variant == "mix":
    model_name_prefix = "paligemma-mix"
else:
    model_name_prefix = "paligemma"

base_model_name = f"{model_name_prefix}-{model_resolution}-{model_precision_type}"
# Fail with an actionable message instead of a bare KeyError when the selected
# variant/resolution/precision has no registered checkpoint file.
if base_model_name not in pretrained_filename_lookup:
    raise ValueError(
        f"No pretrained checkpoint file is registered for `{base_model_name}`. "
        f"Available entries in `pretrained_filename_lookup`: {sorted(pretrained_filename_lookup)}."
    )
base_model_filename = pretrained_filename_lookup[base_model_name]
base_model_uri = os.path.join(model_path_prefix, base_model_filename)

# Check the dataset location before a billable training job is launched.
if dataset_gcs_uri.strip() in ("", "gs://"):
    raise ValueError(
        "Please set `dataset_gcs_uri` to the JSONL dataset file in the Data preparation cell."
    )

# The accelerator to use.
ACCELERATOR_TYPE = "NVIDIA_L4"  # @param ["NVIDIA_TESLA_V100", "NVIDIA_L4"]

# Batch size for finetuning.
batch_size = 64  # @param {type:"integer"}
# Number of epochs to train.
epochs = 1  # @param {type:"integer"}
# Learning rate.
learning_rate = 2e-4  # @param{type:"number"}

# Worker pool spec.

if ACCELERATOR_TYPE == "NVIDIA_TESLA_V100":
    machine_type = "n1-standard-8"
    accelerator_count = 2
elif ACCELERATOR_TYPE == "NVIDIA_L4":
    machine_type = "g2-standard-24"
    accelerator_count = 2
else:
    raise ValueError(
        f"Cannot automatically determine machine type from {ACCELERATOR_TYPE}."
    )

replica_count = 1

# Setup training job.
job_name = get_job_name_with_datetime("paligemma-finetune")

# Pass training arguments and launch job.
train_job = aiplatform.CustomContainerTrainingJob(
    display_name=job_name,
    container_uri=TRAIN_DOCKER_URI,
)

# Designate a GCS folder to store the finetuning outputs (checkpoints and logs).
finetune_output_dir_name = get_job_name_with_datetime("paligemma-finetune")
finetune_output_dir = os.path.join(STAGING_BUCKET, finetune_output_dir_name)

train_job.run(
    args=[
        "--config=big_vision/configs/proj/paligemma/transfers/vertexai_l4.py",
        f"--workdir={finetune_output_dir}",
        f"--config.model_init={base_model_uri}",
        f"--config.input.data.fname={dataset_gcs_uri}",
        f"--config.input.data.stop={num_train_example}",
        f"--config.input.data.fopen_keys.image={dataset_image_dir}",
        f"--config.input.batch_size={batch_size}",
        f"--config.total_epochs={epochs}",
        f"--config.lr={learning_rate}",
    ],
    replica_count=replica_count,
    machine_type=machine_type,
    accelerator_type=ACCELERATOR_TYPE,
    accelerator_count=accelerator_count,
    boot_disk_size_gb=500,
    service_account=SERVICE_ACCOUNT,
)

print("Checkpoint and log files was saved in: ", finetune_output_dir)

# @markdown Click "Show Code" to see more details.

In [None]:
# @title View training loss

# @markdown Metrics will be stored in a file named `big_vision_metrics.txt` in the GCS bucket, including training loss, examples seen, and core hours throughout training.

# @markdown Run this cell to get and plot the training loss.

# Each line of the metrics file is parsed as one JSON record.
metrics_path = os.path.join(finetune_output_dir, "big_vision_metrics.txt")
with tf.io.gfile.GFile(metrics_path, "r") as metrics_file:
    metric_records = [json.loads(row) for row in metrics_file]

# Pull out the two series that are plotted against each other.
steps = [record["step"] for record in metric_records]
training_losses = [record["training_loss"] for record in metric_records]

# Draw the loss curve.
fig, ax = plt.subplots()
ax.plot(steps, training_losses)
ax.set_title("Steps vs. Training Loss")
ax.set_xlabel("Steps")
ax.set_ylabel("Training Loss")
plt.show()

In [None]:
# @title Deploy

# @markdown This section uploads the finetuned PaliGemma model to Model Registry and deploys it to a Vertex AI Endpoint. It takes approximately 15 minutes to finish.

# @markdown Note: You cannot use accelerator type `NVIDIA_TESLA_V100` to serve prebuilt or finetuned PaliGemma models with resolution `896` and precision_type `float32`.

# The `checkpoint.bv-LAST` file holds the suffix that completes the name of the
# checkpoint to deploy.
last_checkpoint_path = os.path.join(finetune_output_dir, "checkpoint.bv-LAST")
with tf.io.gfile.GFile(last_checkpoint_path, "r") as f:
    # Strip surrounding whitespace so that a trailing newline in the marker
    # file cannot end up inside the checkpoint path.
    final_checkpoint_name = "checkpoint.bv-" + f.read().strip()
checkpoint_path = os.path.join(finetune_output_dir, final_checkpoint_name)

model_name = f"paligemma-{model_resolution}-{model_precision_type}-custom"
print(f"Deploying custom PaliGemma model: {model_name}")

# @markdown Select the accelerator type to use to deploy the model:
accelerator_type = "NVIDIA_L4"  # @param ["NVIDIA_L4", "NVIDIA_TESLA_V100"]
if accelerator_type == "NVIDIA_L4":
    machine_type = "g2-standard-16"
    accelerator_count = 1
elif accelerator_type == "NVIDIA_TESLA_V100":
    # Reject the one combination that the note above rules out for a V100.
    if model_resolution == 896 and model_precision_type == "float32":
        raise ValueError(
            "NVIDIA_TESLA_V100 is not sufficient. Multi-gpu is not supported for PaLIGemma."
        )
    machine_type = "n1-highmem-8"
    accelerator_count = 1
else:
    raise ValueError(
        f"Recommended machine settings not found for: {accelerator_type}. To use another another accelerator, please edit this code block to pass in an appropriate `machine_type`, `accelerator_type`, and `accelerator_count` to the deploy_model function by clicking `Show Code` and then modifying the code."
    )
# @markdown Find other Vertex AI prediction supported accelerators and regions at https://cloud.google.com/vertex-ai/docs/predictions/configure-compute.

# @markdown You may need to manually set the `machine_type`, `accelerator_type`, and `accelerator_count` after clicking "Show code".

model, endpoint = deploy_model(
    model_name=model_name,
    checkpoint_path=checkpoint_path,
    machine_type=machine_type,
    accelerator_type=accelerator_type,
    accelerator_count=accelerator_count,
    resolution=model_resolution,
)

In [None]:
# @title Image Captioning

# @markdown This section uses the deployed PaliGemma model to caption and describe an image in a chosen language.

# @markdown ![](https://images.pexels.com/photos/20427316/pexels-photo-20427316/free-photo-of-a-moped-parked-in-front-of-a-blue-door.jpeg?auto=compress&cs=tinysrgb&w=630&h=375&dpr=2)

image_url = "https://images.pexels.com/photos/20427316/pexels-photo-20427316/free-photo-of-a-moped-parked-in-front-of-a-blue-door.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=2"  # @param {type:"string"}

image = download_image(image_url)
display(image)

# Make a prediction. `caption_predict` resizes and base64-encodes the image
# itself, so no separate encoding of the full-size image is needed here.
language_code = "en"  # @param {type: "string"}
caption = caption_predict(endpoint, image, language_code)

print("Caption: ", caption)
# @markdown Please click "Show Code" to see more details.

## Clean up resources

In [None]:
# @markdown Delete the experiment models and endpoints to release the resources
# @markdown and avoid incurring unnecessary ongoing charges.

# Delete the training job.
train_job.delete()

# Undeploy model and delete endpoint.
endpoint.delete(force=True)

# Delete models.
model.delete()

# Optionally remove the experiment bucket as well. This is off by default; the
# command below deletes BUCKET_URI recursively.
delete_bucket = False  # @param {type:"boolean"}
if delete_bucket:
    ! gsutil -m rm -r $BUCKET_URI