In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - Keras YOLOv8 (Finetuning)

<table><tbody><tr>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_keras_yolov8.ipynb">
      <img alt="Google Cloud Colab Enterprise logo" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" width="32px"><br> Run in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_keras_yolov8.ipynb">
      <img alt="GitHub logo" src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" width="32px"><br> View on GitHub
    </a>
  </td>
</tr></tbody></table>

## Overview

This notebook demonstrates how to use [Keras YOLOv8](https://keras.io/api/keras_cv/models/tasks/yolo_v8_detector/) in Vertex AI Model Garden.

### Objective

- Run local inferences for pretrained or customized models

- Deploy pretrained or customized models in Google Cloud Vertex AI

- Finetune models in Google Cloud Vertex AI

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

### Dataset

The dataset used for this tutorial is the Salads category of the [OpenImages dataset](https://www.tensorflow.org/datasets/catalog/open_images_v4) from [TensorFlow Datasets](https://www.tensorflow.org/datasets/catalog/overview). This dataset does not require any feature engineering. The version of the dataset you will use in this tutorial is stored in a public Cloud Storage bucket. The trained model predicts the bounding box location and the class of each salad item in an image, choosing from five classes: Salad, Seafood, Tomato, Baked Goods, or Cheese.

## Before you begin

In [None]:
# @title Setup Google Cloud project

# @markdown 1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

# @markdown 2. [Optional] [Create a Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets) for storing experiment outputs. Set the BUCKET_URI for the experiment environment. The specified Cloud Storage bucket (`BUCKET_URI`) should be located in the same region as where the notebook was launched. Note that a multi-region bucket (e.g. "us") is not considered a match for a single region covered by the multi-region range (e.g. "us-central1"). If `BUCKET_URI` is not set, a unique GCS bucket will be created instead.

# Configs for all notebooks.
# Install pinned versions of KerasCV and Keras Core.
! pip3 install --quiet keras-cv==0.9.0
! pip3 install --quiet keras-core==0.1.0

# Clone the samples repository; the `common_util` helper module is loaded from
# this checkout further down in this cell.
! git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git

import base64
import importlib
import io
import os
import tempfile
import uuid
from datetime import datetime
from typing import Dict, List, Union

import keras_cv
import numpy as np
import tensorflow as tf
import yaml
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
from keras_cv import visualization
from PIL import Image

# The cloned repository's directory names contain hyphens, so the module cannot
# be named in a regular `import` statement; load it by its dotted path instead.
common_util = importlib.import_module(
    "vertex-ai-samples.community-content.vertex_model_garden.model_oss.notebook_util.common_util"
)

# Registries of Vertex AI models and endpoints. The clean-up cell iterates over
# the values of these dicts to delete the resources.
models, endpoints = {}, {}


# Get the default cloud project id.
# Raises KeyError if the environment variable is not set.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

# Get the default region for launching jobs.
# Raises KeyError if the environment variable is not set.
REGION = os.environ["GOOGLE_CLOUD_REGION"]

# Enable the Vertex AI API and Compute Engine API, if not already.
print("Enabling Vertex AI API and Compute Engine API.")
! gcloud services enable aiplatform.googleapis.com compute.googleapis.com

# Cloud Storage bucket for storing the experiment artifacts.
# A unique GCS bucket will be created for the purpose of this notebook. If you
# prefer using your own GCS bucket, change the value yourself below.
now = datetime.now().strftime("%Y%m%d%H%M%S")
BUCKET_URI = "gs://"  # @param {type:"string"}
BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])

if BUCKET_URI is None or BUCKET_URI.strip() == "" or BUCKET_URI == "gs://":
    BUCKET_URI = f"gs://{PROJECT_ID}-tmp-{now}-{str(uuid.uuid4())[:4]}"
    BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])
    ! gsutil mb -l {REGION} {BUCKET_URI}
else:
    assert BUCKET_URI.startswith("gs://"), "BUCKET_URI must start with `gs://`."
    shell_output = ! gsutil ls -Lb {BUCKET_NAME} | grep "Location constraint:" | sed "s/Location constraint://"
    bucket_region = shell_output[0].strip().lower()
    if bucket_region != REGION:
        raise ValueError(
            "Bucket region %s is different from notebook region %s"
            % (bucket_region, REGION)
        )
print(f"Using this GCS Bucket: {BUCKET_URI}")

STAGING_BUCKET = os.path.join(BUCKET_URI, "temporal")
MODEL_BUCKET = os.path.join(BUCKET_URI, "keras_yolov8")


# Initialize Vertex AI API.
print("Initializing Vertex AI API.")
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=STAGING_BUCKET)

# Gets the default SERVICE_ACCOUNT.
shell_output = ! gcloud projects describe $PROJECT_ID
project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"
print("Using this default Service Account:", SERVICE_ACCOUNT)


# Provision permissions to the SERVICE_ACCOUNT with the GCS bucket
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.admin $BUCKET_NAME

! gcloud config set project $PROJECT_ID

# Docker image run by the custom training job in the finetuning cell.
TRAIN_CONTAINER_URI = (
    "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/keras-yolov8-train"
)
# Docker image used to serve the uploaded model.
SERVING_CONTAINER_URI = (
    "us-docker.pkg.dev/vertex-ai-restricted/prediction/tf_opt-gpu.2-12:latest"
)

# Command-line arguments passed to the serving container at model upload.
SERVING_CONTAINER_ARGS = ["--allow_precompilation", "--allow_compression"]
# Side length, in pixels, that `decode_image` resizes images to. It is also
# exported to the training container as the `RESOLUTION` environment variable.
RESOLUTION = 512


def load_img(path):
    """Loads the image stored at `path` and returns it as an RGB `PIL.Image`.

    The file is read with `tf.io.read_file` and decoded into three channels
    with `tf.image.decode_jpeg` before being wrapped in a PIL image.
    """
    raw_bytes = tf.io.read_file(path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    pil_image = Image.fromarray(np.uint8(pixels))
    return pil_image.convert("RGB")


def decode_image(image_str_tensor: tf.Tensor) -> tf.Tensor:
    """Converts and resizes image bytes to image tensor.

    Args:
        image_str_tensor: String tensor holding the encoded bytes of one image.

    Returns:
        The decoded 3-channel image resized to `RESOLUTION` x `RESOLUTION`.
        Animated inputs are not expanded into frames, so the result is a
        single image.
    """
    # `tf.string` / `tf.float32` are dtype objects, not types, so the
    # annotations use `tf.Tensor`; the dtypes are described in the docstring.
    image = tf.io.decode_image(image_str_tensor, 3, expand_animations=False)
    image = tf.image.resize(image, (RESOLUTION, RESOLUTION))
    return image


def get_label_map(label_map_yaml_filepath):
    """Returns class id to label mapping given a filepath to the label map."""

    temp_dir = tempfile.TemporaryDirectory()
    label_map_yaml_filename = os.path.basename(label_map_yaml_filepath)
    local_metrics_path = os.path.join(temp_dir.name, label_map_yaml_filename)

    ! gsutil cp $label_map_yaml_filepath $local_metrics_path
    with open(local_metrics_path, "r") as input_file:
        label_map = yaml.safe_load(input_file.read())["label_map"]
    temp_dir.cleanup()
    return label_map


def get_prediction_instances(test_filepath, new_width=-1):
    """Builds the request instances for one image to send to a Vertex AI Endpoint.

    Args:
        test_filepath: Path of the image to send.
        new_width: When positive, the image is resized to this width (keeping
            the aspect ratio) and re-encoded as JPEG before being sent. When
            zero or negative, the file bytes are sent unchanged.

    Returns:
        A one-element list whose item carries the base64-encoded image under
        `encoded_image.b64`.
    """
    if new_width <= 0:
        # Send the file exactly as stored.
        with tf.io.gfile.GFile(test_filepath, "rb") as input_file:
            payload_bytes = input_file.read()
    else:
        source_img = load_img(test_filepath)
        width, height = source_img.size
        print("original input image size: ", width, " , ", height)
        # Scale the height by the same factor as the width.
        new_height = int(height * new_width / width)
        scaled_img = source_img.resize((new_width, new_height))
        print("resized input image size: ", new_width, " , ", new_height)
        jpeg_buffer = io.BytesIO()
        scaled_img.save(jpeg_buffer, format="JPEG")
        payload_bytes = jpeg_buffer.getvalue()

    encoded_string = base64.b64encode(payload_bytes).decode("utf-8")
    return [{"encoded_image": {"b64": encoded_string}}]


def predict_custom_trained_model(
    project: str,
    endpoint_id: str,
    instances: Union[Dict, List[Dict]],
    location: str = "us-central1",
):
    """Sends `instances` to a Vertex AI endpoint and returns its predictions.

    Args:
        project: Google Cloud project that owns the endpoint.
        endpoint_id: ID of the endpoint to query.
        instances: Prediction instance(s) to send.
        location: Region of the endpoint; also selects the regional API host.

    Returns:
        A `(predictions, deployed_model_id)` tuple taken from the response.
    """
    # The AI Platform services require regional API endpoints.
    prediction_client = aiplatform.gapic.PredictionServiceClient(
        client_options={"api_endpoint": f"{location}-aiplatform.googleapis.com"}
    )
    # No prediction parameters are used; send an empty struct.
    empty_parameters = json_format.ParseDict({}, Value())
    endpoint_path = prediction_client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = prediction_client.predict(
        endpoint=endpoint_path, instances=instances, parameters=empty_parameters
    )
    return response.predictions, response.deployed_model_id

In [None]:
# @title Run local inferences with pretrained model

# @markdown This section shows how to run inferences locally with YOLOv8-M pretrained on PascalVOC 2012 object detection task, which consists of 20 classes.

test_filepath = ""  # @param {type:"string"}
# Fail early with an actionable message instead of an opaque file-system error
# when the form field was left empty.
if not test_filepath.strip():
    raise ValueError(
        "`test_filepath` is empty. Set it to the path of the image to run "
        "inference on before running this cell."
    )
img_bytes = tf.io.read_file(test_filepath)
# Add a leading batch dimension: the detector is called on a batch of images.
image = tf.expand_dims(decode_image(img_bytes), axis=0)

# Load model pretrained on PascalVOC 2012.
model = keras_cv.models.YOLOV8Detector.from_preset(
    "yolo_v8_m_pascalvoc",
    bounding_box_format="xywh",
)

decoded = model.predict(image)

# Classes in PascalVOC 2012 dataset.
# NOTE(review): the list has 21 entries although the section text mentions 20
# classes; confirm whether the trailing "Total" entry is intended.
class_ids = [
    "Aeroplane",
    "Bicycle",
    "Bird",
    "Boat",
    "Bottle",
    "Bus",
    "Car",
    "Cat",
    "Chair",
    "Cow",
    "Dining Table",
    "Dog",
    "Horse",
    "Motorbike",
    "Person",
    "Potted Plant",
    "Sheep",
    "Sofa",
    "Train",
    "Tvmonitor",
    "Total",
]
# Map each integer class id to its display name.
class_mapping = dict(enumerate(class_ids))

# Visualize the results.
visualization.plot_bounding_box_gallery(
    image,
    value_range=(0, 255),
    rows=1,
    cols=1,
    y_pred=decoded,
    scale=5,
    font_scale=0.7,
    bounding_box_format="xywh",
    class_mapping=class_mapping,
)

## Finetune with Vertex AI Custom Training Jobs

In [None]:
# @title Finetune

# @markdown This section shows how to finetune the Keras YOLOv8 model with a Vertex AI custom training job. The trained model is deployed to a Vertex AI Endpoint in the following section.

# @markdown `input_csv_path` : The input dataset in CSV format. For further details, kindly check [AutoML Image Object Detection](https://cloud.google.com/vertex-ai/docs/image-data/object-detection/prepare-data).

input_csv_path = "gs://cloud-samples-data/vision/salads.csv"  # @param {type:"string"}

# Hyperparameters

# @markdown `epochs`: Number of training epochs.
epochs = 10  # @param{type:"integer"}
# @markdown `learning_rate`: The learning rate of this training job.
learning_rate = 0.0005  # @param{type:"number"}
# @markdown `fpn_depth`: The depth of the CSP blocks in the Feature Pyramid Network. This is usually 1, 2, or 3, depending on the size of your YOLOV8Detector model. We recommend using 3 for 'yolo_v8_l_backbone' and 'yolo_v8_xl_backbone'. Defaults to 2.
fpn_depth = 3  # @param{type:"integer"}
# @markdown `confidence_threshold`: Only probabilities greater than this threshold will contribute to the final result.
confidence_threshold = 0.02  # @param{type:"number"}
# @markdown `iou_threshold`: Intersection over Union (IoU) is a measure that shows how well the prediction bounding box aligns with the ground truth box.
iou_threshold = 0.3  # @param{type:"number"}
# @markdown `backbone`: The pretrained backbone. [Click here](https://keras.io/api/keras_cv/models/backbones/yolo_v8/) for the full list of available backbones.
backbone = "yolo_v8_xl_backbone_coco"  # @param["yolo_v8_xs_backbone", "yolo_v8_s_backbone", "yolo_v8_m_backbone", "yolo_v8_l_backbone", "yolo_v8_xl_backbone", "yolo_v8_xs_backbone_coco", "yolo_v8_s_backbone_coco", "yolo_v8_m_backbone_coco", "yolo_v8_l_backbone_coco", "yolo_v8_xl_backbone_coco"]

# Hardware used by the training job.
MACHINE_TYPE = "n1-highmem-16"
ACCELERATOR_TYPE = "NVIDIA_TESLA_V100"
ACCELERATOR_COUNT = 2

# The job name doubles as the output folder name under the staging bucket.
train_job_name = common_util.get_job_name_with_datetime("train_yolov8")
model_dir = os.path.join(STAGING_BUCKET, train_job_name)

# Machine and accelerators of the single training replica.
machine_spec = {
    "machine_type": MACHINE_TYPE,
    "accelerator_type": ACCELERATOR_TYPE,
    "accelerator_count": ACCELERATOR_COUNT,
}
# Boot disk attached to the replica.
disk_spec = {
    "boot_disk_type": "pd-ssd",
    "boot_disk_size_gb": 500,
}
# Flags forwarded to the training container.
container_args = [
    f"--input_csv_path={input_csv_path}",
    f"--output_model_dir={model_dir}",
    f"--epochs={epochs}",
    f"--pretrained_backbone={backbone}",
    f"--fpn_depth={fpn_depth}",
    f"--learning_rate={learning_rate}",
    f"--confidence_threshold={confidence_threshold}",
    f"--iou_threshold={iou_threshold}",
]
# Training image, its environment (the image resolution) and its flags.
container_spec = {
    "image_uri": TRAIN_CONTAINER_URI,
    "command": [],
    "env": [{"name": "RESOLUTION", "value": str(RESOLUTION)}],
    "args": container_args,
}
worker_pool_specs = [
    {
        "machine_spec": machine_spec,
        "replica_count": 1,
        "disk_spec": disk_spec,
        "container_spec": container_spec,
    }
]

# Check the training accelerator quota before creating the job.
common_util.check_quota(
    project_id=PROJECT_ID,
    region=REGION,
    accelerator_type=ACCELERATOR_TYPE,
    accelerator_count=ACCELERATOR_COUNT,
    is_for_training=True,
)

train_job = aiplatform.CustomJob(
    display_name=train_job_name,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=STAGING_BUCKET,
)

train_job.run()

print("The trained model is saved in: ", model_dir)

## Deploy and Predict

In [None]:
# @title Upload model

upload_job_name = common_util.get_job_name_with_datetime("upload_yolov8")

# The quota that matters for serving is the one for the accelerator the model
# is deployed on. The deploy cell uses a single NVIDIA_TESLA_T4, not the
# accelerators that were used for training.
SERVING_ACCELERATOR_TYPE = "NVIDIA_TESLA_T4"
SERVING_ACCELERATOR_COUNT = 1

common_util.check_quota(
    project_id=PROJECT_ID,
    region=REGION,
    accelerator_type=SERVING_ACCELERATOR_TYPE,
    accelerator_count=SERVING_ACCELERATOR_COUNT,
    is_for_training=False,
)

# Environment variables set on the serving container.
serving_env = {
    "MODEL_ID": "keras-yolov8",
    "DEPLOY_SOURCE": "notebook",
}

# Register the finetuned artifacts in `model_dir` as a Vertex AI Model.
model = aiplatform.Model.upload(
    display_name=upload_job_name,
    artifact_uri=model_dir,
    serving_container_image_uri=SERVING_CONTAINER_URI,
    serving_container_args=SERVING_CONTAINER_ARGS,
    serving_container_environment_variables=serving_env,
)

print("The model name is: ", upload_job_name)

In [None]:
# @title Deploy model

deploy_model_name = common_util.get_job_name_with_datetime("deploy_yolov8")

# Deploy the uploaded model on a new endpoint backed by one T4 replica.
endpoint = model.deploy(
    deployed_model_display_name=deploy_model_name,
    machine_type="n1-standard-4",
    traffic_split={"0": 100},
    accelerator_type="NVIDIA_TESLA_T4",
    accelerator_count=1,
    min_replica_count=1,
    max_replica_count=1,
)

# Track the resources so that the clean-up cell, which iterates over the
# `endpoints` and `models` registries, actually deletes them. Both are
# registered together so that clean-up never tries to delete a model that is
# still deployed on an untracked endpoint.
endpoints[deploy_model_name] = endpoint
models[deploy_model_name] = model

endpoint_id = endpoint.name
print("The endpoint id is: ", endpoint_id)

In [None]:
# @title Predict

test_filepath = "gs://cloud-ml-data/img/openimage/1302/4677521502_6f2767039c_o.jpg"  # @param {type:"string"}
# Local copy of the image, resized to the model resolution, used for plotting.
image_bytes = tf.io.read_file(test_filepath)
image_resized = tf.expand_dims(decode_image(image_bytes), axis=0)

# Request payload: the image resized to a width of 640 pixels.
instances = get_prediction_instances(test_filepath, new_width=640)

predictions, _ = predict_custom_trained_model(
    project=PROJECT_ID, location=REGION, endpoint_id=endpoint_id, instances=instances
)

# Only one image was sent, so use the first prediction. The per-box fields get
# a leading batch dimension; `num_detections` is passed through unchanged.
first_prediction = predictions[0]
predictions_dict = {
    field: tf.expand_dims(first_prediction[field], axis=0)
    for field in ("boxes", "classes", "confidence")
}
predictions_dict["num_detections"] = first_prediction["num_detections"]

# The label map is read from `label_map.yaml` in the model directory.
label_map = get_label_map(os.path.join(model_dir, "label_map.yaml"))

visualization.plot_bounding_box_gallery(
    image_resized,
    value_range=(0, 255),
    rows=1,
    cols=1,
    y_pred=predictions_dict,
    scale=5,
    font_scale=0.7,
    bounding_box_format="xywh",
    class_mapping=label_map,
)

## Clean up resources

In [None]:
# @title Delete the models and endpoints
# @markdown  Delete the experiment models and endpoints to release the resources
# @markdown  and avoid incurring unnecessary ongoing charges.

# Only resources registered in the `endpoints` / `models` dicts by earlier
# cells are deleted here.

# Undeploy model and delete endpoint.
for endpoint in endpoints.values():
    # `force=True` undeploys any deployed models before deleting the endpoint.
    endpoint.delete(force=True)

# Delete models.
for model in models.values():
    model.delete()

# WARNING: when enabled, this recursively removes the whole bucket behind
# BUCKET_NAME, including a bucket you supplied yourself through BUCKET_URI.
delete_bucket = False  # @param {type:"boolean"}
if delete_bucket:
    ! gsutil -m rm -r $BUCKET_NAME

## References

- [Efficient Object Detection with YOLOV8 and KerasCV](https://keras.io/examples/vision/yolov8/)
- [Keras YOLOv8 API Documentation](https://keras.io/api/keras_cv/models/tasks/yolo_v8_detector/)
- [Keras YOLOv8 Backbones](https://keras.io/api/keras_cv/models/backbones/yolo_v8/)