In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - Detectron2

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_detectron2.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>

  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_detectron2.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_pytorch_detectron2.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
Open in Vertex AI Workbench
    </a>
  </td>
</table>

## Overview

This notebook demonstrates finetuning Detectron2-based [Faster R-CNN](https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md#faster-r-cnn) and
[RetinaNet](https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md#retinanet)
models for the object detection task and [Mask R-CNN](https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md#coco-instance-segmentation-baselines-with-mask-r-cnn) for the instance segmentation task, and deploying them on Vertex AI for online prediction. This notebook assumes that the input training data is in [COCO format](https://opencv.org/introduction-to-the-coco-dataset/). If you do not have your own dataset, this notebook also shows how to download and prepare the Balloon dataset for training.

### Objective

- Finetune a Detectron2 based Faster R-CNN, RetinaNet, or Mask R-CNN model.
- Upload the model to [Model Registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction).
- Deploy the model on [Endpoint](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints).
- Run online predictions for image object detection and segmentation.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Setup environment

**NOTE**: Jupyter runs lines prefixed with `!` as shell commands, and it interpolates Python variables prefixed with `$` into these commands.

### Colab only

In [None]:
# Upgrade the Vertex AI SDK for Python; it is imported as `aiplatform` below.
!pip3 install --upgrade google-cloud-aiplatform

In [None]:
from google.colab import auth as google_auth

# Authenticate the Colab user so that later cells can access Google Cloud.
google_auth.authenticate_user()

### Install dependencies

In [None]:
# Install gdown for downloading example training images.
!pip install gdown
# Install gsutil for downloading/uploading data from/to Cloud Storage buckets.
!pip install gsutil
# Install libraries for COCO format conversion of datasets
# (pycocotools for mask/RLE utilities, OpenCV for reading image dimensions).
!pip install pycocotools==2.0.6
!pip install opencv-python==4.7.0.72

Restart the notebook kernel after installs.

In [None]:
import IPython

# Shut the kernel down with restart=True so the freshly installed packages are
# picked up. Wait for the kernel to come back before running the next cells.
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

### Setup Google Cloud project

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.

1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

1. [Enable the Vertex AI API and Compute Engine API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com,compute_component).

1. [Create a Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets) for storing experiment outputs.

1. [Create a service account](https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console) with the `Vertex AI User`, `Storage Object Admin`, and `GCS Storage Bucket Owner` roles for deploying the fine-tuned model to a Vertex AI endpoint.

Fill in the following variables for the experiment environment:

In [None]:
# Cloud project id. Required: fill in before running the cells below.
PROJECT_ID = ""  # @param {type:"string"}

# The region you want to launch jobs in.
REGION = "us-central1"  # @param {type:"string"}

# The Cloud Storage bucket for storing experiments output. For example 'gs://my_bucket'.
# The dataset, staging and training output paths below are all built under this URI.
BUCKET_URI = ""  # @param {type:"string"}

# The service account for deploying fine tuned model.
# The service account looks like:
# '<account_name>@<project>.iam.gserviceaccount.com'
# Follow step 5 above to create this account.
SERVICE_ACCOUNT = ""  # @param {type:"string"}

### Define constants

In [None]:
# The pre-built training docker image. It contains training scripts and models.
# Used as the container of the custom training job below.
TRAIN_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-detectron2-train"
# The pre-built serving docker image. It contains serving scripts and models.
# Used as the serving container when the trained model is uploaded.
SERVE_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-detectron2-serve"

### Define common functions

In [None]:
import json
import os
from typing import Dict, List, Union

from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def gcs_fuse_path(path: str) -> str:
    """Map a ``gs://`` URI to the equivalent ``/gcs/`` mount-style path.

    Surrounding whitespace is always stripped. A path that does not begin with
    ``gs://`` is returned otherwise unchanged.
    """
    gcs_scheme = "gs://"
    stripped = path.strip()
    if not stripped.startswith(gcs_scheme):
        return stripped
    return "/gcs/" + stripped[len(gcs_scheme) :]


# Model upload and online prediction helpers
def upload_model(
    project: str,
    location: str,
    display_name: str,
    serving_container_image_uri: str,
    model_pth_file: str,
    model_cfg_yaml_file: str,
    test_threshold: float = 0.5,
):
    """Uploads a trained Detectron2 model to Vertex AI Model Registry.

    Args:
      project: Google Cloud project id.
      location: Vertex AI region.
      display_name: display name for the uploaded model.
      serving_container_image_uri: URI of the serving docker image.
      model_pth_file: path of the trained weights file, passed to the serving
        container as the MODEL_PTH_FILE environment variable.
      model_cfg_yaml_file: path of the model config file, passed to the serving
        container as the CONFIG_YAML_FILE environment variable.
      test_threshold: value passed to the serving container as the
        TEST_THRESHOLD environment variable. Default is 0.5.

    Returns:
      The uploaded `aiplatform.Model`, after the upload has completed.
    """
    aiplatform.init(project=project, location=location)

    # Environment variables are a str -> str mapping, so the numeric threshold
    # is converted explicitly instead of relying on implicit coercion.
    serving_env = {
        "MODEL_PTH_FILE": model_pth_file,
        "CONFIG_YAML_FILE": model_cfg_yaml_file,
        "TEST_THRESHOLD": str(test_threshold),
    }

    model = aiplatform.Model.upload(
        display_name=display_name,
        serving_container_image_uri=serving_container_image_uri,
        serving_container_ports=[7080],
        serving_container_predict_route="/predictions/detectron2_serving",
        serving_container_health_route="/ping",
        serving_container_environment_variables=serving_env,
    )

    # Block until the upload has finished before reading model attributes.
    model.wait()

    print(model.display_name)
    print(model.resource_name)
    return model


def predict_custom_trained_model(
    project: str,
    endpoint_id: str,
    instances: Union[Dict, List[Dict]],
    location: str,
    api_endpoint: str,
):
    """Sends an online prediction request to a deployed Vertex AI endpoint.

    Args:
      project: Google Cloud project id.
      endpoint_id: id of the endpoint to query.
      instances: a single instance dict, or a list of instance dicts.
      location: region of the endpoint.
      api_endpoint: regional API host, e.g. "us-central1-aiplatform.googleapis.com".

    Returns:
      A `(predictions, deployed_model_id)` tuple taken from the response.
    """
    # The request takes a sequence of instances. Wrap a single dict so that it
    # is sent as one instance, matching the declared Union[Dict, List[Dict]].
    if isinstance(instances, dict):
        instances = [instances]
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
    # No prediction parameters are used; an empty Value is sent.
    parameters_dict = {}
    parameters = json_format.ParseDict(parameters_dict, Value())
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.predict(
        endpoint=endpoint, instances=instances, parameters=parameters
    )
    return response.predictions, response.deployed_model_id


# Prediction
import base64


def get_prediction_instances(local_test_filepath):
    """Build the prediction request payload for one local file.

    The file is read as bytes and base64-encoded.

    Returns:
      A one-element list of the form `[{"data": {"b64": <base64 text>}}]`.
    """
    with open(local_test_filepath, "rb") as image_file:
        raw_bytes = image_file.read()
    b64_text = base64.b64encode(raw_bytes).decode("utf-8")
    return [{"data": {"b64": b64_text}}]


import numpy as np
# Mask encoding related
import pycocotools.mask as mask_util


def decode_rle_masks(pred_masks_rle):
    """Decode each RLE mask and stack the results into one numpy array.

    Args:
      pred_masks_rle: iterable of RLE-encoded masks accepted by
        `pycocotools.mask.decode`. Must be non-empty, because `np.stack`
        raises ValueError when given no arrays.

    Returns:
      A numpy array with the decoded masks stacked along a new first axis.
    """
    decoded_masks = []
    for rle in pred_masks_rle:
        decoded_masks.append(mask_util.decode(rle))
    return np.stack(decoded_masks)


import collections

# Visualization
from PIL import Image, ImageColor, ImageDraw, ImageFont


def load_img(local_path):
    """Open the image at `local_path` and return it converted to RGB mode."""
    opened = Image.open(local_path)
    return opened.convert("RGB")


def draw_bounding_box_on_image_array(
    image,
    ymin,
    xmin,
    ymax,
    xmax,
    color="red",
    thickness=4,
    display_str_list=(),
    use_normalized_coordinates=True,
):
    """Draws one bounding box, with optional labels, onto a numpy image in place.

    The array is rendered through a temporary PIL image and the result is
    copied back into `image`, so the caller's array is modified.

    Args:
      image: a numpy array with shape [height, width, 3].
      ymin: ymin of bounding box.
      xmin: xmin of bounding box.
      ymax: ymax of bounding box.
      xmax: xmax of bounding box.
      color: color to draw bounding box. Default is red.
      thickness: line thickness. Default value is 4.
      display_str_list: list of strings to display in box
                        (each to be shown on its own line).
      use_normalized_coordinates: If True (default), treat coordinates
        ymin, xmin, ymax, xmax as relative to the image.  Otherwise treat
        coordinates as absolute.
    """
    pil_canvas = Image.fromarray(np.uint8(image)).convert("RGB")
    draw_bounding_box_on_image(
        pil_canvas,
        ymin=ymin,
        xmin=xmin,
        ymax=ymax,
        xmax=xmax,
        color=color,
        thickness=thickness,
        display_str_list=display_str_list,
        use_normalized_coordinates=use_normalized_coordinates,
    )
    # Write the rendered pixels back into the caller's array.
    rendered = np.array(pil_canvas)
    np.copyto(image, rendered)


def _get_text_size(font, text):
    """Returns `(width, height)` of `text` rendered with `font`.

    `ImageFont.getsize` was removed in Pillow 10, so `getbbox` is preferred and
    `getsize` is only used on older Pillow versions that lack `getbbox`.
    """
    if hasattr(font, "getbbox"):
        # getbbox returns (left, top, right, bottom) relative to the drawing
        # origin; right/bottom bound the text drawn from that origin.
        _, _, right, bottom = font.getbbox(text)
        return right, bottom
    return font.getsize(text)


def draw_bounding_box_on_image(
    image,
    ymin,
    xmin,
    ymax,
    xmax,
    color="red",
    thickness=4,
    display_str_list=(),
    use_normalized_coordinates=True,
):
    """Adds a bounding box to an image.

    Bounding box coordinates can be specified in either absolute (pixel) or
    normalized coordinates by setting the use_normalized_coordinates argument.

    Each string in display_str_list is displayed on a separate line above the
    bounding box in black text on a rectangle filled with the input 'color'.
    If the top of the bounding box extends to the edge of the image, the strings
    are displayed below the bounding box.

    Args:
      image: a PIL.Image object. It is drawn on in place.
      ymin: ymin of bounding box.
      xmin: xmin of bounding box.
      ymax: ymax of bounding box.
      xmax: xmax of bounding box.
      color: color to draw bounding box. Default is red.
      thickness: line thickness. Default value is 4.
      display_str_list: list of strings to display in box
                        (each to be shown on its own line).
      use_normalized_coordinates: If True (default), treat coordinates
        ymin, xmin, ymax, xmax as relative to the image.  Otherwise treat
        coordinates as absolute.
    """
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size
    if use_normalized_coordinates:
        (left, right, top, bottom) = (
            xmin * im_width,
            xmax * im_width,
            ymin * im_height,
            ymax * im_height,
        )
    else:
        (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
    draw.line(
        [(left, top), (left, bottom), (right, bottom), (right, top), (left, top)],
        width=thickness,
        fill=color,
    )
    # Fall back to PIL's built-in font when arial.ttf is not installed.
    try:
        font = ImageFont.truetype("arial.ttf", 24)
    except IOError:
        font = ImageFont.load_default()

    # If the total height of the display strings added to the top of the bounding
    # box exceeds the top of the image, stack the strings below the bounding box
    # instead of above.
    display_str_heights = [_get_text_size(font, ds)[1] for ds in display_str_list]
    # Each display_str has a top and bottom margin of 0.05x.
    total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

    if top > total_display_str_height:
        text_bottom = top
    else:
        text_bottom = bottom + total_display_str_height
    # Reverse list and print from bottom to top.
    for display_str in display_str_list[::-1]:
        text_width, text_height = _get_text_size(font, display_str)
        margin = np.ceil(0.05 * text_height)
        draw.rectangle(
            [
                (left, text_bottom - text_height - 2 * margin),
                (left + text_width, text_bottom),
            ],
            fill=color,
        )
        draw.text(
            (left + margin, text_bottom - text_height - margin),
            display_str,
            fill="black",
            font=font,
        )
        # Move up by the full height of the label just drawn (text plus its top
        # and bottom margins) so that stacked labels do not overlap.
        text_bottom -= text_height + 2 * margin


def draw_mask_on_image_array(image, mask, color="red", alpha=0.4):
    """Blends a colored binary mask onto an image, modifying the image in place.

    Args:
      image: uint8 numpy array with shape (img_height, img_width, 3)
      mask: a uint8 numpy array of shape (img_height, img_width) whose
        values are all either 0 or 1.
      color: color to draw the mask with. Default is red.
      alpha: transparency value between 0 and 1. (default: 0.4)

    Raises:
      ValueError: On incorrect data type for image or masks, on mask values
        other than 0 and 1, or when the image and mask sizes differ.
    """
    if image.dtype != np.uint8:
        raise ValueError("`image` not of type np.uint8")
    if mask.dtype != np.uint8:
        raise ValueError("`mask` not of type np.uint8")
    mask_is_binary = np.all((mask == 0) | (mask == 1))
    if not mask_is_binary:
        raise ValueError("`mask` elements should be in [0, 1]")
    image_hw = image.shape[:2]
    if image_hw != mask.shape:
        raise ValueError(
            "The image has spatial dimensions %s but the mask has "
            "dimensions %s" % (image_hw, mask.shape)
        )

    # Build an image of the solid fill color with the same height and width.
    rgb = ImageColor.getrgb(color)
    color_plane = np.reshape(list(rgb), [1, 1, 3])
    solid_color = np.ones_like(mask)[:, :, np.newaxis] * color_plane
    pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert("RGBA")

    # The mask scaled by alpha becomes the per-pixel blend weight.
    blend_weights = np.uint8(255.0 * alpha * mask)
    pil_mask = Image.fromarray(blend_weights).convert("L")

    blended = Image.composite(pil_solid_color, Image.fromarray(image), pil_mask)
    np.copyto(image, np.array(blended.convert("RGB")))


# Color names used to color detections. The visualization code below picks an
# entry with `class_id % len(STANDARD_COLORS)`, so the order of this list
# determines which color each class gets.
STANDARD_COLORS = [
    "AliceBlue",
    "Chartreuse",
    "Aqua",
    "Aquamarine",
    "Azure",
    "Beige",
    "Bisque",
    "BlanchedAlmond",
    "BlueViolet",
    "BurlyWood",
    "CadetBlue",
    "AntiqueWhite",
    "Chocolate",
    "Coral",
    "CornflowerBlue",
    "Cornsilk",
    "Crimson",
    "Cyan",
    "DarkCyan",
    "DarkGoldenRod",
    "DarkGrey",
    "DarkKhaki",
    "DarkOrange",
    "DarkOrchid",
    "DarkSalmon",
    "DarkSeaGreen",
    "DarkTurquoise",
    "DarkViolet",
    "DeepPink",
    "DeepSkyBlue",
    "DodgerBlue",
    "FireBrick",
    "FloralWhite",
    "ForestGreen",
    "Fuchsia",
    "Gainsboro",
    "GhostWhite",
    "Gold",
    "GoldenRod",
    "Salmon",
    "Tan",
    "HoneyDew",
    "HotPink",
    "IndianRed",
    "Ivory",
    "Khaki",
    "Lavender",
    "LavenderBlush",
    "LawnGreen",
    "LemonChiffon",
    "LightBlue",
    "LightCoral",
    "LightCyan",
    "LightGoldenRodYellow",
    "LightGray",
    "LightGrey",
    "LightGreen",
    "LightPink",
    "LightSalmon",
    "LightSeaGreen",
    "LightSkyBlue",
    "LightSlateGray",
    "LightSlateGrey",
    "LightSteelBlue",
    "LightYellow",
    "Lime",
    "LimeGreen",
    "Linen",
    "Magenta",
    "MediumAquaMarine",
    "MediumOrchid",
    "MediumPurple",
    "MediumSeaGreen",
    "MediumSlateBlue",
    "MediumSpringGreen",
    "MediumTurquoise",
    "MediumVioletRed",
    "MintCream",
    "MistyRose",
    "Moccasin",
    "NavajoWhite",
    "OldLace",
    "Olive",
    "OliveDrab",
    "Orange",
    "OrangeRed",
    "Orchid",
    "PaleGoldenRod",
    "PaleGreen",
    "PaleTurquoise",
    "PaleVioletRed",
    "PapayaWhip",
    "PeachPuff",
    "Peru",
    "Pink",
    "Plum",
    "PowderBlue",
    "Purple",
    "Red",
    "RosyBrown",
    "RoyalBlue",
    "SaddleBrown",
    "Green",
    "SandyBrown",
    "SeaGreen",
    "SeaShell",
    "Sienna",
    "Silver",
    "SkyBlue",
    "SlateBlue",
    "SlateGray",
    "SlateGrey",
    "Snow",
    "SpringGreen",
    "SteelBlue",
    "GreenYellow",
    "Teal",
    "Thistle",
    "Tomato",
    "Turquoise",
    "Violet",
    "Wheat",
    "White",
    "WhiteSmoke",
    "Yellow",
    "YellowGreen",
]


def visualize_boxes_and_labels_on_image_array(
    image,
    boxes,
    classes,
    scores,
    category_index,
    instance_masks=None,
    use_normalized_coordinates=False,
    max_boxes_to_draw=20,
    min_score_thresh=0.5,
    agnostic_mode=False,
    line_thickness=4,
    groundtruth_box_visualization_color="black",
    skip_scores=False,
    skip_labels=False,
):
    """Overlay labeled boxes (and optional masks) on an image.

    Boxes with identical coordinates are grouped: their display strings are
    collected together and the last one processed decides the color and mask.
    The image is modified in place and the same array is returned.

    Args:
      image: uint8 numpy array with shape (img_height, img_width, 3)
      boxes: a numpy array of shape [N, 4]. Each row is read as
        (xmin, ymin, xmax, ymax).
      classes: a numpy array of shape [N] holding class ids. They are used as
        keys into `category_index` and to choose a color from STANDARD_COLORS.
      scores: a numpy array of shape [N] or None.  If scores=None, then
        this function assumes that the boxes to be plotted are groundtruth
        boxes and plot all boxes as black with no classes or scores.
      category_index: a dict containing category dictionaries (each holding
        at least the category name `name`) keyed by class id. Ids missing from
        the dict are labeled "N/A".
      instance_masks: a numpy array of shape [N, image_height, image_width] with
        values of 0 or 1, can be None.
      use_normalized_coordinates: whether boxes is to be interpreted as
        normalized coordinates or not.
      max_boxes_to_draw: maximum number of boxes to visualize.  If None, draw
        all boxes.
      min_score_thresh: a box is drawn only if its score is strictly greater
        than this threshold.
      agnostic_mode: boolean (default: False) controlling whether to evaluate in
        class-agnostic mode or not.  This mode will display scores but ignore
        classes.
      line_thickness: integer (default: 4) controlling line width of the boxes.
      groundtruth_box_visualization_color: box color for visualizing groundtruth
        boxes
      skip_scores: whether to skip score when drawing a single detection
      skip_labels: whether to skip label when drawing a single detection

    Returns:
      uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
    """
    # Per-box bookkeeping, keyed by the box coordinates as a tuple.
    labels_by_box = collections.defaultdict(list)
    color_by_box = collections.defaultdict(str)
    mask_by_box = {}

    num_boxes = boxes.shape[0]
    draw_limit = max_boxes_to_draw if max_boxes_to_draw else num_boxes
    is_groundtruth = scores is None

    for idx in range(min(draw_limit, num_boxes)):
        # Skip detections whose score does not exceed the threshold.
        if not is_groundtruth and not (scores[idx] > min_score_thresh):
            continue

        box_key = tuple(boxes[idx].tolist())
        if instance_masks is not None:
            mask_by_box[box_key] = instance_masks[idx]

        if is_groundtruth:
            color_by_box[box_key] = groundtruth_box_visualization_color
            continue

        label = ""
        if not skip_labels and not agnostic_mode:
            class_id = classes[idx]
            if class_id in category_index.keys():
                class_name = category_index[class_id]["name"]
            else:
                class_name = "N/A"
            label = str(class_name)
        if not skip_scores:
            percent = int(100 * scores[idx])
            if label:
                label = "{}: {}%".format(label, percent)
            else:
                label = "{}%".format(percent)
        labels_by_box[box_key].append(label)

        if agnostic_mode:
            color_by_box[box_key] = "DarkOrange"
        else:
            color_by_box[box_key] = STANDARD_COLORS[
                classes[idx] % len(STANDARD_COLORS)
            ]

    # Draw all boxes onto image.
    for box_key, box_color in color_by_box.items():
        # Using Detectron2 style output.
        xmin, ymin, xmax, ymax = box_key
        if instance_masks is not None:
            draw_mask_on_image_array(image, mask_by_box[box_key], color=box_color)
        draw_bounding_box_on_image_array(
            image,
            ymin,
            xmin,
            ymax,
            xmax,
            color=box_color,
            thickness=line_thickness,
            display_str_list=labels_by_box[box_key],
            use_normalized_coordinates=use_normalized_coordinates,
        )

    return image

### (Optional) Download and prepare Balloon dataset

You only need this step if you do not have your own dataset and want to use the Balloon dataset as a demo. If using your own dataset, convert it into [COCO format](https://opencv.org/introduction-to-the-coco-dataset/).

In [None]:
# Download Balloon data.
!wget https://github.com/matterport/Mask_RCNN/releases/download/v2.1/balloon_dataset.zip
# Extract the archive into the current directory, discarding unzip's output.
!unzip balloon_dataset.zip > /dev/null

In [None]:
# Local directory holding the extracted dataset; the cells below read its
# "train" and "val" subdirectories.
local_balloon_data_directory = "balloon"  # @param {type:"string"}
# Cloud Storage location the dataset is copied to.
BALLOON_DATA_GCS_PATH = os.path.join(BUCKET_URI, "balloon_dataset")

In [None]:
# Convert Balloon data to COCO format
import cv2


def save_coco_format_json(img_dir, output_coco_format_json_filename):
    """Converts the Balloon annotations in `img_dir` to a COCO format JSON file.

    Reads `via_region_data.json` from `img_dir`, builds the COCO `categories`,
    `images` and `annotations` sections (a single "balloon" category with id 0)
    and writes the result into `img_dir`.

    Args:
      img_dir: directory containing the images and `via_region_data.json`.
      output_coco_format_json_filename: name of the JSON file to write inside
        `img_dir`.

    Raises:
      FileNotFoundError: if an image listed in the annotations cannot be read.
    """
    # Load original balloon data json file
    via_json_path = os.path.join(img_dir, "via_region_data.json")
    with open(via_json_path) as f:
        imgs_anns = json.load(f)

    output_coco_format_dict = {}
    # We only have one class: balloon.
    output_coco_format_dict["categories"] = [{"id": 0, "name": "balloon"}]
    output_coco_format_dict["images"] = []
    output_coco_format_dict["annotations"] = []
    annotation_idx = 0
    for image_idx, v in enumerate(imgs_anns.values()):
        filename = os.path.join(img_dir, v["filename"])
        image = cv2.imread(filename)
        if image is None:
            # cv2.imread reports an unreadable or missing file by returning
            # None; fail with a clear message instead of an AttributeError.
            raise FileNotFoundError(
                "Could not read image file: {}".format(filename)
            )
        height, width = image.shape[:2]
        image_item = {
            "id": image_idx,
            "width": width,
            "height": height,
            "file_name": v["filename"],
        }
        output_coco_format_dict["images"].append(image_item)

        # Process all regions in this image.
        for anno in v["regions"].values():
            assert not anno["region_attributes"]
            shape = anno["shape_attributes"]
            px = shape["all_points_x"]
            py = shape["all_points_y"]
            # Shift each vertex by half a pixel, then flatten the (x, y) pairs
            # into [x0, y0, x1, y1, ...].
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [p for x in poly for p in x]

            x_min, y_min = np.min(px), np.min(py)
            # Annotation ids start at 1 and are unique across all images.
            annotation_idx += 1
            annotation_item = {
                "id": annotation_idx,
                "image_id": image_idx,
                # x, y, width, height
                "bbox": [
                    int(x_min),
                    int(y_min),
                    int(np.max(px) - x_min),
                    int(np.max(py) - y_min),
                ],
                "iscrowd": 0,
                # Only have one category.
                "category_id": 0,
                "segmentation": [poly],
            }
            # The area is computed from the polygon rasterized as an RLE mask.
            RLEs = mask_util.frPyObjects([poly], width, height)
            RLE = mask_util.merge(RLEs)
            annotation_item["area"] = float(mask_util.area(RLE))
            output_coco_format_dict["annotations"].append(annotation_item)

    # Save output file.
    output_json_path = os.path.join(img_dir, output_coco_format_json_filename)
    with open(output_json_path, "w") as f:
        json.dump(output_coco_format_dict, f)


# Write one COCO format JSON file next to the images of each split; these file
# names are referenced by the training parameters below.
save_coco_format_json(
    os.path.join(local_balloon_data_directory, "train"),
    "balloon_train_coco_format.json",
)
save_coco_format_json(
    os.path.join(local_balloon_data_directory, "val"), "balloon_val_coco_format.json"
)

In [None]:
# Move Balloon data from local directory to Cloud Storage.

import glob

from google.cloud import storage


def get_bucket_and_blob_name(filepath):
    """Split a `gs://<bucket-name>/<blob-name>` path into its two parts.

    Returns:
      A `(bucket_name, blob_name)` tuple. If the path has no "/" after the
      bucket name, the tuple contains only the bucket name.
    """
    without_scheme = filepath.split("gs://", 1)[1]
    parts = without_scheme.split("/", 1)
    return tuple(parts)


def upload_local_dir_to_gcs(local_dir_path, gcs_dir_path):
    """Uploads files in a local directory to a GCS directory.

    Only regular files directly inside `local_dir_path` are uploaded;
    subdirectories are skipped. Each file keeps its base name under
    `gcs_dir_path`.

    Args:
      local_dir_path: local directory to upload from, with or without a
        trailing slash.
      gcs_dir_path: destination of the form `gs://<bucket-name>/<dir>`.
    """
    client = storage.Client()
    bucket_name = get_bucket_and_blob_name(gcs_dir_path)[0]
    bucket = client.get_bucket(bucket_name)
    # Normalize the destination so exactly one "/" separates it from the name.
    gcs_dir_prefix = gcs_dir_path.rstrip("/")
    for local_file in glob.glob(os.path.join(local_dir_path, "*")):
        if not os.path.isfile(local_file):
            continue
        # basename is used instead of slicing by len(local_dir_path), which
        # cut off filename characters when the directory had a trailing slash.
        filename = os.path.basename(local_file)
        gcs_file_path = gcs_dir_prefix + "/" + filename
        _, blob_name = get_bucket_and_blob_name(gcs_file_path)
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(local_file)
        print("Copied {} to {}.".format(local_file, gcs_file_path))


# Upload each split (images plus the generated COCO JSON) to the matching
# "train" / "val" folder under BALLOON_DATA_GCS_PATH.
upload_local_dir_to_gcs(
    os.path.join(local_balloon_data_directory, "train"),
    os.path.join(BALLOON_DATA_GCS_PATH, "train"),
)
upload_local_dir_to_gcs(
    os.path.join(local_balloon_data_directory, "val"),
    os.path.join(BALLOON_DATA_GCS_PATH, "val"),
)

## Finetune with Detectron2

You will use the Vertex AI SDK to create and run the training job with the model-garden detectron2 training docker. You can choose one of the Faster R-CNN, RetinaNet, or Mask R-CNN models to finetune by uncommenting the corresponding code sections below. The training uses one V100 GPU and runs for around 3 mins once the training job begins.

In [None]:
from datetime import datetime

# A timestamp suffix gives each run its own job name and output directory.
TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
JOB_NAME = "detectron2_balloon_" + TIMESTAMP

NUM_GPU = 1
container_uri = TRAIN_DOCKER_URI
staging_bucket = os.path.join(BUCKET_URI, "training/temporal")

# Dataset and output directory related parameters.
# Paths handed to the training container are converted from gs:// URIs to
# /gcs/ paths with gcs_fuse_path.
train_dataset_name = "balloon_train"  # @param {type:"string"}
train_coco_json_file = os.path.join(
    BALLOON_DATA_GCS_PATH, "train/balloon_train_coco_format.json"
)
train_coco_json_file = gcs_fuse_path(train_coco_json_file)
train_image_root = os.path.join(BALLOON_DATA_GCS_PATH, "train")
train_image_root = gcs_fuse_path(train_image_root)
val_dataset_name = "balloon_val"  # @param {type:"string"}
val_coco_json_file = os.path.join(
    BALLOON_DATA_GCS_PATH, "val/balloon_val_coco_format.json"
)
val_coco_json_file = gcs_fuse_path(val_coco_json_file)
val_image_root = os.path.join(BALLOON_DATA_GCS_PATH, "val")
val_image_root = gcs_fuse_path(val_image_root)
# Kept as a gs:// URI here; it is converted where it is passed to the container.
output_dir = os.path.join(BUCKET_URI, JOB_NAME)

#################################################
# Model and dataset related parameters for Mask R-CNN.
# This is the active configuration. To train another model, comment this
# section out and uncomment one of the sections below.
config_file = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
# This path does not start with gs://, so gcs_fuse_path returns it unchanged.
config_file = gcs_fuse_path(config_file)
# Alternating name/value entries that are appended to the docker arguments.
remainder_args_list = []
remainder_args_list += ["DATASETS.TRAIN"] + [
    '("{train_dataset_name}",)'.format(train_dataset_name=train_dataset_name)
]
remainder_args_list += ["DATASETS.TEST"] + [
    '("{val_dataset_name}",)'.format(val_dataset_name=val_dataset_name)
]
remainder_args_list += ["DATALOADER.NUM_WORKERS"] + ["2"]
remainder_args_list += ["SOLVER.IMS_PER_BATCH"] + ["2"]
remainder_args_list += ["SOLVER.MAX_ITER"] + ["300"]
remainder_args_list += ["SOLVER.STEPS"] + ["[]"]
remainder_args_list += ["MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE"] + ["128"]
remainder_args_list += ["MODEL.ROI_HEADS.NUM_CLASSES"] + ["1"]
#################################################

# #################################################
# # Model and dataset related parameters for RetinaNet.
# config_file='COCO-Detection/retinanet_R_50_FPN_3x.yaml'
# config_file = gcs_fuse_path(config_file)
# remainder_args_list = []
# remainder_args_list += ['DATASETS.TRAIN'] + ['("{train_dataset_name}",)'.format(train_dataset_name=train_dataset_name)]
# remainder_args_list += ['DATASETS.TEST'] + ['("{val_dataset_name}",)'.format(val_dataset_name=val_dataset_name)]
# remainder_args_list += ['DATALOADER.NUM_WORKERS'] + ['2']
# remainder_args_list += ['SOLVER.IMS_PER_BATCH'] + ['2']
# remainder_args_list += ['SOLVER.MAX_ITER'] + ['300']
# remainder_args_list += ['SOLVER.STEPS'] + ['[]']
# remainder_args_list += ['MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE'] + ['128']
# remainder_args_list += ['MODEL.ROI_HEADS.NUM_CLASSES'] + ['1']
# remainder_args_list += ['MODEL.RETINANET.NUM_CLASSES'] + ['1']
# #################################################

# #################################################
# # Model and dataset related parameters for Faster R-CNN.
# config_file='COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml'
# config_file = gcs_fuse_path(config_file)
# remainder_args_list = []
# remainder_args_list += ['DATASETS.TRAIN'] + ['("{train_dataset_name}",)'.format(train_dataset_name=train_dataset_name)]
# remainder_args_list += ['DATASETS.TEST'] + ['("{val_dataset_name}",)'.format(val_dataset_name=val_dataset_name)]
# remainder_args_list += ['DATALOADER.NUM_WORKERS'] + ['2']
# remainder_args_list += ['SOLVER.IMS_PER_BATCH'] + ['2']
# remainder_args_list += ['SOLVER.MAX_ITER'] + ['300']
# remainder_args_list += ['SOLVER.STEPS'] + ['[]']
# remainder_args_list += ['MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE'] + ['128']
# remainder_args_list += ['MODEL.ROI_HEADS.NUM_CLASSES'] + ['1']
# #################################################

# Create argument list for docker.
# NOTE: Config file flag name has hyphen instead
# of underscore: 'config-file'.
lr = 0.00025
# Flags and their values alternate; every value is formatted as a string.
docker_args_list = [
    "--train_dataset_name",
    f"{train_dataset_name}",
    "--train_coco_json_file",
    f"{train_coco_json_file}",
    "--train_image_root",
    f"{train_image_root}",
    "--val_dataset_name",
    f"{val_dataset_name}",
    "--val_coco_json_file",
    f"{val_coco_json_file}",
    "--val_image_root",
    f"{val_image_root}",
    "--lr",
    f"{lr}",
    "--num-gpus",
    f"{NUM_GPU}",
    "--output_dir",
    f"{gcs_fuse_path(output_dir)}",
    "--config-file",
    f"{config_file}",
]
# The model specific name/value overrides go after the named flags.
docker_args_list += remainder_args_list

In [None]:
# Create and run training job.
# Click on the generated link in the output under "View backing custom job:" to see your run in the Cloud Console.
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=staging_bucket)
job = aiplatform.CustomContainerTrainingJob(
    display_name=JOB_NAME,
    container_uri=container_uri,
)
# NOTE(review): `model` is reassigned by the upload cell below, so the value
# returned here is not used by later cells.
model = job.run(
    args=docker_args_list,
    base_output_dir=f"{output_dir}",
    replica_count=1,
    machine_type="n1-standard-4",
    accelerator_type="NVIDIA_TESLA_V100",
    accelerator_count=NUM_GPU,
)

## Upload and deploy Models

This section uploads the model to Model Registry and deploys it on an Endpoint resource. The model deployment step will take ~15 minutes to complete. You need to set the model path below from the training output Cloud Storage directory.

In [None]:
# Upload models to model registry
from datetime import datetime

# The weights and config are read from the training job's output directory.
PRETRAINED_MODEL_PTH_FILE = os.path.join(output_dir, "model_final.pth")
PRETRAINED_MODEL_CFG_YAML_FILE = os.path.join(output_dir, "config.yaml")
# Passed to the serving container as its TEST_THRESHOLD setting.
TEST_THRESHOLD = 0.7
PREDICTION_CONTAINER_URI = SERVE_DOCKER_URI
PREDICTION_DISPLAY_NAME = "upload_detectron2_" + datetime.now().strftime(
    "%Y%m%d_%H%M%S"
)

# `model` is used by the deploy and clean up cells below.
model = upload_model(
    project=PROJECT_ID,
    location=REGION,
    display_name=PREDICTION_DISPLAY_NAME,
    serving_container_image_uri=PREDICTION_CONTAINER_URI,
    model_pth_file=PRETRAINED_MODEL_PTH_FILE,
    model_cfg_yaml_file=PRETRAINED_MODEL_CFG_YAML_FILE,
    test_threshold=TEST_THRESHOLD,
)
print("The uploaded model name is: ", PREDICTION_DISPLAY_NAME)

In [None]:
# Deploy uploaded models
from datetime import datetime

DEPLOYED_NAME = "deploy_iod_" + datetime.now().strftime("%Y%m%d_%H%M%S")
MACHINE_TYPE = "n1-highmem-16"  # @param {type:"string"}

# NOTE(review): presumably the key "0" refers to the model being deployed by
# this call, which receives all of the traffic; confirm against the SDK docs.
TRAFFIC_SPLIT = {"0": 100}

# A fixed single replica: the minimum and maximum are both 1.
MIN_NODES = 1
MAX_NODES = 1

endpoint = model.deploy(
    deployed_model_display_name=DEPLOYED_NAME,
    traffic_split=TRAFFIC_SPLIT,
    machine_type=MACHINE_TYPE,
    min_replica_count=MIN_NODES,
    max_replica_count=MAX_NODES,
    service_account=SERVICE_ACCOUNT,
)

# This id is what the prediction and clean up cells expect as "endpoint_id".
print("endpoint id is: ", endpoint.name)

In [None]:
# Run predictions
# Optionally fill "endpoint_id" below to target a specific endpoint.
# For example 'endpoint_id = "8211918096324100096"'.
# When it is left empty, the endpoint deployed in the previous step is used.

endpoint_id = ""  # @param {type:"string"}
if not endpoint_id:
    # Fall back to the endpoint created by the deploy cell so that running the
    # notebook top to bottom works without manual editing.
    endpoint_id = endpoint.name
local_test_filepath = os.path.join(
    local_balloon_data_directory, "val/410488422_5f8991f26e_b.jpg"
)
instances = get_prediction_instances(local_test_filepath)
api_endpoint = REGION + "-aiplatform.googleapis.com"

predictions, deployed_model_id = predict_custom_trained_model(
    project=PROJECT_ID,
    endpoint_id=endpoint_id,
    instances=instances,
    location=REGION,
    api_endpoint=api_endpoint,
)

print("The deployed model id: ", deployed_model_id)
print("Predict the test image: ", local_test_filepath)
# Each prediction is returned as a JSON string; decode the first (only) one.
prediction = json.loads(predictions[0])
print(prediction)

# Draw boxes and masks.
img = load_img(local_test_filepath)

boxes = prediction["boxes"]
classes = prediction["classes"]
scores = prediction["scores"]
# Masks are decoded only when the response contains a non-empty mask list;
# otherwise only the boxes are drawn.
if prediction["masks_rle"]:
    masks_numpy = decode_rle_masks(prediction["masks_rle"])
else:
    masks_numpy = None
# Save the unannotated input image for comparison.
img.save("./sample.jpg")
# The category index maps class id 0 to the single "balloon" class.
output_image_array = visualize_boxes_and_labels_on_image_array(
    image=np.array(img),
    boxes=np.array(boxes),
    classes=np.array(classes),
    scores=np.array(scores),
    category_index={0: {"name": "balloon"}},
    instance_masks=masks_numpy,
)
output_image = Image.fromarray(np.uint8(output_image_array))
output_image.save("./sample_preds.jpg")
print('Prediction image saved to "./sample_preds.jpg" ')

## Clean up

In [None]:
# Cleans up endpoint from previous step.
# Or you can overwrite "endpoint_id" for a different one.
# NOTE: "endpoint_id" must be non-empty here and `model` must still be defined
# from the upload cell.

aip_endpoint_name = f"projects/{PROJECT_ID}/locations/{REGION}/endpoints/{endpoint_id}"
endpoint = aiplatform.Endpoint(aip_endpoint_name)

# Undeploy model and delete endpoint
endpoint.delete(force=True)

# Delete the model resource
model.delete()