In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden MoViNet video action recognition

<table><tbody><tr>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_movinet_action_recognition.ipynb">
      <img alt="Google Cloud Colab Enterprise logo" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" width="32px"><br> Run in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_movinet_action_recognition.ipynb">
      <img alt="GitHub logo" src="https://github.githubassets.com/assets/GitHub-Mark-ea2971cee799.png" width="32px"><br> View on GitHub
    </a>
  </td>
</tr></tbody></table>

## Overview

This notebook demonstrates how to use [MoViNet](https://github.com/tensorflow/models/tree/master/official/projects/movinet) for video action recognition in Vertex AI Model Garden.

### Objective

* Train new models
  * Convert input data to training formats
  * Create [hyperparameter tuning jobs](https://cloud.google.com/vertex-ai/docs/training/hyperparameter-tuning-overview) to train new models
  * Find and export best models

* Test trained models
  * Upload models to the [Vertex AI Model Registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction)
  * Run batch predictions

* Clean up resources

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Before you begin

In [None]:
# @title Setup Google Cloud project

# @markdown 1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

# @markdown 2. For finetuning, **[click here](https://console.cloud.google.com/iam-admin/quotas?location=us-central1&metric=aiplatform.googleapis.com%2Frestricted_image_training_nvidia_a100_80gb_gpus)** to check if your project already has the required 8 Nvidia A100 80 GB GPUs in the us-central1 region. If yes, then run this notebook in the us-central1 region. If you do not have 8 Nvidia A100 80 GPUs or have more GPU requirements than this, then schedule your job with Nvidia H100 GPUs via Dynamic Workload Scheduler using [these instructions](https://cloud.google.com/vertex-ai/docs/training/schedule-jobs-dws). For Dynamic Workload Scheduler, check the [us-central1](https://console.cloud.google.com/iam-admin/quotas?location=us-central1&metric=aiplatform.googleapis.com%2Fcustom_model_training_preemptible_nvidia_h100_gpus) or [europe-west4](https://console.cloud.google.com/iam-admin/quotas?location=europe-west4&metric=aiplatform.googleapis.com%2Fcustom_model_training_preemptible_nvidia_h100_gpus) quota for Nvidia H100 GPUs. If you do not have enough GPUs, then you can follow [these instructions](https://cloud.google.com/docs/quotas/view-manage#viewing_your_quota_console) to request quota.

# @markdown 3. For serving, **[click here](https://console.cloud.google.com/iam-admin/quotas?location=us-central1&metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_l4_gpus)** to check if your project already has the required 1 L4 GPU in the us-central1 region.  If yes, then run this notebook in the us-central1 region. If you need more L4 GPUs for your project, then you can follow [these instructions](https://cloud.google.com/docs/quotas/view-manage#viewing_your_quota_console) to request more. Alternatively, if you want to run predictions with A100 80GB or H100 GPUs, we recommend using the regions listed below. **NOTE:** Make sure you have associated quota in selected regions. Click the links to see your current quota for each GPU type: [Nvidia A100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_a100_80gb_gpus), [Nvidia H100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_h100_gpus).

# @markdown > | Machine Type | Accelerator Type | Recommended Regions |
# @markdown | ----------- | ----------- | ----------- |
# @markdown | a2-ultragpu-1g | 1 NVIDIA_A100_80GB | us-central1, us-east4, europe-west4, asia-southeast1 |
# @markdown | a3-highgpu-2g | 2 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |
# @markdown | a3-highgpu-4g | 4 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |
# @markdown | a3-highgpu-8g | 8 NVIDIA_H100_80GB | us-central1, europe-west4, us-west1, asia-southeast1 |

# @markdown 4. **[Optional]** [Create a Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets) for storing experiment outputs. Set the BUCKET_URI for the experiment environment. The specified Cloud Storage bucket (`BUCKET_URI`) should be located in the same region as where the notebook was launched. Note that a multi-region bucket (eg. "us") is not considered a match for a single region covered by the multi-region range (eg. "us-central1"). If not set, a unique GCS bucket will be created instead.

# Form input: Cloud Storage bucket for experiment outputs. Leaving the bare
# "gs://" placeholder (or a blank value) makes the setup code further down
# create a new, uniquely named bucket instead.
BUCKET_URI = "gs://"  # @param {type:"string"}

# @markdown 5. **[Optional]** Set region. If not set, the region will be set automatically according to Colab Enterprise environment.

# Form input: region used for launching jobs. When left empty, the setup code
# further down falls back to the GOOGLE_CLOUD_REGION environment variable.
REGION = ""  # @param {type:"string"}

# Clone the samples repository; the `common_util` helper module is imported
# from this checkout later in this cell.
! git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git

import datetime
import importlib
import json
import os
import subprocess
import uuid
from typing import Dict

import yaml
from google.cloud import aiplatform
from google.cloud.aiplatform import hyperparameter_tuning as hpt

common_util = importlib.import_module(
    "vertex-ai-samples.community-content.vertex_model_garden.model_oss.notebook_util.common_util"
)

models, endpoints = {}, {}

# Get the default cloud project id.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

# Get the default region for launching jobs.
if not REGION:
    if not os.environ.get("GOOGLE_CLOUD_REGION"):
        raise ValueError(
            "REGION must be set. See"
            " https://cloud.google.com/vertex-ai/docs/general/locations for"
            " available cloud locations."
        )
    REGION = os.environ["GOOGLE_CLOUD_REGION"]

# Enable the Vertex AI API and Compute Engine API, if not already.
print("Enabling Vertex AI API and Compute Engine API.")
! gcloud services enable aiplatform.googleapis.com compute.googleapis.com

# Cloud Storage bucket for storing the experiment artifacts.
# A unique GCS bucket will be created for the purpose of this notebook. If you
# prefer using your own GCS bucket, change the value yourself below.
now = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])

if BUCKET_URI is None or BUCKET_URI.strip() == "" or BUCKET_URI == "gs://":
    BUCKET_URI = f"gs://{PROJECT_ID}-tmp-{now}-{str(uuid.uuid4())[:4]}"
    BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])
    ! gsutil mb -l {REGION} {BUCKET_URI}
else:
    assert BUCKET_URI.startswith("gs://"), "BUCKET_URI must start with `gs://`."
    shell_output = ! gsutil ls -Lb {BUCKET_NAME} | grep "Location constraint:" | sed "s/Location constraint://"
    bucket_region = shell_output[0].strip().lower()
    if bucket_region != REGION:
        raise ValueError(
            "Bucket region %s is different from notebook region %s"
            % (bucket_region, REGION)
        )
print(f"Using this GCS Bucket: {BUCKET_URI}")

STAGING_BUCKET = os.path.join(BUCKET_URI, "temporal")
MODEL_BUCKET = os.path.join(BUCKET_URI, "movinet_ar")


# Initialize Vertex AI API.
print("Initializing Vertex AI API.")
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=STAGING_BUCKET)

# Gets the default SERVICE_ACCOUNT.
shell_output = ! gcloud projects describe $PROJECT_ID
project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"
print("Using this default Service Account:", SERVICE_ACCOUNT)


# Provision permissions to the SERVICE_ACCOUNT with the GCS bucket
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.admin $BUCKET_NAME

! gcloud config set project $PROJECT_ID
! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role="roles/storage.admin"
! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role="roles/aiplatform.user"

# @markdown Kindly note Regions of "us", "asia", or "europe" are supported.

# Multi-region prefix of REGION (the text before the first "-"); it selects
# the Artifact Registry host used for the container images.
REGION_PREFIX = REGION.partition("-")[0]
assert REGION_PREFIX in {
    "us",
    "europe",
    "asia",
}, f'{REGION} is not supported. It must be prefixed by "us", "asia", or "europe".'

# Cloud Storage folder that receives the uploaded model config files.
CONFIG_DIR = os.path.join(BUCKET_URI, "config")

# Cloud Storage folder that receives the uploaded pretrained checkpoints.
CHECKPOINT_BUCKET = os.path.join(BUCKET_URI, "ckpt")

## Training

In [None]:
# @title Set the model for training

# @markdown This section shows how to train new models.
# @markdown 1. Convert input data to training formats
# @markdown 2. Create hyperparameter tuning jobs to train new models
# @markdown 3. Find and export best models

# @markdown If you have already trained models, skip ahead to the `Upload` section.

# @markdown Select a model:
# @markdown * `model_id`: MoViNet model variant ID, one of `a0`, `a1`, `a2`, `a3`, `a4`, `a5`. The model with a larger number requires more resources to train, and is expected to have a higher accuracy and latency. Here, we use `a3` for demonstration purpose. **`a0`, `a1`, and `a2` are not recommended for now as we are currently investigating some inference issues with them.**
# @markdown * `model_mode`: MoViNet model type, either `base` or `stream`. The base model has a slightly higher accuracy, while the streaming model is optimized for streaming and faster CPU inference. See [official MoViNet docs](https://github.com/tensorflow/models/tree/master/official/projects/movinet) for more information.

# @markdown **Note**: The prediction container only supports base model (non-streaming) for now. If you train a streaming model, you need to download the model and refer to the [MoViNet official guide](https://github.com/tensorflow/models/blob/master/official/projects/movinet/movinet_streaming_model_training_and_inference.ipynb) for running predictions locally.

# Image size associated with each MoViNet variant ID.
IMAGE_SIZES = dict(a0=172, a1=172, a2=224, a3=256, a4=290, a5=320)

model_id = "a3"  # @param ["a0", "a1", "a2", "a3", "a4", "a5"]
model_mode = "base"  # @param ["base", "stream"]

# Values derived from the selection above.
image_size = IMAGE_SIZES[model_id]
model_name = f"movinet_{model_id}_{model_mode}"
is_stream = model_mode == "stream"

# Architecture flags forwarded to the model export container; streaming and
# base models use different layer and activation settings.
export_container_args = (
    {
        "conv_type": "2plus1d",
        "se_type": "2plus3d",
        "activation": "hard_swish",
        "gating_activation": "hard_sigmoid",
        "use_positional_encoding": model_id in {"a3", "a4", "a5"},
    }
    if is_stream
    else {
        "conv_type": "3d",
        "se_type": "3d",
        "activation": "swish",
        "gating_activation": "sigmoid",
        "use_positional_encoding": False,
    }
)

In [None]:
# @title Prepare training data

# @markdown Prepare data in the format as described [here](https://cloud.google.com/vertex-ai/docs/video-data/action-recognition/prepare-data), and then convert them to the training formats by running the cell below:

# @markdown * `input_file_path`: The input file path to the prepared data.

# @markdown * `input_file_type`: The input file type, such as `csv` or `jsonl`.

# @markdown * `output_fps`: The sampling rate of the video; Frames per second.

# @markdown * `num_frames`: Number of frames to sample around keyframe inputs.

# @markdown * `min_duration_sec`: Minimum duration in seconds for sampling video clips around keyframe inputs. This is for validation purpose - an error will be thrown if there is not enough context around a keyframe.

# @markdown * `pos_neg_ratio`: Sampling ratio between positive and negative segments. For example, a pos_neg_ratio of 0.5 samples 1 negative instance every 2 positive instances.

# @markdown * `split_ratio`: Three comma separated floats indicating the proportion of data to split into train/validation/test. They must add up to 1.

# @markdown * `num_shard`: Three comma separated integers indicating the shards for train/validation/test.

# @markdown **Note**: For JSONL input, use `aiplatform.googleapis.com/ml_use` instead of `ml_use` as the JSON key for ML use in `dataItemResourceLabels`. This is to be consistent with other objectives.

# This job will convert input data as training format, with given split ratios
# and number of shards on train/test/validation.

# Objective identifier passed to the data converter and reused in job names.
OBJECTIVE = "var"

# Data converter constants.
DATA_CONVERTER_JOB_PREFIX = "data_converter"
DATA_CONVERTER_CONTAINER = f"{REGION_PREFIX}-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/data-converter"
DATA_CONVERTER_MACHINE_TYPE = "n1-highmem-8"

data_converter_job_name = common_util.get_job_name_with_datetime(
    DATA_CONVERTER_JOB_PREFIX + "_" + OBJECTIVE
)

# Form inputs. `split_ratio` and `num_shard` are exposed as form fields as
# well, matching the parameter list documented at the top of this cell.
input_file_path = ""  # @param {type:"string"}
input_file_type = "csv"  # @param ["csv", "jsonl"]
output_fps = 10  # @param {type:"integer"}
num_frames = 32  # @param {type:"integer"}
min_duration_sec = 1.0  # @param {type:"number"}
pos_neg_ratio = 1.0  # @param {type:"number"}
split_ratio = "0.8,0.1,0.1"  # @param {type:"string"}
num_shard = "10,10,10"  # @param {type:"string"}

# Fail fast instead of launching a conversion job without any input data.
if not input_file_path:
    raise ValueError(
        "input_file_path must be set to the path of the prepared input data file."
    )

# The converted data is written to a job-specific folder in the bucket.
data_converter_output_dir = os.path.join(BUCKET_URI, data_converter_job_name)

# Single-replica worker pool that runs the data converter container with the
# conversion parameters as command line flags.
worker_pool_specs = [
    dict(
        machine_spec=dict(machine_type=DATA_CONVERTER_MACHINE_TYPE),
        replica_count=1,
        container_spec=dict(
            image_uri=DATA_CONVERTER_CONTAINER,
            command=[],
            args=[
                "--input_file_path=%s" % input_file_path,
                "--input_file_type=%s" % input_file_type,
                "--objective=%s" % OBJECTIVE,
                "--num_shard=%s" % num_shard,
                "--split_ratio=%s" % split_ratio,
                "--output_dir=%s" % data_converter_output_dir,
                "--output_fps=%d" % output_fps,
                "--num_frames=%d" % num_frames,
                "--min_duration_sec=%f" % min_duration_sec,
                "--pos_neg_ratio=%f" % pos_neg_ratio,
                "--output_shape=%d,%d" % (image_size, image_size),
            ],
        ),
    )
]

# Create the conversion job and run it under the notebook's service account.
data_converter_custom_job = aiplatform.CustomJob(
    display_name=data_converter_job_name,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=STAGING_BUCKET,
)
data_converter_custom_job.run(service_account=SERVICE_ACCOUNT)

# Paths inside the converter output folder that the later cells consume.
input_train_data_path = os.path.join(data_converter_output_dir, "train.tfrecord*")
input_validation_data_path = os.path.join(data_converter_output_dir, "val.tfrecord*")
label_map_path = os.path.join(data_converter_output_dir, "label_map.yaml")
print("input_train_data_path for training: ", input_train_data_path)
print("input_validation_data_path for training: ", input_validation_data_path)
print("label_map_path for prediction: ", label_map_path)

In [None]:
# @title Hyperparameter tuning

# @markdown Use the Vertex AI SDK to create and run the [hyperparameter tuning job](https://cloud.google.com/vertex-ai/docs/training/hyperparameter-tuning-overview) with Vertex AI Model Garden training docker images.

# @markdown #### Define the following specifications

# @markdown * `worker_pool_specs`: A list of dictionaries specifying the machine type and docker image. This example defines a single node cluster with one `n1-highmem-32` machine with 8 `NVIDIA_TESLA_V100` GPUs.

# @markdown   **Note**: We recommend using 8 GPUs for MoViNet-A2 and larger. Since loading video data requires a lot of GPU memory, it is recommended to experiment with a small batch size first.
# @markdown * `parameter_spec`: Dictionary specifying the parameters to optimize. The dictionary key is the string assigned to the command line argument for each hyperparameter in your training application code, and the dictionary value is the parameter specification. The parameter specification includes the type, min/max values, and scale for the hyperparameter.
# @markdown * `metric_spec`: Dictionary specifying the metric to optimize. The dictionary key is the `hyperparameter_metric_tag` that you set in your training application code, and the value is the optimization goal.
# @markdown * `max_trial_count`: Sets an upper bound on the number of trials the service will run. The recommended practice is to start with a smaller number of trials and get a sense of how impactful your chosen hyperparameters are before scaling up.
# @markdown * `parallel_trial_count`:  If you use parallel trials, the service provisions multiple training processing clusters. The worker pool spec that you specify when creating the job is used for each individual training cluster.  Increasing the number of parallel trials reduces the amount of time the hyperparameter tuning job takes to run; however, it can reduce the effectiveness of the job overall. This is because the default tuning strategy uses results of previous trials to inform the assignment of values in subsequent trials.
# @markdown * `search_algorithm`: The available search algorithms are grid, random, or default (None). The default option applies Bayesian optimization to search the space of possible hyperparameter values and is the recommended algorithm.

# @markdown Click on the generated link in the output to see your run in the Cloud Console.

# The input train and validation dataset paths are produced by the
# `Prepare training data` cell above.
# Alternatively, set the paths below if the prepared datasets already exist.
# input_train_data_path = ""
# input_validation_data_path = ""
# label_map_path = ""

# Training constants.
# Prefix of the training job display name (combined with the model name).
TRAINING_JOB_PREFIX = "train"
# Training container image, hosted on the registry matching REGION_PREFIX.
TRAIN_CONTAINER_URI = f"{REGION_PREFIX}-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/movinet-train"
# Machine and accelerator configuration used by the training worker pool.
TRAIN_MACHINE_TYPE = "n1-highmem-32"
TRAIN_ACCELERATOR_TYPE = "NVIDIA_TESLA_V100"
# GPUs per training job; also passed to the trainer as `runtime.num_gpus`.
TRAIN_NUM_GPU = 8


def get_label_map(label_map_yaml_filepath: str) -> tuple[Dict[int, str], int]:
    """Loads a label map YAML file and derives the number of classes.

    The file is first copied with `gsutil cp` into the current working
    directory (under its base name) and then parsed.

    Args:
      label_map_yaml_filepath: A string of label map yaml file path, in a form
        accepted by `gsutil cp`.

    Returns:
      A `(label_map, num_classes)` tuple, where `label_map` is the mapping
      stored under the file's top-level `label_map` key and `num_classes` is
      the largest key plus one.

    Raises:
      subprocess.CalledProcessError: If the `gsutil cp` command fails.
      ValueError: If the label map is empty.
    """
    label_map_filename = os.path.basename(label_map_yaml_filepath)
    subprocess.check_output(
        ["gsutil", "cp", label_map_yaml_filepath, label_map_filename],
        stderr=subprocess.STDOUT,
    )
    with open(label_map_filename, "rb") as input_file:
        label_map = yaml.safe_load(input_file.read())["label_map"]
    # Without this check an empty map surfaces as an opaque error from max().
    if not label_map:
        raise ValueError(
            "Label map in %s is empty; cannot determine the number of classes."
            % label_map_yaml_filepath
        )
    # Class ids start at 0, so the class count is the largest id plus one.
    num_classes = max(label_map.keys()) + 1
    return label_map, num_classes


def find_checkpoint_in_dir(directory: str) -> str:
    """Searches `directory` recursively for a checkpoint `.index` file.

    Returns the path of the first `.index` file found with the extension
    removed (the path still starts with `directory`), or None when no such
    file exists anywhere below `directory`.
    """
    for entry in os.listdir(directory):
        candidate = os.path.join(directory, entry)
        if os.path.isdir(candidate):
            # Descend into sub-directories and stop at the first hit.
            nested = find_checkpoint_in_dir(candidate)
            if nested:
                return nested
            continue
        if os.path.isfile(candidate) and candidate.endswith(".index"):
            stem = os.path.splitext(os.path.basename(candidate))[0]
            return os.path.dirname(candidate) + "/" + stem
    return None


def upload_checkpoint_to_gcs(checkpoint_url: str) -> str:
    """Uploads a compressed .tar.gz checkpoint at the given URL to Cloud Storage."""
    filename = os.path.basename(checkpoint_url)
    checkpoint_name = filename.replace(".tar.gz", "")
    print("Download checkpoint from", checkpoint_url, "and store to", CHECKPOINT_BUCKET)
    ! wget $checkpoint_url -O $filename
    ! mkdir -p $checkpoint_name
    ! tar -xvzf $filename -C $checkpoint_name

    checkpoint_path = find_checkpoint_in_dir(checkpoint_name)
    checkpoint_path = os.path.relpath(checkpoint_path, checkpoint_name)

    ! gsutil cp -r $checkpoint_name $CHECKPOINT_BUCKET/
    checkpoint_uri = os.path.join(CHECKPOINT_BUCKET, checkpoint_name, checkpoint_path)
    print("Checkpoint uploaded to", checkpoint_uri)
    return checkpoint_uri


def upload_config_to_gcs(url: str) -> str:
    """Uploads a config file at the given URL to Cloud Storage."""
    filename = os.path.basename(url)
    destination = os.path.join(CONFIG_DIR, filename)
    print("Copy", url, "to", destination)
    ! wget "$url" -O "$filename"
    ! gsutil cp "$filename" "$destination"
    return destination


# Job name and output folder for this training run.
train_job_name = common_util.get_job_name_with_datetime(
    f"{TRAINING_JOB_PREFIX}_{model_name}"
)
model_dir = os.path.join(BUCKET_URI, train_job_name)

# Class id -> label mapping and class count from the converted dataset.
label_map, num_classes = get_label_map(label_map_path)

# Copy the pretrained checkpoint for the selected model into the GCS bucket.
init_checkpoint = upload_checkpoint_to_gcs(
    f"https://storage.googleapis.com/tf_model_garden/vision/movinet/{model_name}_with_backbone.tar.gz"
)

# Copy the config file matching model_id and the streaming option.
config_file = f"{model_id}_stream" if is_stream else model_id
config_file = upload_config_to_gcs(
    f"https://raw.githubusercontent.com/tensorflow/models/master/official/projects/movinet/configs/yaml/movinet_{config_file}_gpu.yaml"
)

# Trainer flags. These values are chosen for a quick demonstration run; adjust
# them for better model quality.
trainer_args = dict(
    experiment="movinet_kinetics600",
    config_file=config_file,
    input_train_data_path=input_train_data_path,
    input_validation_data_path=input_validation_data_path,
    init_checkpoint=init_checkpoint,
    model_dir=model_dir,
    num_classes=num_classes,
    global_batch_size=16,
    prefetch_buffer_size=16,
    shuffle_buffer_size=32,
    train_steps=10,
)

# Single-replica worker pool; every trainer_args entry becomes a
# `--name=value` flag appended after the fixed flags.
worker_pool_specs = [
    dict(
        machine_spec=dict(
            machine_type=TRAIN_MACHINE_TYPE,
            accelerator_type=TRAIN_ACCELERATOR_TYPE,
            # Each training job uses TRAIN_NUM_GPU GPUs.
            accelerator_count=TRAIN_NUM_GPU,
        ),
        replica_count=1,
        container_spec=dict(
            image_uri=TRAIN_CONTAINER_URI,
            args=[
                "--mode=train_and_eval",
                "--params_override=runtime.num_gpus=%d" % TRAIN_NUM_GPU,
                *(
                    "--{}={}".format(name, value)
                    for name, value in trainer_args.items()
                ),
            ],
        ),
    )
]

# Metric reported by the trainer and the direction to optimize it in.
metric_spec = dict(model_performance="maximize")

# The learning rates below may not be optimal for the selected model type. To
# tune them, try hpt.DoubleParameterSpec with more trials, or a longer list
# such as:
# LEARNING_RATES = [1e-3, 3e-3]
LEARNING_RATES = [0.001]
# One trial per candidate learning rate.
MAX_TRIAL_COUNT = len(LEARNING_RATES)
parameter_spec = dict(
    learning_rate=hpt.DiscreteParameterSpec(values=LEARNING_RATES, scale="linear"),
)

print(worker_pool_specs, metric_spec, parameter_spec)

# Check the accelerator quota before submitting the tuning job.
common_util.check_quota(
    project_id=PROJECT_ID,
    region=REGION,
    accelerator_type=TRAIN_ACCELERATOR_TYPE,
    accelerator_count=TRAIN_NUM_GPU,
    is_for_training=True,
)

# Labels attached to the finetuning job.
versioned_model_id = model_name.lower().replace("_", "-")
labels = {
    "mg-source": "notebook",
    "mg-notebook-name": "model_garden_movinet_action_recognition.ipynb".split(".")[0],
    "mg-tune": "publishers-google-models-movinet",
}
labels["versioned-mg-tune"] = f"{labels['mg-tune']}-{versioned_model_id}"

# The custom job describes a single trial; the tuning job wraps it and runs
# the trials one at a time.
train_custom_job = aiplatform.CustomJob(
    display_name=train_job_name,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=STAGING_BUCKET,
    labels=labels,
)
train_hpt_job = aiplatform.HyperparameterTuningJob(
    display_name=train_job_name,
    custom_job=train_custom_job,
    metric_spec=metric_spec,
    parameter_spec=parameter_spec,
    max_trial_count=MAX_TRIAL_COUNT,
    parallel_trial_count=1,
    project=PROJECT_ID,
    search_algorithm=None,
)
train_hpt_job.run(service_account=SERVICE_ACCOUNT)

print("model_dir is:", model_dir)

In [None]:
# @title Export model in Tensorflow SavedModel format

# This job will export models from TF checkpoints to TF saved model format.
# model_dir is from the section above.

# Key read from each trial's `best_ckpt/info.json` to rank the trials.
EVALUATION_METRIC = "accuracy"

# Export constants.
# Prefix of the export job display name (combined with OBJECTIVE).
EXPORT_JOB_PREFIX = "export"
# Export container image, hosted on the registry matching REGION_PREFIX.
EXPORT_CONTAINER_URI = f"{REGION_PREFIX}-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/movinet-model-export"
# Machine type of the export job's worker pool.
EXPORT_MACHINE_TYPE = "n1-highmem-8"


def get_best_trial(model_dir, max_trial_count, evaluation_metric):
    best_trial_dir = ""
    best_trial_evaluation_results = {}
    best_performance = -1

    for i in range(max_trial_count):
        current_trial = i + 1
        current_trial_dir = os.path.join(model_dir, "trial_" + str(current_trial))
        current_trial_best_ckpt_dir = os.path.join(current_trial_dir, "best_ckpt")
        current_trial_best_ckpt_evaluation_filepath = os.path.join(
            current_trial_best_ckpt_dir, "info.json"
        )
        ! gsutil cp $current_trial_best_ckpt_evaluation_filepath .
        with open("info.json", "r") as f:
            eval_metric_results = json.load(f)
            current_performance = eval_metric_results[evaluation_metric]
            if current_performance > best_performance:
                best_performance = current_performance
                best_trial_dir = current_trial_dir
                best_trial_evaluation_results = eval_metric_results
    print("best_trial_dir: ", current_trial_best_ckpt_evaluation_filepath)
    return best_trial_dir, best_trial_evaluation_results


def get_best_ckpt(checkpoint_dir: str) -> str:
    """Returns the checkpoint prefix found in `checkpoint_dir`.

    Lists the directory with `gsutil ls` and returns the first entry ending in
    `.index` with that extension removed. Returns None when there is no such
    entry, or when the listing command fails (the error is printed).
    """
    try:
        listing = subprocess.check_output(["gsutil", "ls", checkpoint_dir])
    except subprocess.CalledProcessError as e:
        print(f"Error listing checkpoints: {e}")
        return None

    for uri in listing.decode("utf-8").strip().splitlines():
        if not uri.endswith(".index"):
            continue
        stem = os.path.splitext(os.path.basename(uri))[0]
        return os.path.dirname(uri) + "/" + stem
    return None


# Pick the best trial by EVALUATION_METRIC and locate its best checkpoint.
best_trial_dir, best_trial_evaluation_results = get_best_trial(
    model_dir, MAX_TRIAL_COUNT, EVALUATION_METRIC
)
best_checkpoint_path = get_best_ckpt(f"{best_trial_dir}/best_ckpt/")
print("best_trial_dir: ", best_trial_dir)
print("best_trial_evaluation_results: ", best_trial_evaluation_results)
print("best_checkpoint: ", best_checkpoint_path)

# get_best_ckpt returns None when no checkpoint is found. Stop here rather
# than passing the literal flag `--checkpoint_path=None` to the export job.
if not best_checkpoint_path:
    raise ValueError(
        "No checkpoint found under %s/best_ckpt/; cannot export the model."
        % best_trial_dir
    )

# Flags for the export container: export settings first, followed by the
# architecture flags selected together with the model.
container_args = {
    "export_path": f"{model_dir}/best_model",
    "model_id": model_id,
    "num_classes": num_classes,
    "causal": is_stream,
    "checkpoint_path": best_checkpoint_path,
    "assert_checkpoint_objects_matched": False,
}
container_args.update(export_container_args)

# Single-replica worker pool; every container_args entry becomes a
# `--name=value` flag.
worker_pool_specs = [
    dict(
        machine_spec=dict(machine_type=EXPORT_MACHINE_TYPE),
        replica_count=1,
        container_spec=dict(
            image_uri=EXPORT_CONTAINER_URI,
            args=[
                "--{}={}".format(name, value)
                for name, value in container_args.items()
            ],
        ),
    )
]

# Create the export job and run it under the notebook's service account.
model_export_job_name = common_util.get_job_name_with_datetime(
    EXPORT_JOB_PREFIX + "_" + OBJECTIVE
)
model_export_custom_job = aiplatform.CustomJob(
    display_name=model_export_job_name,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=STAGING_BUCKET,
)
model_export_custom_job.run(service_account=SERVICE_ACCOUNT)

print("best model is saved to: ", container_args["export_path"])

## Upload
This section shows how to test the trained models.
1. Upload and deploy models to the [Vertex AI Model Registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction).
2. Run batch predictions.

**Note:** The prediction container only works with the base model. If you trained a streaming model, download the model from the exported path and refer to the [MoViNet official guide](https://github.com/tensorflow/models/blob/master/official/projects/movinet/movinet_streaming_model_training_and_inference.ipynb) for running predictions locally.

In [None]:
# @title Upload model to Vertex AI Model Registry

# @markdown The following cell uploads the trained model to Vertex AI Model Registry. Skip it if you want to run batch predictions on an already uploaded model instead.

# @markdown  **Configurable environment variables**
# @markdown *  `MODEL_PATH`: Cloud Storage URI to the MoViNet model.
# @markdown * `BATCH_SIZE`: Batch size for inference. Use a larger value to accelerate GPU prediction.
# @markdown * `NUM_FRAMES`: Number of frames for a single prediction with the model.
# @markdown * `FPS`: Video sampling frame per second.
# @markdown * `OVERLAP_FRAMES`: Allowed overlapping frames between consecutive prediction windows. Set a smaller value for faster inference but less accurate.


# Prediction constants.
# Accelerator and machine types can be changed to get faster predictions.
PREDICTION_JOB_PREFIX = "predict"
PREDICTION_CONTAINER_URI = f"{REGION_PREFIX}-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/movinet-serve"
PREDICTION_PORT = 8080
PREDICTION_MACHINE_TYPE = "n1-standard-4"
PREDICTION_ACCELERATOR_TYPE = "NVIDIA_TESLA_T4"
PREDICTION_ACCELERATOR_COUNT = 1

# Environment variables handed to the serving container.
serving_env = dict(
    MODEL_ID="tfvision-movinet-var",
    MODEL_PATH=container_args["export_path"],
    BATCH_SIZE=1,
    NUM_FRAMES=32,
    FPS=output_fps,
    OVERLAP_FRAMES=24,
    OBJECTIVE=OBJECTIVE,
    IMAGE_WIDTH=image_size,
    IMAGE_HEIGHT=image_size,
    DEPLOY_SOURCE="notebook",
)

# Register the model and block until the upload has finished.
models["model_var"] = aiplatform.Model.upload(
    display_name=model_name,
    serving_container_image_uri=PREDICTION_CONTAINER_URI,
    serving_container_ports=[PREDICTION_PORT],
    serving_container_predict_route="/predict",
    serving_container_health_route="/ping",
    serving_container_environment_variables=serving_env,
)
models["model_var"].wait()

print("The uploaded model name is: ", model_name)

## Predict

In [None]:
# @title Run batch predictions

# @markdown We will now run batch predictions with the trained MoViNet action recognition model with [Vertex AI Batch Prediction](https://cloud.google.com/vertex-ai/docs/predictions/get-batch-predictions).

# @markdown Prepare an input JSONL file where each line follows [this format](https://cloud.google.com/vertex-ai/docs/video-data/action-recognition/get-predictions?hl=en#input_data_requirements) and store it in a Cloud Storage bucket. The service account should have read access to the buckets containing the trained model and the input data. See [Service accounts overview](https://cloud.google.com/iam/docs/service-account-overview) for more information.

# @markdown The [Vertex AI Batch Prediction](https://cloud.google.com/vertex-ai/docs/predictions/get-batch-predictions) has a default timeout of 10 minutes. Therefore, make sure the input video clip is around 5 minutes at 5~10 FPS or you may experience a timeout error.

# @markdown To use this model at a larger scale beyond this notebook demonstration, you can try one of the following:
# @markdown - Pull the serving docker image to a VM or a local machine and send prediction requests directly.
# @markdown - To process more data concurrently, write a custom [DataFlow](https://cloud.google.com/dataflow) pipeline to send prediction requests to the movinet serving container.
# @markdown - Divide videos into 5-minute clips and run batch prediction with a small batch size.

# Path to the prediction input JSONL file.
test_jsonl_path = ""  # @param {type:"string"}

# Results are written to a job-specific folder inside the staging bucket.
predict_job_name = common_util.get_job_name_with_datetime(
    f"{PREDICTION_JOB_PREFIX}_{model_name}"
)
predict_destination_prefix = os.path.join(STAGING_BUCKET, predict_job_name)

# Launch the batch prediction on a single replica and wait for it to finish.
batch_prediction_job = models["model_var"].batch_predict(
    job_display_name=predict_job_name,
    gcs_source=test_jsonl_path,
    gcs_destination_prefix=predict_destination_prefix,
    machine_type=PREDICTION_MACHINE_TYPE,
    accelerator_count=PREDICTION_ACCELERATOR_COUNT,
    accelerator_type=PREDICTION_ACCELERATOR_TYPE,
    max_replica_count=1,
    service_account=SERVICE_ACCOUNT,
)
batch_prediction_job.wait()

# Summarize the finished job, one value per line.
print(
    batch_prediction_job.display_name,
    batch_prediction_job.resource_name,
    batch_prediction_job.state,
    sep="\n",
)

In [None]:
# @title Read the prediction response


def print_response_instance(json_str: str, label_map: dict[int, str]):
    """Prints a readable summary of one line of the batch prediction output.

    Lines without a `prediction` field are printed verbatim as errors.
    Otherwise a header with the video URI and time segment is printed,
    followed by one line per predicted item with its label looked up in
    `label_map`.
    """
    response = json.loads(json_str)
    if "prediction" not in response:
        print("Error:", json_str)
        return

    instance, prediction = response["instance"], response["prediction"]
    gcs_uri = instance["content"]
    # The time segment bounds are optional in the instance.
    segment_start = instance.get("timeSegmentStart", "0.0s")
    segment_end = instance.get("timeSegmentEnd", "Infinity")
    print(f"---------- Predict {gcs_uri}, {segment_start} to {segment_end}:")

    for item in prediction:
        timestamp = item["timeSegmentStart"]
        label_name = label_map[item["label"]]
        score = item["confidence"]
        print(f"At {timestamp}, detected {label_name} with {score} confidence.")


# The label map file was generated from the section above (`Prepare input data for training`).

dir_name = os.path.basename(predict_destination_prefix)
! gsutil -m cp -R $predict_destination_prefix /tmp

local_path = os.path.join("/tmp", dir_name)
file_paths = []
for root, _, files in os.walk(local_path):
    for file in files:
        file_path = os.path.join(root, file)
        file_paths.append(file_path)

for file in file_paths:
    with open(file, "r") as f:
        for line in f:
            print_response_instance(line, label_map)

! rm -rf /tmp/$dir_name

## Clean up

In [None]:
# @title Clean up training jobs, models, endpoints and buckets

# Delete custom and hpt jobs.
# Each job is deleted only when listing jobs filtered by its display name
# returns a non-empty result. The checks run in order, so every job variable
# must already exist from the earlier cells.
if data_converter_custom_job.list(filter=f'display_name="{data_converter_job_name}"'):
    data_converter_custom_job.delete()
if train_hpt_job.list(filter=f'display_name="{train_job_name}"'):
    train_hpt_job.delete()
if model_export_custom_job.list(filter=f'display_name="{model_export_job_name}"'):
    model_export_custom_job.delete()
if batch_prediction_job.list(filter=f'display_name="{predict_job_name}"'):
    batch_prediction_job.delete()

# Delete models.
# `models` holds every model uploaded by this notebook.
for model in models.values():
    model.delete()

# @markdown Delete temporary GCS buckets.

# The bucket is kept unless the form checkbox is ticked; ticking it removes
# BUCKET_NAME recursively, including all of its contents.
delete_bucket = False  # @param {type:"boolean"}
if delete_bucket:
    ! gsutil -m rm -r $BUCKET_NAME