In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden MediaPipe with Face Stylizer

<table><tbody><tr>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_mediapipe_face_stylizer.ipynb">
      <img alt="Google Cloud Colab Enterprise logo" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" width="32px"><br> Run in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_mediapipe_face_stylizer.ipynb">
      <img alt="GitHub logo" src="https://github.githubassets.com/assets/GitHub-Mark-ea2971cee799.png" width="32px"><br> View on GitHub
    </a>
  </td>
</tr></tbody></table>

## Overview

This notebook demonstrates how to use [MediaPipe Model Maker](https://developers.google.com/mediapipe/solutions/model_maker) to customize an on-device face stylizer model in Vertex AI Model Garden.

The MediaPipe face stylizer solution provides several models you can use immediately to transform a face into various styles (cartoon, oil painting, etc.) in your application. However, if you need to transfer the face to an unseen style not covered by the provided models, you can customize the pretrained model with your own data and MediaPipe Model Maker. This model modification tool fine-tunes a portion of the model using data you provide. This method is faster than training a new model from scratch and can produce a model adapted to your specific application.

The following sections show you how to use Model Maker to retrain a pre-built model for face stylization with your own data on Vertex AI, which you can then use with the MediaPipe Face Stylizer.

### Objective

* Customize a Face Stylizer model
  * Convert input data to training formats
  * Create [custom jobs](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) to customize new models
  * Export customized models

* Cleanup resources

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Before you begin

In [None]:
# @title Setup Google Cloud project

# @markdown 1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

# NOTE(review): the "Run training job" cell in this notebook requests
# 2 NVIDIA_TESLA_V100 GPUs, not A100s -- confirm that the quota guidance in
# step 2 below still applies to this notebook.
# @markdown 2. For finetuning, **[click here](https://console.cloud.google.com/iam-admin/quotas?location=us-central1&metric=aiplatform.googleapis.com%2Frestricted_image_training_nvidia_a100_80gb_gpus)** to check if your project already has the required 8 Nvidia A100 80 GB GPUs in the us-central1 region. If yes, then run this notebook in the us-central1 region. If you do not have 8 Nvidia A100 80 GB GPUs or have more GPU requirements than this, then schedule your job with Nvidia H100 GPUs via Dynamic Workload Scheduler using [these instructions](https://cloud.google.com/vertex-ai/docs/training/schedule-jobs-dws). For Dynamic Workload Scheduler, check the [us-central1](https://console.cloud.google.com/iam-admin/quotas?location=us-central1&metric=aiplatform.googleapis.com%2Fcustom_model_training_preemptible_nvidia_h100_gpus) or [europe-west4](https://console.cloud.google.com/iam-admin/quotas?location=europe-west4&metric=aiplatform.googleapis.com%2Fcustom_model_training_preemptible_nvidia_h100_gpus) quota for Nvidia H100 GPUs. If you do not have enough GPUs, then you can follow [these instructions](https://cloud.google.com/docs/quotas/view-manage#viewing_your_quota_console) to request quota.

# @markdown 3. For serving, **[click here](https://console.cloud.google.com/iam-admin/quotas?location=us-central1&metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_l4_gpus)** to check if your project already has the required 1 L4 GPU in the us-central1 region. If yes, then run this notebook in the us-central1 region. If you need more L4 GPUs for your project, then you can follow [these instructions](https://cloud.google.com/docs/quotas/view-manage#viewing_your_quota_console) to request more. Alternatively, if you want to run predictions with A100 80GB or H100 GPUs, we recommend using the regions listed below. **NOTE:** Make sure you have associated quota in selected regions. Click the links to see your current quota for each GPU type: [Nvidia A100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_a100_80gb_gpus), [Nvidia H100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_h100_gpus).

# @markdown > | Machine Type | Accelerator Type | Recommended Regions |
# @markdown | ----------- | ----------- | ----------- |
# @markdown | a2-ultragpu-1g | 1 NVIDIA_A100_80GB | us-central1, us-east4, europe-west4, asia-southeast1 |
# @markdown | a3-highgpu-2g | 2 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |
# @markdown | a3-highgpu-4g | 4 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |
# @markdown | a3-highgpu-8g | 8 NVIDIA_H100_80GB | us-central1, europe-west4, us-west1, asia-southeast1 |

# @markdown 4. **[Optional]** [Create a Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets) for storing experiment outputs. Set the BUCKET_URI for the experiment environment. The specified Cloud Storage bucket (`BUCKET_URI`) should be located in the same region as where the notebook was launched. Note that a multi-region bucket (e.g. "us") is not considered a match for a single region covered by the multi-region range (e.g. "us-central1"). If not set, a unique GCS bucket will be created instead.

# Leaving the bare "gs://" placeholder makes the cell create a new bucket.
BUCKET_URI = "gs://"  # @param {type:"string"}

# @markdown 5. **[Optional]** Set region. If not set, the region will be set automatically according to Colab Enterprise environment.

# An empty value falls back to the GOOGLE_CLOUD_REGION environment variable.
REGION = ""  # @param {type:"string"}

import datetime
import importlib
import json
import os
import uuid

from google.cloud import aiplatform

! git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git

common_util = importlib.import_module(
    "vertex-ai-samples.community-content.vertex_model_garden.model_oss.notebook_util.common_util"
)


# Get the default cloud project id.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

# Get the default region for launching jobs.
if not REGION:
    if not os.environ.get("GOOGLE_CLOUD_REGION"):
        raise ValueError(
            "REGION must be set. See"
            " https://cloud.google.com/vertex-ai/docs/general/locations for"
            " available cloud locations."
        )
    REGION = os.environ["GOOGLE_CLOUD_REGION"]

# Enable the Vertex AI API and Compute Engine API, if not already.
print("Enabling Vertex AI API and Compute Engine API.")
! gcloud services enable aiplatform.googleapis.com compute.googleapis.com

# Cloud Storage bucket for storing the experiment artifacts.
# A unique GCS bucket will be created for the purpose of this notebook. If you
# prefer using your own GCS bucket, change the value yourself below.
now = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])

if BUCKET_URI is None or BUCKET_URI.strip() == "" or BUCKET_URI == "gs://":
    BUCKET_URI = f"gs://{PROJECT_ID}-tmp-{now}-{str(uuid.uuid4())[:4]}"
    BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])
    ! gsutil mb -l {REGION} {BUCKET_URI}
else:
    assert BUCKET_URI.startswith("gs://"), "BUCKET_URI must start with `gs://`."
    shell_output = ! gsutil ls -Lb {BUCKET_NAME} | grep "Location constraint:" | sed "s/Location constraint://"
    bucket_region = shell_output[0].strip().lower()
    if bucket_region != REGION:
        raise ValueError(
            "Bucket region %s is different from notebook region %s"
            % (bucket_region, REGION)
        )
print(f"Using this GCS Bucket: {BUCKET_URI}")

STAGING_BUCKET = os.path.join(BUCKET_URI, "temporal")
MODEL_BUCKET = os.path.join(BUCKET_URI, "mediapipe_face_stylizer")


# Initialize Vertex AI API.
print("Initializing Vertex AI API.")
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=STAGING_BUCKET)

# Gets the default SERVICE_ACCOUNT.
shell_output = ! gcloud projects describe $PROJECT_ID
project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"
print("Using this default Service Account:", SERVICE_ACCOUNT)


# Provision permissions to the SERVICE_ACCOUNT with the GCS bucket
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.admin $BUCKET_NAME

! gcloud config set project $PROJECT_ID
! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role="roles/storage.admin"
! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role="roles/aiplatform.user"

REGION_PREFIX = REGION.split("-")[0]
assert REGION_PREFIX in (
    "us",
    "europe",
    "asia",
), f'{REGION} is not supported. It must be prefixed by "us", "asia", or "europe".'

## Train your customized models

In [None]:
# @title Prepare input data for training

# @markdown Retraining the face stylizer model requires you to provide a single stylized face image. The stylized face is expected to be forward facing with visible left and right eyes and mouth. The face should only have minor rotation, i.e. less than 30 degrees around the yaw, pitch, and roll axes.

# @markdown You can upload an image to Google Cloud Storage or use our [provided example](https://storage.googleapis.com/mediapipe-assets/face_stylizer_style_color_sketch.jpg).

# Passed to the training container as `--training_data_path`.
training_data_path = "gs://mediapipe-assets/face_stylizer_style_color_sketch.jpg"  # @param {type:"string"}

## Retrain model

Once you have provided an input image, you can begin retraining the face stylizer model to adapt to the new style. This type of model modification is called transfer learning. The instructions below use the data prepared in the previous section to retrain a face stylizer model to apply cartoon style to the raw human face.

**_NOTE_**: For this type of model, the retraining process causes the model to forget any style it could apply before. Once the retraining is complete, the new model can only apply the new style defined by the new stylized image.


In [None]:
# @title Set training parameters

# @markdown There are a few required settings to run a training aside from your training dataset:
# @markdown * `swap_layers` : The swap_layers parameter is used to determine how to mix the latent code layers between the learned style and the raw face images. The latent code is represented as a tensor of shape [1, 12, 512]. The second dimension of the latent code tensor is called the layer. The face stylizer mixes the learned style and raw face images by generating a weighted sum of the two latent codes on the swap layers. The swap layers are therefore integers within [1, 12]. The more layers are set, the more style will be applied to the output image. Although there is no explicit mapping between the style semantics and the layer index, the shallow layers, e.g. 8, 9, represent the global features of the face, while the deep layers, e.g. 10, 11, represent the fine-grained features of the face. The output stylized image is sensitive to the setting of swap layers. By default, it is set to [8, 9, 10, 11].
# @markdown * `learning_rate` and `epochs` : Use `learning_rate` and `epochs` to specify these two hyperparameters. `learning_rate` is set to 1e-4 by default. `epochs` defines the number of iterations to fine-tune the BlazeStyleGAN model and is set to 100 by default. The lower the learning rate is, the more epochs are expected to be needed for the retrained model to converge.
# @markdown * `batch_size` : The batch_size is used to define the number of latent code samples we sample around the latent code extracted by the encoder with the input image. The batch of latent codes is used to fine-tune the decoder. A greater batch size usually yields better performance, but it is limited by the hardware memory. For A100 GPU, the maximum batch size is 8. For P100 and T4 GPU, the maximum batch size is 2.

# The layers of feature to be interpolated between encoding features and
# StyleGAN input features. Entered as a JSON list string; it is decoded with
# json.loads when the training job arguments are built.
swap_layers: str = "[8, 9, 10, 11]"  # @param {type:"string"}
# The learning rate to use for gradient descent training.
learning_rate: float = 0.0001  # @param {type:"number"}
# Number of training iterations over the dataset.
epochs: int = 100  # @param {type:"slider", min:0, max:100, step:1}
# Batch size for training.
batch_size: int = 2  # @param {type:"number"}

# @markdown More advanced parameters that you can configure are `alpha`, the perception loss weights (`perception_loss_l1`, `perception_loss_content`, `perception_loss_style`), `adv_loss_weight`, `beta_1` and `beta_2`.

# Weighting coefficient of style latent for swapping layer interpolation.
# Its valid range is [0, 1]. The greater weight means stronger style is
# applied to the output image. Expect to set it to a small value,
# i.e. < 0.1.
alpha: float = 0.1  # @param {type:"number"}

# Weighting coefficients of image perception quality loss. It contains three
# coefficients, l1, content, and style which control the difference between the
# generated image and raw input image, the content difference between generated
# face and raw input face, and how similar the style is between the generated
# image and raw input image. Users can increase the style weight to enforce
# stronger style or the content weight to preserve more raw input face details.
# Weight for L1 loss.
perception_loss_l1: float = 0.5  # @param {type:"number"}
# Weight for content loss.
perception_loss_content: float = 4.0  # @param {type:"number"}
# Weight for style loss.
perception_loss_style: float = 1.0  # @param {type:"number"}

# Weighting coefficient of adversarial loss versus image perceptual quality loss.
# This hyperparameter is used to control the realism of the generated image. It
# expects a small value, i.e. < 0.2.
adv_loss_weight: float = 0.2  # @param {type:"number"}
# beta_1 used in tf.keras.optimizers.Adam.
beta_1: float = 0.0  # @param {type:"number"}
# beta_2 used in tf.keras.optimizers.Adam.
beta_2: float = 0.99  # @param {type:"number"}

# @markdown Click "Show code" to see more details.

In [None]:
# @title Run training job

# @markdown With the training dataset and options prepared, you are ready to start the retraining process. This process requires running on GPU and can take a few minutes to a few hours depending on your available compute resources.

# @markdown The training job takes around 5 minutes to complete.

# Job identity, container image and hardware used for the custom job.
TRAINING_JOB_DISPLAY_NAME = f"mediapipe_face_stylizer_{now}"
TRAINING_CONTAINER = f"{REGION_PREFIX}-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/mediapipe-train"
TRAINING_MACHINE_TYPE = "n1-highmem-16"
TRAINING_ACCELERATOR_TYPE = "NVIDIA_TESLA_V100"
TRAINING_ACCELERATOR_COUNT = 2

# The exported model and the evaluation results share one output folder.
EXPORTED_MODEL_OUTPUT_DIRECTORY = os.path.join(STAGING_BUCKET, "model")
model_export_path = EXPORTED_MODEL_OUTPUT_DIRECTORY

# Optimizer and schedule settings, serialized for the `--hparams` flag.
hparams_json = json.dumps(
    {
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "epochs": epochs,
        "beta_1": beta_1,
        "beta_2": beta_2,
    }
)

# Stylizer-specific settings, serialized for the `--model_options` flag.
# `swap_layers` arrives from the form as a JSON string and is decoded here.
model_options_json = json.dumps(
    {
        "swap_layers": json.loads(swap_layers),
        "alpha": alpha,
        "perception_loss_l1": perception_loss_l1,
        "perception_loss_content": perception_loss_content,
        "perception_loss_style": perception_loss_style,
        "adv_loss_weight": adv_loss_weight,
    }
)

container_args = [
    "--task_name=face_stylizer",
    f"--training_data_path={training_data_path}",
    f"--model_export_path={model_export_path}",
    f"--evaluation_result_path={model_export_path}",
    f"--hparams={hparams_json}",
    f"--model_options={model_options_json}",
]

machine_spec = {
    "machine_type": TRAINING_MACHINE_TYPE,
    "accelerator_type": TRAINING_ACCELERATOR_TYPE,
    "accelerator_count": TRAINING_ACCELERATOR_COUNT,
}

# A single worker pool with one replica runs the training container.
worker_pool_specs = [
    {
        "machine_spec": machine_spec,
        "replica_count": 1,
        "container_spec": {
            "image_uri": TRAINING_CONTAINER,
            "command": [],
            "args": container_args,
        },
    }
]

# Check the accelerator quota through the shared helper before creating the job.
common_util.check_quota(
    project_id=PROJECT_ID,
    region=REGION,
    accelerator_type=TRAINING_ACCELERATOR_TYPE,
    accelerator_count=TRAINING_ACCELERATOR_COUNT,
    is_for_training=True,
)

# Labels attached to the finetuning job.
notebook_name = "model_garden_mediapipe_face_stylizer.ipynb".split(".")[0]
versioned_model_id = "face-stylizer"
labels = {
    "mg-source": "notebook",
    "mg-notebook-name": notebook_name,
    "mg-tune": "publishers-google-models-mediapipe",
}
labels["versioned-mg-tune"] = f"{labels['mg-tune']}-{versioned_model_id}"

training_job = aiplatform.CustomJob(
    display_name=TRAINING_JOB_DISPLAY_NAME,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=STAGING_BUCKET,
    labels=labels,
)

training_job.run()

## Export model

In [None]:
# @title Export model

# @markdown After retraining the model, you can save the TensorFlow Lite model and integrate it with your on-device application by following the [Face stylization task guide](https://developers.google.com/mediapipe/solutions/vision/face_stylizer).

# Path of the model file inside the export directory that was passed to the
# training job as `--model_export_path`.
# NOTE(review): assumes the training container writes a file named
# "model.tflite" into that directory -- confirm against the container.
EXPORTED_MODEL_OUTPUT_FILE = os.path.join(
    EXPORTED_MODEL_OUTPUT_DIRECTORY, "model.tflite"
)


def copy_model(model_source, model_dest):
    """Copy a model file by running `gsutil cp`.

    Args:
        model_source: Path or URI of the file to copy; interpolated into the
            shell command as the source argument.
        model_dest: Path or URI to copy the file to; interpolated into the
            shell command as the destination argument.
    """
    # Both values are substituted into the command line unquoted.
    ! gsutil cp {model_source} {model_dest}


# Save the exported model under a local file name in the working directory.
copy_model(EXPORTED_MODEL_OUTPUT_FILE, "face_stylizer.task")

## Clean up

In [None]:
# @title Clean up training jobs and buckets
# @markdown Delete temporary GCS buckets.

delete_bucket = False  # @param {type:"boolean"}
if delete_bucket:
    ! gsutil -m rm -r $BUCKET_NAME

# Delete training data and jobs.
if training_job.list(filter=f'display_name="{TRAINING_JOB_DISPLAY_NAME}"'):
    training_job.delete()