In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden MediaPipe with Face Stylizer

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_mediapipe_face_stylizer.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>

  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_mediapipe_face_stylizer.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_mediapipe_face_stylizer.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
Open in Vertex AI Workbench
    </a>
  </td>
</table>

**_NOTE_**: This notebook has been tested in the following environment:

* Python version = 3.9

**_NOTE_**: The checkpoint and the dataset linked in this Colab are not owned or distributed by Google, and are made available by third parties. Please review the terms and conditions made available by the third parties before using the checkpoint and data.

## Overview

This notebook demonstrates how to use [MediaPipe Model Maker](https://developers.google.com/mediapipe/solutions/model_maker) to customize an on-device face stylizer model in Vertex AI Model Garden.

The MediaPipe face stylizer solution provides several models you can use immediately to transform a face into styles such as cartoon or oil painting in your application. However, if you need to transfer the face to an unseen style not covered by the provided models, you can customize the pretrained model with your own data and MediaPipe Model Maker. This model modification tool fine-tunes a portion of the model using data you provide. This method is faster than training a new model from scratch and can produce a model adapted to your specific application.

The following sections show you how to use Model Maker to retrain a pre-built model for face stylization with your own data on Vertex AI, which you can then use with the MediaPipe Face Stylizer.

### Objective

* Customize a Face Stylizer model
  * Convert input data to training formats
  * Create [custom jobs](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) to customize new models
  * Export customized models

* Cleanup resources

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI
pricing](https://cloud.google.com/vertex-ai/pricing) and [Cloud Storage
pricing](https://cloud.google.com/storage/pricing), and use the [Pricing
Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

## Before you begin

### Colab only
Run the following commands to install dependencies and to authenticate with Google Cloud if running on Colab.

In [None]:
# Bring pip itself up to date before installing anything else.
! pip3 install --upgrade pip

import sys

# The extra setup below is Colab-specific; Colab is detected by checking
# whether the `google.colab` module is already loaded in this interpreter.
if "google.colab" in sys.modules:
    ! pip3 install --upgrade google-cloud-aiplatform

    # Automatically restart kernel after installs
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

    # NOTE(review): authentication is requested after the kernel shutdown call
    # above -- confirm that this still executes as intended in Colab.
    from google.colab import auth as google_auth

    google_auth.authenticate_user()

#### Set your project ID

**If you don't know your project ID**, see the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)

In [None]:
# Replace the placeholder with your Google Cloud project ID. The value is
# passed to gcloud, gsutil and the Vertex AI SDK in later cells.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

# Make this project the default for subsequent gcloud commands.
! gcloud config set project {PROJECT_ID}

#### Region

You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [None]:
# Region used by Vertex AI. Only the segment before the first "-" is kept in
# REGION_PREFIX, and it must be one of the three supported prefixes.
REGION = "us-central1"  # @param {type: "string"}
REGION_PREFIX = REGION.partition("-")[0]
assert REGION_PREFIX in ("us", "europe", "asia"), (
    f'{REGION} is not supported. It must be prefixed by "us", "asia", or "europe".'
)

### Create a Cloud Storage bucket

Create a storage bucket to store intermediate artifacts such as datasets.

In [None]:
# Cloud Storage bucket URI for this notebook's intermediate artifacts; the
# default embeds PROJECT_ID in the bucket name.
BUCKET_URI = f"gs://your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [None]:
# Create the bucket BUCKET_URI in location REGION under project PROJECT_ID.
! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}

### Import libraries

In [None]:
import json
import os
from datetime import datetime

from google.cloud import aiplatform

### Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project.

In [None]:
# Timestamp shared by this run's staging folder and training job name.
now = datetime.now().strftime("%Y%m%d-%H%M%S")

# Cloud Storage URIs always use "/" as the separator, so they are built with
# plain string formatting rather than os.path.join, which would insert "\" on
# Windows. A trailing "/" on BUCKET_URI is tolerated.
STAGING_BUCKET = f"{BUCKET_URI.rstrip('/')}/temp/{now}"

# Folder passed to the training job as its export path, and the model file
# expected inside it.
EXPORTED_MODEL_OUTPUT_DIRECTORY = f"{STAGING_BUCKET}/model"
EXPORTED_MODEL_OUTPUT_FILE = f"{EXPORTED_MODEL_OUTPUT_DIRECTORY}/model.tflite"

aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=STAGING_BUCKET)

### Define training machine specs

In [None]:
# Display name of the custom job, made unique with the run timestamp.
TRAINING_JOB_DISPLAY_NAME = f"mediapipe_face_stylizer_{now}"
# Training container image; the registry host is selected by REGION_PREFIX.
TRAINING_CONTAINER = (
    f"{REGION_PREFIX}-docker.pkg.dev"
    "/vertex-ai-restricted/vertex-vision-model-garden-dockers/mediapipe-train"
)
# Machine type and accelerators requested for the training worker.
TRAINING_MACHINE_TYPE = "n1-highmem-16"
TRAINING_ACCELERATOR_TYPE = "NVIDIA_TESLA_V100"
TRAINING_ACCELERATOR_COUNT = 2

## Train your customized models

### Prepare input data for training

Retraining the face stylizer model requires the user to provide a single stylized face image. The stylized face is expected to be forward facing with visible left and right eyes and mouth. The face should only have minor rotation, i.e. less than 30 degrees around the yaw, pitch, and roll axes.

You can upload an image to Google Cloud Storage or use our [provided example](https://storage.googleapis.com/mediapipe-assets/face_stylizer_style_color_sketch.jpg).

In [None]:
# Cloud Storage URI of the stylized face image used as the training input.
training_data_path = "gs://mediapipe-assets/face_stylizer_style_color_sketch.jpg"  # @param {type:"string"}

# Retrain model

Once you have provided an input image, you can begin retraining the face stylizer model to adapt to the new style. This type of model modification is called transfer learning. The instructions below use the data prepared in the previous section to retrain a face stylizer model to apply cartoon style to the raw human face.

**_NOTE_**: For this type of model, the retraining process causes the model to forget any style it could apply before. Once the retraining is complete, the new model can only apply the new style defined by the new stylized image.


## Set retraining options
There are a few required settings to run a retraining aside from your training dataset:

* **Swap layers:** The `swap_layers` parameter is used to determine how to mix the latent code layers between the learned style and the raw face images. The latent code is represented as a tensor of shape [1, 12, 512]. The second dimension of the latent code tensor is called the layer. The face stylizer mixes the learned style and raw face images by generating a weighted sum of the two latent codes on the swap layers. The swap layers are therefore integers within [1, 12]. The more layers are set, the more style will be applied to the output image. Although there is no explicit mapping between the style semantics and the layer index, the shallow layers, e.g. 8, 9, represent the global features of the face, while the deep layers, e.g. 10, 11, represent the fine-grained features of the face. The output stylized image is sensitive to the setting of swap layers. By default, it is set to [8, 9, 10, 11].
* **Learning rate and epochs:** Use `learning_rate` and `epochs` to specify these two hyperparameters. `learning_rate` is set to 4e-4 by default. `epochs` defines the number of iterations to fine-tune the BlazeStyleGAN model and is set to 100 by default. The lower the learning rate, the more epochs are expected to be needed for the model to converge.
* **Batch size:** The `batch_size` defines the number of latent code samples drawn around the latent code that the encoder extracts from the input image. The batch of latent codes is used to fine-tune the decoder. A larger batch size usually yields better performance, but it is limited by the hardware memory. For A100 GPU, the maximum batch size is 8. For P100 and T4 GPU, the maximum batch size is 2.

More advanced parameters that you can also configure are `alpha`, `perception_loss_weight`, `adv_loss_weight`, `beta_1` and `beta_2`.

In [None]:
# The layers of features to be interpolated between the encoding features and
# the StyleGAN input features. Entered as a JSON list in a string.
swap_layers: str = "[8, 9, 10, 11]"  # @param {type:"string"}
# The learning rate to use for gradient descent training.
learning_rate: float = 0.0001  # @param {type:"number"}
# Number of training iterations over the dataset.
epochs: int = 100  # @param {type:"slider", min:0, max:100, step:1}
# Batch size for training.
batch_size: int = 2  # @param {type:"number"}


# Other supported options

# Weighting coefficient of the style latent for swapping-layer interpolation.
# Its valid range is [0, 1]. A greater weight means a stronger style is
# applied to the output image. Expect to set it to a small value,
# i.e. < 0.1.
alpha: float = 0.1  # @param {type:"number"}

# Weighting coefficients of the image perception quality loss. It contains
# three coefficients (l1, content, and style) which control the difference
# between the generated image and the raw input image, the content difference
# between the generated face and the raw input face, and how similar the style
# is between the generated image and the raw input image. Users can increase
# the style weight to enforce a stronger style, or the content weight to
# preserve more raw input face details.
# Weight for L1 loss.
perception_loss_l1: float = 0.5  # @param {type:"number"}
# Weight for content loss.
perception_loss_content: float = 4.0  # @param {type:"number"}
# Weight for style loss.
perception_loss_style: float = 1.0  # @param {type:"number"}

# Weighting coefficient of adversarial loss versus image perceptual quality
# loss. This hyperparameter is used to control the realism of the generated
# image. It expects a small value, i.e. < 0.2.
adv_loss_weight: float = 0.2  # @param {type:"number"}
# beta_1 used in tf.keras.optimizers.Adam.
beta_1: float = 0.0  # @param {type:"number"}
# beta_2 used in tf.keras.optimizers.Adam.
beta_2: float = 0.99  # @param {type:"number"}

### Run retraining
With your training dataset and retraining options prepared, you are ready to start the retraining process. This process requires running on GPU and can take a few minutes to a few hours depending on your available compute resources. On Vertex AI with GPU processing, the example retraining below takes about 2 minutes.

To begin the fine-tuning process, use the following code:


In [None]:
# The exported model and the evaluation results share one output folder.
model_export_path = EXPORTED_MODEL_OUTPUT_DIRECTORY

# Training hyperparameters, serialized to JSON for the --hparams flag.
hparams_json = json.dumps(
    {
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "epochs": epochs,
        "beta_1": beta_1,
        "beta_2": beta_2,
    }
)

# Model options, serialized to JSON for the --model_options flag.
# swap_layers is held as a JSON string, so it is parsed with json.loads first.
model_options_json = json.dumps(
    {
        "swap_layers": json.loads(swap_layers),
        "alpha": alpha,
        "perception_loss_l1": perception_loss_l1,
        "perception_loss_content": perception_loss_content,
        "perception_loss_style": perception_loss_style,
        "adv_loss_weight": adv_loss_weight,
    }
)

# Command-line arguments handed to the training container.
container_args = [
    "--task_name=face_stylizer",
    f"--training_data_path={training_data_path}",
    f"--model_export_path={model_export_path}",
    f"--evaluation_result_path={model_export_path}",
    f"--hparams={hparams_json}",
    f"--model_options={model_options_json}",
]

# A single worker pool with one replica running the training container.
worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": TRAINING_MACHINE_TYPE,
            "accelerator_type": TRAINING_ACCELERATOR_TYPE,
            "accelerator_count": TRAINING_ACCELERATOR_COUNT,
        },
        "replica_count": 1,
        "container_spec": {
            "image_uri": TRAINING_CONTAINER,
            "command": [],
            "args": container_args,
        },
    }
]

training_job = aiplatform.CustomJob(
    display_name=TRAINING_JOB_DISPLAY_NAME,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=STAGING_BUCKET,
)

# Launch the custom training job.
training_job.run()

## Export model
After retraining the model, you can save the TensorFlow Lite model and integrate it with your on-device application by following the [Face stylization task guide](https://developers.google.com/mediapipe/solutions/vision/face_stylizer).

In [None]:
import sys

def copy_model(model_source, model_dest):
    ! gsutil cp {model_source} {model_dest}

copy_model(EXPORTED_MODEL_OUTPUT_FILE, "face_stylizer.task")

if "google.colab" in sys.modules:
    from google.colab import files

    files.download("face_stylizer.task")

## Clean up

In [None]:
# Delete training data and jobs.
if training_job.list(filter=f'display_name="{TRAINING_JOB_DISPLAY_NAME}"'):
    training_job.delete()

!gsutil rm -r {STAGING_BUCKET}