In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - Stable Diffusion V2.1 (Local Dreambooth Finetune)

<table align="left">
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_pytorch_sd_2_1_local_finetuning_dreambooth.ipynb">
      <img alt="Google Cloud Colab Enterprise logo" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" width="32px"><br> Run in Colab Enterprise
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_sd_2_1_local_finetuning_dreambooth.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
</table>

## Overview

This notebook demonstrates how to finetune [stabilityai/stable-diffusion-2-1](https://huggingface.co/stabilityai/stable-diffusion-2-1) with [Dreambooth](https://huggingface.co/docs/diffusers/training/dreambooth) locally in a Colab notebook and to test it with a local `text-2-image` prediction pipeline.

### Objective

- Finetune the stabilityai/stable-diffusion-2-1 model with [Dreambooth](https://huggingface.co/docs/diffusers/training/dreambooth) locally in a notebook.
- Run predictions for text-to-image in a local pipeline.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Colab Enterprise

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and [Colab Enterprise pricing](https://cloud.google.com/colab/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Dreambooth Finetune in a notebook

In [None]:
# @title Check if the Colab VM has GPU

# @markdown **Important:** This notebook requires a GPU runtime to function correctly.
# @markdown The default Colab runtime does not have a GPU and will not work. Please
# @markdown create a GPU runtime by following the instructions at:
# @markdown   1. [Create a runtime template](https://cloud.google.com/vertex-ai/docs/colab/create-runtime-template#create)
# @markdown   1. [Create a runtime](https://cloud.google.com/vertex-ai/docs/colab/create-runtime#create) \
# @markdown
# @markdown Once you have created a GPU runtime, you can use this notebook to run Dreambooth training locally within Colab.

import subprocess

# Probe for a usable GPU by running `nvidia-smi`. A non-zero exit status means
# the driver could not talk to a GPU. `subprocess.run` raises OSError (for
# example FileNotFoundError) when the executable cannot be launched at all,
# which is the usual situation on a CPU-only runtime, so both cases are mapped
# to the same actionable error below.
try:
    gpu_available = subprocess.run(["nvidia-smi"]).returncode == 0
except OSError:
    gpu_available = False

if not gpu_available:
    raise RuntimeError(
        "Cannot communicate with GPU. Make sure you are using a GPU Colab runtime. "
        "Go to the Runtimes menu and select/create a runtime with GPUs."
    )

In [None]:
# @title Prepare the virtual environment

! pip install --upgrade pip
# Git clone the Huggingface diffusers code repo
! git clone --depth 1 --branch v0.25.1 https://github.com/huggingface/diffusers.git

print("Installing diffusers from the source")
! pip install -e /content/diffusers
print("Installing the requirements for dreambooth finetune")
! pip install -r /content/diffusers/examples/dreambooth/requirements.txt

! pip install bitsandbytes==0.43.1

In [None]:
# @title Prepare the example dataset

# @markdown For this example, we'll download some images from Huggingface. If you
# @markdown already have a dataset you wish to use, choose the option
# @markdown `Use your own` and upload the images from your local computer.
#
import glob
import shutil

from google.colab import files
from huggingface_hub import snapshot_download
from PIL import Image

local_dir = "/content/dreambooth/dog/"
! rm -rf $local_dir
! mkdir -p $local_dir

dataset_source = "Huggingface"  # @param ["Huggingface", "Use your own"]
if dataset_source == "Huggingface":
    snapshot_download(
        "diffusers/dog-example",
        local_dir=local_dir,
        repo_type="dataset",
        ignore_patterns=".gitattributes",
    )
else:
    uploaded = files.upload()
    for name, data in uploaded.items():
        shutil.copy2(name, local_dir)


def image_grid(imgs, rows, cols, resize=256):
    """Tile images left-to-right, top-to-bottom into one RGB image.

    Args:
        imgs: Sequence of image objects exposing `size`, `resize` and usable
            as the source of `paste`. Must contain at least one image.
        rows: Number of rows in the grid.
        cols: Number of columns in the grid.
        resize: Edge length each image is resized to (square) before tiling,
            or None to keep the images as they are.

    Returns:
        A new RGB image of size (cols * w, rows * h), where (w, h) is the size
        of the first (possibly resized) image.
    """
    tiles = imgs if resize is None else [tile.resize((resize, resize)) for tile in imgs]
    tile_w, tile_h = tiles[0].size
    sheet = Image.new("RGB", size=(cols * tile_w, rows * tile_h))

    for position, tile in enumerate(tiles):
        # Row-major placement: the column index wraps every `cols` images.
        row_idx, col_idx = divmod(position, cols)
        sheet.paste(tile, box=(col_idx * tile_w, row_idx * tile_h))
    return sheet


# change path to display images from your local dir
img_paths = "/content/dreambooth/dog/*.jpeg"
imgs = [Image.open(path) for path in glob.glob(img_paths)]

num_imgs_to_preview = 5
image_grid(imgs[:num_imgs_to_preview], 1, num_imgs_to_preview)

! rm -rf $local_dir/.huggingface
! ls -alt $local_dir

In [None]:
# @title Train

import locale

locale.getpreferredencoding = lambda: "UTF-8"
! accelerate config default

model_id = "stabilityai/stable-diffusion-2-1" # @param {type:"string"}
instance_prompt = "a photo of sks dog" # @param {type:"string"}
learning_rate = 2e-6 # @param {type:"number"}
resolution = 768 # @param {type:"number"}
train_steps = 200 # @param {type:"number"}
output_dir = "/content/dreambooth/output_dir"

local_dir = "/content/dreambooth/dog/"

!accelerate launch \
  /content/diffusers/examples/dreambooth/train_dreambooth.py \
  --pretrained_model_name_or_path="$model_id" \
  --instance_data_dir="$local_dir" \
  --class_data_dir="$local_dir" \
  --output_dir="$output_dir" \
  --instance_prompt="$instance_prompt" \
  --resolution="$resolution" \
  --learning_rate="$learning_rate" \
  --max_train_steps="$train_steps" \
  --mixed_precision="fp16" \
  --train_batch_size=1 \
  --gradient_accumulation_steps=1 \
  --gradient_checkpointing \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --use_8bit_adam \
  --checkpointing_steps=100 \
  --seed=42

In [None]:
# @title Load the finetuned model checkpoint to a local diffusion pipeline.

# @markdown `text-to-image` lets you send text prompts to the pipeline to generate images.
import torch
from diffusers import AutoPipelineForText2Image

# Build a text-to-image pipeline from the directory the training cell wrote to
# (`output_dir`), loading half-precision safetensors weights, then move it to
# the GPU.
pipe = AutoPipelineForText2Image.from_pretrained(
    output_dir,
    torch_dtype=torch.float16,
    use_safetensors=True,
)
pipe = pipe.to("cuda")

prompt = "a photo of sks dog in a bucket"  # @param {type: "string"}
height = 768  # @param {type:"number"}
width = 768  # @param {type:"number"}
num_inference_steps = 25  # @param {type:"number"}
guidance_scale = 7.5  # @param {type:"number"}

# Collect the form values once and forward them as keyword arguments.
generation_args = dict(
    prompt=prompt,
    height=height,
    width=width,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
)
images = pipe(**generation_args).images
display(images[0])