In [None]:
# Mount Google Drive at /content/drive (Colab only: relies on google.colab).
# Later cells read files under /content/drive/MyDrive/3d_project/.

from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install or upgrade the runtime dependencies. No versions are pinned, so the
# resolved packages can differ between runs of this cell.
# NOTE(review): upgrading torch inside a live Colab kernel may require a
# runtime restart before the new version is picked up — confirm.
!pip install -U transformers ftfy gradio accelerate torch

In [None]:
# Install diffusers straight from its GitHub repository; no tag or commit is
# pinned in the URL.
!pip install git+https://github.com/huggingface/diffusers

In [None]:
# Download the DreamBooth inpainting training script from the diffusers `main`
# branch into the current working directory.
# NOTE(review): later cells also launch `train_dreambooth.py`, which is not
# downloaded in the cells shown here, and they run after a `%cd` into the Drive
# project folder — presumably both scripts already live there; confirm.
!wget https://raw.githubusercontent.com/huggingface/diffusers/main/examples/research_projects/dreambooth_inpaint/train_dreambooth_inpaint.py

In [None]:
import inspect
from typing import List, Optional, Union
import numpy as np
import torch
import os
import PIL
import gradio as gr
from diffusers import StableDiffusionInpaintPipeline
import requests
from io import BytesIO
from PIL import Image

In [None]:
def image_grid(imgs, rows, cols, resize=256):
    """Tile PIL images into a single ``rows`` x ``cols`` contact sheet.

    Images are placed left to right, top to bottom. Every cell has the size of
    the first image (after the optional resize), so inputs of differing sizes
    should be passed with ``resize`` set. Cells without an image stay blank.

    Args:
        imgs: Iterable of PIL images; must be non-empty and contain at most
            ``rows * cols`` items.
        rows: Number of grid rows.
        cols: Number of grid columns.
        resize: Edge length in pixels of the square each image is resized to
            before pasting, or ``None`` to paste the images unchanged.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)`` where ``(w, h)`` is
        the size of the first (possibly resized) image.

    Raises:
        ValueError: If ``imgs`` is empty or holds more images than grid cells.
    """
    imgs = list(imgs)
    # Validate up front: an empty list used to fail with an opaque IndexError,
    # and surplus images were pasted outside the canvas and silently lost.
    if not imgs:
        raise ValueError("image_grid() needs at least one image")
    if len(imgs) > rows * cols:
        raise ValueError(
            f"image_grid() got {len(imgs)} images for a {rows}x{cols} grid"
        )

    if resize is not None:
        imgs = [img.resize((resize, resize)) for img in imgs]
    w, h = imgs[0].size
    grid = PIL.Image.new("RGB", size=(cols * w, rows * h))

    for i, img in enumerate(imgs):
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * w, row * h))
    return grid

In [None]:
# Load the stock Stable Diffusion 2 inpainting checkpoint in float16 and move
# it to the GPU; the following cells sample from it with an empty mask.
model_path = "stabilityai/stable-diffusion-2-inpainting"
device = "cuda"

vanilla_sd = StableDiffusionInpaintPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
vanilla_sd = vanilla_sd.to(device)

In [None]:
# Sample from the vanilla inpainting pipeline, supplying the same placeholder
# picture as both the init image and the mask.
prompt = "POV, walkthrough, castle from Frozen, masterpiece, indoor scene"
guidance_scale, num_samples = 7.5, 3

# Fixed seed for repeatable samples; pick another value for a different batch.
generator = torch.Generator(device="cuda")
generator.manual_seed(1)

empty_mask = Image.open("/content/drive/MyDrive/3d_project/empty_mask.png")
empty_mask = empty_mask.convert("RGB")

images = vanilla_sd(
    prompt=prompt,
    guidance_scale=guidance_scale,
    num_images_per_prompt=num_samples,
    generator=generator,
    image=empty_mask,
    mask_image=empty_mask,  # per the author's note: entire image unmasked
).images

In [None]:
# Display the batch as a single row, one column per sample.
image_grid(images, rows=1, cols=num_samples)

In [None]:
# Same empty-mask probe of the vanilla inpainting pipeline, with a
# Spirited Away prompt.
prompt = "POV, walkthrough, train from Studio Ghibli's Spirited Away, masterpiece, indoor scene"
guidance_scale, num_samples = 7.5, 3

# Fixed seed for repeatable samples; pick another value for a different batch.
generator = torch.Generator(device="cuda")
generator.manual_seed(1)

empty_mask = Image.open("/content/drive/MyDrive/3d_project/empty_mask.png")
empty_mask = empty_mask.convert("RGB")

images = vanilla_sd(
    prompt=prompt,
    guidance_scale=guidance_scale,
    num_images_per_prompt=num_samples,
    generator=generator,
    image=empty_mask,
    mask_image=empty_mask,  # per the author's note: entire image unmasked
).images

In [None]:
# Display the batch as a single row, one column per sample.
image_grid(images, rows=1, cols=num_samples)

In [None]:
# Empty-mask probe with the non-inpainting SD 2.1 base checkpoint, loaded
# through the inpainting pipeline class in float16 on the GPU.
model_path = "stabilityai/stable-diffusion-2-1-base"
device = "cuda"

vanilla_sd_noninpaint = StableDiffusionInpaintPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
vanilla_sd_noninpaint = vanilla_sd_noninpaint.to(device)

prompt = "POV, walkthrough, castle from Frozen, masterpiece, indoor scene"
guidance_scale, num_samples = 7.5, 3

# Fixed seed for repeatable samples; pick another value for a different batch.
generator = torch.Generator(device="cuda")
generator.manual_seed(0)

empty_mask = Image.open("/content/drive/MyDrive/3d_project/empty_mask.png")
empty_mask = empty_mask.convert("RGB")

images = vanilla_sd_noninpaint(
    prompt=prompt,
    guidance_scale=guidance_scale,
    num_images_per_prompt=num_samples,
    generator=generator,
    image=empty_mask,
    mask_image=empty_mask,  # per the author's note: entire image unmasked
).images

# Display the batch as a single row, one column per sample.
image_grid(images, rows=1, cols=num_samples)

In [None]:
# Project folder on the mounted Drive.
# NOTE(review): none of the cells shown here read `local_dir`; they spell the
# same path out literally instead.
local_dir = "/content/drive/MyDrive/3d_project/"

In [None]:
# NOTE(review): torch is already upgraded by the earlier `pip install -U ...`
# cell and imported above, so this install looks redundant — confirm before
# removing.
!pip install torch

In [None]:
# Switch the working directory to the Drive project folder. The training and
# model-loading cells below use relative paths such as "frozen/data".
%cd /content/drive/MyDrive/3d_project/

In [None]:
# Run accelerate's configuration command before the `accelerate launch` cells.
# NOTE(review): this command is presumably interactive — confirm it can be
# answered from a notebook cell.
!accelerate config

In [None]:
# Log in to the Hugging Face Hub from the notebook. The training cells that
# pass --push_to_hub presumably rely on this login.
from huggingface_hub import notebook_login
notebook_login()

# FROZEN

## DreamBooth on Stable Diffusion 2.1 (non-inpainting)

In [None]:
# Launch train_dreambooth.py through accelerate on the images in frozen/data,
# starting from stabilityai/stable-diffusion-2-1-base. --train_text_encoder is
# passed, so the text encoder is fine-tuned too. 500 steps, lr 5e-6, batch
# size 1, fp16; output goes to frozen/frozen-stable-diffusion-non-inpaint and
# --push_to_hub is set. All paths are relative to the working directory.
# NOTE: `Path` is imported here but not used in this cell.
from pathlib import Path

!accelerate launch train_dreambooth.py \
    --train_text_encoder \
    --pretrained_model_name_or_path="stabilityai/stable-diffusion-2-1-base"  \
    --instance_data_dir="frozen/data" \
    --output_dir="frozen/frozen-stable-diffusion-non-inpaint" \
    --instance_prompt="a photo of sks frozen" \
    --resolution=512 \
    --mixed_precision="fp16" \
    --train_batch_size=1 \
    --learning_rate=5e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps=500 \
    --gradient_accumulation_steps=1 \
    --push_to_hub
# Leftover scratch notes, not part of the command above:
#   - "2 ?": meaning unclear (NOTE(review): confirm or remove)
#   - --train_text_encoder: already enabled above
#   - --seed="0": disabled


In [None]:
# Load the "emily49/frozen-stable-diffusion-non-inpaint" checkpoint through the
# inpainting pipeline class, in float16 on the GPU.
model_path = "emily49/frozen-stable-diffusion-non-inpaint"
device = "cuda"

finetuned_sd = StableDiffusionInpaintPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
finetuned_sd = finetuned_sd.to(device)

In [None]:
# Sample six images from the fine-tuned pipeline using the "sks frozen"
# token, with the placeholder picture as both init image and mask.
prompt = "POV, walkthrough, castle from sks frozen, masterpiece, indoor scene"
guidance_scale, num_samples = 7.5, 6

# Fixed seed for repeatable samples; pick another value for a different batch.
generator = torch.Generator(device="cuda")
generator.manual_seed(0)

empty_mask = Image.open("/content/drive/MyDrive/3d_project/empty_mask.png")
empty_mask = empty_mask.convert("RGB")

images = finetuned_sd(
    prompt=prompt,
    guidance_scale=guidance_scale,
    num_images_per_prompt=num_samples,
    generator=generator,
    image=empty_mask,
    mask_image=empty_mask,  # per the author's note: entire image unmasked
).images

In [None]:
# Display the batch as a 2 x 3 grid.
image_grid(images, rows=2, cols=3)

## DreamBooth on Stable Diffusion 1.5 (non-inpainting), for testing with ControlNet

In [None]:
# Launch train_dreambooth.py through accelerate on the images in frozen/data,
# starting from runwayml/stable-diffusion-v1-5. --train_text_encoder is passed,
# so the text encoder is fine-tuned too. 500 steps, lr 5e-6, batch size 1,
# fp16; output goes to frozen/frozen-stable-diffusion-non-inpaint-1-5 and
# --push_to_hub is set. All paths are relative to the working directory.
# NOTE: `Path` is imported here but not used in this cell.
from pathlib import Path

!accelerate launch train_dreambooth.py \
    --train_text_encoder \
    --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5"  \
    --instance_data_dir="frozen/data" \
    --output_dir="frozen/frozen-stable-diffusion-non-inpaint-1-5" \
    --instance_prompt="a photo of sks frozen" \
    --resolution=512 \
    --mixed_precision="fp16" \
    --train_batch_size=1 \
    --learning_rate=5e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps=500 \
    --gradient_accumulation_steps=1 \
    --push_to_hub
# Leftover scratch notes, not part of the command above:
#   - "2 ?": meaning unclear (NOTE(review): confirm or remove)
#   - --train_text_encoder: already enabled above
#   - --seed="0": disabled


In [None]:
# Load the "emily49/frozen-stable-diffusion-non-inpaint-1-5" checkpoint through
# the inpainting pipeline class, in float16 on the GPU.
model_path = "emily49/frozen-stable-diffusion-non-inpaint-1-5"
device = "cuda"

finetuned_sd = StableDiffusionInpaintPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
finetuned_sd = finetuned_sd.to(device)



In [None]:
# Sample three 600x600 images (50 inference steps) from the fine-tuned
# pipeline, with the placeholder picture as both init image and mask.
prompt = "POV, walkthrough, castle from sks frozen, masterpiece, indoor scene, best quality"
guidance_scale, num_samples = 7.5, 3

# Fixed seed for repeatable samples; pick another value for a different batch.
generator = torch.Generator(device="cuda")
generator.manual_seed(4)

empty_mask = Image.open("/content/drive/MyDrive/3d_project/empty_mask.png")
empty_mask = empty_mask.convert("RGB")

images = finetuned_sd(
    prompt=prompt,
    guidance_scale=guidance_scale,
    num_images_per_prompt=num_samples,
    num_inference_steps=50,
    generator=generator,
    image=empty_mask,
    mask_image=empty_mask,  # per the author's note: entire image unmasked
    height=600,
    width=600,
).images

In [None]:
# Display the batch as a single row of three.
image_grid(images, rows=1, cols=3)

## DreamBooth on Stable Diffusion 2 (inpainting). Tested with the 3D pipeline; it didn't work well.

In [None]:
# Launch train_dreambooth_inpaint.py through accelerate on the images in
# frozen/data, starting from stabilityai/stable-diffusion-2-inpainting, with
# --train_text_encoder set. 500 steps, lr 5e-6, batch size 1, fp16; output
# goes to frozen/frozen-stable-diffusion-inpaint and --push_to_hub is set.
# All paths are relative to the working directory.
# NOTE: `Path` is imported here but not used in this cell.

from pathlib import Path

!accelerate launch train_dreambooth_inpaint.py \
    --train_text_encoder \
    --pretrained_model_name_or_path="stabilityai/stable-diffusion-2-inpainting"  \
    --instance_data_dir="frozen/data" \
    --output_dir="frozen/frozen-stable-diffusion-inpaint" \
    --instance_prompt="a photo of sks frozen" \
    --resolution=512 \
    --mixed_precision="fp16" \
    --train_batch_size=1 \
    --learning_rate=5e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps=500 \
    --gradient_accumulation_steps=1 \
    --push_to_hub
# Leftover scratch notes, not part of the command above:
#   - "2 ?": meaning unclear (NOTE(review): confirm or remove)
#   - --train_text_encoder: already enabled above
#   - --seed="0": disabled


In [None]:
# Load the "emily49/frozen-stable-diffusion-inpaint" checkpoint in float16 on
# the GPU and sample three 600x600 images with the placeholder picture as both
# init image and mask.
model_path = "emily49/frozen-stable-diffusion-inpaint"
device = "cuda"

finetuned_sd = StableDiffusionInpaintPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
finetuned_sd = finetuned_sd.to(device)

prompt = "POV, walkthrough, castle from sks frozen, masterpiece, indoor scene"
guidance_scale, num_samples = 7.5, 3

# Fixed seed for repeatable samples; pick another value for a different batch.
generator = torch.Generator(device="cuda")
generator.manual_seed(0)

empty_mask = Image.open("/content/drive/MyDrive/3d_project/empty_mask.png")
empty_mask = empty_mask.convert("RGB")

images = finetuned_sd(
    prompt=prompt,
    guidance_scale=guidance_scale,
    num_images_per_prompt=num_samples,
    generator=generator,
    image=empty_mask,
    mask_image=empty_mask,  # per the author's note: entire image unmasked
    height=600,
    width=600,
).images

In [None]:
# Display the batch as a single row of three.
image_grid(images, rows=1, cols=3)

# SPIRITED AWAY

In [None]:
# Launch train_dreambooth.py through accelerate on the images in
# spirited_away/data, starting from stabilityai/stable-diffusion-2-1-base.
# --train_text_encoder is NOT passed in this run. 500 steps, lr 5e-6, batch
# size 1, fp16; output goes to
# spirited_away/spirited-away-stable-diffusion-non-inpaint-non-text and nothing
# is pushed to the Hub. All paths are relative to the working directory.
# NOTE: `Path` is imported here but not used in this cell.
from pathlib import Path

!accelerate launch train_dreambooth.py \
    --pretrained_model_name_or_path="stabilityai/stable-diffusion-2-1-base"  \
    --instance_data_dir="spirited_away/data" \
    --output_dir="spirited_away/spirited-away-stable-diffusion-non-inpaint-non-text" \
    --instance_prompt="a photo of sks spiritedaway" \
    --resolution=512 \
    --mixed_precision="fp16" \
    --train_batch_size=1 \
    --learning_rate=5e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps=500 \
    --gradient_accumulation_steps=1
# Leftover scratch notes, not part of the command above:
#   - --push_to_hub: disabled
#   - "2 ?": meaning unclear (NOTE(review): confirm or remove)
#   - --train_text_encoder: disabled
#   - --seed="0": disabled


In [None]:
# Load the locally saved run without text-encoder training (path is relative
# to the working directory) through the inpainting pipeline class, in float16
# on the GPU.
model_path = "spirited_away/spirited-away-stable-diffusion-non-inpaint-non-text"
device = "cuda"

finetuned_sd_non_text = StableDiffusionInpaintPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
finetuned_sd_non_text = finetuned_sd_non_text.to(device)

In [None]:
# Sample six images from the pipeline trained without the text encoder, with
# the placeholder picture as both init image and mask.
prompt = "POV, walkthrough, train from sks spiritedaway, masterpiece, indoor scene"
guidance_scale, num_samples = 7.5, 6

# Fixed seed for repeatable samples; pick another value for a different batch.
generator = torch.Generator(device="cuda")
generator.manual_seed(1)

empty_mask = Image.open("/content/drive/MyDrive/3d_project/empty_mask.png")
empty_mask = empty_mask.convert("RGB")

images = finetuned_sd_non_text(
    prompt=prompt,
    guidance_scale=guidance_scale,
    num_images_per_prompt=num_samples,
    generator=generator,
    image=empty_mask,
    mask_image=empty_mask,  # per the author's note: entire image unmasked
).images

In [None]:
# Display the batch as a 2 x 3 grid.
image_grid(images, rows=2, cols=3)

In [None]:
# Launch train_dreambooth.py through accelerate on the images in
# spirited_away/data, starting from stabilityai/stable-diffusion-2-1-base.
# --train_text_encoder is passed (near the end of the option list), so the
# text encoder is fine-tuned. 500 steps, lr 5e-6, batch size 1, fp16; output
# goes to spirited_away/spirited-away-stable-diffusion-non-inpaint and
# --push_to_hub is set. All paths are relative to the working directory.
# NOTE: `Path` is imported here but not used in this cell.
from pathlib import Path

!accelerate launch train_dreambooth.py \
    --pretrained_model_name_or_path="stabilityai/stable-diffusion-2-1-base"  \
    --instance_data_dir="spirited_away/data" \
    --output_dir="spirited_away/spirited-away-stable-diffusion-non-inpaint" \
    --instance_prompt="a photo of sks spiritedaway" \
    --resolution=512 \
    --mixed_precision="fp16" \
    --train_batch_size=1 \
    --learning_rate=5e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps=500 \
    --gradient_accumulation_steps=1 \
    --train_text_encoder \
    --push_to_hub
# Leftover scratch notes, not part of the command above:
#   - "2 ?": meaning unclear (NOTE(review): confirm or remove)
#   - --train_text_encoder: already enabled above
#   - --seed="0": disabled


In [None]:
# Load the locally saved text-encoder run (path is relative to the working
# directory) through the inpainting pipeline class, in float16 on the GPU.
model_path = "spirited_away/spirited-away-stable-diffusion-non-inpaint"
device = "cuda"

finetuned_sd = StableDiffusionInpaintPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
finetuned_sd = finetuned_sd.to(device)

In [None]:
# Sample six images from the fine-tuned pipeline using the
# "sks spiritedaway" token, with the placeholder picture as both init image
# and mask.
prompt = "POV, walkthrough, train from sks spiritedaway, masterpiece, indoor scene"
guidance_scale, num_samples = 7.5, 6

# Fixed seed for repeatable samples; pick another value for a different batch.
generator = torch.Generator(device="cuda")
generator.manual_seed(2)

empty_mask = Image.open("/content/drive/MyDrive/3d_project/empty_mask.png")
empty_mask = empty_mask.convert("RGB")

images = finetuned_sd(
    prompt=prompt,
    guidance_scale=guidance_scale,
    num_images_per_prompt=num_samples,
    generator=generator,
    image=empty_mask,
    mask_image=empty_mask,  # per the author's note: entire image unmasked
).images

In [None]:
# Display the batch as a 2 x 3 grid.
image_grid(images, rows=2, cols=3)

## DreamBooth on Stable Diffusion 2 (inpainting)

In [None]:
# Launch train_dreambooth_inpaint.py through accelerate on the images in
# spirited_away/data, starting from stabilityai/stable-diffusion-2-inpainting,
# with --train_text_encoder set. 500 steps, lr 5e-6, batch size 1, fp16;
# output goes to spirited_away/spirited-away-stable-diffusion-inpaint and
# --push_to_hub is set. All paths are relative to the working directory.
# NOTE: `Path` is imported here but not used in this cell.

from pathlib import Path

!accelerate launch train_dreambooth_inpaint.py \
    --train_text_encoder \
    --pretrained_model_name_or_path="stabilityai/stable-diffusion-2-inpainting"  \
    --instance_data_dir="spirited_away/data" \
    --output_dir="spirited_away/spirited-away-stable-diffusion-inpaint" \
    --instance_prompt="a photo of sks spiritedaway" \
    --resolution=512 \
    --mixed_precision="fp16" \
    --train_batch_size=1 \
    --learning_rate=5e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps=500 \
    --gradient_accumulation_steps=1 \
    --push_to_hub
# Leftover scratch notes, not part of the command above:
#   - "2 ?": meaning unclear (NOTE(review): confirm or remove)
#   - --train_text_encoder: already enabled above
#   - --seed="0": disabled


In [None]:
# Load the "emily49/spirited-away-stable-diffusion-inpaint" checkpoint in
# float16 on the GPU.
model_path = "emily49/spirited-away-stable-diffusion-inpaint"
device = "cuda"

finetuned_sd = StableDiffusionInpaintPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
finetuned_sd = finetuned_sd.to(device)

In [None]:
# Sample six 600x600 images from the fine-tuned inpainting pipeline, with the
# placeholder picture as both init image and mask.
prompt = "POV, walkthrough, train from sks spiritedaway, masterpiece, indoor scene"
guidance_scale, num_samples = 7.5, 6

# Fixed seed for repeatable samples; pick another value for a different batch.
generator = torch.Generator(device="cuda")
generator.manual_seed(2)

empty_mask = Image.open("/content/drive/MyDrive/3d_project/empty_mask.png")
empty_mask = empty_mask.convert("RGB")

images = finetuned_sd(
    prompt=prompt,
    guidance_scale=guidance_scale,
    num_images_per_prompt=num_samples,
    generator=generator,
    image=empty_mask,
    mask_image=empty_mask,  # per the author's note: entire image unmasked
    height=600,
    width=600,
).images

In [None]:
# Display the batch as a 2 x 3 grid.
image_grid(images, rows=2, cols=3)

## DreamBooth on Stable Diffusion 1.5 (non-inpainting), for testing with ControlNet

In [None]:
# Launch train_dreambooth.py through accelerate on the images in
# spirited_away/data, starting from runwayml/stable-diffusion-v1-5.
# --train_text_encoder is passed, so the text encoder is fine-tuned too.
# 500 steps, lr 5e-6, batch size 1, fp16; output goes to
# spirited_away/spirited-away-stable-diffusion-non-inpaint-1-5 and
# --push_to_hub is set. All paths are relative to the working directory.
# NOTE: `Path` is imported here but not used in this cell.
from pathlib import Path

!accelerate launch train_dreambooth.py \
    --train_text_encoder \
    --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5"  \
    --instance_data_dir="spirited_away/data" \
    --output_dir="spirited_away/spirited-away-stable-diffusion-non-inpaint-1-5" \
    --instance_prompt="a photo of sks spiritedaway" \
    --resolution=512 \
    --mixed_precision="fp16" \
    --train_batch_size=1 \
    --learning_rate=5e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps=500 \
    --gradient_accumulation_steps=1 \
    --push_to_hub
# Leftover scratch notes, not part of the command above:
#   - "2 ?": meaning unclear (NOTE(review): confirm or remove)
#   - --train_text_encoder: already enabled above
#   - --seed="0": disabled


# HARRY POTTER

In [None]:
# Launch train_dreambooth.py through accelerate on the images in
# harry_potter/data, starting from runwayml/stable-diffusion-v1-5.
# --train_text_encoder is passed, so the text encoder is fine-tuned too.
# 500 steps, lr 5e-6, batch size 1, fp16; output goes to
# harry_potter/harry-potter-stable-diffusion-non-inpaint-1-5 and
# --push_to_hub is set. All paths are relative to the working directory.
# NOTE: `Path` is imported here but not used in this cell.
from pathlib import Path

!accelerate launch train_dreambooth.py \
    --train_text_encoder \
    --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5"  \
    --instance_data_dir="harry_potter/data" \
    --output_dir="harry_potter/harry-potter-stable-diffusion-non-inpaint-1-5" \
    --instance_prompt="a photo of sks harrypotter" \
    --resolution=512 \
    --mixed_precision="fp16" \
    --train_batch_size=1 \
    --learning_rate=5e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps=500 \
    --gradient_accumulation_steps=1 \
    --push_to_hub
# Leftover scratch notes, not part of the command above:
#   - "2 ?": meaning unclear (NOTE(review): confirm or remove)
#   - --train_text_encoder: already enabled above
#   - --seed="0": disabled


# RATATOUILLE

In [None]:
# Launch train_dreambooth.py through accelerate on the images in
# ratatouille/data, starting from runwayml/stable-diffusion-v1-5.
# --train_text_encoder is passed, so the text encoder is fine-tuned too.
# 500 steps, lr 5e-6, batch size 1, fp16; output goes to
# ratatouille/ratatouille-stable-diffusion-non-inpaint-1-5 and --push_to_hub
# is set. All paths are relative to the working directory.
# NOTE: `Path` is imported here but not used in this cell.
from pathlib import Path

!accelerate launch train_dreambooth.py \
    --train_text_encoder \
    --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5"  \
    --instance_data_dir="ratatouille/data" \
    --output_dir="ratatouille/ratatouille-stable-diffusion-non-inpaint-1-5" \
    --instance_prompt="a photo of sks ratatouille" \
    --resolution=512 \
    --mixed_precision="fp16" \
    --train_batch_size=1 \
    --learning_rate=5e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps=500 \
    --gradient_accumulation_steps=1 \
    --push_to_hub
# Leftover scratch notes, not part of the command above:
#   - "2 ?": meaning unclear (NOTE(review): confirm or remove)
#   - --train_text_encoder: already enabled above
#   - --seed="0": disabled


# MARNIE

In [None]:
# Launch train_dreambooth.py through accelerate on the images in marnie/data,
# starting from runwayml/stable-diffusion-v1-5. --train_text_encoder is passed,
# so the text encoder is fine-tuned too. 500 steps, lr 5e-6, batch size 1,
# fp16; output goes to marnie/marnie-stable-diffusion-non-inpaint-1-5 and
# --push_to_hub is set. All paths are relative to the working directory.
# NOTE: `Path` is imported here but not used in this cell.
from pathlib import Path

!accelerate launch train_dreambooth.py \
    --train_text_encoder \
    --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5"  \
    --instance_data_dir="marnie/data" \
    --output_dir="marnie/marnie-stable-diffusion-non-inpaint-1-5" \
    --instance_prompt="a photo of sks marnie" \
    --resolution=512 \
    --mixed_precision="fp16" \
    --train_batch_size=1 \
    --learning_rate=5e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps=500 \
    --gradient_accumulation_steps=1 \
    --push_to_hub
# Leftover scratch notes, not part of the command above:
#   - "2 ?": meaning unclear (NOTE(review): confirm or remove)
#   - --train_text_encoder: already enabled above
#   - --seed="0": disabled


# BARBIE

In [None]:
# Launch train_dreambooth.py through accelerate on the images in barbie/data,
# starting from runwayml/stable-diffusion-v1-5. --train_text_encoder is passed,
# so the text encoder is fine-tuned too. 500 steps, lr 5e-6, batch size 1,
# fp16; output goes to barbie/barbie-stable-diffusion-non-inpaint-1-5 and
# --push_to_hub is set. All paths are relative to the working directory.
# NOTE: `Path` is imported here but not used in this cell.
from pathlib import Path

!accelerate launch train_dreambooth.py \
    --train_text_encoder \
    --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5"  \
    --instance_data_dir="barbie/data" \
    --output_dir="barbie/barbie-stable-diffusion-non-inpaint-1-5" \
    --instance_prompt="a photo of sks barbie" \
    --resolution=512 \
    --mixed_precision="fp16" \
    --train_batch_size=1 \
    --learning_rate=5e-6 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=0 \
    --max_train_steps=500 \
    --gradient_accumulation_steps=1 \
    --push_to_hub
# Leftover scratch notes, not part of the command above:
#   - "2 ?": meaning unclear (NOTE(review): confirm or remove)
#   - --train_text_encoder: already enabled above
#   - --seed="0": disabled
