# Imports and parameters

In [1]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import pandas as pd
import tqdm
import torch
import shutil
import sys

from diffusers import DDIMScheduler, DiffusionPipeline, StableDiffusionPipeline
# from lora_diffusion import inject_trainable_lora, extract_lora_ups_down
from pathlib import Path
from PIL import Image
from typing import Optional

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def find_project_root(start: Optional[Path] = None) -> Path:
    """Locate the repository root by walking upwards until a ``.git`` entry is found.

    Args:
        start: Directory to begin the search from. Defaults to the current
            working directory.

    Returns:
        The nearest directory, starting with ``start`` itself and then moving
        through its ancestors, that contains a ``.git`` entry. If none does, a
        warning is printed and the resolved ``start`` directory is returned, so
        paths built from the result stay next to the notebook instead of
        pointing at the filesystem root.
    """
    origin = Path("." if start is None else start).resolve()

    # `parents` ends at the filesystem root, so the search always terminates.
    for candidate in (origin, *origin.parents):
        if (candidate / ".git").exists():
            return candidate

    print("WARNING: No .git dir found")
    return origin


PROJECT_ROOT = find_project_root()

In [3]:
# Params

# Base checkpoint used for training
BASE_MODEL = "runwayml/stable-diffusion-v1-5"

# Data locations
INPUT_DATA = "data/external/pokemon"
POKEMON_STATS = "data/external/pokemon_stats.csv"
TRAIN_DATA = "data/processed/pokemon"

# Which Pokémon to train on
TARGET_POKEMON_TYPE = "water"
TARGET_POKEMON_GENERATION = 1

# Output location of the LoRA; its name is derived from the targets above
MODEL_DIRECTORY = "models"
LORA_NAME = f"type-{TARGET_POKEMON_TYPE}-generation-{TARGET_POKEMON_GENERATION}"
LORA_PATH = f"{MODEL_DIRECTORY}/{LORA_NAME}"

# Get subset of relevant Pokémon

In [4]:
# Load the stats table and keep only the columns needed for filtering
pokemon = pd.read_csv(PROJECT_ROOT / POKEMON_STATS)
pokemon = pokemon[["pokedex_number", "name", "type1", "type2", "generation"]]

# Start from every row so that `subset` exists even when a filter is switched off
subset = pokemon

# 'all', 'none' or '' (any casing) switches the type filter off
if TARGET_POKEMON_TYPE.lower() not in ['all', 'none', '']:
    # A row qualifies if either type column matches; a single mask keeps
    # each matching row exactly once
    has_target_type = (subset['type1'] == TARGET_POKEMON_TYPE) | (subset['type2'] == TARGET_POKEMON_TYPE)
    subset = subset.loc[has_target_type].sort_values('pokedex_number')

# -1 or 0 switches the generation filter off
if TARGET_POKEMON_GENERATION not in [-1, 0]:
    subset = subset.loc[subset['generation'] == TARGET_POKEMON_GENERATION]

subset.head()

Unnamed: 0,pokedex_number,name,type1,type2,generation
6,7,Squirtle,water,,1
7,8,Wartortle,water,,1
8,9,Blastoise,water,,1
53,54,Psyduck,water,,1
54,55,Golduck,water,,1


# Resize training images to desired resolution

In [5]:
# Start from an empty output directory so sprites from a previous subset never linger
train_dir = PROJECT_ROOT / TRAIN_DATA
if train_dir.exists():
    shutil.rmtree(train_dir)
# parents=True: intermediate directories may not exist yet
train_dir.mkdir(parents=True)

# Build the lookup once instead of converting the column on every iteration
wanted_numbers = set(subset['pokedex_number'].tolist())

# Resize training images and save to processed directory
for image_name in os.listdir(PROJECT_ROOT / INPUT_DATA):

    # The part before the first '.' is parsed as the Pokédex number; entries
    # that do not parse (e.g. hidden files such as .DS_Store) are skipped
    # instead of aborting the whole loop
    try:
        pokedex_number = int(image_name.split('.')[0])
    except ValueError:
        continue

    # Only keep sprites of our defined subset (e.g. water type generation 1)
    if pokedex_number not in wanted_numbers:
        continue

    # The context manager closes the source file once the resized copy exists
    with Image.open(PROJECT_ROOT / INPUT_DATA / image_name) as pokemon_sprite:
        pokemon_sprite_resized = pokemon_sprite.resize((512, 512))

    # Add leading zeroes so every file name is at least 8 characters long
    pokemon_sprite_resized.save(train_dir / image_name.zfill(8))


# Train LoRA (bash)

In [6]:
%%bash -s "$PROJECT_ROOT" "$BASE_MODEL" "$TRAIN_DATA" "$LORA_PATH"

# The positional arguments are the notebook variables named on the %%bash line.
export PROJECT_ROOT="$1"
export BASE_MODEL="$2"
export TRAIN_DATA="$3"
export LORA_PATH="$4"

# NEED TO RUN THESE ONCE
# git clone --depth 1 --branch v0.14.0 https://github.com/huggingface/diffusers.git $PROJECT_ROOT/diffusers
# pip3.10 install -r "${PROJECT_ROOT}/diffusers/examples/dreambooth/requirements.txt"
# accelerate config default

# Every expansion below is quoted so a project path containing spaces
# is passed to the training script as a single argument.
# --mps needed for Mac M1+
accelerate launch --mps "${PROJECT_ROOT}/diffusers/examples/dreambooth/train_dreambooth_lora.py" \
  --pretrained_model_name_or_path="${BASE_MODEL}" \
  --instance_data_dir="${PROJECT_ROOT}/${TRAIN_DATA}" \
  --output_dir="${PROJECT_ROOT}/${LORA_PATH}" \
  --instance_prompt="Digital art of rcdw pokemon" \
  --resolution=512 \
  --train_batch_size=1 \
  --gradient_accumulation_steps=1 \
  --checkpointing_steps=100 \
  --learning_rate=1e-4 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --max_train_steps=500 \
  --validation_prompt="Digital art of rcdw pokemon" \
  --validation_epochs=50 \
  --report_to="wandb" \
  --seed="0"

04/12/2023 11:21:34 - INFO - __main__ - Distributed environment: NO
Num processes: 1
Process index: 0
Local process index: 0
Device: mps
Mixed precision type: no

You are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
{'clip_sample_range', 'variance_type', 'prediction_type'} was not found in config. Values will be initialized to default values.
{'scaling_factor'} was not found in config. Values will be initialized to default values.
{'projection_class_embeddings_input_dim', 'only_cross_attention', 'conv_in_kernel', 'use_linear_projection', 'num_class_embeds', 'dual_cross_attention', 'upcast_attention', 'timestep_post_act', 'time_cond_proj_dim', 'conv_out_kernel', 'resnet_time_scale_shift', 'time_embedding_type', 'mid_block_type', 'class_embed_type'} was not found in config. Values will be initialized to default values.
wandb: Currently logged in as: robdewit. Use `wandb login --relogin`

04/12/2023 11:37:10 - INFO - accelerate.checkpointing - Optimizer state saved in /Users/robdewit/Documents/text2image/models/type-water-generation-1/checkpoint-500/optimizer.bin
04/12/2023 11:37:10 - INFO - accelerate.checkpointing - Random states saved in /Users/robdewit/Documents/text2image/models/type-water-generation-1/checkpoint-500/random_states_0.pkl
04/12/2023 11:37:10 - INFO - accelerate.checkpointing - Saving the state of AttnProcsLayers to /Users/robdewit/Documents/text2image/models/type-water-generation-1/checkpoint-500/custom_checkpoint_0.pkl
04/12/2023 11:37:10 - INFO - __main__ - Saved state to /Users/robdewit/Documents/text2image/models/type-water-generation-1/checkpoint-500
Steps: 100%|██████████| 500/500 [15:29<00:00,  1.68s/it, loss=0.0907, lr=0.0001]Model weights saved in /Users/robdewit/Documents/text2image/models/type-water-generation-1/pytorch_lora_weights.bin

Fetching 15 files: 100%|██████████| 15/15 [00:00<00:00, 130528.13it/s]
{'requires_safety_checker'} was 

In [None]:
# accelerate launch --mps "diffusers/examples/dreambooth/train_dreambooth_lora.py" \
#   --pretrained_model_name_or_path='runwayml/stable-diffusion-v1-5' \
#   --instance_data_dir='data/processed/pokemon' \
#   --output_dir='models/test1' \
#   --instance_prompt="a drawing of a pokemon" \
#   --resolution=512 \
#   --train_batch_size=1 \
#   --gradient_accumulation_steps=1 \
#   --checkpointing_steps=100 \
#   --learning_rate=1e-4 \
#   --lr_scheduler="constant" \
#   --lr_warmup_steps=0 \
#   --max_train_steps=300 \
#   --validation_prompt="a drawing of a pokemon" \
#   --validation_epochs=50 \
#   --seed="0" \
#   --report_to="wandb"

# Set up SD pipeline

In [None]:
# Load the base checkpoint named by BASE_MODEL (the same constant the training
# cell passes as --pretrained_model_name_or_path) with float16 weights
pipeline = StableDiffusionPipeline.from_pretrained(BASE_MODEL, torch_dtype=torch.float16)
# Replace the default scheduler with DDIM, reusing the existing scheduler's config
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)

In [None]:
# Load LoRA on top of base model weights
# PROJECT_ROOT / LORA_PATH is the --output_dir the training cell writes to
pipeline.unet.load_attn_procs(str(PROJECT_ROOT / LORA_PATH))

In [None]:
# Device choice below targets Apple M1/M2 silicon
# For other hardware, see: https://huggingface.co/docs/diffusers/optimization/fp16
pipeline = pipeline.to("mps")

# Recommended if your computer has < 64 GB of RAM
pipeline.enable_attention_slicing()

# Generate images

In [None]:
# Prompt shared by the warm-up pass and the real generation
prompt = "a grass animal in the style of Ken Sugimori"
# Dedicated, fixed-seed generator used only by the real generation
generator = torch.Generator().manual_seed(1024)

# Throwaway single-step pass; needed: https://github.com/huggingface/diffusers/issues/372
_ = pipeline(prompt, num_inference_steps=1)

# Real generation: 30 steps, keeping the first image of the result
result = pipeline(prompt, num_inference_steps=30, generator=generator)
image = result.images[0]

# TODO: https://huggingface.co/docs/diffusers/using-diffusers/reusing_seeds

In [None]:
# Earlier attempts at showing / saving the result, left disabled:
# imgplot = plt.imshow(images[0])
# plt.show(image)
# # image.save(f"sd-output.png")

# Show the generated image in the cell output
display(image)