# Lora Trainer XL

In [None]:
# @title ## 1. Install the trainer
import os

root_path = "/content"
trainer_dir = os.path.join(root_path, "trainer")

# @markdown Select the version of the trainer you want to use
trainer_version = "Original" # @param ["Original", "Forked (Added CAME and REX)"]

installed_dependencies = False
first_step_done = False

try:
  print("Installing trainer...")
  !apt -y update -qq
  !apt install -y python3.10-venv aria2 -qq

  installed_dependencies = True

  if trainer_version == "Original":
    !git clone https://github.com/derrian-distro/LoRA_Easy_Training_scripts_Backend {trainer_dir}
  else:
    !git clone https://github.com/Jelosus2/LoRA_Easy_Training_scripts_Backend {trainer_dir}
  !chmod 755 /content/trainer/install_310.sh

  os.chdir(trainer_dir)
  !yes | ./install_310.sh

  if "Forked" in trainer_version:
    print("Patching trainer...")
    !rm ./sd_scripts/library/train_util.py
    !cp ./custom/train_util.py ./sd_scripts/library/train_util.py

  os.chdir(root_path)
  first_step_done = True
  print("Done!")
except:
  print("Error intalling the trainer!")
  first_step_done = False

In [None]:
# @title ## 2. Setup the directories
import os
import shutil
from google.colab import drive

# Fixed locations under the Colab content root.
root_path = "/content"
trainer_dir, pretrained_model_dir, vae_dir, tagger_models_dir = (
  os.path.join(root_path, leaf)
  for leaf in ("trainer", "pretrained_model", "vae", "tagger_models")
)
drive_dir = os.path.join(root_path, "drive", "MyDrive")

# @markdown The name for your project. Make sure it can be used as a folder name
project_name = "My_first_lora" # @param {type: "string"}
# @markdown Specify the name for the directories. If you use drive it will be created on /content/drive/MyDrive. If you have multiple datasets separate each with a (,) (dataset1, dataset2, ...)
output_dir_name = "output" # @param {type: "string"}
dataset_dir_name = "dataset" # @param {type: "string"}
config_file_dir_name = "config" # @param {type: "string"}
tags_file_dir_name = "tags" # @param {type: "string"}
# @markdown Use Drive to store all the files and directories
use_drive = True # @param {type: "boolean"}

# Stays False until the directory setup further down succeeds.
second_step_done = False

# Config and tag folders live inside the project directory, which sits on
# Drive when use_drive is set and under the content root otherwise.
config_dir, tags_dir = (
  os.path.join(drive_dir if use_drive else root_path, project_name, leaf)
  for leaf in (config_file_dir_name, tags_file_dir_name)
)

def mount_drive_dir():
  """Return the project's base directory, mounting Google Drive when it is in use.

  Without Drive the base is ``root_path/project_name``. With Drive it is
  ``drive_dir/project_name``, and Drive is mounted first if ``drive_dir`` is
  not present yet.
  """
  if not use_drive:
    return os.path.join(root_path, project_name)

  # A missing drive_dir is treated as "Drive has not been mounted yet".
  already_mounted = os.path.exists(drive_dir)
  if not already_mounted:
    # The mount point is the parent of MyDrive.
    drive.mount(os.path.dirname(drive_dir))

  return os.path.join(drive_dir, project_name)

def make_directories():
  """Create the working directories for the project.

  Resolves the project base via mount_drive_dir(), removes the copy of the
  project that lives in the storage location that is NOT selected (the local
  copy when Drive is used, the Drive copy otherwise), then creates the shared
  model folders, the project folders and one folder per dataset name.
  """
  project_base = mount_drive_dir()

  # Only one copy of the project is kept: delete the one in the other location.
  other_copy = os.path.join(root_path if use_drive else drive_dir, project_name)
  if os.path.exists(other_copy):
    shutil.rmtree(other_copy)

  required_dirs = (
    pretrained_model_dir,
    vae_dir,
    os.path.join(project_base, output_dir_name),
    tagger_models_dir,
    config_dir,
    tags_dir,
  )
  for path in required_dirs:
    os.makedirs(path, exist_ok=True)

  # dataset_dir_name may hold several comma-separated names; spaces are dropped.
  dataset_names = dataset_dir_name.replace(" ", "").split(',')
  for name in dataset_names:
    os.makedirs(os.path.join(project_base, name), exist_ok=True)

def main():
  """Create every directory the notebook needs, reporting progress on stdout."""
  print("Setting up directories...")
  make_directories()
  print("Done!")

# second_step_done records the outcome; the dataset, tagging and training
# cells check it before doing any work.
try:
  main()
  second_step_done = True
except Exception as setup_error:
  # Catch Exception only, so interrupting the cell still stops it, and show
  # the reason instead of hiding it.
  print("Error setting up the directories!")
  print(f"Reason: {setup_error!r}")
  second_step_done = False

In [None]:
# @title ## 3. Download the base model and/or VAE used for training

model_url = ""
vae_url = ""
model_name = ""

# @markdown Default models provided here for training, if you want to use another one introduce the URL in the input below.
training_model = "(XL) PonyDiffusion v6" # @param ["(XL) PonyDiffusion v6", "(XL) Animagine", "(XL) SDXL 1.0", "(1.5) anime-full-final-pruned (Most used on Anime LoRas)", "(1.5) AnyLora", "(1.5) SD 1.5"]
custom_training_model = "" # @param {type: "string"}
# @markdown VAE for training. Not needed for 1.5 nor XL but it's recommended to use SDXL base VAE on XL training. if you want to use a custom one introduce the URL in the input below.
vae = "SDXL VAE" # @param ["SDXL VAE", "None"]
custom_vae = "" # @param {type: "string"}

if custom_training_model:
  model_url = custom_training_model
elif "Pony" in training_model:
  model_url = "https://civitai.com/api/download/models/290640"
  model_name = "ponydiffusion_v6.safetensors"
elif "Animagine" in training_model:
  model_url = "https://huggingface.co/cagliostrolab/animagine-xl-3.0/resolve/main/animagine-xl-3.0.safetensors"
elif "SDXL" in training_model:
  model_url = "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors"
elif "anime" in training_model:
  model_url = "https://huggingface.co/hollowstrawberry/stable-diffusion-guide/resolve/main/models/animefull-final-pruned-fp16.safetensors"
elif "Any" in training_model:
  model_url = "https://huggingface.co/Lykon/AnyLoRA/resolve/main/AnyLoRA_noVae_fp16-pruned.safetensors"
elif "SD 1.5" in training_model:
  model_url = "https://huggingface.co/hollowstrawberry/stable-diffusion-guide/resolve/main/models/sd-v1-5-pruned-noema-fp16.safetensors"

if custom_vae:
  vae_url = custom_vae
elif "SDXL" in vae:
  vae_url = "https://huggingface.co/stabilityai/sdxl-vae/resolve/main/sdxl_vae.safetensors"

model_file = f"/content/pretrained_model/{model_name}"
vae_file = ""

if not "installed_dependencies" in globals():
  print("Installing missing dependency...")
  !apt -y update -qq
  !apt install -y aria2 -qq
  globals().setdefault("installed_dependencies", True)

if not vae == "None":
  stripped_model_vae = vae_url.strip()

  if stripped_model_vae.lower().endswith((".ckpt", ".safetensors")):
    vae_file = f"content/vae{stripped_model_vae[stripped_model_vae.rfind('/'):]}"
  else:
    vae_file = "/content/vae/downloaded_vae.safetensors"
    if os.path.exists(vae_file):
      !rm "{vae_file}"

stripped_model_url = model_url.strip()

if stripped_model_url.lower().endswith((".ckpt", ".safetensors")):
  model_file = f"content/pretrained_model{stripped_model_url[stripped_model_url.rfind('/'):]}"
elif not "pony" in model_file:
  model_file = "/content/pretrained_model/downloaded_model.safetensors"
  if os.path.exists(model_file):
    !rm "{model_file}"

print(f"Downloading model from {model_url}...")
!aria2c "{model_url}" --console-log-level=warn -c -s 16 -x 16 -k 10M -d / -o "{model_file}"

if not vae == "None":
  print(f"Downloading vae from {vae_url}...")
  !aria2c "{vae_url}" --console-log-level=warn -c -s 16 -x 16 -k 10M -d / -o "{vae_file}"

print("Models downloaded successfully!")

In [None]:
# @title ## 4. Upload your dataset
import os
import zipfile

# @markdown ### Unzip the dataset
# @markdown You can extract your dataset in your dataset directory.

# Location of the zip archive to extract; the default points at a file on Google Drive.
zip_path = "/content/drive/MyDrive/dataset.zip" # @param {type: "string"}
# @markdown If you have multiple dataset directories extract each zip in each dataset directory.
# Name of the folder, inside the project directory, that receives the archive's contents.
extract_to_dataset_dir = "dataset" # @param {type: "string"}

def extract_dataset():
  """Extract the archive at ``zip_path`` into the chosen dataset directory.

  The target is ``<project>/<extract_to_dataset_dir>``, located on Drive when
  ``drive_dir`` exists and under ``root_path`` otherwise; it is created when
  missing. Every problem is reported with a printed message and the function
  returns without raising.
  """
  if not globals().get("second_step_done"):
    print("You didn't complete the second step!")
    return

  # This check must come before the existence check: a file on an unmounted
  # Drive never exists, so the more specific message would otherwise be
  # unreachable.
  if "drive/MyDrive" in zip_path and not os.path.exists(drive_dir):
    print("Your trying to access drive but you didn't mount it!")
    return

  if not os.path.exists(zip_path):
    print("The path of the zip doesn't exists!")
    return

  # Reject directories and non-zip files up front instead of failing with a
  # traceback half-way through.
  if not zipfile.is_zipfile(zip_path):
    print("The file is not a valid zip archive!")
    return

  dataset_dir = os.path.join(root_path, project_name, extract_to_dataset_dir)
  if os.path.exists(drive_dir):
    dataset_dir = os.path.join(drive_dir, project_name, extract_to_dataset_dir)

  if not os.path.exists(dataset_dir):
    os.makedirs(dataset_dir, exist_ok=True)
    print(f"Created dataset directory on new location because it didn't exist before: {dataset_dir}")

  print("Extracting dataset...")

  with zipfile.ZipFile(zip_path, 'r') as archive:
    archive.extractall(dataset_dir)

  print(f"Dataset extracted in {dataset_dir}")

extract_dataset()

In [None]:
# @markdown ### Tag your images
import os

# Captioning backend: "Anime" runs tag_images_by_wd14_tagger.py, any other value runs make_captions.py.
method = "Anime" # @param ["Anime", "Photorealistic"]
# Tagger repository, passed as --repo_id ("Anime" method only).
model = "SmilingWolf/wd-v1-4-swinv2-tagger-v2" # @param ["SmilingWolf/wd-v1-4-swinv2-tagger-v2", "SmilingWolf/wd-v1-4-moat-tagger-v2", "SmilingWolf/wd-v1-4-convnextv2-tagger-v2", "SmilingWolf/wd-v1-4-convnext-tagger-v2", "SmilingWolf/wd-v1-4-vit-tagger-v2"]
# Dataset folder, inside the project directory, whose images get captioned.
dataset_dir_name = "dataset" # @param {type: "string"}
# Passed as --undesired_tags ("Anime" method only).
blacklisted_tags = "" # @param {type: "string"}
# Passed as --thresh ("Anime" method only).
threshold = 0.35 # @param {type: "slider", min:0.0, max: 1.0, step:0.01}
# Passed as --min_length / --max_length to make_captions.py (non-"Anime" method only).
caption_min = 10 # @param {type: "number"}
caption_max = 75 # @param {type: "number"}

def caption_images():
  if not globals().get("second_step_done"):
    print("You didn't complete the second step!")
    return

  dataset_dir = os.path.join(root_path, project_name, dataset_dir_name)
  if os.path.exists(drive_dir):
    dataset_dir = os.path.join(drive_dir, project_name, dataset_dir_name)

  sd_scripts = os.path.join(trainer_dir, "sd_scripts")
  if not os.path.exists(sd_scripts):
    print("Please run the step 1 first.")
    return

  try:
    import accelerate
  except Exception:
    print("Installing missing dependencies...")
    !pip install accelerate==0.25.0 diffusers[torch]==0.21.2 einops==0.6.0

  try:
    import timm
  except Exception:
    print("Installing missing dependencies for BLIP Captioning...")
    !pip install timm==0.6.12 fairscale==0.4.13

  print("Tagging images")
  %env PYTHONPATH={sd_scripts}

  if method == "Anime":
    !python {sd_scripts}/finetune/tag_images_by_wd14_tagger.py \
      {dataset_dir} \
      --repo_id={model} \
      --model_dir={tagger_models_dir} \
      --thresh={threshold} \
      --batch_size=8 \
      --max_data_loader_n_workers=2 \
      --caption_extension=.txt \
      --undesired_tags={blacklisted_tags} \
      --force_download \
      --remove_underscore
  else:
    os.chdir(sd_scripts)
    !python finetune/make_captions.py \
      {dataset_dir} \
      --beam_search \
      --max_data_loader_n_workers=2 \
      --batch_size=8 \
      --min_length={caption_min} \
      --max_length={caption_max} \
      --caption_extension=.txt
    os.chdir(root_path)

  %env PYTHONPATH=/env/python
  print("Tagging complete!")

caption_images()

In [None]:
# @title ## 5. Start the training
import os

# @markdown ### TODO: Explain it

def init_training():
  if not os.path.exists(trainer_dir):
    print("Please run the 1st step first.")
    return

  if not globals().get("second_step_done"):
    print("You didn't complete the second step!")
    return

  os.chdir(trainer_dir)
  !chmod 755 run.sh
  !./run.sh
  os.chdir(root_path)

init_training()