# Lora_Easy_Training_Colab
[![Ko-Fi](https://img.shields.io/badge/Ko--fi-F16061?logo=ko-fi&logoColor=white&style=flat)](https://ko-fi.com/jelosus1)

### Colab powered by [Lora_Easy_Training_Scripts_Backend](https://github.com/derrian-distro/LoRA_Easy_Training_scripts_Backend/)

<h4><font color="red"><u>DISCLAIMER:</u></font> The Forked version of the trainer is not maintained by the original author, so please do not open issues in the original repository if you encounter any problem. Instead, submit them in the <a href="https://github.com/Jelosus2/LoRA_Easy_Training_scripts_Backend/">forked repo</a>.</h4>


---


Learn how to use the colab [here](https://civitai.com/articles/4409)

---

Last Update: March 10, 2024. Check the [full changelog](https://github.com/Jelosus2/LoRA_Easy_Training_Colab?tab=readme-ov-file#changelog)

Changes:
- Modified the way the Forked trainer is installed, since CAME and REX are now officially implemented. To use either of them you have to update the UI (original or forked): open a command line in the root folder of the UI and run `git pull`.
- Added the newly released v3 taggers and modified the script to make them work.

In [None]:
# @title ## 1. Install the trainer
import os

root_path = "/content"
trainer_dir = os.path.join(root_path, "trainer")

# @markdown Select the version of the trainer you want to use. Forked version is an unofficial version that adds some features like different optimizers, schedulers, etc. You can check the features that are available on it [here](https://github.com/Jelosus2/LoRA_Easy_Training_Colab?tab=readme-ov-file#additions-of-the-forked-version)
trainer_version = "Original" # @param ["Original", "Forked (Additional features)"]

installed_dependencies = False
first_step_done = False

try:
  print("Installing trainer...")
  !apt -y update -qq
  !apt install -y python3.10-venv aria2 -qq

  installed_dependencies = True

  if trainer_version == "Original":
    !git clone https://github.com/derrian-distro/LoRA_Easy_Training_scripts_Backend {trainer_dir}
  else:
    !git clone https://github.com/Jelosus2/LoRA_Easy_Training_scripts_Backend {trainer_dir}

  !chmod 755 /content/trainer/colab_install.sh
  os.chdir(trainer_dir)
  !./colab_install.sh

  if "Forked" in trainer_version:
    # No use right now
    print("Patching trainer...")

  os.chdir(root_path)
  first_step_done = True
  print("Done!")
except Exception as e:
  print(f"Error intalling the trainer!\n{e}")
  first_step_done = False

In [None]:
# @title ## 2. Setup the directories
import os
import shutil
from google.colab import drive

# Fixed locations used by this notebook; every path hangs from root_path
root_path = "/content"
trainer_dir = os.path.join(root_path, "trainer")
# "MyDrive" folder of Google Drive; mount_drive_dir() mounts Drive on its parent directory
drive_dir = os.path.join(root_path, "drive/MyDrive")
pretrained_model_dir = os.path.join(root_path, "pretrained_model")
vae_dir = os.path.join(root_path, "vae")
tagger_models_dir = os.path.join(root_path, "tagger_models")

# @markdown The name for your project. Make sure it can be used as a folder name
project_name = "My_first_lora" # @param {type: "string"}
# @markdown Specify the name for the directories. If you use Drive, it will be created at the base path of your drive. If you have multiple datasets, separate each with a comma `(,)` like this: **dataset1, dataset2, ...**

# @markdown The directory where the results of the training will be stored.
output_dir_name = "output" # @param {type: "string"}
# @markdown The directory where your dataset(s) will be located.
dataset_dir_name = "dataset" # @param {type: "string"}
# @markdown Use Drive to store all the files and directories
use_drive = True # @param {type: "boolean"}

# Spaces become underscores in the project and output names.
# dataset_dir_name is not normalised here: make_directories() removes its spaces when it splits the list.
project_name = project_name.replace(" ", "_")
output_dir_name = output_dir_name.replace(" ", "_")

# Set to True at the end of this cell when main() returns without raising; later cells read it through globals()
second_step_done = False

def is_valid_folder_name(folder_name: str) -> bool:
  """Tell whether ``folder_name`` can be used as the name of a single folder.

  A name is rejected when it is empty (or only whitespace), when it is one of
  the relative references ``.`` / ``..``, or when it contains any of the
  characters ``< > : " / \\ | ? *``.

  Args:
    folder_name: the candidate name (not a path).

  Returns:
    True when the name is usable, False otherwise.
  """
  # Raw string so the backslash is a literal character instead of an invalid escape sequence
  invalid_characters = r'<>:"/\|?*'

  # Joining one of these to a base path gives back the base (or its parent),
  # which the callers would then create or delete by mistake
  if folder_name.strip() in ("", ".", ".."):
    return False

  return not any(char in invalid_characters for char in folder_name)

def mount_drive_dir() -> str:
  """Return the base directory of the project, mounting Google Drive first if it is used.

  Returns:
    ``<drive_dir>/<project_name>`` when ``use_drive`` is set, otherwise
    ``<root_path>/<project_name>``.
  """
  if not use_drive:
    return os.path.join(root_path, project_name)

  drive_is_mounted = os.path.exists(drive_dir)
  if not drive_is_mounted:
    # Drive is mounted on the parent directory of drive_dir
    drive.mount(os.path.dirname(drive_dir))

  return os.path.join(drive_dir, project_name)

def make_directories():
  """Create the model, VAE, tagger, output and dataset directories of the project.

  The names are validated before anything is created or deleted, so an invalid
  name can no longer leave the setup half done.

  Raises:
    ValueError: if the project name is empty or a dataset directory name is
      not a valid folder name.
  """
  # An empty project name would make the clean-up below delete the whole base directory
  if not project_name.strip():
    raise ValueError("The project name must not be empty")

  # Empty entries (e.g. a trailing comma) are ignored
  dataset_names = [name for name in dataset_dir_name.replace(" ", "").split(",") if name]
  for dataset_name in dataset_names:
    if not is_valid_folder_name(dataset_name):
      raise ValueError(f"{dataset_name} is not a valid name for a folder")

  base_dir = mount_drive_dir()
  output_dir = os.path.join(base_dir, output_dir_name)

  # The copy of the project in the storage that is NOT selected gets removed.
  # NOTE(review): with use_drive disabled and Drive mounted this permanently deletes
  # the project folder from Drive — confirm this is intended.
  if use_drive:
    unused_project_dir = os.path.join(root_path, project_name)
  else:
    unused_project_dir = os.path.join(drive_dir, project_name)
  if os.path.exists(unused_project_dir):
    shutil.rmtree(unused_project_dir)

  for directory in (pretrained_model_dir, vae_dir, output_dir, tagger_models_dir):
    os.makedirs(directory, exist_ok=True)

  for dataset_name in dataset_names:
    os.makedirs(os.path.join(base_dir, dataset_name), exist_ok=True)

def main():
  """Validate the project and output names and create the directories.

  Raises:
    ValueError: if the project name or the output directory name is empty or
      is not a valid folder name. Raising (instead of printing and returning)
      lets the caller notice the failure, so the step is not marked as done.
  """
  for name in (project_name, output_dir_name):
    if not name.strip() or not is_valid_folder_name(name):
      raise ValueError(f'"{name}" is not a valid name for a folder')

  print("Setting up directories...")
  make_directories()
  print("Done!")

# Run the setup; the step is flagged as done only when main() returns without raising
try:
  main()
except Exception as e:
  print(f"Error setting up the directories!\n{e}")
  second_step_done = False
else:
  second_step_done = True

In [None]:
# @title ## 3. Download the base model and/or VAE used for training
import os
import re

model_url = ""
vae_url = ""
model_name = ""

# @markdown Default models are provided here for training. If you want to use another one, introduce the URL in the input below. The link must be pointing to either Civitai or Hugging Face and have the correct format. You can check how to get the correct link [here](https://github.com/Jelosus2/LoRA_Easy_Training_Colab?tab=readme-ov-file#how-to-get-the-link-for-custom-modelvae).
training_model = "(XL) PonyDiffusion v6" # @param ["(XL) PonyDiffusion v6", "(XL) Animagine", "(XL) SDXL 1.0", "(1.5) anime-full-final-pruned (Most used on Anime LoRas)", "(1.5) AnyLora", "(1.5) SD 1.5"]
custom_training_model = "" # @param {type: "string"}
# @markdown VAE used for training. It's not needed for 1.5 nor XL, but it's recommended to use the SDXL base VAE for XL training. If you want to use a custom one, introduce the URL in the input below.
vae = "SDXL VAE" # @param ["SDXL VAE", "None"]
custom_vae = "" # @param {type: "string"}

thrid_step_done = False

if custom_training_model:
  model_url = custom_training_model
elif "Pony" in training_model:
  model_url = "https://civitai.com/api/download/models/290640"
  model_name = "ponydiffusion_v6.safetensors"
elif "Animagine" in training_model:
  model_url = "https://huggingface.co/cagliostrolab/animagine-xl-3.0/resolve/main/animagine-xl-3.0.safetensors"
elif "SDXL" in training_model:
  model_url = "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors"
elif "anime" in training_model:
  model_url = "https://huggingface.co/hollowstrawberry/stable-diffusion-guide/resolve/main/models/animefull-final-pruned-fp16.safetensors"
elif "Any" in training_model:
  model_url = "https://huggingface.co/Lykon/AnyLoRA/resolve/main/AnyLoRA_noVae_fp16-pruned.safetensors"
elif "SD 1.5" in training_model:
  model_url = "https://huggingface.co/hollowstrawberry/stable-diffusion-guide/resolve/main/models/sd-v1-5-pruned-noema-fp16.safetensors"

if custom_vae:
  vae_url = custom_vae
elif "SDXL" in vae:
  vae_url = "https://huggingface.co/stabilityai/sdxl-vae/resolve/main/sdxl_vae.safetensors"

model_file = ""
vae_file = ""

if not "installed_dependencies" in globals():
  print("Installing missing dependency...")
  !apt -y update -qq
  !apt install -y aria2 -qq
  globals().setdefault("installed_dependencies", True)

def download_model(model_url, model_name):
  global model_file

  if re.search(r"https:\/\/huggingface\.co\/.*(?:resolve|blob).*", model_url):
    model_url = model_url.replace("blob", "resolve")
  elif re.search(r"https:\/\/civitai\.com\/models\/\d+", model_url):
    if m := re.search(r"modelVersionId=(\d+)", model_url):
      model_url = f"https://civitai.com/api/download/models/{m.group(1)}"
  elif not re.search(r"https:\/\/huggingface\.co\/.*(?:resolve|blob).*", model_url) and not re.search(r"https:\/\/civitai\.com\/api\/download\/models\/(\d+)", model_url):
    print("Invalid model download URL!\nCheck how to get the correct link in https://github.com/Jelosus2/LoRA_Easy_Training_Colab?tab=readme-ov-file#how-to-get-the-link-for-custom-modelvae")
    return

  stripped_model_url = model_url.strip()

  if model_name:
    model_file = f"/content/pretrained_model/{model_name}"
  elif stripped_model_url.lower().endswith((".ckpt", ".safetensors")):
    model_file = f"/content/pretrained_model{stripped_model_url[stripped_model_url.rfind('/'):]}"
  elif not "pony" in model_file:
    model_file = "/content/pretrained_model/downloaded_model.safetensors"
    if os.path.exists(model_file):
      !rm "{model_file}"

  print(f"Downloading model from {model_url}...")
  !aria2c "{model_url}" --console-log-level=warn -c -s 16 -x 16 -k 10M -d / -o "{model_file}"

def download_vae(vae_url):
  global vae_file

  if not vae == "None":
    if re.search(r"https:\/\/huggingface\.co\/.*(?:resolve|blob).*", vae_url):
      vae_url = vae_url.replace("blob", "resolve")
    elif re.search(r"https:\/\/civitai\.com\/models\/\d+", vae_url):
      if m := re.search(r"modelVersionId=(\d+)", vae_url):
        vae_url = f"https://civitai.com/api/download/models/{m.group(1)}"
    elif not re.search(r"https:\/\/huggingface\.co\/.*(?:resolve|blob).*", vae_url) and not re.search(r"https:\/\/civitai\.com\/api\/download\/models\/(\d+)", vae_url):
      print("Invalid VAE download URL!\nCheck how to get the correct link in https://github.com/Jelosus2/LoRA_Easy_Training_Colab?tab=readme-ov-file#how-to-get-the-link-for-custom-modelvae")
      return

    stripped_model_vae = vae_url.strip()

    if stripped_model_vae.lower().endswith((".ckpt", ".safetensors")):
      vae_file = f"/content/vae{stripped_model_vae[stripped_model_vae.rfind('/'):]}"
    else:
      vae_file = "/content/vae/downloaded_vae.safetensors"
      if os.path.exists(vae_file):
        !rm "{vae_file}"

    print(f"Downloading vae from {vae_url}...")
    !aria2c "{vae_url}" --console-log-level=warn -c -s 16 -x 16 -k 10M -d / -o "{vae_file}"
  else:
    vae_file = ""

# Download both files; the step is flagged as done only when neither call raises
try:
  download_model(model_url, model_name)
  download_vae(vae_url)
except Exception as e:
  print(f"Failed to download the models\n{e}")
  thrid_step_done = False
else:
  thrid_step_done = True

In [None]:
# @title ## 4. Upload your dataset
import os
import re
import zipfile

# @markdown ### Unzip the dataset
# @markdown If you have a dataset in a zip file, you can specify the path to it below. This will extract the dataset into the dataset directory specified in step 2. It supports downloading the zip from **HuggingFace**. To get the correct link you only need to follow the steps [for models/VAEs](https://github.com/Jelosus2/LoRA_Easy_Training_Colab?tab=readme-ov-file#from-huggingface) but applying them to the zip file.

# Path of a local zip file or a Hugging Face URL of one; extract_dataset() handles both
zip_path = "/content/drive/MyDrive/dataset.zip" # @param {type: "string"}
# @markdown Specify the name of your dataset directory. If it doesn't exist, it will be created. If you have multiple dataset directories, extract each zip file into its respective dataset directory.
extract_to_dataset_dir = "dataset" # @param {type: "string"}

# "installed_dependencies" only exists once step 1 or step 3 has run; otherwise
# aria2 (used by extract_dataset() to download the zip) is installed here
if not "installed_dependencies" in globals():
  print("Installing missing dependency...")
  !apt -y update -qq
  !apt install -y aria2 -qq
  globals().setdefault("installed_dependencies", True)

def extract_dataset():
  global zip_path
  is_from_hf = False

  if not globals().get("second_step_done"):
    print("You didn't complete the second step!")
    return

  if zip_path.startswith("https://huggingface.co/"):
    is_from_hf = True

  if not os.path.exists(zip_path) and not is_from_hf:
    print("The path of the zip doesn't exists!")
    return

  if "drive/MyDrive" in zip_path and not os.path.exists(drive_dir):
    print("Your trying to access drive but you didn't mount it!")
    return


  dataset_dir = os.path.join(root_path, project_name, extract_to_dataset_dir)
  if os.path.exists(drive_dir):
    dataset_dir = os.path.join(drive_dir, project_name, extract_to_dataset_dir)

  if not os.path.exists(dataset_dir):
    os.makedirs(dataset_dir, exist_ok=True)
    print(f"Created dataset directory on new location because it didn't exist before: {dataset_dir}")

  if is_from_hf and re.search(r"https:\/\/huggingface\.co\/.*(?:resolve|blob).*\.zip", zip_path):
    print("Zip file from HuggingFace detected, attempting to download...")
    !aria2c "{zip_path}" --console-log-level=warn -c -s 16 -x 16 -k 10M -d / -o "/content/dataset.zip"
    zip_path = "/content/dataset.zip"
  elif is_from_hf and not re.search(r"https:\/\/huggingface\.co\/.*(?:resolve|blob).*\.zip", zip_path):
    print("Invalid URL provided for downloading the zip file.")
    return

  print("Extracting dataset...")

  with zipfile.ZipFile(zip_path, 'r') as f:
    f.extractall(dataset_dir)

  print(f"Dataset extracted in {dataset_dir}")

  if is_from_hf:
    print("Removing temporary zip file...")
    !rm "{zip_path}"
    print("Done!")

extract_dataset()

In [None]:
# @markdown ### Tag your images
import os

# @markdown As the name suggests, this is the type of tagging you want for your dataset.
method = "Anime" # @param ["Anime", "Photorealistic"]
# @markdown `(Only applies to Anime method)` The default model used for tagging is `SmilingWolf/wd-swinv2-tagger-v3`. I find it more accurate than its v2 version, but if you have experience, you can use another one and tweak the parameters. If you don't, the default configuration should be fine.
model = "SmilingWolf/wd-swinv2-tagger-v3" # @param ["SmilingWolf/wd-swinv2-tagger-v3", "SmilingWolf/wd-vit-tagger-v3", "SmilingWolf/wd-convnext-tagger-v3", "SmilingWolf/wd-v1-4-swinv2-tagger-v2", "SmilingWolf/wd-v1-4-moat-tagger-v2", "SmilingWolf/wd-v1-4-convnextv2-tagger-v2", "SmilingWolf/wd-v1-4-convnext-tagger-v2", "SmilingWolf/wd-v1-4-vit-tagger-v2"]
# @markdown The directory name of the dataset you want to tag. You can specify another directory when the previous one is fully tagged, in case you have more than one dataset.
# NOTE(review): this rebinds the global dataset_dir_name that step 2 defines (possibly a
# comma-separated list) and that print_paths() in step 5 reads — confirm the overwrite is intended.
dataset_dir_name = "dataset" # @param {type: "string"}
# @markdown `(Only applies to Anime method)` Specify the tags that you don't want the autotagger to use. Separate each one with a comma `(,)` like this: **1girl, solo, standing, ...**
blacklisted_tags = "" # @param {type: "string"}
# @markdown `(Only applies to Anime method)` Specify the minimum confidence level required for assigning a tag to the image. A lower threshold results in more tags being assigned. The recommended default value for v2 taggers is 0.35 and for v3 is 0.25.
threshold = 0.25 # @param {type: "slider", min:0.0, max: 1.0, step:0.01}
# @markdown `(Only applies to Photorealistic method)` Specify the minimum number of words (also known as tokens) to include in the captions.
caption_min = 10 # @param {type: "number"}
# @markdown `(Only applies to Photorealistic method)` Specify the maximum number of words (also known as tokens) to include in the captions.
caption_max = 75 # @param {type: "number"}

# All spaces are removed from the tag list; presumably so it reaches the tagger
# as a single command-line argument (see --undesired_tags in caption_images)
blacklisted_tags = blacklisted_tags.replace(" ", "")

def caption_images():
  if not globals().get("second_step_done"):
    print("You didn't complete the second step!")
    return

  dataset_dir = os.path.join(root_path, project_name, dataset_dir_name)
  if os.path.exists(drive_dir):
    dataset_dir = os.path.join(drive_dir, project_name, dataset_dir_name)

  sd_scripts = os.path.join(trainer_dir, "sd_scripts")
  if not os.path.exists(sd_scripts):
    print("Please run the step 1 first.")
    return

  wd_path = os.path.join(sd_scripts, "finetune", "tag_images_by_wd14_tagger.py")

  try:
    import accelerate
  except Exception:
    print("Installing missing dependencies...")
    !pip install accelerate==0.25.0 diffusers[torch]==0.21.2 einops==0.6.0 onnx==1.15.0
    !pip install onnxruntime-gpu==1.17.1 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/

    !rm "{wd_path}"
    !aria2c "https://raw.githubusercontent.com/Jelosus2/Lora_Easy_Training_Colab/main/custom/tag_images_by_wd14_tagger.py" --console-log-level=warn -c -s 16 -x 16 -k 10M -d / -o "{wd_path}"

  try:
    import timm
  except Exception:
    print("Installing missing dependencies for BLIP Captioning...")
    !pip install timm==0.6.12 fairscale==0.4.13

  use_onnx = True if "v3" in model else False
  model_dir = os.path.join(tagger_models_dir, model.split("/")[-1])

  print("Tagging images")
  %env PYTHONPATH={sd_scripts}

  if method == "Anime":
    !python {wd_path} \
      {dataset_dir} \
      --repo_id={model} \
      --model_dir={model_dir} \
      --thresh={threshold} \
      --batch_size=8 \
      --max_data_loader_n_workers=2 \
      --caption_extension=.txt \
      --undesired_tags={blacklisted_tags} \
      --remove_underscore \
      {"--onnx" if use_onnx else ""}
  else:
    os.chdir(sd_scripts)
    !python finetune/make_captions.py \
      {dataset_dir} \
      --beam_search \
      --max_data_loader_n_workers=2 \
      --batch_size=8 \
      --min_length={caption_min} \
      --max_length={caption_max} \
      --caption_extension=.txt
    os.chdir(root_path)

  %env PYTHONPATH=/env/python
  print("Tagging complete!")

caption_images()

In [None]:
# @title ## 5. Start the training
import os

# @markdown Execute this cell to obtain the paths. Input these paths into the UI to start the training.

def print_paths():
  """Print the dataset, model, VAE and output paths that have to be typed into the UI.

  Nothing is printed but a hint when the second or third step wasn't
  completed or when no model file is available.
  """
  if not globals().get("second_step_done"):
    print("You didn't complete the second step!")
    return

  if not globals().get("thrid_step_done"):
    print("You didn't complete the third step!")
    return

  # The third step can finish without a model (e.g. invalid URL); an empty path is useless in the UI
  if not globals().get("model_file"):
    print("No model was downloaded in the third step, run it again with a valid URL.")
    return

  storage_dir = drive_dir if globals().get("use_drive") else root_path
  project_base_dir = os.path.join(storage_dir, project_name)

  # Same parsing of the dataset list as make_directories(): spaces removed, split on commas
  dataset_dirs = [
    f"Dataset directory {number}: {os.path.join(project_base_dir, dataset_name)}"
    for number, dataset_name in enumerate(dataset_dir_name.replace(" ", "").split(','), start=1)
  ]

  # Printed exactly as stored, so it always matches the file that was downloaded
  model_path = model_file
  vae_path = vae_file or "None"
  output_path = os.path.join(project_base_dir, output_dir_name)

  print("Dataset paths:\n  {0}\nModel path: {1}\nVAE path: {2}\nOutput path: {3}\nConfig file path: {4}\nTags file path: {4}".format('\n  '.join(dataset_dirs), model_path, vae_path, output_path, "It's saved locally on your machine"))

print_paths()

In [None]:
import os
import json

# @markdown Running this cell will create a tunnel that allows you to connect from your local UI. If you don't have it installed, please install it [here](https://github.com/derrian-distro/LoRA_Easy_Training_Scripts). If you are using the forked version of the trainer, you can install it [here](https://github.com/Jelosus2/LoRA_Easy_Training_Colab_Frontend). [Instructions for installation](https://github.com/Jelosus2/LoRA_Easy_Training_Colab?tab=readme-ov-file#how-to-install-the-ui). Once you launch the UI, set up your training parameters, copy the given URL into your interface, and click "Start training".


# @markdown `(Optional)` Ngrok is an alternative method, and you need a token that you can obtain from [Ngrok's dashboard](https://dashboard.ngrok.com/get-started/your-authtoken). I recommend using it only if you want, have experience, or if the default tunnel provider is down. [How to obtain Ngrok token](https://github.com/Jelosus2/LoRA_Easy_Training_Colab?tab=readme-ov-file#how-to-obtain-the-ngrok-token)

use_ngrok = False # @param {type: "boolean"}
ngrok_token = "" # @param {type: "string"}

def init_training():
  if not os.path.exists(trainer_dir):
    print("Please run the 1st step first.")
    return

  if not globals().get("second_step_done"):
    print("You didn't complete the second step!")
    return

  config_file = os.path.join(trainer_dir, "config.json")

  if use_ngrok:
    if not ngrok_token:
      print("The ngrok token must not be empty!")
      return

    with open(config_file, 'r') as config:
      data = json.load(config)

    data["remote_mode"] = "ngrok"
    data["ngrok_token"] = ngrok_token

    with open(config_file, 'w') as config:
      json.dump(data, config, indent=2)
  else:
    with open(config_file, 'r') as config:
      data = json.load(config)

    if data["remote_mode"] == "ngrok":
      data["remote_mode"] = "cloudflared"
      data["ngrok_token"] = ""

      with open(config_file, 'w') as config:
        json.dump(data, config, indent=2)

  os.chdir(trainer_dir)
  !chmod 755 run.sh
  !./run.sh
  os.chdir(root_path)


init_training()