<a href="https://colab.research.google.com/github/Linaqruf/kohya-trainer/blob/main/kohya-LoRA-finetuner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Kohya LoRA Fine-Tuning


This notebook has been adapted for use in Google Colab based on [kohya-ss/sd-scripts](https://github.com/kohya-ss/sd-scripts). <br>
This notebook was adapted by [Linaqruf](https://github.com/Linaqruf).<br>
You can find the latest update to the notebook [here](https://github.com/Linaqruf/kohya-trainer/blob/main/kohya-trainer.ipynb).


# Install Kohya Trainer

In [None]:
#@title Clone Kohya Trainer
#@markdown Clone the Kohya Trainer repository from GitHub and check for updates

%cd /content/

import os

def clone_kohya_trainer():
  # Check if the directory already exists
  if os.path.isdir('/content/kohya-trainer'):
    %cd /content/kohya-trainer
    print("This folder already exists, will do a !git pull instead\n")
    !git pull
  else:
    !git clone https://github.com/Linaqruf/kohya-trainer

# Clone or update the Kohya Trainer repository
clone_kohya_trainer()

In [None]:
#@title Installing Dependencies
%cd /content/kohya-trainer

import os

Install_xformers = True #@param {'type':'boolean'}
  
def install_dependencies():
  #@markdown This will install required Python packages
  !pip install --upgrade -r requirements.txt
  !pip install -U gallery-dl
  !pip install imjoy-elfinder

  if Install_xformers:
    !pip install -U -I --no-deps https://github.com/camenduru/stable-diffusion-webui-colab/releases/download/0.0.15/xformers-0.0.15.dev0+189828c.d20221207-cp38-cp38-linux_x86_64.whl
  else:
    pass

# Install dependencies
install_dependencies()

#@markdown After Accelerate updated its version to 0.15.0, you can't manually input the config using
#@markdown `!accelerate config` in Google Colab. Instead, a `config.yaml` file will be generated by
#@markdown the `write_basic_config()` function. You can find the file [here](/content/kohya-trainer/accelerate_config/config.yaml) after installation.
#@markdown if you want to modify it.

from accelerate.utils import write_basic_config
accelerate_config = "/content/kohya-trainer/accelerate_config/config.yaml"
write_basic_config(save_location = accelerate_config) # Write a config file

In [None]:
#@title Install Special File Explorer for Colab
#@markdown this will work real-time even though you're running other cells
import threading
from google.colab import output
from imjoy_elfinder.app import main

open_in_new_tab = True #@param {type:"boolean"}

# Run the imjoy-elfinder server on a separate thread so this cell returns immediately
elfinder_args = ["--root-dir=/content", "--port=8765"]
thread = threading.Thread(target=main, args=(elfinder_args,))
thread.start()

if not open_in_new_tab:
  # view the file explorer embedded in this cell's output
  output.serve_kernel_port_as_iframe(8765, height='500')
else:
  # open imjoy-elfinder in a new tab
  output.serve_kernel_port_as_window(8765)


# Prepare Cloud Storage (Huggingface/GDrive)

In [None]:
#@title Login to Huggingface hub

#@markdown ## Instructions:
#@markdown 1. Of course, you need a Huggingface account first.
#@markdown 2. To create a huggingface token, go to [this link](https://huggingface.co/settings/tokens), then `create new token` or copy available token with the `Write` role.

%cd /content/kohya-trainer


from huggingface_hub import login

write_token = "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" #@param {type:"string"}
login(write_token, add_to_git_credential=True)



In [None]:
#@title Mount Google Drive

from google.colab import drive

# Form checkbox: untick to skip mounting.
mount_drive = True #@param {'type':'boolean'}

# Mount the drive at /content/drive only when requested.
if mount_drive:
  drive.mount('/content/drive')

# Collecting datasets

You can either upload your own dataset to this notebook or use the image scraper below to bulk-download images from Danbooru or Gelbooru.

If you want to use your own dataset, upload it to the Colab session's `local files`.


In [None]:
#@title Define Train Data Directory
#@markdown Define where your train data will be located. This cell will also create a folder based on your input. 
#@markdown This folder will be used as the target folder for scraping, tagging, bucketing, and training in the next cell.

import os

train_data_dir = "/content/fine_tune/train_data" #@param {'type' : 'string'}

# Report an existing folder, otherwise create it (including missing parents).
if os.path.exists(train_data_dir):
    print(f"{train_data_dir} already exists\n")
else:
    os.makedirs(train_data_dir)

print(f"Your train data directory : {train_data_dir}")


In [None]:
#@title Download compressed (.zip) dataset (Optional)


#@markdown ### Define Download Parameter
datasets_url = "https://huggingface.co/datasets/Linaqruf/pixiv-niji-journey/resolve/main/nijijourney_pixiv_2022110620221222_preprocessed.zip" #@param {'type': 'string'}
dataset_dst = '/content/train_data.zip' #@param{'type':'string'}
#@markdown ### Define Auto-Unzip Parameter
extract_to = '/content/fine_tune/train_data' #@param{'type':'string'}
unzip_module = "use_7zip" #@param ["use_unzip","use_7zip","use_Zipfile"]

def download_and_unzip_dataset(url, zip_file, extract_to, unzip_module):
  try:
    # Download dataset
    if url.startswith("https://drive.google.com"):
      # Use gdown to download file from Google Drive
      !gdown -o "{zip_file}" --fuzzy "{url}"
    elif url.startswith("magnet:?"):
      install_aria()
      # Use aria2c to download file from magnet link
      !aria2c --summary-interval=10 -c -x 10 -k 1M -s 10 -o "{zip_file}" "{url}"
    else:
      user_token = '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'
      user_header = f"\"Authorization: Bearer {user_token}\""
      # Use wget to download file from URL
      !wget -c -O "{zip_file}" --header={user_header} "{url}"

    # Unzip dataset
    if unzip_module == "use_7zip":
      !7z x $zip_file -o$extract_to
    elif unzip_module == "use_unzip":
      !unzip $zip_file -d $extract_to
    elif unzip_module == "use_Zipfile":
      import zipfile
      with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extract_to)
  except Exception as e:
    print("An error occurred while downloading or unzipping the file:", e)

# Call download_and_unzip_dataset function
download_and_unzip_dataset(datasets_url, dataset_dst, extract_to, unzip_module)

In [None]:
#@title Clone Datasets Repository (Optional)
%cd /content/

#@markdown ### Define Parameters
repository_url = "https://huggingface.co/datasets/Linaqruf/momoko-tag"  #@param {'type': 'string'}

#@markdown ### Leave it empty if your datasets is on `main` branch
branch = "" #@param {'type': 'string'}

!git lfs install
if branch != "":
  !git clone --branch {branch} {repository_url}
else:
  !git clone {repository_url}


In [None]:
#@title Simple Booru Scraper
#@markdown Use gallery-dl to scrape images from a booru site using the specified tags
import os
import html

%cd /content

# Set configuration options
train_data_dir = "/content/fine_tune/train_data" #@param {'type' : 'string'}
booru = "Gelbooru" #@param ["", "Danbooru", "Gelbooru"]
tag1 = "hito_komoru" #@param {type: "string"}
tag2 = "" #@param {type: "string"}
download_tags = False #@param {type: "boolean"}
# Construct the search query
if tag2 != "":
  tags = tag1 + "+" + tag2
else:
  tags = tag1

if download_tags == True:
  write_tags = "--write-tags"
else:
  write_tags = ""

# Scrape images from the specified booru site using the given tags
if booru.lower() == "danbooru":
  !gallery-dl "https://danbooru.donmai.us/posts?tags={tags}" {write_tags} -D {train_data_dir}
elif booru.lower() == "gelbooru":
  !gallery-dl "https://gelbooru.com/index.php?page=post&s=list&tags={tags}" {write_tags} -D {train_data_dir}
else:
  print(f"Unknown booru site: {booru}")

if download_tags == True: 
  # Get a list of all the .txt files in the folder
  files = [f for f in os.listdir(train_data_dir) if f.endswith(".txt")]

  # Loop through each file
  for file in files:
      file_path = os.path.join(train_data_dir, file)

      # Read the contents of the file
      with open(file_path, "r") as f:
          contents = f.read()

      # Decode HTML entities and replace _ with a space
      contents = html.unescape(contents)
      contents = contents.replace("_", " ")

      # Split the contents on newline characters and join with commas
      contents = ", ".join(contents.split("\n"))

      # Write the modified contents back to the file
      with open(file_path, "w") as f:
          f.write(contents)


In [None]:
#@title Datasets cleaner
#@markdown This will delete unnecessary files and unsupported media like `.mp4`, `.webm`, and `.gif`

%cd /content

import os

train_data_dir = "/content/fine_tune/train_data" #@param {'type' : 'string'}

test = os.listdir(train_data_dir)

#@markdown I recommend to `keep_metadata` especially if you're doing resume training and you have metadata and bucket latents file from previous training like `.npz`, `.txt`, `.caption`, and `json`.
keep_metadata = True #@param {'type':'boolean'}

# List of supported file types
if keep_metadata == True:
  supported_types = [".jpg", ".jpeg", ".png", ".caption", ".npz", ".txt", "json"]
else:
  supported_types = [".jpg", ".jpeg", ".png"]

# Iterate over all files in the directory
for item in test:
    # Extract the file extension from the file name
    file_ext = os.path.splitext(item)[1]
    # If the file extension is not in the list of supported types, delete the file
    if file_ext not in supported_types:
        # Print a message indicating the name of the file being deleted
        print(f"Deleting file {item} from {train_data_dir}")
        # Delete the file
        os.remove(os.path.join(train_data_dir, item))


# Dataset Labeling

In [None]:
#@title Auto-captioning and auto-tagging
%cd /content/kohya-trainer/finetune

#@markdown We're using [BLIP](https://huggingface.co/spaces/Salesforce/BLIP) for image captioning and [Waifu Diffusion 1.4 Tagger](https://huggingface.co/spaces/SmilingWolf/wd-v1-4-tags) for image tagging like danbooru.

train_data_dir = "/content/fine_tune/train_data" #@param {'type' : 'string'}

#@markdown Tick this if you want to label your dataset with natural language like this: <br>
#@markdown `a girl with long hair holding a cellphone`

Start_BLIP_Captioning = True #@param {type:"boolean"}

#@markdown or Tick this if you want to label your dataset with danbooru tag like this: <br>
#@markdown `1girl, solo, looking_at_viewer, short_hair, bangs, simple_background, shirt, black_hair, white_background, closed_mouth, choker, hair_over_one_eye, head_tilt, grey_eyes, black_shirt, floating_hair, black_choker, eyes_visible_through_hair, portrait`

Start_WD_1_4_Tagger = True #@param {type:"boolean"}

#@markdown or you can use them both

batch_size = 8

if Start_BLIP_Captioning == True:
  !python make_captions.py \
    {train_data_dir} \
    --batch_size {batch_size} \
    --caption_extension .caption
else:
  pass

if Start_WD_1_4_Tagger == True:
  !python tag_images_by_wd14_tagger.py \
    {train_data_dir} \
    --batch_size {batch_size} \
    --caption_extension .txt
else:
  pass
    

In [None]:
#@title Append Custom Tag
%cd /content/

import os

def clone_random_repo():
  # Check if the directory already exists
  if os.path.isdir('/content/cafe-aesthetic-scorer/'):
    %cd /content/cafe-aesthetic-scorer/
    print("This folder already exists, will do a !git pull instead\n")
    !git pull
  else:
    !git clone https://github.com/Linaqruf/cafe-aesthetic-scorer/

clone_random_repo()

%cd /content/cafe-aesthetic-scorer/
#@markdown If you want to append custom tag, you can do that here. This cell will add custom tag at the beginning of lines
train_data_dir = "/content/fine_tune/train_data" #@param {type:"string"}
custom_tag = "fumo" #@param {type:"string"}
caption_extension = "txt" #@param ["txt","caption"]
#@markdown Tick this if you want to append custom tag at the end of lines instead
append = False #@param {type: "boolean"}

if append:
  append_tag = "--append"
else:
  append_tag = ""

!python custom_tagger.py \
  {train_data_dir} \
  {caption_extension} \
  {custom_tag} \
  {append_tag} 
  
  

In [None]:
#@title Create meta_clean.json 
# Change the working directory
%cd /content/kohya-trainer/finetune


#@markdown ### Define Parameters
train_data_dir = "/content/fine_tune/train_data" #@param {type:"string"}
parent_folder = os.path.dirname(train_data_dir)
meta_cap_dd = f"{parent_folder}/meta_cap_dd.json" 
meta_cap = f"{parent_folder}/meta_cap.json" 
meta_clean = f"{parent_folder}/meta_clean.json" #@param {type:"string"}
#@markdown This cell will merge all dataset label from captioning, tagging, and custom tagging into one JSON file, and later it will be used as input JSON for bucketing section.

# Check if the train_data_dir exists and is a directory
if os.path.isdir(train_data_dir):
  # Check if there are any .caption files in the train_data_dir
  if any(file.endswith('.caption') for file in os.listdir(train_data_dir)):
    # Create meta_cap.json from captions
    !python merge_captions_to_metadata.py \
      {train_data_dir} \
      {meta_cap}

  # Check if there are any .txtn files in the train_data_dir
  if any(file.endswith('.txt') for file in os.listdir(train_data_dir)):
    # Create meta_cap_dd.json from tags
    !python merge_dd_tags_to_metadata.py \
      {train_data_dir} \
      {meta_cap_dd}
else:
  print("train_data_dir does not exist or is not a directory.")

# Merge meta_cap.json to meta_cap_dd.json
if os.path.exists(meta_cap) and os.path.exists(meta_cap_dd):
  !python merge_dd_tags_to_metadata.py \
    {train_data_dir} \
    --in_json {meta_cap} \
    {meta_cap_dd}

# Clean meta_cap_dd.json and store it to meta_clean.json
if os.path.exists(meta_cap_dd):
  # Clean captions and tags in meta_cap_dd.json and store the result in meta_clean.json
  !python clean_captions_and_tags.py \
    {meta_cap_dd} \
    {meta_clean}
elif os.path.exists(meta_cap):
  # If meta_cap_dd.json does not exist, clean meta_cap.json and store the result in meta_clean.json
  !python clean_captions_and_tags.py \
    {meta_cap} \
    {meta_clean}


# Prepare Training

In [None]:
#@title Install Pre-trained Model 
%cd /content/
import os

# Check if directory exists
if not os.path.exists('pre_trained_model'):
  # Create directory if it doesn't exist
  os.makedirs('pre_trained_model')

#@title Install Pre-trained Model 

installModels = []
installVae = []
installVaeArgs = []
installv2Models = []

#@markdown ### Available Model
#@markdown Select one of available pretrained model to download:
#@markdown ### SD1.x model
modelUrl = ["", \
            "https://huggingface.co/Linaqruf/personal_backup/resolve/main/animeckpt/model-pruned.ckpt", \
            "https://huggingface.co/Linaqruf/anything-v3.0/resolve/main/Anything-V3.0-pruned.ckpt", \
            "https://huggingface.co/Linaqruf/anything-v3-better-vae/resolve/main/any-v3-fp32-better-vae.ckpt", \
            "https://huggingface.co/Rasgeath/self_made_sauce/resolve/main/Kani-anime-pruned.ckpt", \
            "https://huggingface.co/hesw23168/SD-Elysium-Model/resolve/main/Elysium_Anime_V2.ckpt", \
            "https://huggingface.co/prompthero/openjourney-v2/resolve/main/openjourney-v2.ckpt", \
            "https://huggingface.co/dreamlike-art/dreamlike-diffusion-1.0/resolve/main/dreamlike-diffusion-1.0.ckpt", \
            "https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.ckpt"]
modelList = ["", \
             "Animefull-final-pruned", \
             "Anything-V3", \
             "Anything-V3-better-vae", \
             "Kani-anime", \
             "Elysium-anime-V2", \
             "OpenJourney-V2", \
             "Dreamlike-diffusion-V1-0", \
             "Stable-Diffusion-v1-5"]
modelName = "Anything-V3-better-vae" #@param ["", "Animefull-final-pruned", "Anything-V3", "Anything-V3-better-vae", "Kani-anime", "Elysium-anime-V2", "OpenJourney-V2", "Dreamlike-diffusion-V1-0", "Stable-Diffusion-v1-5"]

#@markdown ### SD2.x model
v2ModelUrl = ["", \
              "https://huggingface.co/stabilityai/stable-diffusion-2-1-base/resolve/main/v2-1_512-ema-pruned.ckpt", \
              "https://huggingface.co/stabilityai/stable-diffusion-2-1/resolve/main/v2-1_768-ema-pruned.ckpt", \
              "https://huggingface.co/hakurei/waifu-diffusion-v1-4/resolve/main/wd-1-4-anime_e1.ckpt"]
v2ModelList = ["", \
              "stable-diffusion-2-1-base", \
              "stable-diffusion-2-1-768v", \
              "waifu-diffusion-1-4-anime-e-1"]
v2ModelName = "" #@param ["", "stable-diffusion-2-1-base", "stable-diffusion-2-1-768v", "waifu-diffusion-1-4-anime-e-1"]

#@markdown ### Custom model
#@markdown The model URL should be a direct download link.

customName = "" #@param {'type': 'string'}
customUrl = "" #@param {'type': 'string'}
#@markdown Change this part with your own huggingface token to download private model
hf_token = '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' #@param {type:"string"}
user_header = f"\"Authorization: Bearer {hf_token}\""
#@markdown Select one of the VAEs to download, select `none` for not download VAE:
vaeUrl = ["", \
          "https://huggingface.co/Linaqruf/personal_backup/resolve/main/animevae/animevae.pt", \
          "https://huggingface.co/hakurei/waifu-diffusion-v1-4/resolve/main/vae/kl-f8-anime.ckpt", \
          "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.ckpt"]
vaeList = ["none", \
           "anime.vae.pt", \
           "waifudiffusion.vae.pt", \
           "stablediffusion.vae.pt"]
vaeName = "none" #@param ["none", "anime.vae.pt", "waifudiffusion.vae.pt", "stablediffusion.vae.pt"]

# Check if user has specified a custom model
if customName != "" and customUrl != "":
  # Add custom model to list of models to install
  installModels.append((customName, customUrl))

# Check if user has selected a model
if modelName != "":
  # Map selected model to URL
  installModels.append((modelName, modelUrl[modelList.index(modelName)]))

# Check if user has selected a model
if v2ModelName != "":
  # Map selected model to URL
  installv2Models.append((v2ModelName, v2ModelUrl[v2ModelList.index(v2ModelName)]))

installVae.append((vaeName, vaeUrl[vaeList.index(vaeName)]))

def install_aria():
  # Install aria2 if it is not already installed
  if not os.path.exists('/usr/bin/aria2c'):
    !apt install -y -qq aria2

def install(checkpoint_name, url):
  if url.endswith(".ckpt"):
    dst = "/content/pre_trained_model/" + str(checkpoint_name) + ".ckpt"
  elif url.endswith(".safetensors"):
    dst = "/content/pre_trained_model/" + str(checkpoint_name) + ".safetensors"
  elif url.endswith(".pt"):
    dst = "/content/pre_trained_model/" + str(checkpoint_name)
  else:
    dst = "/content/pre_trained_model/" + str(checkpoint_name) + ".ckpt"

  if url.startswith("https://drive.google.com"):
    # Use gdown to download file from Google Drive
    !gdown --fuzzy -O  {dst} "{url}"
  elif url.startswith("magnet:?"):
    install_aria()
    # Use aria2c to download file from magnet link
    !aria2c --summary-interval=10 -c -x 10 -k 1M -s 10 -o {dst} "{url}"
  elif url.startswith("https://huggingface.co/"):
    # Use wget to download file from Hugging Face
    !wget -c --header={user_header} "{url}" -O {dst}
  else:
    # Use wget to download file from URL
    !wget -c "{url}" -O {dst}

def install_checkpoint():
  """Download every queued SD1.x/custom model, then every SD2.x model, then the selected VAE (if any)."""
  # Same order as the two queues: installModels first, installv2Models second
  for checkpoint_name, checkpoint_url in installModels + installv2Models:
    install(checkpoint_name, checkpoint_url)

  # "none" in the VAE dropdown means no VAE download
  if vaeName == "none":
    return

  for vae_name, vae_url in installVae:
    install(vae_name, vae_url)

# Download all models queued by the form above
install_checkpoint()

# Troubleshooting

# The waifudiffusion VAE may have been saved with a doubled ".pt.ckpt" suffix
file_path = "/content/pre_trained_model/waifudiffusion.vae.pt.ckpt"

# Rename it back to the plain ".pt" name when that file is present; otherwise do nothing
if os.path.exists(file_path):
    new_file_path = "/content/pre_trained_model/waifudiffusion.vae.pt"
    os.rename(file_path, new_file_path)


In [None]:
#@title Aspect Ratio Bucketing

# Change working directory
%cd /content/kohya-trainer/finetune

#@markdown ### Define parameters
V2 = False #@param{type:"boolean"}
train_data_dir = "/content/fine_tune/train_data" #@param {type:"string"}
model_dir = "/content/pre_trained_model/Anything-V3-better-vae.ckpt" #@param {'type' : 'string'} 
input_json = "/content/fine_tune/meta_clean.json" #@param {'type' : 'string'} 
output_json = "/content/fine_tune/meta_lat.json"#@param {'type' : 'string'} 
batch_size = 4 #@param {'type':'integer'}
max_resolution = "512,512" #@param ["512,512", "640,640", "768,768"] {allow-input: false}
mixed_precision = "no" #@param ["no", "fp16", "bf16"] {allow-input: false}

if V2:
  SDV2 = "--v2"
else:
  SDV2 = ""
# Run script to prepare buckets and latents
!python prepare_buckets_latents.py \
  {train_data_dir} \
  {input_json} \
  {output_json} \
  {model_dir} \
  {SDV2} \
  --batch_size {batch_size} \
  --max_resolution {max_resolution} \
  --mixed_precision {mixed_precision}



  

# Start Training



In [None]:
#@title Define Important folder
import os
import urllib.request

V2 = "none" #@param ["none", "V2_base", "V2_768_v"] {allow-input: false}
pre_trained_model_path ="/content/pre_trained_model/Anything-V3-better-vae.ckpt" #@param {'type':'string'}
meta_lat_json_dir = "/content/fine_tune/meta_lat.json" #@param {'type':'string'}
train_data_dir = "/content/fine_tune/train_data" #@param {'type':'string'}
output_dir ="/content/fine_tune/output" #@param {'type':'string'}

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# List of important folder paths
folder_paths = [
    pre_trained_model_path,
    meta_lat_json_dir,
    train_data_dir,
    output_dir,
]

# Report the state of every path, including the ones that are missing
for folder_path in folder_paths:
    if not folder_path:
        print('Empty folder path')
    elif os.path.exists(folder_path):
        print(f'{folder_path} can be used, located at {os.path.dirname(folder_path)}')
    else:
        print(f'{folder_path} does not exist')

inference_url = "https://raw.githubusercontent.com/Stability-AI/stablediffusion/main/configs/stable-diffusion/"

# Translate the V2 dropdown into the two command-line flags and pick the
# matching inference config file name
if V2 == "V2_base":
  v2_model = "--v2"
  v2_768v_model= ""
  inference_url += "v2-inference.yaml"
elif V2 == "V2_768_v":
  v2_model = "--v2"
  v2_768v_model = "--v2_parameterization"
  inference_url += "v2-inference-v.yaml"
else:
  v2_model = ""
  v2_768v_model = ""

# For SD2.x models, save the inference config as last.yaml in the output directory.
# The download is done in Python because a failing `!wget` does not raise an
# exception, so success and failure could not be told apart.
if V2 != "none":
  try:
    urllib.request.urlretrieve(inference_url, f"{output_dir}/last.yaml")
    print("File successfully downloaded")
  except OSError as err:  # urllib's URLError/HTTPError are OSError subclasses
    print("There was an error downloading the file. Please check the URL and try again.")
    print(err)



In [None]:
#@title Define Specific LoRA Training parameter
import os

#@markdown ## LoRA - Low Rank Adaptation Fine-Tuning
#@markdown If you're following `https://rentry.org/lora_train` guide, they set `network_dim` to `128`, you can change it yourself or use default parameter
network_dim = 4 #@param {'type':'number'}
# The training cell loads `networks.lora`; the default matches it so the message printed below is accurate
network_module = "networks.lora" #@param {'type':'string'}
#@markdown `Specify network_weights for resume training`
network_weights = "/content/fine_tune/output/last.safetensors" #@param {'type':'string'}
network_train_on = "all" #@param ['all','unet_only', 'text_encoder_only'] {'type':'string'}
#@markdown When neither `--network_train_unet_only` nor `--network_train_text_encoder_only` is specified (default), both Text Encoder and U-Net LoRA modules are enabled.

unet_lr = 0 #@param {'type':'number'}
text_encoder_lr = 0 #@param {'type':'number'}
#@markdown Leave the value to 0 (zero) to use default learning rate

print("Load network module :", network_module)
print(f"{network_module} dim set to :", network_dim)

# Only forward --network_weights when the file really exists. The default path
# is not there before the first training run has produced it, so forwarding it
# unconditionally would point the trainer at a missing file.
if network_weights == "":
  network_weights_value =""
  print("No LoRA weight loaded")
elif not os.path.isfile(network_weights):
  network_weights_value =""
  print(f"LoRA weight not found at {network_weights}, no LoRA weight loaded")
else:
  network_weights_value ="--network_weights " + str(network_weights)
  print("Load LoRA weight: ", network_weights)

# Select which LoRA modules are trained
if network_train_on == "unet_only":
  lora_module_to_train = "--network_train_unet_only"
  print("Enable LoRA for U-Net")
  print("Disable LoRA for Text Encoder")
elif network_train_on == "text_encoder_only":
  lora_module_to_train = "--network_train_text_encoder_only"
  print("Disable LoRA for U-Net")
  print("Enable LoRA for Text Encoder")
else:
  lora_module_to_train = ""
  print("Enable LoRA for U-Net")
  print("Enable LoRA for Text Encoder")

# A value of 0 means "no dedicated learning rate": the flag is omitted
unet_lr_value = "" if unet_lr == 0 else f"--unet_lr={unet_lr}"
text_encoder_lr_value = "" if text_encoder_lr == 0 else f"--text_encoder_lr={text_encoder_lr}"

In [None]:
#@title Training begin
#@markdown ### Define Parameters
import glob 
import math

accelerate_config = "/content/kohya-trainer/accelerate_config/config.yaml"
num_cpu_threads_per_process = 8 #@param {'type':'integer'}
train_batch_size = 4  #@param {type: "slider", min: 1, max: 10}
learning_rate ="1e-4" #@param {'type':'string'}
num_epoch = 10 #@param {'type':'integer'}
dataset_repeats = 1 #@param {'type':'integer'}
resolution = 512 #@param {'type':'integer'}
lr_scheduler = "constant" #@param  ["linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup"] {allow-input: false}
max_token_length = "225" #@param  ["150", "225"] {allow-input: false}
clip_skip = 2 #@param {type: "slider", min: 1, max: 10}
mixed_precision = "fp16" #@param ["no","fp16","bf16"] {allow-input: false}
save_model_as = 'safetensors' #@param ['default', 'ckpt', 'pt', 'safetensors'] {'type':'string'}
save_precision = 'fp16' #@param ['none','float', 'fp16', 'bf16'] {'type':'string'}
save_every_n_epochs = 1 #@param {'type':'integer'}
gradient_accumulation_steps = 1 #@param {type: "slider", min: 1, max: 10}
#@markdown ### Log And Debug
log_prefix = "LoRA-fine-tune-style1" #@param {'type':'string'}
logs_dst = "/content/fine_tune/training_logs" #@param {'type':'string'}
debug_mode = False #@param {'type':'boolean'}

# Hidden Config
save_state = False
resume_path = ""
#V2 Inference

if V2 == "none":
  penultimate_layer = "--clip_skip" + "=" + "{}".format(clip_skip)
else:
  penultimate_layer = ""

save_model_as_value_mapping = {
    "default": "",
    "ckpt": "--save_model_as=ckpt",
    "pt": "--save_model_as=pt",
    "safetensors": "--save_model_as=safetensors"
}
save_model_as_value = save_model_as_value_mapping[save_model_as]

save_state_value_mapping = {True: "--save_state", False: ""}
save_state_value = save_state_value_mapping[save_state]

if resume_path == "":
  resume_value = ""
else:
  resume_value = "--resume " + str(resume_path)

save_precision_value_mapping = {
    "none": "",
    "float": "--save_precision=float",
    "fp16": "--save_precision=fp16",
    "bf16": "--save_precision=bf16"
}
save_precision_value = save_precision_value_mapping[save_precision]

debug_mode_value_mapping = {True: "--debug", False: ""}
debug_mode_value = debug_mode_value_mapping[debug_mode]

# Get number of valid images
image_num = len(glob.glob(train_data_dir + "/*.npz"))

print("Total Train Data =", image_num)
print("Total Epoch=", num_epoch)
print("Dataset repeats =", dataset_repeats, "x")
repeats = image_num * dataset_repeats
print("Total Repeats =", image_num, "*", dataset_repeats, "=", repeats)

# calculate max_train_steps
max_train_steps = math.ceil(repeats / train_batch_size * num_epoch)
print("max_train_steps =", repeats, "/", train_batch_size, "*", num_epoch ,"=", max_train_steps, "\n")

%cd /content/kohya-trainer

!accelerate launch \
  --config_file {accelerate_config} \
  --num_cpu_threads_per_process {num_cpu_threads_per_process} \
  train_network.py \
  {v2_model} \
  {v2_768v_model} \
  --network_module=networks.lora \
  --network_dim {network_dim} \
  {network_weights_value} \
  {lora_module_to_train} \
  {unet_lr_value} \
  {text_encoder_lr_value} \
  --pretrained_model_name_or_path={pre_trained_model_path} \
  --in_json {meta_lat_json_dir} \
  --train_data_dir={train_data_dir} \
  --output_dir={output_dir} \
  --resolution {resolution} \
  --shuffle_caption \
  --train_batch_size={train_batch_size} \
  --learning_rate={learning_rate} \
  --lr_scheduler={lr_scheduler} \
  --max_token_length={max_token_length} \
  {penultimate_layer} \
  --mixed_precision={mixed_precision} \
  --dataset_repeats {dataset_repeats} \
  --max_train_steps={max_train_steps} \
  --use_8bit_adam \
  --xformers \
  --gradient_checkpointing \
  --gradient_accumulation_steps {gradient_accumulation_steps} \
  {save_model_as_value} \
  {save_state_value} \
  {resume_value} \
  --save_every_n_epochs {save_every_n_epochs} \
  {save_precision_value} \
  {debug_mode_value} \
  --logging_dir={logs_dst} \
  --log_prefix {log_prefix}



# Extras

Popular Negative Prompt:<br>
`lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry`

Waifu Diffusion 1.4 Negative Prompt:<br>

`worst quality, low quality, medium quality, deleted, lowres, comic, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry`

In [None]:
#@title Inference
#@markdown LoRA Config
network_dim = 4 #@param {'type':'number'}
network_weights = "/content/fine_tune/output/last.safetensors" #@param {'type':'string'}
#@markdown `Choose range from 0 to 1.0`
network_mul = 1.0 #@param {'type':'number'}
#@markdown Other Config
V2 = "none" #@param ["none", "V2_base", "V2_768_v"] {allow-input: false}
prompt = "masterpiece, best quality, 1girl, aqua eyes, baseball cap, blonde hair, closed mouth, earrings, green background, hat, hoop earrings, jewelry, looking at viewer, shirt, short hair, simple background, solo, upper body, yellow shirt" #@param {type: "string"}
negative = "worst quality, low quality, medium quality, deleted, lowres, comic, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry" #@param {type: "string"}
model = "/content/pre_trained_model/Anything-V3-better-vae.ckpt" #@param {type: "string"}
vae = "" #@param {type: "string"}
output_folder = "/content/tmp" #@param {type: "string"}
scale = 12 #@param {type: "slider", min: 1, max: 40}
sampler = "ddim" #@param ["ddim", "pndm", "lms", "euler", "euler_a", "heun", "dpm_2", "dpm_2_a", "dpmsolver","dpmsolver++", "dpmsingle", "k_lms", "k_euler", "k_euler_a", "k_dpm_2", "k_dpm_2_a"]
steps = 28 #@param {type: "slider", min: 1, max: 100}
precision = "fp16" #@param ["fp16", "bf16"] {allow-input: false}
width = 512 #@param {type: "integer"}
height = 768 #@param {type: "integer"}
images_per_prompt = 4 #@param {type: "integer"}
batch_size = 4 #@param {type: "integer"}
clip_skip = 2 #@param {type: "slider", min: 1, max: 40}
seed = -1 #@param {type: "integer"}

if vae == "":
  load_vae =""
else:
  load_vae ="--vae " + str(vae)

if V2 == "V2_base":
  v2_model = "--v2"
  v2_768v_model= ""
elif V2 == "V2_768_v":
  v2_model = "--v2"
  v2_768v_model = "--v2_parameterization"
else:
  v2_model = ""
  v2_768v_model = ""

if V2 == "none":
  penultimate_layer = "--clip_skip" + "=" + "{}".format(clip_skip)
else:
  penultimate_layer = ""

if seed <= 0:
  seed_number = ""
else:
  seed_number = "--seed" + "=" + "{}".format(seed)

%cd /content/kohya-trainer
!python gen_img_diffusers.py \
  {v2_model} \
  {v2_768v_model} \
  --ckpt {model} \
  --outdir {output_folder} \
  --xformers \
  {load_vae} \
  --{precision} \
  --W {width} \
  --H {height} \
  {seed_number} \
  --scale {scale} \
  --sampler {sampler} \
  --steps {steps} \
  --max_embeddings_multiples 3 \
  --batch_size {batch_size} \
  --images_per_prompt {images_per_prompt} \
  {penultimate_layer} \
  --network_module=networks.lora \
  --network_weight="{network_weights}" \
  --network_mul 1.0 \
  --network_dim {network_dim} \
  --prompt "{prompt} --n {negative}"



In [None]:
#@title Visualize loss graph (Optional)
# Directory passed to TensorBoard's --logdir below
training_logs_path = "/content/fine_tune/training_logs" #@param {type : "string"}

%cd /content/kohya-trainer
# Load the TensorBoard notebook extension, then start it on the log directory
%load_ext tensorboard
%tensorboard --logdir {training_logs_path}

## Commit trained model to Huggingface

### To Commit models:
1. Create a huggingface repository for your model.
2. Clone your model to this Colab session.
3. Move the necessary files to your repository to save your trained model to huggingface. These files are located in `/content/fine_tune/output` folder:
   - `epoch-nnnnn.ckpt/.safetensors/.pt` and/or
   - `last.ckpt/.safetensors/.pt`
4. Commit your model to huggingface.

### To Commit datasets:
1. Create a huggingface repository for your datasets.
2. Clone your datasets to this Colab session.
3. Move the necessary files to your repository so that you can resume training without rebuilding your dataset with this notebook:
  - The `train_data` folder.
  - The `meta_lat.json` file.
  - The `last-state` folder.
4. Commit your datasets to huggingface.



In [None]:
#@title Clone Model or Datasets

#@markdown Type of item to clone (model or dataset)
type_of_item = "dataset" #@param ["model", "dataset"]

#@markdown Install or uninstall git lfs
install_git_lfs = False #@param {'type':'boolean'}

%cd /content
username = "your-username" #@param {'type': 'string'}
model_repo = "your-model-repo" #@param {'type': 'string'}
datasets_repo = "your-dataset-repo" #@param {'type': 'string'}

if type_of_item == "model":
  Repository_url = f"https://huggingface.co/{username}/{model_repo}"
elif type_of_item == "dataset":
  Repository_url = f"https://huggingface.co/datasets/{username}/{datasets_repo}"

if install_git_lfs:
  !git lfs install
else:
  !git lfs uninstall
!git clone {Repository_url}


In [None]:
#@title Commit Model or Datasets to Huggingface

#@markdown Type of item to commit (model or dataset)
# NOTE(review): type_of_item is not read anywhere else in this cell.
type_of_item = "model" #@param ["model", "dataset"]

%cd /content
#@markdown Go to your model or dataset path
item_path = "your-cloned-repo-name-or-path" #@param {'type': 'string'}

#@markdown #Git Commit

#@markdown Set **git commit identity**
email = "your-email" #@param {'type': 'string'}
name = "your-huggingface-username" #@param {'type': 'string'}
#@markdown Set **commit message**
commit_m = "feat: upload prototype model" #@param {'type': 'string'}

# Enter the cloned repository; every git command below runs inside it
%cd {item_path}
!git lfs install
!huggingface-cli lfs-enable-largefiles .
# Stage everything in the repository directory
!git add .
# NOTE(review): this only prints the help text of `git lfs smudge` — confirm it is needed
!git lfs help smudge
# The commit identity is set globally before committing
!git config --global user.email "{email}"
!git config --global user.name "{name}"
!git commit -m "{commit_m}"
!git push
