# Установка зависимостей

In [None]:
import os
%store -r

root_dir = "/content"
%store root_dir
repo_dir = str(root_dir)+"/kohya-trainer"
%store repo_dir
tools_dir = str(root_dir)+"/kohya-trainer/tools"
%store tools_dir 
finetune_dir = str(root_dir)+"/kohya-trainer/finetune"
%store finetune_dir
training_dir = str(root_dir)+"/dreambooth"
%store training_dir

repo_url = "https://github.com/Linaqruf/kohya-trainer"


%cd {root_dir}
!git clone {repo_url} {repo_dir}
os.makedirs(repo_dir, exist_ok=True)
os.makedirs(tools_dir, exist_ok=True)
os.makedirs(finetune_dir, exist_ok=True)
os.makedirs(training_dir, exist_ok=True)


In [None]:
# Restore the variables persisted with %store by earlier cells.
%store -r

# Work from the repository folder so relative paths such as requirements.txt resolve.
%cd {repo_dir}

# Location of the accelerate config file; install_dependencies() creates it when missing.
accelerate_config = os.path.join(repo_dir, "accelerate_config/config.yaml")
%store accelerate_config

def install_dependencies():
    !pip -q install --upgrade gallery-dl gdown imjoy-elfinder
    !apt -q install liblz4-tool aria2
    !pip -q install --upgrade -r requirements.txt
    !pip install xformers

    from accelerate.utils import write_basic_config
    if not os.path.exists(accelerate_config):
        write_basic_config(save_location=accelerate_config)

install_dependencies()


In [None]:
#@title ## 1.3 Вход в Huggingface hub
from huggingface_hub import login
# Restore the variables persisted with %store by earlier cells.
%store -r

# Colab form field holding the Hugging Face access token entered by the user.
#@markdown Введите свой huggingface-token
write_token = "" #@param {type:"string"}
# Authenticate with the token from the form field above.
login(write_token, add_to_git_credential=True)

# NOTE(review): this persists the token through IPython's %store — confirm
# that storing a secret this way is acceptable.
%store write_token


In [None]:
#@title ## 1.4 Скачивание SD 1.5
import os
# Restore the variables persisted with %store by earlier cells.
%store -r

# Downloads below use paths relative to root_dir.
%cd {root_dir}

# Checkpoint to fine-tune and the base name it is saved under
# (install_model appends the file extension).
modelUrl = "https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.ckpt"
modelName = "Stable-Diffusion-v1-5"

# VAE weights and the file name they are saved under.
vaeUrl = 'https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.ckpt'
vaeName = "stablediffusion.vae.pt" 

def install_model(checkpoint_name, url):
  ext = "ckpt" if url.endswith(".ckpt") else "safetensors"

  hf_token = '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' 
  user_header = f"\"Authorization: Bearer {hf_token}\""
  !aria2c --console-log-level=error --summary-interval=10 --header={user_header} -c -x 16 -k 1M -s 16 -d {root_dir}/pre_trained_model -o {checkpoint_name}.{ext} "{url}"

def install_vae(vae_name, url):
  hf_token = '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'
  user_header = f"\"Authorization: Bearer {hf_token}\""
  !aria2c --console-log-level=error --summary-interval=10 --header={user_header} -c -x 16 -k 1M -s 16 -o vae/{vae_name} "{url}"


# Fetch the base checkpoint and the VAE configured above.
install_model(modelName, modelUrl)
install_vae(vaeName, vaeUrl)

In [None]:
#@title ## 1.5 Объявление директории для датасета
# Restore the variables persisted with %store by earlier cells.
%store -r

# Root folder for the training images; the regularisation folder "reg_data"
# is placed next to it (same parent directory).
parent_directory = "/content/dreambooth/train_data" #@param {type: "string"}
%store parent_directory
reg_folder_directory = os.path.join(os.path.dirname(parent_directory), "reg_data")
%store reg_folder_directory

# Repeat counts and names passed to get_folder_name() to build the
# "<repeats>_<name>" sub-folder names.
reg_repeats = 1 #@param {type: "integer"}
train_repeats = 22 #@param {type: "integer"}
concept_name = "LOGOGENA" #@param {type: "string"}
class_name = "logo_generator" #@param {type: "string"}

def get_folder_name(repeats, class_name, concept_name=None):
  """Build a dataset sub-folder name of the form ``<repeats>_<label>``.

  The label is "<concept_name> <class_name>" when both are given, the class
  name alone when there is no concept, and the concept name (formatted as-is,
  even when it is None) when there is no class.
  """
  if not class_name:
    return f"{repeats}_{concept_name}"
  if not concept_name:
    return f"{repeats}_{class_name}"
  return f"{repeats}_{concept_name} {class_name}"

# The training folder name carries concept and class; the regularisation
# folder name carries the class only.
train_folder = get_folder_name(train_repeats, class_name, concept_name=concept_name)
reg_folder = get_folder_name(reg_repeats, class_name)

train_data_dir = os.path.join(parent_directory, train_folder)
reg_data_dir = os.path.join(reg_folder_directory, reg_folder)

# Create the whole folder tree up front; folders that already exist are kept.
for dataset_dir in (parent_directory, reg_folder_directory, train_data_dir, reg_data_dir):
  os.makedirs(dataset_dir, exist_ok=True)


In [None]:
#@title ## 4.1. Конвертирование RGBA в RGB

import random
from tqdm import tqdm
import concurrent.futures
from PIL import Image
# When enabled, each flattened image gets a random background colour from the
# palette below instead of white.
random_color = False #@param {type:"boolean"}

# Number of images handed to the thread pool per progress-bar step.
batch_size = 32 #@param {type:"number"}

# Only PNG and WebP files are candidates for conversion.
images = [
    file_name
    for file_name in os.listdir(train_data_dir)
    if file_name.endswith(('.png', '.webp'))
]

# Candidate background colours (RGB): white, black, then the primary and
# secondary colours.
background_colors = [
    (255, 255, 255),
    (0, 0, 0),
    (255, 0, 0),
    (0, 255, 0),
    (0, 0, 255),
    (255, 255, 0),
    (255, 0, 255),
    (0, 255, 255),
]

def process_image(image_name):
    """Flatten transparency and normalise the format of one training image.

    Images with an alpha channel (modes RGBA/LA, or P/L/RGB images carrying a
    ``transparency`` entry in their info) are composited onto a solid
    background: a random entry of ``background_colors`` when ``random_color``
    is set, white otherwise. ``.webp`` files are re-saved as ``.jpg`` and the
    original file is deleted; every other file is re-saved in place as PNG.

    Args:
        image_name: File name (not a path) inside ``train_data_dir``.
    """
    source_path = os.path.join(train_data_dir, image_name)
    is_webp = image_name.endswith('.webp')
    # Swap only the trailing extension; str.replace would also rewrite any
    # earlier ".webp" that happens to appear inside the name.
    jpg_name = image_name[:-len('.webp')] + '.jpg' if is_webp else None

    # The context manager releases the file handle once the result is written.
    with Image.open(source_path) as opened:
        # Transparency can be stored outside an alpha band (e.g. a PNG tRNS
        # chunk), exposed as info["transparency"]; expand it to RGBA so these
        # images are flattened as well.
        has_hidden_alpha = (
            opened.mode in ('P', 'L', 'RGB') and 'transparency' in opened.info
        )
        img = opened.convert('RGBA') if has_hidden_alpha else opened

        flattened = img.mode in ('RGBA', 'LA')
        if flattened:
            if random_color:
                background_color = random.choice(background_colors)
            else:
                background_color = (255, 255, 255)
            output = Image.new('RGB', img.size, background_color)
            # The last band is the alpha channel; it is used as the paste mask.
            output.paste(img, mask=img.split()[-1])
        else:
            output = img

        if is_webp:
            output.save(os.path.join(train_data_dir, jpg_name), "JPEG")
        else:
            output.save(source_path, "PNG")

    if is_webp:
        # The JPEG copy replaces the WebP original.
        os.remove(source_path)
        print(f" Converted image: {image_name} to {jpg_name}")
    elif flattened:
        print(f" Converted image: {image_name}")

# Ceiling division: no trailing empty batch when len(images) is an exact
# multiple of batch_size.
num_batches = (len(images) + batch_size - 1) // batch_size

with concurrent.futures.ThreadPoolExecutor() as executor:
    for i in tqdm(range(num_batches)):
        start = i * batch_size
        batch = images[start:start + batch_size]
        # Track each future so a failure can be reported with its file name;
        # results of executor.map() that are never consumed hide exceptions.
        pending = {executor.submit(process_image, name): name for name in batch}
        # Waiting for the batch here makes the progress bar reflect finished work.
        for future in concurrent.futures.as_completed(pending):
            try:
                future.result()
            except Exception as err:
                # Report and continue: one unreadable file should not stop the rest.
                print(f" Failed to convert {pending[future]}: {err}")

print("All images have been converted")


# Обучение модели



In [None]:
#@title ## 3.1. Define Important folder
from google.colab import drive
# Restore the variables persisted with %store by earlier cells.
%store -r

# Project name and the model/VAE files the training run reads.
project_name = "LOGOGENA" #@param {type:"string"}
pretrained_model_name_or_path = "/content/pre_trained_model/Stable-Diffusion-v1-5.ckpt" #@param {type:"string"}
vae = "/content/vae/stablediffusion.vae.pt"  #@param {type:"string"}
# Dataset folders. NOTE(review): these literals replace any values restored
# by %store -r above — confirm they match the dataset cell's settings.
train_folder_directory = "/content/dreambooth/train_data" #@param {'type':'string'}
%store train_folder_directory
reg_folder_directory = "/content/dreambooth/reg_data" 
%store reg_folder_directory
# Default output folder; replaced below when saving to Google Drive is enabled.
output_dir = "/content/dreambooth/output" 
resume_path =""
inference_url = "https://raw.githubusercontent.com/Stability-AI/stablediffusion/main/configs/stable-diffusion/"

# Form checkbox: when ticked, the model output is written to Google Drive.
#@markdown Поставьте галку для того, чтобы выход модели сохранялся на диске
output_to_drive = False #@param {'type':'boolean'}

if output_to_drive:
  output_dir = "/content/drive/MyDrive/dreambooth/output"

  # Mount Drive only if it is not mounted yet.
  if not os.path.exists("/content/drive"):
    drive.mount('/content/drive')  

os.makedirs(output_dir, exist_ok=True)


Stored 'train_folder_directory' (str)
Stored 'reg_folder_directory' (str)


In [None]:
#@title ## 3.2. Объявление параметров и запуск обучения
%store -r


network_dim = 128 #@param {'type':'number'}
network_alpha = 128 #@param {'type':'number'}
network_module = "networks.lora"

network_train_on = "both" 

learning_rate = 1e-4 #@param {'type':'number'}
unet_lr = 0 
text_encoder_lr = 5e-4 #@param {'type':'number'}
lr_scheduler = "constant" 

lr_scheduler_num_cycles = 1 
lr_scheduler_power = 1 

no_metadata = False 

train_batch_size = 1 #@param {type:"number"}
num_epochs = 50 #@param {type:"number"}
caption_extension = '.txt' #@param {'type':'string'}
mixed_precision = "fp16" #@param ["no","fp16","bf16"] {allow-input: false}
save_precision = "fp16" #@param ["float", "fp16", "bf16"] {allow-input: false}
save_n_epochs_type = "save_every_n_epochs" #@param ["save_every_n_epochs", "save_n_epoch_ratio"] {allow-input: false}
save_n_epochs_type_value = 5 #@param {type:"number"}
save_model_as = "safetensors" #@param ["ckpt", "pt", "safetensors"] {allow-input: false}
resolution = 512 #@param {type:"slider", min:512, max:1024, step:128}
enable_bucket = False
min_bucket_reso = 320 if resolution > 640 else 256
max_bucket_reso = 1280 if resolution > 640 else 1024
cache_latents = True 
max_token_length = 225 #@param {type:"number"}
clip_skip = 1 #@param {type:"number"}
use_8bit_adam = True 
gradient_checkpointing = False 
gradient_accumulation_steps = 1 
seed = 0 
logging_dir = "/content/dreambooth/logs"
log_prefix = project_name
additional_argument = "--shuffle_caption --xformers" 
print_hyperparameter = True 
prior_loss_weight = 1.0
%cd {repo_dir}

# Assemble the `accelerate launch ... train_network.py` shell command from the
# settings in this cell. Each `{... if ... else ""}` field contributes its flag
# only when the condition holds and an empty string otherwise. The trailing
# backslashes sit inside a non-raw string, so every backslash-newline pair is
# dropped and the flags end up on one logical line.
# v2, v_parameterization, network_weights and training_comment must already be
# defined before this statement runs.
train_command=f"""
accelerate launch --config_file={accelerate_config} --num_cpu_threads_per_process=8 train_network.py \
  {"--v2" if v2 else ""} \
  {"--v_parameterization" if v2 and v_parameterization else ""} \
  --network_dim={network_dim} \
  --network_alpha={network_alpha} \
  --network_module={network_module} \
  {"--network_weights=" + network_weights if network_weights else ""} \
  {"--network_train_unet_only" if network_train_on == "unet_only" else ""} \
  {"--network_train_text_encoder_only" if network_train_on == "text_encoder_only" else ""} \
  --learning_rate={learning_rate} \
  {"--unet_lr=" + format(unet_lr) if unet_lr !=0 else ""} \
  {"--text_encoder_lr=" + format(text_encoder_lr) if text_encoder_lr !=0 else ""} \
  {"--no_metadata" if no_metadata else ""} \
  {"--training_comment=" + training_comment if training_comment and not no_metadata else ""} \
  --lr_scheduler={lr_scheduler} \
  {"--lr_scheduler_num_cycles=" + format(lr_scheduler_num_cycles) if lr_scheduler == "cosine_with_restarts" else ""} \
  {"--lr_scheduler_power=" + format(lr_scheduler_power) if lr_scheduler == "polynomial" else ""} \
  --pretrained_model_name_or_path={pretrained_model_name_or_path} \
  {"--vae=" + vae if vae else ""} \
  {"--caption_extension=" + caption_extension if caption_extension else ""} \
  --train_data_dir={train_folder_directory} \
  --reg_data_dir={reg_folder_directory} \
  --output_dir={output_dir} \
  --prior_loss_weight={prior_loss_weight} \
  {"--resume=" + resume_path if resume_path else ""} \
  {"--output_name=" + project_name if project_name else ""} \
  --mixed_precision={mixed_precision} \
  --save_precision={save_precision} \
  {"--save_every_n_epochs=" + format(save_n_epochs_type_value) if save_n_epochs_type=="save_every_n_epochs" else ""} \
  {"--save_n_epoch_ratio=" + format(save_n_epochs_type_value) if save_n_epochs_type=="save_n_epoch_ratio" else ""} \
  --save_model_as={save_model_as} \
  --resolution={resolution} \
  {"--enable_bucket" if enable_bucket else ""} \
  {"--min_bucket_reso=" + format(min_bucket_reso) if enable_bucket else ""} \
  {"--max_bucket_reso=" + format(max_bucket_reso) if enable_bucket else ""} \
  {"--cache_latents" if cache_latents else ""} \
  --train_batch_size={train_batch_size} \
  --max_token_length={max_token_length} \
  {"--use_8bit_adam" if use_8bit_adam else ""} \
  --max_train_epochs={num_epochs} \
  {"--seed=" + format(seed) if seed > 0 else ""} \
  {"--gradient_checkpointing" if gradient_checkpointing else ""} \
  {"--gradient_accumulation_steps=" + format(gradient_accumulation_steps) } \
  {"--clip_skip=" + format(clip_skip) if v2 == False else ""} \
  --logging_dir={logging_dir} \
  --log_prefix={log_prefix} \
  {additional_argument}
  """

debug_params = ["v2", \
                "v_parameterization", \
                "network_dim", \
                "network_alpha", \
                "network_module", \
                "network_weights", \
                "network_train_on", \
                "learning_rate", \
                "unet_lr", \
                "text_encoder_lr", \
                "no_metadata", \
                "training_comment", \
                "lr_scheduler", \
                "lr_scheduler_num_cycles", \
                "lr_scheduler_power", \
                "pretrained_model_name_or_path", \
                "vae", \
                "caption_extension", \
                "train_folder_directory", \
                "reg_folder_directory", \
                "output_dir", \
                "prior_loss_weight", \
                "resume_path", \
                "project_name", \
                "mixed_precision", \
                "save_precision", \
                "save_n_epochs_type", \
                "save_n_epochs_type_value", \
                "save_model_as", \
                "resolution", \
                "enable_bucket", \
                "min_bucket_reso", \
                "max_bucket_reso", \
                "cache_latents", \
                "train_batch_size", \
                "max_token_length", \
                "use_8bit_adam", \
                "num_epochs", \
                "seed", \
                "gradient_checkpointing", \
                "gradient_accumulation_steps", \
                "clip_skip", \
                "logging_dir", \
                "log_prefix", \
                "additional_argument"]



In [None]:
#@title ## 3.3. Вывод обучающей кривой
training_logs_path = "/content/dreambooth/logs" #@param {type : "string"}

%cd /content/kohya-trainer
# %load_ext tensorboard
%tensorboard --logdir {training_logs_path} --port 6007