In [1]:
import os
import shutil
import subprocess
import urllib.request
import configparser
import random
import concurrent.futures
import time
from library.studiolabs_utils import (
    clone_or_update_repo,
    install_dependencies,
    create_dirs,
    download_model,
    preProcessingParams,
    clean_directory,
    process_image,
    find_images,
    convertImages,
    preprocess_images,
    run_captioning_process,
    custom_caption_tag
)


# End-to-end preparation cell: resolve directories, fetch the trainer repo and
# its dependencies, download the base model / VAE, clean and convert the
# training images, then caption them.
print('1.0 DEFINE DIRECTORIES')
# create_dirs() returns a mapping that the rest of this cell indexes by name.
dirs = create_dirs()
print('2.0 CLONE REPO AND INSTALL DIRECTORIES')
# Read the config.ini file
config = configparser.ConfigParser()
config.read(dirs['trainer_config'])
print(dirs['accelerate_config'])
    
clone_or_update_repo(
    url=config.get('UserSettings', 'repo_url'),
    save_directory=dirs['root_dir'],
    branch = config.get('UserSettings', 'branch')
    )


install_dependencies(
    dirs,
    verbose=config.getboolean('UserSettings', 'verbose'), 
    install_xformers=config.getboolean('UserSettings', 'install_xformers')
    )

# Imported after install_dependencies() rather than at the top of the cell —
# presumably because Pillow is only guaranteed to exist once that step has run;
# TODO confirm. `Image` itself is not used further down in this cell.
from PIL import Image

# Run `pip cache purge` through the shell; the exit status is not checked.
command = "pip cache purge"
subprocess.run(command, shell=True)

# Get the parameter values from the config file
model_url = config.get('DownloadModels', 'model_url')
vae_url = config.get('DownloadModels', 'vae_url')


# Download the model file if it doesn't exist
# A failed download is only reported: the exception is swallowed and the cell
# carries on to the preprocessing steps.

try:
    download_model(model_url, dirs['pretrained_dir'])
except Exception as e:
    print(f"Failed to download the model from {model_url}. Error: {str(e)}")

try:
    download_model(vae_url, dirs['vae_dir'])
except Exception as e:
    print(f"Failed to download the VAE model from {vae_url}. Error: {str(e)}")


print('4.1 DATA CLEANING')
train_image_folder = os.path.join(dirs['train_data_dir'],config.get('ImagePreprocessing', 'train_image_dir'))

# NOTE(review): these three are read with config.get(), so they are strings
# (e.g. "False" is truthy), unlike the getboolean() calls above. Confirm that
# convertImages() expects a string. `random_color` and `recursive` are not
# used again in this cell.
convert = config.get('ImagePreprocessing', 'convert')
random_color = config.get('ImagePreprocessing', 'random_color')
recursive = config.get('ImagePreprocessing', 'recursive')
# `background_colors` is unpacked here but not used again in this cell.
batch_size, supported_types, background_colors = preProcessingParams()

clean_directory(train_image_folder, supported_types)
images = find_images(train_image_folder)
# NOTE(review): floor-divide-plus-one gives one extra batch whenever
# len(images) is an exact multiple of batch_size (and 1 batch for 0 images);
# confirm convertImages() tolerates an empty trailing batch.
num_batches = len(images) // batch_size + 1
convertImages(images,convert,batch_size,num_batches)

print('4.2.1. BLIP Captioning')
# Use BLIP for general images
# Use Waifu for anime/manga images
# Specified in the config file

print(train_image_folder)
run_captioning_process(config, train_image_folder, dirs['finetune_dir'])
    
print('4.2.3. Custom Caption/Tag')
custom_caption_tag(config, train_image_folder)

1.0 DEFINE DIRECTORIES
2.0 CLONE REPO AND INSTALL DIRECTORIES
/home/studio-lab-user/sagemaker-studiolab-notebooks/kohya-trainer/accelerate_config/config.yaml
kohya-trainer folder already exists
Installation can take multiple minutes, enable "Verbose" to see progress


  DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.
   pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.


Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done




  current version: 4.10.3
  latest version: 23.5.0

Please update conda by running

    $ conda update -n base conda





# All requested packages already installed.





Files removed: 32
Downloading model from https://civitai.com/api/download/models/29460...
Failed to download the model from https://civitai.com/api/download/models/29460. Error: HTTP Error 403: Forbidden
Model already exists.
4.1 DATA CLEANING


100%|██████████| 1/1 [00:00<00:00, 921.42it/s]

All images have been converted
4.2.1. BLIP Captioning
/home/studio-lab-user/sagemaker-studiolab-notebooks/train_data/Miroslav3
python /home/studio-lab-user/sagemaker-studiolab-notebooks/kohya-trainer/finetune/make_captions.py "/home/studio-lab-user/sagemaker-studiolab-notebooks/train_data/Miroslav3" --batch_size=8 --beam_search --min_length=5 --max_length=75 --debug --caption_extension=".caption" --max_data_loader_n_workers=2 





Current Working Directory is:  /home/studio-lab-user/sagemaker-studiolab-notebooks/kohya-trainer
load images from /home/studio-lab-user/sagemaker-studiolab-notebooks/train_data/Miroslav3
found 5 images.
loading BLIP caption: https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_large_caption.pth
load checkpoint from https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_large_caption.pth
BLIP loaded


100%|██████████| 1/1 [00:00<00:00,  4.38it/s]


/home/studio-lab-user/sagemaker-studiolab-notebooks/train_data/Miroslav3/1.png a man with a beard and a white shirt
/home/studio-lab-user/sagemaker-studiolab-notebooks/train_data/Miroslav3/2.png a man with a beard and a white shirt
/home/studio-lab-user/sagemaker-studiolab-notebooks/train_data/Miroslav3/3.png a man with a beard and a white shirt
/home/studio-lab-user/sagemaker-studiolab-notebooks/train_data/Miroslav3/4.png a man with a beard and a white shirt
/home/studio-lab-user/sagemaker-studiolab-notebooks/train_data/Miroslav3/5.png a man with a beard and a white shirt
done!
4.2.3. Custom Caption/Tag


In [None]:
#@title ## 5.5. Start Training

#@markdown Check your config here if you want to edit something: 
#@markdown - `sample_prompt` : /content/dreambooth/config/sample_prompt.txt
#@markdown - `config_file` : /content/dreambooth/config/config_file.toml
#@markdown - `dataset_config` : /content/dreambooth/config/dataset_config.toml

#@markdown Generated sample can be seen here: /content/dreambooth/output/sample

#@markdown You can import config from another session if you want.

import getpass

import toml

# NOTE(review): dreambooth_training_dir and accelerate_config are not defined
# in this cell; they must already exist in the kernel when it runs.
sample_prompt = os.path.join(dreambooth_training_dir,"config/sample_prompt.txt") #@param {type:'string'}
config_file = os.path.join(dreambooth_training_dir,"config/config_file.toml") #@param {type:'string'}
dataset_config = os.path.join(dreambooth_training_dir,"config/dataset_config.toml") #@param {type:'string'}

# Never keep a Hub token in the notebook source: take it from the HF_TOKEN
# environment variable, otherwise ask for it without echoing. The token that
# used to be hard-coded here has been exposed and should be revoked.
huggingface_token = os.environ.get("HF_TOKEN") or getpass.getpass("Hugging Face write token: ")

# --- Patch the trainer config (model, Hub upload target, run name, schedule) ---
with open(config_file, 'r') as configfile:
    config = toml.load(configfile)

config['model_arguments']['pretrained_model_name_or_path'] = "/home/studio-lab-user/sagemaker-studiolab-notebooks/pretrained_model/realisticVisionV20_v20NoVAE.safetensors"
# NOTE(review): the VAE path is the same file as the base model — confirm this is intended.
config['model_arguments']['vae'] = "/home/studio-lab-user/sagemaker-studiolab-notebooks/pretrained_model/realisticVisionV20_v20NoVAE.safetensors"
config['huggingface_arguments']['huggingface_path_in_repo'] = "mymodel"
# NOTE(review): the token is written into config_file on disk below; make sure
# that file is not published together with the model.
config['huggingface_arguments']['huggingface_token'] = huggingface_token
config['huggingface_arguments']['huggingface_repo_id'] = "xxthekingxx/myMiroslav7"
config['training_arguments']['output_name'] = "Miroslav7"
config['training_arguments']['log_prefix'] = "Miroslav7"
config['training_arguments']['max_train_steps'] = 2500
config['optimizer_arguments']['learning_rate'] = 1e-6

with open(config_file, 'w') as configfile:
    toml.dump(config, configfile)

# --- Point the dataset config at the training images ---
# `config` is deliberately rebound to the dataset config from here on.
with open(dataset_config, 'r') as configfile:
    config = toml.load(configfile)

# Show the previous image directory before overwriting it.
print(config['datasets'][0]['subsets'][0]['image_dir'])
config['datasets'][0]['subsets'][0]['image_dir'] = "/home/studio-lab-user/sagemaker-studiolab-notebooks/train_data/Miroslav3"

with open(dataset_config, 'w') as configfile:
    toml.dump(config, configfile)


# Options for `accelerate launch` itself.
accelerate_conf = {
    "config_file" : accelerate_config,
    "num_cpu_threads_per_process" : 1,
}

# Options forwarded to the training script.
train_conf = {
    "sample_prompts" : sample_prompt,
    "dataset_config" : dataset_config,
    "config_file" : config_file
}

def train(config):
    """Render an option mapping as a command-line argument string.

    Each entry contributes one fragment followed by a single space, in the
    mapping's iteration order:

    * key starting with ``_``  -> the value in double quotes (positional)
    * ``str`` value            -> ``--key="value"``
    * ``True``                 -> ``--key`` (``False`` contributes nothing)
    * ``int`` / ``float``      -> ``--key=value``

    Values of any other type (including ``None``) are skipped.

    Args:
        config: mapping of option name to value.

    Returns:
        The concatenated fragments; an empty string for an empty mapping.
    """
    fragments = []
    for name, value in config.items():
        if name.startswith("_"):
            fragments.append(f'"{value}"')
        elif isinstance(value, str):
            fragments.append(f'--{name}="{value}"')
        elif isinstance(value, bool):
            # bool is checked before the numeric case because it is an int subclass
            if value:
                fragments.append(f"--{name}")
        elif isinstance(value, (float, int)):
            fragments.append(f"--{name}={value}")

    return "".join(f"{fragment} " for fragment in fragments)

# Turn both option dicts into argument strings and splice them into the
# `accelerate launch` command line.
accelerate_args = train(accelerate_conf)
train_args = train(train_conf)
final_args = f"accelerate launch {accelerate_args} train_db.py {train_args}"
print(final_args)
# train_db.py is referenced by a relative path, so the working directory is
# switched first. NOTE(review): repo_dir is not defined in this cell; it must
# already exist in the kernel.
os.chdir(repo_dir)
# IPython shell escape: run the assembled command.
!{final_args}

In [None]:
!pip install fonttools

In [None]:
import os
from PIL import Image, ImageDraw, ImageFont
import textwrap
import matplotlib.font_manager as fm

def get_font():
    """Load the TrueType font used for the grid labels.

    The font is picked by position from matplotlib's registered font list.
    When fewer fonts are registered than that position requires, matplotlib's
    default font is used instead of raising ``IndexError``.

    Returns:
        tuple: ``(font, fontsize)`` — the loaded ``ImageFont`` and its size.
    """
    fontsize = 40
    # NOTE(review): which font sits at this position depends on the machine's
    # installed fonts; prefer selecting by family name once one is agreed on.
    preferred_index = 17

    # Get the list of font names
    font_names = [f.name for f in fm.fontManager.ttflist]
    if len(font_names) > preferred_index:
        font_path = fm.findfont(font_names[preferred_index])
    else:
        # Too few fonts registered: fall back to matplotlib's default font.
        font_path = fm.findfont(fm.FontProperties())

    font = ImageFont.truetype(font_path, fontsize)
    return font, fontsize

def fetch_image_locations(directory):
    """List the ``.png`` and ``.jpg`` entries found directly in ``directory``.

    Args:
        directory: path of the folder to scan (not recursive).

    Returns:
        list[str]: bare file names, in the order ``os.listdir`` yields them.
        The suffix match is case-sensitive.
    """
    wanted_suffixes = ('.png', '.jpg')
    matches = []
    for entry in os.listdir(directory):
        if entry.endswith(wanted_suffixes):
            matches.append(entry)
    return matches

def main():
    """Compose the training sample images into one labelled grid and save it.

    Images are read from the hard-coded ``directory``. Each file name is split
    on ``_``: field 1 is taken as the prompt (one grid row per distinct value)
    and field 2, minus its first character, as the epoch (one grid column per
    distinct value). Epochs are written along the top edge, prompts down the
    left edge, and the finished canvas is written to ``grid_save_dir``.

    Prompt labels are drawn once, before the images are pasted, so an image
    is never covered by label text.

    Raises:
        IndexError: if a matched file name has fewer than three
            ``_``-separated fields.
    """
    top_space = 40          # height reserved for the epoch header row
    left_space = 400        # width reserved for the prompt label column
    text_offset = 4
    padding = 20            # Padding between images
    text_width_limit = 750
    max_prompt_chars = 200  # longer prompts are cut and suffixed with '...'
    # Directory path and grid settings
    directory = '/home/studio-lab-user/sagemaker-studiolab-notebooks/dreambooth/output/sample/Miroslav7/'
    grid_save_dir = "/home/studio-lab-user/sagemaker-studiolab-notebooks/dreambooth/output/sample/Miroslav7_grid.png"

    # Fetch image files from the directory and parse each name exactly once
    image_locations = fetch_image_locations(directory)
    keyed_images = []
    for img_name in image_locations:
        fields = img_name.split('_')
        keyed_images.append((img_name, fields[1], fields[2][1:]))

    prompts = sorted({prompt for _, prompt, _ in keyed_images})
    epochs = sorted({epoch for _, _, epoch in keyed_images})
    font, fontsize = get_font()

    # Find the maximum width and height among all images; the context manager
    # closes each file handle as soon as its size has been read.
    max_image_width = 0
    max_image_height = 0
    for image_file in image_locations:
        with Image.open(os.path.join(directory, image_file)) as img:
            width, height = img.size
        max_image_width = max(max_image_width, width)
        max_image_height = max(max_image_height, height)

    canvas_width = ((max_image_width + padding) * len(epochs)) + padding + left_space
    canvas_height = ((max_image_height + padding) * len(prompts)) + padding + top_space
    canvas = Image.new('RGB', (canvas_width, canvas_height), 'white')
    draw = ImageDraw.Draw(canvas)

    def column_x(col):
        """Left edge of grid column ``col``."""
        return (max_image_width + padding) * col + padding + left_space

    def row_y(row):
        """Top edge of grid row ``row``."""
        return top_space + (max_image_height + padding) * row + padding

    # Prompt labels: one wrapped block per row, drawn a single time.
    wrap_columns = int(text_width_limit / fontsize)
    for k, prompt in enumerate(prompts):
        text = prompt
        # Shorten the text if it exceeds max_prompt_chars characters
        if len(text) > max_prompt_chars:
            text = text[:max_prompt_chars] + '...'

        line_y = row_y(k)
        for line in textwrap.wrap(text, width=wrap_columns):
            draw.text((text_offset * 4, line_y), line, font=font, fill='black')
            line_y += fontsize

    # Epoch headers: one per column.
    for i, epoch in enumerate(epochs):
        draw.text((column_x(i), text_offset), f'Epoch: {epoch}', fill='black', font=font)

    # Paste every image into the cell addressed by its (epoch, prompt) pair.
    column_of = {epoch: i for i, epoch in enumerate(epochs)}
    row_of = {prompt: k for k, prompt in enumerate(prompts)}
    for img_name, prompt, epoch in keyed_images:
        with Image.open(os.path.join(directory, img_name)) as img:
            canvas.paste(img, (column_x(column_of[epoch]), row_y(row_of[prompt])))

    # Save the final image grid
    canvas.save(grid_save_dir)
    print('Saved')
    
# Build and save the grid when this cell is executed (in a notebook kernel
# `__name__` is "__main__", so the guard passes).
if __name__ == "__main__":
    main()

In [None]:
# @title ## 6.2. Inference
v2 = False  # @param {type:"boolean"}
v_parameterization = False  # @param {type:"boolean"}
prompt = "RAW photo, mirox in a fancy suit, fashion magazine photoshoot, full body shot, high detailed skin, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"  # @param {type: "string"}
negative = "(weird eyes, disfigured eyes, looking different direction:1.3), cgi, 3d, render, mutated hands, mutated fingers, deformed, distorted, disfigured, poorly drawn, bad anatomy, bad quality, worst quality"  # @param {type: "string"}
model = os.path.join(dreambooth_output_dir,'Miroslav7.ckpt')  # @param {type: "string"}
vae = os.path.join(vae_dir,'vae-ft-mse-840000-ema-pruned.ckpt')  # @param {type: "string"}
outdir = inference_dir  # @param {type: "string"}
scale = 7  # @param {type: "slider", min: 1, max: 40}
sampler = "euler_a"  # @param ["ddim", "pndm", "lms", "euler", "euler_a", "heun", "dpm_2", "dpm_2_a", "dpmsolver","dpmsolver++", "dpmsingle", "k_lms", "k_euler", "k_euler_a", "k_dpm_2", "k_dpm_2_a"]
steps = 35  # @param {type: "slider", min: 1, max: 100}
precision = "fp16"  # @param ["fp16", "bf16"] {allow-input: false}
width = 512  # @param {type: "integer"}
height = 768  # @param {type: "integer"}
images_per_prompt = 12  # @param {type: "integer"}
batch_size = 1  # @param {type: "integer"}
clip_skip = 1  # @param {type: "slider", min: 1, max: 40}
seed = -1  # @param {type: "integer"}

final_prompt = f"{prompt} --n {negative}"

config = {
    "v2": v2,
    "v_parameterization": v_parameterization,
    "ckpt": model,
    "outdir": outdir,
    "xformers": True,
    "vae": vae if vae else None,
    "fp16": True,
    "W": width,
    "H": height,
    "seed": seed if seed > 0 else None,
    "scale": scale,
    "sampler": sampler,
    "steps": steps,
    "max_embeddings_multiples": 3,
    "batch_size": batch_size,
    "images_per_prompt": images_per_prompt,
    "clip_skip": clip_skip if not v2 else None,
    "prompt": final_prompt,
}

args = ""
for k, v in config.items():
    if isinstance(v, str):
        args += f'--{k}="{v}" '
    if isinstance(v, bool) and v:
        args += f"--{k} "
    if isinstance(v, float) and not isinstance(v, bool):
        args += f"--{k}={v} "
    if isinstance(v, int) and not isinstance(v, bool):
        args += f"--{k}={v} "

final_args = f"python gen_img_diffusers.py {args}"

os.chdir(repo_dir)
!{final_args}

In [None]:
import os
#@title ## 7.2. Model Pruner

os.chdir(tools_dir)

if not os.path.exists('prune.py'):
    !wget https://raw.githubusercontent.com/lopho/stable-diffusion-prune/main/prune.py

#@markdown Convert to Float16
fp16 = True #@param {'type':'boolean'}
#@markdown Use EMA for weights
ema = False #@param {'type':'boolean'}
#@markdown Strip CLIP weights
no_clip = False #@param {'type':'boolean'}
#@markdown Strip VAE weights
no_vae = False #@param {'type':'boolean'}
#@markdown Strip depth model weights
no_depth = False #@param {'type':'boolean'}
#@markdown Strip UNet weights
no_unet = False #@param {'type':'boolean'}

model_path = "/home/studio-lab-user/sagemaker-studiolab-notebooks/dreambooth/output/Hen1.ckpt" #@param {'type' : 'string'}

config = {
    "fp16": fp16,
    "ema": ema,
    "no_clip": no_clip,
    "no_vae": no_vae,
    "no_depth": no_depth,
    "no_unet": no_unet,
}

suffixes = {
    "fp16": "-fp16",
    "ema": "-ema",
    "no_clip": "-no-clip",
    "no_vae": "-no-vae",
    "no_depth": "-no-depth",
    "no_unet": "-no-unet",
}

print(f"Loading model from {model_path}")

dir_name = os.path.dirname(model_path)
base_name = os.path.basename(model_path)
output_name = base_name.split('.')[0]

for option, suffix in suffixes.items():
    if config[option]:
        print(f"Applying option {option}")
        output_name += suffix
        
output_name += '-pruned'
output_path = os.path.join(dir_name, output_name + ('.ckpt' if model_path.endswith(".ckpt") else ".safetensors"))

args = ""
for k, v in config.items():
    if k.startswith("_"):
        args += f'"{v}" '
    elif isinstance(v, str):
        args += f'--{k}="{v}" '
    elif isinstance(v, bool) and v:
        args += f"--{k} "
    elif isinstance(v, float) and not isinstance(v, bool):
        args += f"--{k}={v} "
    elif isinstance(v, int) and not isinstance(v, bool):
        args += f"--{k}={v} "

final_args = f"python3 prune.py {model_path} {output_path} {args}"
!{final_args}

print(f"Saving pruned model to {output_path}")

In [None]:
# @title ## 7.1. Upload Config widget
import ipywidgets as widgets
from IPython.display import display

# Create the widget elements
# The token field is a Password widget so the write token is masked on screen;
# its value is still read through `.value`, exactly like a Text widget.
write_token_input = widgets.Password(description="Write Token:",style={"description_width": "initial"})
orgs_name_input = widgets.Text(description="Orgs Name:",style={"description_width": "initial"})
model_name_input = widgets.Text(description="Model Name:",style={"description_width": "initial"})
dataset_name_input = widgets.Text(description="Dataset Name:",style={"description_width": "initial"})
make_private_checkbox = widgets.Checkbox(value=True, description="Make Private")

# Create the widget container
widget_container = widgets.VBox([
    widgets.HTML("<h3>Login to Huggingface Hub</h3>"),
    write_token_input,
    orgs_name_input,
    model_name_input,
    dataset_name_input,
    make_private_checkbox
])

display(widget_container)

In [None]:
# @title ## 7.1. Create repo
from huggingface_hub import login
from huggingface_hub import HfApi
from huggingface_hub.utils import validate_repo_id, HfHubHTTPError


# Snapshot the current values of the form widgets. Those widgets are created
# in a separate cell, which must have been run and filled in first.
# @markdown Login to Huggingface Hub
# @markdown > Get **your** huggingface `WRITE` token [here](https://huggingface.co/settings/tokens)
write_token = write_token_input.value
# @markdown Fill this if you want to upload to your organization, or just leave it empty.
orgs_name = orgs_name_input.value
# @markdown If your model/dataset repo does not exist, it will automatically create it.
model_name = model_name_input.value
dataset_name = dataset_name_input.value
make_private = make_private_checkbox.value  # @param{type:"boolean"}

def authenticate(write_token):
    """Log in to the Hugging Face Hub and return the account info and a client.

    Args:
        write_token: Hub access token; also stored as a git credential.

    Returns:
        tuple: ``(whoami_result, api)`` — the value returned by
        ``HfApi.whoami`` for the token, and the ``HfApi`` instance used.
    """
    login(write_token, add_to_git_credential=True)
    hub_client = HfApi()
    account = hub_client.whoami(write_token)
    return account, hub_client


def create_repo(api, user, orgs_name, repo_name, repo_type, make_private=False):
    """Create a Hub repo unless it already exists, and remember its id.

    The repo id is ``<orgs_name>/<repo_name>`` when ``orgs_name`` is non-empty,
    otherwise ``<user["name"]>/<repo_name>`` (``repo_name`` is stripped).
    The id is stored in the module-level ``model_repo`` when ``repo_type`` is
    ``"model"`` and in ``datasets_repo`` for any other type.

    Args:
        api: client exposing ``create_repo`` (an ``HfApi`` instance).
        user: mapping with a ``"name"`` entry.
        orgs_name: organisation name, or ``""`` to use the user's namespace.
        repo_name: repository name without namespace.
        repo_type: ``"model"``, or another type handled as a dataset.
        make_private: whether a newly created repo is private.

    Raises:
        HfHubHTTPError: when creation fails for any reason other than the repo
            already existing (HTTP 409). Previously every HTTP error — bad
            token, missing permission, server error — was reported as "exists".
    """
    global model_repo
    global datasets_repo

    if orgs_name == "":
        repo_id = user["name"] + "/" + repo_name.strip()
    else:
        repo_id = orgs_name + "/" + repo_name.strip()

    try:
        validate_repo_id(repo_id)
        api.create_repo(repo_id=repo_id, repo_type=repo_type, private=make_private)
        print(f"{repo_type.capitalize()} repo '{repo_id}' didn't exist, creating repo")
    except HfHubHTTPError as e:
        # The Hub answers 409 Conflict when the repo already exists; anything
        # else is a genuine failure and must not be mistaken for "exists".
        status_code = getattr(getattr(e, "response", None), "status_code", None)
        if status_code != 409:
            print(f"{repo_type.capitalize()} repo '{repo_id}' could not be created: {e}")
            raise
        print(f"{repo_type.capitalize()} repo '{repo_id}' exists, skipping create repo")

    if repo_type == "model":
        model_repo = repo_id
        print(f"{repo_type.capitalize()} repo '{repo_id}' link: https://huggingface.co/{repo_id}\n")
    else:
        datasets_repo = repo_id
        print(f"{repo_type.capitalize()} repo '{repo_id}' link: https://huggingface.co/datasets/{repo_id}\n")

user, api = authenticate(write_token)

# @markdown This will be uploaded to model repo
#model_path = os.path.join(dreambooth_output_dir,"Hen1.ckpt")  # @param {type :"string"}
path_in_repo = ""  # @param {type :"string"}
# @markdown Now you can save your config file for future use
# @markdown Other Information
commit_message = "uploading model"  # @param {type :"string"}

# NOTE(review): project_name is not defined in this cell; this fallback only
# works if it already exists in the kernel. commit_message is not passed to
# the uploads below.
if not commit_message:
    commit_message = "feat: upload " + project_name.value + " checkpoint"

if model_name:
    create_repo(api, user, orgs_name, model_name, "model", make_private)
if dataset_name:
    create_repo(api, user, orgs_name, dataset_name, "dataset", make_private)

# Everything below is uploaded to the model repo, so a model name is required.
if not model_name.strip():
    raise ValueError("Model Name is empty: fill in the form above before uploading.")

# Resolve the namespace the same way create_repo() does, so that an
# organisation repo is not created in one place and uploaded to another.
upload_namespace = orgs_name if orgs_name != "" else user["name"]
upload_repo_id = upload_namespace + "/" + model_name.strip()

# NOTE(review): dreambooth_config_dir and dreambooth_output_dir are not
# defined in this cell; they must already exist in the kernel.
print("uploading to: ", upload_repo_id)
print("uploading config")
api.upload_folder(
    folder_path=dreambooth_config_dir,
    repo_id=upload_repo_id,
    repo_type=None,
    path_in_repo="config",
)
print("uploading sample images")
api.upload_folder(
    folder_path=f"{dreambooth_output_dir}/sample/Miroslav7",
    repo_id=upload_repo_id,
    repo_type=None,
    path_in_repo="samples",
)
print("uploading sample image grid")
api.upload_file(
    path_or_fileobj=f"{dreambooth_output_dir}/sample/Miroslav7_grid.png",
    path_in_repo="Miroslav7_grid.png",
    repo_id=upload_repo_id,
    repo_type=None,
)

print("done")


In [None]:
# @title ### 8.2.1. Upload Model
from huggingface_hub import HfApi
from pathlib import Path

# Module-level settings read by upload_model() / upload() below.
api = HfApi()

# @markdown This will be uploaded to model repo
#model_path = os.path.join(dreambooth_output_dir,"Hen1.ckpt")  # @param {type :"string"}
path_in_repo = ""  # @param {type :"string"}
# @markdown Now you can save your config file for future use
# NOTE(review): dreambooth_config_dir is not defined in this cell; it must
# already exist in the kernel.
config_path = dreambooth_config_dir  # @param {type :"string"}
# @markdown Other Information
commit_message = "uploading model"  # @param {type :"string"}

# NOTE(review): project_name is not defined in this cell either, so this
# fallback raises NameError if commit_message is blanked and project_name is
# not already in the kernel.
if not commit_message:
    commit_message = "feat: upload " + project_name.value + " checkpoint"

#if os.path.exists(model_path):
#    vae_exists = os.path.exists(os.path.join(model_path, "vae"))
#    unet_exists = os.path.exists(os.path.join(model_path, "unet"))
#    text_encoder_exists = os.path.exists(os.path.join(model_path, "text_encoder"))


def upload_model(model_paths, is_folder: bool, is_config: bool):
    """Upload a file or folder to the model repo held in ``model_repo``.

    Reads the module-level ``api``, ``model_repo``, ``path_in_repo`` and
    ``commit_message``.

    Destination name inside the repo:

    * default: the last component of ``model_paths``;
    * ``path_in_repo`` when it is set;
    * for configs: ``<path_in_repo>_config``, or ``<project>_config`` where
      ``<project>`` is ``project_name.value`` if that name exists in the
      kernel and the repo's own name otherwise.

    A folder containing ``vae``, ``unet`` and ``text_encoder`` entries
    (presumably a diffusers-style model folder) is uploaded to the repo root;
    any other folder goes under the destination name.

    Args:
        model_paths: local path of the file or folder to upload.
        is_folder: upload with ``upload_folder`` instead of ``upload_file``.
        is_config: treat the path as a config folder (affects naming only).
    """
    path_obj = Path(model_paths)
    trained_model = path_obj.parts[-1]

    if path_in_repo:
        trained_model = path_in_repo

    if is_config:
        if path_in_repo:
            trained_model = f"{path_in_repo}_config"
        else:
            # project_name is not defined anywhere in this cell; fall back to
            # the repo name instead of failing with NameError.
            try:
                project_label = project_name.value
            except NameError:
                project_label = model_repo.split("/")[-1]
            trained_model = f"{project_label}_config"

    print(f"Uploading {trained_model} to https://huggingface.co/" + model_repo)
    print(f"Please wait...")

    if is_folder:
        # Derived from the folder being uploaded: the module-level flags the
        # original code relied on exist only in commented-out code.
        has_model_parts = all(
            (path_obj / part).exists() for part in ("vae", "unet", "text_encoder")
        )

        if has_model_parts:
            api.upload_folder(
                folder_path=model_paths,
                repo_id=model_repo,
                commit_message=commit_message,
                ignore_patterns=".ipynb_checkpoints",
            )
        else:
            api.upload_folder(
                folder_path=model_paths,
                path_in_repo=trained_model,
                repo_id=model_repo,
                commit_message=commit_message,
                ignore_patterns=".ipynb_checkpoints",
            )
        print(
            f"Upload success, located at https://huggingface.co/"
            + model_repo
            + "/tree/main\n"
        )
    else:
        api.upload_file(
            path_or_fileobj=model_paths,
            path_in_repo=trained_model,
            repo_id=model_repo,
            commit_message=commit_message,
        )

        print(
            f"Upload success, located at https://huggingface.co/"
            + model_repo
            + "/blob/main/"
            + trained_model
            + "\n"
        )


def upload():
    """Upload the config folder to the model repo when ``config_path`` is set.

    The model-upload branch is commented out, so only the config folder is
    pushed (as a folder, flagged as config).
    """
    #if model_path.endswith((".ckpt", ".safetensors", ".pt")):
    #    upload_model(model_path, False, False)
    #else:
    #    upload_model(model_path, True, False)

    if config_path:
        upload_model(config_path, True, True)


# Run the upload as soon as the cell executes.
upload()