In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
# Report the total memory psutil sees and classify the runtime against a 20 GB threshold.
from psutil import virtual_memory

ram_gb = virtual_memory().total / 1e9
print(f'Your runtime has {ram_gb:.1f} gigabytes of available RAM\n')
print('Not using a high-RAM runtime' if ram_gb < 20 else 'You are using a high-RAM runtime!')

In [None]:
# Mount Google Drive
# The drive is mounted at /content/gdrive; the cells below read and write
# under /content/gdrive/MyDrive (tcmalloc build, token file, session folders).
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Install the `wget` Python package; it is imported by the imports cell that follows.
!pip install wget

In [5]:
# import modules required
import os
import time
import random
from os import listdir
from os.path import isfile
from IPython.utils import capture
from IPython.display import clear_output
import wget
from subprocess import check_output
import urllib.request
import requests
import base64
from gdown.download import get_url_from_gdrive_confirmation
from urllib.parse import urlparse, parse_qs, unquote
from urllib.request import urlopen, Request
from subprocess import getoutput
import shutil
from google.colab import files, runtime
from PIL import Image
from tqdm import tqdm
import ipywidgets as widgets
from io import BytesIO

In [None]:
# Dependencies

print('Installing dependencies...')

with capture.capture_output() as cap:
    os.chdir('/content')
    !pip install -qq --no-deps accelerate==0.12.0
    !wget -q -i https://raw.githubusercontent.com/TheLastBen/fast-stable-diffusion/main/Dependencies/dbdeps.txt
    !dpkg -i *.deb
    !tar -C / --zstd -xf gcolabdeps.tar.zst
    !rm *.deb | rm *.zst | rm *.txt
    !git clone -q --depth 1 --branch main https://github.com/TheLastBen/diffusers
    !pip install gradio==3.16.2 --no-deps -qq

    if not os.path.exists('gdrive/MyDrive/sd/libtcmalloc/libtcmalloc_minimal.so.4'):
        %env CXXFLAGS=-std=c++14
        !wget -q https://github.com/gperftools/gperftools/releases/download/gperftools-2.5/gperftools-2.5.tar.gz && tar zxf gperftools-2.5.tar.gz && mv gperftools-2.5 gperftools
        !wget -q https://github.com/TheLastBen/fast-stable-diffusion/raw/main/AUTOMATIC1111_files/Patch
        %cd /content/gperftools
        !patch -p1 < /content/Patch
        !./configure --enable-minimal --enable-libunwind --enable-frame-pointers --enable-dynamic-sized-delete-support --enable-sized-delete --enable-emergency-malloc; make -j4
        !mkdir -p /content/gdrive/MyDrive/sd/libtcmalloc && cp .libs/libtcmalloc*.so* /content/gdrive/MyDrive/sd/libtcmalloc
        %env LD_PRELOAD=/content/gdrive/MyDrive/sd/libtcmalloc/libtcmalloc_minimal.so.4
        %cd /content
        !rm *.tar.gz Patch && rm -r /content/gperftools
    else:
        %env LD_PRELOAD=/content/gdrive/MyDrive/sd/libtcmalloc/libtcmalloc_minimal.so.4

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['PYTHONWARNINGS'] = 'ignore'

print('Installation completed successfully!')

In [None]:
# =============================================================================================================================================================================================================================

In [None]:
# Model Download
# Skip this cell when loading a previous session that contains a trained model

with capture.capture_output() as cap:
    os.chdir('/content')

# Optional access token stored on Drive. When the file exists, its content is
# embedded as basic-auth credentials in the URL prefix `auth`; otherwise a plain
# "https://" prefix is used.
if os.path.exists('/content/gdrive/MyDrive/Dreambooth-Training/token.txt'):
    with open("/content/gdrive/MyDrive/Dreambooth-Training/token.txt") as f:
        # Strip surrounding whitespace: a trailing newline in the file would
        # otherwise end up in the middle of the URL.
        token = f.read().strip()
    auth = f'https://USER:{token}@'
else:
    auth = "https://"


def download_model():
    if os.path.exists('/content/stable-diffusion-v1-5'):
        !rm -r /content/stable-diffusion-v1-5

    os.chdir('/content')

    !mkdir /content/stable-diffusion-v1-5
    os.chdir('/content/stable-diffusion-v1-5')
    !git config --global init.defaultBranch main
    !git init
    !git lfs install --system --skip-repo
    !git remote add -f origin  "https://huggingface.co/runwayml/stable-diffusion-v1-5"
    !git config core.sparsecheckout true
    !echo -e "scheduler\ntext_encoder\ntokenizer\nunet\nvae\nmodel_index.json\n!vae/diffusion_pytorch_model.bin\n!*.safetensors\n!*.fp16.bin\n!*.non_ema.bin" > .git/info/sparse-checkout
    !git pull origin main
    if os.path.exists('/content/stable-diffusion-v1-5/unet/diffusion_pytorch_model.bin'):
        !wget -q -O vae/diffusion_pytorch_model.bin https://huggingface.co/stabilityai/sd-vae-ft-mse/resolve/main/diffusion_pytorch_model.bin
        !rm -r .git
        !rm model_index.json
        time.sleep(1)
        wget.download('https://raw.githubusercontent.com/TheLastBen/fast-stable-diffusion/main/Dreambooth/model_index.json')
        os.chdir('/content')
        print('Done!')
    else:
        while not os.path.exists('/content/stable-diffusion-v1-5/unet/diffusion_pytorch_model.bin'):
            print('Something went wrong...')
            time.sleep(5)


# Reuse an existing download only when the UNet weights are really there. Checking
# just the directory would accept a partial download left by a failed or
# interrupted run; download_model() deletes any existing directory before fetching.
if os.path.exists('/content/stable-diffusion-v1-5/unet/diffusion_pytorch_model.bin'):
    print("The v1.5 model already exists, using this model.")
else:
    download_model()

# Base model path consumed by the session and training cells.
MODEL_NAME = "/content/stable-diffusion-v1-5"

In [None]:
# =============================================================================================================================================================================================================================

In [None]:
# Create/Load a Session

# ==================================================================================================
# Enter the session name.
# If an older session with the same name exists, that will be loaded. Otherwise a new session will be made.
Session_Name = 'comicpanel2'


# MODEL_NAME is normally set by the "Model Download" cell. Default it to "" when
# that cell was skipped; only the "name not defined" case is handled here so that
# unrelated errors are not silently swallowed.
try:
    MODEL_NAME
except NameError:
    MODEL_NAME = ""

# Instance prompt passed to the training script (left empty in this notebook).
PT = ""

# Ask interactively when no name was configured; spaces are replaced so the name
# can be used in paths.
while Session_Name == "":
    print('Input the Session Name:')
    Session_Name = input('')
Session_Name = Session_Name.replace(" ", "_")


# Paths derived from the session name and used by every later cell.
INSTANCE_NAME = Session_Name
OUTPUT_DIR = "/content/models/" + Session_Name
SESSION_DIR = '/content/gdrive/MyDrive/Dreambooth-Training/Sessions/' + Session_Name
INSTANCE_DIR = SESSION_DIR + '/instance_images'
CAPTIONS_DIR = SESSION_DIR + '/captions'
MDLPTH = SESSION_DIR + "/" + Session_Name + '.ckpt'

# When the session folder has no final "<Session_Name>.ckpt" but does contain
# other .ckpt files, let the user promote one of them to be the session model.
if os.path.exists(SESSION_DIR):
    mdls = [ckpt for ckpt in listdir(SESSION_DIR) if ckpt.split(".")[-1] == "ckpt"]

    if mdls and not os.path.exists(MDLPTH):
        print('No final checkpoint model found, select which intermediary checkpoint to use, enter only the number, (000 to skip):\n')

        for idx, ckpt_name in enumerate(mdls):
            print(str(idx) + '-', ckpt_name)

        # Keep asking until the answer is either the skip code or a valid index.
        # Non-numeric and out-of-range answers are re-prompted instead of
        # raising ValueError or selecting nothing.
        choice = input()
        while choice != "000" and not (choice.isdecimal() and int(choice) < len(mdls)):
            choice = input()

        if choice != "000":
            selected = mdls[int(choice)]
            # Rename the chosen checkpoint to the final model path.
            shutil.move(os.path.join(SESSION_DIR, selected), MDLPTH)
            print('Using the model', selected + "...")
            time.sleep(2)
        else:
            print('Skipping the intermediary checkpoints...')

with capture.capture_output() as cap:
    %cd /content
    resume = False

if os.path.exists(str(SESSION_DIR)) and not os.path.exists(MDLPTH):
    print('Loading session with no previous model, using the original model or the custom downloaded model')
    if MODEL_NAME == "":
        print('No model found, use the "Model Download" cell to download a model.')
    else:
        print('Session Loaded, proceed to uploading instance images')

elif os.path.exists(MDLPTH):
    print('Session found, loading the trained model ...')
    wget.download('https://github.com/TheLastBen/fast-stable-diffusion/raw/main/Dreambooth/det.py')
    print('Detecting model version...')
    Model_Version = check_output('python det.py --MODEL_PATH ' + MDLPTH, shell=True).decode('utf-8').replace('\n', '')

    print(Model_Version + ' Detected')
    !rm det.py
    !wget -q -O config.yaml https://github.com/CompVis/stable-diffusion/raw/main/configs/stable-diffusion/v1-inference.yaml
    print('Session found, loading the trained model ...')
    !python /content/diffusers/scripts/convert_original_stable_diffusion_to_diffusers.py --checkpoint_path $MDLPTH --dump_path "$OUTPUT_DIR" --original_config_file config.yaml
    !rm /content/config.yaml


    if os.path.exists(OUTPUT_DIR + '/unet/diffusion_pytorch_model.bin'):
        resume=True
        clear_output()
        print('Session loaded.')
    else:
        if not os.path.exists(OUTPUT_DIR + '/unet/diffusion_pytorch_model.bin'):
            print('Conversion error, if the error persists, remove the CKPT file from the current session folder')

elif not os.path.exists(str(SESSION_DIR)):
    %mkdir -p "$INSTANCE_DIR"
    print('Creating session...')
    if MODEL_NAME == "":
        print('No model found, use the "Model Download" cell to download a model.')
    else:
        print('Session created, proceed to uploading instance images')

In [None]:
# =============================================================================================================================================================================================================================

In [None]:
# Instance Images

# Set to False to keep any existing instance images.
remove_existing_instance_images = False

# instance images will be taken from the directory specified.
images_folder = '/content/gdrive/MyDrive/Uni Work/third year/FYP/training data/panels/sep'

# Make sure to upload the captions separately into the captions folder.


with capture.capture_output() as cap:
    %cd /content


if remove_existing_instance_images:
    if os.path.exists(str(INSTANCE_DIR)):
        !rm -r "$INSTANCE_DIR"
    if os.path.exists(str(CAPTIONS_DIR)):
        !rm -r "$CAPTIONS_DIR"

if not os.path.exists(str(INSTANCE_DIR)):
    %mkdir -p "$INSTANCE_DIR"
if not os.path.exists(str(CAPTIONS_DIR)):
    %mkdir -p "$CAPTIONS_DIR"

if os.path.exists(INSTANCE_DIR+"/.ipynb_checkpoints"):
    %rm -r $INSTANCE_DIR"/.ipynb_checkpoints"



while images_folder != "" and not os.path.exists(str(images_folder)):
    print('The images folder specified does not exist, copy the path in here:')
    images_folder = input('')


if os.path.exists(images_folder + "/.ipynb_checkpoints"):
    %rm -r "$images_folder""/.ipynb_checkpoints"

with capture.capture_output() as cap:
    !mv $images_folder/*.txt $CAPTIONS_DIR
for filename in tqdm(os.listdir(images_folder), bar_format='  |{bar:15}| {n_fmt}/{total_fmt} Uploaded'):
    %cp -r "$images_folder/$filename" "$INSTANCE_DIR"


with capture.capture_output() as cap:
    %cd "$INSTANCE_DIR"
    !find . -name "* *" -type f | rename 's/ /-/g'
    %cd "$CAPTIONS_DIR"
    !find . -name "* *" -type f | rename 's/ /-/g'

    %cd $SESSION_DIR
    !rm instance_images.zip captions.zip
    !zip -r instance_images instance_images
    !zip -r captions captions
    %cd /content

print('\nUploading images complete!')

In [None]:
# =============================================================================================================================================================================================================================

In [None]:
# Training
#Start DreamBooth


# Set this to True to continue training the prior model.
Resume_Training = False

# ==============
# Training parameters

UNet_Training_Steps = 3000

# UNet_Learning_Rate = 2e-6
UNet_Learning_Rate = 1e-5
untlr = UNet_Learning_Rate

Text_Encoder_Training_Steps = 900

# keep low to avoid overfitting (1e-6 is higher than 4e-7)
# Text_Encoder_Learning_Rate = 1e-6
Text_Encoder_Learning_Rate = 2e-5
txlr = Text_Encoder_Learning_Rate

# Always set as True for style training. Not needed for faces.
Offset_Noise = True
# ==============



if os.path.exists(INSTANCE_DIR + "/.ipynb_checkpoints"):
    %rm -r $INSTANCE_DIR"/.ipynb_checkpoints"

if os.path.exists(CAPTIONS_DIR+"/.ipynb_checkpoints"):
    %rm -r $CAPTIONS_DIR"/.ipynb_checkpoints"


if resume and not Resume_Training:
    print('Overwrite previously trained model? ("yes"/"no") - "no" will resume training the prior model.')
    while True:
        ansres = input('')
        if ansres == 'no':
            Resume_Training = True
            break
        elif ansres == 'yes':
            Resume_Training = False
            resume = False
            break

while not Resume_Training and MODEL_NAME == "":
    print('No model found, go back and download a model.')
    time.sleep(5)

MODELT_NAME = MODEL_NAME


trnonltxt = ""
if UNet_Training_Steps == 0:
    trnonltxt = "--train_only_text_encoder"

ofstnse = ""
if Offset_Noise:
    ofstnse = "--offset_noise"

Seed = random.randint(1, 999999)
resuming = ""

if Resume_Training and os.path.exists(OUTPUT_DIR + '/unet/diffusion_pytorch_model.bin'):
    MODELT_NAME = OUTPUT_DIR
    print('Resuming Training...')
    resuming = "Yes"
elif Resume_Training and not os.path.exists(OUTPUT_DIR + '/unet/diffusion_pytorch_model.bin'):
    print('Previous model not found, training a new model...')
    MODELT_NAME = MODEL_NAME
    while MODEL_NAME == "":
        print('No model found, use the "Model Download" cell to download a model.')
        time.sleep(5)


TexRes = 512
GCUNET = ""


Enable_text_encoder_training = True

if Text_Encoder_Training_Steps == 0:
    Enable_text_encoder_training = False
else:
    stptxt = Text_Encoder_Training_Steps


def dump_only_textenc(trnonltxt, MODELT_NAME, INSTANCE_DIR, OUTPUT_DIR, PT, Seed, precision, Training_Steps):
    """Launch train_dreambooth.py through `accelerate` for the text encoder stage.

    The command is run with --train_text_encoder and --dump_only_text_encoder,
    batch size 1, gradient checkpointing, 8-bit Adam and a linear schedule
    without warm-up.

    Args:
        trnonltxt: extra flag string inserted verbatim (may be empty).
        MODELT_NAME: value for --pretrained_model_name_or_path.
        INSTANCE_DIR: value for --instance_data_dir.
        OUTPUT_DIR: value for --output_dir.
        PT: value for --instance_prompt.
        Seed: value for --seed.
        precision: value for --mixed_precision.
        Training_Steps: value for --max_train_steps.

    The command also interpolates `ofstnse`, `CAPTIONS_DIR`, `TexRes` and `txlr`,
    which are not parameters and are therefore taken from the notebook namespace.
    """
    !accelerate launch /content/diffusers/examples/dreambooth/train_dreambooth.py \
    $trnonltxt \
    --external_captions \
    $ofstnse \
    --image_captions_filename \
    --train_text_encoder \
    --dump_only_text_encoder \
    --pretrained_model_name_or_path="$MODELT_NAME" \
    --instance_data_dir="$INSTANCE_DIR" \
    --output_dir="$OUTPUT_DIR" \
    --captions_dir="$CAPTIONS_DIR" \
    --instance_prompt="$PT" \
    --seed=$Seed \
    --resolution=$TexRes \
    --mixed_precision=$precision \
    --train_batch_size=1 \
    --gradient_accumulation_steps=1 --gradient_checkpointing \
    --use_8bit_adam \
    --learning_rate=$txlr \
    --lr_scheduler="linear" \
    --lr_warmup_steps=0 \
    --max_train_steps=$Training_Steps

def train_only_unet(SESSION_DIR, MODELT_NAME, INSTANCE_DIR, OUTPUT_DIR, PT, Seed, Res, precision, Training_Steps):
    """Launch train_dreambooth.py through `accelerate` for the UNet stage.

    Prints a status line first (plus a "Resuming" line when the notebook-level
    `resuming` variable equals "Yes"), then runs the script with
    --train_only_unet, batch size 1, 8-bit Adam and a linear schedule without
    warm-up.

    Args:
        SESSION_DIR: value for --Session_dir.
        MODELT_NAME: value for --pretrained_model_name_or_path.
        INSTANCE_DIR: value for --instance_data_dir.
        OUTPUT_DIR: value for --output_dir.
        PT: value for --instance_prompt.
        Seed: value for --seed.
        Res: value for --resolution.
        precision: value for --mixed_precision.
        Training_Steps: value for --max_train_steps.

    The command also interpolates `ofstnse`, `CAPTIONS_DIR`, `GCUNET` and `untlr`,
    which are not parameters and are therefore taken from the notebook namespace.
    """
    if resuming == "Yes":
        print('Resuming Training...')
    print('Training the UNet...')
    !accelerate launch /content/diffusers/examples/dreambooth/train_dreambooth.py \
    --external_captions \
    $ofstnse \
    --image_captions_filename \
    --train_only_unet \
    --Session_dir=$SESSION_DIR \
    --pretrained_model_name_or_path="$MODELT_NAME" \
    --instance_data_dir="$INSTANCE_DIR" \
    --output_dir="$OUTPUT_DIR" \
    --captions_dir="$CAPTIONS_DIR" \
    --instance_prompt="$PT" \
    --seed=$Seed \
    --resolution=$Res \
    --mixed_precision=$precision \
    --train_batch_size=1 \
    --gradient_accumulation_steps=1 $GCUNET \
    --use_8bit_adam \
    --learning_rate=$untlr \
    --lr_scheduler="linear" \
    --lr_warmup_steps=0 \
    --max_train_steps=$Training_Steps


if Enable_text_encoder_training :
    print('Training the text encoder...')
    if os.path.exists(OUTPUT_DIR + '/' + 'text_encoder_trained'):
        %rm -r $OUTPUT_DIR"/text_encoder_trained"
    dump_only_textenc(trnonltxt, MODELT_NAME, INSTANCE_DIR, OUTPUT_DIR, PT, Seed, 'fp16', Training_Steps=stptxt)


if UNet_Training_Steps != 0:
    train_only_unet(SESSION_DIR, MODELT_NAME, INSTANCE_DIR, OUTPUT_DIR, PT, Seed, 512, 'fp16', Training_Steps=UNet_Training_Steps)

if UNet_Training_Steps == 0 and Text_Encoder_Training_Steps == 0:
    print('All training steps were set to 0, there is nothing to do.')
else:
    if os.path.exists('/content/models/' + INSTANCE_NAME + '/unet/diffusion_pytorch_model.bin'):
        prc = "--fp16"
        !python /content/diffusers/scripts/convertosdv2.py $prc $OUTPUT_DIR $SESSION_DIR/$Session_Name".ckpt"

        filepath = SESSION_DIR + '/' + INSTANCE_NAME + '.ckpt'
        if os.path.exists(filepath):
            print("Training complete! Trained checkpoint model available at", filepath)
        else:
            print("Something went wrong, trained model instance not created.")
    else:
        print("Something went wrong!")

In [None]:
# =============================================================================================================================================================================================================================

In [None]:
# Upload the Trained Model to HuggingFace.
# Name used for the repository; when left empty, the session name is used instead.
Name_of_the_concept = ''

# Create write-access token: https://huggingface.co/settings/tokens
# "New token" > Role: Write. Read tokens won't work.
# NOTE(review): avoid saving a real token in the notebook file before sharing it.
hf_token = ''


# imports for uploading to huggingface
from slugify import slugify
from huggingface_hub import HfApi, HfFolder, CommitOperationAdd
from huggingface_hub import create_repo
from IPython.display import display_markdown
from IPython.display import clear_output
from IPython.utils import capture
from google.colab import files
import shutil
import time
import os


# Fall back to the session name, then turn spaces into dashes.
if not Name_of_the_concept:
    Name_of_the_concept = Session_Name
Name_of_the_concept = "-".join(Name_of_the_concept.split(" "))


# Resolve the account behind the token and derive the target repository id.
api = HfApi()
your_username = api.whoami(token=hf_token)["name"]

repo_id = "{}/{}".format(your_username, slugify(Name_of_the_concept))
output_dir = '/content/models/' + INSTANCE_NAME

def bar(prg):
    """Return the upload progress line for `prg` filled cells out of 25 (4% each)."""
    filled = '█' * prg
    padding = ' ' * (25 - prg)
    percent = str(prg * 4)
    return f"Uploading to HuggingFace : |{filled}{padding}| {percent}%"


print("Loading...")


with capture.capture_output() as cap:
    %cd $OUTPUT_DIR
    !rm -r safety_checker feature_extractor .git
    !rm model_index.json
    !git init
    !git lfs install --system --skip-repo
    !git remote add -f origin  "https://USER:{hf_token}@huggingface.co/runwayml/stable-diffusion-v1-5"
    !git config core.sparsecheckout true
    !echo -e "feature_extractor\nsafety_checker\nmodel_index.json" > .git/info/sparse-checkout
    !git pull origin main
    !rm -r .git
    %cd /content


image_string = ""

readme_text = f'''---
license: creativeml-openrail-m
tags:
- text-to-image
- stable-diffusion
---
## {Name_of_the_concept} - Diffusion model trained on Stable Diffusion 1.5
Trigger-word: ""
'''

# Write Readme.md to a file
readme_file = open("README.md", "w")
readme_file.write(readme_text)
readme_file.close()


operations = [
    CommitOperationAdd(path_in_repo="README.md", path_or_fileobj="README.md"),
    CommitOperationAdd(path_in_repo=f"{Session_Name}.ckpt",path_or_fileobj=MDLPTH)
]
create_repo(repo_id,private=True, token=hf_token)

api.create_commit(
    repo_id=repo_id,
    operations=operations,
    commit_message=f"Upload the concept {Name_of_the_concept} embeds and token",
    token=hf_token
)

api.upload_folder(
    folder_path=OUTPUT_DIR + "/feature_extractor",
    path_in_repo="feature_extractor",
    repo_id=repo_id,
    token=hf_token
)

print(bar(4))

api.upload_folder(
    folder_path=OUTPUT_DIR + "/safety_checker",
    path_in_repo="safety_checker",
    repo_id=repo_id,
    token=hf_token
)

print(bar(8))

api.upload_folder(
    folder_path=OUTPUT_DIR + "/scheduler",
    path_in_repo="scheduler",
    repo_id=repo_id,
    token=hf_token
)

print(bar(9))

api.upload_folder(
    folder_path=OUTPUT_DIR + "/text_encoder",
    path_in_repo="text_encoder",
    repo_id=repo_id,
    token=hf_token
)

print(bar(12))

api.upload_folder(
    folder_path=OUTPUT_DIR + "/tokenizer",
    path_in_repo="tokenizer",
    repo_id=repo_id,
    token=hf_token
)

print(bar(13))

api.upload_folder(
    folder_path=OUTPUT_DIR + "/unet",
    path_in_repo="unet",
    repo_id=repo_id,
    token=hf_token
)

print(bar(21))

api.upload_folder(
    folder_path=OUTPUT_DIR + "/vae",
    path_in_repo="vae",
    repo_id=repo_id,
    token=hf_token
)

print(bar(23))

api.upload_file(
    path_or_fileobj=OUTPUT_DIR + "/model_index.json",
    path_in_repo="model_index.json",
    repo_id=repo_id,
    token=hf_token
)

print(bar(25))

print(f'The concept was uploaded successfully: https://huggingface.co/{repo_id}')