In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset folder and the training
# output directory used by later cells both live under this mount point.
drive.mount('/content/drive')



In [None]:
# Check the GPU: run nvidia-smi to confirm the runtime actually has a GPU
# attached before starting the (GPU-only) training cells below.
!nvidia-smi



In [None]:
# Install dependencies.
# %pip (rather than !pip) guarantees the packages land in the environment of
# the running kernel.
%pip install -q bitsandbytes transformers accelerate peft datasets
# diffusers is installed from source, presumably so that it matches the
# training script fetched from the `main` branch below -- confirm.
%pip install -q git+https://github.com/huggingface/diffusers.git
# -O overwrites any previous download. Without it, re-running this cell makes
# wget save "train_dreambooth_lora_sdxl.py.1" and training keeps using the
# stale first copy.
!wget -O train_dreambooth_lora_sdxl.py https://raw.githubusercontent.com/huggingface/diffusers/main/examples/dreambooth/train_dreambooth_lora_sdxl.py


In [None]:
import os
import glob
import matplotlib.pyplot as plt
import json
from PIL import Image
import torch
from transformers import AutoProcessor, BlipForConditionalGeneration
import gc
import locale
from huggingface_hub import notebook_login
from huggingface_hub import upload_folder
from IPython.display import display, Markdown
from diffusers import DiffusionPipeline, AutoencoderKL

In [None]:
def load_images_from_folder(folder_path):
    """Return the paths of the image files located directly in ``folder_path``.

    Files are selected by extension (png, jpg, jpeg, bmp, gif), compared
    case-insensitively so that e.g. ``photo.JPG`` is picked up on
    case-sensitive filesystems as well. Sub-folders are not searched.

    Args:
        folder_path: Directory to scan.

    Returns:
        A sorted list of matching paths (each prefixed with ``folder_path``).
        The list is empty when the folder does not exist or holds no images.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    # glob's "*" does not match dot-files, so hidden files stay excluded.
    candidates = glob.glob(os.path.join(folder_path, '*'))
    # Sorting makes the order (and therefore metadata.jsonl) reproducible
    # instead of depending on filesystem enumeration order.
    return sorted(path for path in candidates if path.lower().endswith(image_exts))

# Collect the training images from the Drive folder (adjust the path).
folder = '/content/drive/My Drive//'
img_paths = load_images_from_folder(folder)

# Run the captioning model on the GPU when one is available, else on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# BLIP processor (image preprocessing / token decoding) and captioning model.
blip_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = blip_model.to(device)

# Function to generate captions for images
def caption_images(input_image):
    """Generate a text caption for one image with the BLIP captioning model.

    Args:
        input_image: The image to caption, in a form accepted by
            ``blip_processor`` (the caller in this notebook passes an RGB
            PIL image).

    Returns:
        The decoded caption string for the image.
    """
    # Cast the inputs to the dtype the model was actually loaded in. A
    # hard-coded float16 cast does not match a float32 model and fails with a
    # dtype-mismatch error at the first layer.
    inputs = blip_processor(images=input_image, return_tensors="pt").to(device, blip_model.dtype)
    pixel_values = inputs.pixel_values
    generated_ids = blip_model.generate(pixel_values=pixel_values, max_length=50, num_beams=5, repetition_penalty=2.5)
    # A single image was passed in, so the batch holds exactly one caption.
    generated_caption = blip_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_caption

# Define paths
# The metadata file is written into the image folder itself, next to the
# files that its "file_name" entries refer to.
metadata_file = os.path.join(folder, "metadata.jsonl")

# Create metadata.jsonl file: one JSON object per line, pairing each image
# file name with its generated caption under the "prompt" key.
# UTF-8 is requested explicitly because ensure_ascii=False may emit non-ASCII
# characters, which must not depend on the runtime's default encoding.
with open(metadata_file, 'w', encoding='utf-8') as outfile:
    for img_path in img_paths:
        img_name = os.path.basename(img_path)
        # The context manager closes the underlying file handle once the
        # converted/resized copy has been produced.
        with Image.open(img_path) as source_img:
            img = source_img.convert('RGB').resize((256, 256), Image.LANCZOS)  # Use LANCZOS instead of ANTIALIAS
        caption = caption_images(img)
        entry = {"file_name": img_name, "prompt": f"  , {caption}"}
        json.dump(entry, outfile, ensure_ascii=False)
        outfile.write('\n')

In [None]:
# Drop the captioning model and processor, then reclaim their memory
# (Python garbage first, then PyTorch's cached CUDA blocks).
del blip_model
del blip_processor
gc.collect()
torch.cuda.empty_cache()

In [None]:
def _utf8_preferred_encoding(do_setlocale=True):
    """Stand-in for ``locale.getpreferredencoding`` that always reports UTF-8.

    Accepts (and ignores) ``do_setlocale`` so that callers using the standard
    signature, e.g. ``locale.getpreferredencoding(False)``, keep working.
    """
    return "UTF-8"


# Force the reported preferred encoding to UTF-8 for the rest of the session.
# NOTE(review): presumably a workaround for the `!` shell commands in later
# cells refusing to run under a non-UTF-8 locale -- confirm.
locale.getpreferredencoding = _utf8_preferred_encoding

# Create a default Accelerate configuration so that `accelerate launch` in the
# training cell can run without an interactive setup.
!accelerate config default


In [None]:
# Log in to the Hugging Face Hub: paste a Hugging Face access token when
# prompted. NOTE(review): presumably the token needs write access, since a
# later cell uploads the trained model -- confirm.
notebook_login()

In [None]:
# Fine-tune SDXL with DreamBooth-LoRA.
# Adjust --dataset_name to the Drive folder that holds the training images
# and their metadata.jsonl.
# Keep comments OUT of the command: every line must end with a bare "\".
# Anything after the backslash (even a trailing comment) ends the command
# early and the remaining options are never passed to the script.
# NOTE(review): --checkpointing_steps (717) exceeds --max_train_steps (500),
# so presumably no intermediate checkpoints are meant to be saved -- confirm.
!accelerate launch train_dreambooth_lora_sdxl.py \
  --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \
  --pretrained_vae_model_name_or_path="madebyollin/sdxl-vae-fp16-fix" \
  --dataset_name="/content/drive/My Drive//" \
  --output_dir="/content/drive/My Drive/output/LoRA" \
  --caption_column="prompt" \
  --mixed_precision="fp16" \
  --instance_prompt="cartoon characters" \
  --resolution=1024 \
  --train_batch_size=1 \
  --gradient_accumulation_steps=3 \
  --gradient_checkpointing \
  --learning_rate=1e-4 \
  --snr_gamma=5.0 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --use_8bit_adam \
  --max_train_steps=500 \
  --checkpointing_steps=717 \
  --seed="0"

In [None]:
from huggingface_hub import create_repo, upload_folder

# Replace with your actual Hugging Face username and repository name
username = ""
repository_name = ""

# Output directory on Google Drive: this is the --output_dir that was passed
# to the fine-tuning script, i.e. where the trained LoRA weights were written.
output_dir = "/content/drive/My Drive/output/LoRA"

# Fail early with a clear message instead of sending the invalid repo id "/"
# to the Hub when the placeholders above were left empty.
if not username or not repository_name:
    raise ValueError("Set `username` and `repository_name` before uploading the model.")

upload_repo_id = f"{username}/{repository_name}"

# upload_folder needs an existing repository; create it if it is missing
# (exist_ok=True makes this a no-op when it already exists).
create_repo(upload_repo_id, exist_ok=True)

# Upload to Hugging Face Hub, skipping intermediate step/epoch checkpoints.
upload_folder(
    repo_id=upload_repo_id,
    folder_path=output_dir,
    commit_message="Uploaded trained model",
    ignore_patterns=["step_*", "epoch_*"],
)

In [None]:
from IPython.display import display, Markdown
# Build the repository id from the username / repository name set in the
# upload cell, so the link (and the LoRA weights loaded from `repo_id` later)
# always point at the repository the model was uploaded to.
repo_id = f"{username}/{repository_name}"
link_to_model = f"https://huggingface.co/{repo_id}"
display(Markdown("### Your model has finished training.\nAccess it here: {}".format(link_to_model)))


In [None]:
# Inference setup: SDXL base pipeline in half precision, using the separate
# "sdxl-vae-fp16-fix" VAE, with the trained LoRA weights loaded from `repo_id`.
sdxl_load_options = {
    "torch_dtype": torch.float16,
    "variant": "fp16",
    "use_safetensors": True,
}

vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    vae=vae,
    **sdxl_load_options,
)
pipe.load_lora_weights(repo_id)

# Assigning to `_` keeps the pipeline repr out of the cell output.
_ = pipe.to("cuda")

In [None]:
from diffusers import StableDiffusionPipeline
import torch
from PIL import Image
import matplotlib.pyplot as plt

# Seed registry shared across generations: one sub-dict for characters and one
# for scenes; entries are added by generate_image_with_metadata as
# {"seed": <int>} keyed by name.
metadata = dict(characters={}, scenes={})

def generate_image_with_metadata(prompt, character_name=None, scene_name=None, metadata=None):
    """Generate one image with ``pipe``, seeding it from a per-scene seed registry.

    A seed is looked up (or drawn at random and stored) for ``character_name``
    under ``metadata["characters"]`` and for ``scene_name`` under
    ``metadata["scenes"]``. The generator is then seeded with the *scene*
    seed, so repeating a scene name with the same registry reuses its seed.

    NOTE(review): the character seed is recorded but never reaches the
    generator -- the scene seed always wins. If per-character consistency is
    intended, the two seeds need to be combined; confirm the intent.

    Args:
        prompt: Text prompt passed to the pipeline.
        character_name: Key for the character seed entry (``None`` is used as
            a key too when no name is given).
        scene_name: Key for the scene seed entry (``None`` likewise).
        metadata: Registry dict with "characters" and "scenes" sub-dicts; it is
            updated in place. When omitted, a fresh throw-away registry is
            used, so nothing is remembered between calls.

    Returns:
        The first image produced by the pipeline for ``prompt``.
    """
    # The default used to crash on metadata["characters"]; fall back to an
    # empty registry instead.
    if metadata is None:
        metadata = {}
    characters = metadata.setdefault("characters", {})
    scenes = metadata.setdefault("scenes", {})

    def _seed_for(registry, key):
        # Return the stored seed for `key`, drawing and storing one first if
        # the key has not been seen yet.
        if key not in registry:
            registry[key] = {"seed": torch.randint(0, 2**32, (1,)).item()}
        return registry[key]["seed"]

    # Order matters for the RNG stream: character first, then scene.
    _seed_for(characters, character_name)
    seed = _seed_for(scenes, scene_name)

    generator = torch.Generator(device=device).manual_seed(seed)
    image = pipe(prompt, generator=generator).images[0]

    return image

# (prompt, scene name) pairs to render -- fill in the placeholders.
prompts_and_scenes = [
    (" ", " "),
    (" ", " "),
    (" ", " "),
    (" ", " "),
]

# Render every prompt, save each result to disk and keep it for display.
generated_images = []
for idx, (prompt, scene_name) in enumerate(prompts_and_scenes):
    # Tag the generation with the character when its name occurs in the
    # prompt (write the character name in both placeholders).
    character_name = " " if " " in prompt else None

    rendered = generate_image_with_metadata(
        prompt,
        character_name=character_name,
        scene_name=scene_name,
        metadata=metadata,
    )
    rendered.save(f"generated_image_{idx}.png")
    generated_images.append(rendered)
    # Release cached GPU memory between generations.
    torch.cuda.empty_cache()

def display_images(images, titles):
    """Show the images side by side in a single row, each with its title.

    Args:
        images: Sequence of images accepted by ``Axes.imshow``.
        titles: Sequence of titles, matched to ``images`` by position.

    Returns:
        None. Nothing is drawn when ``images`` is empty.
    """
    count = len(images)
    if count == 0:
        # plt.subplots cannot create a grid with zero columns.
        return

    # A single image gets a narrower figure than a row of several.
    fig_width = 10 if count == 1 else 20
    # squeeze=False always yields a 2-D array of axes, so one image and many
    # images go through the same loop.
    fig, axes = plt.subplots(1, count, figsize=(fig_width, 5), squeeze=False)
    for ax, img, title in zip(axes[0], images, titles):
        ax.imshow(img)
        ax.set_title(title)
        ax.axis('off')
    plt.show()

# Show the generated images, each titled with the scene name it was rendered for.
titles = [pair[1] for pair in prompts_and_scenes]
display_images(generated_images, titles)
