In [1]:
# Install the necessary libraries
!pip install -U -q diffusers accelerate transformers peft bitsandbytes
!pip install -q wandb # For metric identification and tracking

# Download the official training script for SDXL/SSD-1B
!wget https://raw.githubusercontent.com/huggingface/diffusers/main/examples/text_to_image/train_text_to_image_lora_sdxl.py

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.6/4.6 MB[0m [31m52.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.9/380.9 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m90.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.0/557.0 kB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.7/60.7 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m553.3/553.3 kB[0m [31m35.2 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 5.1.1 requires transformers<5.0.0,>=4.41.0, but you have transforme

In [2]:
# 1. Uninstall existing version to avoid conflicts
!pip uninstall -y diffusers

# 2. Install the latest "dev" version from source
!pip install git+https://github.com/huggingface/diffusers

# 3. Ensure other dependencies are up to date for SSD-1B
!pip install -U -q accelerate transformers peft bitsandbytes

Found existing installation: diffusers 0.36.0
Uninstalling diffusers-0.36.0:
  Successfully uninstalled diffusers-0.36.0
Collecting git+https://github.com/huggingface/diffusers
  Cloning https://github.com/huggingface/diffusers to /tmp/pip-req-build-jicm89lz
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/diffusers /tmp/pip-req-build-jicm89lz
  Resolved https://github.com/huggingface/diffusers to commit fe78a7b7c6c0de7fd1d5ba957efc8d74ef7b00dc
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: diffusers
  Building wheel for diffusers (pyproject.toml) ... [?25l[?25hdone
  Created wheel for diffusers: filename=diffusers-0.37.0.dev0-py3-none-any.whl size=4928884 sha256=08cc32afd82f7a9436c8ca752214d1b5f5aa50bcc70b3783bb97d547db6696ff
  Stored in directory: /tmp/pip-ephem-wheel-cac

In [3]:
import pandas as pd
import os
import shutil
import json
from tqdm import tqdm

# --- CONFIGURATION ---
# 1. PASTE YOUR COPIED CSV PATH HERE:
CSV_PATH = "/kaggle/input/labels/demo.csv"

IMG_SOURCE_DIR = "/kaggle/input/pixel-art/images/images"
TRAIN_DIR = "/kaggle/working/train_data"
LIMIT = 3000 # Your project requirement
MANDATORY_KEYWORDS = "pixel art, 16-bit, sprite, "
# Set to True to prepend MANDATORY_KEYWORDS to every caption. Off by default,
# so captions are written exactly as they appear in the CSV.
PREPEND_MANDATORY_KEYWORDS = False

os.makedirs(TRAIN_DIR, exist_ok=True)

# Load the CSV and fail early, with a clear message, if it lacks the columns
# that the loop below reads.
df = pd.read_csv(CSV_PATH)
missing_columns = {"file_name", "text"} - set(df.columns)
if missing_columns:
    raise KeyError(
        f"{CSV_PATH} is missing required column(s): {sorted(missing_columns)}"
    )

# A fixed random_state keeps the sampled subset identical from run to run.
df_sample = df.sample(n=min(LIMIT, len(df)), random_state=42)

metadata = []
skipped_incomplete_rows = 0  # rows with an empty file_name or caption cell
skipped_missing_images = 0   # rows whose image is not present in IMG_SOURCE_DIR
print(f"Moving {len(df_sample)} images to training folder...")

for file_name, caption in tqdm(
    zip(df_sample["file_name"], df_sample["text"]), total=len(df_sample)
):
    # Empty CSV cells are read as NaN; str(NaN) would silently produce the
    # literal caption "nan", so such rows are skipped instead.
    if pd.isna(file_name) or pd.isna(caption):
        skipped_incomplete_rows += 1
        continue

    source_path = os.path.join(IMG_SOURCE_DIR, file_name)
    if not os.path.exists(source_path):
        skipped_missing_images += 1
        continue

    target_path = os.path.join(TRAIN_DIR, file_name)
    shutil.copy(source_path, target_path)

    full_caption = str(caption)
    if PREPEND_MANDATORY_KEYWORDS:
        # Combine the mandatory style keywords with the CSV caption
        full_caption = MANDATORY_KEYWORDS + full_caption
    metadata.append({"file_name": file_name, "text": full_caption})

# Save the metadata file that the training script expects (one JSON object per line)
with open(os.path.join(TRAIN_DIR, "metadata.jsonl"), "w", encoding="utf-8") as f:
    f.writelines(json.dumps(entry) + "\n" for entry in metadata)

print(f"Successfully prepared {len(metadata)} images in {TRAIN_DIR}")
if skipped_incomplete_rows or skipped_missing_images:
    print(
        f"Skipped {skipped_incomplete_rows} row(s) with an empty file name or caption "
        f"and {skipped_missing_images} row(s) whose image file was not found."
    )

Moving 3000 images to training folder...


100%|██████████| 3000/3000 [00:18<00:00, 162.85it/s]

Successfully prepared 3000 images in /kaggle/working/train_data





In [4]:
!accelerate launch --num_processes=1 train_text_to_image_lora_sdxl.py \
  --pretrained_model_name_or_path="segmind/SSD-1B" \
  --pretrained_vae_model_name_or_path="madebyollin/sdxl-vae-fp16-fix" \
  --train_data_dir="/kaggle/working/train_data" \
  --caption_column="text" \
  --resolution=512 \
  --mixed_precision="fp16" \
  --train_batch_size=1 \
  --gradient_accumulation_steps=8 \
  --learning_rate=5e-5 \
  --rank=16 \
  --max_train_steps=2000 \
  --checkpointing_steps=500 \
  --validation_prompt="16x16 pixel art of a small owl flying" \
  --validation_epochs=1 \
  --num_validation_images=4 \
  --output_dir="/kaggle/working/court_of_owls_lora" \
  --seed=42 \
  --report_to="tensorboard" \
  --dataloader_num_workers=2

The following values were not passed to `accelerate launch` and had defaults used instead:
	`--num_machines` was set to a value of `1`
	`--mixed_precision` was set to a value of `'no'`
	`--dynamo_backend` was set to a value of `'no'`
Flax classes are deprecated and will be removed in Diffusers v1.0.0. We recommend migrating to PyTorch classes or pinning your version of Diffusers.
Flax classes are deprecated and will be removed in Diffusers v1.0.0. We recommend migrating to PyTorch classes or pinning your version of Diffusers.
tokenizer_config.json: 100%|███████████████████| 737/737 [00:00<00:00, 4.83MB/s]
vocab.json: 1.06MB [00:00, 27.8MB/s]
merges.txt: 525kB [00:00, 101MB/s]
special_tokens_map.json: 100%|█████████████████| 472/472 [00:00<00:00, 3.24MB/s]
tokenizer_config.json: 100%|███████████████████| 725/725 [00:00<00:00, 4.86MB/s]
special_tokens_map.json: 100%|█████████████████| 460/460 [00:00<00:00, 3.86MB/s]
config.json: 100%|█████████████████████████████| 560/560 [00

In [5]:
# NOTE: this whole cell is disabled -- every line below is commented out, so
# running it does nothing.
# If re-enabled, the code would:
#   1. glob for PNG files under /kaggle/working/court_of_owls_lora (the top level,
#      a `samples/` subfolder, and recursively), de-duplicate the matches and
#      sort them by modification time;
#   2. show the most recent images (at most 4) side by side with matplotlib,
#      titled with their file names;
#   3. otherwise print a "not found" message and list the output directory.
# import matplotlib.pyplot as plt
# from PIL import Image
# import glob
# import os

# # Check multiple possible locations for validation images
# possible_paths = [
#     '/kaggle/working/court_of_owls_lora/samples/*.png',
#     '/kaggle/working/court_of_owls_lora/*.png',
#     '/kaggle/working/court_of_owls_lora/**/*.png'
# ]

# sample_images = []
# for path in possible_paths:
#     sample_images.extend(glob.glob(path, recursive=True))

# sample_images = list(set(sample_images))  # Remove duplicates
# sample_images.sort(key=os.path.getmtime)

# if sample_images:
#     # Show the last 4 validation images
#     num_to_show = min(4, len(sample_images))
#     fig, axes = plt.subplots(1, num_to_show, figsize=(5*num_to_show, 5))
#     if num_to_show == 1:
#         axes = [axes]
    
#     for ax, img_path in zip(axes, sample_images[-num_to_show:]):
#         img = Image.open(img_path)
#         ax.imshow(img)
#         ax.axis('off')
#         ax.set_title(os.path.basename(img_path))
    
#     plt.tight_layout()
#     plt.show()
# else:
#     print("No validation images found yet.")
#     print("Check output directory contents:")
#     !ls -la /kaggle/working/court_of_owls_lora/