In [None]:
# Install the Hugging Face `datasets` package; the data-prep cell below imports
# `load_dataset` from it.
!pip install datasets

In [None]:
import os
import json
from datasets import load_dataset
from PIL import Image
from tqdm import tqdm

# --- Configuration -----------------------------------------------------------
# Everything a reader might want to tune for the data-prep step lives here.
TRAIN_DIR = "/kaggle/working/plan_b_train_data"
TARGET_SIZE = 1024  # Upscale target for SSD-1B
DATASET_NAME = "jiovine/pixel-art-nouns-2k"
IMAGE_COLUMN = "image"   # column holding the image objects
CAPTION_COLUMN = "text"  # column holding the caption string
os.makedirs(TRAIN_DIR, exist_ok=True)

# 1. Download directly from Hugging Face
print("Downloading pixel-art-nouns-2k from Hugging Face...")
dataset = load_dataset(DATASET_NAME, split="train")

# Fail fast on a schema mismatch. Without this check a wrong column name would
# raise inside the loop for *every* row, be swallowed by the per-image
# `except`, and leave an empty metadata file behind a "Data Ready!" message.
missing_columns = [
    column for column in (IMAGE_COLUMN, CAPTION_COLUMN)
    if column not in dataset.column_names
]
if missing_columns:
    raise ValueError(
        f"Dataset {DATASET_NAME} is missing expected column(s) {missing_columns}; "
        f"available columns: {dataset.column_names}"
    )

metadata = []
skipped = 0
print(f"Upscaling {len(dataset)} images using Nearest Neighbor...")

for i, item in enumerate(tqdm(dataset)):
    try:
        # The code below relies on the PIL image API (.mode/.convert/.resize).
        img = item[IMAGE_COLUMN]
        caption = item[CAPTION_COLUMN]

        if img.mode != "RGB":
            img = img.convert("RGB")

        # 2. Crucial Step: Nearest Neighbor upscaling to keep pixels sharp
        img_upscaled = img.resize((TARGET_SIZE, TARGET_SIZE), resample=Image.NEAREST)

        filename = f"noun_{i}.png"
        img_upscaled.save(os.path.join(TRAIN_DIR, filename), "PNG")

        # Add your mandatory style trigger (optional but recommended)
        # full_caption = "pixel art, 16-bit, sprite, " + str(caption)
        full_caption = caption
        metadata.append({"file_name": filename, "text": full_caption})

    except Exception as e:
        # Best-effort: one bad sample must not abort the whole run, but keep a
        # count so the final summary shows how much data was lost.
        skipped += 1
        print(f"Skipping image {i} due to error: {e}")

# An empty training set is never a usable result; stop here with a clear
# message instead of handing an empty metadata file to the trainer.
if not metadata:
    raise RuntimeError(
        f"No images were processed successfully ({skipped} skipped); "
        f"nothing was written to {TRAIN_DIR}/metadata.jsonl"
    )

# 3. Save the JSONL file for the trainer (one JSON object per line)
with open(os.path.join(TRAIN_DIR, "metadata.jsonl"), "w", encoding="utf-8") as f:
    f.writelines(json.dumps(entry) + "\n" for entry in metadata)

print(f"Processed {len(metadata)} images, skipped {skipped}.")
print(f"Plan B Data Ready! Saved to {TRAIN_DIR}")

In [None]:
# import matplotlib.pyplot as plt
# from PIL import Image
# import os

# # Path to your newly processed Plan B dataset
# TRAIN_DIR = "/kaggle/working/plan_b_train_data"

# # Grab the very first image we processed
# sample_image_path = os.path.join(TRAIN_DIR, "noun_0.png")

# if os.path.exists(sample_image_path):
#     img = Image.open(sample_image_path)
    
#     # Display the image
#     plt.figure(figsize=(8, 8))
#     plt.imshow(img)
#     plt.title(f"Upscaled Resolution: {img.size[0]}x{img.size[1]} pixels\n(Notice the crisp edges!)", fontsize=14)
#     plt.axis('off') # Hides the axis numbers for a cleaner look
#     plt.show()
# else:
#     print("Image not found. Make sure the Plan B Data Prep cell has finished running completely!")

In [None]:
# Install the necessary libraries
!pip install -U -q diffusers accelerate transformers peft bitsandbytes
!pip install -q wandb # For metric identification and tracking

# Download the official training script for SDXL/SSD-1B
!wget https://raw.githubusercontent.com/huggingface/diffusers/main/examples/text_to_image/train_text_to_image_lora_sdxl.py

In [None]:
# Swap the released diffusers package for a build of the GitHub repository.
# NOTE(review): presumably needed because the training script was downloaded
# from the repository's `main` branch and may expect matching library code —
# confirm against the script's version check.

# 1. Uninstall existing version to avoid conflicts
!pip uninstall -y diffusers

# 2. Install the latest "dev" version from source
!pip install git+https://github.com/huggingface/diffusers

# 3. Ensure other dependencies are up to date for SSD-1B
!pip install -U -q accelerate transformers peft bitsandbytes

In [None]:
# Launch LoRA fine-tuning via the downloaded train_text_to_image_lora_sdxl.py.
# Settings, exactly as passed below:
#   - base model segmind/SSD-1B with the madebyollin/sdxl-vae-fp16-fix VAE
#   - training data read from /kaggle/working/plan_b_train_data, captions from
#     the "text" column (the key written to metadata.jsonl by the data-prep cell)
#   - one process, fp16 mixed precision, resolution 1024
#   - batch size 1 with 4 gradient-accumulation steps
#   - LoRA rank 32, learning rate 1e-5, cosine schedule, 100 warmup steps,
#     at most 1500 training steps, seed 42
#   - a checkpoint every 300 steps; 4 validation images per validation round
#   - --train_text_encoder: presumably trains LoRA layers on the text
#     encoder(s) as well — confirm in the script's argument help
#   - logging to tensorboard; output in /kaggle/working/court_of_owls_lora_plan_b
# NOTE: the trailing backslashes join everything into a single shell command,
# so do not insert comment lines between the flags.
!accelerate launch --num_processes=1 train_text_to_image_lora_sdxl.py \
  --pretrained_model_name_or_path="segmind/SSD-1B" \
  --pretrained_vae_model_name_or_path="madebyollin/sdxl-vae-fp16-fix" \
  --train_data_dir="/kaggle/working/plan_b_train_data" \
  --caption_column="text" \
  --resolution=1024 \
  --mixed_precision="fp16" \
  --train_batch_size=1 \
  --gradient_accumulation_steps=4 \
  --learning_rate=1e-5 \
  --lr_scheduler="cosine" \
  --lr_warmup_steps=100 \
  --rank=32 \
  --max_train_steps=1500 \
  --checkpointing_steps=300 \
  --validation_prompt="a character with square black glasses, a hotdog-shaped head and a peachy-colored body on a warm background" \
  --validation_epochs=1 \
  --num_validation_images=4 \
  --output_dir="/kaggle/working/court_of_owls_lora_plan_b" \
  --seed=42 \
  --report_to="tensorboard" \
  --dataloader_num_workers=2 \
  --noise_offset=0.1 \
  --train_text_encoder

In [None]:
# import torch
# from diffusers import DiffusionPipeline
# import matplotlib.pyplot as plt
# import os

# # Load the base model
# pipe = DiffusionPipeline.from_pretrained(
#     "segmind/SSD-1B",
#     torch_dtype=torch.float16,
#     variant="fp16"
# )
# pipe.to("cuda")

# # Load your trained LoRA weights
# pipe.load_lora_weights("/kaggle/working/court_of_owls_lora_plan_b/pytorch_lora_weights.safetensors")

# # Generate test images
# prompts = [
# "a character with circular blue glasses, a square-shaped head and a teal-colored body on a cool background"
# ]

# images = []
# for prompt in prompts:
#     image = pipe(
#         prompt=prompt,
#         num_inference_steps=25,
#         guidance_scale=7.5,
#         generator=torch.Generator("cuda").manual_seed(42)
#     ).images[0]
#     images.append(image)

# # Display results
# # fig, axes = plt.subplots(1, 4, figsize=(20, 5))
# # for ax, img, prompt in zip(axes, images, prompts):
# #     ax.imshow(img)
# #     ax.set_title(prompt, fontsize=10)
# #     ax.axis('off')
# # plt.tight_layout()
# plt.figure(figsize=(8, 8))
# plt.imshow(image)
# plt.title("LoRA Generalization Test", fontsize=16, fontweight='bold')
# plt.axis('off') # Hides the grid numbers for a clean presentation
# plt.tight_layout()
# plt.show()

# # Create output directory and save
# os.makedirs("/kaggle/working/output", exist_ok=True)
# plt.savefig("/kaggle/working/output/validation_results.png")
# plt.show()

# print("Images saved to: /kaggle/working/output/validation_results.png")

In [None]:
import torch
from diffusers import DiffusionPipeline
import matplotlib.pyplot as plt
import os

# 1. Load the base model in half precision and move it to the GPU
pipe = DiffusionPipeline.from_pretrained(
    "segmind/SSD-1B",
    torch_dtype=torch.float16,
    variant="fp16"
)
pipe.to("cuda")

# 2. Load your trained LoRA weights (directory, not single file!)
# The training output directory is created before training finishes, so its
# mere existence does not prove the final weights were written. Check for the
# weights file itself; otherwise an interrupted run would crash inside
# `load_lora_weights` instead of reaching the warning below.
lora_path = "/kaggle/working/court_of_owls_lora_plan_b"
lora_weight_names = ("pytorch_lora_weights.safetensors", "pytorch_lora_weights.bin")
lora_loaded = any(
    os.path.isfile(os.path.join(lora_path, weight_name))
    for weight_name in lora_weight_names
)
if lora_loaded:
    pipe.load_lora_weights(lora_path)
    print("LoRA weights loaded successfully.")
else:
    print("Warning: LoRA weights not found at the specified path!")

# 3. No style trigger needed — captions were saved without prefix
prompts = [
    "a character with square black glasses, a hotdog-shaped head and a peachy-colored body on a warm background",
    "a character with square black sunglasses, a shower-shaped head and a blue-colored body on a cool background",
    "a character with square black sunglasses, a tornado-shaped head and a computerblue-colored body on a cool background",
]

images = []
for prompt in prompts:
    print(f"Generating: {prompt[:80]}...")
    # A fresh generator with the same seed per prompt keeps each image
    # reproducible independently of the order or number of prompts.
    image = pipe(
        prompt=prompt,
        num_inference_steps=40,
        guidance_scale=8.0,
        generator=torch.Generator("cuda").manual_seed(42)
    ).images[0]
    images.append(image)

# 4. Display results side by side
# squeeze=False always returns a 2-D array of axes, so a single prompt needs
# no special case; take the only row.
fig, axes = plt.subplots(1, len(images), figsize=(7 * len(images), 7), squeeze=False)
axes = axes[0]
for ax, img, prompt in zip(axes, images, prompts):
    ax.imshow(img)
    ax.set_title(prompt, fontsize=10, wrap=True)
    ax.axis('off')
# Label the figure honestly: without the LoRA these are base-model samples.
figure_title = (
    "LoRA Pixel Art Generation"
    if lora_loaded
    else "Base Model Generation (LoRA weights not loaded)"
)
fig.suptitle(figure_title, fontsize=16, fontweight='bold')
fig.tight_layout()

# 5. Save (before plt.show(), which may clear the current figure)
os.makedirs("/kaggle/working/output", exist_ok=True)
save_path = "/kaggle/working/output/validation_results.png"
fig.savefig(save_path, dpi=150, bbox_inches='tight')
plt.show()

print(f"Images saved to: {save_path}")