# 🌹 SkyyRose LoRA Training

Train a custom LoRA on **604 exact product images** for precise generation.

**Requirements:**
- Google Colab with GPU (T4 free tier works)
- HuggingFace account for saving the model

**Runtime:** ~2-3 hours on T4 GPU

## 1️⃣ Setup Environment

In [None]:
# Check GPU
# Prints the NVIDIA driver/GPU table for the accelerator attached to this runtime.
!nvidia-smi

# Install dependencies
# PyTorch + torchvision, pulled from the CUDA 11.8 wheel index.
!pip install -q torch torchvision --index-url https://download.pytorch.org/whl/cu118
# Diffusion / LoRA training stack (diffusers, transformers, accelerate, peft, bitsandbytes).
!pip install -q diffusers[torch] transformers accelerate peft bitsandbytes
# Hub client, safetensors serialization, image loading and progress bars.
!pip install -q huggingface_hub safetensors pillow tqdm

In [None]:
# Login to HuggingFace (required to save model)
# NOTE(review): the token presumably needs write permission, since a later
# cell creates a model repo and uploads to it — confirm.
from huggingface_hub import notebook_login

# Authenticate this notebook session with the Hugging Face Hub.
notebook_login()

## 2️⃣ Download Training Data

In [None]:
import zipfile
from pathlib import Path

# Download the optimized training zip from HuggingFace
DATASET_URL = "https://huggingface.co/datasets/damBruh/skyyrose-lora-dataset-v3/resolve/main/training/skyyrose_training_data.zip"
DATA_DIR = Path("/content/training_data")

print("üì• Downloading training data...")
!wget -q -O /content/training_data.zip "{DATASET_URL}"

# Extract
DATA_DIR.mkdir(exist_ok=True)
with zipfile.ZipFile("/content/training_data.zip", "r") as zf:
    zf.extractall(DATA_DIR)

# Count images
images = list(DATA_DIR.glob("*.jpg")) + list(DATA_DIR.glob("*.png"))
print(f"‚úÖ Extracted {len(images)} training images")

## 3️⃣ Create Dataset

In [None]:
import torch
import torchvision.transforms as T
from PIL import Image
from torch.utils.data import DataLoader, Dataset


class SkyyRoseDataset(Dataset):
    """Image/caption pairs for SkyyRose LoRA training.

    Every image file directly inside ``data_dir`` whose extension is one of
    ``IMAGE_SUFFIXES`` (case-insensitive) becomes one sample. The caption is
    read from a sibling ``<image stem>.txt`` file when it exists; otherwise
    ``DEFAULT_CAPTION`` is used.

    Args:
        data_dir: Directory holding the images and optional caption files.
            A missing directory yields an empty dataset.
        resolution: Side length of the square every image is resized to.

    Each item is a dict with:
        ``pixel_values``: the transformed image tensor, scaled to [-1, 1].
        ``caption``: the caption string for that image.
    """

    # The download step counts both JPEG and PNG files, so accept both here;
    # otherwise PNG images would be silently excluded from training.
    IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png")
    DEFAULT_CAPTION = "skyyrose luxury streetwear product"

    def __init__(self, data_dir: Path, resolution: int = 512):
        self.resolution = resolution
        self.images = self._find_images(data_dir)
        self.captions = [self._read_caption(path) for path in self.images]

        self.transform = T.Compose(
            [
                T.Resize((resolution, resolution), interpolation=T.InterpolationMode.LANCZOS),
                T.ToTensor(),
                T.Normalize([0.5], [0.5]),  # [-1, 1]
            ]
        )

        print(f"Loaded {len(self.images)} images")

    @classmethod
    def _find_images(cls, data_dir):
        """Return the sorted image paths found directly inside ``data_dir``."""
        return sorted(
            path
            for path in Path(data_dir).glob("*")
            if path.is_file() and path.suffix.lower() in cls.IMAGE_SUFFIXES
        )

    @classmethod
    def _read_caption(cls, img_path):
        """Return the caption stored next to ``img_path``, or the default caption."""
        caption_path = img_path.with_suffix(".txt")
        if caption_path.exists():
            # Explicit encoding: the platform default is locale-dependent.
            return caption_path.read_text(encoding="utf-8").strip()
        return cls.DEFAULT_CAPTION

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # Close the file handle promptly; DataLoader workers open many files.
        with Image.open(self.images[idx]) as raw:
            image = raw.convert("RGB")
        return {
            "pixel_values": self.transform(image),
            "caption": self.captions[idx],
        }


# Create dataset
dataset = SkyyRoseDataset(DATA_DIR, resolution=512)

# Fail with an actionable message instead of a bare IndexError on captions[0]
# when no images were discovered (e.g. the archive unpacked into a subfolder).
if not dataset.images:
    raise RuntimeError(
        f"No training images found directly inside {DATA_DIR}. "
        "Check that the archive was downloaded and extracted to this folder."
    )

print(f"Sample caption: {dataset.captions[0][:80]}...")

## 4️⃣ Load SDXL Base Model

In [None]:
import gc

from diffusers import AutoencoderKL, UNet2DConditionModel
from transformers import CLIPTextModel, CLIPTokenizer

MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float16

print(f"üîß Loading SDXL on {device}...")

# The weights are requested with variant="fp16" (the same variant the test
# generation cell uses) so the half-precision checkpoints are downloaded
# directly instead of fetching the fp32 files and casting them afterwards.

# Tokenizer and text encoder
# NOTE(review): SDXL's UNet is conditioned on two text encoders
# (`text_encoder` and `text_encoder_2`); only the first one is loaded here.
tokenizer = CLIPTokenizer.from_pretrained(MODEL_ID, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(
    MODEL_ID, subfolder="text_encoder", torch_dtype=dtype, variant="fp16"
).to(device)
text_encoder.requires_grad_(False)  # frozen: only the LoRA adapter is trained

# VAE (frozen; only used to encode images into latents)
vae = AutoencoderKL.from_pretrained(
    MODEL_ID, subfolder="vae", torch_dtype=dtype, variant="fp16"
).to(device)
vae.requires_grad_(False)

# UNet (this is what we'll train LoRA on); moved to the GPU after LoRA is attached
unet = UNet2DConditionModel.from_pretrained(
    MODEL_ID, subfolder="unet", torch_dtype=dtype, variant="fp16"
)

print("‚úÖ Base model loaded")

## 5️⃣ Configure LoRA

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA configuration
lora_config = LoraConfig(
    r=16,  # Rank
    lora_alpha=16,  # alpha == r, so the adapter output is scaled by 1.0
    target_modules=["to_k", "to_q", "to_v", "to_out.0"],  # attention projections
    lora_dropout=0.05,
)

# Apply LoRA to UNet
unet = get_peft_model(unet, lora_config)

# The base UNet is float16, so the freshly created adapter weights are float16
# too. Stepping float16 parameters with AdamW is numerically unstable (tiny
# updates round to zero, the epsilon term underflows), so keep the *trainable*
# weights in float32 while the frozen base weights stay in float16.
for _param in unet.parameters():
    if _param.requires_grad:
        _param.data = _param.data.to(torch.float32)

unet = unet.to(device)
unet.print_trainable_parameters()

print("‚úÖ LoRA configured")

## 6️⃣ Training Configuration

In [None]:
# Hyperparameters for the run
EPOCHS = 10
BATCH_SIZE = 1  # Keep at 1 for T4 GPU memory
LEARNING_RATE = 1e-4
SAVE_EVERY = 2  # Save checkpoint every N epochs

# Where checkpoints, logs and test images are written
OUTPUT_DIR = Path("/content/skyyrose-lora-v3")
OUTPUT_DIR.mkdir(exist_ok=True)

# Optimizer over the (LoRA-wrapped) UNet parameters
optimizer = torch.optim.AdamW(
    unet.parameters(),
    lr=LEARNING_RATE,
    weight_decay=0.01,
)

# Shuffled batches, loaded by two worker processes
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

# Summarize the configuration
_config_lines = [
    f"  Epochs: {EPOCHS}",
    f"  Batch size: {BATCH_SIZE}",
    f"  Learning rate: {LEARNING_RATE}",
    f"  Total steps: {len(dataloader) * EPOCHS}",
]
print("üìä Training config:")
print("\n".join(_config_lines))

## 7️⃣ Train! 🚀

In [None]:
import json

from diffusers import DDPMScheduler, StableDiffusionXLPipeline
from diffusers.utils import convert_state_dict_to_diffusers
from peft.utils import get_peft_model_state_dict
from tqdm.auto import tqdm
from transformers import CLIPTextModelWithProjection, CLIPTokenizer

print("üöÄ Starting SkyyRose LoRA Training...")
print("=" * 50)

# --- SDXL-specific conditioning ---------------------------------------------
# The SDXL UNet cross-attends over the concatenated penultimate hidden states
# of TWO CLIP text encoders and additionally requires `added_cond_kwargs`
# (pooled text embedding + size/crop "time ids"). Feeding it only the first
# encoder's output makes the forward pass fail, so the second tokenizer and
# encoder are loaded here.
tokenizer_2 = CLIPTokenizer.from_pretrained(MODEL_ID, subfolder="tokenizer_2")
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
    MODEL_ID, subfolder="text_encoder_2", torch_dtype=dtype, variant="fp16"
).to(device)
text_encoder_2.requires_grad_(False)

# Use the noise schedule the base model was trained with rather than an
# ad-hoc linear one, so the adapter matches what inference will see.
noise_scheduler = DDPMScheduler.from_pretrained(MODEL_ID, subfolder="scheduler")

# The stock SDXL VAE can overflow in float16; run the encoder in float32.
vae.to(dtype=torch.float32)


def encode_prompts(captions):
    """Encode caption strings with both SDXL text encoders.

    Returns:
        (prompt_embeds, pooled_embeds): the per-token embeddings of both
        encoders concatenated on the feature axis, and the pooled projection
        of the second encoder.
    """
    with torch.no_grad():
        ids_1 = tokenizer(
            captions,
            padding="max_length",
            max_length=77,
            truncation=True,
            return_tensors="pt",
        ).input_ids.to(device)
        ids_2 = tokenizer_2(
            captions,
            padding="max_length",
            max_length=77,
            truncation=True,
            return_tensors="pt",
        ).input_ids.to(device)

        out_1 = text_encoder(ids_1, output_hidden_states=True)
        out_2 = text_encoder_2(ids_2, output_hidden_states=True)

    # SDXL conditions on the penultimate layer of each encoder.
    prompt_embeds = torch.cat([out_1.hidden_states[-2], out_2.hidden_states[-2]], dim=-1)
    return prompt_embeds, out_2.text_embeds


def save_lora(directory):
    """Write the adapter as diffusers-format LoRA weights into ``directory``.

    `pipe.load_lora_weights(...)` expects this format; a raw PEFT
    `save_pretrained` dump uses different key names.
    """
    adapter_state = get_peft_model_state_dict(unet)
    # Drop the wrapper prefix added by `get_peft_model` so keys are relative to the UNet.
    wrapper_prefix = "base_model.model."
    adapter_state = {
        (key[len(wrapper_prefix):] if key.startswith(wrapper_prefix) else key): value
        for key, value in adapter_state.items()
    }
    StableDiffusionXLPipeline.save_lora_weights(
        save_directory=str(directory),
        unet_lora_layers=convert_state_dict_to_diffusers(adapter_state),
        safe_serialization=True,
    )


best_loss = float("inf")
training_log = []

for epoch in range(EPOCHS):
    unet.train()
    epoch_loss = 0.0

    progress = tqdm(dataloader, desc=f"Epoch {epoch + 1}/{EPOCHS}")

    for batch in progress:
        pixel_values = batch["pixel_values"].to(device, dtype=vae.dtype)
        captions = list(batch["caption"])

        # Encode text (both encoders, no gradients)
        prompt_embeds, pooled_embeds = encode_prompts(captions)

        # Encode images to latents, then switch to the UNet's compute dtype
        with torch.no_grad():
            latents = vae.encode(pixel_values).latent_dist.sample()
            latents = (latents * vae.config.scaling_factor).to(dtype)

        batch_size = latents.shape[0]

        # SDXL micro-conditioning: (original_size, crop_top_left, target_size).
        # Images are resized (not cropped) to a square, so all three describe
        # the training resolution.
        height, width = pixel_values.shape[-2:]
        add_time_ids = torch.tensor(
            [[height, width, 0, 0, height, width]],
            device=device,
            dtype=prompt_embeds.dtype,
        ).repeat(batch_size, 1)

        # Add noise (diffusion forward process) using the model's own schedule
        noise = torch.randn_like(latents)
        timesteps = torch.randint(
            0,
            noise_scheduler.config.num_train_timesteps,
            (batch_size,),
            device=device,
        ).long()
        noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

        # Predict noise
        noise_pred = unet(
            noisy_latents,
            timesteps,
            encoder_hidden_states=prompt_embeds,
            added_cond_kwargs={"text_embeds": pooled_embeds, "time_ids": add_time_ids},
        ).sample

        # Training target depends on how the base model parameterizes its output
        if noise_scheduler.config.prediction_type == "v_prediction":
            target = noise_scheduler.get_velocity(latents, noise, timesteps)
        else:
            target = noise

        # MSE loss, accumulated in float32 for stability
        loss = torch.nn.functional.mse_loss(noise_pred.float(), target.float())

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        progress.set_postfix(loss=f"{loss.item():.4f}")

    # Epoch stats
    avg_loss = epoch_loss / len(dataloader)
    training_log.append({"epoch": epoch + 1, "loss": avg_loss})
    print(f"Epoch {epoch + 1}/{EPOCHS} - Loss: {avg_loss:.4f}")

    # Save best checkpoint
    if avg_loss < best_loss:
        best_loss = avg_loss
        save_lora(OUTPUT_DIR / "best")
        print(f"  üíæ Saved best checkpoint (loss: {best_loss:.4f})")

    # Periodic save
    if (epoch + 1) % SAVE_EVERY == 0:
        save_lora(OUTPUT_DIR / f"checkpoint-{epoch + 1}")

    # Clear memory
    gc.collect()
    torch.cuda.empty_cache()

# Save final model
print("\nüíæ Saving final model...")
save_lora(OUTPUT_DIR / "final")

# Save training log
with open(OUTPUT_DIR / "training_log.json", "w") as f:
    json.dump(
        {
            "epochs": EPOCHS,
            "best_loss": best_loss,
            "trigger_word": "skyyrose",
            "dataset": "damBruh/skyyrose-lora-dataset-v3",
            "images": len(dataset),
            "log": training_log,
        },
        f,
        indent=2,
    )

print("\n" + "=" * 50)
print("‚úÖ TRAINING COMPLETE!")
print(f"Best loss: {best_loss:.4f}")
print(f"Model saved to: {OUTPUT_DIR}")

## 8️⃣ Upload to HuggingFace Hub

In [None]:
from huggingface_hub import HfApi, create_repo

REPO_ID = "damBruh/skyyrose-lora-v3"  # Change to your username/repo

print(f"üì§ Uploading to HuggingFace: {REPO_ID}...")

# Ensure the destination model repo exists; on failure, report it and continue
try:
    create_repo(REPO_ID, exist_ok=True, repo_type="model")
except Exception as e:
    print(f"Repo exists or error: {e}")

api = HfApi()

# Both uploads go to the same model repository
_hub_target = {"repo_id": REPO_ID, "repo_type": "model"}

# Push the best checkpoint folder
api.upload_folder(folder_path=str(OUTPUT_DIR / "best"), **_hub_target)

# Push the training log alongside it
api.upload_file(
    path_or_fileobj=str(OUTPUT_DIR / "training_log.json"),
    path_in_repo="training_log.json",
    **_hub_target,
)

print(f"\n‚úÖ Uploaded to: https://huggingface.co/{REPO_ID}")

## 9️⃣ Test Generation

In [None]:
import matplotlib.pyplot as plt
from diffusers import StableDiffusionXLPipeline

print("üé® Testing generation with trained LoRA...")

# The training-time models are still resident on the GPU. Loading a second full
# SDXL copy next to them can exhaust a 16 GB T4, so drop those references
# first. Re-run the loading cells if you want to train again afterwards.
for _name in ("optimizer", "unet", "text_encoder", "text_encoder_2", "vae"):
    globals().pop(_name, None)
gc.collect()
torch.cuda.empty_cache()

# Load pipeline
pipe = StableDiffusionXLPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# Load trained LoRA
pipe.load_lora_weights(str(OUTPUT_DIR / "best"))

# Test prompts
test_prompts = [
    "skyyrose signature collection lavender rose beanie, luxury streetwear, product photo",
    "skyyrose black_rose collection sherpa jacket, dark elegance, studio lighting",
    "skyyrose love_hurts collection windbreaker, bold design, professional photo",
]

# One panel per prompt; squeeze=False keeps `axes` two-dimensional even for a
# single prompt, so the indexing below works for any list length.
fig, axes = plt.subplots(
    1,
    len(test_prompts),
    figsize=(5 * len(test_prompts), 5),
    squeeze=False,
)

for i, (ax, prompt) in enumerate(zip(axes[0], test_prompts)):
    print(f"Generating: {prompt[:50]}...")
    image = pipe(
        prompt,
        num_inference_steps=30,
        guidance_scale=7.5,
    ).images[0]

    ax.imshow(image)
    ax.set_title(prompt.split(",")[0][:30])  # short title: text before the first comma
    ax.axis("off")

    # Save
    image.save(OUTPUT_DIR / f"test_{i + 1}.png")

plt.tight_layout()
plt.savefig(OUTPUT_DIR / "test_results.png", dpi=150)
plt.show()

print("\n‚úÖ Test images saved!")

## 🎉 Done!

Your SkyyRose LoRA is trained and uploaded to HuggingFace.

**Usage:**
```python
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.load_lora_weights("damBruh/skyyrose-lora-v3")

image = pipe("skyyrose luxury streetwear, [your product description]").images[0]
```

**Trigger word:** `skyyrose`