<a href="https://colab.research.google.com/github/Rishardmunene/Stable-Diffusion-test/blob/SDXL/Fine_tuned_SDXL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install PyTorch
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# Verify installation
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Current device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")

Looking in indexes: https://download.pytorch.org/whl/cu121
CUDA available: True
Current device: Tesla T4


In [4]:
# Uninstall existing PyTorch
!pip3 uninstall torch torchvision torchaudio

# Install PyTorch with CUDA support
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Found existing installation: torch 2.5.1+cu121
Uninstalling torch-2.5.1+cu121:
  Would remove:
    /usr/local/bin/convert-caffe2-to-onnx
    /usr/local/bin/convert-onnx-to-caffe2
    /usr/local/bin/torchfrtrace
    /usr/local/bin/torchrun
    /usr/local/lib/python3.10/dist-packages/functorch/*
    /usr/local/lib/python3.10/dist-packages/torch-2.5.1+cu121.dist-info/*
    /usr/local/lib/python3.10/dist-packages/torch/*
    /usr/local/lib/python3.10/dist-packages/torchgen/*
Proceed (Y/n)? y
Y
  Successfully uninstalled torch-2.5.1+cu121
Found existing installation: torchvision 0.20.1+cu121
Uninstalling torchvision-0.20.1+cu121:
  Would remove:
    /usr/local/lib/python3.10/dist-packages/torchvision-0.20.1+cu121.dist-info/*
    /usr/local/lib/python3.10/dist-packages/torchvision.libs/libcudart.7ec1eba6.so.12
    /usr/local/lib/python3.10/dist-packages/torchvision.libs/libjpeg.ceea7512.so.62
    /usr/local/lib/python3.10/dist-packages/torchvision.libs/libnvjpeg.f00ca762.so.12
    /usr/local

In [1]:
import torch

# Report what PyTorch can see: CUDA availability, the CUDA version torch was
# built against, and the number of visible devices.
print(
    f"CUDA available: {torch.cuda.is_available()}",
    f"CUDA version: {torch.version.cuda}",
    f"Device count: {torch.cuda.device_count()}",
    sep="\n",
)

# Device-specific details can only be queried when a GPU is actually present.
if torch.cuda.is_available():
    print(
        f"Current device: {torch.cuda.current_device()}",
        f"Device name: {torch.cuda.get_device_name(0)}",
        sep="\n",
    )

CUDA available: True
CUDA version: 12.1
Device count: 1
Current device: 0
Device name: Tesla T4


In [1]:
!pip install --upgrade diffusers transformers accelerate
!pip install torch torchvision
!pip install bitsandbytes



In [2]:
import torch
from diffusers import StableDiffusionXLPipeline
from diffusers import StableDiffusionPipeline, DDPMScheduler
from diffusers.optimization import get_scheduler
import os
import requests
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset

In [3]:
class DreamBoothDataset(Dataset):
    """Map-style dataset pairing every instance image with the instance prompt.

    Each item is a dict with two keys:

    * ``"instance_images"``: the image after resize, center-crop, tensor
      conversion and normalisation with mean 0.5 / std 0.5.
    * ``"instance_prompt_ids"``: the ``input_ids`` the tokenizer produces for
      ``instance_prompt`` (not padded, truncated to
      ``tokenizer.model_max_length``).

    Indices wrap around modulo the number of images, so ``dataset[i]`` never
    raises ``IndexError``. Iterate through a ``DataLoader`` or
    ``range(len(dataset))``; a bare ``for item in dataset`` would not stop.

    Args:
        instance_data_root: Directory holding the instance images.
        instance_prompt: Prompt that is tokenized for every example.
        tokenizer: Callable tokenizer exposing ``model_max_length`` and
            returning an object with an ``input_ids`` attribute.
        size: Side length, in pixels, of the square image that is produced.

    Raises:
        OSError: If ``instance_data_root`` cannot be listed (raised by
            ``os.listdir``).
        ValueError: If the directory contains no usable files.
    """

    def __init__(self, instance_data_root, instance_prompt, tokenizer, size=1024):
        self.size = size
        self.tokenizer = tokenizer
        self.instance_prompt = instance_prompt
        self.instance_data_root = instance_data_root

        self.image_paths = self._list_image_paths(instance_data_root)
        if not self.image_paths:
            # Fail here with a clear message instead of a ZeroDivisionError on
            # the first __getitem__ call.
            raise ValueError(f"No image files found in {instance_data_root!r}.")

        self.num_instance_images = len(self.image_paths)
        self._length = self.num_instance_images

        self.image_transforms = transforms.Compose([
            transforms.Resize(size, interpolation=transforms.InterpolationMode.BILINEAR),
            transforms.CenterCrop(size),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5])
        ])

    @staticmethod
    def _list_image_paths(root):
        """Return the sorted paths of the visible regular files directly in ``root``.

        Hidden entries (such as ``.ipynb_checkpoints`` or ``.DS_Store``) and
        sub-directories are skipped because they cannot be opened as images.
        Sorting makes the index -> image mapping stable between runs.
        """
        return sorted(
            os.path.join(root, name)
            for name in os.listdir(root)
            if not name.startswith(".") and os.path.isfile(os.path.join(root, name))
        )

    def __len__(self):
        """Return the number of instance images."""
        return self._length

    def __getitem__(self, index):
        """Load, preprocess and return the example at ``index`` (modulo the image count)."""
        path = self.image_paths[index % self.num_instance_images]
        # The context manager closes the file handle; convert() loads the pixel
        # data first and always hands back an independent RGB image.
        with Image.open(path) as source:
            image = source.convert("RGB")
        image = self.image_transforms(image)

        example = {
            "instance_images": image,
            "instance_prompt_ids": self.tokenizer(
                self.instance_prompt,
                padding="do_not_pad",
                truncation=True,
                max_length=self.tokenizer.model_max_length,
            ).input_ids,
        }
        return example

In [4]:
# Load the SDXL base checkpoint in half precision from safetensors weights.
model_id = "stabilityai/stable-diffusion-xl-base-1.0"
pipeline = StableDiffusionXLPipeline.from_pretrained(
    model_id, use_safetensors=True, torch_dtype=torch.float16
)

# Turn on the pipeline's memory savers: model CPU offload and VAE slicing.
pipeline.enable_model_cpu_offload()
pipeline.enable_vae_slicing()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [5]:
# Training configuration
instance_prompt = "a photo of a misty mountain lake under moonlight, calm water surface, pine trees, detailed, natural, high quality"
output_dir = "dreambooth-model"
instance_data_dir = "instance_images"

# Hyperparameters
learning_rate = 2e-6
max_train_steps = 400
train_batch_size = 1
gradient_accumulation_steps = 1

# Create dataset
train_dataset = DreamBoothDataset(
    instance_data_root=instance_data_dir,
    instance_prompt=instance_prompt,
    tokenizer=pipeline.tokenizer,
)

In [6]:
# Sanity check: print whether PyTorch can see a CUDA device.
print(torch.cuda.is_available())

True


In [None]:
# Build the pipeline at notebook level (not inside the training function) so it
# can be handed to train_dreambooth as an argument.
model_id = "stabilityai/stable-diffusion-xl-base-1.0"
pipeline = StableDiffusionXLPipeline.from_pretrained(
    model_id, use_safetensors=True, torch_dtype=torch.float16
)

def train_dreambooth(pipeline):
    """Fine-tune the UNet of an SDXL ``pipeline`` on ``train_dataset``.

    Reads the notebook-level settings ``train_dataset``, ``train_batch_size``,
    ``learning_rate``, ``max_train_steps``, ``gradient_accumulation_steps`` and
    ``output_dir``. ``max_train_steps`` counts optimizer updates, not passes
    over the data. Progress is printed every 10 updates and the whole pipeline
    is saved to ``{output_dir}/checkpoint-{step}`` every 100 updates.

    Side effects: the pipeline is moved to the training device and its UNet
    and VAE are upcast to float32 in place.

    NOTE(review): float32 AdamW over the full SDXL UNet needs far more memory
    than the 16 GB Tesla T4 shown in this notebook's outputs. Expect an
    out-of-memory error unless a larger GPU is used or the set of trainable
    weights is reduced (e.g. LoRA). Confirm against the target hardware.

    Args:
        pipeline: A loaded ``StableDiffusionXLPipeline``.

    Raises:
        ValueError: If ``train_dataset`` is empty.
    """
    log_every = 10
    checkpoint_every = 100

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    if len(train_dataset) == 0:
        raise ValueError("train_dataset is empty; nothing to train on.")

    # Move pipeline to device
    pipeline = pipeline.to(device)
    unet, vae = pipeline.unet, pipeline.vae

    # Only the UNet is trained; the VAE and both text encoders stay frozen.
    vae.requires_grad_(False)
    pipeline.text_encoder.requires_grad_(False)
    pipeline.text_encoder_2.requires_grad_(False)

    # Half-precision weights cannot be optimised reliably, so the trainable
    # UNet is kept in float32 and autocast handles the fast forward pass. The
    # VAE is upcast as well because the SDXL VAE is prone to overflow in fp16.
    unet.to(dtype=torch.float32)
    vae.to(dtype=torch.float32)
    unet.enable_gradient_checkpointing()  # trade compute for activation memory
    unet.train()

    # The pipeline ships with an inference scheduler; training needs the
    # forward (noising) process, which DDPMScheduler provides from the same
    # beta-schedule configuration.
    noise_scheduler = DDPMScheduler.from_config(pipeline.scheduler.config)

    # Every example shares one prompt and the text encoders are frozen, so the
    # conditioning is computed once instead of once per step. encode_prompt
    # runs both SDXL tokenizers and text encoders.
    with torch.no_grad():
        prompt_embeds, _, pooled_prompt_embeds, _ = pipeline.encode_prompt(
            prompt=train_dataset.instance_prompt,
            device=device,
            num_images_per_prompt=1,
            do_classifier_free_guidance=False,
        )
    prompt_embeds = prompt_embeds.to(dtype=unet.dtype)
    pooled_prompt_embeds = pooled_prompt_embeds.to(dtype=unet.dtype)

    # A DataLoader adds the batch dimension and stops after len(dataset) items.
    # Iterating the dataset directly never ends because its __getitem__ wraps
    # indices around. Only the images are collated; the prompt is handled above.
    dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        shuffle=True,
        collate_fn=lambda examples: torch.stack(
            [example["instance_images"] for example in examples]
        ),
    )

    # Initialize optimizer
    optimizer = torch.optim.AdamW(unet.parameters(), lr=learning_rate)
    # Loss scaling keeps small fp16 gradients from underflowing under autocast;
    # it is a transparent no-op when training on the CPU.
    scaler = torch.amp.GradScaler(device, enabled=(device == "cuda"))

    global_step = 0  # completed optimizer updates
    micro_step = 0   # processed batches (several per update when accumulating)
    optimizer.zero_grad(set_to_none=True)

    while global_step < max_train_steps:
        for pixel_values in dataloader:
            pixel_values = pixel_values.to(device=device, dtype=vae.dtype)
            batch_size = pixel_values.shape[0]

            # The UNet operates on VAE latents, not on RGB pixels.
            with torch.no_grad():
                latents = vae.encode(pixel_values).latent_dist.sample()
                latents = latents * vae.config.scaling_factor

            noise = torch.randn_like(latents)
            timesteps = torch.randint(
                0,
                noise_scheduler.config.num_train_timesteps,
                (batch_size,),
                device=device,
            )
            noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

            # SDXL micro-conditioning: (original size, crop top-left, target size).
            height, width = pixel_values.shape[-2:]
            add_time_ids = torch.tensor(
                [[height, width, 0, 0, height, width]],
                device=device,
                dtype=prompt_embeds.dtype,
            ).repeat(batch_size, 1)

            with torch.amp.autocast(device_type=device):
                model_pred = unet(
                    noisy_latents,
                    timesteps,
                    encoder_hidden_states=prompt_embeds.repeat(batch_size, 1, 1),
                    added_cond_kwargs={
                        "text_embeds": pooled_prompt_embeds.repeat(batch_size, 1),
                        "time_ids": add_time_ids,
                    },
                ).sample

            # Assumes an epsilon-prediction model (the SDXL base default), so
            # the regression target is the noise that was added.
            loss = torch.nn.functional.mse_loss(model_pred.float(), noise.float())

            scaler.scale(loss / gradient_accumulation_steps).backward()
            micro_step += 1
            if micro_step % gradient_accumulation_steps != 0:
                continue  # keep accumulating gradients

            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad(set_to_none=True)
            global_step += 1

            if global_step % log_every == 0:
                print(f"Step {global_step}, Loss: {loss.item()}")

            if global_step % checkpoint_every == 0:
                pipeline.save_pretrained(f"{output_dir}/checkpoint-{global_step}")
                print(f"Saved checkpoint at step {global_step}")

            if global_step >= max_train_steps:
                break

# Start fine-tuning, handing the notebook-level pipeline to the training function.
train_dreambooth(pipeline=pipeline)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]