In [3]:
# Install necessary libraries
!pip install diffusers datasets transformers accelerate scipy ftfy

import datasets
import diffusers
import huggingface_hub
import torch
from diffusers import DDPMPipeline
from PIL import Image
from datasets import load_dataset
from torchvision import transforms
from diffusers import DDPMScheduler
from diffusers import UNet2DModel
from torch.nn import functional as F
from matplotlib import pyplot as plt

# Quiet the Hugging Face libraries: raise each one's log verbosity to "error".
for _hf_library in (datasets, diffusers, huggingface_hub):
    _hf_library.logging.set_verbosity_error()

# Training data: the train split of "microsoft/cats_vs_dogs" from the Hugging Face Hub.
# To train on different images, load your own dataset instead, for example:
# dataset = load_dataset("your-dataset-here", split="train")
dataset = load_dataset("microsoft/cats_vs_dogs", split="train")

# Keep only the first 1024 examples.
# NOTE(review): select() takes rows in stored order, not at random. If the split
# is grouped by label, this subset may contain mostly one class; confirm.
first_indices = range(1024)
dataset = dataset.select(first_indices)

# Report how many examples are left after subsetting.
subset_length = len(dataset)
print(f"Length of the subset: {subset_length}")

# Preprocessing pipeline applied to each image before it is fed to the model.
image_size = 64
target_size = (image_size, image_size)
preprocess = transforms.Compose([
    transforms.Resize(target_size),        # resize to image_size x image_size
    transforms.RandomHorizontalFlip(),     # random left-right flip (augmentation)
    transforms.ToTensor(),                 # image -> tensor
    transforms.Normalize([0.5], [0.5]),    # (x - 0.5) / 0.5; undone after sampling with x * 0.5 + 0.5
])

def transform(examples):
    """Add a "pixel_values" entry holding the preprocessed images.

    Every item of ``examples["image"]`` is converted to RGB and passed through
    ``preprocess``; the results are collected in a list stored under
    ``examples["pixel_values"]``.

    Args:
        examples: Mapping with an "image" entry whose items support
            ``.convert("RGB")``.

    Returns:
        The same ``examples`` object, updated in place.
    """
    pixel_values = []
    for image in examples["image"]:
        rgb_image = image.convert("RGB")
        pixel_values.append(preprocess(rgb_image))
    examples["pixel_values"] = pixel_values
    return examples

# Register `transform` on the dataset
dataset.set_transform(transform)

# Thin Dataset adapter: indexing yields the "pixel_values" entry instead of the whole row.
class TensorDataset(torch.utils.data.Dataset):
    """Expose only the "pixel_values" field of a wrapped dataset.

    The wrapped object must support ``len()`` and integer indexing that
    returns a mapping containing a "pixel_values" key.
    """

    def __init__(self, hf_dataset):
        """Keep a reference to the wrapped dataset; nothing is read here."""
        self.hf_dataset = hf_dataset

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        item_count = len(self.hf_dataset)
        return item_count

    def __getitem__(self, idx):
        """Return the "pixel_values" entry of the item at position ``idx``."""
        record = self.hf_dataset[idx]
        return record["pixel_values"]

# Adapt the dataset so that indexing it yields the "pixel_values" entries directly.
tensor_dataset = TensorDataset(dataset)

# Shuffled mini-batches for training.
batch_size = 16
train_dataloader = torch.utils.data.DataLoader(
    dataset=tensor_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# U-Net used as the noise predictor.
# Down path: two plain blocks, then two blocks with attention.
unet_down_blocks = (
    "DownBlock2D",
    "DownBlock2D",
    "AttnDownBlock2D",
    "AttnDownBlock2D",
)
# Up path: two blocks with attention, then two plain blocks.
unet_up_blocks = (
    "AttnUpBlock2D",
    "AttnUpBlock2D",
    "UpBlock2D",
    "UpBlock2D",
)

model = UNet2DModel(
    sample_size=image_size,
    in_channels=3,
    out_channels=3,
    layers_per_block=2,
    block_out_channels=(64, 128, 256, 512),  # channels per block
    down_block_types=unet_down_blocks,
    up_block_types=unet_up_blocks,
)
model = model.to("cuda")  # training runs on the GPU

# DDPM noise schedule with 1000 training timesteps.
noise_scheduler = DDPMScheduler(num_train_timesteps=1000)

# AdamW over all model parameters.
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=2e-5,
)

# Training loop: the model learns to predict the noise that was mixed into each image.
num_epochs = 50  # Adjust as needed

for epoch in range(num_epochs):
    for step, batch in enumerate(train_dataloader):
        clean_images = batch.to("cuda")  # Move batch to GPU

        # Draw the target noise directly on the batch's device and dtype
        # instead of creating it on the CPU and copying it over.
        noise = torch.randn_like(clean_images)

        # One random timestep per image. The count comes from the actual batch,
        # not from `batch_size`: the last batch of an epoch is smaller whenever
        # the dataset length is not a multiple of `batch_size`, and a mismatched
        # timestep count would break add_noise() and the model call.
        timesteps = torch.randint(
            0,
            noise_scheduler.config.num_train_timesteps,
            (clean_images.shape[0],),
            device=clean_images.device,
        ).long()
        noisy_images = noise_scheduler.add_noise(clean_images, noise, timesteps)

        # Predict the added noise and regress it against the true noise.
        noise_pred = model(noisy_images, timesteps).sample
        loss = F.mse_loss(noise_pred, noise)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Periodic progress report.
        if step % 100 == 0:
            print(f"Epoch {epoch}, Step {step}: Loss = {loss.item()}")

# Persist the model's state dict (its weights), not the model object itself.
# Change the path below to store the checkpoint somewhere else.
checkpoint_path = "animal_diffusion_model.pth"
torch.save(model.state_dict(), checkpoint_path)

Collecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ftfy
Successfully installed ftfy-6.3.1


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00002.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

data/train-00001-of-00002.parquet:   0%|          | 0.00/391M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/23410 [00:00<?, ? examples/s]

Length of the subset: 1024
Epoch 0, Step 0: Loss = 1.1114991903305054
Epoch 1, Step 0: Loss = 0.3290935456752777
Epoch 2, Step 0: Loss = 0.23672091960906982
Epoch 3, Step 0: Loss = 0.10348746180534363
Epoch 4, Step 0: Loss = 0.09593629091978073
Epoch 5, Step 0: Loss = 0.09373729676008224
Epoch 6, Step 0: Loss = 0.05731966719031334
Epoch 7, Step 0: Loss = 0.09645897150039673
Epoch 8, Step 0: Loss = 0.09426860511302948
Epoch 9, Step 0: Loss = 0.07997308671474457
Epoch 10, Step 0: Loss = 0.04628625512123108
Epoch 11, Step 0: Loss = 0.03488966450095177
Epoch 12, Step 0: Loss = 0.0447336845099926
Epoch 13, Step 0: Loss = 0.059439074248075485
Epoch 14, Step 0: Loss = 0.05155950039625168
Epoch 15, Step 0: Loss = 0.037434257566928864
Epoch 16, Step 0: Loss = 0.02944430522620678
Epoch 17, Step 0: Loss = 0.034408509731292725
Epoch 18, Step 0: Loss = 0.06104278191924095
Epoch 19, Step 0: Loss = 0.0354158878326416
Epoch 20, Step 0: Loss = 0.05092674493789673
Epoch 21, Step 0: Loss = 0.017045926302

In [5]:
import torch
from diffusers import DDPMScheduler, UNet2DModel
import matplotlib.pyplot as plt
import os

# Rebuild the U-Net for inference. The configuration must match the one used in
# training, otherwise the saved state dict will not fit this model.
unet_down_blocks = (
    "DownBlock2D",
    "DownBlock2D",
    "AttnDownBlock2D",
    "AttnDownBlock2D",
)
unet_up_blocks = (
    "AttnUpBlock2D",
    "AttnUpBlock2D",
    "UpBlock2D",
    "UpBlock2D",
)

model = UNet2DModel(
    sample_size=64,
    in_channels=3,
    out_channels=3,
    layers_per_block=2,
    block_out_channels=(64, 128, 256, 512),
    down_block_types=unet_down_blocks,
    up_block_types=unet_up_blocks,
)
model = model.to("cuda")

# Load trained weights.
# map_location places the loaded tensors on the GPU, where the model lives,
# regardless of the device they were saved from. weights_only=True restricts
# unpickling to tensor data, which is all a state dict contains, rather than
# allowing arbitrary pickled objects.
state_dict = torch.load(
    "animal_diffusion_model.pth",
    map_location="cuda",
    weights_only=True,
)
model.load_state_dict(state_dict)
model.eval()  # switch to evaluation mode for sampling

# Scheduler
noise_scheduler = DDPMScheduler(num_train_timesteps=1000)

# Sampling: start from random noise and denoise it one scheduler timestep at a time.
@torch.no_grad()
def generate_images(num_images=50):
    """Sample images from the module-level `model` using `noise_scheduler`.

    Starts from Gaussian noise of shape (num_images, 3, 64, 64) on the GPU,
    then for every entry of ``noise_scheduler.timesteps`` asks the model for a
    noise prediction and lets the scheduler compute the previous sample.

    Args:
        num_images: Number of images to sample in a single batch.

    Returns:
        A CPU tensor rescaled with ``x * 0.5 + 0.5`` and clamped to [0, 1];
        expected shape is (num_images, 3, 64, 64).
    """
    samples = torch.randn((num_images, 3, 64, 64)).to("cuda")

    for timestep in noise_scheduler.timesteps:
        predicted_noise = model(samples, timestep).sample
        step_output = noise_scheduler.step(predicted_noise, timestep, samples)
        samples = step_output.prev_sample

    samples = samples.detach().cpu()
    # Undo the training-time normalisation, then clip to the valid range.
    samples = (samples * 0.5) + 0.5
    return torch.clamp(samples, 0, 1)

# Sample 50 images from the trained model.
generated_images = generate_images(50)

# Write every image to images/animal_<index>.png, creating the folder if needed.
os.makedirs("images", exist_ok=True)

for index, image_chw in enumerate(generated_images):
    # Move the first axis to the end before handing the array to imsave.
    image_hwc = image_chw.permute(1, 2, 0)
    plt.imsave(f"images/animal_{index}.png", image_hwc.numpy())

print("Done! 50 images saved in 'images/' folder.")


Done! 50 images saved in 'images/' folder.


In [6]:
import shutil

# Pack the contents of the "images" folder into a zip archive named "images_folder".
shutil.make_archive(
    base_name="images_folder",
    format="zip",
    root_dir="images",
)

print("Folder zipped!")


Folder zipped!
