In [1]:
!pip install denoising_diffusion_pytorch

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [2]:
from PIL import Image
import shutil
import os
import random
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from pathlib import Path

def prepare_sysu_shape_dataset(
    dataset_path,
    train_path,
    eval_path,
    image_size=128,
    eval_ratio=0.2,
    batch_size=32,
    seed=None
):
    """
    Split the SYSU-Shape dataset into train/eval folders of resized RGB
    images and build a DataLoader over each folder.

    For every category, the images found in ``<dataset_path>/<category>/images``
    are shuffled, split according to ``eval_ratio``, converted to RGB, resized
    to ``image_size x image_size`` and written to
    ``<train_path>/<category>`` or ``<eval_path>/<category>``.

    WARNING: ``train_path`` and ``eval_path`` are deleted and recreated on
    every call, so anything already stored there is lost.

    Args:
        dataset_path (str): Path to the root of the SYSU-Shape dataset.
        train_path (str): Path to store the processed training data.
        eval_path (str): Path to store the processed evaluation data.
        image_size (int): Target side length (pixels) of the square output images.
        eval_ratio (float): Proportion of each category's images used for
            evaluation; must be in [0, 1].
        batch_size (int): Batch size for both DataLoaders.
        seed (int | None): Seed for the train/eval shuffle. ``None`` (default)
            uses the global ``random`` state, i.e. a different split per run.

    Returns:
        tuple: ``(train_loader, eval_loader)``, DataLoaders over the processed
        training and evaluation folders. The training loader shuffles, the
        evaluation loader does not.

    Raises:
        ValueError: If ``eval_ratio`` is outside [0, 1].
        FileNotFoundError: If any category's ``images`` directory is missing.
            This is checked before the output directories are deleted.
    """
    if not 0.0 <= eval_ratio <= 1.0:
        raise ValueError(f"eval_ratio must be in [0, 1], got {eval_ratio}")

    categories = ['car', 'boat', 'motorbike', 'airplane', 'bicycle']
    image_exts = ('.jpg', '.jpeg', '.png')

    # Validate the inputs BEFORE the destructive rmtree below, so that a wrong
    # dataset_path cannot wipe previously processed data and then crash.
    image_dirs = {
        category: os.path.join(dataset_path, category, 'images')
        for category in categories
    }
    missing_dirs = [d for d in image_dirs.values() if not os.path.isdir(d)]
    if missing_dirs:
        raise FileNotFoundError(
            f"Missing SYSU-Shape image directories: {missing_dirs}"
        )

    # Clear existing output directories so repeated calls do not accumulate files
    for output_root in (train_path, eval_path):
        if os.path.exists(output_root):
            shutil.rmtree(output_root)
    os.makedirs(train_path, exist_ok=True)
    os.makedirs(eval_path, exist_ok=True)

    # Resizing is done once here and stored on disk; tensor conversion and
    # normalization are applied later, at load time.
    resize = transforms.Resize((image_size, image_size))

    # A dedicated generator keeps a seeded split independent of other users of
    # the global random state; without a seed, fall back to the module itself.
    rng = random.Random(seed) if seed is not None else random

    for category in categories:
        image_dir = image_dirs[category]
        # Case-insensitive extension match; sorted so that a given seed always
        # produces the same split regardless of directory listing order.
        images = sorted(
            f for f in os.listdir(image_dir) if f.lower().endswith(image_exts)
        )

        rng.shuffle(images)
        eval_size = int(len(images) * eval_ratio)

        # Split images
        eval_images = images[:eval_size]
        train_images = images[eval_size:]

        # Process and save resized images
        for img_set, output_dir in [(train_images, train_path), (eval_images, eval_path)]:
            category_path = os.path.join(output_dir, category)
            os.makedirs(category_path, exist_ok=True)

            for img in img_set:
                img_path = os.path.join(image_dir, img)
                with Image.open(img_path) as image:
                    # Convert first: palette/bilevel images would otherwise be
                    # resized with nearest-neighbour resampling, and every
                    # stored image ends up with the same 3-channel layout.
                    rgb_image = image.convert('RGB')
                    resized_image = resize(rgb_image)
                    resized_image.save(os.path.join(category_path, img))

    # Safeguard: drop any sub-directory that is not one of the known categories.
    # After the rmtree above this is normally a no-op.
    for folder in [train_path, eval_path]:
        for subdir in os.listdir(folder):
            if subdir not in categories:
                subdir_path = os.path.join(folder, subdir)
                if os.path.isdir(subdir_path):
                    print(f"Removing unwanted folder: {subdir_path}")
                    shutil.rmtree(subdir_path)

    # Load-time preprocessing: tensor in [0, 1], then per-channel (x - 0.5) / 0.5
    # which maps the values to [-1, 1].
    loader_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # One ImageFolder per split; sub-directory names act as class labels
    train_dataset = datasets.ImageFolder(root=train_path, transform=loader_transform)
    eval_dataset = datasets.ImageFolder(root=eval_path, transform=loader_transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    eval_loader = DataLoader(eval_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, eval_loader


In [3]:
# Build the on-disk train/eval splits and get a DataLoader for each
train_loader, eval_loader = prepare_sysu_shape_dataset(
    dataset_path="./sysu-shape-dataset",
    train_path="./processed-datasets/train_data",
    eval_path="./processed-datasets/eval_data",
    image_size=128,
    eval_ratio=0.2,
    batch_size=32,
)

In [4]:
from denoising_diffusion_pytorch import Unet, GaussianDiffusion, Trainer

# U-Net denoiser: base feature map size 64, with per-stage multipliers
model = Unet(dim=64, dim_mults=(1, 2, 4, 8))

# Diffusion process around the U-Net.
# image_size matches the size the dataset was resized to during preparation.
diffusion = GaussianDiffusion(
    model,
    image_size=128,
    timesteps=1000,           # total diffusion timesteps
    sampling_timesteps=250,   # fewer steps at sampling time, for faster sampling
)

# The Trainer is pointed at the processed training folder on disk; the
# DataLoaders returned by prepare_sysu_shape_dataset are not passed to it.
trainer = Trainer(
    diffusion,
    './processed-datasets/train_data',
    train_lr=8e-5,
    train_num_steps=700_000,        # total training steps
    gradient_accumulate_every=2,    # gradient accumulation steps
    ema_decay=0.995,                # exponential moving average decay
    amp=True,                       # mixed precision on
    calculate_fid=True,             # compute FID during training
)

# Long-running: runs until train_num_steps is reached or the cell is interrupted
trainer.train()

Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


  0%|          | 0/700000 [00:00<?, ?it/s]

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_185834/2543318879.py", line 29, in <module>
    trainer.train()
  File "/home/kyq5pg/.local/lib/python3.10/site-packages/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py", line 1062, in train
    self.accelerator.backward(loss)
  File "/home/kyq5pg/.local/lib/python3.10/site-packages/accelerate/accelerator.py", line 2237, in backward
    self.scaler.scale(loss).backward(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/_tensor.py", line 534, in backward
  File "/usr/local/lib/python3.10/dist-packages/torch/autograd/__init__.py", line 267, in backward
  File "/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py", line 767, in _engine_run_backward
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Trace

### Attempt: training on multiple GPUs

In [None]:
import torch
from denoising_diffusion_pytorch import Unet, GaussianDiffusion, Trainer

# U-Net denoiser: base feature map size 64, with per-stage multipliers
model = Unet(dim=64, dim_mults=(1, 2, 4, 8))

# Diffusion process, built around the plain (not yet wrapped) U-Net
diffusion = GaussianDiffusion(
    model,
    image_size=128,
    timesteps=1000,           # total diffusion timesteps
    sampling_timesteps=250,   # fewer steps at sampling time, for faster sampling
)

# Swap the inner network for a DataParallel wrapper moved to the GPU; this is
# done only after GaussianDiffusion has been constructed from the bare model.
# NOTE(review): the traceback from the single-process run shows Trainer.train()
# going through accelerate (accelerator.backward). Wrapping the inner model in
# DataParallel may be redundant or conflict with that; presumably multi-GPU
# should be driven through accelerate instead. TODO confirm against the
# library documentation.
diffusion.model = torch.nn.DataParallel(diffusion.model).cuda()

# The Trainer reads images from the processed training folder on disk
trainer = Trainer(
    diffusion,
    './processed-datasets/train_data',
    train_lr=8e-5,
    train_num_steps=700_000,        # total training steps
    gradient_accumulate_every=2,    # gradient accumulation steps
    ema_decay=0.995,                # exponential moving average decay
    amp=True,                       # mixed precision on
    calculate_fid=True,             # compute FID during training
)

# Long-running: runs until train_num_steps is reached or the cell is interrupted
trainer.train()
