In [None]:
import torch
from torch.utils.data import DataLoader
from model import LatentDiffusionModel
from data import LandscapeDataset, transform
from scheduler import NoiseScheduler
import torch.optim as optim
from tqdm import tqdm

# --- Run configuration -------------------------------------------------------
# Everything a reader might want to tune lives here.

# Seed for torch's global random number generator, set before any model
# weights are created or any data is shuffled so a fresh-kernel run is
# repeatable.
SEED = 42
torch.manual_seed(SEED)

# Train on the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Spatial size handed to the model as `image_size`.
IMAGE_SIZE = (64, 64)
# Width of the latent/conditioning vector; shared by the model and the scheduler.
LATENT_DIM = 512
# Number of diffusion steps; passed to the noise scheduler as `T`.
TIMESTEPS = 1000
# Samples per DataLoader batch (one optimizer step per batch).
BATCH_SIZE = 32
# AdamW learning rate.
LR = 1e-4
# Number of full passes over the dataset.
EPOCHS = 100

# Build the network first, then move it onto the training device.
model = LatentDiffusionModel(input_channels=3, latent_dim=LATENT_DIM, image_size=IMAGE_SIZE, num_heads=4)
model = model.to(DEVICE)

# Noise scheduler: supplies timesteps, noised inputs and the per-timestep
# latent consumed by the training loop. It lives on the same device as the model.
scheduler = NoiseScheduler(T=TIMESTEPS, latent_dim=LATENT_DIM)
scheduler = scheduler.to(DEVICE)

# AdamW over every model parameter.
optimizer = optim.AdamW(params=model.parameters(), lr=LR)

# Training images are read from disk and passed through the shared `transform`.
dataset = LandscapeDataset(
    root_dir='./Landscape Classification/Training Data',
    transform=transform,
)

# Shuffled mini-batches, loaded by four worker processes.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

# Largest number of samples pushed through the model in a single
# forward/backward pass. The recorded run of this cell aborted with a CUDA
# out-of-memory error (an 8 GiB request on a 4 GiB GPU) before the first batch
# finished, so each DataLoader batch is now processed in smaller chunks whose
# gradients are accumulated before one optimizer step.
# NOTE(review): 4 is a starting point, not a measured value -- lower it if the
# OOM persists, raise it if memory allows.
MICRO_BATCH_SIZE = 4

model.train()
for epoch in range(EPOCHS):
    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{EPOCHS}")
    for batch in progress_bar:
        x_0 = batch.to(DEVICE)
        batch_total = x_0.shape[0]

        # Timesteps, noise and conditioning are still drawn once for the whole
        # batch; only the model forward/backward pass is chunked.
        t = scheduler.sample_timesteps(batch_total)
        x_t, noise = scheduler.add_noise(x_0, t)
        latent = scheduler.get_latent(t)

        optimizer.zero_grad()
        batch_loss = 0.0

        # NOTE(review): assumes x_t, noise and latent are tensors with the
        # batch on dim 0 -- confirm against NoiseScheduler.
        micro_batches = zip(
            x_t.split(MICRO_BATCH_SIZE),
            noise.split(MICRO_BATCH_SIZE),
            latent.split(MICRO_BATCH_SIZE),
        )
        for x_t_chunk, noise_chunk, latent_chunk in micro_batches:
            predicted_noise = model(x_t_chunk, latent_chunk)
            chunk_loss = torch.nn.functional.mse_loss(predicted_noise, noise_chunk)

            # Weight each chunk by its share of the batch so the accumulated
            # gradient matches the mean-MSE gradient of the full batch (the
            # last chunk, or the last batch of an epoch, may be smaller).
            # NOTE(review): exact equivalence assumes the model has no
            # batch-dependent layers such as BatchNorm -- confirm.
            weight = x_t_chunk.shape[0] / batch_total
            (chunk_loss * weight).backward()
            batch_loss += chunk_loss.item() * weight

        # One parameter update per DataLoader batch, as before.
        optimizer.step()

        progress_bar.set_postfix(loss=batch_loss)

    # Checkpoint the weights at the end of every epoch.
    torch.save(model.state_dict(), f"ldm_epoch_{epoch+1}.pth")

Epoch 1/100:   0%|                                                                             | 0/313 [00:00<?, ?it/s]

torch.Size([32, 256, 8, 8]) torch.Size([32, 256, 16, 16]) torch.Size([32, 512])


Epoch 1/100:   0%|                                                                             | 0/313 [00:28<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 8.00 GiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 11.82 GiB is allocated by PyTorch, and 59.34 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)