### 1. Load MVTec Dataset (train + test)

In [None]:
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Root of the MVTec AD "carpet" category. The train/ and test/ splits are
# resolved relative to it so the category can be switched in one place.
ds_root = "/kaggle/input/mvtec-ad/carpet"

# Preprocessing: resize to 256x256 and convert to a tensor.
# ToTensor() scales pixel values to [0, 1]; no mean/std normalization is
# applied in this cell.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# Load TRAIN set (normal images only)
train_dataset = datasets.ImageFolder(
    root=os.path.join(ds_root, "train"),
    transform=transform,
)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Load TEST set (normal + anomalous images); order is kept fixed so that
# batch positions map back to dataset indices.
test_dataset = datasets.ImageFolder(
    root=os.path.join(ds_root, "test"),
    transform=transform,
)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

print(f"Train images: {len(train_dataset)}, Test images: {len(test_dataset)}")
print(f"Classes: {train_dataset.classes}")


### 2. Load Ground Truth Mask (MVTec)

In [None]:
from PIL import Image

def load_ground_truth_mask(image_name, gt_dir):
    """Open the ground-truth mask that belongs to a test image.

    Args:
        image_name: File name of the test image, optionally prefixed with a
            sub-directory (e.g. ``"000.png"``). A trailing ``.png`` is
            replaced by ``_mask.png``; names without that suffix are used
            unchanged.
        gt_dir: Directory that contains the mask files. When falsy
            (``None`` or ``""``), the carpet ground-truth directory that
            used to be hard-coded here is used instead.

    Returns:
        The image object returned by ``Image.open`` for the mask path.
    """
    if not gt_dir:
        gt_dir = "/kaggle/input/mvtec-ad/carpet/ground_truth"

    # Only rewrite the file extension; a ".png" that happens to occur
    # elsewhere in the name or in a directory component is left alone.
    suffix = ".png"
    if image_name.endswith(suffix):
        mask_name = image_name[:-len(suffix)] + "_mask.png"
    else:
        mask_name = image_name

    return Image.open(os.path.join(gt_dir, mask_name))


### 3. Fine-Tuning DDPM UNet on Normal Images

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from diffusers import DDPMScheduler, UNet2DModel
from tqdm import tqdm

# CONFIGURATION
DATASET_ROOT = "/kaggle/input/mvtec-ad/carpet/train"
OUTPUT_DIR = "./fine_tuned_unet"
BATCH_SIZE = 2
IMAGE_SIZE = 128
EPOCHS = 30
LR = 3e-5
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# The torch.cuda.amp utilities only do useful work on a CUDA device, so
# they are switched off explicitly when running on CPU.
USE_AMP = DEVICE == "cuda"
# Parameter-name fragments of the UNet sub-modules that are fine-tuned.
# Every parameter whose name contains none of them stays frozen.
TRAINABLE_BLOCKS = ("up_blocks.2", "up_blocks.3", "mid_block", "down_blocks.3")

# DATASET
# NOTE(review): ToTensor() yields values in [0, 1]. The pretrained DDPM
# checkpoint presumably expects inputs scaled to [-1, 1] - confirm, and if
# so change this transform together with the one used at inference time.
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
])
train_dataset = datasets.ImageFolder(root=DATASET_ROOT, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

print(f"Loaded {len(train_dataset)} training images from {DATASET_ROOT}")

# LOAD PRETRAINED MODEL
unet = UNet2DModel.from_pretrained(
    "google/ddpm-celebahq-256", use_safetensors=False
).to(DEVICE)

unet.enable_gradient_checkpointing()

# Freeze everything except the selected blocks.
for name, param in unet.named_parameters():
    param.requires_grad = any(block in name for block in TRAINABLE_BLOCKS)

trainable_params = [p for p in unet.parameters() if p.requires_grad]
print(
    f"Fine-tuning {sum(p.numel() for p in trainable_params)} parameters "
    f"in blocks: {', '.join(TRAINABLE_BLOCKS)}"
)

# SCHEDULER + OPTIMIZER
noise_scheduler = DDPMScheduler(num_train_timesteps=1000)
optimizer = optim.AdamW(trainable_params, lr=LR)
criterion = nn.MSELoss()

# Mixed precision: with enabled=False the scaler and autocast are no-ops,
# so the same loop runs on CPU.
scaler = torch.cuda.amp.GradScaler(enabled=USE_AMP)

# TRAINING LOOP
# Noise-prediction objective: add noise at a random timestep to each image
# and train the UNet to predict that noise (MSE).
unet.train()
for epoch in range(EPOCHS):
    total_loss = 0.0
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
    for images, _ in loop:
        images = images.to(DEVICE)
        noise = torch.randn_like(images)
        # One independent timestep per image in the batch.
        timesteps = torch.randint(
            0, noise_scheduler.config.num_train_timesteps,
            (images.shape[0],), device=DEVICE
        )
        noisy_images = noise_scheduler.add_noise(images, noise, timesteps)

        optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=USE_AMP):
            noise_pred = unet(noisy_images, timesteps).sample
            loss = criterion(noise_pred, noise)

        # The scaler scales the loss before backward and then drives the
        # optimizer step.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_loss = loss.item()
        total_loss += batch_loss
        loop.set_postfix(loss=batch_loss)

    avg_loss = total_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{EPOCHS}] | Avg Loss: {avg_loss:.6f}")

# SAVE MODEL
os.makedirs(OUTPUT_DIR, exist_ok=True)
unet.save_pretrained(OUTPUT_DIR)
print(f"Fine-tuned UNet saved at {OUTPUT_DIR}")


### 4. Load Stable Diffusion UNet & Select Layers to Fine-Tune

In [None]:
from diffusers import StableDiffusionPipeline

# Load the Stable Diffusion v1.5 pipeline and keep a handle on its UNet.
pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
unet = pipeline.unet

# Single pass over the parameters: a parameter stays trainable only if its
# name contains "up_blocks.3" or "mid_block"; all others are frozen.
for name, param in unet.named_parameters():
    param.requires_grad = any(
        fragment in name for fragment in ("up_blocks.3", "mid_block")
    )


### 5. Running Reverse Diffusion for Reconstruction

In [None]:
import os
import torch
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.utils.data import DataLoader
from torchvision import datasets
from diffusers import DDPMScheduler, UNet2DModel
from tqdm import tqdm
import numpy as np
from PIL import Image

# CONFIGURATION
TEST_ROOT = "/kaggle/input/mvtec-ad/carpet/test"
MODEL_PATH = "./fine_tuned_unet"
OUTPUT_DIR = "./reconstructions"
BATCH_SIZE = 2
IMAGE_SIZE = 128
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Timestep up to which a test image is noised before it is denoised again.
# It must stay well below the end of the schedule: at the last timesteps the
# input is almost entirely replaced by noise and the "reconstruction" would
# no longer depend on the image being scored.
NOISE_TIMESTEP = 250

os.makedirs(OUTPUT_DIR, exist_ok=True)

# LOAD TEST DATA
# NOTE(review): inputs are in [0, 1] (ToTensor only). This has to match the
# preprocessing used when the model in MODEL_PATH was fine-tuned - confirm.
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
])
test_dataset = datasets.ImageFolder(root=TEST_ROOT, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"Loaded {len(test_dataset)} test images from {TEST_ROOT}")

# LOAD MODEL + SCHEDULER
unet = UNet2DModel.from_pretrained(MODEL_PATH).to(DEVICE)
unet.eval()
noise_scheduler = DDPMScheduler(num_train_timesteps=1000)


# REVERSE DIFFUSION FUNCTION
@torch.no_grad()
def reconstruct_images(images, steps=250, start_timestep=NOISE_TIMESTEP):
    """Reconstruct a batch by partially noising it and denoising it again.

    The images are diffused forward to ``start_timestep`` with
    ``noise_scheduler.add_noise`` and then passed through the reverse chain
    down to timestep 0, using the timesteps the scheduler itself generates
    for ``steps`` inference steps.

    Args:
        images: Batch of images already on ``DEVICE``.
        steps: Number of inference steps the scheduler spreads over the full
            training schedule. Only the steps at or below
            ``start_timestep`` are actually executed.
        start_timestep: Noise level (training timestep index) from which
            the reverse process starts.

    Returns:
        The denoised batch, same shape as ``images``.

    Raises:
        ValueError: If ``start_timestep`` is outside the training schedule
            or no scheduled timestep lies at or below it.
    """
    last_t = noise_scheduler.config.num_train_timesteps - 1
    if not 0 <= start_timestep <= last_t:
        raise ValueError(
            f"start_timestep must be in [0, {last_t}], got {start_timestep}"
        )

    # Let the scheduler define the timestep grid so that step() uses the
    # same spacing as this loop.
    noise_scheduler.set_timesteps(steps, device=DEVICE)
    schedule = noise_scheduler.timesteps
    schedule = schedule[schedule <= start_timestep]
    if schedule.numel() == 0:
        raise ValueError(
            f"No scheduled timestep at or below start_timestep={start_timestep}"
        )

    # Forward-diffuse the clean images to the first timestep of the reverse
    # chain; the model is only ever shown inputs at a matching noise level.
    t_start = torch.full(
        (images.shape[0],), int(schedule[0]), dtype=torch.long, device=images.device
    )
    noisy = noise_scheduler.add_noise(images, torch.randn_like(images), t_start)

    for t in schedule:
        model_output = unet(noisy, t).sample
        noisy = noise_scheduler.step(model_output, t, noisy).prev_sample
    return noisy


# INFERENCE
anomaly_scores = []
for i, (images, _) in enumerate(tqdm(test_loader, desc="Testing")):
    images = images.to(DEVICE)

    # Reconstruct images
    recon = reconstruct_images(images)

    # Per-image anomaly score: squared reconstruction error averaged over
    # channels and pixels.
    pixel_error = ((images - recon) ** 2).mean(dim=[1, 2, 3])
    anomaly_scores.extend(pixel_error.cpu().numpy())

    # Save comparison image (inputs on the first row, reconstructions below)
    if i == 0:
        vutils.save_image(torch.cat([images, recon], dim=0),
                          os.path.join(OUTPUT_DIR, "comparison.png"),
                          nrow=BATCH_SIZE, normalize=True)

# RESULTS
anomaly_scores = np.array(anomaly_scores)
print(f"Computed anomaly scores for {len(anomaly_scores)} images.")
print(f"Mean Score: {anomaly_scores.mean():.6f}, Max Score: {anomaly_scores.max():.6f}")

# SAVE CSV
import pandas as pd

# "label" is the ImageFolder class index (one index per sub-folder of
# TEST_ROOT), not a normal/anomalous flag.
class_indices = np.asarray(test_dataset.targets)
results = {"score": anomaly_scores, "label": class_indices}
# When the test set has a "good" sub-folder, also store a binary target
# that can be fed directly into a detection metric.
if "good" in test_dataset.class_to_idx:
    good_index = test_dataset.class_to_idx["good"]
    results["is_anomaly"] = (class_indices != good_index).astype(int)

pd.DataFrame(results).to_csv(
    os.path.join(OUTPUT_DIR, "anomaly_scores.csv"), index=False
)
print(f"Saved anomaly scores to {OUTPUT_DIR}/anomaly_scores.csv")
