### DeeplabV3

In [None]:
# PGD for MMSegmentation 1.x (DeepLabV3, COCO-Stuff 164k)
import os
import time
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from PIL import Image

import cv2
from skimage.metrics import peak_signal_noise_ratio as compare_psnr
from skimage.metrics import structural_similarity as compare_ssim

import mmseg
from mmengine.config import Config
from mmseg.utils import register_all_modules
from mmseg.apis import init_model

###############################################################################
# 1. Basic Setup
###############################################################################
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Ask cuDNN to pick deterministic algorithms.
    torch.backends.cudnn.deterministic = True

# Seed every RNG once, before any model or data object is created.
set_seed()

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Using device: {device}")

# Machine-specific absolute paths: model config and checkpoint, input images,
# ground-truth masks, and the root directory for generated adversarial images.
CONFIG_FILE = 'C:/Users/heheh/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512.py'
CHECKPOINT_FILE = 'C:/Users/heheh/mmsegmentation/checkpoints/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016-88675c24.pth'
IMAGE_DIR = "C:/Users/heheh/val2017/val2017"
MASK_DIR = "C:/Users/heheh/mmsegmentation/data/coco_stuff164k/annotations/val2017"
ADV_SAVE_DIR = "C:/Users/heheh/mmsegmentation/data/semantic_adv_deeplabv3_2"

# Number of classes iterated by the mIoU metric, the fixed size every image and
# mask is resized to, and the label value excluded from the loss and metrics.
NUM_CLASSES = 171
IMAGE_HEIGHT = 512
IMAGE_WIDTH = 512
IGNORE_INDEX = 255

# COCO-Stuff label mapping: raw mask value -> contiguous training id.
# COCODataset.__getitem__ sets every raw value that is not a key here to 255.
# NOTE(review): the largest mapped id is 171 (from raw value 182) while
# NUM_CLASSES is 171, i.e. the metrics iterate ids 0..170 only; a target of 171
# would be out of range for a 171-class loss. Also confirm whether the keys are
# meant to be the raw mask values as stored on disk or 1-based COCO category
# ids -- TODO verify against the mask files.
COCO_STUFF_LABEL_MAP = {
    0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10,
    11: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20,
    22: 21, 23: 22, 24: 23, 25: 24, 27: 25, 28: 26, 31: 27, 32: 28, 33: 29, 34: 30,
    35: 31, 36: 32, 37: 33, 38: 34, 39: 35, 40: 36, 41: 37, 42: 38, 43: 39, 44: 40,
    46: 41, 47: 42, 48: 43, 49: 44, 50: 45, 51: 46, 52: 47, 53: 48, 54: 49, 55: 50,
    56: 51, 57: 52, 58: 53, 59: 54, 60: 55, 61: 56, 62: 57, 63: 58, 64: 59, 65: 60,
    67: 61, 70: 62, 72: 63, 73: 64, 74: 65, 75: 66, 76: 67, 77: 68, 78: 69, 79: 70,
    80: 71, 81: 72, 82: 73, 84: 74, 85: 75, 86: 76, 87: 77, 88: 78, 89: 79, 90: 80,
    92: 81, 93: 82, 94: 83, 95: 84, 96: 85, 97: 86, 98: 87, 99: 88, 100: 89, 101: 90,
    102: 91, 103: 92, 104: 93, 105: 94, 106: 95, 107: 96, 108: 97, 109: 98, 110: 99, 111: 100,
    112: 101, 113: 102, 114: 103, 115: 104, 116: 105, 117: 106, 118: 107, 119: 108, 120: 109, 121: 110,
    122: 111, 123: 112, 124: 113, 125: 114, 126: 115, 127: 116, 128: 117, 129: 118, 130: 119, 131: 120,
    132: 121, 133: 122, 134: 123, 135: 124, 136: 125, 137: 126, 138: 127, 139: 128, 140: 129, 141: 130,
    142: 131, 143: 132, 144: 133, 145: 134, 146: 135, 147: 136, 148: 137, 149: 138, 150: 139, 151: 140,
    152: 141, 153: 142, 154: 143, 155: 144, 156: 145, 157: 146, 158: 147, 159: 148, 160: 149, 161: 150,
    162: 151, 163: 152, 164: 153, 165: 154, 166: 155, 167: 156, 168: 157, 169: 158, 170: 159, 171: 160,
    172: 161, 173: 162, 174: 163, 175: 164, 176: 165, 177: 166, 178: 167, 179: 168, 180: 169, 181: 170,
    182: 171, 255: 255
}

###############################################################################
# 2. COCO Dataset (also returns each image's original size)
###############################################################################
def get_all_files(image_dir, mask_dir, img_exts=('.jpg', '.jpeg', '.png')):
    """Return the sorted image file names that have a matching mask file.

    An entry of ``image_dir`` qualifies when its name ends with one of
    ``img_exts`` and ``mask_dir`` contains a file with the same stem and a
    ``.png`` extension.
    """
    paired = []
    for name in sorted(os.listdir(image_dir)):
        if not name.endswith(img_exts):
            continue
        stem = name.rsplit('.', 1)[0]
        if os.path.exists(os.path.join(mask_dir, stem + '.png')):
            paired.append(name)
    return paired

class COCODataset(Dataset):
    """Image/mask pairs resized to the model input size and normalized.

    Each item is ``(image_tensor, mask_tensor, fname, original_size)``:

    * ``image_tensor`` -- float32 tensor in (C, H, W) order, normalized with the
      per-channel mean/std below (0-255 scale).
    * ``mask_tensor`` -- int64 tensor of training ids; raw mask values missing
      from ``COCO_STUFF_LABEL_MAP`` become 255.
    * ``fname`` -- image file name inside ``image_dir``.
    * ``original_size`` -- ``(width, height)`` of the image before resizing.
    """

    # Per-channel normalization statistics on the 0-255 scale (RGB order).
    # Kept as float64 so the normalized values match the previous arithmetic.
    _MEAN = np.array([123.675, 116.28, 103.53]).reshape(1, 1, 3)
    _STD = np.array([58.395, 57.12, 57.375]).reshape(1, 1, 3)

    def __init__(self, image_dir, mask_dir, max_images=None):
        """Index the image/mask pairs found in the two directories.

        Args:
            image_dir: Directory holding the input images.
            mask_dir: Directory holding ``<stem>.png`` masks.
            max_images: Optional cap on the number of pairs (first N after
                sorting); a falsy value keeps every pair.
        """
        self.image_dir = image_dir
        self.mask_dir = mask_dir

        self.file_list = get_all_files(image_dir, mask_dir)
        if max_images:
            self.file_list = self.file_list[:max_images]

        # Build the raw-value -> training-id lookup table once. Indexing it
        # with a uint8 mask gives the same result as looping over every label
        # with a boolean comparison, at a fraction of the per-image cost.
        self._label_lut = np.full(256, 255, dtype=np.uint8)
        for original_label, mapped_label in COCO_STUFF_LABEL_MAP.items():
            self._label_lut[original_label] = mapped_label

        print(f"[INFO] Found {len(self.file_list)} valid image-mask pairs")

    def __len__(self):
        """Return the number of indexed image/mask pairs."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Load, resize, remap and normalize the pair at position ``idx``."""
        fname = self.file_list[idx]
        img_path = os.path.join(self.image_dir, fname)
        mask_path = os.path.join(self.mask_dir, fname.rsplit('.', 1)[0] + '.png')

        # Load the image inside a context manager so the file handle is
        # released promptly; convert() returns an independent in-memory copy.
        with Image.open(img_path) as raw_image:
            original_image = raw_image.convert("RGB")
        original_size = original_image.size  # (width, height)

        # Resize for model processing.
        image = original_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT), Image.BILINEAR)
        image = np.array(image, dtype=np.float32)

        # Nearest-neighbour resizing keeps mask values as valid labels.
        with Image.open(mask_path) as raw_mask:
            mask = raw_mask.resize((IMAGE_WIDTH, IMAGE_HEIGHT), Image.NEAREST)
        mask = np.array(mask, dtype=np.uint8)

        # Apply the label mapping (unknown raw values -> 255).
        mapped_mask = self._label_lut[mask]

        # Normalize and move channels first: (H, W, C) -> (C, H, W).
        image = (image - self._MEAN) / self._STD
        image = image.transpose(2, 0, 1)

        image_tensor = torch.from_numpy(image).float()
        mask_tensor = torch.from_numpy(mapped_mask).long()

        return image_tensor, mask_tensor, fname, original_size

def custom_collate_fn(batch):
    """Collate dataset items while leaving the non-tensor fields as lists.

    Args:
        batch: Sequence of ``(image, mask, filename, original_size)`` items.

    Returns:
        ``(images, masks, filenames, original_sizes)`` where the first two are
        stacked tensors and the last two are plain Python lists, so each
        original size stays a tuple instead of being turned into a tensor.
    """
    image_col, mask_col, name_col, size_col = zip(*batch)
    return (
        torch.stack(image_col),
        torch.stack(mask_col),
        list(name_col),
        list(size_col),
    )

def get_dataloader(image_dir, mask_dir, batch_size=1, max_images=None):
    """Build a sequential (unshuffled) DataLoader over a ``COCODataset``.

    Args:
        image_dir: Directory holding the input images.
        mask_dir: Directory holding the matching masks.
        batch_size: Number of samples per batch.
        max_images: Optional cap forwarded to ``COCODataset``.

    Returns:
        A ``DataLoader`` that keeps the last partial batch and collates with
        ``custom_collate_fn``.
    """
    loader_options = {
        "batch_size": batch_size,
        "shuffle": False,
        "num_workers": 0,
        "pin_memory": True,
        "drop_last": False,
        "collate_fn": custom_collate_fn,
    }
    return DataLoader(COCODataset(image_dir, mask_dir, max_images), **loader_options)

###############################################################################
# 3. Model Loading
###############################################################################
def load_deeplabv3_model():
    """Build the DeepLabV3 segmentor from CONFIG_FILE and CHECKPOINT_FILE.

    Returns:
        The model placed on ``device`` and switched to eval mode.
    """
    register_all_modules()

    config = Config.fromfile(CONFIG_FILE)
    # Weights come from the checkpoint; training-only settings are cleared.
    config.model.pretrained = None
    config.model.train_cfg = None

    has_test_cfg = "test_cfg" in config.model
    if has_test_cfg and config.model.test_cfg is not None:
        config.model.test_cfg.mode = "whole"

    segmentor = init_model(config, checkpoint=CHECKPOINT_FILE, device=device)
    segmentor.eval()
    print(f"[INFO] Loaded DeepLabV3 model on device: {device}")
    return segmentor

###############################################################################
# 4. PGD Attack Function
###############################################################################
def get_model_output(model, x, return_logits=False):
    """Run the model without gradients and return per-pixel predictions.

    Args:
        model: Callable invoked as ``model(x, mode="tensor")``.
        x: Input batch; moved to ``device`` as float32 before the forward pass.
        return_logits: When true, return the resized logits instead of the
            argmax over dimension 1.

    Returns:
        Logits resized to (IMAGE_HEIGHT, IMAGE_WIDTH), or their argmax over
        dimension 1.

    Raises:
        TypeError: If the model output is neither a tensor, a non-empty
            list/tuple, nor a dict with a ``"logits"`` entry.
    """
    x = x.to(device, dtype=torch.float32, non_blocking=True)
    with torch.no_grad():
        raw = model(x, mode="tensor")

    # Unwrap the output container down to a single logits tensor.
    if isinstance(raw, (list, tuple)) and len(raw) > 0:
        logits = raw[0]
    elif isinstance(raw, dict) and "logits" in raw:
        logits = raw["logits"]
    elif isinstance(raw, torch.Tensor):
        logits = raw
    else:
        raise TypeError(f"Unexpected output type: {type(raw)}")

    logits = F.interpolate(
        logits,
        size=(IMAGE_HEIGHT, IMAGE_WIDTH),
        mode="bilinear",
        align_corners=False,
    )
    return logits if return_logits else logits.argmax(dim=1)

def pgd_attack(model, images, labels, epsilon, alpha, num_steps, ignore_index=255):
    """Untargeted L-infinity PGD attack in the normalized input space.

    Args:
        model: Callable invoked as ``model(x, mode="tensor")`` returning logits
            (directly, as the first element of a list/tuple, or under
            ``"logits"`` in a dict).
        images: Normalized input batch, (N, 3, H, W).
        labels: Integer label maps, (N, H, W).
        epsilon: Perturbation bound in normalized units; a float or a tensor
            broadcastable to ``images`` (for example per-channel ``8 / std``).
        alpha: Step size in normalized units, same conventions as ``epsilon``.
        num_steps: Number of gradient-sign steps.
        ignore_index: Label value excluded from the cross-entropy loss.

    Returns:
        Detached adversarial batch, clamped to the normalized equivalent of the
        [0, 255] pixel range.
    """
    images = images.clone().detach().to(device, dtype=torch.float32)
    labels = labels.clone().detach().to(device)
    delta = torch.zeros_like(images, requires_grad=True)

    mean = torch.tensor([123.675, 116.28, 103.53]).view(1, 3, 1, 1).to(device, dtype=torch.float32)
    std = torch.tensor([58.395, 57.12, 57.375]).view(1, 3, 1, 1).to(device, dtype=torch.float32)
    min_vals = (0 - mean) / std
    max_vals = (255 - mean) / std

    # With no valid target pixel the mean-reduced loss is NaN and would turn the
    # whole perturbation into NaN, so leave such a batch unperturbed.
    if not bool((labels != ignore_index).any()):
        num_steps = 0

    for _ in range(num_steps):
        outs = model(images + delta, mode="tensor")
        if isinstance(outs, (list, tuple)):
            outs = outs[0]
        elif isinstance(outs, dict) and "logits" in outs:
            outs = outs["logits"]

        outs = F.interpolate(outs, size=labels.shape[-2:], mode="bilinear", align_corners=False)
        loss = F.cross_entropy(outs, labels, ignore_index=ignore_index)

        # Differentiate with respect to the perturbation only; unlike
        # loss.backward() this does not accumulate gradients in the model.
        (grad,) = torch.autograd.grad(loss, delta)

        with torch.no_grad():
            # Ascent step, projection onto the epsilon ball, then projection
            # onto the valid pixel range.
            delta.add_(alpha * torch.sign(grad))
            delta.clamp_(-epsilon, epsilon)
            delta.copy_(torch.min(torch.max(images + delta, min_vals), max_vals) - images)

    adv = torch.clamp(images + delta, min_vals, max_vals)
    return adv.detach()

###############################################################################
# 5. Image Saving Functions (save as .jpg at the original size)
###############################################################################
def denormalize_image(tensor_img):
    """Map a normalized (N, 3, H, W) tensor back to the 0-255 pixel scale.

    The per-channel statistics are created on the input's device, and the
    result is clamped to [0, 255].
    """
    target_device = tensor_img.device
    channel_mean = torch.tensor([123.675, 116.28, 103.53]).view(1, 3, 1, 1).to(target_device)
    channel_std = torch.tensor([58.395, 57.12, 57.375]).view(1, 3, 1, 1).to(target_device)

    restored = tensor_img * channel_std + channel_mean
    return torch.clamp(restored, 0, 255)

def save_adversarial_image_with_original_size(adv_tensor, filename, original_size, save_dir):
    """Save the first image of ``adv_tensor`` as a JPEG at its original size.

    Args:
        adv_tensor: Normalized image batch (N, 3, H, W); only index 0 is saved.
        filename: Source file name; its stem is reused with a ``.jpg`` suffix.
        original_size: ``(width, height)`` to resize the image back to.
        save_dir: Output directory, created when missing.

    Returns:
        Path of the written file.
    """
    os.makedirs(save_dir, exist_ok=True)

    # Denormalize to 0..255 and round to the nearest integer. A bare uint8 cast
    # truncates, so values such as 126.99999 produced by the float round trip
    # would drop a full intensity level.
    denorm = denormalize_image(adv_tensor)
    img_np = denorm[0].round().cpu().numpy().transpose(1, 2, 0).astype(np.uint8)

    # Resize back to the original size.
    img_pil = Image.fromarray(img_np).resize(original_size, Image.BILINEAR)

    # Always store under a .jpg name.
    base_name = os.path.splitext(filename)[0]
    save_path = os.path.join(save_dir, base_name + '.jpg')

    # NOTE: resizing and JPEG compression are lossy, so the stored image is
    # not pixel-identical to the tensor produced by the attack.
    img_pil.save(save_path, 'JPEG', quality=95)

    return save_path

###############################################################################
# 6. Evaluation Metrics
###############################################################################
def compute_miou(pred, target, num_classes, ignore_index=255):
    """Mean intersection-over-union over the classes present in pred or target.

    Args:
        pred: Predicted label array.
        target: Ground-truth label array with the same shape as ``pred``.
        num_classes: Class ids ``0 .. num_classes - 1`` are evaluated.
        ignore_index: Target value whose pixels are excluded; ``None`` keeps
            every pixel.

    Returns:
        Mean IoU over the classes with a non-empty union, or ``nan`` when no
        class occurs at all (for example a fully ignored target).
    """
    # The valid-pixel mask does not depend on the class, so build it once.
    valid_mask = None if ignore_index is None else (target != ignore_index)

    ious = []
    any_class_present = False
    for cls in range(num_classes):
        pred_mask = (pred == cls)
        target_mask = (target == cls)

        if valid_mask is not None:
            pred_mask = pred_mask & valid_mask
            target_mask = target_mask & valid_mask

        union = (pred_mask | target_mask).sum()
        if union == 0:
            # Class absent from both arrays: skipped by nanmean below.
            ious.append(float('nan'))
            continue

        intersection = (pred_mask & target_mask).sum()
        ious.append(float(intersection) / float(union))
        any_class_present = True

    if not any_class_present:
        # Avoid the "Mean of empty slice" RuntimeWarning from nanmean.
        return float('nan')

    return np.nanmean(ious)

def compute_pixel_accuracy(pred, target, ignore_index=255):
    """Fraction of evaluated pixels whose prediction equals the target.

    Args:
        pred: Predicted label array.
        target: Ground-truth label array with the same shape as ``pred``.
        ignore_index: Target value whose pixels are excluded; ``None`` keeps
            every pixel.

    Returns:
        Accuracy in [0, 1], or 0.0 when no pixel is evaluated.
    """
    if ignore_index is not None:
        valid_mask = (target != ignore_index)
        pred_valid = pred[valid_mask]
        target_valid = target[valid_mask]
    else:
        pred_valid = pred
        target_valid = target

    # Count elements, not rows: len() of an unmasked 2-D array is its height,
    # which made the ignore_index=None path divide by the wrong number.
    num_pixels = int(np.prod(pred_valid.shape))
    if num_pixels == 0:
        return 0.0

    return float((pred_valid == target_valid).sum()) / num_pixels

def compute_attack_success_rate(pred_clean, pred_adv, target, ignore_index=255):
    """Fraction of evaluated pixels flipped from correct to incorrect.

    A pixel counts as a success when the clean prediction equals the target and
    the adversarial prediction does not. The denominator is the number of
    evaluated pixels (not only the initially correct ones).

    Args:
        pred_clean: Predictions on the clean image.
        pred_adv: Predictions on the adversarial image.
        target: Ground-truth labels with the same shape as the predictions.
        ignore_index: Target value whose pixels are excluded; ``None`` keeps
            every pixel.

    Returns:
        Success rate in [0, 1], or 0.0 when no pixel is evaluated.
    """
    if ignore_index is not None:
        valid_mask = (target != ignore_index)
        pred_clean_valid = pred_clean[valid_mask]
        pred_adv_valid = pred_adv[valid_mask]
        target_valid = target[valid_mask]
    else:
        pred_clean_valid = pred_clean
        pred_adv_valid = pred_adv
        target_valid = target

    # Count elements, not rows: len() of an unmasked 2-D array is its height.
    num_pixels = int(np.prod(pred_clean_valid.shape))
    if num_pixels == 0:
        return 0.0

    correct_clean = (pred_clean_valid == target_valid)
    incorrect_adv = (pred_adv_valid != target_valid)

    success = (correct_clean & incorrect_adv).sum()
    return float(success) / num_pixels

###############################################################################
# 7. Main Functions (PGD only)
###############################################################################
def generate_and_save_pgd_images(model, dataloader, save_dir, **attack_params):
    """Attack every batch with PGD and write the results to ``save_dir``.

    Args:
        model: Model handed to ``pgd_attack``.
        dataloader: Yields ``(images, labels, filenames, original_sizes)``.
        save_dir: Output directory, created when missing.
        **attack_params: Keyword arguments forwarded to ``pgd_attack``.

    Returns:
        List of ``(source_filename, saved_path)`` tuples, one per saved image.
    """
    print(f"\n[INFO] Generating PGD adversarial images...")

    os.makedirs(save_dir, exist_ok=True)
    saved_files = []

    progress = tqdm(dataloader, desc="Generating PGD")
    for images, labels, filenames, original_sizes in progress:
        images = images.to(device, dtype=torch.float32)
        labels = labels.to(device, dtype=torch.long)

        adv_images = pgd_attack(model, images, labels, **attack_params)

        # Save the samples one by one, each at its own original size.
        for row, (name, size) in enumerate(zip(filenames, original_sizes)):
            single = adv_images[row:row + 1]
            written = save_adversarial_image_with_original_size(single, name, size, save_dir)
            saved_files.append((name, written))

    print(f"[INFO] Saved {len(saved_files)} PGD images to {save_dir}")
    return saved_files

def evaluate_pgd_images(model, clean_dataloader, adv_save_dir):
    """Evaluate the adversarial JPEGs stored in ``adv_save_dir``.

    For every clean sample the file ``<stem>.jpg`` is loaded from
    ``adv_save_dir``, resized to the model input size, normalized and
    segmented. Samples without a stored adversarial image are reported and
    skipped.

    Args:
        model: Segmentation model handed to ``get_model_output``.
        clean_dataloader: Yields ``(images, labels, filenames, original_sizes)``.
        adv_save_dir: Directory holding the adversarial JPEGs.

    Returns:
        Dict with ``avg_<metric>`` and ``std_<metric>`` for ``pixel_accuracy``,
        ``miou`` and ``attack_success_rate``. NaN per-image scores (an image
        with no evaluable class) are left out of the statistics; a metric with
        no usable score reports 0.0.
    """
    print(f"\n[INFO] Evaluating PGD adversarial images...")

    results = {
        'pixel_accuracy': [],
        'miou': [],
        'attack_success_rate': []
    }

    # Normalization constants are the same for every file; build them once.
    mean = np.array([123.675, 116.28, 103.53]).reshape(1, 1, 3)
    std = np.array([58.395, 57.12, 57.375]).reshape(1, 1, 3)

    for clean_images, labels, filenames, _ in tqdm(clean_dataloader, desc="Evaluating PGD"):
        clean_images = clean_images.to(device, dtype=torch.float32)
        labels = labels.to(device, dtype=torch.long)

        # Load the adversarial counterpart of every sample in the batch.
        adv_images_list = []
        valid_indices = []

        for i, filename in enumerate(filenames):
            base_name = os.path.splitext(filename)[0]
            adv_path = os.path.join(adv_save_dir, base_name + '.jpg')

            if not os.path.exists(adv_path):
                print(f"[WARN] Adversarial image not found: {adv_path}")
                continue

            with Image.open(adv_path) as raw_adv:
                adv_img = raw_adv.convert("RGB")
            adv_img = adv_img.resize((IMAGE_WIDTH, IMAGE_HEIGHT), Image.BILINEAR)
            adv_img = np.array(adv_img, dtype=np.float32)

            adv_img = (adv_img - mean) / std
            adv_images_list.append(adv_img.transpose(2, 0, 1))
            valid_indices.append(i)

        if not adv_images_list:
            continue

        adv_images = torch.stack(
            [torch.from_numpy(img) for img in adv_images_list]
        ).to(device, dtype=torch.float32)

        # Keep only the clean samples that have an adversarial counterpart.
        clean_images_valid = clean_images[valid_indices]
        labels_valid = labels[valid_indices]

        # get_model_output already runs under torch.no_grad().
        pred_clean = get_model_output(model, clean_images_valid)
        pred_adv = get_model_output(model, adv_images)

        for clean_pred, adv_pred, label in zip(pred_clean, pred_adv, labels_valid):
            pred_clean_np = clean_pred.cpu().numpy()
            pred_adv_np = adv_pred.cpu().numpy()
            label_np = label.cpu().numpy()

            results['pixel_accuracy'].append(
                compute_pixel_accuracy(pred_adv_np, label_np, IGNORE_INDEX))
            results['miou'].append(
                compute_miou(pred_adv_np, label_np, NUM_CLASSES, IGNORE_INDEX))
            results['attack_success_rate'].append(
                compute_attack_success_rate(pred_clean_np, pred_adv_np, label_np, IGNORE_INDEX))

    # Aggregate, ignoring NaN scores so that a single image without any
    # evaluable class cannot turn the dataset-level statistics into NaN.
    avg_results = {}
    for key, values in results.items():
        scores = np.asarray(values, dtype=np.float64)
        scores = scores[~np.isnan(scores)]
        if scores.size:
            avg_results[f'avg_{key}'] = np.mean(scores)
            avg_results[f'std_{key}'] = np.std(scores)
        else:
            avg_results[f'avg_{key}'] = 0.0
            avg_results[f'std_{key}'] = 0.0

    return avg_results

def main():
    """Run the PGD pipeline: load model, attack, save, evaluate, report."""
    # 1. Load model
    model = load_deeplabv3_model()

    # 2. Prepare dataloader (capped at the first 5000 image/mask pairs)
    test_loader = get_dataloader(IMAGE_DIR, MASK_DIR, batch_size=1, max_images=5000)

    # 3. PGD attack parameters. Inputs are normalized as (pixel - mean) / std on
    #    the 0-255 scale, so a pixel-space bound of 8 (out of 255) becomes
    #    8 / std per channel in normalized space; likewise for the step size.
    std_tensor = torch.tensor([58.395, 57.12, 57.375]).view(1, 3, 1, 1).to(device)
    epsilon = (8.0 / std_tensor)  # 8/255 in normalized space
    alpha = (2.0 / std_tensor)    # 2/255 in normalized space
    num_steps = 20

    pgd_params = {
        "epsilon": epsilon,
        "alpha": alpha,
        "num_steps": num_steps,
        "ignore_index": IGNORE_INDEX
    }

    # 4. Generate and save PGD adversarial images
    pgd_save_dir = os.path.join(ADV_SAVE_DIR, "pgd")
    saved_files = generate_and_save_pgd_images(model, test_loader, pgd_save_dir, **pgd_params)

    # 5. Evaluate the saved PGD images
    results = evaluate_pgd_images(model, test_loader, pgd_save_dir)

    # 6. Add attack info to results
    results['attack_name'] = 'PGD'
    results['num_images'] = len(saved_files)
    results['save_directory'] = pgd_save_dir

    # 7. Print results. The image count is taken from the run itself; the old
    #    messages hard-coded "10" and "5000", which disagreed with each other.
    print(f"\n[RESULT] PGD Results ({results['num_images']} test images):")
    print(f"  Pixel Accuracy: {results['avg_pixel_accuracy']:.4f} ± {results['std_pixel_accuracy']:.4f}")
    print(f"  mIoU: {results['avg_miou']:.4f} ± {results['std_miou']:.4f}")
    print(f"  Attack Success Rate: {results['avg_attack_success_rate']:.4f} ± {results['std_attack_success_rate']:.4f}")
    print(f"  Number of images saved: {results['num_images']}")
    print(f"  Save directory: {results['save_directory']}")

    # 8. Save results to CSV
    df = pd.DataFrame([results])
    results_csv = os.path.join(ADV_SAVE_DIR, "deeplabv3_pgd_results_5000imgs.csv")
    df.to_csv(results_csv, index=False)
    print(f"\n[INFO] Results saved to: {results_csv}")

    print(f"\n{'='*60}")
    print(f"DEEPLABV3 PGD ATTACK RESULTS ({results['num_images']} TEST IMAGES)")
    print(f"{'='*60}")
    print(f"Pixel Accuracy: {results['avg_pixel_accuracy']:.4f}")
    print(f"mIoU: {results['avg_miou']:.4f}")
    print(f"Attack Success Rate: {results['avg_attack_success_rate']:.4f}")
    print(f"Images processed: {results['num_images']}")

if __name__ == "__main__":
    main()


### Mask R-CNN

In [None]:
# PGD for MMDetection 3.x (Mask R-CNN, COCO)
import os, torch
from mmengine.config import Config
from mmengine.runner import Runner
from torchvision.utils import save_image
from PIL import Image
import numpy as np

# ========= User paths =========
CONFIG ='C:/Users/heheh/mmdetection/configs/mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py'
CHECKPOINT = 'C:/Users/heheh/mmdetection/checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth'
DATA_ROOT = 'C:/Users/heheh/mmdetection/data/coco'
ANN_FILE  = os.path.join(DATA_ROOT, 'annotations', 'instances_val2017.json')
IMG_DIR   = os.path.join(DATA_ROOT, 'val2017')
ADV_SAVE_DIR = os.path.join(DATA_ROOT, 'instance_maskrcnn_adv')
os.makedirs(ADV_SAVE_DIR, exist_ok=True)

# ========= Attack hyperparams (pixel-space) =========
epsilon = 8/255.0     # L_inf epsilon in [0,1] space
alpha   = 2/255.0     # step size in [0,1]
num_steps = 20

# ========= 1) Build runner/model + dataloader =========
from mmengine.runner.checkpoint import load_checkpoint

cfg = Config.fromfile(CONFIG)
cfg.default_scope = 'mmdet'
cfg.load_from = CHECKPOINT
cfg.work_dir = './work_dirs/pgd_adv_eval'

# Point the test dataset to COCO val
test_dataset = cfg.test_dataloader.dataset
test_dataset.type = 'CocoDataset'
test_dataset.data_root = DATA_ROOT
test_dataset.ann_file = ANN_FILE
test_dataset.data_prefix = dict(img=IMG_DIR)
test_dataset.test_mode = True

# optional: limit to first N images while debugging
# test_dataset.indices = list(range(20))

# Evaluator (keep if you want to also report metrics on adv images later)
cfg.test_evaluator = dict(type='CocoMetric', ann_file=ANN_FILE, metric=['bbox','segm'])

# Visualizer (avoid empty list crash)
if hasattr(cfg, 'visualizer'):
    cfg.visualizer.vis_backends = None

runner = Runner.from_cfg(cfg)
model = runner.model
# The runner applies cfg.load_from only inside runner.train()/val()/test().
# This script drives runner.model directly, so the checkpoint has to be loaded
# explicitly; otherwise the attack would target a model without the trained
# weights.
load_checkpoint(model, CHECKPOINT, map_location='cpu')
model.eval()
device = next(model.parameters()).device

# ========= 2) Utility: convert pixel-space eps/alpha to normalized space =========
# MMDet defaults use mean/std in 0-255 scale.
preproc = model.data_preprocessor
mean = torch.tensor(preproc.mean, device=device).view(1,3,1,1)      # shape (1,C,1,1)
std  = torch.tensor(preproc.std,  device=device).view(1,3,1,1)

# bounds in normalized space for [0,255] pixel range:
lower = (0.0 - mean) / std
upper = (255.0 - mean) / std

# pixel-space eps -> normalized-space eps (per channel)
eps_norm   = (epsilon * 255.0) / std
alpha_norm = (alpha   * 255.0) / std

# ========= 3) DataLoader (use same as test) =========
test_loader = runner.build_dataloader(cfg.test_dataloader)

# ========= 4) PGD loop per image and SAVE =========
def total_loss_sum(losses):
    """Sum every tensor found in the (possibly nested) loss dict.

    Values may be tensors, dicts, or lists/tuples of tensors. List-valued
    entries are included as well; previously they were skipped silently, which
    dropped any loss reported as one tensor per feature level.

    Args:
        losses: Dict returned by ``model(..., mode='loss')``.

    Returns:
        Scalar tensor with the sum of all tensor entries, or the float ``0.0``
        when the dict holds no tensor.
    """
    def _sum_tensors(value):
        # Recursively add up the tensors inside one dict value.
        if torch.is_tensor(value):
            return value.sum()
        if isinstance(value, dict):
            children = value.values()
        elif isinstance(value, (list, tuple)):
            children = value
        else:
            return 0.0
        subtotal = 0.0
        for child in children:
            subtotal = subtotal + _sum_tensors(child)
        return subtotal

    total = 0.0
    for v in losses.values():
        total = total + _sum_tensors(v)
    return total

@torch.no_grad()
def save_adv_in_original_size(norm_img, ds, save_path):
    """
    Save a normalized image tensor as an RGB image at its original size.

    norm_img:  (1,3,H_pad,W_pad) normalized tensor ((img-mean)/std) in the model space
    ds:        DetDataSample for this image; its metainfo must provide
               'img_shape' (size after resize, before padding) and 'ori_shape'
    save_path: output file path; the image format follows the file extension
    """
    # 1) denorm back to 0..255 and round to the nearest integer. A bare uint8
    #    cast truncates, so values such as 126.99999 produced by the float
    #    round trip would drop a full intensity level.
    x = norm_img.clone() * std + mean            # (1,3,H,W) in 0..255
    x = x.clamp(0, 255).round()[0].permute(1, 2, 0).cpu().numpy().astype(np.uint8)  # (H,W,3)

    # 2) crop padding (keep only real img region)
    img_h, img_w = ds.metainfo['img_shape'][:2]   # after resize, before pad
    x = x[:img_h, :img_w, :]                      # remove bottom/right pad

    # 3) resize back to original size
    ori_h, ori_w = ds.metainfo['ori_shape'][:2]
    pil = Image.fromarray(x)                      # x is RGB here
    pil = pil.resize((ori_w, ori_h), resample=Image.BILINEAR)

    # 4) save
    pil.save(save_path, quality=95)

# Enable grads inside loop
torch.set_grad_enabled(True)

for batch in test_loader:
    # The loader yields raw images, so normalize/pad/stack them with the
    # model's own data preprocessor.
    data = model.data_preprocessor(batch, training=False)  # returns dict with 'inputs' tensor and 'data_samples' list

    inputs = data['inputs']            # Tensor (N,C,H,W), normalized with mean/std
    data_samples = data['data_samples']  # list of DetDataSample with metainfo set
    N = inputs.shape[0]

    # Process images one-by-one (simpler to keep filenames, shapes consistent)
    for i in range(N):
        img = inputs[i:i+1].detach().clone().to(device)  # (1,C,H,W) normalized
        ds  = [data_samples[i]]  # list with one sample

        # Get a filename to save under
        img_name = ds[0].metainfo.get('img_path', None)
        if img_name is None:
            # fallback to ori_filename if available
            img_name = ds[0].metainfo.get('ori_filename', f'idx_{ds[0].metainfo.get("img_id","unk")}.jpg')
        base = os.path.splitext(os.path.basename(img_name))[0]
        save_path = os.path.join(ADV_SAVE_DIR, f'{base}.jpg')

        # Record the padded tensor size once (it never changes during the
        # attack). 'img_shape' is deliberately left untouched: it holds the
        # size before padding, which save_adv_in_original_size needs to crop
        # the padding away. Overwriting it with the padded size made that crop
        # a no-op, so the padding ended up in the saved image.
        H, W = img.shape[2], img.shape[3]
        ds[0].set_metainfo({
            'pad_shape': (H, W, 3),
            'batch_input_shape': (H, W)
        })

        # Initialize adv as the clean normalized image
        x_adv = img.clone().detach()
        x_adv.requires_grad_(True)

        for step in range(num_steps):
            # Zero model grads; keep only x_adv gradient
            model.zero_grad(set_to_none=True)

            # Forward in "loss" mode
            losses = model(x_adv, ds, mode='loss')
            loss = total_loss_sum(losses)
            loss.backward()

            # PGD update in normalized space
            with torch.no_grad():
                grad = x_adv.grad
                x_adv = x_adv + alpha_norm * torch.sign(grad)
                # Project back to the L_inf ball around original img
                x_adv = torch.max(torch.min(x_adv, img + eps_norm), img - eps_norm)
                # Clamp to valid normalized range
                x_adv = torch.max(torch.min(x_adv, upper), lower)

            x_adv.requires_grad_(True)

        # Save adv image (denormalize to 0..255 and write a JPEG)
        with torch.no_grad():
            save_adv_in_original_size(x_adv, ds[0], save_path)

        print(f'[PGD] Saved: {save_path}')


### Panoptic FPN

In [None]:
# PGD for PanopticFPN (MMDetection 3.x)
import os, copy, torch, numpy as np
from PIL import Image
from mmengine.config import Config
from mmengine.runner import Runner
from mmengine.runner.checkpoint import load_checkpoint

# ====== Paths ======
CONFIG = r'C:/Users/heheh/mmdetection/configs/panoptic_fpn/panoptic-fpn_r50_fpn_1x_coco.py'
CHECKPOINT = r'C:/Users/heheh/mmdetection/checkpoints/panoptic_fpn_r50_fpn_1x_coco_20210821_101153-9668fd13.pth'
DATA_ROOT = r'C:/Users/heheh/mmdetection/data/coco'
PAN_JSON  = os.path.join(DATA_ROOT, 'annotations', 'panoptic_val2017.json')
IMG_DIR   = os.path.join(DATA_ROOT, 'val2017')
PAN_SEG   = os.path.join(DATA_ROOT, 'annotations', 'panoptic_val2017')  # PNG dir
ADV_SAVE_DIR = os.path.join(DATA_ROOT, 'panoptic_fpn_adv')
os.makedirs(ADV_SAVE_DIR, exist_ok=True)

# ====== Attack hyperparams (pixel-space) ======
epsilon   = 8/255.0           # L_inf radius
alpha     = 2/255.0           # step size
num_steps = 20
random_start = True           # good for stronger attacks

# ====== Build model ======
cfg = Config.fromfile(CONFIG)
cfg.default_scope = 'mmdet'
cfg.load_from = CHECKPOINT
cfg.work_dir = './work_dirs/pgd_panoptic'
if hasattr(cfg, 'visualizer'):
    cfg.visualizer.vis_backends = None

runner = Runner.from_cfg(cfg)
model = runner.model
model.eval()
device = next(model.parameters()).device
# Ensure weights are loaded for ad-hoc usage:
load_checkpoint(model, CHECKPOINT, map_location=device)
model.cfg = cfg  # handy, if you later use APIs that expect .cfg

# ====== Build an ATTACK dataloader WITH GT (test_mode=False) ======
# Use the training pipeline (loads bboxes/masks/sem seg) but point to VAL panoptic
attack_dl_cfg = copy.deepcopy(cfg.train_dataloader)
ds = attack_dl_cfg.dataset
# If your train_dataloader wraps the dataset (e.g., RepeatDataset), unwrap to the innermost:
while hasattr(ds, 'dataset'):
    ds = ds.dataset
ds.type = 'CocoPanopticDataset'
ds.data_root = DATA_ROOT
ds.ann_file = PAN_JSON
ds.data_prefix = dict(img=IMG_DIR, seg=PAN_SEG)  # img dir + GT panoptic PNG dir
ds.test_mode = False  # IMPORTANT: include ground-truth for losses

# The training pipeline is reused for its ground-truth loading, but its random
# flip must go: a flipped sample would be saved mirrored under the file name of
# the unflipped source image.
# NOTE(review): assumes flipping is configured as a top-level 'RandomFlip'
# transform in the pipeline -- confirm for customized configs.
if ds.get('pipeline') is not None:
    ds.pipeline = [t for t in ds.pipeline if t.get('type') != 'RandomFlip']

# Keep batch size 1 (simplifies per-image saving & metadata handling)
attack_dl_cfg.batch_size = 1
attack_dl_cfg.num_workers = 2

attack_loader = runner.build_dataloader(attack_dl_cfg)

# ====== Normalization bounds (model uses mean/std in 0-255 space) ======
pre = model.data_preprocessor
mean = torch.tensor(pre.mean, device=device).view(1,3,1,1)
std  = torch.tensor(pre.std,  device=device).view(1,3,1,1)
lower = (0.0   - mean) / std
upper = (255.0 - mean) / std

eps_norm   = (epsilon * 255.0) / std
alpha_norm = (alpha   * 255.0) / std

# ====== Utilities ======
def sum_all_losses(losses: dict):
    """Sum every tensor found in the (possibly nested) loss dict.

    Values may be tensors, dicts, or lists/tuples of tensors. List-valued
    entries are included as well; previously they were skipped silently, which
    dropped any loss reported as one tensor per feature level.

    Args:
        losses: Dict returned by ``model(..., mode='loss')``.

    Returns:
        Scalar tensor with the sum of all tensor entries, or the float ``0.0``
        when the dict holds no tensor.
    """
    def _sum_tensors(value):
        # Recursively add up the tensors inside one dict value.
        if torch.is_tensor(value):
            return value.sum()
        if isinstance(value, dict):
            children = value.values()
        elif isinstance(value, (list, tuple)):
            children = value
        else:
            return 0.0
        subtotal = 0.0
        for child in children:
            subtotal = subtotal + _sum_tensors(child)
        return subtotal

    total = 0.0
    for v in losses.values():
        total = total + _sum_tensors(v)
    return total

@torch.no_grad()
def save_adv_in_original_size(norm_img_1CHW, data_sample, save_path):
    """Denorm -> unpad -> resize back to ori size -> save.

    norm_img_1CHW: (1,3,H_pad,W_pad) normalized tensor ((img-mean)/std)
    data_sample:   sample whose metainfo provides 'img_shape' (size after
                   resize, before padding) and 'ori_shape'
    save_path:     output file path; the image format follows the extension
    """
    x = norm_img_1CHW.clone() * std + mean        # 0..255
    # Round before the uint8 cast: a bare cast truncates, so values such as
    # 126.99999 produced by the float round trip would drop a full level.
    x = x.clamp(0,255).round()[0].permute(1,2,0).cpu().numpy().astype(np.uint8)  # HxWx3

    img_h, img_w = data_sample.metainfo['img_shape'][:2]   # after resize, before pad
    x = x[:img_h, :img_w, :]                               # remove pad

    ori_h, ori_w = data_sample.metainfo['ori_shape'][:2]
    Image.fromarray(x).resize((ori_w, ori_h), Image.BILINEAR).save(save_path, quality=95)

# ====== PGD loop ======
torch.set_grad_enabled(True)

for data_batch in attack_loader:
    # Normalize & collate like the runner would
    data = model.data_preprocessor(data_batch, training=False)  # {'inputs': tensor, 'data_samples': list}
    imgs = data['inputs']             # (1,3,H,W), normalized
    samples = data['data_samples']    # [DetDataSample] with GT for panoptic (instances + semantic)

    x = imgs.detach().clone().to(device)       # clean normalized
    if random_start:
        x = x + torch.empty_like(x).uniform_(-1,1) * eps_norm
        # Project around the CLEAN image; projecting around x itself (as
        # before) can never change anything.
        x = torch.max(torch.min(x, imgs + eps_norm), imgs - eps_norm)  # stay in ball
        x = torch.max(torch.min(x, upper), lower)                      # valid range

    x.requires_grad_(True)

    # Derive a good filename
    meta = samples[0].metainfo
    base = os.path.splitext(os.path.basename(meta.get('ori_filename', meta.get('img_path', 'img'))))[0]
    save_path = os.path.join(ADV_SAVE_DIR, f'{base}.jpg')

    # Record the padded tensor size once (it never changes during the attack).
    # 'img_shape' is deliberately left untouched: it holds the size before
    # padding, which save_adv_in_original_size needs to crop the padding away.
    # Overwriting it with the padded size made that crop a no-op.
    H, W = x.shape[2], x.shape[3]
    samples[0].set_metainfo({
        'pad_shape': (H, W, 3),
        'batch_input_shape': (H, W),
    })

    for t in range(num_steps):
        model.zero_grad(set_to_none=True)
        losses = model(x, samples, mode='loss')   # PanopticFPN: RPN/ROI + semantic head losses
        loss = sum_all_losses(losses)
        loss.backward()

        with torch.no_grad():
            grad = x.grad
            x = x + alpha_norm * torch.sign(grad)           # untargeted: maximize total loss
            x = torch.max(torch.min(x, imgs + eps_norm), imgs - eps_norm)  # project to L_inf ball
            x = torch.max(torch.min(x, upper), lower)       # clamp valid range

        x.requires_grad_(True)

    with torch.no_grad():
        save_adv_in_original_size(x, samples[0], save_path)
    print(f'[PGD PanopticFPN] Saved {save_path}')
