In [None]:
# PGD for MMDetection 3.x (Mask R-CNN, COCO)
import os, torch
from mmengine.config import Config
from mmengine.runner import Runner
from torchvision.utils import save_image
from PIL import Image
import numpy as np

# ========= User paths =========
# Machine-specific absolute paths; edit these to match the local checkout.
# CONFIG / CHECKPOINT select the model, DATA_ROOT is the COCO dataset root.
CONFIG ='C:/Users/heheh/mmdetection/configs/mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py'
CHECKPOINT = 'C:/Users/heheh/mmdetection/checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth'
DATA_ROOT = 'C:/Users/heheh/mmdetection/data/coco'
# Annotation file and image folder are derived from DATA_ROOT.
ANN_FILE  = os.path.join(DATA_ROOT, 'annotations', 'instances_val2017.json')
IMG_DIR   = os.path.join(DATA_ROOT, 'val2017')
# Output folder for the adversarial images; created up front so that the
# per-image save in the attack loop never fails on a missing directory.
ADV_SAVE_DIR = os.path.join(DATA_ROOT, 'instance_maskrcnn_adv')
os.makedirs(ADV_SAVE_DIR, exist_ok=True)

# ========= Attack hyperparams (pixel-space) =========
# Both budgets are expressed for images scaled to [0, 1]; they are converted
# to the model's normalized input space further down in this script.
epsilon = 8/255.0     # L_inf radius of the allowed perturbation, in [0,1] space
alpha   = 2/255.0     # per-iteration step size, in [0,1] space
num_steps = 20        # number of PGD iterations per image

# ========= 1) Build runner/model + dataloader =========
# Load the config and register the checkpoint/work dir on it.
cfg = Config.fromfile(CONFIG)
cfg.default_scope = 'mmdet'
cfg.load_from = CHECKPOINT
cfg.work_dir = './work_dirs/pgd_adv_eval'

# Point the test dataset to COCO val
test_dataset = cfg.test_dataloader.dataset
test_dataset.type = 'CocoDataset'
test_dataset.data_root = DATA_ROOT
test_dataset.ann_file = ANN_FILE
test_dataset.data_prefix = dict(img=IMG_DIR)
test_dataset.test_mode = True

# optional: limit to first N images while debugging
# test_dataset.indices = list(range(20))

# Evaluator (keep if you want to also report metrics on adv images later)
cfg.test_evaluator = dict(type='CocoMetric', ann_file=ANN_FILE, metric=['bbox','segm'])

# Visualizer (avoid empty list crash)
if hasattr(cfg, 'visualizer'):
    cfg.visualizer.vis_backends = None

runner = Runner.from_cfg(cfg)

# Runner.from_cfg only records `load_from`; the weights are normally loaded
# lazily inside runner.train()/test()/val(), none of which this script calls.
# Load them explicitly, otherwise the attack would be computed against an
# uninitialized network.
runner.load_or_resume()

model = runner.model
model.eval()  # freeze dropout / batch-norm statistics while attacking
# Device of the model parameters; every tensor created below is placed on it.
device = next(model.parameters()).device

# ========= 2) Utility: convert pixel-space eps/alpha to normalized space =========
# MMDet defaults use mean/std in 0-255 scale.
preproc = model.data_preprocessor

# torch.as_tensor accepts both a tensor buffer and a plain sequence, and
# avoids the copy-construct warning torch.tensor() raises when handed a tensor.
# view(1, -1, 1, 1) broadcasts over (N, C, H, W) without hard-coding C.
# These tensors are only read below, never modified in place.
mean = torch.as_tensor(preproc.mean, dtype=torch.float32, device=device).view(1, -1, 1, 1)
std = torch.as_tensor(preproc.std, dtype=torch.float32, device=device).view(1, -1, 1, 1)

# bounds in normalized space for [0,255] pixel range:
lower = (0.0 - mean) / std
upper = (255.0 - mean) / std

# pixel-space eps -> normalized-space eps (per channel):
# a pixel offset d in 0..255 becomes d / std after (x - mean) / std.
eps_norm = (epsilon * 255.0) / std
alpha_norm = (alpha * 255.0) / std

# ========= 3) DataLoader (use same as test) =========
test_loader = runner.build_dataloader(cfg.test_dataloader)

# ========= 4) PGD loop per image and SAVE =========
def total_loss_sum(losses):
    """Sum every tensor found in the (possibly nested) loss container.

    The container returned by ``model(..., mode='loss')`` may hold tensors
    directly, nested dicts, or lists/tuples of tensors (e.g. one loss per
    feature level). All of them are visited; previously list/tuple entries
    were silently skipped and therefore never contributed to the objective.

    Args:
        losses: dict whose values are tensors, dicts, lists or tuples,
            nested to any depth. Non-tensor leaves are ignored.

    Returns:
        A 0-dim tensor holding the sum of all tensor elements, or the float
        ``0.0`` when no tensor is present.
    """
    def _iter_tensors(obj):
        # Depth-first walk that yields tensors in container order.
        if torch.is_tensor(obj):
            yield obj
        elif isinstance(obj, dict):
            for value in obj.values():
                yield from _iter_tensors(value)
        elif isinstance(obj, (list, tuple)):
            for value in obj:
                yield from _iter_tensors(value)

    total = 0.0
    for tensor in _iter_tensors(losses):
        total = total + tensor.sum()
    return total

@torch.no_grad()
def save_adv_in_original_size(norm_img, ds, save_path):
    """Denormalize an adversarial tensor and save it at the original image size.

    Args:
        norm_img: (1,3,H_pad,W_pad) normalized tensor ((img-mean)/std) in the
            model input space.
        ds: data sample for this image; its ``metainfo`` must provide
            ``img_shape`` (size after resize, before padding) and
            ``ori_shape`` (size of the source image).
        save_path: destination file path; the image format is chosen by
            PIL from the file extension.

    The result is written to ``save_path``; nothing is returned.
    """
    # 1) denorm back to 0..255 using the module-level mean/std
    x = norm_img.clone() * std + mean            # (1,3,H,W) in 0..255
    x = x.clamp(0, 255)[0].permute(1, 2, 0).cpu().numpy()  # (H,W,3) float
    # Round to the nearest integer level before the uint8 cast; a bare cast
    # truncates and would shift every pixel down by up to one level.
    x = np.rint(x).astype(np.uint8)

    # 2) crop padding (keep only real img region)
    img_h, img_w = ds.metainfo['img_shape'][:2]   # after resize, before pad
    x = x[:img_h, :img_w, :]                      # remove bottom/right pad

    # 3) resize back to original size
    ori_h, ori_w = ds.metainfo['ori_shape'][:2]
    # assumes the preprocessor produced RGB channel order -- TODO confirm
    pil = Image.fromarray(x)
    pil = pil.resize((ori_w, ori_h), resample=Image.BILINEAR)  # PIL wants (W, H)

    # 4) save
    pil.save(save_path, quality=95)

# Enable grads inside loop
torch.set_grad_enabled(True)

for batch in test_loader:
    # The dataloader yields raw samples, so run the model's data preprocessor
    # manually to get the batched input tensor and the matching data samples.
    data = model.data_preprocessor(batch, training=False)

    inputs = data['inputs']              # Tensor (N,C,H,W), normalized with mean/std
    data_samples = data['data_samples']  # one data sample per image

    # Process images one-by-one (simpler to keep filenames, shapes consistent)
    for i, sample in enumerate(data_samples):
        img = inputs[i:i+1].detach().clone().to(device)  # (1,C,H,W) normalized
        ds = [sample]  # the model expects a list of samples

        # Get a filename to save under
        img_name = ds[0].metainfo.get('img_path', None)
        if img_name is None:
            # fallback to ori_filename if available
            img_name = ds[0].metainfo.get('ori_filename', f'idx_{ds[0].metainfo.get("img_id","unk")}.jpg')
        base = os.path.splitext(os.path.basename(img_name))[0]
        save_path = os.path.join(ADV_SAVE_DIR, f'{base}.jpg')

        # Make the padded-tensor metainfo match the single-image slice.
        # 'img_shape' is deliberately left untouched: it records the image
        # size before padding, and save_adv_in_original_size() needs it to
        # crop the padding away. Overwriting it with the padded size would
        # bake the padding into the saved image. The tensor size never
        # changes during the attack, so this is done once per image.
        H, W = img.shape[2], img.shape[3]
        ds[0].set_metainfo({
            'pad_shape': (H, W, 3),
            'batch_input_shape': (H, W)
        })

        # Initialize adv as the clean normalized image
        x_adv = img.clone().detach()

        # NOTE(review): 'loss' mode presumably relies on ground-truth
        # instances carried by the data sample; confirm the test pipeline
        # loads them in the resized image's coordinate frame.
        for _ in range(num_steps):
            x_adv.requires_grad_(True)

            # Zero model grads; only the gradient w.r.t. x_adv is used
            model.zero_grad(set_to_none=True)

            # Forward in "loss" mode and back-propagate to the input
            losses = model(x_adv, ds, mode='loss')
            loss = total_loss_sum(losses)
            loss.backward()

            # PGD update in normalized space
            with torch.no_grad():
                grad = x_adv.grad
                # Gradient-sign ascent step (maximize the loss)
                x_adv = x_adv + alpha_norm * torch.sign(grad)
                # Project back to the L_inf ball around original img
                x_adv = torch.max(torch.min(x_adv, img + eps_norm), img - eps_norm)
                # Clamp to valid normalized range
                x_adv = torch.max(torch.min(x_adv, upper), lower)

        # Save adv image (denormalize to 0..255 and write it as JPEG)
        with torch.no_grad():
            save_adv_in_original_size(x_adv, ds[0], save_path)

        print(f'[PGD] Saved: {save_path}')
