In [None]:
import os
import numpy as np
from PIL import Image

import random
import torch
import torch.nn.functional as F
from torchvision.utils import save_image
import torch.optim as optim

from torchvision import transforms
# Global compute device: GPU index 2 when CUDA is available, otherwise the CPU.
# NOTE(review): the GPU index is hard-coded; on a host with fewer than three
# GPUs later `.to(device)` calls will presumably fail -- confirm before reuse.
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")

from diffusers import StableDiffusionInpaintPipeline, AutoencoderKL
import timm
import lpips
from helper import load_images_from_path, norm_imagenet, denorm_imagenet

# Evaluation-time preprocessing: force a fixed 384x384 resolution and convert
# the PIL image to a tensor. No cropping or normalization is applied.
val_transforms = transforms.Compose(
    [
        transforms.Resize((384, 384)),
        transforms.ToTensor(),
    ]
)

def load_img(path, transforms=None):
    """Load an image file as a batched RGB tensor on the global ``device``.

    Args:
        path: Path of the image file to open.
        transforms: Callable that maps a PIL image to a tensor. When omitted,
            the image is only converted with torchvision's ``ToTensor``
            (previously the ``None`` default crashed when called).

    Returns:
        The transformed image with a leading batch dimension, moved to
        ``device``.
    """
    if transforms is None:
        # The parameter shadows the torchvision ``transforms`` module, so the
        # fallback has to be imported under its own name.
        from torchvision.transforms import ToTensor

        transforms = ToTensor()

    # ``convert`` returns a new in-memory image, so the file handle can be
    # released as soon as the conversion is done.
    with Image.open(path) as handle:
        rgb = handle.convert("RGB")

    return transforms(rgb).unsqueeze(0).to(device)

def norm_tensor(tensor):
    """Min-max scale ``tensor`` into the range [0, 1].

    The minimum and maximum are taken over the whole tensor and are detached
    from the autograd graph; the scaled tensor itself is computed from the
    (possibly differentiable) input.

    A constant tensor (max == min) is mapped to all zeros instead of NaN.

    Args:
        tensor: Input tensor of any shape.

    Returns:
        Tuple ``(tensor_norm, min_val, max_val)`` where ``min_val`` and
        ``max_val`` are the 0-dim extrema needed to undo the scaling.
    """
    detached = tensor.detach()
    min_val = detached.min()
    max_val = detached.max()

    # Guard against a zero range: dividing by 1 keeps the result at exactly 0
    # for constant inputs and leaves every other input untouched.
    value_range = max_val - min_val
    safe_range = torch.where(value_range > 0, value_range, torch.ones_like(value_range))

    tensor_norm = (tensor - min_val) / safe_range

    print(f"Tensor normalized: min={tensor_norm.min()}, max={tensor_norm.max()}")

    return tensor_norm, min_val, max_val

def denorm_tensor(tensor, original_min=None, original_max=None):
    """Undo a min-max scaling: map values from [0, 1] back to [min, max].

    Args:
        tensor: Tensor to rescale. It is cloned and detached first, so the
            result never carries gradients and the input is not modified.
        original_min: Lower bound of the target range. ``None`` means 0.
        original_max: Upper bound of the target range. ``None`` means 1.

    Returns:
        ``tensor * (original_max - original_min) + original_min`` as a new
        detached tensor. With both bounds omitted this is an identity copy
        (previously the ``None`` defaults raised a ``TypeError``).
    """
    t = tensor.clone().detach()

    if original_min is None:
        original_min = 0.0
    if original_max is None:
        original_max = 1.0

    return t * (original_max - original_min) + original_min

def create_random_mask(img_pt, num_masks=1, mask_percentage=0.1, max_attempts=100):
    """Create random rectangular binary masks matching the size of ``img_pt``.

    Each mask holds one axis-aligned rectangle of roughly
    ``mask_percentage * height * width`` pixels whose aspect ratio lies in
    [0.25, 4]. Rectangles of later masks are placed so they do not overlap the
    rectangles of earlier masks.

    Args:
        img_pt: 4-D tensor; only its last two dimensions (height, width),
            dtype and device are used.
        num_masks: Number of masks to generate.
        mask_percentage: Fraction of the image area each rectangle covers.
            Values >= 0.999 yield all-ones masks; a fraction that rounds to
            zero pixels yields all-zero masks.
        max_attempts: Random placement attempts per mask before falling back
            to a centred square.

    Returns:
        Tensor of shape ``(num_masks, 1, height, width)`` with values in
        {0, 1}, on the device and with the dtype of ``img_pt``.
    """
    _, _, height, width = img_pt.shape
    mask_area = int(height * width * mask_percentage)
    masks = torch.zeros(
        (num_masks, 1, height, width), dtype=img_pt.dtype, device=img_pt.device
    )

    if mask_percentage >= 0.999:
        # Full mask for entire image
        return torch.ones_like(masks)

    if mask_area <= 0:
        # Nothing to mask; also avoids random.randint(1, 0) raising below.
        return masks

    # Largest possible short side of a rectangle with the requested area.
    max_dim = int(mask_area ** 0.5)

    for ii in range(num_masks):
        placed = False
        for _ in range(max_attempts):
            short_side = random.randint(1, max_dim)
            long_side = mask_area // short_side  # always >= short_side

            # Reject rectangles more elongated than 1:4.
            if short_side / long_side < 0.25:
                continue

            # Pick portrait or landscape at random so that both orientations
            # occur; otherwise wide images could never host the rectangle.
            if random.random() < 0.5:
                mask_width, mask_height = short_side, long_side
            else:
                mask_width, mask_height = long_side, short_side

            if mask_height > height or mask_width > width:
                continue

            x_start = random.randint(0, width - mask_width)
            y_start = random.randint(0, height - mask_height)
            rows = slice(y_start, y_start + mask_height)
            cols = slice(x_start, x_start + mask_width)

            # Masks are non-negative, so a positive sum means an earlier mask
            # already occupies part of this rectangle.
            if masks[:ii, :, rows, cols].sum() > 0:
                continue

            masks[ii, :, rows, cols] = 1
            placed = True
            break

        if not placed:
            # Fallback: just fill a central region if all attempts fail
            print(f"Warning: Failed to place mask {ii}, using fallback.")
            center_h = height // 2
            center_w = width // 2
            # The square spans 2 * half_side per axis, so half_side must be
            # sqrt(area) / 2 for the square to cover ~mask_area pixels.
            half_side = max(1, int(mask_area ** 0.5) // 2)
            h_half = min(center_h, half_side)
            w_half = min(center_w, half_side)
            masks[
                ii,
                :,
                center_h - h_half:center_h + h_half,
                center_w - w_half:center_w + w_half,
            ] = 1

    return masks

In [None]:
class Params:
    """Central container for the FreqMark hyperparameters and settings."""

    def __init__(self):
        # ---- Runtime device and input image ----
        cuda_ready = torch.cuda.is_available()
        self.device = torch.device('cuda:2') if cuda_ready else torch.device('cpu')
        self.image_path = '/mnt/nas5/suhyeon/datasets/DIV2K_train_HR/0002.png'

        # ---- Pretrained model identifiers ----
        self.vae_model_name = "stabilityai/stable-diffusion-2-1"
        self.vae_subfolder = "vae"
        self.dino_model_repo = 'facebookresearch/dinov2'
        self.dino_model_name = 'dinov2_vits14'

        # ---- Image sizes and preprocessing ----
        self.vae_image_size = 512
        self.image_size = 384
        side = self.image_size
        # Resize to a square of `image_size` pixels, then convert to a tensor.
        preprocessing = [
            transforms.Resize((side, side)),
            transforms.ToTensor(),
        ]
        self.transform = transforms.Compose(preprocessing)

        # ---- FreqMark core settings ----
        self.message_bits = 2
        self.feature_dim = 128
        self.margin = 1.0
        self.grid_size = 28
        # Number of cells in a grid_size x grid_size grid.
        self.num_patches = self.grid_size ** 2

        # ---- Optimization settings ----
        self.lr = 2.0
        self.steps = 400
        self.lambda_p = 0.05
        self.lambda_i = 0.25

        # ---- Robustness settings ----
        self.eps1_std = 0.25
        self.eps2_std = 0.06

        # ---- Demo / evaluation settings ----
        self.batch_size = 4
        self.num_test_images = 1

In [None]:
def compute_psnr(a, b, max_val=1.0):
    """Compute the peak signal-to-noise ratio (in dB) between two tensors.

    Args:
        a: First tensor.
        b: Second tensor, same shape as ``a``.
        max_val: Peak signal value of the data (1.0 for images in [0, 1]).

    Returns:
        0-dim CPU tensor holding the PSNR. Identical inputs (MSE of exactly
        zero) yield 100.0 instead of infinity. The result is a tensor in both
        cases, so callers no longer get a plain float for identical inputs.
    """
    mse = F.mse_loss(a, b).item()
    if mse == 0:
        return torch.tensor(100.0)
    return 20 * torch.log10(max_val / torch.sqrt(torch.tensor(mse)))

def calculate_iou(pred_mask, gt_mask, threshold=0.65, save_debug=True):
    """Compute the intersection-over-union of a predicted and a reference mask.

    The prediction is passed through a sigmoid and binarized at ``threshold``;
    the reference mask is binarized at ``> 0``.

    Args:
        pred_mask: Raw prediction scores (logits).
        gt_mask: Reference mask; any positive value counts as foreground.
        threshold: Probability above which a predicted pixel is foreground.
        save_debug: When true (the default, matching earlier behavior), write
            ``pred.png``, ``pred_bin.png``, ``gt.png``, ``intersection.png``
            and ``union.png`` to the current working directory.

    Returns:
        IoU as a Python float. A small epsilon in the denominator makes the
        result 0.0 when both masks are empty.
    """
    pred_mask_bin = (torch.sigmoid(pred_mask) > threshold).float()
    gt_mask_bin = (gt_mask > 0).float()  # Ground truth might not be 0/1
    overlap = pred_mask_bin * gt_mask_bin

    if save_debug:
        save_image(pred_mask, "pred.png")
        save_image(pred_mask_bin, "pred_bin.png")
        save_image(gt_mask_bin, "gt.png")
        save_image(overlap, "intersection.png")
        save_image(pred_mask_bin + gt_mask_bin, "union.png")

    # |A ∪ B| = |A| + |B| - |A ∩ B|
    intersection = overlap.sum()
    union = (pred_mask_bin + gt_mask_bin).sum() - intersection

    iou = intersection / (union + 1e-6)  # Add epsilon to avoid division by zero
    return iou.item()

def calculate_cosine_similarity_matrix(F_a, F_b):
    """Summarize the per-location cosine similarity of two feature maps.

    Both inputs are treated as [B, C, H, W] maps (the shape is read from
    ``F_a``). Every spatial location becomes one C-dimensional vector, the
    vectors are L2-normalized, and vectors at the same location in the two
    maps are compared by dot product.

    Returns:
        Tuple ``(mean, max, min)`` of the per-location cosine similarities,
        each as a Python float.
    """
    batch, channels, rows, cols = F_a.shape
    num_locations = rows * cols

    def _unit_vectors(feature_map):
        # (B, C, H, W) -> (B, H, W, C) -> (B, H*W, C), then unit length per
        # location so only the direction is compared.
        tokens = feature_map.permute(0, 2, 3, 1).reshape(batch, num_locations, channels)
        return F.normalize(tokens, p=2, dim=-1)

    # Dot product of matching unit vectors -> [B, H*W] cosine similarities.
    per_location = (_unit_vectors(F_a) * _unit_vectors(F_b)).sum(dim=-1)

    return (
        per_location.mean().item(),
        per_location.max().item(),
        per_location.min().item(),
    )

def generate_universal_vectors(args, feature_dim, num_vectors=1):
    """Generate random sign-quantized, unit-length direction vectors.

    The vectors are meant to respond to the direction of a feature vector
    rather than to its constant (DC) offset.

    Args:
        args: Object exposing a ``device`` attribute; the result is moved
            there.
        feature_dim: Length of each vector.
        num_vectors: Number of vectors (rows) to generate. Defaults to 1.

    Returns:
        Tensor of shape ``(num_vectors, feature_dim)`` whose entries are
        ``+/- 1 / sqrt(feature_dim)``, so every row has unit L2 norm.
    """
    # 1. Random Gaussian draw (on the CPU, using the global torch RNG).
    vecs = torch.randn(num_vectors, feature_dim)

    # 2. Zero-mean centering: subtract each row's mean so the row sums to 0.
    vecs = vecs - vecs.mean(dim=1, keepdim=True)

    # 3. Sign quantization; the tiny offset keeps exact zeros from mapping to 0.
    # NOTE(review): after quantization the +1/-1 counts are generally not
    # balanced, so the rows are no longer exactly zero-sum -- confirm whether
    # the DC-invariance set up in step 2 is still required at this point.
    vecs = torch.sign(vecs + 1e-6)

    # 4. L2 normalization of every row.
    vecs = vecs / torch.norm(vecs, p=2, dim=1, keepdim=True)

    return vecs.to(args.device)

In [None]:
# Experiment configuration for the inpainting cosine-similarity analysis.
# Alternative input images kept for reference (currently unused):
# img_path = "/mnt/nas5/suhyeon/projects/freq-loc/secret_code/0002.png"
# img_path = "/mnt/nas5/suhyeon/projects/freq-loc/secret_code/analysis_dist_wm_step400.png"
# img_path = "/mnt/nas5/suhyeon/projects/freq-loc/baseline/20251119-105815/watermarked/0088.png"
seed = 45  # seed for the torch RNG and the inpainting generator
proportion_masked = 0.3  # fraction of the image area covered by each random mask
trials = 5  # number of mask / inpaint repetitions

In [None]:
# Experiment: project encoder features of an image onto a random direction
# vector, then compare the per-patch cosine similarity inside and outside a
# random mask before and after Stable Diffusion inpainting of that mask.
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "sd-legacy/stable-diffusion-inpainting",
    # torch_dtype=torch.float16,
    cache_dir='/mnt/nas5/suhyeon/caches'
).to(device)

args = Params()

# secret_key = torch.load('./learned_directional_vector.pt')
# freqmark.direction_vectors = torch.tensor(secret_key).to(args.device)
# print(freqmark.direction_vectors)

image_encoder = timm.create_model(
    'convnext_small.dinov3_lvd1689m',  # or another ConvNeXt variant
    pretrained=True,
    features_only=True
).to(device)
image_encoder.eval()

# Seed every RNG that is used below: Python's `random` drives the mask
# placement, torch drives the direction vector, `generator` drives the pipeline.
random.seed(seed)
torch.manual_seed(seed)
generator = torch.Generator(device=device).manual_seed(seed)
to_tensor = transforms.ToTensor()

# watermarked = load_img(img_path, transforms=args.transform)
original = load_img('/mnt/nas5/suhyeon/datasets/valAGE-Set/0088.png', transforms=val_transforms)

psnrs = []
ious = []
logits = []

orig_inside_list = []
orig_outside_list = []
inpaint_inside_list = []
inpaint_outside_list = []

epsilon = 1e-6
feature_level = 0  # index into the list of feature maps returned by the encoder
output_dir = "./exp_cs"
os.makedirs(output_dir, exist_ok=True)  # save_image does not create directories


def _unit_patch_features(image):
    """Return L2-normalized encoder features as [B, H*W, C] plus the (H, W) grid size."""
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        fmap = image_encoder(image)[feature_level]
    batch, channels, rows, cols = fmap.shape
    tokens = fmap.permute(0, 2, 3, 1).reshape(batch, rows * cols, channels)
    tokens = tokens / (torch.norm(tokens, p=2, dim=-1, keepdim=True) + epsilon)
    return tokens, (rows, cols)


# Everything derived from the unmodified image is identical in every trial,
# so it is computed once up front.
features_norm, (H, W) = _unit_patch_features(original)
B, _, C = features_norm.shape

# direction_vectors = torch.load('/mnt/nas5/suhyeon/projects/freq-loc/random_vec_univ.pt').to(args.device)
# The vector length follows the encoder's channel count at `feature_level`
# instead of a hard-coded value, so the two cannot get out of sync.
direction_vectors = generate_universal_vectors(args, feature_dim=C)
original_cs = torch.matmul(features_norm, direction_vectors.T)

img_norm, min_norm, max_norm = norm_tensor(original)
save_image(original, "./exp_cs/original.png")

for _ in range(trials):
    mask = create_random_mask(original, num_masks=1, mask_percentage=proportion_masked)

    # Resize the mask to the feature-map grid. Thresholding at 0.5 keeps only
    # cells that are predominantly masked; a plain bool cast would mark every
    # cell with any non-zero interpolated value as "inside".
    mask_small = F.interpolate(mask, size=(H, W), mode='bilinear', align_corners=False).squeeze(1) > 0.5
    inside = mask_small.reshape(B, H * W)
    outside = ~inside

    # 1. Original image, inside the mask (the region that will be inpainted).
    cs_inside_orig = original_cs[inside]
    orig_inside_list.append(cs_inside_orig.mean().item())

    # 2. Original image, outside the mask (background).
    cs_outside_orig = original_cs[outside]
    orig_outside_list.append(cs_outside_orig.mean().item())

    img_edit_pil = pipe(prompt="", image=img_norm, mask_image=mask, generator=generator).images[0]
    img_edit = to_tensor(img_edit_pil).unsqueeze(0).to(device)
    # Bring the pipeline output back to the resolution of `original` so both
    # images produce the same feature grid and `mask_small` applies to both.
    img_edit = F.interpolate(img_edit, size=original.shape[-2:], mode="bilinear", align_corners=False)

    features_norm, _ = _unit_patch_features(img_edit)
    inpaint_cs = torch.matmul(features_norm, direction_vectors.T)

    # 3. Inpainted image, inside the mask (the attacked region).
    cs_inside_inpaint = inpaint_cs[inside]
    inpaint_inside_list.append(cs_inside_inpaint.mean().item())

    # 4. Inpainted image, outside the mask (background).
    cs_outside_inpaint = inpaint_cs[outside]
    inpaint_outside_list.append(cs_outside_inpaint.mean().item())

    # img_edit = denorm_tensor(img_edit, min_norm, max_norm)  # [1, 3, H, W]

    # Overwritten every trial; the file holds the most recent result.
    save_image(img_edit, "./exp_cs/inpainted.png")

def calculate_metrics(data_list):
    """Return ``(mean, standard deviation)`` of ``data_list``.

    An empty (falsy) input yields ``(0.0, 0.0)``. The standard deviation is
    NumPy's default population standard deviation.
    """
    if data_list:
        samples = np.asarray(data_list)
        return samples.mean(), samples.std()
    return 0.0, 0.0

# Aggregate the per-trial means and print a report of the cosine similarity
# inside / outside the mask before and after inpainting.
avg_orig_inside, std_orig_inside = calculate_metrics(orig_inside_list)
avg_orig_outside, std_orig_outside = calculate_metrics(orig_outside_list)
avg_inpaint_inside, std_inpaint_inside = calculate_metrics(inpaint_inside_list)
avg_inpaint_outside, std_inpaint_outside = calculate_metrics(inpaint_outside_list)

# Change caused by inpainting, per region (inpainted minus original).
diff_inside = avg_inpaint_inside - avg_orig_inside
diff_outside = avg_inpaint_outside - avg_orig_outside

print("\n" + "="*75)
print("✨ 마스크 영역별 코사인 유사도 분석 (평균)")
print("="*75)
print(f"총 Trial 횟수: {trials}회")

print("\n## 1. Original Image Baseline (워터마킹 목표점)")
print("-" * 75)
print("| 영역 | 평균 Cos Sim | STD (변동성) |")
print("|:---|:---|:---|")
print(f"| 마스크 안쪽 (Inpaint Target) | {avg_orig_inside:.4f} | {std_orig_inside:.4f} |")
print(f"| 마스크 바깥쪽 (Background) | {avg_orig_outside:.4f} | {std_orig_outside:.4f} |")

print("\n## 2. Inpainted Image Result (Attack Effectiveness)")
print("-" * 75)
# The header has three columns to match the rows below; it previously
# declared a fourth column that no row filled.
print("| 영역 | 평균 Cos Sim | Original 대비 변화량 |")
print("|:---|:---|:---|")
print(f"| 마스크 안쪽 (Attack Result) | {avg_inpaint_inside:.4f} | {diff_inside:+.4f} |")
print(f"| 마스크 바깥쪽 (Leakage Check) | {avg_inpaint_outside:.4f} | {diff_outside:+.4f} |")
print("-" * 75)

# Final diagnosis: decide whether localization looks feasible.
print("\n### 최종 진단: Localization 가능성")
if diff_inside < -0.05 and abs(diff_outside) < 0.01:
    print(f"✅ Localization 성공 가능성 높음: 마스크 안쪽에서만 Cos Sim이 크게 하락({diff_inside:.4f})하여 공격 감지에 적합합니다.")
elif abs(diff_inside) < 0.01:
    print("❌ Localization 실패 (너무 강건함): Inpainting 공격이 워터마크를 제거하지 못하고 Cos Sim을 유지하고 있습니다.")
elif abs(diff_inside) > 0.05 and abs(diff_outside) > 0.05:
    print("⚠️ 실패 (전역 변화/Leakage): 마스크 안팎 모두 Cos Sim이 크게 변하여, Inpainting이 전역적인 특징 변화를 유발하고 있습니다.")
else:
    # None of the thresholds above matched; say so instead of printing nothing.
    print(f"❓ 판단 보류: 변화량이 어느 기준에도 해당하지 않습니다 (안쪽 {diff_inside:+.4f}, 바깥쪽 {diff_outside:+.4f}).")

print("="*75)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]An error occurred while trying to fetch /mnt/nas5/suhyeon/caches/models--sd-legacy--stable-diffusion-inpainting/snapshots/8a4288a76071f7280aedbdb3253bdb9e9d5d84bb/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /mnt/nas5/suhyeon/caches/models--sd-legacy--stable-diffusion-inpainting/snapshots/8a4288a76071f7280aedbdb3253bdb9e9d5d84bb/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
Loading pipeline components...:  14%|█▍        | 1/7 [00:00<00:01,  5.92it/s]An error occurred while trying to fetch /mnt/nas5/suhyeon/caches/models--sd-legacy--stable-diffusion-inpainting/snapshots/8a4288a76071f7280aedbdb3253bdb9e9d5d84bb/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /mnt/nas5/suhyeon/caches/models--sd-legacy--stable-diffusion-inpainting/snapshots/8a4288a76071f7280aedbdb3253bdb9e9d5d84bb/unet.
Defaulting to unsafe 

Tensor normalized: min=0.0, max=1.0


100%|██████████| 50/50 [00:08<00:00,  6.05it/s]


Tensor normalized: min=0.0, max=1.0


100%|██████████| 50/50 [00:08<00:00,  6.01it/s]


Tensor normalized: min=0.0, max=1.0


100%|██████████| 50/50 [00:08<00:00,  6.00it/s]


Tensor normalized: min=0.0, max=1.0


100%|██████████| 50/50 [00:08<00:00,  6.00it/s]


Tensor normalized: min=0.0, max=1.0


100%|██████████| 50/50 [00:08<00:00,  6.01it/s]



✨ 마스크 영역별 코사인 유사도 분석 (평균)
총 Trial 횟수: 5회

## 1. Original Image Baseline (워터마킹 목표점)
---------------------------------------------------------------------------
| 영역 | 평균 Cos Sim | STD (변동성) |
|:---|:---|:---|
| 마스크 안쪽 (Inpaint Target) | -0.0168 | 0.0045 |
| 마스크 바깥쪽 (Background) | -0.0203 | 0.0020 |

## 2. Inpainted Image Result (Attack Effectiveness)
---------------------------------------------------------------------------
| 영역 | 평균 Cos Sim | Original 대비 변화량 | 진단 |
|:---|:---|:---|:---|
| 마스크 안쪽 (Attack Result) | -0.0097 | +0.0071 |
| 마스크 바깥쪽 (Leakage Check) | -0.0162 | +0.0040 |
---------------------------------------------------------------------------

### 최종 진단: Localization 가능성
❌ Localization 실패 (너무 강건함): Inpainting 공격이 워터마크를 제거하지 못하고 Cos Sim을 유지하고 있습니다.


layer [1]
===========================================================================
✨ 마스크 영역별 코사인 유사도 분석 (평균)
===========================================================================
총 Trial 횟수: 5회

1. Original Image Baseline (워터마킹 목표점)
---------------------------------------------------------------------------
| 영역 | 평균 Cos Sim | STD (변동성) |
|:---|:---|:---|
| 마스크 안쪽 (Inpaint Target) | -0.0168 | 0.0045 |
| 마스크 바깥쪽 (Background) | -0.0203 | 0.0020 |

2. Inpainted Image Result (Attack Effectiveness)
---------------------------------------------------------------------------
| 영역 | 평균 Cos Sim | Original 대비 변화량 | 진단 |
|:---|:---|:---|:---|
| 마스크 안쪽 (Attack Result) | -0.0097 | +0.0071 |
| 마스크 바깥쪽 (Leakage Check) | -0.0162 | +0.0040 |
---------------------------------------------------------------------------

최종 진단: Localization 가능성
❌ Localization 실패 (너무 강건함): Inpainting 공격이 워터마크를 제거하지 못하고 Cos Sim을 유지하고 있습니다.
===========================================================================

<!--  -->