1. Install Dependencies

In [1]:
# HPS dependencies
! pip install ftfy regex tqdm
! pip install git+https://github.com/openai/CLIP.git
! pip install hpsv2

# Stable Diffusion dependencies
! pip install diffusers

# Adversarial attack dependencies
! pip install torchattacks

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-hf2tgqpy
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-hf2tgqpy
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [2]:
!mkdir -p clip && wget https://github.com/openai/CLIP/raw/main/clip/bpe_simple_vocab_16e6.txt.gz -P /usr/local/lib/python3.11/dist-packages/hpsv2/src/open_clip

--2025-03-09 13:00:41--  https://github.com/openai/CLIP/raw/main/clip/bpe_simple_vocab_16e6.txt.gz
Resolving github.com (github.com)... 140.82.116.3
Connecting to github.com (github.com)|140.82.116.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/openai/CLIP/main/clip/bpe_simple_vocab_16e6.txt.gz [following]
--2025-03-09 13:00:41--  https://raw.githubusercontent.com/openai/CLIP/main/clip/bpe_simple_vocab_16e6.txt.gz
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1356917 (1.3M) [application/octet-stream]
Saving to: ‘/usr/local/lib/python3.11/dist-packages/hpsv2/src/open_clip/bpe_simple_vocab_16e6.txt.gz.6’


2025-03-09 13:00:41 (35.9 MB/s) - ‘/usr/local/lib/python3.11/dist-packages/hpsv2/src/o

2. Imports

In [3]:
import os
import re
import gc
import ast
from datetime import datetime
import random
import argparse
from tqdm import tqdm
from google.colab import drive
from collections import OrderedDict

from abc import ABC, abstractmethod
from typing import Union, List, Dict, Tuple, Optional

import numpy as np
import pandas as pd
import torch
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from diffusers import DiffusionPipeline, StableDiffusionPipeline, StableDiffusion3Pipeline

import clip
import hpsv2
from hpsv2.src.open_clip import create_model_and_transforms, get_tokenizer
import PIL
from PIL import Image

from torchattacks.attack import Attack, wrapper_method



3. Connect to Google Drive

In [4]:
# Mount Google Drive (forcing a fresh mount) and make "My Drive" the working
# directory, so relative paths used afterwards resolve inside Drive.
drive.mount("/content/drive", force_remount=True)
os.chdir(os.path.join("/content/drive", "My Drive"))

Mounted at /content/drive


4. Model Code

In [5]:
class ModelLoadingError(Exception):
    """Signals that a model could not be loaded."""

class InferenceError(Exception):
    """Signals that something went wrong while running inference."""

In [6]:
class BaseModel(ABC):
    """Interface shared by every model wrapper: subclasses must implement
    both `load_model` and `inference` before they can be instantiated."""

    @abstractmethod
    def load_model(self):
        """
        Load the open-weights model, or open an API connection to a closed-source model.
        """

    @abstractmethod
    def inference(
        self,
        inputs: Union[List[str], torch.Tensor],
        captions: Optional[List[str]] = None,
    ) -> Union[torch.Tensor, List[float]]:
        """
        Run the model on one batch.

        Args:
            inputs (Union[List[str], torch.Tensor]): Either a batch of text prompts
                or a batch of images.
            captions (Optional[List[str]]): Captions paired with the inputs; used by
                reward models.

        Returns:
            Union[torch.Tensor, List[float]]: The batch of model outputs, or a list of
            reward scores.
        """

In [7]:
class HPSv1Model(BaseModel):
    """HPSv1 reward model built on OpenAI CLIP ViT-L/14.

    The CLIP weights are overwritten with the ``state_dict`` stored in the checkpoint
    at ``model_path``. A reward is the cosine similarity between an image embedding and
    its caption embedding, multiplied by 100.
    """

    def __init__(self, model_path: str):
        """
        Args:
            model_path (str): Path to the HPSv1 model checkpoint.

        Raises:
            ModelLoadingError: If the checkpoint cannot be loaded.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_path = model_path
        self.load_model()

    def load_model(self):
        """Load CLIP ViT-L/14, then replace its weights with the HPSv1 checkpoint.

        Sets ``self.model``, ``self.preprocess_function`` and ``self.tokenizer``.

        Raises:
            ModelLoadingError: If the checkpoint file is missing, has no
                ``state_dict`` entry, or any other error occurs while loading.
        """
        try:
            self.model, self.preprocess_function = clip.load("ViT-L/14", device=self.device)
            # map_location keeps a checkpoint that was saved from a GPU loadable on a
            # CPU-only machine.
            checkpoint = torch.load(self.model_path, map_location=self.device)

            if "state_dict" not in checkpoint:
                raise ModelLoadingError("Checkpoint does not contain 'state_dict'.")

            self.model.load_state_dict(checkpoint["state_dict"])
            self.tokenizer = clip.tokenize
            self.model.eval()

        except ModelLoadingError:
            # Already the right exception type; do not wrap it a second time.
            raise
        except FileNotFoundError as e:
            raise ModelLoadingError(f"Model checkpoint not found at '{self.model_path}'.") from e
        except Exception as e:
            raise ModelLoadingError(f"Error loading model: {e}") from e

    def _similarity_scores(
        self, inputs: torch.Tensor, captions: Union[List[str], torch.Tensor]
    ) -> torch.Tensor:
        """Return one score per (image, caption) pair as a 1-D tensor.

        Shared by `inference` and `inference_with_grad` so both compute the same
        quantity: 100 * cosine similarity of the normalised image and text embeddings.
        Gradient tracking is left to the caller.
        """
        image_features = self.model.encode_image(inputs.to(self.device))

        if isinstance(captions, torch.Tensor):
            # Captions were tokenized by the caller.
            text_tokens = captions.to(self.device)
        else:
            text_tokens = self.tokenizer(captions).to(self.device)
        text_features = self.model.encode_text(text_tokens)

        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

        # Convert cosine similarity scores to percentages as in the original paper.
        # Only the diagonal is kept: image i is scored against caption i.
        return (image_features @ text_features.T).diag() * 100

    def inference(self, inputs: torch.Tensor, captions: Union[List[str], torch.Tensor]) -> List[float]:
        """
        Score a batch of images against their captions without tracking gradients.

        Args:
            inputs (torch.Tensor): Batch of images.
            captions (Union[List[str], torch.Tensor]): One caption per image, either as
                raw strings or as an already tokenized tensor.

        Returns:
            List[float]: One reward score per image.

        Raises:
            TypeError: If `inputs` or `captions` has an unsupported type.
            ValueError: If the number of images and captions differ.
            InferenceError: If the model fails while scoring.
        """
        if not isinstance(inputs, torch.Tensor):
            raise TypeError("Expected 'inputs' to be of type torch.Tensor (i.e. images).")
        if not (isinstance(captions, torch.Tensor) or (isinstance(captions, list) and all(isinstance(c, str) for c in captions))):
            raise TypeError("Expected 'captions' to be either a torch.Tensor or a list of strings.")
        if inputs.shape[0] != len(captions):
            raise ValueError("Number of 'inputs' and 'captions' must match.")

        try:
            with torch.no_grad():
                return self._similarity_scores(inputs, captions).tolist()
        except Exception as e:
            raise InferenceError(f"Inference failed: {e}") from e

    def inference_with_grad(self, inputs: torch.Tensor, captions: List[str]) -> torch.Tensor:
        """
        Score a batch of images against their captions while keeping the autograd graph.

        The result is the same quantity `inference` returns, but as a tensor that can be
        back-propagated through. CLIP's ``forward()`` is deliberately not used here: it
        returns image/text logit matrices, not feature vectors.

        Args:
            inputs (torch.Tensor): Batch of images.
            captions (List[str]): One caption per image.

        Returns:
            torch.Tensor: 1-D tensor with one reward score per image.

        Raises:
            TypeError: If `inputs` is not a tensor or `captions` is not a list of strings.
            ValueError: If the number of images and captions differ.
            InferenceError: If the model fails while scoring.
        """
        if not isinstance(inputs, torch.Tensor):
            raise TypeError("Expected 'inputs' to be of type torch.Tensor (i.e. images).")
        if not isinstance(captions, list) or not all(isinstance(c, str) for c in captions):
            raise TypeError("Expected 'captions' to be a list of strings.")
        if inputs.shape[0] != len(captions):
            raise ValueError("Number of 'inputs' and 'captions' must match.")

        try:
            return self._similarity_scores(inputs, captions)
        except Exception as e:
            raise InferenceError(f"Inference failed: {e}") from e

In [8]:
class HPSv2Model(BaseModel):
    """HPSv2 reward model built on the open_clip ViT-H-14 architecture bundled with hpsv2.

    The model weights are overwritten with the ``state_dict`` stored in the checkpoint
    at ``model_path``. A reward is the dot product of an image embedding with its
    caption embedding, multiplied by 100.
    """

    def __init__(self, model_path: str):
        """
        Args:
            model_path (str): Path to the HPSv2 model checkpoint.

        Raises:
            ModelLoadingError: If the checkpoint cannot be loaded.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_path = model_path
        self.load_model()

    def load_model(self):
        """Build the ViT-H-14 model, then replace its weights with the HPSv2 checkpoint.

        Sets ``self.model``, ``self.preprocess_function`` and ``self.tokenizer``.

        Raises:
            ModelLoadingError: If the checkpoint file is missing, has no
                ``state_dict`` entry, or any other error occurs while loading.
        """
        try:
            self.model, _, self.preprocess_function = create_model_and_transforms(
                "ViT-H-14",
                "laion2B-s32B-b79K",
                precision="amp",
                device=self.device,
                jit=False,
                force_quick_gelu=False,
                force_custom_text=False,
                force_patch_dropout=False,
                force_image_size=None,
                pretrained_image=False,
                image_mean=None,
                image_std=None,
                light_augmentation=True,
                aug_cfg={},
                output_dict=True,
                with_score_predictor=False,
                with_region_predictor=False
            )

            # map_location keeps a checkpoint that was saved from a GPU loadable on a
            # CPU-only machine.
            checkpoint = torch.load(self.model_path, map_location=self.device)
            if "state_dict" not in checkpoint:
                raise ModelLoadingError("Checkpoint does not contain 'state_dict'.")

            self.model.load_state_dict(checkpoint["state_dict"])
            self.tokenizer = get_tokenizer("ViT-H-14")
            self.model.eval()

        except ModelLoadingError:
            # Already the right exception type; do not wrap it a second time.
            raise
        except FileNotFoundError as e:
            raise ModelLoadingError(f"Model checkpoint not found at '{self.model_path}'.") from e
        except Exception as e:
            raise ModelLoadingError(f"Error loading model: {e}") from e

    def _similarity_scores(
        self, inputs: torch.Tensor, captions: Union[List[str], torch.Tensor]
    ) -> torch.Tensor:
        """Return one score per (image, caption) pair as a 1-D tensor.

        Shared by `inference` and `inference_with_grad`; gradient tracking is left to
        the caller.
        """
        if isinstance(captions, torch.Tensor):
            # Captions were tokenized by the caller.
            text_tokens = captions.to(self.device)
        else:
            text_tokens = self.tokenizer(captions).to(self.device)
        inputs = inputs.to(self.device)

        # torch.autocast replaces the deprecated torch.cuda.amp.autocast(); mixed
        # precision is only switched on when the model actually lives on a GPU.
        with torch.autocast(device_type="cuda", enabled=self.device == "cuda"):
            outputs = self.model(inputs, text_tokens)
            image_features, text_features = outputs["image_features"], outputs["text_features"]
            # NOTE(review): no normalisation is applied here, so this assumes the model
            # returns already-normalised features - confirm against hpsv2's open_clip.
            # Only the diagonal is kept: image i is scored against caption i.
            return (image_features @ text_features.T).diag() * 100

    def inference(self, inputs: torch.Tensor, captions: Union[List[str], torch.Tensor]) -> List[float]:
        """
        Score a batch of images against their captions without tracking gradients.

        Args:
            inputs (torch.Tensor): Batch of images.
            captions (Union[List[str], torch.Tensor]): One caption per image, either as
                raw strings or as an already tokenized tensor.

        Returns:
            List[float]: One reward score per image.

        Raises:
            TypeError: If `inputs` or `captions` has an unsupported type.
            ValueError: If the number of images and captions differ.
            InferenceError: If the model fails while scoring.
        """
        if not isinstance(inputs, torch.Tensor):
            # The check is for a tensor, so the message says so.
            raise TypeError("Expected 'inputs' to be of type torch.Tensor (i.e. images).")
        if not (isinstance(captions, torch.Tensor) or (isinstance(captions, list) and all(isinstance(c, str) for c in captions))):
            raise TypeError("Expected 'captions' to be either a torch.Tensor or a list of strings.")
        if len(inputs) != len(captions):
            raise ValueError("Number of 'inputs' and 'captions' must match.")

        try:
            with torch.no_grad():
                return self._similarity_scores(inputs, captions).tolist()
        except Exception as e:
            raise InferenceError(f"Inference failed: {e}") from e

    def inference_with_grad(self, inputs: torch.Tensor, captions: List[str]) -> torch.Tensor:
        """
        Score a batch of images against their captions while keeping the autograd graph.

        Args:
            inputs (torch.Tensor): Batch of images.
            captions (List[str]): One caption per image.

        Returns:
            torch.Tensor: 1-D tensor with one reward score per image.

        Raises:
            TypeError: If `inputs` is not a tensor or `captions` is not a list of strings.
            ValueError: If the number of images and captions differ.
            InferenceError: If the model fails while scoring.
        """
        if not isinstance(inputs, torch.Tensor):
            # The check is for a tensor, so the message says so.
            raise TypeError("Expected 'inputs' to be of type torch.Tensor (i.e. images).")
        if not isinstance(captions, list) or not all(isinstance(c, str) for c in captions):
            raise TypeError("Expected 'captions' to be a list of strings.")
        if len(inputs) != len(captions):
            raise ValueError("Number of 'inputs' and 'captions' must match.")

        try:
            return self._similarity_scores(inputs, captions)
        except Exception as e:
            raise InferenceError(f"Inference failed: {e}") from e

In [9]:
class BaseDiffusionModel(BaseModel):
    """Text-to-image wrapper around a diffusers pipeline.

    Subclasses choose the pipeline class by overriding `_get_diffusion_pipeline`.
    Every prompt is generated with a generator seeded with the same fixed seed
    (``self.seed``).
    """

    def __init__(self, model_path: str, offload_to_cpu: bool = False, resolution: Optional[int] = None, **kwargs):
        """
        Args:
            model_path (str): Path or repository ID of the diffusion model checkpoint.
            offload_to_cpu (bool): If True (and a GPU is available), enable model CPU
                offloading instead of moving the whole pipeline to the GPU.
            resolution (Optional[int]): When set, used as both height and width of the
                generated images; otherwise the pipeline's own default size is used.
            **kwargs: Forwarded unchanged to the pipeline's ``from_pretrained``.

        Raises:
            ModelLoadingError: If the pipeline cannot be loaded.
        """
        self.seed = 42

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_path = model_path
        self.offload_to_cpu = offload_to_cpu
        self.resolution = resolution
        self.kwargs = kwargs

        self.diffusion_pipeline = self._get_diffusion_pipeline()
        self.load_model()

    def _get_diffusion_pipeline(self):
        """ Subclasses should override this to return the correct pipeline. """
        return DiffusionPipeline

    def load_model(self):
        """Instantiate the pipeline from ``self.model_path`` and place it on a device.

        Raises:
            ModelLoadingError: On out-of-memory errors, a missing checkpoint, or any
                other failure while loading.
        """
        try:
            self.model = self.diffusion_pipeline.from_pretrained(
                self.model_path,
                **self.kwargs
            )
            if self.offload_to_cpu and self.device == "cuda":
                # Offloading handles device placement itself. Moving the whole
                # pipeline to the GPU first would need exactly the memory that
                # offloading is meant to save.
                self.model.enable_model_cpu_offload()
            else:
                self.model = self.model.to(self.device)

        except (MemoryError, torch.cuda.OutOfMemoryError) as e:
            # CUDA out-of-memory is not a MemoryError, so it is listed explicitly.
            if hasattr(self, "model"):
                del self.model
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            raise ModelLoadingError(
                f"Memory error occurred while loading the model. Consider using a smaller model: {e}"
            ) from e
        except FileNotFoundError as e:
            raise ModelLoadingError(f"Model checkpoint not found at '{self.model_path}'.") from e
        except Exception as e:
            raise ModelLoadingError(f"Failed to load diffusion model: {e}") from e

    def inference(
        self, inputs: List[str], captions: Optional[List[str]] = None
    ):
        """
        Generate one image per prompt.

        Args:
            inputs (List[str]): Text prompts.
            captions (Optional[List[str]]): Unused; present to match `BaseModel.inference`.

        Returns:
            The ``images`` attribute of the pipeline output, one entry per prompt.

        Raises:
            TypeError: If `inputs` is not a list of strings.
            InferenceError: If the pipeline fails.
        """
        if not isinstance(inputs, list) or not all(isinstance(c, str) for c in inputs):
            raise TypeError("Expected 'inputs' to be a list of strings.")

        try:
            # Create one generator per prompt to ensure reproducibility
            generators = [
                torch.Generator(self.device).manual_seed(self.seed) for _ in range(len(inputs))
            ]
            call_kwargs = {"prompt": inputs, "generator": generators}
            if self.resolution:
                # use 1:1 aspect ratio
                call_kwargs["height"] = self.resolution
                call_kwargs["width"] = self.resolution
            return self.model(**call_kwargs).images

        except Exception as e:
            raise InferenceError(f"Inference failed: {e}") from e

In [10]:
class StableDiffusionModel(BaseDiffusionModel):
    """Diffusion wrapper that selects the pipeline class from the Stable Diffusion
    version found in the last path component of `model_path`."""

    def __init__(self, model_path: str, offload_to_cpu: bool = False, resolution: int = None, **kwargs):
        """
        Note:
            model_path (str): Path to the Stable Diffusion model.
                              Must include 'stable-diffusion-1', 'stable-diffusion-2', or 'stable-diffusion-3' after '<repo-owner>/'
                              for simplicity.
        """
        # float16 is the default precision unless the caller chose a torch_dtype.
        # On GPUs that support it, torch.bfloat16 is an alternative worth considering.
        kwargs.setdefault("torch_dtype", torch.float16)
        super().__init__(model_path, offload_to_cpu, resolution, **kwargs)

    def _get_diffusion_pipeline(self):
        """Return the pipeline class for the version named in the model path.

        Raises:
            ValueError: If the last path component names no supported version.
        """
        version_tag = self.model_path.split("/")[-1].lower()

        # Checked in order; the first matching pattern decides the pipeline.
        candidates = (
            (r'(stable-diffusion-?(v-?|v)?1(?:-\d+)?)(.*)?$', StableDiffusionPipeline),
            (r'(stable-diffusion-?(v-?|v)?2(?:-\d+)?)(.*)?$', DiffusionPipeline),
            (r'(stable-diffusion-?(v-?|v)?3(?:-\d+)?)(.*)?$', StableDiffusion3Pipeline),
        )
        for pattern, pipeline_cls in candidates:
            if re.search(pattern, version_tag):
                return pipeline_cls

        raise ValueError(
            "Model path must match 'stable-diffusion-1', 'stable-diffusion-v1', 'stable-diffusion-v-1', "
            "'stable-diffusion-2', 'stable-diffusion-v2', etc."
        )

In [11]:
class ModelFactory:
    """Builds model wrappers from a short type name."""

    @staticmethod
    def create_model(
        model_type: str, model_path: str,
        **kwargs,
    ) -> "BaseModel":
        """
        Creates and returns an instance of a model subclass based on the model_type.

        Args:
            model_type (str): The type of model to create. Supported values are:
                - "hpsv1": For HPSv1 reward models.
                - "hpsv2": For HPSv2 reward models.
                - "sd": For stable diffusion text-to-image models.
            model_path (str): The path or repository ID of the model checkpoint.
            **kwargs: Forwarded to the stable diffusion model only; ignored for the
                HPS reward models.

        Returns:
            BaseModel: An instance of the requested model.

        Raises:
            ValueError: If an unsupported model_type is provided.
        """
        if model_type == "hpsv1":
            return HPSv1Model(model_path)
        if model_type == "hpsv2":
            return HPSv2Model(model_path)
        if model_type == "sd":
            return StableDiffusionModel(model_path, **kwargs)

        # Name the rejected value and the values that are actually accepted.
        raise ValueError(
            f"Unsupported model type {model_type!r}. "
            "Use 'hpsv1' or 'hpsv2' for HPS reward models, or 'sd' for stable diffusion models."
        )

5. Define Arguments and Utils

In [12]:
def check_reward_model(value):
    """argparse `type` callback: return `value` unchanged when it is a supported
    HPS version string ('v1.0' or 'v2.0'), otherwise raise ArgumentTypeError."""
    if value in ("v1.0", "v2.0"):
        return value
    raise argparse.ArgumentTypeError(
        "reward_model_name must be one of: 'v1.0', 'v2.0'.")

def parse_transfer_test_args(argv=None):
    """Parse the options of a transfer test.

    Args:
        argv (Optional[List[str]]): Argument strings to parse, without the program
            name. When None (the default) argparse reads ``sys.argv[1:]``, which is
            what this function always did before the parameter existed. Passing a list
            lets a notebook call this without overwriting ``sys.argv``.

    Returns:
        argparse.Namespace: With ``reward_model_name``, ``original_images_path`` and
        ``adversarial_images_path``; all three options are required.
    """
    parser = argparse.ArgumentParser(
        description="Argument parser for attack process."
    )

    # Models group
    models = parser.add_argument_group("models")
    models.add_argument("--reward_model_name", type=check_reward_model, required=True,
                        help="HPS reward model version: v1.0, v2.0")

    # Misc group
    misc = parser.add_argument_group("misc")
    misc.add_argument("--original_images_path", type=str, required=True,
                        help="Path where original images are stored")
    misc.add_argument("--adversarial_images_path", type=str, required=True,
                        help="Path where adversarial images are stored")

    return parser.parse_args(argv)

In [13]:
def compute_reward_statistics(top_k_prompts, adv_rewards):
    """
    Summarise original versus adversarial rewards, overall and per category.

    Parameters:
        top_k_prompts (list): 4-tuples whose first element is the category and whose
            third element is the original reward; the second and fourth elements are
            not used here.
        adv_rewards (list): Adversarial reward for each entry of top_k_prompts, in the
            same order.

    Returns:
        dict: Keys "average_original", "average_adversarial", "per_category_original"
        and "per_category_adversarial". If either input is empty, both averages are
        0.0 and both per-category dicts are empty.
    """
    if not (top_k_prompts and adv_rewards):
        return {
            "average_original": 0.0,
            "average_adversarial": 0.0,
            "per_category_original": {},
            "per_category_adversarial": {}
        }

    def _mean(values):
        return sum(values) / len(values)

    overall_original = _mean([record[2] for record in top_k_prompts])
    overall_adversarial = _mean(adv_rewards)

    # Collect the rewards of each category, keeping first-seen category order.
    grouped_original, grouped_adversarial = {}, {}
    for (category, _, original, _), adversarial in zip(top_k_prompts, adv_rewards):
        grouped_original.setdefault(category, []).append(original)
        grouped_adversarial.setdefault(category, []).append(adversarial)

    return {
        "average_original": overall_original,
        "average_adversarial": overall_adversarial,
        "per_category_original": {name: _mean(vals) for name, vals in grouped_original.items()},
        "per_category_adversarial": {name: _mean(vals) for name, vals in grouped_adversarial.items()}
    }

In [14]:
def clear_cuda_memory_and_force_gc(force: bool = False):
    """
    Run garbage collection and release cached CUDA memory when the memory currently
    reserved by the CUDA allocator exceeds 70% of the device total, or when forced.

    On a machine without CUDA there is no cache to clear: a forced call only runs
    garbage collection and an unforced call does nothing.

    Args:
        force (bool): If True, garbage collection is forced and the CUDA cache is
                      cleared regardless of the memory threshold.
    """
    cuda_available = torch.cuda.is_available()

    if not force:
        if not cuda_available:
            return
        # Current reservation, not the peak: a peak would stay above the threshold
        # forever once it had been crossed.
        memory_reserved = torch.cuda.memory_reserved()
        memory_total = torch.cuda.get_device_properties("cuda").total_memory
        if memory_reserved <= memory_total * 0.7:
            return

    # Collect first so tensors that are only kept alive by reference cycles are freed
    # before the allocator is asked to hand its cached blocks back.
    gc.collect()
    if cuda_available:
        torch.cuda.empty_cache()

In [15]:
def numerical_key(filename):
    """Sort key for names like "image_1.png": the first run of digits in `filename`
    as an int, or -1 when the name contains no digit."""
    digits = re.search(r'\d+', filename)
    if digits is None:
        return -1
    return int(digits.group())

In [16]:
class SampledDataset(Dataset):
    """Dataset of prompt records, optionally paired with one image per record.

    Args:
        prompts: Mapping with parallel "category" and "prompt" sequences.
        images: Optional sequence of images, indexed like the prompts.
        transforms: Optional callable applied to an image when it is fetched. When
            None, images are returned as they are.
    """

    def __init__(self, prompts, images=None, transforms=None):
        self.data = [{"category": c, "prompt": p} for c, p in zip(prompts["category"], prompts["prompt"])]
        self.images = images
        self.transforms = transforms

    def __len__(self):
        """Number of prompt records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the record at `idx`, or an (image, record) tuple when images were given."""
        if self.images is None:
            return self.data[idx]

        image = self.images[idx]
        # transforms is optional: only call it when one was supplied.
        if self.transforms is not None:
            image = self.transforms(image)
        return image, self.data[idx]

6. Run Transfer Tests

In [17]:
# Load the HPSv2 reward model once; run_transfer_test() reads this module-level
# `reward_model`. The checkpoint path is relative to the current working directory.
reward_model = ModelFactory.create_model("hpsv2", "HPS_v2_compressed.pt")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  checkpoint = torch.load(checkpoint_path, map_location=map_location)
  checkpoint = torch.load(self.model_path)


In [18]:
def _read_prompt_entries(prompts_file):
    """Parse a prompts.txt file into a list of the tuples it stores.

    A usable line looks like ``<prefix>: (<python tuple literal>)``. Lines without the
    ": " separator, or whose payload is not wrapped in parentheses, are skipped.
    """
    entries = []
    with open(prompts_file, "r") as pf:
        for line in pf:
            _, separator, content = line.partition(": ")
            content = content.strip()
            if separator and content.startswith("(") and content.endswith(")"):
                entries.append(ast.literal_eval(content))
    return entries


def _load_images_sorted(directory):
    """Load every .png in `directory`, ordered by the first number in its file name."""
    file_names = sorted(
        (f for f in os.listdir(directory) if f.endswith(".png")), key=numerical_key
    )
    images = []
    for file_name in file_names:
        # copy() forces the pixels to be read, so the file handle can be closed.
        with Image.open(os.path.join(directory, file_name)) as handle:
            images.append(handle.copy())
    return images


def _score_images(prompts, images, batch_size):
    """Score `images` against `prompts` with the module-level `reward_model`."""
    dataset = SampledDataset(
        prompts=prompts, images=images,
        transforms=reward_model.preprocess_function
    )
    scores = []
    for image_batch, prompt_batch in DataLoader(dataset, batch_size=batch_size, shuffle=False):
        scores.extend(reward_model.inference(inputs=image_batch, captions=prompt_batch["prompt"]))
    return scores


def run_transfer_test(run_transfer_test_args, batch_size: int = 8):
    """Compare reward scores of original images and their adversarial counterparts.

    Both directories must contain a ``prompts.txt`` and the ``.png`` images it
    describes. The i-th prompt entry of a directory is taken to belong to its i-th
    image in numeric file-name order. Every adversarial image is paired with the
    original image that has the same (category, prompt), so both scores of a pair
    refer to the same prompt even if the two directories differ in order or size.
    Adversarial prompts without a matching original are reported and left out.

    Scoring uses the module-level `reward_model`; `reward_model_name` in the arguments
    is printed with the rest of the namespace but does not select a model here.

    Args:
        run_transfer_test_args: Namespace with `original_images_path` and
            `adversarial_images_path`.
        batch_size (int): Number of images scored per reward-model call.

    Raises:
        ValueError: If a directory holds fewer images than prompt entries.
    """
    print(run_transfer_test_args)

    original_image_directory = run_transfer_test_args.original_images_path
    generated_image_directory = run_transfer_test_args.adversarial_images_path

    original_entries = _read_prompt_entries(os.path.join(original_image_directory, "prompts.txt"))
    generated_entries = _read_prompt_entries(os.path.join(generated_image_directory, "prompts.txt"))
    original_images = _load_images_sorted(original_image_directory)
    generated_images = _load_images_sorted(generated_image_directory)

    for directory, entries, images in (
        (original_image_directory, original_entries, original_images),
        (generated_image_directory, generated_entries, generated_images),
    ):
        if len(images) < len(entries):
            raise ValueError(
                f"'{directory}' has {len(entries)} prompt entries but only {len(images)} .png images."
            )

    # (category, prompt) -> original image; the first two fields of an entry are
    # always category and prompt, any further fields are ignored.
    original_mapping = {
        (entry[0], entry[1]): image
        for entry, image in zip(original_entries, original_images)
    }

    paired_prompts = {"category": [], "prompt": []}
    paired_original_images = []
    paired_generated_images = []
    for entry, generated_image in zip(generated_entries, generated_images):
        key = (entry[0], entry[1])
        original_image = original_mapping.get(key)
        if original_image is None:
            print(f"Skipping prompt without a matching original image: {key}")
            continue
        paired_prompts["category"].append(key[0])
        paired_prompts["prompt"].append(key[1])
        paired_original_images.append(original_image)
        paired_generated_images.append(generated_image)

    # Both score lists follow the order of paired_prompts.
    original_reward_scores = _score_images(paired_prompts, paired_original_images, batch_size)
    generated_reward_scores = _score_images(paired_prompts, paired_generated_images, batch_size)

    generated_prompts_tuples = list(zip(
        paired_prompts["category"],
        paired_prompts["prompt"],
        original_reward_scores,
        generated_reward_scores
    ))

    stats = compute_reward_statistics(generated_prompts_tuples, generated_reward_scores)
    print("\n" + "=" * 40)
    print("Overall Reward Statistics:")
    print(f"  Original: {stats['average_original']} | Adversarial: {stats['average_adversarial']}\n")

    print("Per-Category Comparison:")
    all_categories = set(stats["per_category_original"].keys()).union(stats["per_category_adversarial"].keys())
    for cat in all_categories:
        orig = stats["per_category_original"].get(cat, 0)
        adv = stats["per_category_adversarial"].get(cat, 0)
        print(f"  {cat}: Original = {orig} | Adversarial = {adv}")
    print("=" * 40)

    clear_cuda_memory_and_force_gc(force=True)

6.1 Gaussian Noise Transfer Test

In [19]:
import sys

# Gaussian-noise run: parse_transfer_test_args() reads sys.argv, so the command line
# is emulated here. Each flag is kept together with its value.
sys.argv = (
    ["script_name"]  # argv[0] placeholder; argparse ignores it
    + ["--reward_model_name", "v2.0"]
    + ["--original_images_path", "outputs/stable-diffusion-3-medium-diffusers/hps/2025-03-08-06-08-35"]
    + ["--adversarial_images_path", "outputs/stable-diffusion-3-medium-diffusers/hps_adversarial/gn/v1.0/2025-03-09-05-43-05"]
)

args = parse_transfer_test_args()
run_transfer_test(args)

Namespace(reward_model_name='v2.0', original_images_path='outputs/stable-diffusion-3-medium-diffusers/hps/2025-03-08-06-08-35', adversarial_images_path='outputs/stable-diffusion-3-medium-diffusers/hps_adversarial/gn/v1.0/2025-03-09-05-43-05')


  with torch.cuda.amp.autocast():



Overall Reward Statistics:
  Original: 29.1546875 | Adversarial: 25.83359375

Per-Category Comparison:
  paintings: Original = 28.925 | Adversarial = 25.975
  photo: Original = 30.4765625 | Adversarial = 24.0234375
  anime: Original = 28.669270833333332 | Adversarial = 26.869791666666668
  concept-art: Original = 29.357142857142858 | Adversarial = 25.361607142857142


6.2 FGSM (Aggregate reward score over batches) Transfer Test

In [20]:
# FGSM (aggregate reward) run: parse_transfer_test_args() reads sys.argv, so the
# command line is emulated here. Each flag is kept together with its value.
sys.argv = (
    ["script_name"]  # argv[0] placeholder; argparse ignores it
    + ["--reward_model_name", "v2.0"]
    + ["--original_images_path", "outputs/stable-diffusion-3-medium-diffusers/hps/2025-03-08-06-08-35"]
    + ["--adversarial_images_path", "outputs/stable-diffusion-3-medium-diffusers/hps_adversarial/fgsm/v1.0/2025-03-09-05-43-07"]
)

args = parse_transfer_test_args()
run_transfer_test(args)

Namespace(reward_model_name='v2.0', original_images_path='outputs/stable-diffusion-3-medium-diffusers/hps/2025-03-08-06-08-35', adversarial_images_path='outputs/stable-diffusion-3-medium-diffusers/hps_adversarial/fgsm/v1.0/2025-03-09-05-43-07')


  with torch.cuda.amp.autocast():



Overall Reward Statistics:
  Original: 29.1546875 | Adversarial: 25.9359375

Per-Category Comparison:
  paintings: Original = 28.925 | Adversarial = 26.08125
  photo: Original = 30.4765625 | Adversarial = 24.203125
  anime: Original = 28.669270833333332 | Adversarial = 26.8828125
  concept-art: Original = 29.357142857142858 | Adversarial = 25.515625


6.3 FGSM (Update each image individually) Transfer Test

In [21]:
# FGSM (per-image update) run: parse_transfer_test_args() reads sys.argv, so the
# command line is emulated here. Each flag is kept together with its value.
sys.argv = (
    ["script_name"]  # argv[0] placeholder; argparse ignores it
    + ["--reward_model_name", "v2.0"]
    + ["--original_images_path", "outputs/stable-diffusion-3-medium-diffusers/hps/2025-03-08-06-08-35"]
    + ["--adversarial_images_path", "outputs/stable-diffusion-3-medium-diffusers/hps_adversarial/fgsm/v1.0/2025-03-09-05-43-10"]
)

args = parse_transfer_test_args()
run_transfer_test(args)

Namespace(reward_model_name='v2.0', original_images_path='outputs/stable-diffusion-3-medium-diffusers/hps/2025-03-08-06-08-35', adversarial_images_path='outputs/stable-diffusion-3-medium-diffusers/hps_adversarial/fgsm/v1.0/2025-03-09-05-43-10')


  with torch.cuda.amp.autocast():



Overall Reward Statistics:
  Original: 29.1546875 | Adversarial: 25.9359375

Per-Category Comparison:
  paintings: Original = 28.925 | Adversarial = 26.08125
  photo: Original = 30.4765625 | Adversarial = 24.203125
  anime: Original = 28.669270833333332 | Adversarial = 26.8828125
  concept-art: Original = 29.357142857142858 | Adversarial = 25.515625


6.4 PGD Transfer Test

In [23]:
# PGD run: parse_transfer_test_args() reads sys.argv, so the command line is
# emulated here. Each flag is kept together with its value.
sys.argv = (
    ["script_name"]  # argv[0] placeholder; argparse ignores it
    + ["--reward_model_name", "v2.0"]
    + ["--original_images_path", "outputs/stable-diffusion-3-medium-diffusers/hps/2025-03-08-06-08-35"]
    + ["--adversarial_images_path", "outputs/stable-diffusion-3-medium-diffusers/hps_adversarial/pgd/v1.0/2025-03-09-05-44-04"]
)

args = parse_transfer_test_args()
run_transfer_test(args)

Namespace(reward_model_name='v2.0', original_images_path='outputs/stable-diffusion-3-medium-diffusers/hps/2025-03-08-06-08-35', adversarial_images_path='outputs/stable-diffusion-3-medium-diffusers/hps_adversarial/pgd/v1.0/2025-03-09-05-44-04')


  with torch.cuda.amp.autocast():



Overall Reward Statistics:
  Original: 29.1546875 | Adversarial: 25.80234375

Per-Category Comparison:
  paintings: Original = 28.925 | Adversarial = 25.984375
  photo: Original = 30.4765625 | Adversarial = 24.1796875
  anime: Original = 28.669270833333332 | Adversarial = 26.78125
  concept-art: Original = 29.357142857142858 | Adversarial = 25.296875


6.5 SPSA Transfer Test

In [24]:
# SPSA run: parse_transfer_test_args() reads sys.argv, so the command line is
# emulated here. Each flag is kept together with its value.
sys.argv = (
    ["script_name"]  # argv[0] placeholder; argparse ignores it
    + ["--reward_model_name", "v2.0"]
    + ["--original_images_path", "outputs/stable-diffusion-3-medium-diffusers/hps/2025-03-08-06-08-35"]
    + ["--adversarial_images_path", "outputs/stable-diffusion-3-medium-diffusers/hps_adversarial/spsa/v1.0/2025-03-09-05-45-04"]
)

args = parse_transfer_test_args()
run_transfer_test(args)

Namespace(reward_model_name='v2.0', original_images_path='outputs/stable-diffusion-3-medium-diffusers/hps/2025-03-08-06-08-35', adversarial_images_path='outputs/stable-diffusion-3-medium-diffusers/hps_adversarial/spsa/v1.0/2025-03-09-05-45-04')


  with torch.cuda.amp.autocast():



Overall Reward Statistics:
  Original: 29.1546875 | Adversarial: 25.93359375

Per-Category Comparison:
  paintings: Original = 28.925 | Adversarial = 26.096875
  photo: Original = 30.4765625 | Adversarial = 24.2578125
  anime: Original = 28.669270833333332 | Adversarial = 26.869791666666668
  concept-art: Original = 29.357142857142858 | Adversarial = 25.493303571428573
