In [None]:
import os
import json
import torch
import pyiqa
import logging
import datasets
import torchvision
import numpy as np
from PIL import Image
from tqdm.auto import tqdm

# Process-wide environment: restrict CUDA to device 0 and set ModelScope's log
# level variable to the numeric value of logging.ERROR.
# NOTE(review): this runs after `import torch`; it presumably only takes effect
# because CUDA has not been initialised yet at this point - confirm.
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "0",
    "MODELSCOPE_LOG_LEVEL": str(logging.ERROR),
})
torchvision.disable_beta_transforms_warning()

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

seed = 42  # seed handed to the image generator for every prompt
images_evaluated_per_prompt = 3  # images generated and scored per prompt

## Load Dataset

In [None]:
# Attack settings. Within this notebook they are only used to build the name
# of the results directory that gets evaluated.
n = 250  # number of permutations/trials
m = 3  # number of punctuations injected
k = 3  # number of images generated per permutation
t = 20  # number of inference steps
sampler_name = "NSGAIISampler"  # sampler to get approximate best permutation of perturbations

# Directory layout: images for the original prompts, and the per-experiment
# sub-directory shared by the generated images and the evaluation files.
original_prompt_dir = "generated_images/coco/original_prompt/"
result_dir = (
    "coco/untargeted_attack/punctuation/concurrent_injection/"
    f"n={n}_m={m}_k={k}_t={t}_{sampler_name}/"
)
image_gen_dir = f"generated_images/{result_dir}"
eval_dir = f"eval/{result_dir}"

# The attack results live under the "data" field of results.json.
adversarial_prompts_dataset = datasets.load_dataset(
    "json",
    data_files={"eval": f"{eval_dir}results.json"},
    split="eval",
    field="data",
)

## Generate Images for Evaluation

In [None]:
from diffusers import DiffusionPipeline

# Load the text-to-image pipeline in bfloat16 with the safety checker disabled,
# move it to the selected device and turn off its per-call progress bar.
model_id = "CompVis/stable-diffusion-v1-4"
pipeline = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16, safety_checker=None)
pipeline = pipeline.to(device)
pipeline.set_progress_bar_config(disable=True)

In [None]:
# Generate `images_evaluated_per_prompt` images for every adversarial prompt and
# save them as "<id>_<index>.png" inside `image_gen_dir`.
# NOTE(review): `t` (number of inference steps) is not passed to the pipeline
# here, so its own default step count is used - confirm this is intended.
os.makedirs(image_gen_dir, exist_ok=True)
for entry in tqdm(adversarial_prompts_dataset):
    prompt = entry["Adversarial Prompt"].strip()
    # Re-seed for every prompt so each prompt starts from the same RNG state.
    generator = torch.manual_seed(seed)
    output = pipeline(prompt, num_images_per_prompt=images_evaluated_per_prompt, generator=generator)
    for image_index, image in enumerate(output.images):
        image.save(f"{image_gen_dir}{entry['id']}_{image_index}.png")

## Alignment Evaluation Metrics

### DSG

In [None]:
from DSG.dsg.vqa_utils import MPLUG, calc_vqa_score

# Instantiate the MPLUG wrapper from DSG's vqa_utils with its default arguments;
# the DSG evaluation calls `vqa_model.vqa(image, question)` on it.
vqa_model = MPLUG()

In [None]:
# Load the DSG question sets; the `with` block closes the file on exit.
with open("eval/coco_captions_dsg.json", "r") as f:
    dsg_eval_dataset = json.load(f)
questions = dsg_eval_dataset["data"]

# For every adversarial prompt, ask each DSG question about each generated image
# and average the per-image DSG scores.
# NOTE(review): questions are matched to prompts by position, so this assumes
# coco_captions_dsg.json is ordered like results.json - confirm.
result = {"data": []}
for index, entry in enumerate(tqdm(adversarial_prompts_dataset)):
    qid2question = questions[index]["VQA"]["Question"]
    vqa_record = {"Question": qid2question}
    dsg_scores = []
    for i in range(images_evaluated_per_prompt):
        adversarial_image = Image.open(image_gen_dir + f"{entry['id']}_{i}.png")
        qid2answer = {}
        for qid, question in qid2question.items():
            qid2answer[qid] = vqa_model.vqa(adversarial_image, question).lower()
        vqa_record[f"Answer {i}"] = qid2answer
        dsg_scores.append(calc_vqa_score(qid2answer)["average_score_without_dependency"])
    result["data"].append({
        "Adversarial Prompt": entry["Adversarial Prompt"],
        "VQA": vqa_record,
        "Score": sum(dsg_scores) / len(dsg_scores),
    })

with open(eval_dir + "dsg.json", "w") as f:
    f.write(json.dumps(result))

In [None]:
# Reload the per-prompt DSG records from dsg.json and report their mean score.
dsg_eval_dataset = datasets.load_dataset(
    "json", data_files={"eval": f"{eval_dir}dsg.json"}, split="eval", field="data"
)

print("Sample Eval -", dsg_eval_dataset[0])
dsg_mean = sum(dsg_eval_dataset["Score"]) / len(dsg_eval_dataset)
print("DSG -", dsg_mean)

### VQAScore

In [None]:
from t2v_metrics.t2v_metrics import VQAScore

clip_flant5_score = VQAScore(model="clip-flant5-xl")

# Score every generated image against the *original* prompt and keep the mean
# over the images of each adversarial prompt.
result = {"data": []}
for entry in tqdm(adversarial_prompts_dataset):
    image_paths = [image_gen_dir + f"{entry['id']}_{i}.png" for i in range(images_evaluated_per_prompt)]
    pair_scores = clip_flant5_score(images=image_paths, texts=[entry["Original Prompt"]])
    result["data"].append({
        "Adversarial Prompt": entry["Adversarial Prompt"],
        "Score": pair_scores.detach().cpu().mean().item(),
    })

# The `with` block closes the file on exit.
with open(eval_dir + "vqascore.json", "w") as f:
    f.write(json.dumps(result))

In [None]:
# Reload the per-prompt records from vqascore.json and report their mean score.
vqascore_eval_dataset = datasets.load_dataset(
    "json", data_files={"eval": f"{eval_dir}vqascore.json"}, split="eval", field="data"
)

print("Sample Eval -", vqascore_eval_dataset[0])
vqascore_mean = sum(vqascore_eval_dataset["Score"]) / len(vqascore_eval_dataset)
print("VQAScore -", vqascore_mean)

### CLIPScore

In [None]:
from torchmetrics.multimodal.clip_score import CLIPScore

clip_score = CLIPScore(model_name_or_path="openai/clip-vit-large-patch14").to(device)

# Score each generated image against the *original* prompt, one image at a
# time, and store the mean over the images of each adversarial prompt.
result = {"data": []}
for entry in tqdm(adversarial_prompts_dataset):
    clip_scores = []
    for i in range(images_evaluated_per_prompt):
        adversarial_image = Image.open(image_gen_dir + f"{entry['id']}_{i}.png")
        # HWC pixel array -> CHW tensor on the metric's device.
        pixels = torch.tensor(np.asarray(adversarial_image)).permute(2, 0, 1).to(device)
        clip_scores.append(clip_score(pixels, entry["Original Prompt"]).detach().cpu().item())
    result["data"].append({
        "Adversarial Prompt": entry["Adversarial Prompt"],
        "Score": sum(clip_scores) / len(clip_scores),
    })

# The `with` block closes the file on exit.
with open(eval_dir + "clipscore.json", "w") as f:
    f.write(json.dumps(result))

In [None]:
# Reload the per-prompt records from clipscore.json and report their mean score.
clip_eval_dataset = datasets.load_dataset(
    "json", data_files={"eval": f"{eval_dir}clipscore.json"}, split="eval", field="data"
)

print("Sample Eval -", clip_eval_dataset[0])
clip_mean = sum(clip_eval_dataset["Score"]) / len(clip_eval_dataset)
print("CLIPScore -", clip_mean)

### FID

In [None]:
import numpy as np
from torchmetrics.image.fid import FrechetInceptionDistance

# FID between the images generated from the original prompts ("real") and the
# images generated from the adversarial prompts ("fake").
fid = FrechetInceptionDistance(feature = 192).to(device)
fid_batch_size = 32  # images per update() call; bounds host and device memory


def _load_rgb_tensor(path):
    """Read the image at `path` and return it as a (3, H, W) tensor.

    The file is opened in a context manager so its handle is released as soon
    as the pixels are decoded, instead of keeping every file open at once.
    """
    with Image.open(path) as image:
        return torch.tensor(np.asarray(image.convert("RGB"))).permute(2, 0, 1)


def _update_fid_from_dir(metric, image_dir, prompt_ids, real):
    """Feed all "<id>_<i>.png" images under `image_dir` to `metric` in batches.

    Args:
        metric: the FrechetInceptionDistance instance to update.
        image_dir: directory prefix (with trailing slash) holding the images.
        prompt_ids: ids of the prompts whose images are loaded.
        real: forwarded to `metric.update` to mark the reference or the
            compared distribution.
    """
    paths = [
        image_dir + f"{prompt_id}_{i}.png"
        for prompt_id in prompt_ids
        for i in range(images_evaluated_per_prompt)
    ]
    # Updating in batches keeps only `fid_batch_size` images in memory at a
    # time; the metric accumulates its statistics across update() calls.
    for start in range(0, len(paths), fid_batch_size):
        batch = torch.stack([_load_rgb_tensor(path) for path in paths[start:start + fid_batch_size]])
        metric.update(batch.to(device), real = real)


# Read the ids once rather than re-iterating the dataset for every image index.
prompt_ids = [entry["id"] for entry in adversarial_prompts_dataset]
_update_fid_from_dir(fid, original_prompt_dir, prompt_ids, real = True)
_update_fid_from_dir(fid, image_gen_dir, prompt_ids, real = False)
print("FID -", fid.compute().detach().item())

## Quality Evaluation Metrics

### LIQE

In [None]:
liqe = pyiqa.create_metric("liqe", device=device)

# Score the images of each adversarial prompt as one batch and store the
# batch mean.
result = {"data": []}
for entry in tqdm(adversarial_prompts_dataset):
    image_paths = [image_gen_dir + f"{entry['id']}_{i}.png" for i in range(images_evaluated_per_prompt)]
    pixel_batch = np.stack([Image.open(path) for path in image_paths])
    # NHWC -> NCHW, then divide by 255 (assumes 8-bit images - TODO confirm).
    adversarial_images = torch.tensor(pixel_batch).permute(0, 3, 1, 2).to(device) / 255
    liqe_score = liqe(adversarial_images).detach().cpu().mean().item()
    result["data"].append({"Adversarial Prompt": entry["Adversarial Prompt"], "Score": liqe_score})

# The `with` block closes the file on exit.
with open(eval_dir + "liqe.json", "w") as f:
    f.write(json.dumps(result))

In [None]:
# Reload the per-prompt records from liqe.json and report their mean score.
liqe_eval_dataset = datasets.load_dataset(
    "json", data_files={"eval": f"{eval_dir}liqe.json"}, split="eval", field="data"
)

print("Sample Eval -", liqe_eval_dataset[0])
liqe_mean = sum(liqe_eval_dataset["Score"]) / len(liqe_eval_dataset)
print("LIQE -", liqe_mean)

### MUSIQ

In [None]:
musiq = pyiqa.create_metric("musiq", device=device)

# Score the images of each adversarial prompt as one batch and store the
# batch mean.
result = {"data": []}
for entry in tqdm(adversarial_prompts_dataset):
    image_paths = [image_gen_dir + f"{entry['id']}_{i}.png" for i in range(images_evaluated_per_prompt)]
    pixel_batch = np.stack([Image.open(path) for path in image_paths])
    # NHWC -> NCHW, then divide by 255 (assumes 8-bit images - TODO confirm).
    adversarial_images = torch.tensor(pixel_batch).permute(0, 3, 1, 2).to(device) / 255
    musiq_score = musiq(adversarial_images).detach().cpu().mean().item()
    result["data"].append({"Adversarial Prompt": entry["Adversarial Prompt"], "Score": musiq_score})

# The `with` block closes the file on exit.
with open(eval_dir + "musiq.json", "w") as f:
    f.write(json.dumps(result))

In [None]:
# Reload the per-prompt records from musiq.json and report their mean score.
musiq_eval_dataset = datasets.load_dataset(
    "json", data_files={"eval": f"{eval_dir}musiq.json"}, split="eval", field="data"
)

print("Sample Eval -", musiq_eval_dataset[0])
musiq_mean = sum(musiq_eval_dataset["Score"]) / len(musiq_eval_dataset)
print("MUSIQ -", musiq_mean)

### PIQE

In [None]:
piqe = pyiqa.create_metric("piqe", device=device)

# Score the images of each adversarial prompt as one batch and store the
# batch mean.
result = {"data": []}
for entry in tqdm(adversarial_prompts_dataset):
    image_paths = [image_gen_dir + f"{entry['id']}_{i}.png" for i in range(images_evaluated_per_prompt)]
    pixel_batch = np.stack([Image.open(path) for path in image_paths])
    # NHWC -> NCHW, then divide by 255 (assumes 8-bit images - TODO confirm).
    adversarial_images = torch.tensor(pixel_batch).permute(0, 3, 1, 2).to(device) / 255
    piqe_score = piqe(adversarial_images).detach().cpu().mean().item()
    result["data"].append({"Adversarial Prompt": entry["Adversarial Prompt"], "Score": piqe_score})

# The `with` block closes the file on exit.
with open(eval_dir + "piqe.json", "w") as f:
    f.write(json.dumps(result))

In [None]:
# Reload the per-prompt records from piqe.json and report their mean score.
piqe_eval_dataset = datasets.load_dataset(
    "json", data_files={"eval": f"{eval_dir}piqe.json"}, split="eval", field="data"
)

print("Sample Eval -", piqe_eval_dataset[0])
piqe_mean = sum(piqe_eval_dataset["Score"]) / len(piqe_eval_dataset)
print("PIQE -", piqe_mean)