In [1]:
# Root of the results tree. The "model" segment is a placeholder that is
# swapped for the selected model's name once `model_name` is set below.
base_path = "output/model"

# Display label -> result sub-directory (relative to each image's folder).
constraints = {
    "Positive": "Sentiment/Positive/constrained_eps_32_batch_8",
    "Negative": "Sentiment/Negative/constrained_eps_32_batch_8",
    "Neutral": "Sentiment/Neutral/constrained_eps_32_batch_8",
    "Formal": "Formality/Formal/constrained_eps_32_batch_8",
    "Informal": "Formality/Informal/constrained_eps_32_batch_8",
    "English": "Language/English/constrained_eps_32_batch_8",
    "French": "Language/French/constrained_eps_32_batch_8",
    "Spanish": "Language/Spanish/constrained_eps_32_batch_8",
    "Left": "Politics/Left/constrained_eps_32_batch_8",
    "Right": "Politics/Right/constrained_eps_32_batch_8",
    "Spam": "Attack/Spam/constrained_eps_32_batch_8",
    "Injection": "Attack/Injection/constrained_eps_32_batch_8"
}

# For every label, list the result directory of each image.
file_dict = {}
for key, constraint in constraints.items():
    # First add paths with format base_path/i/constraint/
    paths = [f"{base_path}/{i}/{constraint}/" for i in range(0, 5)]
    # Then add paths with format base_path/coco_i/constraint/
    paths.extend(f"{base_path}/coco_{i}/{constraint}/" for i in range(1, 11))
    file_dict[key] = paths

# change the model name to the model you want to use
model_name = "llava"
####################################

# Every supported model uses the same directory layout, so one code path
# covers all of them. Any other name leaves the placeholder paths untouched.
SUPPORTED_MODELS = ("minigpt4", "llava", "blip")

if model_name in SUPPORTED_MODELS:
    # Substitute the placeholder inside the base_path prefix only, so a
    # constraint directory that happens to contain "model" is never rewritten.
    model_root = base_path.replace("model", model_name)
    for category, paths in file_dict.items():
        file_dict[category] = [model_root + path[len(base_path):] for path in paths]

# Model Initialization

In [None]:
# Model Initialization and Embedding Calculation
import torch
from PIL import Image

# Load the selected model and its image preprocessor. Each branch leaves
# `model` in eval mode plus the processor object(s) that the later cells read
# as globals (`image_processor` for llava, `vis_processor` for minigpt4/blip).
if model_name == "llava":
    import argparse
    from llava_llama_2.utils import get_model

    def parse_args():
        """Parse a fixed argument list describing the LLaVA checkpoint to load."""
        parser = argparse.ArgumentParser(description="Demo")
        parser.add_argument("--model_path", type=str, default="./ckpts/llava_llama_2_13b_chat_freeze")
        parser.add_argument("--gpu_id", type=int, default=0, help="specify the gpu to load the model.")
        parser.add_argument("--model_base", type=str, default=None)
        return parser.parse_args(args=['--gpu_id', '0'])

    args = parse_args()
    # get_model's fourth return value is bound to its own name on purpose:
    # assigning it to `model_name` would overwrite the selector that every
    # later cell compares against 'llava'.
    # NOTE(review): presumably this is a checkpoint-derived name -- verify
    # against llava_llama_2.utils.get_model.
    tokenizer, model, image_processor, llava_model_name = get_model(args)
    model.eval()

elif model_name == "minigpt4":
    import argparse
    from minigpt4.common.config import Config
    from minigpt4.common.registry import registry
    from minigpt_utils import prompt_wrapper, generator

    def parse_args():
        """Parse a fixed argument list selecting the MiniGPT-4 eval config and GPU."""
        parser = argparse.ArgumentParser(description="Demo")
        parser.add_argument("--cfg-path", default="eval_configs/minigpt4_eval.yaml", help="path to configuration file.")
        parser.add_argument("--gpu-id", type=int, default=0, help="specify the gpu to load the model.")
        parser.add_argument("--mode", type=str, default='VisualChatBot', choices=["TextOnly", "VisualChatBot"])
        parser.add_argument("--image_file", type=str, default='./image.bmp', help="Image file")
        parser.add_argument("--output_file", type=str, default='./result.jsonl', help="Output file.")
        parser.add_argument("--options", nargs="+")
        return parser.parse_args(args=['--cfg-path', 'eval_configs/minigpt4_eval.yaml',
                                     '--gpu-id', '0', '--mode', 'VisualChatBot',
                                     '--image_file', 'clean_images/0.png',
                                     '--output_file', './result.jsonl'])

    args = parse_args()
    cfg = Config(args)
    model_config = cfg.model_cfg
    model_config.device_8bit = args.gpu_id
    model = registry.get_model_class(model_config.arch).from_config(model_config).to(f'cuda:{args.gpu_id}')
    # Put the model in eval mode, as the llava and blip branches do.
    model.eval()
    vis_processor = registry.get_processor_class(cfg.datasets_cfg.cc_sbu_align.vis_processor.train.name).from_config(cfg.datasets_cfg.cc_sbu_align.vis_processor.train)
    my_generator = generator.Generator(model=model)

elif model_name == "blip":
    import argparse
    from lavis.models import load_model_and_preprocess

    def parse_args():
        """Parse a fixed argument list with the data path and image file for BLIP."""
        parser = argparse.ArgumentParser(description="Demo")
        parser.add_argument("--gpu-id", type=int, default=0, help="specify the gpu to load the model.")
        parser.add_argument("--data_path", type=str, default="instruction_data/0/Sentiment/dataset.csv")
        parser.add_argument("--image_file", type=str, default='./image.bmp', help="Image file")
        parser.add_argument("--output_file", type=str, default='./result.jsonl', help="Output file.")
        parser.add_argument("--instruction", type=str, default=None, choices=["positive", "negative", "neutral", "irony", "non_irony", "formal", "informal", "french", "english", "spanish", "left", "right", "inference_content_evaluation", "injection", "spam"])
        parser.add_argument("--image_index", type=int, default=0)
        return parser.parse_args(args=['--data_path', 'instruction_data/0/Attack/dataset.csv',
                                     '--image_file', 'clean_images/0.png',
                                     '--output_file', './result.jsonl'])

    args = parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model, vis_processor, _ = load_model_and_preprocess(
        name='blip2_vicuna_instruct',
        model_type='vicuna13b',
        is_eval=True,
        device=device
    )
    model.eval()
    # Preprocessed copy of the image named by --image_file; no cell shown in
    # this section reads `img` afterwards.
    img = Image.open(args.image_file).convert('RGB')
    img = vis_processor["eval"](img).unsqueeze(0).to(device)

else:
    # Fail here rather than with a NameError on `model` in a later cell.
    raise ValueError(
        f"Unsupported model_name {model_name!r}; expected 'llava', 'minigpt4' or 'blip'."
    )


# Cosine similarity between the embeddings of perturbed inputs and their transformations

In [4]:
from PIL import Image
from torchvision import transforms
import torch
import matplotlib.pyplot as plt

def denormalize(images):
    """Undo per-channel mean/std normalization on a batch of images.

    Args:
        images: tensor indexed as (batch, channel, height, width) with three
            channels, on any device.

    Returns:
        ``images * std + mean`` with the constants broadcast over the channel
        axis, on the same device as ``images``.
    """
    # Create the constants on the input's device instead of always on CUDA,
    # so CPU tensors and non-default GPUs work too. They stay float32, which
    # keeps the result dtype unchanged for CUDA callers.
    mean = torch.tensor([0.48145466, 0.4578275, 0.40821073], device=images.device)
    std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device=images.device)
    return images * std[None, :, None, None] + mean[None, :, None, None]

def load_image(image_path):
    """Open the image at ``image_path`` and return it converted to RGB.

    The file is opened in a ``with`` block so its handle is released as soon
    as the converted copy exists, rather than whenever the original image
    object is garbage-collected.
    """
    with Image.open(image_path) as raw_image:
        return raw_image.convert('RGB')

def _preprocess_for_model(image, model_name, model):
    """Turn one non-tensor image into the input tensor for ``model_name``.

    Reads the notebook globals set by the model-initialization cell
    (``image_processor`` for llava, ``vis_processor`` for minigpt4/blip,
    ``device`` for blip).
    """
    if model_name == 'llava':
        return image_processor.preprocess(image, return_tensors='pt')['pixel_values'].cuda()
    if model_name == 'minigpt4':
        # Return a bare tensor: calculate_similarity adds the list nesting for
        # the prompt wrapper itself, exactly as it does for tensor inputs.
        return vis_processor(image).unsqueeze(0).to(model.device)
    if model_name == 'blip':
        return vis_processor["eval"](image).unsqueeze(0).to(device)
    raise ValueError(f"Unsupported model_name: {model_name!r}")


def calculate_similarity(image1, image2, model_name, model):
    """Cosine similarity between the model's embeddings of two images.

    Args:
        image1, image2: either already-preprocessed tensors or images that
            the model's processor accepts. Each argument is checked and
            preprocessed on its own.
        model_name: 'llava', 'minigpt4' or 'blip'; selects how the
            embeddings are obtained from ``model``.
        model: the loaded model object for ``model_name``.

    Returns:
        A zero-dimensional float32 tensor: the cosine similarity of the two
        flattened embeddings. It carries no autograd history.

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """
    if model_name not in ('llava', 'minigpt4', 'blip'):
        # Without this check an unknown name surfaced as an UnboundLocalError.
        raise ValueError(f"Unsupported model_name: {model_name!r}")

    cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

    # Process each image that is not already a tensor.
    if not isinstance(image1, torch.Tensor):
        image1 = _preprocess_for_model(image1, model_name, model)
    if not isinstance(image2, torch.Tensor):
        image2 = _preprocess_for_model(image2, model_name, model)

    # Only the similarity value is needed, so skip gradient tracking.
    with torch.no_grad():
        if model_name == 'llava':
            emb1 = model.encode_images(image1.half())
            emb2 = model.encode_images(image2.half())
        elif model_name == 'minigpt4':
            prompt1 = prompt_wrapper.Prompt(model=model, img_prompts=[[image1]])
            prompt2 = prompt_wrapper.Prompt(model=model, img_prompts=[[image2]])
            emb1, emb2 = prompt1.img_embs[0][0], prompt2.img_embs[0][0]
        else:  # 'blip'
            with model.maybe_autocast():
                emb1 = model.ln_vision(model.visual_encoder(image1))
                emb2 = model.ln_vision(model.visual_encoder(image2))

        # reshape (not view) so non-contiguous embeddings flatten as well.
        return cos(emb1.reshape(-1).to(torch.float32), emb2.reshape(-1).to(torch.float32))

In [None]:
from torchvision.utils import save_image

# Per-transform similarity statistics, filled in by the loop further down.
sim_dict = {}

def jpeg_transform(X):
    """Round-trip a preprocessed image tensor through a ``.jpg`` file.

    ``X`` is denormalized, squeezed and written with ``save_image``. The file
    is then read back and preprocessed again with the processor of the
    currently selected ``model_name``.

    Args:
        X: preprocessed image tensor, as produced by the model's processor.

    Returns:
        The re-preprocessed image tensor on the CUDA device.

    Raises:
        ValueError: if the global ``model_name`` is not 'blip', 'llava' or
            'minigpt4' (previously this case silently returned ``None``).
    """
    import os
    import tempfile

    if model_name not in ('blip', 'llava', 'minigpt4'):
        raise ValueError(f"Unsupported model_name: {model_name!r}")

    # Use a private temporary directory so no stray 'dummy.jpg' is left in
    # (or clobbered in) the working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        jpeg_path = os.path.join(tmp_dir, 'dummy.jpg')
        save_image(torch.squeeze(denormalize(X)), jpeg_path)
        image = load_image(jpeg_path)

    if model_name == 'blip':
        return vis_processor["eval"](image).unsqueeze(0).cuda()
    if model_name == 'llava':
        return image_processor.preprocess(image, return_tensors='pt')['pixel_values'].cuda()
    # model_name == 'minigpt4'
    return vis_processor(image).unsqueeze(0).cuda()

# Transformations applied to each preprocessed image tensor.
# NOTE(review): jpeg_transform denormalizes its input first, while the
# torchvision transforms receive the normalized tensor directly -- confirm
# this is intended.
# NOTE(review): RandomPerspective keeps p=0.5 whereas RandomHorizontalFlip is
# forced to p=1 -- confirm that leaving about half the samples untransformed
# is intended.
transformations = [
    jpeg_transform,
    transforms.GaussianBlur(kernel_size=9, sigma=(1.0, 5.0)),
    transforms.RandomAffine(degrees=45),
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.2),
    transforms.RandomHorizontalFlip(p=1),
    transforms.RandomPerspective(distortion_scale=0.5, p=0.5)
]

# Several transforms sample random parameters; fix the seed so that a
# Restart & Run All reproduces the reported numbers.
torch.manual_seed(0)

for transform in transformations:
    similarities = []
    for key, files in file_dict.items():
        for file in files:
            image = load_image(file + "bad_prompt.bmp")
            # Preprocess with the processor that belongs to the selected model.
            if model_name == 'blip':
                image_tensor = vis_processor["eval"](image).unsqueeze(0).to(device)
            elif model_name == 'llava':
                image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'].cuda()
            else:
                image_tensor = vis_processor(image).unsqueeze(0).to(model.device)
            transformed = transform(image_tensor).cuda()
            similarity = calculate_similarity(image_tensor, transformed, model_name, model)
            # Keep the value only; drop any autograd history attached to it.
            similarities.append(similarity.detach())

    sims = torch.stack(similarities)
    sim_dict[transform] = {
        'mean': round(torch.mean(sims).item(), 3),
        'std': round(torch.std(sims).item(), 3)
    }

print(sim_dict)

# Baseline: Cosine similarity between the embeddings of unperturbed inputs and their transformations

In [None]:
# Clean COCO images used as the unperturbed baseline.
file_list = [f"clean_images/coco_{i}.jpg" for i in range(1, 11)]

# Baseline statistics get their own dict. Writing them into `sim_dict` would
# overwrite the perturbed-image results, because both loops use the same
# transform objects as keys.
baseline_sim_dict = {}

# Several transforms sample random parameters; fix the seed so that a
# Restart & Run All reproduces the reported numbers.
torch.manual_seed(0)

for transform in transformations:
    similarities = []
    for file in file_list:
        image = load_image(file)
        # Preprocess with the processor that belongs to the selected model.
        if model_name == 'blip':
            image_tensor = vis_processor["eval"](image).unsqueeze(0).to(device)
        elif model_name == 'llava':
            image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'].cuda()
        else:
            image_tensor = vis_processor(image).unsqueeze(0).to(model.device)
        transformed = transform(image_tensor).cuda()
        similarity = calculate_similarity(image_tensor, transformed, model_name, model)
        # Keep the value only; drop any autograd history attached to it.
        similarities.append(similarity.detach())

    sims = torch.stack(similarities)
    baseline_sim_dict[transform] = {
        'mean': round(torch.mean(sims).item(), 3),
        'std': round(torch.std(sims).item(), 3)
    }

print(baseline_sim_dict)