In [3]:
base_path = "output/model"
constraints = {
    "Baseline": "baseline_1",
    "Positive": "Sentiment/Positive/constrained_eps_32_batch_8",
    "Negative": "Sentiment/Negative/constrained_eps_32_batch_8",
    "Neutral": "Sentiment/Neutral/constrained_eps_32_batch_8",
    "Formal": "Formality/Formal/constrained_eps_32_batch_8",
    "Informal": "Formality/Informal/constrained_eps_32_batch_8",
    "English": "Language/English/constrained_eps_32_batch_8",
    "French": "Language/French/constrained_eps_32_batch_8",
    "Spanish": "Language/Spanish/constrained_eps_32_batch_8",
    "Left": "Politics/Left/constrained_eps_32_batch_8",
    "Right": "Politics/Right/constrained_eps_32_batch_8",
    "Spam": "Attack/Spam/constrained_eps_32_batch_8",
    "Injection": "Attack/Injection/constrained_eps_32_batch_8"
}

file_dict = {}
for key, constraint in constraints.items():
    # First add paths with format base_path/i/constraint/
    paths = [f"{base_path}/{i}/{constraint}/" for i in range(0,5)]
    # Then add paths with format base_path/coco_i/constraint/
    paths.extend([f"{base_path}/coco_{i}/{constraint}/" for i in range(1,11)])
    file_dict[key] = paths

model_name="minigpt4"
if model_name == 'minigpt4':
    for category, paths in file_dict.items():
        file_dict[category] = [path.replace("model", model_name) for path in paths]
elif model_name == 'llava':
    for category, paths in file_dict.items():
        file_dict[category] = [path.replace("model", model_name) for path in paths]
elif model_name == 'blip':
    for category, paths in file_dict.items():
        file_dict[category] = [path.replace("model", model_name) for path in paths]

# Binary classfication

In [4]:
import json

def get_classification_result(file_path):
    # Initialize an empty list to store the extracted outputs
    total_length=0
    output_list = []
    # Open the JSON file and read it line by line
    with open(file_path, 'r') as file:
        for line in file:
            # Parse the JSON line
            json_line = json.loads(line)

            # Check if 'output' key exists in the JSON object
            if 'output' in json_line:
                # Add the value of 'output' key to the outputs list
                output_list.append(json_line['output'])
    count=0
    #iterate through the list of instructions and print the sentiment
    for o in output_list:
        if "yes" in o.lower() or "oui" in o.lower():
            count+=1
            total_length+=1
        elif "no" in o.lower():
            total_length+=1
        else:
            count+=1
            total_length+=1
    return count/total_length


classification_dict={}
#iterate through the dictionary and calculate the result
for key in file_dict:
    result_count = []
    for file in file_dict[key]:
        result=get_classification_result(file+"content_classification_result.jsonl")
        result_count.append(result)
    import numpy as np
    mean_value = sum(result_count)/len(result_count)
    std_dev = np.std(result_count)
    classification_dict[key] = {"mean": round(mean_value, 2), "std": round(std_dev, 2)}

# Calculate means
classification_dict["sentiment"] = {
    "mean": (classification_dict["Positive"]["mean"] + classification_dict["Negative"]["mean"] + classification_dict["Neutral"]["mean"])/3,
    "std": np.sqrt((classification_dict["Positive"]["std"]**2 + classification_dict["Negative"]["std"]**2 + classification_dict["Neutral"]["std"]**2)/3)
}
classification_dict["language"] = {
    "mean": (classification_dict["English"]["mean"] + classification_dict["French"]["mean"] + classification_dict["Spanish"]["mean"])/3,
    "std": np.sqrt((classification_dict["English"]["std"]**2 + classification_dict["French"]["std"]**2 + classification_dict["Spanish"]["std"]**2)/3)
}
classification_dict["formality"] = {
    "mean": (classification_dict["Formal"]["mean"] + classification_dict["Informal"]["mean"])/2,
    "std": np.sqrt((classification_dict["Formal"]["std"]**2 + classification_dict["Informal"]["std"]**2)/2)
}
classification_dict["politics"] = {
    "mean": (classification_dict["Left"]["mean"] + classification_dict["Right"]["mean"])/2,
    "std": np.sqrt((classification_dict["Left"]["std"]**2 + classification_dict["Right"]["std"]**2)/2)
}
classification_dict["Attack"] = {
    "mean": (classification_dict["Spam"]["mean"] + classification_dict["Injection"]["mean"])/2,
    "std": np.sqrt((classification_dict["Spam"]["std"]**2 + classification_dict["Injection"]["std"]**2)/2)
}

classification_dict

{'Baseline': {'mean': 1.0, 'std': 0.0},
 'Positive': {'mean': 1.0, 'std': 0.0},
 'Negative': {'mean': 0.97, 'std': 0.09},
 'Neutral': {'mean': 0.93, 'std': 0.25},
 'Formal': {'mean': 1.0, 'std': 0.01},
 'Informal': {'mean': 1.0, 'std': 0.0},
 'English': {'mean': 0.96, 'std': 0.11},
 'French': {'mean': 0.94, 'std': 0.21},
 'Spanish': {'mean': 0.98, 'std': 0.04},
 'Left': {'mean': 0.88, 'std': 0.31},
 'Right': {'mean': 1.0, 'std': 0.01},
 'Spam': {'mean': 0.87, 'std': 0.31},
 'Injection': {'mean': 0.98, 'std': 0.06},
 'sentiment': {'mean': 0.9666666666666667, 'std': 0.153405779986718},
 'language': {'mean': 0.96, 'std': 0.1388044187577134},
 'formality': {'mean': 1.0, 'std': 0.007071067811865475},
 'politics': {'mean': 0.94, 'std': 0.2193171219946131},
 'Attack': {'mean': 0.925, 'std': 0.22327113561766107}}

# Similarity between oringinal and perturbed image
## LLaVA

In [2]:
import argparse
from PIL import Image
import torch
import matplotlib.pyplot as plt

def parse_args():
    parser = argparse.ArgumentParser(description="Demo")
    parser.add_argument("--model_path", type=str, default="./ckpts/llava_llama_2_13b_chat_freeze")
    parser.add_argument("--gpu_id", type=int, default=0, help="specify the gpu to load the model.")
    parser.add_argument("--model_base", type=str, default=None)
    args = parser.parse_args(args=['--gpu_id', '0'])
    return args

# ========================================
#             Model Initialization
# ========================================
print('>>> Initializing Models')
from llava_llama_2.utils import get_model
args = parse_args()
print('model = ', args.model_path)
tokenizer, model, image_processor, model_name = get_model(args)
model.eval()
print('[Initialization Finished]\n')

>>> Initializing Models


You are using the legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565
You are using a model of type llava to instantiate a model of type llava_llama_2. This is not supported for all configurations of models and can yield errors.


model =  ./ckpts/llava_llama_2_13b_chat_freeze
llava_llama_2_13b_chat_freeze




Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

  return torch.load(checkpoint_file, map_location="cpu")


[Initialization Finished]



## Minigpt4

In [1]:
import argparse
import random
import numpy as np
import torch
import torch.backends.cudnn as cudnn
# import gradio as gr
from PIL import Image

from minigpt4.common.config import Config
from minigpt4.common.dist_utils import get_rank
from minigpt4.common.registry import registry

# imports modules for registration
from minigpt4.datasets.builders import *
from minigpt4.models import *
from minigpt4.processors import *
from minigpt4.runners import *
from minigpt4.tasks import *

from minigpt_utils import prompt_wrapper, generator

def parse_args():
    parser = argparse.ArgumentParser(description="Demo")

    parser.add_argument("--cfg-path", default="eval_configs/minigpt4_eval.yaml", help="path to configuration file.")
    parser.add_argument("--gpu-id", type=int, default=0, help="specify the gpu to load the model.")
    parser.add_argument("--mode", type=str, default='VisualChatBot',
                        choices=[ "TextOnly", "VisualChatBot" ],
                        help="Inference Mode: TextOnly: Text model only (Vicuna) \n VisualChatBot: Vision model + Text model (MiniGPT4) ")
    parser.add_argument("--image_file", type=str, default='./image.bmp',
                        help="Image file")
    parser.add_argument("--output_file", type=str, default='./result.jsonl',
                        help="Output file.")
    parser.add_argument(
        "--options",
        nargs="+",
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file (deprecate), "
        "change to --cfg-options instead.",
    )
    args = parser.parse_args(args=['--cfg-path', 'eval_configs/minigpt4_eval.yaml', 
                               '--gpu-id', '0', '--mode', 'VisualChatBot', '--image_file', 'clean_images/0.png', '--output_file', './result.jsonl'])
    # args = parser.parse_args()
    return args

def setup_seeds(config):
    seed = config.run_cfg.seed + get_rank()
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    cudnn.benchmark = False
    cudnn.deterministic = True

# ========================================
#             Model Initialization
# ========================================

print('>>> Initializing Models')
args = parse_args()
cfg = Config(args)
model_config = cfg.model_cfg
model_config.device_8bit = args.gpu_id
model_cls = registry.get_model_class(model_config.arch)
model = model_cls.from_config(model_config).to('cuda:{}'.format(args.gpu_id))
vis_processor_cfg = cfg.datasets_cfg.cc_sbu_align.vis_processor.train
vis_processor = registry.get_processor_class(vis_processor_cfg.name).from_config(vis_processor_cfg)
print('Initialization Finished')
my_generator = generator.Generator(model=model)



>>> Initializing Models
Loading VIT


  state_dict = torch.load(cached_file, map_location="cpu")


Loading VIT Done
Loading Q-Former


100%|██████████| 413M/413M [00:04<00:00, 99.7MB/s] 
  checkpoint = torch.load(cached_file, map_location="cpu")
You are using the legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565


Loading Q-Former Done
Loading LLAMA


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

  return torch.load(checkpoint_file, map_location="cpu")


Loading LLAMA Done
Load BLIP2-LLM Checkpoint: ckpts/pretrained_minigpt4.pth


  ckpt = torch.load(ckpt_path, map_location="cpu")


Initialization Finished


## BLIP

In [2]:
import argparse
import os
import random

import numpy as np
import torch
import torch.backends.cudnn as cudnn
from PIL import Image
import json



def parse_args():

    parser = argparse.ArgumentParser(description="Demo")
    parser.add_argument("--gpu-id", type=int, default=0, help="specify the gpu to load the model.")
    parser.add_argument("--data_path", type=str, default="instruction_data/0/Sentiment/dataset.csv")
    parser.add_argument("--image_file", type=str, default='./image.bmp',
                        help="Image file")
    parser.add_argument("--output_file", type=str, default='./result.jsonl',
                        help="Output file.")
    parser.add_argument("--instruction", type=str, default=None,
                    choices=[ "positive", "negative", "neutral", "irony", "non_irony", "formal", "informal", "french", "english", "spanish", "left","right","inference_content_evaluation","injection","spam"],
                        help="Instruction to be used for the attack.")
    parser.add_argument("--image_index", type=int, default=0)
    # args = parser.parse_args()
    args = parser.parse_args(args=['--data_path', 'instruction_data/0/Attack/dataset.csv', 
                                '--image_file', 'clean_images/0.png', '--output_file', './result.jsonl'])
    return args


# ========================================
#             Model Initialization
# ========================================

print('>>> Initializing Models')

from lavis.models import load_model_and_preprocess

args = parse_args()
device = torch.device("cuda") if torch.cuda.is_available() else "cpu"

# remember to modify the parameter llm_model in ./lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml to the path that store the vicuna weights
model, vis_processor, _ = load_model_and_preprocess(
        name='blip2_vicuna_instruct',
        model_type='vicuna13b',
        is_eval=True,
        device=device,
    )
model.eval()
"""
Source code of the model in:
    ./lavis/models/blip2_models/blip2_vicuna_instruct.py
"""

>>> Initializing Models


  from .autonotebook import tqdm as notebook_tqdm
  state_dict = torch.load(cached_file, map_location="cpu")
Loading checkpoint shards: 100%|██████████| 3/3 [00:09<00:00,  3.14s/it]
100%|██████████| 2.12G/2.12G [00:16<00:00, 138MB/s] 
  checkpoint = torch.load(cached_file, map_location="cpu")


'\nSource code of the model in:\n    ./lavis/models/blip2_models/blip2_vicuna_instruct.py\n'

In [6]:
from PIL import Image
from pytorch_msssim import ssim
from torchvision import transforms
import torch
import matplotlib.pyplot as plt

def denormalize(images):
    mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).cuda()
    std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).cuda()
    images = images * std[None, :, None, None]
    images = images + mean[None, :, None, None]
    return images

def load_image(image_path):
    return Image.open(image_path).convert('RGB')

def calculate_similarity(image1, image2, metric, model, model_name):
    # Prepare images based on model type
    if not isinstance(image1, torch.Tensor):
        if model_name == 'llava':
            image1 = image_processor.preprocess(image1, return_tensors='pt')['pixel_values'].cuda()
            image2 = image_processor.preprocess(image2, return_tensors='pt')['pixel_values'].cuda()
        elif model_name == 'minigpt4':
            image1 = vis_processor(image1).unsqueeze(0).to(model.device)
            image2 = vis_processor(image2).unsqueeze(0).to(model.device)
        elif model_name == 'blip':
            image1 = vis_processor["eval"](image1).unsqueeze(0).to(device)
            image2 = vis_processor["eval"](image2).unsqueeze(0).to(device)
    
    # Calculate similarity based on metric
    if metric == 'ssim':
        return ssim(image1, image2, data_range=1, size_average=True)
    
    # For embedding-based metrics
    cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
    
    if metric == 'llava':
        emb_image1 = model.encode_images(image1.half())
        emb_image2 = model.encode_images(image2.half())
    elif metric == 'minigpt4':
        image1, image2 = [image1], [image2]
        prompt1 = prompt_wrapper.Prompt(model=model, img_prompts=[image1])
        prompt2 = prompt_wrapper.Prompt(model=model, img_prompts=[image2])
        emb_image1 = prompt1.img_embs[0][0]
        emb_image2 = prompt2.img_embs[0][0]
    elif metric == 'blip':
        with model.maybe_autocast():
            emb_image1 = model.ln_vision(model.visual_encoder(image1))
            emb_image2 = model.ln_vision(model.visual_encoder(image2))
    
    return cos(emb_image1.view(-1).to(torch.float32), emb_image2.view(-1).to(torch.float32))

def calculate_transformations_sim(image, metric, model, model_name):
    transformations = [
        transforms.GaussianBlur(kernel_size=9, sigma=(1.0, 5.0)),
        transforms.RandomAffine(degrees=45),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.2),
        transforms.RandomHorizontalFlip(p=1),
        transforms.RandomPerspective(distortion_scale=0.5, p=1)
    ]
    # Prepare image tensor based on model
    if model_name == 'llava':
        image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'].cuda()
    elif model_name == 'minigpt4':
        image_tensor = vis_processor(image).unsqueeze(0).cuda()
    elif model_name == 'blip':
        image_tensor = vis_processor["eval"](image).unsqueeze(0).to(device)
    
    # Calculate similarity for each transformation
    similarities = []
    for transform in transformations:
        transformed_image = transform(image_tensor)
        denorm_img = denormalize(image_tensor)
        denorm_transformed = denormalize(transformed_image)
        sim = calculate_similarity(denorm_img, denorm_transformed, metric, model, model_name)
        similarities.append(sim)
    
    return sum(similarities) / len(similarities)


In [10]:
sim_dict={}
# sim_metric='minigpt4'
sim_metric='minigpt4'
model_name='minigpt4'
# sim_metric='ssim'
temp_list=[]
for i in range(1,10):
    image_file1="clean_images/coco_" + str(i) + ".jpg"
    image1 = load_image(image_file1)
    for j in range(i+1,11):
        image_file2="clean_images/coco_" + str(j) + ".jpg"
        image2 = load_image(image_file2)
        sim =calculate_similarity(image1, image2, sim_metric, model, model_name)
        temp_list.append(sim)
sim_dict['random pairs'] = {'mean': round(torch.mean(torch.stack(temp_list)).item(), 3),
                           'std': round(torch.std(torch.stack(temp_list)).item(), 3)}
for key in file_dict:
    if key=="Baseline":
        continue
    temp_list=[]
    for file in file_dict[key]:
        image_file1=file+"clean_prompt.bmp"
        image1 = load_image(image_file1)
        image_file2=file+"bad_prompt.bmp"
        image2 = load_image(image_file2)
        sim =calculate_similarity(image1, image2, sim_metric, model, model_name)
        temp_list.append(sim)
    #add key and the mean of the cosine similarity to a dictionary
    sim_dict[key]= {'mean': round(torch.mean(torch.stack(temp_list)).item(), 3),
                           'std': round(torch.std(torch.stack(temp_list)).item(), 3)}

# Call the function with the dummy_loader and the list of transformations
temp_list=[]
for i in range(1,11):
    image_file="clean_images/coco_" + str(i) + ".jpg"
    image = load_image(image_file)
    temp_list.append(calculate_transformations_sim(image, sim_metric, model, model_name))
sim_dict['augmentations']= {'mean': round(torch.mean(torch.stack(temp_list)).item(), 3),
                           'std': round(torch.std(torch.stack(temp_list)).item(), 3)}

  return torch.cuda.amp.autocast(dtype=dtype)


In [14]:
import numpy as np
n = 15  # sample size per sub row

# Define groups
groups = {
    'sentiment': ['Positive', 'Negative', 'Neutral'],
    'language': ['English', 'French', 'Spanish'],
    'formality': ['Formal', 'Informal'],
    'politics': ['Left', 'Right'],
    'attack': ['Spam', 'Injection']
}

result = {}

for task, keys in groups.items():
    means = np.array([sim_dict[k]['mean'] for k in keys])
    stds = np.array([sim_dict[k]['std'] for k in keys])
    
    mean_avg = np.mean(means)
    
    S_within = np.sum((n - 1) * stds**2)
    S_between = np.sum(n * (means - mean_avg)**2)
    S_total = S_within + S_between
    var_avg = S_total / (len(keys) * n - 1)
    std_avg = np.sqrt(var_avg)
    
    result[task] = {'mean': mean_avg, 'std': std_avg}

# Compute overall across all 5 tasks
all_means = np.array([result[task]['mean'] for task in result])
all_stds = np.array([result[task]['std'] for task in result])

mean_overall = np.mean(all_means)

S_within_overall = np.sum((n - 1) * all_stds**2)
S_between_overall = np.sum(n * (all_means - mean_overall)**2)
S_total_overall = S_within_overall + S_between_overall
var_overall = S_total_overall / (len(all_means) * n - 1)
std_overall = np.sqrt(var_overall)

# Print results
for task in result:
    print(f"{task}: mean = {result[task]['mean']:.3f}, std = {result[task]['std']:.3f}")
print(f"\nOverall: mean = {mean_overall:.3f}, std = {std_overall:.3f}")

sentiment: mean = 0.557, std = 0.121
language: mean = 0.584, std = 0.119
formality: mean = 0.600, std = 0.096
politics: mean = 0.568, std = 0.136
attack: mean = 0.592, std = 0.136

Overall: mean = 0.580, std = 0.120
