
### 1. Setup and Configuration

1.A. Install Necessary Libraries

In [1]:
# -*- coding: utf-8 -*-
"""
Description @Team: to review:
Modality Conflict Experiments for BLIP-2 and LLaVA on Google Colab.

This script performs two experiments to assess how BLIP-2 and LLaVA handle conflicts between visual and textual information:
1. Text Conflict: The model is presented with an image and a misleading
   textual question based on a generated caption, testing its ability to
   reject the falsehood.
2. Image Conflict: The model is presented with a perturbed image and asked
   to describe it, testing whether it acknowledges the perturbation or
   describes the original semantic content.

Requirements:
- Google Colab environment (GPU recommended: T4 or better).
- Google Drive account for dataset caching.
- Installation of necessary libraries.

Potential Issues & Notes:
- Model loading, especially for BLIP-2, can sometimes be sensitive to library
  versions (transformers, accelerate, bitsandbytes). If encountering errors
  like shape mismatches or KeyErrors during loading, consider:
    - Checking the model's Hugging Face page
    - Pinning library versions.
- Ensure sufficient Colab resources (GPU).
- The first run will download the COCO dataset subset, which may take time depending
  on network speed. Subsequent runs should load from the cache in Google Drive.
"""

!pip install --upgrade -q pip
!pip install --upgrade -q transformers accelerate bitsandbytes torch torchvision torchaudio fiftyone requests Pillow scikit-learn sentence-transformers pandas==2.2.2

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/1.8 MB[0m [31m11.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━[0m [32m1.6/1.8 MB[0m [31m23.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m63.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m865.2/865.2 MB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m393.1/393.1 MB[0m [31

### 1.B. Import Libraries

In [5]:
import torch
import torchvision.transforms as transforms
from transformers import AutoProcessor, Blip2ForConditionalGeneration, LlavaForConditionalGeneration
from PIL import Image
import requests
import os
import re
# import fiftyone as fo
# import fiftyone.zoo as foz
from google.colab import drive
import pandas as pd
import random
import numpy as np
from tqdm.notebook import tqdm
import warnings
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Suppressing warnings
warnings.filterwarnings("ignore", category=UserWarning, module='transformers')

# NLTK resources for text evaluation (stopwords, tokenizer)
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


### 1.C. Configuration Variables

In [29]:
SAMPLE_SIZE = 100  # Number of samples from COCO to use (@Team: adjust samples as needed, but take >10)
BLIP2_MODEL_ID = "Salesforce/blip2-opt-2.7b"
LLAVA_MODEL_ID = "llava-hf/llava-1.5-7b-hf"

# Name for the FiftyOne dataset
DATASET_NAME = "coco-2017-validation-modality-conflict-subset2"

# Path in Google Drive to store cached dataset files ensuring persistence across Colab sessions.
DRIVE_PATH = "/content/drive/MyDrive/VLM_Project_Data"

# Path in Google Drive to store cached dataset files ensuring persistence across Colab sessions.
RESULT_PATH = "/content/drive/MyDrive/VLM_Project_Data/Results"

# Use GPU if available
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

# Conflict map for the Text Conflict experiment
# Keys are words likely to appear in captions, values are plausible conflicting replacements.
# @Team: Review and add more based on COCO dataset object categories
conflict_map = {
    "man": "woman", "woman": "man",
    "boy": "girl", "girl": "boy",
    "cat": "dog", "dog": "cat",
    "car": "bus", "bus": "car",
    "train": "airplane", "airplane": "train",
    "table": "chair", "chair": "table",
    "red": "blue", "blue": "red", "green": "yellow", "yellow": "green",
    "day": "night", "night": "day",
    "indoors": "outdoors", "outdoors": "indoors",
    "standing": "sitting", "sitting": "standing",
    "walking": "running", "running": "walking",
    "open": "closed", "closed": "open",
    "on": "under", "under": "on",
    "left": "right", "right": "left",
    "white": "black", "black": "white",
    "small": "large", "large": "small",
    "sunny": "cloudy", "cloudy": "sunny",
    "tree": "building", "building": "tree",
    "beach": "mountain", "mountain": "beach",
    "phone": "book", "book": "phone"
}

# Image transformations for the Image Conflict experiment
# Apply significant visual changes like rotation and blur.
# @Team: @TODO: V2: Analyze effect of applying global non-semeantic transformation vs other alternatives like masking
PERTURBATION_TRANSFORM = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.GaussianBlur(kernel_size=5),
    transforms.ToTensor()
])

# Keywords to look for in model descriptions to identify acknowledged perturbations.
PERTURBATION_KEYWORDS = ["rotated", "blurry", "blurred", "gaussian blur", "noise", "noisy", "transformed", "distorted", "artifact", "unclear", "obscured"]

Using device: cuda



### 1.D. Mount Google Drive to access the persistent storage path defined in `DRIVE_PATH`.

In [7]:
try:
    drive.mount('/content/drive')
    # Create the target directory if it doesn't exist
    os.makedirs(DRIVE_PATH, exist_ok=True)
    print(f"Google Drive mounted. Cache directory: {DRIVE_PATH}")
except Exception as e:
    print(f"Error mounting Google Drive: {e}")
    print("Dataset caching will not work. Proceeding with local storage (will be lost after session).")
    DRIVE_PATH = "/tmp/VLM_Project_Data_Local" # Fallback to temporary local storage
    os.makedirs(DRIVE_PATH, exist_ok=True)

Mounted at /content/drive
Google Drive mounted. Cache directory: /content/drive/MyDrive/VLM_Project_Data


## 2. Dataset Loading (COCO via FiftyOne)

Load the COCO-2017 validation split using FiftyOne Zoo.

Uses `dataset_dir=DRIVE_PATH` to cache/load data from Google Drive.

Checks if the dataset already exists to avoid re-downloading.

Selects `SAMPLE_SIZE` random samples.

Makes the FiftyOne dataset persistent.

In [None]:
import os
import random
from PIL import Image, ImageDraw
from pycocotools.coco import COCO

def download_coco(split="val2017", save_dir="/content/drive/MyDrive/VLM_Project_Data"):
    """Downloads COCO 2017 images and annotations for a given split."""
    os.makedirs(save_dir, exist_ok=True)
    if split == "val2017":
        img_url = "http://images.cocodataset.org/zips/val2017.zip"
        ann_url = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"
    else:
        raise ValueError(f"Unsupported split: {split}")

    img_zip = os.path.join(save_dir, f"{split}.zip")
    ann_zip = os.path.join(save_dir, "annotations_trainval2017.zip")

    if not os.path.exists(os.path.join(save_dir, split)):
        print(f"Downloading {split} images...")
        os.system(f"wget -q {img_url} -O {img_zip}")
        os.system(f"unzip -q {img_zip} -d {save_dir}")

    if not os.path.exists(os.path.join(save_dir, "annotations")):
        print("Downloading annotations...")
        os.system(f"wget -q {ann_url} -O {ann_zip}")
        os.system(f"unzip -q {ann_zip} -d {save_dir}")

def load_coco_dataset(dataset_name, split, dataset_dir, max_samples):
    """
    Loads a subset of COCO dataset using pycocotools and returns sample images + annotations.

    Arguments:
        dataset_name (str): Custom name (not used directly, kept for compatibility).
        split (str): Either "validation" or "train".
        dataset_dir (str): Directory containing or to store COCO dataset files.
        max_samples (int): Number of images to load.

    Returns:
        list of dicts with 'image', 'info', and 'annotations'.
    """
    split_folder = "val2017" if "val" in split.lower() else "train2017"
    images_dir = os.path.join(dataset_dir, split_folder)
    ann_path = os.path.join(dataset_dir, "annotations", f"instances_{split_folder}.json")

    # Download if not found
    if not os.path.exists(images_dir) or not os.path.exists(ann_path):
        print("Dataset not found locally. Downloading now...")
        download_coco(split=split_folder, save_dir=dataset_dir)

    # Load COCO
    coco = COCO(ann_path)
    img_ids = coco.getImgIds()
    selected_ids = random.sample(img_ids, min(max_samples, len(img_ids)))

    print(f"Loaded {len(selected_ids)} samples from {split_folder}.")

    samples = []
    for img_id in selected_ids:
        img_info = coco.loadImgs(img_id)[0]
        file_path = os.path.join(images_dir, img_info["file_name"])

        if not os.path.exists(file_path):
            print(f"Warning: Image file missing: {file_path}")
            continue

        image = Image.open(file_path).convert("RGB")
        draw = ImageDraw.Draw(image)

        ann_ids = coco.getAnnIds(imgIds=img_id)
        anns = coco.loadAnns(ann_ids)

        for ann in anns:
            x, y, w, h = ann["bbox"]
            category = coco.loadCats(ann["category_id"])[0]["name"]
            draw.rectangle([x, y, x + w, y + h], outline="red", width=2)
            draw.text((x, y), category, fill="red")

        samples.append({
            "image": image,
            "info": img_info,
            "annotations": anns
        })

    if not samples:
        print("Error: No valid samples were loaded.")
        return None

    return samples

Downloading and loading dataset 'coco-2017' split 'validation'.
This may take a while...
Downloading split 'validation' to '/root/fiftyone/coco-2017/validation' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'validation' to '/root/fiftyone/coco-2017/validation' if necessary


Downloading annotations to '/root/fiftyone/coco-2017/tmp-download/annotations_trainval2017.zip'


INFO:fiftyone.utils.coco:Downloading annotations to '/root/fiftyone/coco-2017/tmp-download/annotations_trainval2017.zip'


 100% |██████|    1.9Gb/1.9Gb [15.8s elapsed, 0s remaining, 135.5Mb/s]      


INFO:eta.core.utils: 100% |██████|    1.9Gb/1.9Gb [15.8s elapsed, 0s remaining, 135.5Mb/s]      


Extracting annotations to '/root/fiftyone/coco-2017/raw/instances_val2017.json'


INFO:fiftyone.utils.coco:Extracting annotations to '/root/fiftyone/coco-2017/raw/instances_val2017.json'


Downloading 100 images


INFO:fiftyone.utils.coco:Downloading 100 images


 100% |██████████████████| 100/100 [1.0m elapsed, 0s remaining, 1.9 images/s]      


INFO:eta.core.utils: 100% |██████████████████| 100/100 [1.0m elapsed, 0s remaining, 1.9 images/s]      


Writing annotations for 100 downloaded samples to '/root/fiftyone/coco-2017/validation/labels.json'


INFO:fiftyone.utils.coco:Writing annotations for 100 downloaded samples to '/root/fiftyone/coco-2017/validation/labels.json'


Dataset info written to '/root/fiftyone/coco-2017/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/coco-2017/info.json'


Loading 'coco-2017' split 'validation'


INFO:fiftyone.zoo.datasets:Loading 'coco-2017' split 'validation'


 100% |█████████████████| 100/100 [754.5ms elapsed, 0s remaining, 132.5 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 100/100 [754.5ms elapsed, 0s remaining, 132.5 samples/s]      


Dataset 'coco-2017-validation-modality-conflict-subset2' created


INFO:fiftyone.zoo.datasets:Dataset 'coco-2017-validation-modality-conflict-subset2' created


Copied dataset from /root/fiftyone/coco-2017 to /content/drive/MyDrive/VLM_Project_Data
Selected 100 samples for the experiment.


In [None]:
# load coco dataset using pycocotools and print few sample images
coco_view = load_coco_dataset(DATASET_NAME, "validation", DRIVE_PATH, SAMPLE_SIZE)

if coco_view is None:
    raise SystemExit("Failed to load dataset. Exiting experiment.")

# Display first 3 images (in notebooks)
for sample in coco_view[:3]:
    display(sample["image"])

## 3. Model Loading Utilities

Defining functions to load BLIP-2 and LLaVA models with appropriate quantization to fit within Colab's memory constraints.

In [9]:
def load_blip2(model_id, device, drive_model_path=None):
    """
    Loads the BLIP-2 model and processor with 8-bit quantization.
    If drive_model_path is provided, loads from there or saves to it after download.
    """
    try:
        if drive_model_path and os.path.exists(drive_model_path):
            print(f"Loading BLIP-2 from local path: {drive_model_path}")
            processor = AutoProcessor.from_pretrained(drive_model_path.replace("blip2_model", "blip2_processor"))
            model = Blip2ForConditionalGeneration.from_pretrained(
                drive_model_path,
                load_in_8bit=True,
                device_map="auto",
                torch_dtype=torch.float16
            )
        else:
            print(f"Downloading BLIP-2 model: {model_id}")
            processor = AutoProcessor.from_pretrained(model_id)
            model = Blip2ForConditionalGeneration.from_pretrained(
                model_id,
                load_in_8bit=True,
                device_map="auto",
                torch_dtype=torch.float16
            )

            # Save to Drive for reuse
            if drive_model_path:
                model.save_pretrained(drive_model_path)
                processor.save_pretrained(drive_model_path.replace("blip2_model", "blip2_processor"))
                print(f"Saved BLIP-2 model and processor to: {drive_model_path}")

        print("BLIP-2 model loaded successfully.")
        return model, processor

    except ImportError:
        print("Error: `bitsandbytes` library not found. Please ensure it's installed for 8-bit loading.")
        return None, None
    except Exception as e:
        print(f"Error loading BLIP-2 model: {e}")
        print("Check model ID, internet connection, and library compatibility.")
        return None, None

In [10]:
def load_llava(model_id, device, drive_model_path=None):
    """
    Loads the LLaVA model and processor with 4-bit quantization.
    If drive_model_path is provided, loads from there or saves to it after download.
    """
    try:
        if drive_model_path and os.path.exists(drive_model_path):
            print(f"Loading LLaVA from local path: {drive_model_path}")
            processor = AutoProcessor.from_pretrained(drive_model_path.replace("llava_model", "llava_processor"))
            model = LlavaForConditionalGeneration.from_pretrained(
                drive_model_path,
                load_in_4bit=True,
                device_map="auto",
                torch_dtype=torch.float16
            )
        else:
            print(f"Downloading LLaVA model: {model_id}")
            processor = AutoProcessor.from_pretrained(model_id)
            model = LlavaForConditionalGeneration.from_pretrained(
                model_id,
                load_in_4bit=True,
                device_map="auto",
                torch_dtype=torch.float16
            )

            # Save to Drive for reuse
            if drive_model_path:
                model.save_pretrained(drive_model_path)
                processor.save_pretrained(drive_model_path.replace("llava_model", "llava_processor"))
                print(f"Saved LLaVA model and processor to: {drive_model_path}")

        processor.tokenizer.padding_side = "left"
        print("LLaVA model loaded successfully.")
        return model, processor

    except ImportError:
        print("Error: `bitsandbytes` library not found. Please ensure it's installed for 4-bit loading.")
        return None, None
    except Exception as e:
        print(f"Error loading LLaVA model: {e}")
        print("Check model ID, internet connection, and library compatibility.")
        return None, None

## 4. Helper Functions

Defining helper functions for core tasks: caption generation, creating misleading statements, applying image perturbations, and evaluating
model responses based on heuristic rules.

In [11]:
import torch
import re

def generate_caption(model, processor, image, device, model_type, max_tokens=30, prompt_text=None):
    """
    Generates text (caption or answer) for a given image using BLIP-2 or LLaVA.
    """
    raw_image = image.convert("RGB")
    inputs = None
    full_prompt_for_llava = None

    try:
        if model_type == 'blip2':
            if prompt_text:
                inputs = processor(images=raw_image, text=prompt_text, return_tensors="pt")
            else:
                inputs = processor(images=raw_image, return_tensors="pt")

        elif model_type == 'llava':
            if prompt_text:
                conversation = [
                    {"role": "user", "content": [
                        {"type": "image"},
                        {"type": "text", "text": prompt_text}
                    ]}
                ]
            else:
                conversation = [
                    {"role": "user", "content": [
                        {"type": "image"},
                        {"type": "text", "text": "Describe this image in detail."}
                    ]}
                ]
            full_prompt_for_llava = processor.apply_chat_template(conversation, add_generation_prompt=True)
            inputs = processor(text=full_prompt_for_llava, images=raw_image, return_tensors="pt")

        else:
            raise ValueError("Invalid model_type specified. Use 'blip2' or 'llava'.")

        # Move tensors to device, convert only float tensors to float16 if on cuda
        dtype = torch.float16 if device == "cuda" else torch.float32
        for k, v in inputs.items():
            if v.dtype.is_floating_point:
                inputs[k] = v.to(device=device, dtype=dtype)
            else:
                inputs[k] = v.to(device=device)

        # Generate
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=max_tokens, do_sample=False)

        generated_ids = outputs

        # Decode
        if model_type == 'llava':
            full_decoded_text = processor.decode(generated_ids[0], skip_special_tokens=True).strip()
            assistant_marker = "ASSISTANT:"
            marker_index = full_decoded_text.rfind(assistant_marker)
            if marker_index != -1:
                generated_text = full_decoded_text[marker_index + len(assistant_marker):].strip()
            else:
                input_prompt_base = full_prompt_for_llava.split(assistant_marker)[0]
                cleaned_input_prompt = re.sub(r'<image>|\s+', ' ', input_prompt_base).strip()
                if full_decoded_text.startswith(cleaned_input_prompt):
                    generated_text = full_decoded_text[len(cleaned_input_prompt):].strip()
                else:
                    print("Warning: Could not isolate LLaVA response. Returning full output.")
                    generated_text = full_decoded_text

        else:  # BLIP-2
            input_len = inputs['input_ids'].shape[1] if 'input_ids' in inputs else 0
            new_tokens = generated_ids[0][input_len:] if generated_ids[0].shape[0] > input_len else generated_ids[0]
            generated_text = processor.decode(new_tokens, skip_special_tokens=True).strip()
            if prompt_text and generated_text.startswith(prompt_text):
                generated_text = generated_text[len(prompt_text):].strip()

        return generated_text.replace("</s>", "").strip()

    except Exception as e:
        print(f"Error during generation with {model_type}: {e}")
        return None

In [12]:
def make_misleading_statement(caption, conflict_map):
    """
    Attempts to create a misleading version of a caption by replacing a word
    with its conflicting counterpart from the conflict_map.

    Args:
        caption (str): The original image caption.
        conflict_map (dict): A dictionary mapping words to their conflicts.

    Returns:
        tuple: (misleading_caption, original_word, misleading_word) or
               (original_caption, None, None) if no conflict word found.
    """
    if not caption:
        return caption, None, None

    words = re.findall(r'\b\w+\b', caption.lower()) # Tokenize simply
    available_keys = [word for word in words if word in conflict_map]

    if not available_keys:
        return caption, None, None # No replaceable word found

    # Choose a random word to replace from the available keys
    original_word = random.choice(available_keys)
    misleading_word = conflict_map[original_word]

    # Replace the first occurrence of the original word (case-insensitive)
    # Use regex to handle word boundaries
    misleading_caption = re.sub(r'\b' + re.escape(original_word) + r'\b', misleading_word, caption, count=1, flags=re.IGNORECASE)

    # Check if replacement happened (e.g., if casing was unusual)
    if misleading_caption == caption:
         # Try again with forced lower replacement if first attempt failed
         temp_caption_lower = caption.lower()
         if original_word in temp_caption_lower:
              start_index = temp_caption_lower.find(original_word)
              end_index = start_index + len(original_word)
              misleading_caption = caption[:start_index] + misleading_word + caption[end_index:]
         else: # Still no replacement, return original
              return caption, None, None


    return misleading_caption, original_word, misleading_word

In [13]:
def apply_image_perturbation(image, transform):
    try:
        perturbed = transform(image)
        # If it's a tensor, convert it back to PIL
        if isinstance(perturbed, torch.Tensor):
            perturbed = transforms.ToPILImage()(perturbed)
        return perturbed
    except Exception as e:
        print(f"Error in image perturbation: {e}")
        return None

In [14]:
# Prepare stopwords for evaluation functions
stop_words = set(stopwords.words('english'))

In [15]:
def evaluate_response_text(answer_part, clean_caption, misleading_word):
    """
    Evaluates the model's response in the text conflict experiment using heuristics.

    Args:
        answer_part (str): The extracted answer portion from the model's output.
        clean_caption (str): The original, correct caption for the image.
        misleading_word (str): The specific word used to make the question misleading.

    Returns:
        str: The evaluation category.
    """
    if not answer_part:
        return "Generation Error"

    answer_lower = answer_part.lower()
    misleading_word_lower = misleading_word.lower()

    # 1. Check for explicit rejection (negation)
    negations = ["no,", "no.", "not", "isn't", "aren't", "cannot", "can't", "incorrect", "false", "different"]
    if any(neg in answer_lower for neg in negations):
        # Further check: does it reject the misleading word specifically?
        # Example: "No, it is not a [misleading_word], it is a [correct_word]"
        # Simple check: negation present, misleading word might be absent or present
        return "Correct Rejection"

    # 2. Check for agreement with the falsehood
    # Use word boundaries to avoid partial matches (e.g., 'cat' in 'category')
    if re.search(r'\b' + re.escape(misleading_word_lower) + r'\b', answer_lower):
        return "Agreement with Falsehood"

    # 3. Check for implicit rejection (misleading word absent, but relevant info present)
    # Tokenize clean caption and answer, remove stopwords, check for overlap
    clean_tokens = set(t for t in word_tokenize(clean_caption.lower()) if t.isalnum() and t not in stop_words)
    answer_tokens = set(t for t in word_tokenize(answer_lower) if t.isalnum() and t not in stop_words)

    # Check if misleading word is absent AND there's some overlap with clean caption keywords
    # @Team: Revisit this logic
    if misleading_word_lower not in answer_lower and len(clean_tokens.intersection(answer_tokens)) > 0:
         # Threshold: e.g., at least 2 overlapping keywords or > 20% overlap
         # Simple version: any overlap counts if misleading word is absent
         return "Implicit Rejection"

    # 4. Default: If none of the above, classify as confusion or irrelevance
    return "Confusion/Irrelevance"

In [16]:
def evaluate_response_image(generated_description, clean_caption, perturbation_keywords):
    """
    Evaluates the model's response in the image conflict experiment using heuristics.

    Args:
        generated_description (str): The model's description of the perturbed image.
        clean_caption (str): The original, correct caption for the unperturbed image.
        perturbation_keywords (list): List of keywords indicating perturbation awareness.

    Returns:
        str: The evaluation category.
    """
    if not generated_description:
        return "Generation Error"

    desc_lower = generated_description.lower()

    # 1. Check for acknowledgment of perturbation
    if any(keyword in desc_lower for keyword in perturbation_keywords):
        return "Acknowledged Perturbation"

    # 2. Check for ignoring perturbation (high similarity to clean caption)
    # Use TF-IDF cosine similarity as a more robust measure than simple keyword overlap
    try:
        vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = vectorizer.fit_transform([clean_caption.lower(), desc_lower])
        cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])

        # Define a threshold for similarity (e.g., 0.5 or higher indicates ignoring)
        similarity_threshold = 0.5
        if cosine_sim >= similarity_threshold:
            return "Ignored Perturbation"
    except ValueError:
        # Handle cases where one or both strings are empty after stopword removal etc.
        pass # Fall through to default category

    # 3. Default: If perturbation not acknowledged and description differs significantly
    # from the clean caption, classify as 'Other'.
    return "Other/Irrelevant Description"

## 5. Experiment Execution - Text Conflict

Run the text conflict experiment:

Load models (if not already loaded).

Loop through the dataset samples.

For each sample:
- Generate a 'clean' caption.
- Create a misleading question using the `conflict_map`.
- Query both BLIP-2 and LLaVA with the misleading question.
- Evaluate responses using `evaluate_response_text`.
- Store results.

In [17]:
# --- Load Models ---
blip_model, blip_processor = load_blip2(BLIP2_MODEL_ID, DEVICE)
llava_model, llava_processor = load_llava(LLAVA_MODEL_ID, DEVICE)

# --- Initialize Results Storage ---
text_conflict_results = []
processed_samples_text = 0
skipped_samples_text = 0

# --- Check if models loaded successfully ---
if blip_model is None or llava_model is None:
    raise SystemExit("One or both models failed to load. Cannot proceed with experiments.")

Downloading BLIP-2 model: Salesforce/blip2-opt-2.7b


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


processor_config.json:   0%|          | 0.00/68.0 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


preprocessor_config.json:   0%|          | 0.00/432 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/882 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.56M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.03k [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/122k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/10.0G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

BLIP-2 model loaded successfully.
Downloading LLaVA model: llava-hf/llava-1.5-7b-hf


processor_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/505 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/1.45k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.62M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/41.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/950 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/70.1k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

LLaVA model loaded successfully.


In [18]:
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [20]:
print("\n--- Starting Text Conflict Experiment ---")
pbar_text = tqdm(coco_view, total=len(coco_view))

for idx, sample in enumerate(pbar_text):
    sample_id = sample["info"]["id"]
    pbar_text.set_description(f"Processing Sample {sample_id}")

    # Extract filepath and image
    filepath = os.path.join(DRIVE_PATH, "val2017", sample["info"]["file_name"])  # or "train2017" if using train split
    result_base = {
        'sample_id': sample_id,
        'filepath': filepath,
        'clean_caption': None,
        'misleading_caption': None,
        'original_word': None,
        'misleading_word': None,
        'model': None,
        'raw_output': None,
        'answer_part': None,
        'category': 'Skipped'
    }

    try:
        # 1. Load Image
        try:
            image = sample["image"]
        except Exception as e:
            print(f"Error loading image {filepath}: {e}")
            skipped_samples_text += 1
            continue

        # 2. Generate Clean Caption
        clean_caption = generate_caption(blip_model, blip_processor, image, DEVICE, model_type='blip2', prompt_text="a photo of", max_tokens=50)
        if not clean_caption:
            print(f"Warning: Failed to generate clean caption for {sample_id}. Skipping.")
            skipped_samples_text += 1
            continue
        result_base['clean_caption'] = clean_caption

        # 3. Create Misleading Caption
        misleading_caption, original_word, misleading_word = make_misleading_statement(clean_caption, conflict_map)
        if not misleading_word:
            skipped_samples_text += 1
            continue
        result_base['misleading_caption'] = misleading_caption
        result_base['original_word'] = original_word
        result_base['misleading_word'] = misleading_word

        # 4. Run Models
        models_to_run = {
            "BLIP-2": (blip_model, blip_processor, f"Question: Is this {misleading_caption}? Answer:", 'blip2'),
            "LLaVA": (llava_model, llava_processor, f"Is this {misleading_caption}?", 'llava')
        }

        for model_name, (model, processor, prompt, model_type_flag) in models_to_run.items():
            model_result = result_base.copy()
            model_result['model'] = model_name

            generated_text = generate_caption(model, processor, image, DEVICE, model_type=model_type_flag, prompt_text=prompt, max_tokens=50)
            model_result['raw_output'] = generated_text

            if generated_text:
                answer_part = generated_text
                model_result['answer_part'] = answer_part
                category = evaluate_response_text(answer_part, clean_caption, misleading_word)
                model_result['category'] = category
            else:
                model_result['category'] = "Generation Error"

            text_conflict_results.append(model_result)

        processed_samples_text += 1

    except Exception as e:
        print(f"Error processing sample {sample_id} for text conflict: {e}")
        error_result = result_base.copy()
        error_result['category'] = "Processing Error"
        text_conflict_results.append(error_result)

        error_result_llava = error_result.copy()
        error_result_llava['model'] = 'LLaVA'
        text_conflict_results.append(error_result_llava)

        skipped_samples_text += 1

    pbar_text.set_postfix({"Processed": processed_samples_text, "Skipped": skipped_samples_text})

print(f"\n--- Text Conflict Experiment Finished ---")
print(f"Successfully processed {processed_samples_text} samples.")
print(f"Skipped {skipped_samples_text} samples due to errors or no conflict.")



--- Starting Text Conflict Experiment ---


  0%|          | 0/100 [00:00<?, ?it/s]




--- Text Conflict Experiment Finished ---
Successfully processed 62 samples.
Skipped 38 samples due to errors or no conflict.


## 6. Experiment Execution - Image Conflict

Run the image conflict experiment:

Reuse loaded models.

Loop through the same dataset samples.

For each sample:
- Load the original image and get/generate its clean caption.
- Apply the defined image perturbation (`PERTURBATION_TRANSFORM`).
- Ask both BLIP-2 and LLaVA to describe the *perturbed* image.
- Evaluate responses using `evaluate_response_image`.
- Store results.


In [22]:
# --- Initialize Results Storage ---
image_conflict_results = []
processed_samples_image = 0
skipped_samples_image = 0

# Cache clean captions
clean_captions_cache = {
    res['sample_id']: res['clean_caption']
    for res in text_conflict_results
    if res['clean_caption']
}

# --- Loop Through Samples ---
print("\n--- Starting Image Conflict Experiment ---")
pbar_image = tqdm(coco_view, total=len(coco_view))

for sample in pbar_image:
    sample_id = sample["info"]["id"]
    file_name = sample["info"]["file_name"]
    filepath = os.path.join(DRIVE_PATH, "val2017", file_name)  # or "train2017" if using train split

    pbar_image.set_description(f"Processing Sample {sample_id}")

    result_base = {
        'sample_id': sample_id,
        'filepath': filepath,
        'clean_caption': None,
        'perturbation_type': "Rotation+GaussianBlur",
        'model': None,
        'raw_output': None,
        'category': 'Skipped'
    }

    try:
        # 1. Load Image
        try:
            original_image = sample["image"]  # Already a PIL image
        except Exception as e:
            print(f"Error loading image {filepath}: {e}")
            skipped_samples_image += 1
            continue

        # 2. Get Clean Caption (cached or regenerate)
        clean_caption = clean_captions_cache.get(sample_id)
        if not clean_caption:
            clean_caption = generate_caption(blip_model, blip_processor, original_image, DEVICE, model_type='blip2', prompt_text="a photo of", max_tokens=50)
            if not clean_caption:
                print(f"Warning: Failed to get/generate clean caption for {sample_id}. Skipping image conflict.")
                skipped_samples_image += 1
                continue
        result_base['clean_caption'] = clean_caption

        # 3. Perturb Image
        perturbed_image = apply_image_perturbation(original_image, PERTURBATION_TRANSFORM)
        if perturbed_image is None:
            print(f"Warning: Failed to perturb image for {sample_id}. Skipping.")
            skipped_samples_image += 1
            continue

        # --- Run Models ---
        models_to_run = {
            "BLIP-2": (blip_model, blip_processor, "Question: Describe this image. Answer:", 'blip2'),
            "LLaVA": (llava_model, llava_processor, "Describe this image.", 'llava')
        }

        for model_name, (model, processor, prompt, model_type_flag) in models_to_run.items():
            model_result = result_base.copy()
            model_result['model'] = model_name

            # 4/5. Generate Description of Perturbed Image
            generated_description = generate_caption(model, processor, perturbed_image, DEVICE, model_type=model_type_flag, prompt_text=prompt, max_tokens=50)
            model_result['raw_output'] = generated_description

            if generated_description:
                category = evaluate_response_image(generated_description, clean_caption, PERTURBATION_KEYWORDS)
                model_result['category'] = category
            else:
                model_result['category'] = "Generation Error"

            image_conflict_results.append(model_result)

        processed_samples_image += 1

    except Exception as e:
        print(f"Error processing sample {sample_id} for image conflict: {e}")
        error_result = result_base.copy()
        error_result['category'] = "Processing Error"
        image_conflict_results.append(error_result)

        error_result_llava = error_result.copy()
        error_result_llava['model'] = 'LLaVA'
        image_conflict_results.append(error_result_llava)

        skipped_samples_image += 1

    pbar_image.set_postfix({"Processed": processed_samples_image, "Skipped": skipped_samples_image})

print(f"\n--- Image Conflict Experiment Finished ---")
print(f"Successfully processed {processed_samples_image} samples.")
print(f"Skipped {skipped_samples_image} samples due to errors.")


--- Starting Image Conflict Experiment ---


  0%|          | 0/100 [00:00<?, ?it/s]


--- Image Conflict Experiment Finished ---
Successfully processed 100 samples.
Skipped 0 samples due to errors.


## 7. Results Aggregation and Display

Aggregate the results from both experiments into pandas DataFrames,

calculate summary statistics (counts and percentages per category),
display the summaries. saveing the full results to CSV files in Google Drive. ( Team: Save it in different files to view outputs of different runs)

In [23]:
# --- Convert Results to DataFrames ---
text_conflict_df = pd.DataFrame(text_conflict_results)
image_conflict_df = pd.DataFrame(image_conflict_results)

In [24]:
# --- Function to Calculate and Format Summary ---
def create_summary_table(df, experiment_name):
    print(f"\n--- {experiment_name} Results Summary ---")
    if df.empty or 'model' not in df.columns or 'category' not in df.columns:
        print("No results data found or DataFrame is missing required columns ('model', 'category').")
        return None, None

    # Ensure 'category' is treated as categorical for proper grouping, handle potential NaN/None
    df['category'] = df['category'].fillna('Unknown')
    all_categories = df['category'].unique()
    df['category'] = pd.Categorical(df['category'], categories=sorted(all_categories), ordered=True)


    # Calculate counts
    summary_counts = df.groupby('model')['category'].value_counts(dropna=False).unstack(fill_value=0)

    # Calculate percentages
    summary_pct = summary_counts.apply(lambda x: (x / x.sum() * 100).round(1) if x.sum() > 0 else x, axis=1)

    # Combine counts and percentages for display
    summary_display = pd.DataFrame(index=summary_counts.index)
    for category in summary_counts.columns:
        summary_display[f'{category} (Count)'] = summary_counts[category]
        summary_display[f'{category} (%)'] = summary_pct[category]

    # Reorder columns for better readability (Counts and % together per category)
    cols_ordered = []
    for category in summary_counts.columns:
         cols_ordered.append(f'{category} (Count)')
         cols_ordered.append(f'{category} (%)')
    summary_display = summary_display[cols_ordered]


    print(summary_display)
    return summary_counts, summary_pct # Return raw data for potential further use

In [25]:
# --- Analyze and Display Text Conflict Results ---
text_summary_counts, text_summary_pct = create_summary_table(text_conflict_df, "Text Conflict")

# --- Analyze and Display Image Conflict Results ---
image_summary_counts, image_summary_pct = create_summary_table(image_conflict_df, "Image Conflict")


--- Text Conflict Results Summary ---
        Agreement with Falsehood (Count)  Agreement with Falsehood (%)  \
model                                                                    
BLIP-2                                 0                           0.0   
LLaVA                                 29                          46.8   

        Confusion/Irrelevance (Count)  Confusion/Irrelevance (%)  \
model                                                              
BLIP-2                             56                       90.3   
LLaVA                               0                        0.0   

        Correct Rejection (Count)  Correct Rejection (%)  \
model                                                      
BLIP-2                          6                    9.7   
LLaVA                          32                   51.6   

        Implicit Rejection (Count)  Implicit Rejection (%)  
model                                                       
BLIP-2                      

In [26]:
# --- Display Sample DataFrame Heads ---
print("\n--- Text Conflict DataFrame Head ---")
print(text_conflict_df.head())

print("\n--- Image Conflict DataFrame Head ---")
print(image_conflict_df.head())


--- Text Conflict DataFrame Head ---
   sample_id                                           filepath  \
0     467315  /content/drive/MyDrive/VLM_Project_Data/val201...   
1     467315  /content/drive/MyDrive/VLM_Project_Data/val201...   
2      74733  /content/drive/MyDrive/VLM_Project_Data/val201...   
3      74733  /content/drive/MyDrive/VLM_Project_Data/val201...   
4     427997  /content/drive/MyDrive/VLM_Project_Data/val201...   

                               clean_caption  \
0             a table with flowers in a vase   
1             a table with flowers in a vase   
2              a pizza and a beer on a table   
3              a pizza and a beer on a table   
4  a tennis player with red lines around her   

                           misleading_caption original_word misleading_word  \
0              a chair with flowers in a vase         table           chair   
1              a chair with flowers in a vase         table           chair   
2               a pizza and a bee

In [30]:
# ---  Save Results to CSV in Google Drive Team : Please save it to different files to have runs from different runs to analzye results---
SAVE_RESULTS = True # Set to False to disable saving

if SAVE_RESULTS and os.path.exists('/content/drive/MyDrive'): # Check if Drive is mounted
    try:
        text_csv_path = os.path.join(RESULT_PATH, "text_conflict_results_pycocoRun0503.csv")
        image_csv_path = os.path.join(RESULT_PATH, "image_conflict_results_pycocoRun0503.csv")

        text_conflict_df.to_csv(text_csv_path, index=False)
        image_conflict_df.to_csv(image_csv_path, index=False)

        print(f"\nResults saved to Google Drive:")
        print(f"- Text Conflict: {text_csv_path}")
        print(f"- Image Conflict: {image_csv_path}")
    except Exception as e:
        print(f"\nError saving results to Google Drive: {e}")
else:
    if SAVE_RESULTS:
        print("\nSkipping saving results: Google Drive not mounted or SAVE_RESULTS is False.")



Results saved to Google Drive:
- Text Conflict: /content/drive/MyDrive/VLM_Project_Data/Results/text_conflict_results_pycocoRun0503.csv
- Image Conflict: /content/drive/MyDrive/VLM_Project_Data/Results/image_conflict_results_pycocoRun0503.csv
