Mount Google Drive and import libraries

In [None]:
# Mount Google Drive at /content/drive; BASE_PATH (defined in the model-loading
# cell) points inside this mount, so every dataset path depends on it.
from google.colab import drive
drive.mount('/content/drive')

import json
import torch
import pandas as pd
from tqdm import tqdm
from PIL import Image
from transformers import LlavaProcessor, LlavaForConditionalGeneration
from sentence_transformers import SentenceTransformer


Load model

In [None]:
# Root of the project folder on the mounted Google Drive.
BASE_PATH = "/content/drive/MyDrive/VLM_Project"
# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# LLaVA
model_id = "llava-hf/llava-1.5-7b-hf"
processor = LlavaProcessor.from_pretrained(model_id)
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # load the weights in half precision
    device_map="auto"           # device placement is chosen automatically
)
# Inference only: put the model in evaluation mode.
model.eval()

# Sentence embedding model, created directly on the target device.
embedder = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE)

# LLaVA-1.5 is a chat-tuned model and its model card specifies the
# "USER: <image>\n<prompt> ASSISTANT:" template; without the role markers the
# model is being prompted outside the format it was trained on.
USER_PROMPT = "USER: <image>\nDescribe the image in one sentence. ASSISTANT:"


Helper functions

In [3]:
def generate_caption(image_path):
    """Caption a single image with LLaVA using ``USER_PROMPT``.

    Args:
        image_path: Path of the image file to open with PIL.

    Returns:
        str: Only the text generated by the model (at most 50 new tokens),
        with the prompt removed and surrounding whitespace stripped.
    """
    # The context manager releases the file handle as soon as the pixels have
    # been converted into an independent RGB image.
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    inputs = processor(
        text=USER_PROMPT,
        images=image,
        return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=50)

    # generate() returns the prompt tokens followed by the continuation.
    # Decoding the whole sequence would put the instruction text inside the
    # "caption", so keep only the newly generated part.
    prompt_len = inputs["input_ids"].shape[1]
    caption_ids = output[0][prompt_len:]
    return processor.decode(caption_ids, skip_special_tokens=True).strip()


Stage 3b main loop

In [None]:
# Read the attack metadata from Drive. The main loop below slices and iterates
# `metadata` as a sequence of per-attack records.
metadata_path = f"{BASE_PATH}/attack_dataset_all/metadata/attacks.json"
with open(metadata_path) as f:
    metadata = json.load(f)

# results = []

# for record in tqdm(metadata):
#     clean_path = f"{BASE_PATH}/attack_dataset_all/clean_images/{record['clean_image']}"
#     attacked_path = f"{BASE_PATH}/attack_dataset_all/attacked_images/{record['attacked_image']}"

#     clean_text = generate_caption(clean_path)
#     attacked_text = generate_caption(attacked_path)

#     # embedding
#     emb_clean = embedder.encode(clean_text, convert_to_tensor=True)
#     emb_attack = embedder.encode(attacked_text, convert_to_tensor=True)

#     # cosine similarity
#     sim = torch.nn.functional.cosine_similarity(
#         emb_clean.unsqueeze(0),
#         emb_attack.unsqueeze(0)
#     ).item()

#     semantic_shift = 1 - sim

#     results.append({
#         "attack_id": record["image_id"],
#         "attack_type": record["attack_type"],
#         "semantic_shift": semantic_shift,
#         "clean_output": clean_text,
#         "attacked_output": attacked_text
#     })

from concurrent.futures import ThreadPoolExecutor
from PIL import Image
import torch
from tqdm import tqdm

# Number of metadata records handled per iteration. Each record contributes a
# clean and an attacked image, so one generate() call sees 2 * BATCH_SIZE images.
BATCH_SIZE = 16
# One dict per metadata record is appended here by the loop below.
results = []

def load_image(path):
    """Open the image at ``path`` and return it converted to RGB.

    Args:
        path: Path of the image file to open with PIL.

    Returns:
        The RGB-converted image produced by ``Image.convert("RGB")``.
    """
    # Close the underlying file explicitly instead of relying on PIL/GC to do
    # it; convert() returns a new image that no longer needs the open file.
    with Image.open(path) as img:
        return img.convert("RGB")

# Caption every (clean, attacked) image pair in batches and record the semantic
# shift between the two captions: 1 - cosine similarity of their embeddings.
# A single thread pool is reused for all batches instead of being rebuilt on
# every iteration.
with ThreadPoolExecutor() as executor:
    for i in tqdm(range(0, len(metadata), BATCH_SIZE)):
        batch = metadata[i:i+BATCH_SIZE]
        n_pairs = len(batch)

        clean_paths = [f"{BASE_PATH}/attack_dataset_all/clean_images/{r['clean_image']}" for r in batch]
        attacked_paths = [f"{BASE_PATH}/attack_dataset_all/attacked_images/{r['attacked_image']}" for r in batch]

        # executor.map() submits its tasks when it is called, so create both
        # iterators before draining either one; clean and attacked images are
        # then loaded concurrently rather than one group after the other.
        clean_iter = executor.map(load_image, clean_paths)
        attacked_iter = executor.map(load_image, attacked_paths)
        clean_images = list(clean_iter)
        attacked_images = list(attacked_iter)

        # Clean images first, attacked images second: the split at n_pairs
        # below relies on this ordering.
        all_images = clean_images + attacked_images
        all_texts = [USER_PROMPT] * len(all_images)

        # Send inputs to the model's own device (same as generate_caption).
        inputs = processor(text=all_texts, images=all_images, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=60)

        # generate() returns prompt tokens followed by the continuation. The
        # prompt is identical for every image, so decoding it would add the
        # same text to both captions of a pair, pushing every cosine similarity
        # up and the measured semantic shift down. Keep only the new tokens.
        prompt_len = inputs["input_ids"].shape[1]
        generated_ids = outputs[:, prompt_len:]
        texts = [t.strip() for t in processor.batch_decode(generated_ids, skip_special_tokens=True)]

        # batch encode
        embeddings = embedder.encode(texts, convert_to_tensor=True, device=DEVICE, batch_size=32)

        emb_clean = embeddings[:n_pairs]
        emb_attack = embeddings[n_pairs:]

        # Row-wise similarity between each clean caption and its attacked twin.
        sims = torch.nn.functional.cosine_similarity(emb_clean, emb_attack)
        shifts = 1 - sims
        # One host transfer for the whole batch instead of one .item() per row.
        shift_values = shifts.tolist()

        for r, s, t_clean, t_attack in zip(batch, shift_values, texts[:n_pairs], texts[n_pairs:]):
            results.append({
                "attack_id": r["image_id"],
                "attack_type": r["attack_type"],
                "semantic_shift": s,
                "clean_output": t_clean,
                "attacked_output": t_attack
            })



Summarize and save the results

In [None]:
# Turn the per-pair records collected by the main loop into a table.
df = pd.DataFrame(results)

# Report the average semantic shift for each attack type.
print("\n Mean semantic shift by attack type:")
mean_shift_by_type = df.groupby("attack_type")["semantic_shift"].mean()
print(mean_shift_by_type)

# Persist the full per-pair table (without the index column) next to the dataset.
out_path = f"{BASE_PATH}/attack_dataset_all/results_stage3b_semantic_shift.csv"
df.to_csv(out_path, index=False)

print(f"\n Stage 3b results saved to:\n{out_path}")


In [7]:
# Re-declares the same BASE_PATH / DEVICE values assigned in the model-loading
# cell. NOTE(review): presumably kept so the plotting section can be run without
# re-running the model cells -- confirm. DEVICE still needs `torch` to have been
# imported, and the plotting code below only reads BASE_PATH.
BASE_PATH = "/content/drive/MyDrive/VLM_Project"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load Stage 3b semantic shift results
semantic_shift_path = f"{BASE_PATH}/attack_dataset_all/results_stage3b_semantic_shift.csv"
df_shift = pd.read_csv(semantic_shift_path)

# Compute mean semantic shift by attack type (groupby returns the types sorted;
# the same order is handed to the plot so bars and summary line up).
shift_by_type = df_shift.groupby("attack_type")["semantic_shift"]
df_mean = shift_by_type.mean().reset_index()
# Standard deviation per type, used only to size the y-axis. It is NaN for a
# type with a single sample, which is treated as zero spread here.
shift_std = shift_by_type.std().fillna(0).to_numpy()

# Visualization
# Plot from the per-sample table, not from df_mean: df_mean has exactly one row
# per attack type, so a standard-deviation error bar computed from it can never
# show any spread. barplot aggregates the raw rows itself (mean by default).
# `ci="sd"` is kept as in the original call; newer seaborn releases spell the
# same request `errorbar="sd"`.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x="attack_type",
    y="semantic_shift",
    data=df_shift,
    order=df_mean["attack_type"].tolist(),
    palette="pastel",
    ci="sd",
    ax=ax,
)
ax.set_title("Stage 3b: Mean Semantic Shift by Attack Type", fontsize=14)
ax.set_ylabel("Mean Semantic Shift", fontsize=12)
ax.set_xlabel("Attack Type", fontsize=12)
ax.tick_params(axis="x", labelrotation=30)
# Leave headroom above the tallest bar *including* its error bar.
tallest = (df_mean["semantic_shift"].to_numpy() + shift_std).max()
ax.set_ylim(0, tallest * 1.3)
fig.tight_layout()
plt.show()

