In [None]:
# Mount Google Drive at /content/drive; the evaluation CSVs are written there by the save cells below
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Install the LAVIS library; the BLIP-2 model is loaded from it later in this notebook
!pip install salesforce-lavis

In [None]:
# Install the Hugging Face Hub client (-q keeps pip quiet); it is used below to log in
!pip install huggingface_hub -q

In [None]:
import torch
import numpy as np
import random
from PIL import Image
from tqdm.notebook import tqdm

In [None]:
from lavis.models import load_model_and_preprocess
from lavis.processors import load_processor

In [None]:
# Make reproducible code: seed every random number generator used in this notebook
GLOBAL_SEED = 10

np.random.seed(GLOBAL_SEED)  # NumPy global RNG
random.seed(GLOBAL_SEED)  # Python built-in RNG
torch.manual_seed(GLOBAL_SEED)  # PyTorch RNG
# Ask PyTorch to use deterministic implementations of its operations
torch.use_deterministic_algorithms(True)
# NOTE(review): presumably the cuBLAS workspace setting that PyTorch's reproducibility notes
# ask for when deterministic algorithms are enabled on CUDA - confirm it is set before the
# first CUDA call
%env CUBLAS_WORKSPACE_CONFIG=:4096:8

# cuDNN: request deterministic kernels and turn off the benchmark autotuner
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


env: CUBLAS_WORKSPACE_CONFIG=:4096:8


In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
# Always build a torch.device so `device` has the same type on both branches
# (the previous form produced a torch.device on GPU but a plain str on CPU).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load dataset

In [None]:
# Dataset location settings used by the download cell below.
# NOTE(review): "huggigface" is a typo for "huggingface"; the name is kept because the
# download cell references it.
datasetName = "IllusionAnimals" # dataset name; also the folder the download cell changes into
local_dataset_path = "/content/" # where to save dataset folder
huggigface_repository_path = "VQA-Illusion/IllusionAnimals" # hugging-face dataset path

In [None]:
from huggingface_hub import login
from google.colab import userdata

# Read the Hugging Face access token from the Colab secret "HF_TOKEN_ALL" and log in with it
access_token = userdata.get('HF_TOKEN_ALL')
login(token = access_token)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
USERNAME = userdata.get('HUGGINGFACE_USERNAME')
ACCESS_TOKEN = access_token
%cd {local_dataset_path}
!git clone 'https://{USERNAME}:{ACCESS_TOKEN}@huggingface.co/datasets/{huggigface_repository_path}'
# move to dataset directory
%cd {datasetName}

/content
Cloning into 'IllusionAnimals'...
remote: Enumerating objects: 5027, done.[K
remote: Counting objects: 100% (5024/5024), done.[K
remote: Compressing objects: 100% (5024/5024), done.[K
remote: Total 5027 (delta 5), reused 0 (delta 0), pack-reused 3 (from 1)[K
Receiving objects: 100% (5027/5027), 753.63 KiB | 3.01 MiB/s, done.
Resolving deltas: 100% (5/5), done.
Updating files: 100% (5004/5004), done.
Filtering content: 100% (5001/5001), 931.75 MiB | 12.28 MiB/s, done.
/content/IllusionAnimals


In [None]:
import pandas as pd

# Load the dataset's metadata table; the path is relative to the dataset directory
# entered by the preceding %cd
df = pd.read_csv('./df_data.csv')
# Display the table (the notebook renders the last expression of a cell)
df

Unnamed: 0,image_name,Pprompt,Nprompt,illusion_strength,label
0,IllusionAnimals_1,A raging river flowing through a dense jungle ...,low quality,2.5,cat
1,IllusionAnimals_2,A starry night sky over a tranquil lake,low quality,2.5,cat
2,IllusionAnimals_3,Jaguar (Panthera Onca) patrolling South Americ...,low quality,2.5,cat
3,IllusionAnimals_4,"Flowing lava illuminates cavern walls, ancient...",low quality,2.5,cat
4,IllusionAnimals_5,"Dense forest canopy, sunlight filters through,...",low quality,2.5,cat
...,...,...,...,...,...
995,IllusionAnimals_996,Saharan sandstorm turning daytime into darkness,low quality,2.5,rooster
996,IllusionAnimals_997,Urban city with skyscrapers and traffic,low quality,2.5,rooster
997,IllusionAnimals_998,A serene meadow with wildflowers and butterflies,low quality,2.5,rooster
998,IllusionAnimals_999,Elk bugling in crisp autumn air,low quality,2.5,rooster


# Load Model

In [None]:
# Load the pretrained BLIP-2 image-text-matching model in evaluation mode on `device`,
# together with the image and text preprocessors that go with it.
model, vis_processors, text_processors = load_model_and_preprocess(
    "blip2_image_text_matching",
    "pretrain",
    device=device,
    is_eval=True,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

100%|██████████| 1.89G/1.89G [00:11<00:00, 170MB/s]


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

100%|██████████| 712M/712M [00:08<00:00, 89.4MB/s]


# Inference

In [None]:
def answer(model, text_processors, vis_processors, labels, image, method = "itm"):
  """Return the candidate label that the model scores highest for an image.

  Every label is scored separately against the same preprocessed image and the
  first label with the strictly highest score wins.

  Args:
    model: callable invoked as ``model({"image": ..., "text_input": ...}, match_head=method)``.
    text_processors: mapping whose ``"eval"`` entry preprocesses one label string.
    vis_processors: mapping whose ``"eval"`` entry turns ``image`` into a tensor.
    labels: candidate label strings.
    image: image object accepted by ``vis_processors["eval"]``.
    method: matching head passed to the model. For ``"itm"`` the score is the softmax
      probability at index 1 of the model output (presumably the "match" class - confirm
      against the model); for any other value the model output itself is the score.

  Returns:
    ``(best_index, best_label)``; ``("", "")`` when ``labels`` is empty.
  """
  best_label = ""
  best_index = ""
  # Start below every possible score so the first label always becomes the initial best
  max_prob = float("-inf")

  # Pure inference: disable autograd so no computation graph is built for each label
  with torch.no_grad():
    img = vis_processors["eval"](image).unsqueeze(0).to(device)
    for index, label in enumerate(labels):
      txt = text_processors["eval"](label)
      itm_output = model({"image": img, "text_input": txt}, match_head = method)
      if method == "itm":
        itm_score = torch.nn.functional.softmax(itm_output, dim=1)[:, 1].item()
      else:
        # Convert to a Python float so both heads are compared the same way.
        # Assumes the model returns a single-element tensor for the single-image batch.
        itm_score = itm_output.item()

      if itm_score > max_prob:
        max_prob = itm_score
        best_label = label
        best_index = index

  return best_index, best_label


In [None]:
# Animal classes; used unchanged as the candidate labels for the raw images.
raw_labels = [
    "Cat",
    "Dog",
    "Pigeon",
    "Butterfly",
    "Elephant",
    "Horse",
    "Deer",
    "Snake",
    "Fish",
    "Rooster",
]

# Candidate labels for the illusion experiments: one "illusion <animal>" prompt per class,
# followed by a final "no illusion" option.
labels = [f"illusion {animal}" for animal in raw_labels] + ["no illusion"]

In [None]:
# Result stores for the 5 experiments: each maps a matching head ('itm' / 'itc')
# to the list of predicted labels collected by the inference loop.
(
    raw_answers,
    ill_answers,
    ill_less_answers,
    ill_filtered_answers,
    ill_less_filtered_answers,
) = ({'itm' : [], 'itc' : []} for _ in range(5))

In [None]:
# Run the ITC experiment on the five image variants of every dataset row.
# Each entry: (image folder, candidate labels, result store).
itc_experiments = [
    ("raw_images", raw_labels, raw_answers),                        # raw images
    ("ill_images", labels, ill_answers),                            # illusion images
    ("illusionless_images", labels, ill_less_answers),              # illusion-less images
    ("illusion_images_filtered", labels, ill_filtered_answers),     # filtered illusion images
    ("illusionless_images_filtered", labels, ill_less_filtered_answers),  # filtered illusion-less images
]

# Start from empty lists so re-running this cell does not append a second copy of the answers
for _, _, answer_store in itc_experiments:
  answer_store['itc'].clear()

for index, row in tqdm(df.iterrows(), total=len(df)):
  for folder, candidate_labels, answer_store in itc_experiments:
    # Close the image file as soon as the RGB copy has been made
    with Image.open(f"./{folder}/{row['image_name']}.jpg") as image_file:
      image = image_file.convert("RGB")
    # answer() returns (index, label); only the label is stored
    answer_store['itc'].append(answer(model, text_processors, vis_processors, candidate_labels, image, "itc")[1])

  0%|          | 0/1000 [00:00<?, ?it/s]

# Save Results

In [None]:
# Attach the ITC predictions of every experiment to the dataframe as new columns.
# (Only the ITC head is run by the inference loop, so no ITM columns are written.)
itc_result_columns = [
    ('raw_itc_answers', raw_answers),
    ('ill_itc_answers', ill_answers),
    ('ill_less_itc_answers', ill_less_answers),
    ('ill_filtered_itc_answers', ill_filtered_answers),
    ('ill_less_filtered_itc_answers', ill_less_filtered_answers),
]
for column_name, experiment_answers in itc_result_columns:
  df[column_name] = pd.Series(experiment_answers['itc'])

# Write the table (without the dataframe index) to Google Drive
df.to_csv('/content/drive/MyDrive/Final_project/Evaluations/IllusionAnimals_Blip2_itc.csv', index=False)

In [None]:
# Display the dataframe, now including the prediction columns added in the previous cell
df

Unnamed: 0,image_name,Pprompt,Nprompt,illusion_strength,label,raw_itc_answers,ill_itc_answers,ill_less_itc_answers,ill_filtered_itc_answers,ill_less_filtered_itc_answers
0,IllusionAnimals_1,A raging river flowing through a dense jungle ...,low quality,2.5,cat,Cat,illusion Snake,illusion Snake,illusion Cat,illusion Snake
1,IllusionAnimals_2,A starry night sky over a tranquil lake,low quality,2.5,cat,Cat,illusion Fish,illusion Fish,illusion Cat,illusion Snake
2,IllusionAnimals_3,Jaguar (Panthera Onca) patrolling South Americ...,low quality,2.5,cat,Cat,illusion Cat,illusion Cat,illusion Cat,illusion Cat
3,IllusionAnimals_4,"Flowing lava illuminates cavern walls, ancient...",low quality,2.5,cat,Cat,illusion Snake,illusion Snake,illusion Cat,illusion Snake
4,IllusionAnimals_5,"Dense forest canopy, sunlight filters through,...",low quality,2.5,cat,Cat,illusion Cat,illusion Snake,illusion Cat,illusion Snake
...,...,...,...,...,...,...,...,...,...,...
995,IllusionAnimals_996,Saharan sandstorm turning daytime into darkness,low quality,2.5,rooster,Rooster,illusion Dog,illusion Snake,illusion Rooster,illusion Snake
996,IllusionAnimals_997,Urban city with skyscrapers and traffic,low quality,2.5,rooster,Rooster,illusion Rooster,illusion Snake,illusion Rooster,no illusion
997,IllusionAnimals_998,A serene meadow with wildflowers and butterflies,low quality,2.5,rooster,Rooster,illusion Butterfly,illusion Butterfly,illusion Rooster,no illusion
998,IllusionAnimals_999,Elk bugling in crisp autumn air,low quality,2.5,rooster,Rooster,illusion Deer,illusion Deer,illusion Rooster,illusion Deer


# Start from specific index

In [None]:
# First dataframe index to process when resuming; the resume loop skips every row below it
startIndex = 700

In [None]:
# load df: read the previously saved results back from Google Drive so inference can resume.
# NOTE(review): the saved output of this cell shows Mnist rows although the file is named
# IllusionAnimals - it looks like stale output from another run; confirm and re-run.
import pandas as pd

df = pd.read_csv('/content/drive/MyDrive/Final_project/Evaluations/IllusionAnimals_Blip2_itc.csv')
df

Unnamed: 0,image_name,Pprompt,Nprompt,illusion_strength,label,raw_itc_answers,ill_itc_answers,ill_less_itc_answers,ill_filtered_itc_answers,ill_less_filtered_itc_answers
0,Mnist_1,A field of blooming sunflowers swaying in the ...,low quality,1.5,7,digit 7,illusion digit 0,illusion digit 3,illusion digit 0,illusion digit 0
1,Mnist_2,A peaceful countryside scene with grazing shee...,low quality,1.5,2,digit 2,illusion digit 1,illusion digit 9,illusion digit 9,illusion digit 0
2,Mnist_3,A tranquil pond with lily pads floating on the...,low quality,1.5,1,digit 7,no illusion digit,no illusion digit,illusion digit 0,no illusion digit
3,Mnist_4,A sunny vineyard with rows of ripe grapes,low quality,1.5,0,digit 0,illusion digit 9,illusion digit 0,no illusion digit,illusion digit 0
4,Mnist_5,A picturesque vineyard at sunset with the sky ...,low quality,1.5,4,digit 4,illusion digit 9,illusion digit 9,illusion digit 0,illusion digit 0
...,...,...,...,...,...,...,...,...,...,...
1104,Mnist_1105,Misty jungle surrounded by vibrant flowers and...,low quality,1.5,8,,,,,
1105,Mnist_1106,A forest with blooming flowers,low quality,1.5,0,,,,,
1106,Mnist_1107,"Desolate desert landscape, shifting sands illu...",low quality,1.5,5,,,,,
1107,Mnist_1108,A vast desert with a towering canyon in the di...,low quality,1.5,0,,,,,


In [None]:
# inference (resume): fill in the ITC predictions for every row from `startIndex` onward,
# writing each answer straight into the matching dataframe column.
# Each entry: (image folder, candidate labels, result column).
resume_experiments = [
    ("raw_images", raw_labels, "raw_itc_answers"),                           # raw images
    ("ill_images", labels, "ill_itc_answers"),                               # illusion images
    ("illusionless_images", labels, "ill_less_itc_answers"),                 # illusion-less images
    ("illusion_images_filtered", labels, "ill_filtered_itc_answers"),        # filtered illusion images
    ("illusionless_images_filtered", labels, "ill_less_filtered_itc_answers"),  # filtered illusion-less images
]

# Select only the rows still to be processed so the progress bar counts remaining work
# instead of also ticking through the skipped rows.
pending_image_names = df.loc[startIndex:, 'image_name']

for index, image_name in tqdm(pending_image_names.items(), total=len(pending_image_names)):
  for folder, candidate_labels, column in resume_experiments:
    # Close the image file as soon as the RGB copy has been made
    with Image.open(f"./{folder}/{image_name}.jpg") as image_file:
      image = image_file.convert("RGB")
    # answer() returns (index, label); only the label is stored
    df.at[index, column] = answer(model, text_processors, vis_processors, candidate_labels, image, "itc")[1]

In [None]:
# Save the dataframe (without its index) back to the same results file on Google Drive
df.to_csv('/content/drive/MyDrive/Final_project/Evaluations/IllusionAnimals_Blip2_itc.csv', index=False)

# Calculate Accuracy

In [None]:
# load df: switch to the evaluations folder on Google Drive and read the saved results
%cd /content/drive/MyDrive/Final_project/Evaluations/
import pandas as pd

df = pd.read_csv('./IllusionAnimals_Blip2_itc.csv')

/content/drive/MyDrive/Final_project/Evaluations


In [None]:
# Display the loaded results table
df

Unnamed: 0,image_name,Pprompt,Nprompt,illusion_strength,label,raw_itc_answers,ill_itc_answers,ill_less_itc_answers,ill_filtered_itc_answers,ill_less_filtered_itc_answers
0,IllusionAnimals_1,A raging river flowing through a dense jungle ...,low quality,2.5,cat,Cat,illusion Elephant,illusion Elephant,illusion Cat,illusion Snake
1,IllusionAnimals_2,A starry night sky over a tranquil lake,low quality,2.5,cat,Cat,illusion Deer,illusion Deer,illusion Cat,no illusion
2,IllusionAnimals_3,Jaguar (Panthera Onca) patrolling South Americ...,low quality,2.5,cat,Cat,illusion Deer,illusion Elephant,illusion Cat,illusion Cat
3,IllusionAnimals_4,"Flowing lava illuminates cavern walls, ancient...",low quality,2.5,cat,Cat,illusion Snake,illusion Snake,illusion Cat,illusion Snake
4,IllusionAnimals_5,"Dense forest canopy, sunlight filters through,...",low quality,2.5,cat,Cat,illusion Cat,illusion Deer,illusion Cat,illusion Deer
...,...,...,...,...,...,...,...,...,...,...
995,IllusionAnimals_996,Saharan sandstorm turning daytime into darkness,low quality,2.5,rooster,Rooster,illusion Rooster,illusion Elephant,illusion Rooster,no illusion
996,IllusionAnimals_997,Urban city with skyscrapers and traffic,low quality,2.5,rooster,Rooster,illusion Rooster,no illusion,illusion Rooster,no illusion
997,IllusionAnimals_998,A serene meadow with wildflowers and butterflies,low quality,2.5,rooster,Rooster,illusion Butterfly,illusion Deer,illusion Rooster,illusion Deer
998,IllusionAnimals_999,Elk bugling in crisp autumn air,low quality,2.5,rooster,Rooster,illusion Deer,illusion Deer,illusion Rooster,illusion Deer


In [None]:
# Accuracy of the ITC predictions for each of the five experiments.
# Only the ITC head was evaluated; the 'itm' counters are kept at 0.0.

# Expected answer for the illusion-less images. It must equal the last entry of the
# `labels` list used at inference time; it is spelled out here so this section runs
# from the saved CSV alone.
NO_ILLUSION_ANSWER = "no illusion"


def _answers_as_text(column):
  """Return the prediction column as strings, with missing predictions as ''."""
  return df[column].fillna("").astype(str)


def _second_word(column):
  """Return the lowercased second word of each answer ("illusion Cat" -> "cat"); NaN if absent."""
  return _answers_as_text(column).str.lower().str.split().str.get(1)


total = len(df)

# Missing predictions (e.g. from an interrupted run) are counted as wrong instead of
# crashing the whole evaluation; report them so the numbers are not misread.
answer_columns = ['raw_itc_answers', 'ill_itc_answers', 'ill_less_itc_answers',
                  'ill_filtered_itc_answers', 'ill_less_filtered_itc_answers']
missing_count = int(df[answer_columns].isna().sum().sum())
if missing_count:
  print(f"warning: {missing_count} missing predictions are counted as incorrect")

# Element-wise correctness of each experiment
raw_hits = df['label'] == _answers_as_text('raw_itc_answers').str.lower()          # whole answer equals the label
ill_hits = df['label'] == _second_word('ill_itc_answers')                          # animal named after "illusion"
ill_less_hits = df['ill_less_itc_answers'] == NO_ILLUSION_ANSWER                   # model must say "no illusion"
ill_filtered_hits = df['label'] == _second_word('ill_filtered_itc_answers')
ill_less_filtered_hits = df['ill_less_filtered_itc_answers'] == NO_ILLUSION_ANSWER

correct_raw_answers = {'itm' : 0.0, 'itc' : float(raw_hits.sum())}
correct_ill_answers = {'itm' : 0.0, 'itc' : float(ill_hits.sum())}
correct_ill_less_answers = {'itm' : 0.0, 'itc' : float(ill_less_hits.sum())}
correct_ill_filtered_answers = {'itm' : 0.0, 'itc' : float(ill_filtered_hits.sum())}
correct_ill_less_filtered_answers = {'itm' : 0.0, 'itc' : float(ill_less_filtered_hits.sum())}


def _accuracy(correct_count):
  """Return correct_count / total, or NaN when the dataframe is empty."""
  return correct_count / total if total else float("nan")


for title, correct_counts in [
    ("raw:", correct_raw_answers),
    ("illusion:", correct_ill_answers),
    ("illusion less:", correct_ill_less_answers),
    ("illusion filtered: ", correct_ill_filtered_answers),
    ("illusion less filtered: ", correct_ill_less_filtered_answers),
]:
  print(title)
  print("itc:", _accuracy(correct_counts['itc']))

raw:
itc: 1.0
illusion:
itc: 0.356
illusion less:
itc: 0.027
illusion filtered: 
itc: 0.98
illusion less filtered: 
itc: 0.526
