In [1]:
# !pip install timeout-decorator --quiet
# !pip uninstall -y pylibcudagraph-cu12 rmm-cu12 --quiet
# !pip install transformers== --quiet
# !pip install --upgrade transformers tokenizers --quiet


In [None]:
!pip uninstall -y pylibcudagraph-cu12 rmm-cu12 --quiet
!pip install torch torchvision --upgrade --quiet
!pip install transformers==4.44.2 --quiet
!pip install accelerate --quiet
!pip install timeout-decorator --quiet
!pip install scikit-learn --quiet


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m865.2/865.2 MB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m393.1/393.1 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m106.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m77.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.7/897.7 kB[0m [31m43.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m571.0/571.0 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.2/200.2 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m49.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import os
import gc
import torch
import pandas as pd
from PIL import Image
from tqdm import tqdm
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, f1_score
from timeout_decorator import timeout, TimeoutError

# === Load your curated dataset ===
base_path = "/kaggle/input/vr-dataset/dataset_curated"
subfolders = [f"S{i}" for i in range(1, 7)]
all_dfs = []

for folder in subfolders:
    csv_path = os.path.join(base_path, folder, f"{folder}_qa_data.csv")
    if os.path.exists(csv_path):
        df = pd.read_csv(csv_path, header=None, names=["image_path", "question", "answer"])
        df["image_path"] = df["image_path"].apply(
            lambda p: os.path.join(base_path, os.path.normpath(p).split("dataset_curated/")[-1])
        )
        df = df[df["image_path"].apply(os.path.exists)].reset_index(drop=True)
        all_dfs.append(df)
        print(f"Loaded {len(df)} examples from {csv_path}")
    else:
        print(f"CSV not found: {csv_path}")

df_all = pd.concat(all_dfs).reset_index(drop=True)
print(f"\nTotal QA pairs loaded: {len(df_all)}")


Loaded 14366 examples from /kaggle/input/vr-dataset/dataset_curated/S1/S1_qa_data.csv
Loaded 14358 examples from /kaggle/input/vr-dataset/dataset_curated/S2/S2_qa_data.csv
Loaded 14367 examples from /kaggle/input/vr-dataset/dataset_curated/S3/S3_qa_data.csv
Loaded 14366 examples from /kaggle/input/vr-dataset/dataset_curated/S4/S4_qa_data.csv
Loaded 14387 examples from /kaggle/input/vr-dataset/dataset_curated/S5/S5_qa_data.csv
Loaded 14376 examples from /kaggle/input/vr-dataset/dataset_curated/S6/S6_qa_data.csv

Total QA pairs loaded: 86220


In [4]:
# === Output paths ===
pred_path = "/kaggle/working/blip_vqa_predictions.csv"
metrics_path = "/kaggle/working/blip_vqa_metrics.csv"

# === Load BLIP model and processor ===
processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl")
device = torch.device("cuda")
model.to(device)
model.eval()

# === Timeout-wrapped function to predict answer ===
@timeout(10)
def predict_answer(image_path, question):
    try:
        if not os.path.exists(image_path):
            print(f"Image not found: {image_path}")
            return ""
        image = Image.open(image_path).convert("RGB")
        image = image.resize((224, 224), Image.BILINEAR)
        prompt = (
            "<image>\n"
            "Based on the image, give strictly one word answer for the following question. "
            f"Question: {question} Answer:"
        )
        inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=25)
        answer = processor.tokenizer.decode(outputs[0], skip_special_tokens=True)
        del inputs, outputs
        gc.collect()
        torch.cuda.empty_cache()
        return answer
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return ""

# === Main evaluation loop ===

print(predict_answer(
    "/kaggle/input/vr-dataset/dataset_curated/S1/S1_images/bec06177.jpg",
    "What is the metal's color?"
))

start_idx = 0  # Change if resuming
predictions = []
ground_truths = df_all["answer"].tolist()
y_true = []
y_pred = []
results = []
skipped_entries = []

MAX_ROWS = 20_000

for idx, row in tqdm(df_all.iterrows(), total=len(df_all), desc="Processing"):
    if idx < start_idx:
        continue
    if idx >= start_idx + MAX_ROWS:
        break

    img_path = row["image_path"]
    question = row["question"]
    try:
        pred = predict_answer(img_path, question)
    except TimeoutError:
        print(f"Timeout at row {idx}: {img_path}")
        skipped_entries.append({"row": idx, "image_path": img_path, "question": question})
        pred = ""

    predictions.append(pred)
    y_true.append(str(row["answer"]).lower())
    y_pred.append(str(pred).lower())
    results.append({
        "img_path": img_path,
        "question": question,
        "true_answer": str(row["answer"]).lower(),
        "predicted_answer": str(pred).lower()
    })

    # checkpoint every 1,000 (or at the end of this slice)
    if (idx + 1) % 1000 == 0 or (idx + 1) == start_idx + MAX_ROWS:
        # save the predictions chunk
        pd.DataFrame(results).to_csv(
            pred_path,
            mode='w' if not os.path.exists(pred_path) else 'a',
            index=False,
            header=not os.path.exists(pred_path)
        )

        # compute sklearn metrics
        acc = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)

        # print & save metrics
        print(
            f"Checkpoint {idx+1}: "
            f"Acc = {acc:.4f}, "
            f"Macro-F1 = {f1:.4f}"
        )
        pd.DataFrame([{
            "step": idx + 1,
            "accuracy": acc,
            "f1_score": f1
        }]).to_csv(
            metrics_path,
            mode='w' if not os.path.exists(metrics_path) else 'a',
            index=False,
            header=not os.path.exists(metrics_path)
        )

        # reset buffers
        results, y_true, y_pred = [], [], []

# === Final evaluation ===

correct = sum(
    1 for p, g in zip(predictions, ground_truths)
    if str(p).lower() == str(g).lower()
)
total = len(predictions)
acc_final = (correct / total) * 100 if total else 0
precision, recall, f1_final, _ = precision_recall_fscore_support(
    [g.lower() for g in ground_truths],
    [p.lower() for p in predictions],
    average='macro', zero_division=0
)

# print summary
print(f"\nTotal Questions: {total}")
print(f"Correct Predictions: {correct}")
print(f"Accuracy: {acc_final:.2f}%")
print(f"Macro Precision: {precision:.2f}")
print(f"Macro Recall: {recall:.2f}")
print(f"Macro F1 Score: {f1_final:.2f}")
print(f"Skipped Entries: {len(skipped_entries)}")
if skipped_entries:
    print(pd.DataFrame(skipped_entries))

# save final predictions CSV
pd.DataFrame({
    "Image Path": df_all["image_path"],
    "Question": df_all["question"],
    "Ground Truth": ground_truths,
    "Predicted": predictions
}).to_csv(pred_path, index=False)

print("\nSample Predictions:")
print(pd.DataFrame({
    "Image Path": df_all["image_path"],
    "Question": df_all["question"],
    "Ground Truth": ground_truths,
    "Predicted": predictions
}).head(10))


2025-05-14 09:06:10.140494: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747213570.340019      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747213570.397886      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


preprocessor_config.json:   0%|          | 0.00/432 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/21.0k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/68.0 [00:00<?, ?B/s]

Some kwargs in processor config are unused and will not have any effect: num_query_tokens. 


config.json:   0%|          | 0.00/2.22k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/128k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.96G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/5.81G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

white


Processing:   1%|          | 1000/86220 [10:26<15:12:19,  1.56it/s]

Checkpoint 1000: Acc = 0.3950, Macro-F1 = 0.1747


Processing:   2%|▏         | 2000/86220 [20:54<15:15:30,  1.53it/s]

Checkpoint 2000: Acc = 0.4440, Macro-F1 = 0.2203


Processing:   3%|▎         | 3000/86220 [31:21<15:04:30,  1.53it/s]

Checkpoint 3000: Acc = 0.4330, Macro-F1 = 0.1970


Processing:   5%|▍         | 4000/86220 [41:42<14:25:03,  1.58it/s]

Checkpoint 4000: Acc = 0.4260, Macro-F1 = 0.2102


Processing:   6%|▌         | 5000/86220 [51:56<14:12:30,  1.59it/s]

Checkpoint 5000: Acc = 0.4240, Macro-F1 = 0.2069


Processing:   7%|▋         | 6000/86220 [1:02:09<13:56:50,  1.60it/s]

Checkpoint 6000: Acc = 0.4300, Macro-F1 = 0.1789


Processing:   8%|▊         | 7000/86220 [1:12:11<13:07:59,  1.68it/s]

Checkpoint 7000: Acc = 0.4580, Macro-F1 = 0.2303


Processing:   9%|▉         | 8000/86220 [1:22:15<13:13:36,  1.64it/s]

Checkpoint 8000: Acc = 0.4070, Macro-F1 = 0.1728


Processing:  10%|█         | 9000/86220 [1:32:15<13:20:57,  1.61it/s]

Checkpoint 9000: Acc = 0.4230, Macro-F1 = 0.1880


Processing:  12%|█▏        | 10000/86220 [1:42:13<12:55:04,  1.64it/s]

Checkpoint 10000: Acc = 0.4440, Macro-F1 = 0.2182


Processing:  13%|█▎        | 11000/86220 [1:52:16<12:34:18,  1.66it/s]

Checkpoint 11000: Acc = 0.4400, Macro-F1 = 0.2165


Processing:  14%|█▍        | 12000/86220 [2:02:19<12:29:08,  1.65it/s]

Checkpoint 12000: Acc = 0.4110, Macro-F1 = 0.2019


Processing:  15%|█▌        | 13000/86220 [2:12:24<12:31:15,  1.62it/s]

Checkpoint 13000: Acc = 0.4340, Macro-F1 = 0.1893


Processing:  16%|█▌        | 14000/86220 [2:22:31<12:30:50,  1.60it/s]

Checkpoint 14000: Acc = 0.4310, Macro-F1 = 0.2123


Processing:  17%|█▋        | 15000/86220 [2:32:55<13:07:15,  1.51it/s]

Checkpoint 15000: Acc = 0.4070, Macro-F1 = 0.1842


Processing:  19%|█▊        | 16000/86220 [2:43:15<12:01:58,  1.62it/s]

Checkpoint 16000: Acc = 0.4210, Macro-F1 = 0.1778


Processing:  20%|█▉        | 17000/86220 [2:53:35<12:06:08,  1.59it/s]

Checkpoint 17000: Acc = 0.4300, Macro-F1 = 0.1679


Processing:  21%|██        | 18000/86220 [3:03:49<11:45:28,  1.61it/s]

Checkpoint 18000: Acc = 0.4320, Macro-F1 = 0.1948


Processing:  22%|██▏       | 19000/86220 [3:14:06<11:28:35,  1.63it/s]

Checkpoint 19000: Acc = 0.4300, Macro-F1 = 0.1878


Processing:  23%|██▎       | 20000/86220 [3:24:22<11:16:39,  1.63it/s]

Checkpoint 20000: Acc = 0.4360, Macro-F1 = 0.2214





AttributeError: 'float' object has no attribute 'lower'