In [1]:
!pip install peft accelerate transformers datasets bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.13.0->peft)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.13.0->peft)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.13.0->peft)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=1.13.0->peft)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=1.13.0->peft)
  Downloading nvidia_c

In [2]:

!pip uninstall -y pylibcudagraph-cu12 rmm-cu12
!pip install scikit-learn
!pip install timeout-decorator

import pandas as pd
from transformers import BlipProcessor, BlipForQuestionAnswering
from PIL import Image
import torch
from tqdm import tqdm
import gc
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, f1_score
import os
from timeout_decorator import timeout, TimeoutError
from peft import PeftModel

# Load the CSV file
csv_path = "/kaggle/input/dataset-curated-with-split-r/Sf/Sf_qa_data_trimmed_test_r.csv"
pred_path = "/kaggle/working/blip_vqa_finetuned_r16_e5_predctions.csv"
metrics_path = "/kaggle/working/blip_vqa_finetuned_r16_e5_metrics.csv"
df = pd.read_csv(csv_path, header=None, names=["image_path", "question", "ground_truth"])


processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
base_model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
# Load the fine-tuned LoRA adapter on top
model = PeftModel.from_pretrained(base_model, "/kaggle/input/finetuned-blip-lora-r16-e5/blip-vqa-finetuned-r16-e5")

# Move model to GPU
device = torch.device("cuda")
model.to(device)
model.eval()

# List to store skipped entries
skipped_entries = []

# Function to predict answer with timeout
@timeout(30)  # Increased timeout to 30 seconds for stability
def predict_answer(image_path, question):
    try:
        if not os.path.exists(image_path):
            print(f"Image not found: {image_path}")
            return ""
        image = Image.open(image_path).convert("RGB").resize((224, 224))  # Resize for efficiency
        #print(f"QUESTION: {question}")

        # Prepare the VQA prompt (ensure consistency with training)
        prompt = f"Question: {question} Answer:"  # Adjusted prompt to match potential training format
        inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=50, num_beams=5, early_stopping=True)
        predicted_answer = processor.decode(outputs[0], skip_special_tokens=True)
        #print(f"Raw outputs: {outputs}")
        #print(f"Decoded answer: {predicted_answer}")
        del inputs, outputs
        gc.collect()
        torch.cuda.empty_cache()
        return predicted_answer
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return ""

# Normalize answer function for consistent comparison
def normalize_answer(s):
    s = str(s).lower().strip()
    s = ''.join(c for c in s if c.isalnum() or c.isspace())
    return s

# Resume support
start_idx = 0
if os.path.exists(pred_path):
    existing = pd.read_csv(pred_path)
    start_idx = len(existing)
    print(f"Resuming from index {start_idx}")
else:
    existing = pd.DataFrame()

# Lists to store predictions and ground truth
predictions = []
ground_truths = df["ground_truth"].tolist()
y_true = []
y_pred = []
results = []

# Predict answers for each row
for idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing"):
    if idx < start_idx:
        continue
    full_image_path = f"/kaggle/input/dataset-curated-with-split-r/{row['image_path']}"
    if not os.path.exists(full_image_path):
        print(f"Invalid image path: {full_image_path}")
        skipped_entries.append({"row": idx, "image_path": full_image_path, "question": row["question"]})
        continue
    question = row["question"]
    try:
        predicted = predict_answer(full_image_path, question)
    except TimeoutError:
        print(f"Timeout processing row {idx}: {full_image_path}")
        skipped_entries.append({"row": idx, "image_path": full_image_path, "question": question})
        predicted = "TIMEOUT"
    #print(f"pred: {normalize_answer(predicted)}, ground-truth: {normalize_answer(ground_truths[idx])}")
    predictions.append(predicted)
    y_true.append(normalize_answer(ground_truths[idx]))
    y_pred.append(normalize_answer(predicted))
    results.append({
        "img_path": full_image_path,
        "question": question,
        "true_answer": normalize_answer(ground_truths[idx]),
        "predicted_answer": normalize_answer(predicted)
    })

    # Save every 1000 or last item
    if (idx + 1) % 1000 == 0 or (idx + 1) == len(df):
        chunk_df = pd.DataFrame(results)
        if not os.path.exists(pred_path):
            chunk_df.to_csv(pred_path, mode='w', index=False, header=True)
        else:
            chunk_df.to_csv(pred_path, mode='a', index=False, header=False)
        accuracy = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)  # Correct F1 calculation
        metrics_entry = pd.DataFrame([{"step": idx + 1, "accuracy": accuracy, "f1_score": f1}])
        if not os.path.exists(metrics_path):
            metrics_entry.to_csv(metrics_path, mode='w', index=False, header=True)
        else:
            metrics_entry.to_csv(metrics_path, mode='a', index=False, header=False)
        results, y_true, y_pred = [], [], []
        print(f"Checkpoint saved at index {idx + 1}")

# Compute final metrics
predictions_lower = [normalize_answer(pred) for pred in predictions]
ground_truths_lower = [normalize_answer(truth) for truth in ground_truths]
accuracy = accuracy_score(ground_truths_lower, predictions_lower)
precision, recall, f1, _ = precision_recall_fscore_support(
    ground_truths_lower, predictions_lower, average='macro', zero_division=0
)

# Print results
print(f"\nTotal Questions: {len(predictions)}")
print(f"Correct Predictions: {sum([p == t for p, t in zip(predictions_lower, ground_truths_lower)])}")
print(f"Accuracy: {accuracy:.2f}%")
print(f"Macro Precision: {precision:.2f}")
print(f"Macro Recall: {recall:.2f}")
print(f"Macro F1 Score: {f1:.2f}")
print(f"Skipped Entries: {len(skipped_entries)}")
if skipped_entries:
    print("\nSkipped Entries:")
    skipped_df = pd.DataFrame(skipped_entries)
    print(skipped_df)

# Display sample predictions
results_df = pd.DataFrame({
    "Image Path": df["image_path"],
    "Question": df["question"],
    "Ground Truth": ground_truths,
    "Predicted": predictions
})
print("\nSample Predictions:")
print(results_df.head(10))

[0mFound existing installation: rmm-cu12 25.2.0
Uninstalling rmm-cu12-25.2.0:
  Successfully uninstalled rmm-cu12-25.2.0
Collecting timeout-decorator
  Downloading timeout-decorator-0.5.0.tar.gz (4.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: timeout-decorator
  Building wheel for timeout-decorator (setup.py) ... [?25l[?25hdone
  Created wheel for timeout-decorator: filename=timeout_decorator-0.5.0-py3-none-any.whl size=5006 sha256=f1ffeefaeb71d7de684cf7a10ad747f4fb1ecd32b4b02872e0f1efe142c53edf
  Stored in directory: /root/.cache/pip/wheels/aa/cd/d1/51736c6b95846b2613a520ce146a8f305c4016a987bc9faec7
Successfully built timeout-decorator
Installing collected packages: timeout-decorator
Successfully installed timeout-decorator-0.5.0


2025-05-15 18:01:53.865569: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747332114.087118      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747332114.148546      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


preprocessor_config.json:   0%|          | 0.00/445 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.54G [00:00<?, ?B/s]

Processing:   8%|▊         | 1000/11972 [08:28<1:35:07,  1.92it/s]

Checkpoint saved at index 1000


Processing:  17%|█▋        | 2000/11972 [16:56<1:24:02,  1.98it/s]

Checkpoint saved at index 2000


Processing:  25%|██▌       | 3000/11972 [25:22<1:14:29,  2.01it/s]

Checkpoint saved at index 3000


Processing:  33%|███▎      | 4000/11972 [33:27<1:03:47,  2.08it/s]

Checkpoint saved at index 4000


Processing:  42%|████▏     | 5000/11972 [41:33<1:01:40,  1.88it/s]

Checkpoint saved at index 5000


Processing:  50%|█████     | 6000/11972 [50:00<56:13,  1.77it/s]  

Checkpoint saved at index 6000


Processing:  58%|█████▊    | 7000/11972 [59:04<45:01,  1.84it/s]

Checkpoint saved at index 7000


Processing:  67%|██████▋   | 8000/11972 [1:08:04<35:45,  1.85it/s]

Checkpoint saved at index 8000


Processing:  75%|███████▌  | 9000/11972 [1:17:00<27:46,  1.78it/s]

Checkpoint saved at index 9000


Processing:  84%|████████▎ | 10000/11972 [1:25:58<17:48,  1.85it/s]

Checkpoint saved at index 10000


Processing:  92%|█████████▏| 11000/11972 [1:34:56<08:45,  1.85it/s]

Checkpoint saved at index 11000


Processing: 100%|██████████| 11972/11972 [1:43:38<00:00,  1.93it/s]

Checkpoint saved at index 11972

Total Questions: 11972
Correct Predictions: 7715
Accuracy: 0.64%
Macro Precision: 0.23
Macro Recall: 0.23
Macro F1 Score: 0.21
Skipped Entries: 0

Sample Predictions:
                  Image Path                                Question  \
0  Sf/Sf_images/a5457207.jpg               What is the case made of?   
1  Sf/Sf_images/d593a583.jpg        What is the mat's primary color?   
2  Sf/Sf_images/012a0c8b.jpg          What shape is the main design?   
3  Sf/Sf_images/7ab9d896.jpg     How many tiers does the shelf have?   
4  Sf/Sf_images/e0cfa227.jpg      What is the rug's primary pattern?   
5  Sf/Sf_images/d7e4429c.jpg     What is the main color of the case?   
6  Sf/Sf_images/18c4e82d.jpg                  What is the toe style?   
7  Sf/Sf_images/a5671995.jpg         What material forms the number?   
8  Sf/Sf_images/092bba5c.jpg  What is the primary color of the case?   
9  Sf/Sf_images/5836f4d7.jpg                What color is the heart?   

  Groun




In [None]:
!rm -rf /kaggle/working/*