In [1]:
!pip install transformers accelerate bert-score

Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=2.0.0->accelerate)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch>=2.0.0->accelerate)
  

In [None]:
!pip uninstall -y pylibcudagraph-cu12 rmm-cu12
!pip install transformers  # Use latest version for bakLlava compatibility
!pip install scikit-learn
!pip install timeout-decorator  # Install timeout-decorator

import pandas as pd
from transformers import AutoProcessor, LlavaForConditionalGeneration
from PIL import Image
import torch
from tqdm import tqdm
import gc
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import os
from sklearn.metrics import accuracy_score, f1_score
from timeout_decorator import timeout, TimeoutError  # Import timeout decorator

# Load the CSV file
csv_path = "/kaggle/input/dataset-curated-with-split-r/Sf/Sf_qa_data_trimmed_test_r.csv"
pred_path = "/kaggle/working/bakllava_vqa_predictions_new.csv"
metrics_path = "/kaggle/working/bakllava_vqa_metrics_new.csv"
df = pd.read_csv(csv_path, header=None, names=["image_path", "question", "ground_truth"])

# Load bakLlava processor and model
model_id = "llava-hf/bakLlava-v1-hf"
processor = AutoProcessor.from_pretrained(model_id)
model = LlavaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16)

# Move model to GPU
device = torch.device("cuda")
model.to(device)
model.eval()

# List to store skipped entries
skipped_entries = []

# Function to predict answer with timeout
@timeout(10)  # Set timeout to 10 seconds
def predict_answer(image_path, question):
    try:
        if not os.path.exists(image_path):
            print(f"Image not found: {image_path}")
            return ""
        image = Image.open(image_path).convert("RGB")

        # Prepare the VQA prompt for bakLlava
        prompt = f"[INST] <image> Question: {question} Answer in one word: [/INST]"
        inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch.float16)

        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=20)
        predicted_answer = processor.decode(outputs[0], skip_special_tokens=True).strip()

        # Extract one-word answer (post-process if needed)
        predicted_answer = predicted_answer.split()[-1]  # Take the last word as the answer

        del inputs, outputs
        gc.collect()
        torch.cuda.empty_cache()
        return predicted_answer
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return ""

# Resume support
start_idx = 0
if os.path.exists(pred_path):
    existing = pd.read_csv(pred_path)
    start_idx = len(existing)
    print(f"Resuming from index {start_idx}")
else:
    existing = pd.DataFrame()

# Lists to store predictions and ground truth
predictions = []
ground_truths = df["ground_truth"].tolist()
y_true = []
y_pred = []
results = []

# Predict answers for each row
for idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing"):
    if idx < start_idx:
        continue  # Ignore those already done
    # if idx == 100:
    #     break  # Ignore those already done
    full_image_path = f"/kaggle/input/dataset-curated-with-split-r/{row['image_path']}"
    question = row["question"]
    try:
        predicted = predict_answer(full_image_path, question)
    except TimeoutError:
        print(f"Timeout processing row {idx}: {full_image_path}")
        skipped_entries.append({"row": idx, "image_path": full_image_path, "question": question})
        predicted = ""  # Skip this entry
    predictions.append(predicted)

    y_true.append(str(ground_truths[idx]).lower())
    y_pred.append(str(predicted).lower())

    results.append({
        "img_path": full_image_path,
        "question": question,
        "true_answer": str(ground_truths[idx]).lower(),
        "predicted_answer": str(predicted).lower()
    })
    if idx <= 100:
        match = "✅" if y_pred[-1] == y_true[-1] else "❌"
        print(f"[{idx}] Truth: {y_true[-1]} | Predicted: {y_pred[-1]} | Match: {match}")

    # Save every 1000 or last item
    if (idx + 1) % 1000 == 0 or (idx + 1) == len(df):
        # Convert to DataFrame
        chunk_df = pd.DataFrame(results)

        if not os.path.exists(pred_path):
            # Write with header if file does not exist
            chunk_df.to_csv(pred_path, mode='w', index=False, header=True)
        else:
            # Append without header
            chunk_df.to_csv(pred_path, mode='a', index=False, header=False)

        # Compute metrics
        accuracy = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
        metrics_entry = pd.DataFrame([{"step": idx + 1, "accuracy": accuracy, "f1_score": f1}])

        if not os.path.exists(metrics_path):
            metrics_entry.to_csv(metrics_path, mode='w', index=False, header=True)
        else:
            metrics_entry.to_csv(metrics_path, mode='a', index=False, header=False)

        # Reset for next chunk
        results, y_true, y_pred = [], [], []

        print(f"Checkpoint saved at index {idx + 1}")

# Save predictions
results_df = pd.DataFrame(results)
results_df.to_csv("bakllava_vqa_predictions.csv", index=False)

# Compute accuracy and F1 score
correct = 0
total = len(predictions)
predictions_lower = [str(pred).lower() for pred in predictions]
ground_truths_lower = [str(truth).lower() for truth in ground_truths]
for pred, truth in zip(predictions_lower, ground_truths_lower):
    if pred == truth:
        correct += 1
accuracy = (correct / total) * 100 if total > 0 else 0
# precision, recall, f1, _ = precision_recall_fscore_support(
#     ground_truths_lower, predictions_lower, average='macro', zero_division=0
# )

# Print results
print(f"\nTotal Questions: {total}")
print(f"Correct Predictions: {correct}")
print(f"Accuracy: {accuracy:.2f}%")
print(f"F1-Score: {((2*accuracy)/(1+accuracy)):.2f}%")
# print(f"Macro Precision: {precision:.2f}")
# print(f"Macro Recall: {recall:.2f}")
# print(f"Macro F1 Score: {f1:.2f}")
print(f"Skipped Entries: {len(skipped_entries)}")
if skipped_entries:
    print("\nSkipped Entries (due to timeout):")
    skipped_df = pd.DataFrame(skipped_entries)
    print(skipped_df)

# Display a few examples
results_df = pd.DataFrame({
    "Image Path": df["image_path"],
    "Question": df["question"],
    "Ground Truth": ground_truths,
    "Predicted": predictions
})
print("\nSample Predictions:")
print(results_df.head(10))

[0mFound existing installation: rmm-cu12 25.2.0
Uninstalling rmm-cu12-25.2.0:
  Successfully uninstalled rmm-cu12-25.2.0
Collecting timeout-decorator
  Downloading timeout-decorator-0.5.0.tar.gz (4.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: timeout-decorator
  Building wheel for timeout-decorator (setup.py) ... [?25l[?25hdone
  Created wheel for timeout-decorator: filename=timeout_decorator-0.5.0-py3-none-any.whl size=5006 sha256=69750f97a3f7ba5f6a342343fe3fe3ba07eca06bf9dca60fbf6cc1e39268713c
  Stored in directory: /root/.cache/pip/wheels/aa/cd/d1/51736c6b95846b2613a520ce146a8f305c4016a987bc9faec7
Successfully built timeout-decorator
Installing collected packages: timeout-decorator
Successfully installed timeout-decorator-0.5.0


2025-05-18 01:34:59.957697: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747532100.186509      59 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747532100.251009      59 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


processor_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/589 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/505 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.51M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/41.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/70.1k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/934M [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.89G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

Processing:   0%|          | 1/11972 [00:02<8:10:52,  2.46s/it]

[0] Truth: plastic | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 2/11972 [00:03<5:05:09,  1.53s/it]

[1] Truth: black | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 3/11972 [00:04<4:12:03,  1.26s/it]

[2] Truth: tail | Predicted: fish | Match: ❌


Processing:   0%|          | 4/11972 [00:05<3:51:59,  1.16s/it]

[3] Truth: two | Predicted: 3 | Match: ❌


Processing:   0%|          | 5/11972 [00:06<3:45:54,  1.13s/it]

[4] Truth: lattice | Predicted: stripes | Match: ❌


Processing:   0%|          | 6/11972 [00:07<3:33:06,  1.07s/it]

[5] Truth: pink | Predicted: pink | Match: ✅


Processing:   0%|          | 7/11972 [00:08<3:33:16,  1.07s/it]

[6] Truth: round | Predicted: loafer | Match: ❌


Processing:   0%|          | 8/11972 [00:09<3:22:37,  1.02s/it]

[7] Truth: chain | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 9/11972 [00:10<3:14:42,  1.02it/s]

[8] Truth: black | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 10/11972 [00:11<3:13:24,  1.03it/s]

[9] Truth: red | Predicted: red | Match: ✅


Processing:   0%|          | 11/11972 [00:12<3:11:43,  1.04it/s]

[10] Truth: love | Predicted: love | Match: ✅


Processing:   0%|          | 12/11972 [00:12<3:06:26,  1.07it/s]

[11] Truth: meow | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 13/11972 [00:13<3:08:19,  1.06it/s]

[12] Truth: lake | Predicted: mountain | Match: ❌


Processing:   0%|          | 14/11972 [00:14<3:08:27,  1.06it/s]

[13] Truth: red | Predicted: blue | Match: ❌


Processing:   0%|          | 15/11972 [00:15<3:08:13,  1.06it/s]

[14] Truth: white | Predicted: white | Match: ✅


Processing:   0%|          | 16/11972 [00:16<3:08:49,  1.06it/s]

[15] Truth: red | Predicted: red | Match: ✅


Processing:   0%|          | 17/11972 [00:17<3:04:39,  1.08it/s]

[16] Truth: black | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 18/11972 [00:18<3:06:23,  1.07it/s]

[17] Truth: wood | Predicted: wood | Match: ✅


Processing:   0%|          | 19/11972 [00:19<3:07:42,  1.06it/s]

[18] Truth: brown | Predicted: brown | Match: ✅


Processing:   0%|          | 20/11972 [00:20<3:08:15,  1.06it/s]

[19] Truth: red | Predicted: red | Match: ✅


Processing:   0%|          | 21/11972 [00:21<3:04:46,  1.08it/s]

[20] Truth: plastic | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 22/11972 [00:22<3:10:12,  1.05it/s]

[21] Truth: slip-on | Predicted: lace | Match: ❌


Processing:   0%|          | 23/11972 [00:23<3:08:12,  1.06it/s]

[22] Truth: rectangle | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 24/11972 [00:24<3:05:05,  1.08it/s]

[23] Truth: multicolor | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 25/11972 [00:25<3:06:38,  1.07it/s]

[24] Truth: blue | Predicted: blue | Match: ✅


Processing:   0%|          | 26/11972 [00:26<3:07:59,  1.06it/s]

[25] Truth: white | Predicted: white | Match: ✅


Processing:   0%|          | 27/11972 [00:27<3:12:42,  1.03it/s]

[26] Truth: fabric | Predicted: foam | Match: ❌


Processing:   0%|          | 28/11972 [00:28<3:08:22,  1.06it/s]

[27] Truth: silhouette | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 29/11972 [00:28<3:05:16,  1.07it/s]

[28] Truth: teal | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 30/11972 [00:29<3:02:57,  1.09it/s]

[29] Truth: java | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 31/11972 [00:30<3:05:15,  1.07it/s]

[30] Truth: red | Predicted: black | Match: ❌


Processing:   0%|          | 32/11972 [00:31<3:09:34,  1.05it/s]

[31] Truth: cylindrical | Predicted: round | Match: ❌


Processing:   0%|          | 33/11972 [00:32<3:06:15,  1.07it/s]

[32] Truth: kitten | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 34/11972 [00:33<3:05:41,  1.07it/s]

[33] Truth: bottle | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 35/11972 [00:34<3:03:21,  1.09it/s]

[34] Truth: plastic | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 36/11972 [00:35<3:06:10,  1.07it/s]

[35] Truth: pink | Predicted: pink | Match: ✅


Processing:   0%|          | 37/11972 [00:36<3:04:11,  1.08it/s]

[36] Truth: leaves | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 38/11972 [00:37<3:11:06,  1.04it/s]

[37] Truth: swirl | Predicted: spiral | Match: ❌


Processing:   0%|          | 39/11972 [00:38<3:07:40,  1.06it/s]

[38] Truth: lighthouse | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 40/11972 [00:39<3:09:35,  1.05it/s]

[39] Truth: double | Predicted: modern | Match: ❌


Processing:   0%|          | 41/11972 [00:40<3:06:46,  1.06it/s]

[40] Truth: plastic | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 42/11972 [00:41<3:04:11,  1.08it/s]

[41] Truth: meow | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 43/11972 [00:42<3:06:26,  1.07it/s]

[42] Truth: us | Predicted: usb | Match: ❌


Processing:   0%|          | 44/11972 [00:43<3:08:13,  1.06it/s]

[43] Truth: white | Predicted: silver | Match: ❌


Processing:   0%|          | 45/11972 [00:44<3:09:47,  1.05it/s]

[44] Truth: cube | Predicted: square | Match: ❌


Processing:   0%|          | 46/11972 [00:45<3:15:20,  1.02it/s]

[45] Truth: bucket | Predicted: bucket | Match: ✅


Processing:   0%|          | 47/11972 [00:46<3:19:11,  1.00s/it]

[46] Truth: wag | Predicted: wag | Match: ✅


Processing:   0%|          | 48/11972 [00:47<3:12:50,  1.03it/s]

[47] Truth: plastic | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 49/11972 [00:47<3:13:06,  1.03it/s]

[48] Truth: capri | Predicted: long | Match: ❌


Processing:   0%|          | 50/11972 [00:49<3:17:19,  1.01it/s]

[49] Truth: tiger | Predicted: tiger | Match: ✅


Processing:   0%|          | 51/11972 [00:50<3:20:23,  1.01s/it]

[50] Truth: motorcycle | Predicted: motorcycle | Match: ✅


Processing:   0%|          | 52/11972 [00:50<3:14:23,  1.02it/s]

[51] Truth: jar | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 53/11972 [00:51<3:14:54,  1.02it/s]

[52] Truth: black | Predicted: black | Match: ✅


Processing:   0%|          | 54/11972 [00:52<3:10:54,  1.04it/s]

[53] Truth: black | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 55/11972 [00:53<3:09:43,  1.05it/s]

[54] Truth: plastic | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 56/11972 [00:54<3:07:07,  1.06it/s]

[55] Truth: rectangular | Predicted: [/inst] | Match: ❌


Processing:   0%|          | 57/11972 [00:55<3:09:55,  1.05it/s]

[56] Truth: black | Predicted: black | Match: ✅


Processing:   0%|          | 58/11972 [00:56<3:11:32,  1.04it/s]

[57] Truth: black | Predicted: black | Match: ✅


Processing:   0%|          | 59/11972 [00:57<3:13:20,  1.03it/s]

[58] Truth: grey | Predicted: silver | Match: ❌


Processing:   1%|          | 60/11972 [00:58<3:14:00,  1.02it/s]

[59] Truth: white | Predicted: white | Match: ✅


Processing:   1%|          | 61/11972 [00:59<3:18:50,  1.00s/it]

[60] Truth: lace-up | Predicted: velcro | Match: ❌


Processing:   1%|          | 62/11972 [01:00<3:17:52,  1.00it/s]

[61] Truth: black | Predicted: black | Match: ✅


Processing:   1%|          | 63/11972 [01:01<3:13:15,  1.03it/s]

[62] Truth: plastic | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 64/11972 [01:02<3:18:34,  1.00s/it]

[63] Truth: rectangular | Predicted: rectangle | Match: ❌


Processing:   1%|          | 65/11972 [01:03<3:19:34,  1.01s/it]

[64] Truth: tan | Predicted: brown | Match: ❌


Processing:   1%|          | 66/11972 [01:04<3:13:21,  1.03it/s]

[65] Truth: grey | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 67/11972 [01:05<3:13:28,  1.03it/s]

[66] Truth: love | Predicted: love | Match: ✅


Processing:   1%|          | 68/11972 [01:06<3:09:49,  1.05it/s]

[67] Truth: fitted | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 69/11972 [01:07<3:11:44,  1.03it/s]

[68] Truth: blue | Predicted: blue | Match: ✅


Processing:   1%|          | 70/11972 [01:08<3:08:34,  1.05it/s]

[69] Truth: silicon | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 71/11972 [01:09<3:11:08,  1.04it/s]

[70] Truth: black | Predicted: blue | Match: ❌


Processing:   1%|          | 72/11972 [01:10<3:08:34,  1.05it/s]

[71] Truth: black | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 73/11972 [01:11<3:15:27,  1.01it/s]

[72] Truth: two | Predicted: 3 | Match: ❌


Processing:   1%|          | 74/11972 [01:12<3:11:23,  1.04it/s]

[73] Truth: rectangular | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 75/11972 [01:13<3:16:51,  1.01it/s]

[74] Truth: four | Predicted: 3 | Match: ❌


Processing:   1%|          | 76/11972 [01:14<3:16:26,  1.01it/s]

[75] Truth: tan | Predicted: yellow | Match: ❌


Processing:   1%|          | 77/11972 [01:15<3:12:39,  1.03it/s]

[76] Truth: silicon | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 78/11972 [01:16<3:14:21,  1.02it/s]

[77] Truth: green | Predicted: green | Match: ✅


Processing:   1%|          | 79/11972 [01:17<3:18:44,  1.00s/it]

[78] Truth: rectangular | Predicted: rectangle | Match: ❌


Processing:   1%|          | 80/11972 [01:18<3:13:37,  1.02it/s]

[79] Truth: pink | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 81/11972 [01:19<3:14:16,  1.02it/s]

[80] Truth: stars | Predicted: stars | Match: ✅


Processing:   1%|          | 82/11972 [01:20<3:19:15,  1.01s/it]

[81] Truth: hearts | Predicted: hearts | Match: ✅


Processing:   1%|          | 83/11972 [01:21<3:18:37,  1.00s/it]

[82] Truth: crown | Predicted: queen | Match: ❌


Processing:   1%|          | 84/11972 [01:22<3:22:32,  1.02s/it]

[83] Truth: two | Predicted: 0 | Match: ❌


Processing:   1%|          | 85/11972 [01:23<3:17:18,  1.00it/s]

[84] Truth: green | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 86/11972 [01:24<3:17:52,  1.00it/s]

[85] Truth: black | Predicted: black | Match: ✅


Processing:   1%|          | 87/11972 [01:25<3:13:49,  1.02it/s]

[86] Truth: rectangular | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 88/11972 [01:26<3:15:56,  1.01it/s]

[87] Truth: red | Predicted: blue | Match: ❌


Processing:   1%|          | 89/11972 [01:27<3:17:33,  1.00it/s]

[88] Truth: pink | Predicted: pink | Match: ✅


Processing:   1%|          | 90/11972 [01:28<3:18:32,  1.00s/it]

[89] Truth: brown | Predicted: brown | Match: ✅


Processing:   1%|          | 91/11972 [01:29<3:18:42,  1.00s/it]

[90] Truth: black | Predicted: brown | Match: ❌


Processing:   1%|          | 92/11972 [01:30<3:19:03,  1.01s/it]

[91] Truth: black | Predicted: black | Match: ✅


Processing:   1%|          | 93/11972 [01:31<3:14:58,  1.02it/s]

[92] Truth: feather | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 94/11972 [01:32<3:12:32,  1.03it/s]

[93] Truth: purple | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 95/11972 [01:33<3:14:47,  1.02it/s]

[94] Truth: teal | Predicted: blue | Match: ❌


Processing:   1%|          | 96/11972 [01:34<3:25:16,  1.04s/it]

[95] Truth: fur | Predicted: fuzzy | Match: ❌


Processing:   1%|          | 97/11972 [01:35<3:28:47,  1.05s/it]

[96] Truth: mesh | Predicted: plastic | Match: ❌


Processing:   1%|          | 98/11972 [01:36<3:22:05,  1.02s/it]

[97] Truth: cloth | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 99/11972 [01:37<3:25:59,  1.04s/it]

[98] Truth: boat | Predicted: boat | Match: ✅


Processing:   1%|          | 100/11972 [01:38<3:19:38,  1.01s/it]

[99] Truth: rectangular | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 101/11972 [01:39<3:16:16,  1.01it/s]

[100] Truth: green | Predicted: [/inst] | Match: ❌


Processing:   1%|          | 112/11972 [01:50<3:14:50,  1.01it/s]

In [None]:
print(len(predictions_lower))
print(len(ground_truths_lower))