In [7]:
import os
import pandas as pd
from PIL import Image
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
import nltk
from nltk.translate.bleu_score import corpus_bleu
from nltk.tokenize import word_tokenize

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Tokenizer data needed by word_tokenize. Older NLTK releases read the
# pickled "punkt" models, while recent releases (3.9+) look up "punkt_tab"
# instead, so fetch both to keep the notebook runnable on either version.
# Packages that are already present are reported as up-to-date and skipped.
for nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(nltk_resource)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sirja\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [8]:
# --- Evaluation configuration ---
# Directory of the fine-tuned BLIP checkpoint; both the processor and the
# model weights are loaded from here.
MODEL_PATH = './models/blip-ft/final_blip_model3'

# Comma-separated captions file, read below as (image, caption) rows.
CAPTIONS_FILE = './data/captions.txt'

# Folder the image file names are resolved against.
IMAGES_FOLDER = './data/images'

# Text file listing the test image names, one per line.
TEST_IMAGES_FILE = 'testing_images.txt'

In [9]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The processor and the captioning model are restored from the same
# checkpoint directory; the model is then moved to the selected device.
processor = BlipProcessor.from_pretrained(MODEL_PATH)
model = BlipForConditionalGeneration.from_pretrained(MODEL_PATH)
model = model.to(device)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [10]:
# Reference captions: a comma-separated file parsed without a header row
# into two columns, the image name and one caption for that image.
caption_columns = ['image', 'caption']
df = pd.read_csv(CAPTIONS_FILE, names=caption_columns, header=None, sep=',')

# Test split: one entry per line, with surrounding whitespace removed.
with open(TEST_IMAGES_FILE, 'r') as split_file:
    test_image_list = list(map(str.strip, split_file))

In [11]:
# Parallel accumulators for BLEU: entry i of references_list holds the
# tokenized reference captions of the image whose tokenized generated
# caption is entry i of hypotheses_list.
references_list, hypotheses_list = [], []

In [16]:
# Upper bound on how many entries of the test list are evaluated.
MAX_EVAL_IMAGES = 1000

# Start from empty accumulators every time this cell runs. Appending to lists
# created in another cell would stack a second copy of every pair on a re-run.
references_list = []
hypotheses_list = []

# Group the captions by image once, instead of filtering the whole captions
# frame for every test image inside the loop. Row order within each image is
# preserved, so the reference order matches a per-image filter.
captions_by_image = df.groupby('image', sort=False)['caption'].apply(list).to_dict()

skipped_images = 0
for img_name in test_image_list[:MAX_EVAL_IMAGES]:
    refs = captions_by_image.get(img_name, [])
    img_path = os.path.join(IMAGES_FOLDER, img_name)

    # An image needs both reference captions and a file on disk to be scored.
    if not refs or not os.path.exists(img_path):
        skipped_images += 1
        continue

    # Lower-case and tokenize every reference caption of this image.
    tokenized_refs = [word_tokenize(r.lower()) for r in refs]

    # convert() returns a new in-memory image, so the file handle can be
    # released as soon as the conversion is done.
    with Image.open(img_path) as img_file:
        image = img_file.convert("RGB")

    # Generate one caption for the image (at most 50 tokens) and tokenize it
    # the same way as the references.
    inputs = processor(images=image, return_tensors="pt").to(device)
    out_ids = model.generate(pixel_values=inputs.pixel_values, max_length=50)
    gen_caption = processor.batch_decode(out_ids, skip_special_tokens=True)[0]
    tokenized_gen = word_tokenize(gen_caption.lower())

    references_list.append(tokenized_refs)
    hypotheses_list.append(tokenized_gen)

# Report the effective sample size so the BLEU score can be interpreted.
print(f"Evaluated {len(hypotheses_list)} images, skipped {skipped_images}.")

In [17]:
# Corpus-level BLEU over all collected (references, hypothesis) pairs.
# The uniform 1- to 4-gram weights are NLTK's defaults, written out here
# so it is clear that this is BLEU-4.
score = corpus_bleu(
    list_of_references=references_list,
    hypotheses=hypotheses_list,
    weights=(0.25, 0.25, 0.25, 0.25),
)
print(f"Corpus BLEU score: {score:.2f}")

Corpus BLEU score: 0.33
