In [25]:
# STEP 1: Install dependencies

!pip install transformers torch pillow nltk



In [36]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

In [37]:
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")


In [38]:
def generate_image_caption(image_path):
    """
    Generates a real AI caption using the BLIP model.
    """
    try:
        image = Image.open(image_path).convert("RGB")
        inputs = processor(images=image, return_tensors="pt")
        output = model.generate(**inputs, max_new_tokens=50)
        caption = processor.decode(output[0], skip_special_tokens=True)
        return caption

    except FileNotFoundError:
        return "‚ùå Error: Image file not found."
    except Exception as e:
        return f"‚ö†Ô∏è An error occurred: {e}"

In [39]:
def evaluate_caption(generated_caption, reference_caption):
    """
    Calculates BLEU score comparing generated and reference captions.
    """
    reference = [reference_caption.lower().split()]   # tokenized reference
    candidate = generated_caption.lower().split()     # tokenized model output
    smoothie = SmoothingFunction().method4
    bleu = sentence_bleu(reference, candidate, smoothing_function=smoothie)
    return round(bleu, 4)

In [40]:
image_path = "sample.png"  # your image file

generated_caption = generate_image_caption(image_path)
reference_caption = "A group of people standing near a lake during sunset."  # your ground truth (edit this)

print("üñºÔ∏è Generated Caption:", generated_caption)
print("üìò Reference Caption:", reference_caption)
print("üìä BLEU Score:", evaluate_caption(generated_caption, reference_caption))

üñºÔ∏è Generated Caption: three people standing on a shore watching the sunset
üìò Reference Caption: A group of people standing near a lake during sunset.
üìä BLEU Score: 0.0626
