Install Required Libraries

In [None]:
!pip install transformers
!pip install torch torchvision
!pip install pillow
!pip install numpy



Import Required Modules

In [None]:
import torch  # For deep learning
from transformers import BlipProcessor, BlipForConditionalGeneration  # For BLIP model
from PIL import Image  # For image processing
import requests  # For making HTTP requests to fetch images
from io import BytesIO  # For handling image data in memory

Load Pre-trained BLIP Model

In [None]:

# Single source of truth for the pretrained checkpoint, so the processor and
# the model are always loaded from the same identifier.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

# `processor` prepares images as model inputs and decodes generated token ids
# back to text; `model` produces the caption token ids. Both are used as
# module-level globals by the captioning function defined later.
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)


Define Caption Generation Function

In [None]:
def generate_caption(image_path, timeout=30):
    """Generate a text caption for an image given by URL or local path.

    Args:
        image_path: An ``http://`` / ``https://`` URL, or a path to a local
            image file (``str`` or path-like; it is converted with ``str()``).
        timeout: Seconds to wait for the remote server when ``image_path`` is
            a URL. Not used for local files.

    Returns:
        str: The caption decoded from the first generated sequence, with
        special tokens stripped.

    Raises:
        requests.HTTPError: If the URL answers with an error status code.
        requests.RequestException: On connection failures or timeouts.
        OSError: If the image cannot be opened or decoded by Pillow.
    """
    location = str(image_path)

    # Require a real scheme prefix: a bare "http" prefix would also match
    # local paths such as "http_images/dog.png".
    is_remote = location.lower().startswith(("http://", "https://"))

    if is_remote:
        print(f"Processing image from URL: {image_path}")
        # Bound the wait so a stalled server cannot hang the notebook, and
        # fail loudly on 4xx/5xx instead of feeding an error page to Pillow.
        response = requests.get(location, timeout=timeout)
        response.raise_for_status()
        source = BytesIO(response.content)
    else:
        print(f"Processing image from local path: {image_path}")
        source = location

    # Image.open is lazy; convert() forces the pixel data to load and returns
    # an independent RGB image, so the underlying file can be closed right
    # away. Normalizing to RGB also covers grayscale, palette and RGBA inputs.
    with Image.open(source) as raw_img:
        img = raw_img.convert("RGB")

    # Preprocess the image into model inputs (PyTorch tensors).
    inputs = processor(images=img, return_tensors="pt")
    # Generate caption token ids and decode the first sequence to text.
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)

Generate Caption for Image

In [None]:
# Sample image to run through the captioning function.
image_url = "https://www.caninecampus.us/wp-content/uploads/2020/08/extra-blog-image-EXERCISE.jpg"

# Caption the image and print the resulting text.
print(generate_caption(image_path=image_url))


Processing image from URL: https://www.caninecampus.us/wp-content/uploads/2020/08/extra-blog-image-EXERCISE.jpg
two dogs playing with a ball in the grass
