**Step 1: Install Required Libraries**

In [1]:
pip install torch torchvision transformers pillow gradio


Note: you may need to restart the kernel to use updated packages.


**Step 2: Import Required Modules**

In [2]:
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import gradio as gr


  from .autonotebook import tqdm as notebook_tqdm


**Step 3: Load the Pre-trained BLIP Model**

In [3]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The processor (image preprocessing + tokenizer) and the captioning model are
# both loaded from the same pre-trained checkpoint; the model is then moved to
# the selected device.
processor = BlipProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
).to(device)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


**Step 4: Define the Caption Generation Function**

In [6]:
def generate_caption(image_path, max_length=30):
    """Generate a text caption for an image file using the loaded BLIP model.

    Args:
        image_path: Path (or file-like object) accepted by ``Image.open``.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.
            Defaults to 30, the value this function always used previously.

    Returns:
        str: The decoded caption with special tokens removed.
    """
    # Open inside a context manager so the underlying file handle is closed
    # as soon as the pixels have been copied out; ``convert`` returns a new
    # image, so nothing below needs the file to stay open.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    # Preprocess into PyTorch tensors and move them to the model's device.
    inputs = processor(images=image, return_tensors="pt").to(device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        output = model.generate(**inputs, max_length=max_length)

    # Decode the first sequence in the returned batch.
    caption = processor.tokenizer.decode(output[0], skip_special_tokens=True)
    return caption


**Step 5: Define a Function to Handle Uploads & Generate Captions**

In [7]:
def upload_and_generate_caption(image):
    """Gradio click handler: caption the uploaded image and echo it back.

    Args:
        image: File path of the uploaded image, or ``None`` when the button
            is clicked before anything has been uploaded.

    Returns:
        tuple: ``(image, caption)`` -- the same path for the display
        component and the generated caption text.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of a failure from inside image loading.
    """
    if image is None:
        raise gr.Error("Please upload an image before generating a caption.")

    caption = generate_caption(image)
    return image, caption


**Step 6: Interactive UI with Gradio**

In [8]:
with gr.Blocks() as demo:
    gr.Markdown(value="## 🖼️ AI Image Captioning System 🚀")

    with gr.Row():
        # type="filepath" makes the handler receive the uploaded image's
        # path on disk, which is what generate_caption opens.
        image_input = gr.Image(label="Upload Image", type="filepath")
        caption_output = gr.Textbox(label="Generated Caption")

    # Captioning runs only when this button is clicked.
    generate_button = gr.Button(value="Generate Caption")

    # Shows the uploaded image again below the button, alongside its caption.
    image_display = gr.Image(label="Uploaded Image")

    # Wire the click: the handler returns (image, caption), in the same
    # order as the output components listed here.
    generate_button.click(
        fn=upload_and_generate_caption,
        inputs=[image_input],
        outputs=[image_display, caption_output],
    )

# Start the local web server for the interface.
if __name__ == "__main__":
    demo.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.
