In [4]:
# Install necessary libraries
!pip install -q transformers diffusers torch torchvision pillow
!pip install -q gradio

# Import libraries
import torch
from PIL import Image
import requests
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
import io

# Load the BLIP captioning processor and model once, at cell execution time.
# Both are fetched from the same checkpoint id, so it is named a single time.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

# Use CUDA when torch reports it as available; otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
model = model.to(device)

# Function to load image safely
def safe_load_image(image_input, timeout=10):
    """Return an image for *image_input*, downloading it first if it is a URL.

    Args:
        image_input: Either a string, which is treated as an image URL and
            fetched over HTTP, or any other object, which is handed back
            unchanged (callers pass an already-loaded PIL image here).
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a stalled server would block the caller forever.

    Returns:
        The downloaded image converted to RGB, or *image_input* itself when
        it is not a string.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        Exception: Whatever ``Image.open`` raises when the downloaded bytes
            cannot be decoded as an image.
    """
    if not isinstance(image_input, str):
        # Not a URL: assume the caller already holds a loaded image.
        return image_input

    # Surrounding whitespace from a pasted URL is never part of the address.
    response = requests.get(image_input.strip(), timeout=timeout)
    # Fail on 4xx/5xx here instead of trying to decode an HTML error page.
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content)).convert('RGB')

# Function to generate caption
def generate_caption(image):
    """Return a text caption for *image* using the module-level BLIP model.

    The image is preprocessed into PyTorch tensors by ``processor``, moved to
    ``device``, passed through ``model.generate``, and the first generated
    sequence is decoded back to text with special tokens removed.
    """
    encoded = processor(images=image, return_tensors="pt")
    encoded = encoded.to(device)
    token_ids = model.generate(**encoded)
    # Only the first generated sequence is decoded.
    return processor.decode(token_ids[0], skip_special_tokens=True)

# Gradio app function
def explain_image(image_input):
    """Load *image_input* and caption it, reporting any failure as text.

    Returns the generated caption on success. Any exception raised while
    loading or captioning is caught and returned as a string beginning with
    "Error processing image: ", so the caller always receives a string it
    can display.
    """
    try:
        picture = safe_load_image(image_input)
        return generate_caption(picture)
    except Exception as err:
        # UI boundary: show the failure to the user rather than raising.
        return "Error processing image: " + str(err)

# Build the Gradio UI: an image upload and a URL box feed one caption output.
with gr.Blocks() as demo:
    gr.Markdown("# 📸 Image Explainer Bot\nUpload an image or paste an image URL!")

    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Image or Enter URL (below)")
        url_input = gr.Textbox(label="Or paste Image URL here (optional)")

    caption_output = gr.Textbox(label="AI Generated Caption")

    def process_inputs(image_uploaded, url_entered):
        """Pick the image source for a button click and return its caption.

        A non-blank URL takes precedence over an uploaded image. When neither
        is supplied, a prompt asking for input is returned instead.

        Args:
            image_uploaded: Value of the image component, or None when no
                image has been uploaded.
            url_entered: Text from the URL box; None or whitespace-only text
                is treated as "no URL".

        Returns:
            The caption (or error message) from ``explain_image``, or the
            prompt string when there is nothing to process.
        """
        # Normalise once so the emptiness check and the request use the same
        # value, and so a None textbox value cannot raise AttributeError.
        url = (url_entered or "").strip()
        if url:
            return explain_image(url)
        if image_uploaded is not None:
            return explain_image(image_uploaded)
        return "Please upload an image or paste a URL."

    generate_button = gr.Button("Generate Caption")

    generate_button.click(fn=process_inputs, inputs=[image_input, url_input], outputs=[caption_output])

# Start serving the app.
demo.launch()


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.0/54.0 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.6/322.6 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m117.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
It looks like you are running Gradio on a hosted Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in cola

