<a href="https://colab.research.google.com/github/SanjanaRamoliya27/blip-weather-vqa/blob/main/App.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gradio opencv-python pillow imagehash


In [None]:
%%writefile utils.py

import cv2
import numpy as np
from PIL import Image
import imagehash

def image_basic_info(image):
    """Return the pixel dimensions and aspect ratio of an image.

    Args:
        image: Object exposing a ``size`` attribute that unpacks into
            ``(width, height)``, as PIL images do.

    Returns:
        A ``(width, height, aspect_ratio)`` tuple, where ``aspect_ratio``
        is ``width / height`` rounded to two decimal places.
    """
    w, h = image.size
    ratio = round(w / h, 2)
    return (w, h, ratio)

def blur_score(image):
    """Estimate image sharpness as the variance of its Laplacian.

    Args:
        image: A PIL image, or any object ``np.array`` can turn into an
            RGB/RGBA pixel array.

    Returns:
        The variance of the Laplacian of the grayscale image, rounded to
        two decimal places. Fewer sharp edges give a lower value.
    """
    # COLOR_RGB2GRAY needs a 3- or 4-channel array. PIL modes such as "L",
    # "P", "1", "LA" or "CMYK" do not produce that layout, so convert them
    # to RGB first. RGB/RGBA images and non-PIL inputs are left untouched.
    if isinstance(image, Image.Image) and image.mode not in ("RGB", "RGBA"):
        image = image.convert("RGB")

    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
    # CV_64F keeps the negative Laplacian responses instead of clipping
    # them to the uint8 range, which would distort the variance.
    score = cv2.Laplacian(gray, cv2.CV_64F).var()
    return round(score, 2)

def perceptual_hash(image):
    """Return the perceptual hash of ``image`` as a string.

    The hash is computed with ``imagehash.phash`` and converted with
    ``str``.
    """
    phash_value = imagehash.phash(image)
    return str(phash_value)


In [None]:
from transformers import BlipProcessor, BlipForQuestionAnswering
import torch

# The processor and the model weights are loaded from the same checkpoint.
_BLIP_CHECKPOINT = "Salesforce/blip-vqa-base"
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForQuestionAnswering.from_pretrained(_BLIP_CHECKPOINT)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model.to(device)
# Put the model in evaluation mode; it is only used for inference here.
model.eval()

In [None]:
import gradio as gr
from PIL import Image

from utils import image_basic_info, blur_score


In [None]:
def analyze_image_with_vqa(img, question):
    """Run basic image-quality checks and BLIP VQA on an uploaded image.

    Args:
        img: The uploaded PIL image, or ``None`` when nothing was uploaded.
        question: The question to ask about the image. ``None``, an empty
            string, or a whitespace-only string counts as missing.

    Returns:
        A multi-line text report containing the image width, height,
        aspect ratio, blur score, a quality label, the question and the
        model's answer. When the image or the question is missing, a
        one-line warning message is returned instead.
    """
    # Check truthiness before calling strip(): the question may be None,
    # and None has no strip() method.
    if img is None or not question or not question.strip():
        return "⚠️ Please upload an image and enter a question."

    # -------- EDA --------
    width, height, aspect_ratio = image_basic_info(img)
    blur = blur_score(img)

    # A blur score below 100 is labelled as blurry.
    quality = "Low Quality (Blurry)" if blur < 100 else "Good"

    # -------- VQA --------
    # Move the encoded inputs to the same device as the model.
    inputs = processor(img, question, return_tensors="pt").to(device)

    # Inference only, so gradient tracking is not needed.
    with torch.no_grad():
        output = model.generate(**inputs)

    answer = processor.decode(output[0], skip_special_tokens=True)

    result = f"""
🖼️ IMAGE EDA
-------------------------
Width           : {width}
Height          : {height}
Aspect Ratio    : {aspect_ratio}
Blur Score      : {blur}
Image Quality   : {quality}

🤖 VISUAL QUESTION ANSWERING
-------------------------
Question : {question}
Answer   : {answer}
"""

    return result


In [None]:
# Build the Gradio interface: image + question inputs on the left,
# the combined EDA/VQA text report on the right.
with gr.Blocks(title="Image EDA & Visual Question Answering (VQA)") as demo:

    gr.Markdown(
        """
        # 🖼️ Image EDA & Visual Question Answering (VQA)
        Analyze image quality and ask questions using the BLIP VQA model.
        """
    )

    with gr.Row():

        # ---------- LEFT COLUMN ----------
        with gr.Column(scale=1):
            # type="pil" hands the upload to the callback as a PIL image,
            # which is what analyze_image_with_vqa passes to its helpers.
            image_input = gr.Image(
                type="pil",
                label="Upload Image"
            )

            question_input = gr.Textbox(
                label="Ask a Question about the Image",
                placeholder="What is happening in the image?"
            )

            submit_btn = gr.Button("Submit", variant="primary")
            clear_btn = gr.Button("Clear")

        # ---------- RIGHT COLUMN ----------
        with gr.Column(scale=1):
            # Read-only box that shows the report returned by the callback.
            output_box = gr.Textbox(
                label="EDA + VQA Output",
                lines=20,
                interactive=False
            )

    # Button actions
    # Submit: run the analysis and show the report in the output box.
    submit_btn.click(
        fn=analyze_image_with_vqa,
        inputs=[image_input, question_input],
        outputs=output_box
    )

    # Clear: reset the question, the image and the output. The tuple order
    # must match the order of the components listed in ``outputs``.
    clear_btn.click(
        fn=lambda: ("", None, ""),
        inputs=[],
        outputs=[question_input, image_input, output_box]
    )

# share=True asks Gradio to create a public share link for the app.
demo.launch(share=True)
