In [2]:
import base64
import requests
import gradio as gr
from PIL import Image
from io import BytesIO

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Connection settings for the Ollama server used by this notebook.
# The chat endpoint is derived from the server's base address.
_OLLAMA_BASE = "http://localhost:11434"
OLLAMA_URL = f"{_OLLAMA_BASE}/api/chat"

# Name of the model that requests are sent to.
MODEL_NAME = "llava"

In [4]:
def image_to_base64(image: Image.Image) -> str:
    """Encode an image as PNG and return the bytes as a base64 string.

    Args:
        image: The PIL image to serialize.

    Returns:
        The base64 text of the PNG-encoded image.
    """
    # Serialize into memory; nothing is written to disk.
    with BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()

    encoded = base64.b64encode(png_bytes)
    return encoded.decode()

In [5]:
# (connect, read) timeouts in seconds for the Ollama request. The read timeout
# is generous because generating an answer for an image can take a while.
_OLLAMA_TIMEOUT = (5, 300)


def vqa_ollama(image, question):
    """Ask the configured Ollama model a question about an image.

    Args:
        image: The image to ask about, or None when nothing was uploaded.
        question: The question text entered by the user.

    Returns:
        The model's answer as a string. When the inputs are missing or the
        request fails, a human-readable message describing the problem is
        returned instead, so the UI always has text to display.
    """
    # Treat a whitespace-only question the same as an empty one.
    if image is None or not question or not question.strip():
        return "Please upload an image and enter a question."

    image_b64 = image_to_base64(image)

    payload = {
        "model": MODEL_NAME,
        "messages": [
            {
                "role": "user",
                "content": f"Answer the question based on the image.\nQuestion: {question}",
                "images": [image_b64]
            }
        ],
        "stream": False
    }

    # Without a timeout, requests waits forever if the server stalls.
    try:
        response = requests.post(OLLAMA_URL, json=payload, timeout=_OLLAMA_TIMEOUT)
    except requests.exceptions.RequestException as exc:
        return f"Could not reach Ollama at {OLLAMA_URL}: {exc}"

    # Parse the body before checking the status code so that an error message
    # supplied by the server is not lost.
    try:
        result = response.json()
    except ValueError:
        return f"Ollama returned a non-JSON response (HTTP {response.status_code})."

    # NOTE(review): assumes failures are reported as JSON with an "error"
    # field (e.g. when the model has not been pulled) - confirm against the
    # Ollama API documentation.
    if isinstance(result, dict) and result.get("error"):
        return f"Ollama error: {result['error']}"
    if not response.ok:
        return f"Ollama request failed (HTTP {response.status_code})."

    try:
        return result["message"]["content"]
    except (KeyError, TypeError):
        return "Ollama returned a response without an answer."

In [6]:
# Input and output widgets for the demo, built up front so the Interface
# call below reads as a plain wiring step.
image_input = gr.Image(label="Image", type="pil")
question_input = gr.Textbox(placeholder="What is in the image?", label="Question")
answer_output = gr.Textbox(label="Answer")

# Wire the widgets to vqa_ollama: (image, question) in, answer text out.
interface = gr.Interface(
    vqa_ollama,
    [image_input, question_input],
    answer_output,
    description="Visual Question Answering using Ollama + LLaVA",
    title="VQA with Ollama (LLaVA)",
)

In [7]:
# Start serving the Gradio interface defined above. In the recorded run it
# was available at http://127.0.0.1:7860.
interface.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




Created dataset file at: .gradio\flagged\dataset1.csv
