In [None]:
from optimum.intel.openvino import OVModelForVisualCausalLM
from transformers import AutoProcessor, TextStreamer
from qwen_vl_utils import process_vision_info

from PIL import Image
from io import BytesIO
from pathlib import Path
import requests

In [None]:
# Model location handed to both from_pretrained calls below.
ov_model_path = "ov-Qwen2.5-VL-3B-Instruct-INT4"
# Device string forwarded to the OpenVINO model loader.
device = "CPU"
# Pixel-count bounds forwarded to the processor, written as multiples of 28 * 28.
min_pixels = 256 * 28 * 28
max_pixels = 1280 * 28 * 28
# Image file used by the example cells that follow.
image_path = "demo.jpeg"

model = OVModelForVisualCausalLM.from_pretrained(ov_model_path, device=device)
processor = AutoProcessor.from_pretrained(
    ov_model_path,
    min_pixels=min_pixels,
    max_pixels=max_pixels,
)


In [None]:
# Prompt text sent together with the image.
question = "Describe this image."
# PIL handle on the example image; it is shown next to the answer in a later cell.
image = Image.open(image_path)

In [None]:
# Single-turn conversation: one user message whose content is the image reference
# (a "file://" prefix in front of image_path) followed by the question text.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": f"file://{image_path}"},
            {"type": "text", "text": question},
        ],
    }
]

In [None]:

# Render the conversation through the chat template; tokenize=False asks for text
# rather than token ids, and add_generation_prompt=True requests the generation prompt.
text = processor.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
# Collect the vision inputs referenced by the messages
# (presumably loaded images/videos -- see qwen_vl_utils for the exact return types).
image_inputs, video_inputs = process_vision_info(messages)
# Combine the prompt text and vision inputs into one batch; "pt" requests PyTorch tensors.
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
)

In [None]:
# Show the input image and the prompt headings; the streamer passed to generate()
# is what emits the answer text, with the prompt and special tokens skipped.
display(image)
print("Question:", question, "Answer:", sep="\n")
generated_ids = model.generate(
    **inputs,
    max_new_tokens=100,
    streamer=TextStreamer(
        processor.tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    ),
)


In [None]:
# Build the interactive demo through the gradio_helper module.
from gradio_helper import make_demo
# NOTE(review): an earlier variant also passed the processor, i.e.
# make_demo(model, processor) -- confirm which signature gradio_helper.make_demo expects.
demo = make_demo(model)
try:
    demo.launch(debug=True)
except Exception:
    # Best-effort fallback: if the plain launch raises, retry with share=True.
    demo.launch(debug=True, share=True)