# Use the VLM endpoint

In [None]:
import base64
import cv2
import numpy as np
import os
import pyheif

from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from openai import AzureOpenAI


def encode_image(image_path: str):
    """Return the contents of the file at ``image_path`` as a Base64 string.

    The file is read in binary mode and is not decoded or modified; the
    Base64 bytes are decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def encode_and_resize(image_path: str, max_size: int = 640) -> str:
    """Load an image, rescale it, and return it as a Base64-encoded JPEG.

    The image is scaled uniformly so that its longest side (height or width)
    becomes ``max_size`` pixels, re-encoded as JPEG, and Base64-encoded.

    Args:
        image_path: Path to the image. Files ending in ``.heic``/``.heif``
            (any letter case) are read via ``read_heic_to_numpy``; everything
            else is read with ``cv2.imread``.
        max_size: Target length in pixels of the longest image side.

    Returns:
        The JPEG bytes as a Base64 string.

    Raises:
        ValueError: If the image cannot be read or cannot be JPEG-encoded.
    """
    if image_path.lower().endswith((".heic", ".heif")):
        img = read_heic_to_numpy(image_path)
        # The HEIC reader returns RGB(A) channel order, while OpenCV's encoder
        # assumes BGR; without this conversion red and blue are swapped.
        rgb_to_bgr = cv2.COLOR_RGBA2BGR if img.shape[2] == 4 else cv2.COLOR_RGB2BGR
        img = cv2.cvtColor(img, rgb_to_bgr)
    else:
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f"Could not read image: {image_path}")

    # Only the spatial dimensions matter for scaling; ignore the channel axis.
    height, width = img.shape[:2]
    scale_factor = max_size / max(height, width)
    img = cv2.resize(img, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)

    success, im_arr = cv2.imencode('.jpg', img)  # im_arr: one-dimensional uint8 array of JPEG bytes
    if not success:
        raise ValueError(f"Could not encode image as JPEG: {image_path}")
    return base64.b64encode(im_arr.tobytes()).decode("utf-8")

def read_heic_to_numpy(file_path: str) -> np.ndarray:
    """Read a HEIC/HEIF file with pyheif and return its pixels as a NumPy array.

    Args:
        file_path: Path to the HEIC file.

    Returns:
        A ``uint8`` array of shape ``(height, width, channels)`` where
        ``channels`` is 3 for mode ``"RGB"`` and 4 for mode ``"RGBA"``.
        Channel order is as delivered by pyheif (RGB/RGBA, not BGR).

    Raises:
        ValueError: If the decoded image mode is neither ``"RGB"`` nor ``"RGBA"``.
    """
    heif_file = pyheif.read(file_path)

    channels = {"RGB": 3, "RGBA": 4}.get(heif_file.mode)
    if channels is None:
        raise ValueError("Unsupported HEIC color mode")

    width, height = heif_file.size
    row_bytes = width * channels
    # Rows in the decoded buffer may be padded: the row stride can be larger
    # than width * channels. Fall back to tightly packed rows if the
    # attribute is not available.
    stride = getattr(heif_file, "stride", None) or row_bytes

    flat = np.frombuffer(heif_file.data, dtype=np.uint8)
    if stride == row_bytes:
        return flat.reshape(height, width, channels)

    # Drop the per-row padding before exposing the (H, W, C) view.
    padded_rows = flat[: height * stride].reshape(height, stride)
    return padded_rows[:, :row_bytes].reshape(height, width, channels)


# Azure OpenAI resource, API version, and deployment used by this notebook.
api_version = "2024-12-01-preview"
endpoint = "https://oai-aip-cv-ont-sdc.openai.azure.com/"
deployment = "gpt-4o-mini"
model_name = "gpt-4o-mini"

# Authenticate with a bearer token obtained through DefaultAzureCredential
# (no API key is passed to the client).
token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    "https://cognitiveservices.azure.com/.default",
)

# Client used by the cells below to call the chat completions endpoint.
client = AzureOpenAI(
    azure_endpoint=endpoint,
    azure_ad_token_provider=token_provider,
    api_version=api_version,
)

## Ask a question with a photo as context

In [None]:
# Photo to inspect. See other notebook on how to download an image from blob store.
images_folder = "local_data/2025_Centrum/images/"
image_name = "18228489-9471-4aee-bc87-0a8605521007.jpeg"
image_path = os.path.join(images_folder, image_name)

# Base64 string of the (resized, JPEG-encoded) photo.
# Alternative without resizing:
# base64_image = encode_image(image_path)
base64_image = encode_and_resize(image_path)

# System prompt: sets the role the model should take.
system_message = {
    "role": "system",
    "content": "You are a sidewalk inspector for the municipality. Your job is to inspect images taken of sidewalk surfaces and assess their quality and maintenance needs.",
}

# User turn: the question plus the photo embedded as a data URL.
image_part = {
    "type": "image_url",
    "image_url": {
        "url": f"data:image/jpeg;base64,{base64_image}",
        "detail": "low",  # reduces token usage
    },
}
question_part = {"type": "text", "text": "Describe the quality of the sidewalk in this image."}
user_message = {
    "role": "user",
    "content": [question_part, image_part],
}

# Query endpoint
response = client.chat.completions.create(
    model=deployment,
    messages=[system_message, user_message],
    max_tokens=4096,
    temperature=1.0,
    top_p=1.0,
)

answer = response.choices[0].message.content
print(answer)