In [1]:
import base64
import mimetypes
import os
import time

from dotenv import load_dotenv
from langchain_core.messages import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the API key used by the Gemini client
# comes from — confirm against the project's .env template.
load_dotenv()



True

In [2]:
def client_pipeline(current_model):
    """Create a Gemini chat client for one model.

    Args:
        current_model: Model identifier passed to the client as ``model``.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance configured with
        ``temperature=0`` and ``max_retries=0``.
    """
    # Fixed settings shared by every evaluated model.
    # NOTE(review): max_retries=0 presumably keeps the measured latency to a
    # single request — confirm this is intended.
    settings = {
        "model": current_model,
        "temperature": 0,
        "max_retries": 0,
    }
    return ChatGoogleGenerativeAI(**settings)

In [3]:
# Instruction text sent alongside every receipt image. It asks the model to
# answer only with a JSON object holding an "items" list (name, quantity,
# price_per_unit) plus service, tax and discount amounts, each defaulting to 0.
# NOTE(review): the template has a trailing comma after "discount_price", which
# is not valid strict JSON — confirm that downstream parsing tolerates output
# that mirrors it.
PROMPT = """
You are given an image of a receipt. Please read the content into JSON format:

```
{
    "items": [
        {
            "name": <item name>,
            "quantity": <item quantity>,
            "price_per_unit": <item price per unit>
        }
    ],
    "service_price": <service price in receipt or 0 if not available>,
    "tax_price": <tax price in receipt or 0 if not available>,
    "discount_price": <discount price in receipt or 0 if not available>,
}
```

return only in JSON format
"""

In [4]:
def image_loader(current_image_path):
    """Read an image file and return it as a base64 ``data:`` URI.

    Args:
        current_image_path: Path to the image file. The MIME type is guessed
            from the file name, so the extension must be a recognised one.

    Returns:
        A string of the form ``data:<mime type>;base64,<encoded bytes>``.

    Raises:
        ValueError: If the MIME type cannot be guessed from the file name.
        OSError: If the file cannot be opened or read.
    """
    # Resolve the MIME type first: guess_type returns None for unknown
    # extensions, which would otherwise yield a malformed "data:None;..." URI.
    mime_type, _ = mimetypes.guess_type(current_image_path)
    if mime_type is None:
        raise ValueError(
            f"Cannot determine MIME type for image path: {current_image_path!r}"
        )

    with open(current_image_path, "rb") as f:
        image_bytes = f.read()

    encoded = base64.b64encode(image_bytes).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"

In [5]:
def inference_pipeline(PROMPT, current_image_uri, current_client):
    """Send the prompt and one image to the model and time the call.

    Args:
        PROMPT: Instruction text placed in the message's text part.
        current_image_uri: URL or ``data:`` URI placed in the message's
            ``image_url`` part.
        current_client: Object exposing ``invoke(messages)``.

    Returns:
        A tuple ``(response, elapsed_time)``: whatever ``invoke`` returned, and
        the duration of that call in seconds as a float.
    """
    message = HumanMessage(
        content=[
            {"type": "text", "text": PROMPT},
            {
                "type": "image_url",
                "image_url": {"url": current_image_uri},
            },
        ]
    )

    # perf_counter is monotonic, so the measured latency cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()
    response = current_client.invoke([message])
    elapsed_time = time.perf_counter() - start_time
    return response, elapsed_time

In [7]:
# Evaluation grid: every model is run on every receipt image n_trials times.
IMAGE_PATHS = [r"../data/receipt_1.jpg", r"../data/receipt_2.png", r"../data/receipt_3.jpg"]
MODELS_LIST = ["gemini-2.0-flash", "gemini-2.0-flash-lite"] if False else ["gemini-2.5-flash", "gemini-2.0-flash", "gemini-2.0-flash-lite"]
n_trials = 3

# Log file that results are appended to, and the pause between requests.
# NOTE(review): the 30 s pause is presumably there to stay under API rate
# limits — confirm the required value.
OUTPUT_PATH = "artifacts/evaluation_prompt.txt"
COOLDOWN_SECONDS = 30

# Create the output directory up front so a missing folder cannot discard the
# result of an API call that has already been made.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

for current_model_name in MODELS_LIST:

    for current_image_path in IMAGE_PATHS:
        # The encoded image depends only on the path, so build it once per image.
        current_image_uri = image_loader(current_image_path)

        for current_trial in range(n_trials):
            # Use the loop variable: the previous `model_name` is not defined
            # anywhere in the notebook and fails on a fresh kernel.
            current_client = client_pipeline(current_model_name)
            response, elapsed_time = inference_pipeline(PROMPT, current_image_uri, current_client)

            # Explicit UTF-8 so non-ASCII text in the response is written the
            # same way on every platform.
            with open(OUTPUT_PATH, "a", encoding="utf-8") as file:
                file.write("="*50 + "\n")
                file.write(f"Evaluation for model: {current_model_name} on image: {current_image_path} on trial: {current_trial + 1}\n")
                file.write(f"Elapsed time: {elapsed_time}\n")
                file.write(f"Response: \n{response.content}\n")

            time.sleep(COOLDOWN_SECONDS)