In [149]:
import os
import requests
from dotenv import load_dotenv
from unstract.llmwhisperer import LLMWhispererClientV2
from google import genai
from pydantic import BaseModel

In [150]:
from typing import List, Optional

In [151]:
class LineItems(BaseModel):
    """One purchased item on a receipt (an entry of ``Reciepts.line_items``)."""

    # Item label; the recorded run below shows abbreviated receipt text
    # such as "KRO AGED SWS SLC RC".
    name: str
    # Amount charged for the item, as a float.
    cost: float


class Reciepts(BaseModel):
    """Structured form of a receipt.

    Passed to Gemini as the ``response_schema`` in ``convert_text_to_json``,
    so the field names and types here define the JSON the model is asked for.

    NOTE(review): the ``Optional`` fields have no default. Under pydantic v2
    (this notebook uses ``model_validate``) that makes them required-but-nullable:
    the key must be present, though its value may be ``None``.
    """

    # Not filled in by the extraction step in the recorded run (comes back null).
    image_url: Optional[str]
    # Purchase date kept as free text, e.g. "11/14/18 12:11pm"; not parsed.
    date: Optional[str]
    shop_name: Optional[str]
    # Receipt total kept as a string (e.g. "26.13"), unlike LineItems.cost which is a float.
    total_amount: Optional[str]
    # Individual purchased items; required and non-nullable.
    line_items: List[LineItems]

In [152]:
# Load variables from a .env file into the process environment, then read the
# service credentials / connection string. Each constant is None when unset.
load_dotenv()

LLMWHISPERER_API_KEY = os.environ.get("LLMWHISPERER_API_KEY")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
MONGO_URL = os.environ.get("MONGO_URL")

In [153]:
def download_image(url, folder="bills", filename="downloaded_image.jpg", timeout=30):
    """Download the image at ``url`` and save it as ``folder/filename``.

    Args:
        url: HTTP(S) address of the image.
        folder: Destination directory; created if it does not exist.
        filename: Name of the file written inside ``folder``.
        timeout: Seconds to wait on the server before ``requests`` gives up,
            so a stalled host cannot hang the notebook indefinitely.

    Returns:
        The path of the saved file, or ``None`` when the server did not
        answer with HTTP 200 (a message is printed in that case).

    Raises:
        requests.exceptions.RequestException: On connection errors or timeouts.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(folder, exist_ok=True)

    # Stream the body so the image is never held fully in memory; the context
    # manager releases the connection on both the success and failure paths.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print("Failed to download image. Check the URL.")
            return None

        image_path = os.path.join(folder, filename)
        with open(image_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

    print(f"Image downloaded successfully: {image_path}")
    return image_path

In [154]:
def convert_receipt_to_text(image_path: str):
    """Extract the text of the receipt image at ``image_path`` via LLMWhisperer.

    A new client is created on every call, authenticated with
    ``LLMWHISPERER_API_KEY``. The call waits for the extraction to complete
    (``wait_timeout=200`` -- presumably seconds; confirm against the client
    docs) and returns the ``result_text`` of the extraction.
    """
    whisperer_client = LLMWhispererClientV2(
        api_key=LLMWHISPERER_API_KEY,
        base_url="https://llmwhisperer-api.us-central.unstract.com/api/v2",
        logging_level="INFO",
    )
    extraction_response = whisperer_client.whisper(
        wait_timeout=200,
        wait_for_completion=True,
        file_path=image_path,
    )
    return extraction_response["extraction"]["result_text"]

In [155]:
def convert_text_to_json(output: str):
    """Ask Gemini to turn extracted receipt text into a ``Reciepts`` object.

    Args:
        output: Plain text of the receipt, as returned by
            ``convert_receipt_to_text``.

    Returns:
        ``response.parsed`` -- in the recorded run this is a ``Reciepts``
        instance built from the JSON reply. NOTE(review): the SDK may leave
        it as ``None`` if the reply does not fit the schema; callers should
        confirm before using it.

    Side effects:
        Prints the raw JSON reply and the parsed object.
    """
    client = genai.Client(api_key=GEMINI_API_KEY)
    # Keep a blank line between the instruction and the receipt text. Without
    # it the last instruction token ("CA REDEM VAL") is glued directly onto
    # the first token of the receipt, blurring where the data begins.
    data = (
        "convert this to json. do not include tax or bag charges or any redeem vouchers cost such as CA REDEM VAL"
        + "\n\n"
        + output
    )
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=data,
        config={
            # Force a JSON reply shaped like the Reciepts model.
            "response_mime_type": "application/json",
            "response_schema": Reciepts,
        },
    )
    final_lineitems = response.parsed
    print("respone", response.text)
    print(final_lineitems)
    return final_lineitems

In [156]:
# image_url = "https://media.istockphoto.com/id/1420767944/vector/register-sale-receipt-isolated-on-white-background-cash-receipt-printed.jpg?s=612x612&w=0&k=20&c=eV7CDJK0DZgKo7KVlGTDJeVMN_2xybqIPvt1ATl_kkM="

In [157]:
# Receipt image that the pipeline cell below downloads and processes.
image_url = "https://images.iwaspoisoned.com/148750/tn1200w_1543370032.jpg"

In [158]:
# End-to-end run: download the image, extract its text, then structure the text.
download_image(image_url)
# Hard-coded to mirror download_image's default folder/filename, because the
# call above is not asked for the path; keep the two in sync if either changes.
image_path = "./bills/downloaded_image.jpg"
output = convert_receipt_to_text(image_path)
# Despite the name, this holds the whole parsed receipt, not just its line items.
final_lineitems = convert_text_to_json(output)

Image downloaded successfully: bills/downloaded_image.jpg
respone {
  "image_url": null,
  "date": "11/14/18 12:11pm",
  "shop_name": "Ralphs",
  "total_amount": "26.13",
  "line_items": [
    {
      "name": "KRO AGED SWS SLC RC",
      "cost": 3.50
    },
    {
      "name": "KRO AGED SWS SLC RC",
      "cost": 3.50
    },
    {
      "name": "OCEAN SPRY JC DRNKRC",
      "cost": 2.50
    },
    {
      "name": "KRO WHT CRAN PEACH",
      "cost": 1.99
    },
    {
      "name": "OCEAN SPRY JC DRNKRC",
      "cost": 2.50
    },
    {
      "name": "COKE 6PK RC",
      "cost": 3.00
    },
    {
      "name": "KRO LEAFY ROMAINE RC",
      "cost": 2.29
    },
    {
      "name": "HASS AVOCADO RC",
      "cost": 3.96
    },
    {
      "name": "KRO LEAFY ROMAINE RC",
      "cost": 2.29
    }
  ]
}
image_url=None date='11/14/18 12:11pm' shop_name='Ralphs' total_amount='26.13' line_items=[LineItems(name='KRO AGED SWS SLC RC', cost=3.5), LineItems(name='KRO AGED SWS SLC RC', cost=3.5), LineI

In [159]:
# Only applies when convert_text_to_json returns response.text (the raw JSON string) instead of response.parsed:
# with open("output.json", "w") as file:
#     file.write(final_lineitems)
# final_lineitems

In [160]:
# final = json.loads(final_lineitems)
# print(final)
# Reciepts(**final)

In [161]:
# Validate the parsed result against the Reciepts schema; as the cell's last
# expression, the resulting model is what gets displayed.
Reciepts.model_validate(final_lineitems)

Reciepts(image_url=None, date='11/14/18 12:11pm', shop_name='Ralphs', total_amount='26.13', line_items=[LineItems(name='KRO AGED SWS SLC RC', cost=3.5), LineItems(name='KRO AGED SWS SLC RC', cost=3.5), LineItems(name='OCEAN SPRY JC DRNKRC', cost=2.5), LineItems(name='KRO WHT CRAN PEACH', cost=1.99), LineItems(name='OCEAN SPRY JC DRNKRC', cost=2.5), LineItems(name='COKE 6PK RC', cost=3.0), LineItems(name='KRO LEAFY ROMAINE RC', cost=2.29), LineItems(name='HASS AVOCADO RC', cost=3.96), LineItems(name='KRO LEAFY ROMAINE RC', cost=2.29)])