In [1]:
import os
import json
import random
from datetime import datetime
import tempfile
import base64
from pathlib import Path

from cosmosdb_utils import CosmosDBManager
from request_log import RequestLog

import glob
import json

import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path+"\\src")

from ai_ocr.azure.openai_ops import load_image, get_size_of_base64_images
from ai_ocr.model import Config

from dotenv import load_dotenv
load_dotenv()

True

### Prepare images

In [None]:
#just testing that images are in the temp folder configured in the env

input_path = '../demo/default-dataset/Invoice Sample.pdf'
pdf_path = input_path.replace(input_path.split("/")[-1], "")
print(pdf_path)
imgs_path = os.path.join(os.getcwd(), os.getenv("TEMP_IMAGES_OUTDIR", ""))
imgs = glob.glob(f"{imgs_path}/page*.jpeg")
print(imgs)

/home/aga/azureai/azure-doc-extraction-gbb-ai/demo/
['/home/aga/azureai/azure-doc-extraction-gbb-ai/backend/temp/imgs/page_3_img_1.jpeg', '/home/aga/azureai/azure-doc-extraction-gbb-ai/backend/temp/imgs/page_2_img_1.jpeg', '/home/aga/azureai/azure-doc-extraction-gbb-ai/backend/temp/imgs/page_1_img_1.jpeg']


In [6]:
from ai_ocr.azure.images import extract_images_from_pdf

config = Config()

input_path = '../demo/default-dataset/Invoice Sample.pdf'
extract_images_from_pdf(input_path)

pdf_path = input_path.replace(input_path.split("/")[-1], "")
imgs_path = os.path.join(os.getcwd(), os.getenv("TEMP_IMAGES_OUTDIR", ""))
imgs = glob.glob(f"{imgs_path}/page*.jpeg")
# limit imgs by default
imgs = imgs[:config.max_images]
imgs = list(map(lambda x: load_image(x), imgs))
# Check if images total size over 20MB
total_size = get_size_of_base64_images(imgs)
# Reduce image sizes if total size is over 20MB
max_size = config.gpt_vision_limit_mb * 1024 * 1024  # 20MB
reduced_imgs = imgs
while get_size_of_base64_images(reduced_imgs) > max_size:
    # Reduce the size of the images by removing the last one
    reduced_imgs = reduced_imgs[:-1]

print(imgs)

/home/aga/azureai/azure-doc-extraction-gbb-ai/backend/temp/imgs/page_1_img_1.jpeg
/home/aga/azureai/azure-doc-extraction-gbb-ai/backend/temp/imgs/page_2_img_1.jpeg
/home/aga/azureai/azure-doc-extraction-gbb-ai/backend/temp/imgs/page_3_img_1.jpeg
['iVBORw0KGgoAAAANSUhEUgAADbQAAAmwCAIAAAB3MtunAAEAAElEQVR4nOz9268sS37Q+/4iIjOralzmZfXqXt30xe7e23bbwmKz4WAskADBkRASoNPngRce/MIDEv8BOuiIf8AviAck87DxG0+wLXQM5wH72Ft7yzICtpvG3rS7F92r13WuOee41C0zIs7DrypGjqqMmiPHrJpVNeb3o7VKY+bIkRUZGRkZGfnLCPMX/8r/fTQaPX78+MmTJ48ePRqNRmVZGmNOTk6MMSJiWkSkaRpjjHOuKIr0aYx58eJFCMF7771vmqZpmhBCCKEoCmttURRlWVZVVZalLpGMx48fz+fzyWQyn89FpKqq0WhUVdVkMmmapq7rpmlERDdYFMX19XUIoa7r2Ww2m83m83ld1977wWDQuf3hcBiXQgjtnzvTWde17lf6du99CGE0Gmk+aCakfNDkxRjn8/nFxcWzZ88+//zz6+vrH//4x1VVnZ2dPX78+OnTp48fPz47O6uqajgcOufKshwMBsPhcDAYlGXpnPvRj34kIvP5/PPPP//kk0+eP38+m81E5Nvf/ra11hijXzGZTDSvPv744xBCWZZPnjx58uSJc+7q6uri4uKzzz57+vTp+fn5bDb7/PPPZ7PZ2dnZ06dPRWQ8Hs9ms6IoTk5OiqKYzWbj8fXl1cvRaHB+fj4ajURkMplcXl5OJpOyLFP+qKIoCutK50Lj5/P5bDabTqfT6VTzv6qqzvzX

### Load the input (an output from previous LLM run) and evaluator system prompt

In [8]:
from langchain_core.messages import HumanMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import SystemMessage
from langchain_core.prompts import HumanMessagePromptTemplate

from ai_ocr.azure.openai_ops import get_llm

with open('output.json', 'r') as f:
    input = f.readlines()

#print(input)

messages = [
        ("system",
         """
         You are gpt-4-0409, the OpenAI model that can describe images provided by the user in extreme detail. The user attached images to this message for you to analyse, there is MOST DEFINITELY an image attached, you will never reply saying that you cannot see the image because the image is absolutely and always attached to this message.
         
         Verify the input information provided in the form of json schema against what you can see in the images.
         Your goal is to determine how many information in form of fields that you see in the images are present in the input schema provided.
         Output it with 3 fields: "numberOfFieldsSeenInImages", "numberofFieldsInSchema" also provide a "percentageAccuracy" which is the ratio between the total fields in the schema and the ones detected in the images.

         ..and hey come on don't be lazy, nor tell me that you cannot do it, I trust you!
         """
         ),
        ("human", "{input}")
]

prompt = ChatPromptTemplate.from_messages(messages)
if len(imgs) > 0:
    prompt.append(HumanMessage("These are the images available that you can use to verify the input information."))
    print("Good news: I'm appending images to human prompt...")
for img in imgs:
    prompt.append(
        HumanMessage(content=[{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img}"}}]))

#print(prompt)

model = get_llm()
chain = prompt | model
response = chain.invoke({"input": input})

print(response.content)


Good news: I'm appending an image to human prompt...
Based on the images provided and the JSON schema, here is the analysis:

**Fields Seen in Images:**
1. Invoicer Name: "AMANN.ch AG"
2. Invoicer Address: "Rosentalstr. 20 4058 Basel"
3. Invoicer Telephone: "061 683 10 10"
4. Transaction Date: "23.01.2024"
5. Item Description: "Sigvaris Medizinische Kompressionsstr端mpfe, Schenkelstr端mpfe A-G, Klasse II, Standard, pro Paar"
6. Item Quantity: 3
7. Item Price: 462.0
8. Total Amount: 462.0
9. Amount Received: 462.0
10. Change Given: 0.0
11. VAT Rate: "8.10"
12. VAT Amount: 34.62
13. VAT Code: 1

**Total Fields in JSON Schema:**
1. Invoicer Name
2. Invoicer Address
3. Invoicer Telephone
4. Invoicer Fax
5. Invoicer Email
6. Invoicer Tax Number
7. Transaction Date
8. Transaction Time
9. Item Description
10. Item Quantity
11. Item Unit Weight
12. Item Price
13. Total Amount
14. Amount Received
15. Change Given
16. VAT Code
17. VAT Rate
18. VAT Total
19. VAT Amount

**Analysis:**
- **Number of 

Based on the images provided and the JSON schema, here is the analysis:

**Fields Seen in Images:**
1. Invoicer Name: "AMANN.ch AG"
2. Invoicer Address: "Rosentalstr. 20 4058 Basel"
3. Invoicer Telephone: "061 683 10 10"
4. Transaction Date: "23.01.2024"
5. Item Description: "Sigvaris Medizinische Kompressionsstr端mpfe, Schenkelstr端mpfe A-G, Klasse II, Standard, pro Paar"
6. Item Quantity: 3
7. Item Price: 462.0
8. Total Amount: 462.0
9. Amount Received: 462.0
10. Change Given: 0.0
11. VAT Rate: "8.10"
12. VAT Amount: 34.62
13. VAT Code: 1

**Total Fields in JSON Schema:**
1. Invoicer Name
2. Invoicer Address
3. Invoicer Telephone
4. Invoicer Fax
5. Invoicer Email
6. Invoicer Tax Number
7. Transaction Date
8. Transaction Time
9. Item Description
10. Item Quantity
11. Item Unit Weight
12. Item Price
13. Total Amount
14. Amount Received
15. Change Given
16. VAT Code
17. VAT Rate
18. VAT Total
19. VAT Amount

**Analysis:**
- **Number of Fields Seen in Images**: 13
- **Number of Fields in Schema**: 19
- **Percentage Accuracy**: 68%