In [1]:
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..", "src")))

import io
import logging
from pathlib import Path

from dotenv import load_dotenv
from PIL import Image
import nest_asyncio
nest_asyncio.apply()

from pydantic import BaseModel, Field, PositiveInt
from pydantic_ai import Agent, BinaryContent
from pydantic_ai.models.bedrock import BedrockConverseModel
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.providers.bedrock import BedrockProvider

from service.invoice.prompts import PAGE_TEMPLATE, SYSTEM_MESSAGE_1, SYSTEM_MESSAGE_2, USER_MESSAGE_1
from service.invoice.utility import get_secret_keys, image_to_byte_string

In [2]:
load_dotenv()

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler()],
)


In [8]:
model2 = OpenAIModel("gpt-4o")
MODEL_ID = "us.meta.llama3-2-90b-instruct-v1:0"

model = BedrockConverseModel(
    model_name=MODEL_ID,
    provider=BedrockProvider(**get_secret_keys()),
)

agent = Agent(
    model=model,
    system_prompt=SYSTEM_MESSAGE_1,
    result_type=str,
    model_settings={"temperature": 0},
)

In [9]:
image_name = "C:/Users/rdas6/OneDrive/Desktop/codespace/vyturr/temp/pdf2img/Invoice-Copy-20/Page_01.png"
img_byte, mimetype = image_to_byte_string(image_name)

input_msg = [
    USER_MESSAGE_1,
    # ImageUrl(img_url),
    BinaryContent(data=img_byte, media_type="image/png"),
]
result = agent.run_sync(input_msg)  # type: ignore
data_=result.data
print(data_)



**Invoice Details:**

1. **Invoice Number:** NOT_AVAILABLE
2. **Invoice Date:** 08/04/2021
3. **Seller Details:**
	* **Company Name:** Transitech Private Limited
	* **GST No:** 33AAFCT5065J1Z7
	* **PAN No:** NOT_AVAILABLE
	* **Address:** Regd. & Corp. Office: Plot No. 18-20, Ambit Park Road, Ambattur Industrial Estate, Chennai - 600 058. Tamilnadu, India.
	* **Phone Number:** +91-44-4203 1951
	* **Email:** info@transitech.in
4. **Buyer Details:**
	* **Company Name:** Billionloans Financial Services Pvt Ltd
	* **GST No:** NOT_AVAILABLE
	* **PAN No:** NOT_AVAILABLE
	* **Address:** 370, 4th Cross, J P Nagar III Phase, Bangalore 560 078 India
	* **Phone Number:** NOT_AVAILABLE
	* **Email:** NOT_AVAILABLE
5. **Item Details:**
	* **1. Description:** UPS SCS (India) Pvt Ltd, Quantity: 20 FEET, Price: 27000.00, Currency: INR
	* **2. Description:** UPS SCS (India) Pvt Ltd, Quantity: 20 FEET, Price: 27000.00, Currency: INR
	* **3. Description:** UPS SCS (India) Pvt Ltd, Quantity: 20 FEET, Price:

In [10]:
result.usage()

Usage(requests=1, request_tokens=661, response_tokens=602, total_tokens=1263, details=None)

Usage(requests=1, request_tokens=1735, response_tokens=561, total_tokens=2296, details={'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0, 'cached_tokens': 0})

In [46]:
image_name = "C:/Users/rdas6/OneDrive/Desktop/codespace/vyturr/temp/img/Invoice-Copy-19/Page_02.png"
img_byte, mimetype = image_to_byte_string(image_name)

input_msg = [
    USER_MESSAGE,
    # ImageUrl(img_url),
    BinaryContent(data=img_byte, media_type="image/png"),
]
result = agent.run_sync(input_msg)  # type: ignore

data_ += f'\n--------Page: No 3----------\n {result.data} \n'
print(result.data)


1. Invoice Number: 21-22/0179
2. Invoice Date: 15-4-2021
3. Seller Details:
   - Company Name: TRANSITECH PRIVATE LIMITED
   - GST No: 33AAFC15065J1Z7
   - PAN No: NOT_AVAILABLE
   - Address: Plot No. 18-20, Ambit Park Road, Ambattur Industrial Estate, Chennai 600 058
   - Phone Number: 7550015666 / 8939601566
   - Email: support@freightx.in
4. Buyer Details:
   - Company Name: Linfox Logistics India Pvt Ltd
   - GST No: 33AABCL1515H1Z1
   - PAN No: NOT_AVAILABLE
   - Address: C/O Vinplex India Pvt.Ltd. 86, Maduravasal Village, Uthukottai Taluk, Penyapalayam, Tiruvallur, Tamilnadu
   - Phone Number: NOT_AVAILABLE
   - Email: NOT_AVAILABLE
5. Item Details:
   - 1. Description: VHC/MMDH/2122/000747, Quantity: 1, Price: Rs.14000.00
6. Total Tax:
   - CGST: 0.00
   - SGST: 0.00
   - IGST: 0.00
7. Total Charges: NOT_AVAILABLE
8. Total Discount: NOT_AVAILABLE
9. Total Amount: Rs.14000.00
10. Amount Paid: Rs.14000.00
11. Amount Due: Rs.0.00


In [31]:

print(data_)

1. Invoice Number: 1118212440009383
2. Invoice Date: 21-MAR-2022
3. Seller Details:
   - Company Name: VRRDDHI FREIGHT PVT LTD
   - GST No - NOT_AVAILABLE
   - PAN No - NOT_AVAILABLE
   - Address: 28 A 116, Egato Trade Centre, New No. 318, Poonamallee High Road, Kilpauk, Chennai - 600 010, Tamil Nadu
   - Phone Number: +91 44 25003622
   - Email: seller@abc.co.in
4. Buyer Details:
   - Company Name: SUNDARAM CLAYTON LIMITED
   - GST No - NOT_AVAILABLE
   - PAN No - NOT_AVAILABLE
   - Address: PADI CHENNAI - 600050
   - Phone Number: NOT_AVAILABLE
   - Email: NOT_AVAILABLE
5. Item Details:
   - 1. Description: FREIGHT CHARGE, Quantity: 1.000, Price: 8700.00
   - 2. Description: DESTINATION CHARGES, Quantity: 1.000, Price: 1600.00
   - 3. Description: ORIGIN LOCAL CHARGES, Quantity: 1.000, Price: 4702.00
   - 4. Description: FUMIGATION CHARGES, Quantity: 1.000, Price: 3500.00
6. Total Tax:
   - CGST: 0.00
   - SGST: 0.00
   - IGST: 0.00
7. Total Charges: NOT_AVAILABLE
8. Total Discount: 

In [33]:
class Item(BaseModel):
    """
    Structured model for summarizing item details in the invoice.
    """

    slno: PositiveInt
    description: str = Field(default="Not Available", description="Description of the item")
    quantity: str = Field(default="Not Available", description="Quantity of the item")
    price: str = Field(default="Not Available", description="Price of the item")
    currency: str = Field(default="INR", description="Currency of the price")


class CompanyDetails(BaseModel):
    """
    Structured model for summarizing company details.
    """

    name: str = Field(default="Not Available", description="Name of the company")
    gst_no: str = Field(default="Not Available", description="GST No of the company")
    pan_no: str = Field(default="Not Available", description="PAN No of the company")
    address: str = Field(default="Not Available", description="Address of the company")
    phone_number: str = Field(default="Not Available", description="Phone number of the company")
    email: str = Field(default="Not Available", description="Email of the company")


class TaxComponents(BaseModel):
    """
    Structured model for summarizing tax components.
    """

    CGST: float = Field(default=0.0, description="Central Goods and Services Tax")
    SGST: float = Field(default=0.0, description="State Goods and Services Tax")
    IGST: float = Field(default=0.0, description="Integrated Goods and Services Tax")


class Invoice(BaseModel):
    """
    Structured model for summarizing invoice details.
    """

    invoice_number: str = Field(description="Invoice number")
    invoice_date: str = Field(default="Not Available", description="Invoice date")
    seller_details: CompanyDetails = Field(
        default_factory=lambda: CompanyDetails(), description="Details of the seller Comapny"
    )
    buyer_details: CompanyDetails = Field(default_factory=lambda: CompanyDetails(), description="Details of the buyer Company")
    items: list[Item] = Field(default_factory=list, description="List of items in the invoice")
    total_tax: TaxComponents = Field(default_factory=lambda: TaxComponents(), description="Total tax components")
    total_charge: float = Field(default=0.0, description="Total charges")
    total_discount: float = Field(default=0.0, description="Total discount applied")
    total_amount: float = Field(default=0.0, description="Total amount of the invoice")
    amount_paid: float = Field(default=0.0, description="Amount paid")
    amount_due: float = Field(default=0.0, description="Amount due")
    page_no: int = Field(default=0.0, description="Page no , Where the Information Available")


class InvoiceData(BaseModel):
    """Structured model for summarizing invoice details from List of pages."""
    details: list[Invoice] = Field(description="Deatils of invoice present per page", default_factory=list)



In [None]:
from pydantic_ai import ModelRetry

model = OpenAIModel("gpt-4o")

SYSTEM_MESSAGE_2 = """
Kindly extract invoice details from the pages and Return the results in valid JSON Format.
"""

agent1 = Agent[InvoiceData](
    model=model,
    system_prompt=SYSTEM_MESSAGE_2,
    result_type=Invoice,
    model_settings={"temperature": 0},
)

@agent1.result_validator
async def validate_result(result) :
    if isinstance(result, InvoiceData):
        return result
    return ModelRetry('Final result Format is not Correct ') 

result1 = agent.run_sync([result.data])

In [48]:
print(result1.data)

Here is the extracted invoice details in the requested format:

```
1. Invoice Number: 21-22/0179
2. Invoice Date: 15-4-2021
3. Seller Details:
   - Company Name: TRANSITECH PRIVATE LIMITED
   - GST No: 33AAFC15065J1Z7
   - PAN No: NOT_AVAILABLE
   - Address: Plot No. 18-20, Ambit Park Road, Ambattur Industrial Estate, Chennai 600 058
   - Phone Number: 7550015666 / 8939601566
   - Email: support@freightx.in
4. Buyer Details:
   - Company Name: Linfox Logistics India Pvt Ltd
   - GST No: 33AABCL1515H1Z1
   - PAN No: NOT_AVAILABLE
   - Address: C/O Vinplex India Pvt.Ltd. 86, Maduravasal Village, Uthukottai Taluk, Penyapalayam, Tiruvallur, Tamilnadu
   - Phone Number: NOT_AVAILABLE
   - Email: NOT_AVAILABLE
5. Item Details:
   - 1. Description: VHC/MMDH/2122/000747, Quantity: 1, Price: 14000.00
6. Total Tax:
   - CGST: 0.00
   - SGST: 0.00
   - IGST: 0.00
7. Total Charges: NOT_AVAILABLE
8. Total Discount: NOT_AVAILABLE
9. Total Amount: 14000.00
10. Amount Paid: 14000.00
11. Amount Due: 0