In [52]:
from typing import List
from pydantic import BaseModel, Field
from langchain_community.utils.openai_functions import convert_pydantic_to_openai_function

from dotenv import load_dotenv
load_dotenv()

from langchain_openai import ChatOpenAI

In [53]:
from langchain_community.document_loaders import PyPDFLoader
from datetime import datetime
loader = PyPDFLoader("data\invoice_1item.pdf")
pages = loader.load_and_split()

  loader = PyPDFLoader("data\invoice_1item.pdf")


In [54]:
pages

[Document(metadata={'source': 'data\\invoice_1item.pdf', 'page': 0}, page_content='Tax Invoice/Bill of Supply/Cash Memo\n(Original for Recipient)\n*ASSPL-Amazon Seller Services Pvt. Ltd., ARIPL-Amazon Retail India Pvt. Ltd. (only where Amazon Retail India Pvt. Ltd. fulfillment center is co-located) \nCustomers desirous of availing input GST credit are requested to create a Business account and purchase on Amazon.in/business from Business eligible offers \nPlease note that this invoice is not a demand for payment\nPage 1 of 1For ASHVINKUMAR BHAGVANBHAI GOVINDA:\nAuthorized Signatory\nOrder Number:  403-9087323-7735518 Invoice Number :  AMD2-28157\nOrder Date:  05.03.2023 Invoice Details :  GJ-AMD2-1369143095-2223\nInvoice Date :  05.03.2023\nSl.\nNoDescriptionUnit\nPriceQtyNet\nAmountTax\nRateTax\nTypeTax\nAmountTotal\nAmount\n1Jialto Adhesive Nails Wall Poster Non-Trace Stick Wall Hook, Adhesive\nHooks, , Heavy Duty Wall Hooks for Hanging, Durable Practical\nTransparent Strong Adhesive

In [55]:
class Extraction(BaseModel):
    """ Extract the entities from the invoice document"""
    transaction_id :str =Field(description="Payment Transaction ID of the invoice document file")
    amount : float = Field(description="Total Invoice value of the invoice document file")
    datetime_field: datetime = Field(description="The date and time (24-hour format) when the payment was made.")
    mode_of_payment :str=Field(description="Tells about the mode of payment the user proceeeded")

model = ChatOpenAI(temperature=0)
function = [convert_pydantic_to_openai_function(Extraction)]

In [56]:
prompt_template="""You are tasked with extracting specific fields from an invoice document. The fields you need to extract are:

1. Transaction ID
2. Amount
3. Date and Time (combined)
4. Mode of Payment

Please extract the following fields from the given invoice document. If any field cannot be extracted, set its value to `null`. 

Invoice Document :{Document}
"""


from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-3.5-turbo")
from langchain_core.prompts import ChatPromptTemplate
prompt_template = ChatPromptTemplate.from_template(prompt_template)
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
llm_model = (
      prompt_template
      | model.bind(functions=function) 
      | JsonOutputFunctionsParser() 
  )

In [57]:
llm_model.invoke({"Document":pages})

{'transaction_id': '1113eiI7qUaD9rDLu6KLV5g2z',
 'amount': 158.0,
 'datetime_field': '05/03/2023, 11:13:29',
 'mode_of_payment': 'GiftCard'}

In [58]:
class Product(BaseModel):
    """Information about a Product."""
    name: str = Field(description="Product Name of a product")
    age: float = Field(description="Total amount of the product")
    
class Extraction_with_products(BaseModel):
    """ Extract the entities from the invoice document"""
    transaction_id :str =Field(description="Payment Transaction ID of the invoice document file")
    amount : float = Field(description="Total Invoice value of the invoice document file")
    datetime_field: datetime = Field(description="The date and time (24-hour format) when the payment was made.")
    mode_of_payment :str=Field(description="Tells about the mode of payment the user proceeeded")
    Products:List[Product] =Field(description="Tells about the details of list of products")

model2 = ChatOpenAI(temperature=0)
function2 = [convert_pydantic_to_openai_function(Extraction_with_products)]

prompt_template1="""You are tasked with extracting specific fields from an invoice document. The fields you need to extract are:

1. Transaction ID
2. Amount
3. Date and Time (combined)
4. Mode of Payment
5. 5. List of Products (each product has a Name and Amount)

Please extract the following fields from the given invoice document. If any field cannot be extracted, set its value to `null`. 

Invoice Document :{Document}
"""

from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain_core.prompts import ChatPromptTemplate
prompt_template1 = ChatPromptTemplate.from_template(prompt_template1)
llm_model2 = (
      prompt_template1
      | model2.bind(functions=function2) 
      | JsonOutputFunctionsParser() 
  )

llm_model2.invoke({"Document":pages})

{'transaction_id': '1113eiI7qUaD9rDLu6KLV5g2z',
 'amount': 158,
 'datetime_field': '05/03/2023, 11:13:29',
 'mode_of_payment': 'GiftCard',
 'Products': [{'name': 'Jialto Adhesive Nails Wall Poster Non-Trace Stick Wall Hook, Adhesive Hooks, , Heavy Duty Wall Hooks for Hanging, Durable Practical Transparent Strong Adhesive Screw Wall Nail 6 Pcs | B08X49Y75V (WB-GREO-8XLU )',
   'age': 158}]}