In [7]:
!pip install openai
!pip install langchain
!pip install langchain_community
!pip install langchain_openai
!pip install tiktoken
!pip install pydantic
!pip install python-dotenv

In [None]:
import os
from dotenv import load_dotenv
from openai import OpenAI

# Option 1: load OPENAI_API_KEY from a .env file (uncomment and set the path).
# load_dotenv(dotenv_path="/full/path/to/your/.env")

# Option 2: on Google Colab, uncomment the line below and replace the
# placeholder with your OpenAI API key. Do not commit a real key.
# os.environ["OPENAI_API_KEY"] = "<your-openai-api-key>"

In [2]:
from pydantic import BaseModel, Field
import openai
import os
from langchain.utils.openai_functions import convert_pydantic_to_openai_function
from langchain_openai.chat_models import ChatOpenAI

# Schema describing the structured fields to pull out of an invoice. It is
# passed to convert_pydantic_to_openai_function further down in this cell.
# NOTE(review): the class docstring and the Field descriptions are presumably
# forwarded to the model as the function/parameter descriptions, so treat them
# as prompt text rather than ordinary documentation — confirm before rewording.
class InvoiceData(BaseModel):
    """Call this function to extract invoice data with specific output structure"""
    # Required string field (`...` marks it as having no default).
    invoice_number: str = Field(..., description="The invoice number extracted from the PDF")
    # Required string field (`...` marks it as having no default).
    vat_registration_number: str = Field(..., description="The VAT registration number extracted from the PDF")

# Sample plain-text invoice used as the extraction input. It contains both
# values the InvoiceData schema asks for: the invoice number (INV-123456) and
# the VAT registration number (VAT-7890123).
fake_invoice_data = """
Invoice Summary
==============
Invoice No: INV-123456
Date: 2024-02-11
Billing To: ACME Corporation
Address: 123 Business Rd, Business City, BC1234

Product Description       Quantity    Unit Price    Total
-----------------------------------------------------------
Widget A                  10          $15.00        $150.00
Gadget B                  5           $20.00        $100.00

Subtotal:                                          $250.00
Tax (10%):                                          $25.00
Total:                                             $275.00

VAT Registration Number: VAT-7890123

Thank you for your business!

"""


# Convert the InvoiceData schema into an OpenAI function definition that can be
# sent along with the chat request.
invoice_extractor = convert_pydantic_to_openai_function(InvoiceData)

# temperature=0 keeps the extraction as repeatable as possible between runs.
# The API key is read from the OPENAI_API_KEY environment variable.
llm_chat = ChatOpenAI(temperature=0)

# Force the model to answer with a call to the extractor function. Without
# `function_call`, the model may reply in free text, in which case
# `additional_kwargs["function_call"]` is absent and the next cell raises
# KeyError.
output_invoice = llm_chat.invoke(
    f"Invoice: {fake_invoice_data}. Extracted data:\n",
    functions=[invoice_extractor],
    function_call={"name": invoice_extractor["name"]},
)

In [6]:
# Display the raw function-call payload from the model's reply: a dict holding
# the called function's "name" and its "arguments" as a JSON-encoded string.
output_invoice.additional_kwargs["function_call"]

{'arguments': '{"invoice_number":"INV-123456","vat_registration_number":"VAT-7890123"}',
 'name': 'InvoiceData'}

AIMessage(content='', additional_kwargs={'function_call': {'name': 'InvoiceData', 'arguments': '{\n  "invoice_number": "INV-123456",\n  "vat_registration_number": "VAT-7890123"\n}'}})