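This notebook sends an invoice image to a multimodal chat model through LangChain and parses the reply into a typed `Invoice` object. Background reading: [LangChain docs on multi-modal inputs](https://python.langchain.com/docs/how_to/multimodal_inputs/)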

In [1]:
# Install the LangChain packages into the kernel's environment. No versions are
# pinned, so a later re-run may resolve to different releases.
# NOTE(review): langchain-community is installed but not imported in the
# visible cells — confirm it is still needed.
%pip install langchain langchain-community langchain-openai

Note: you may need to restart the kernel to use updated packages.


In [2]:
from pydantic import BaseModel, constr, condecimal
from typing import List

# One line item of an invoice (Invoice.items is a list of these).
# In the sample output subtotal equals quantity * price, but nothing in this
# model enforces that relationship.
# NOTE(review): gt=0 rejects zero or negative amounts, so free items or
# discount lines would fail validation — confirm that is intended.
class Item(BaseModel):
    quantity: int
    description: str
    # Decimal that must be > 0 with at most 2 decimal places. condecimal() is a
    # call expression used as an annotation, which is why the trailing
    # type-checker suppression is needed on these two lines.
    price: condecimal(gt=0, decimal_places=2) # type: ignore
    subtotal: condecimal(gt=0, decimal_places=2) # type: ignore

# Bank details attached to an invoice (used as Invoice.payment_info).
# Both fields are strings of exactly 9 characters (min_length == max_length);
# no digits-only pattern is enforced, so any 9-character string passes.
# NOTE(review): the fixed length of 9 for account_number fits the sample
# invoice, but real account numbers may have other lengths — confirm before
# using this on arbitrary invoices.
class PaymentInfo(BaseModel):
    account_number: constr(min_length=9, max_length=9) # type: ignore
    routing_number: constr(min_length=9, max_length=9) # type: ignore

# Contact details for a company (used as Invoice.company).
# Presumably the business issuing the invoice, since the client is a separate
# field on Invoice — confirm.
# Every field is a free-form string: email, phone and website are not validated
# or normalised, and address may contain embedded newlines (see sample output).
class Company(BaseModel):
    name: str
    address: str
    email: str
    phone: str
    website: str

# Top-level schema for a parsed invoice; this is the model handed to
# with_structured_output() in a later cell.
# Documented with `#` comments rather than a docstring on purpose: a class
# docstring would become part of the generated schema description.
class Invoice(BaseModel):
    number: str
    # Kept as free text, not parsed into a date (sample value: 'May 24th, 2024').
    date: str
    client: str
    items: List[Item]
    # Decimal > 0 with at most 2 decimal places. It is not cross-checked against
    # the sum of the item subtotals.
    total_due: condecimal(gt=0, decimal_places=2) # type: ignore
    # Also free text, same form as `date`.
    due_by: str
    payment_info: PaymentInfo
    company: Company

In [3]:
from langchain_openai import ChatOpenAI

# Chat model used for the extraction. No API key is passed here, so the client
# has to obtain its credentials from the environment (LangChain documents the
# OPENAI_API_KEY variable for this) — make sure it is set before running.
# temperature=0 keeps the extraction as repeatable as the API allows, so that
# re-running the notebook does not return a differently sampled parse.
llm = ChatOpenAI(model="gpt-4o", temperature=0)

# Wrap the model so invoke() returns data validated against the Invoice schema
# instead of a free-text chat message.
structured_llm = llm.with_structured_output(Invoice)

In [4]:
import base64

# Load the invoice picture as raw bytes first, then base64-encode those bytes
# and turn the result into text so it can be embedded in a data URL.
with open("files/invoice.png", "rb") as image_file:
    image_bytes = image_file.read()

invoice_image = base64.b64encode(image_bytes).decode('utf-8')

In [5]:
from langchain_core.messages import HumanMessage

# A single user turn with two content parts: a short text instruction and the
# invoice image inlined as a base64 PNG data URL.
text_part = {"type": "text", "text": "Parse this image."}
image_part = {
    "type": "image_url",
    "image_url": {"url": f"data:image/png;base64,{invoice_image}"},
}
messages = [HumanMessage(content=[text_part, image_part])]

# Run the structured model; both names are bound to the same returned object.
invoice = response = structured_llm.invoke(messages)

In [6]:
# Show the parsed result. print() uses the model's str() form, which is the
# flat `field=value ...` listing recorded in this cell's output.
print(invoice)

number='12345' date='May 24th, 2024' client='Client Name' items=[Item(quantity=2, description='Blue large widgets', price=Decimal('15'), subtotal=Decimal('30')), Item(quantity=4, description='Green medium widgets', price=Decimal('10'), subtotal=Decimal('40')), Item(quantity=5, description='Red small widgets with logo', price=Decimal('7'), subtotal=Decimal('35'))] total_due=Decimal('105') due_by='May 30th, 2024' payment_info=PaymentInfo(account_number='123567744', routing_number='120000547') company=Company(name='Anvil Co', address='123 Main Street\nSan Francisco CA, 94103', email='hello@useanvil.com', phone='555 444 6666', website='useanvil.com')
