In [1]:
pip install pydantic

Collecting pydantic
  Downloading pydantic-2.5.2-py3-none-any.whl (381 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m381.9/381.9 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting typing-extensions>=4.6.1
  Downloading typing_extensions-4.8.0-py3-none-any.whl (31 kB)
Collecting pydantic-core==2.14.5
  Downloading pydantic_core-2.14.5-cp39-cp39-macosx_10_7_x86_64.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting annotated-types>=0.4.0
  Downloading annotated_types-0.6.0-py3-none-any.whl (12 kB)
Installing collected packages: typing-extensions, annotated-types, pydantic-core, pydantic
  Attempting uninstall: typing-extensions
    Found existing installation: typing_extensions 4.3.0
    Uninstalling typing_extensions-4.3.0:
      Successfully uninstalled typing_extensions-4.3.0
Successfully installed annotated-types-0.6.0 p

In [2]:
pip install langchain

Collecting langchain
  Downloading langchain-0.0.341-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m0m
Collecting tenacity<9.0.0,>=8.1.0
  Downloading tenacity-8.2.3-py3-none-any.whl (24 kB)
Collecting dataclasses-json<0.7,>=0.5.7
  Downloading dataclasses_json-0.6.3-py3-none-any.whl (28 kB)
Collecting langsmith<0.1.0,>=0.0.63
  Downloading langsmith-0.0.67-py3-none-any.whl (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.0/47.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting jsonpatch<2.0,>=1.33
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-core<0.0.7,>=0.0.6
  Downloading langchain_core-0.0.6-py3-none-any.whl (174 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m174.2/174.2 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
Collecting typing-inspect<1,>=0.4.0
  Downloading typing_inspect-0.9.

In [6]:
from pydantic import BaseModel, Field
from typing import List
from enum import Enum

# A nested entry listed under a parent Item (used as the element type of
# Item.includedItems).
class SubItem(BaseModel):
    # Text of the sub-item. Declared without a default, so it must be supplied.
    description: str

# Payment methods a receipt can record. Each member's value is the lowercase
# string form of that method.
class PaymentType(Enum):
    CREDIT = 'credit'
    DEBIT = 'debit'
    CASH = 'cash'
    
# One purchased line on a receipt. Every field carries a default, so an Item
# can be constructed from whichever values happen to be available.
class Item(BaseModel):
    # Item name.
    description: str = Field(
        default="<UNKNOWN>",
        description="name",
    )
    # NOTE(review): reuses the description text "name" from the field above;
    # confirm whether a more specific text was intended.
    predictedDescription: str = Field(
        default="<UNKNOWN>",
        description="name",
    )
    # Nested sub-items; default_factory builds a new empty list per instance.
    includedItems: List[SubItem] = Field(default_factory=list)
    quantity: int = Field(
        default=0,
        description="number of items",
    )
    # Monetary amounts, all defaulting to zero.
    unitPrice: float = Field(
        default=0.0,
        description="cost per unit",
    )
    totalPrice: float = Field(
        default=0.0,
        description="total cost of unit(s) purchased",
    )
    discountAmount: float = Field(
        default=0.0,
        description="discount for item",
    )

# Structured representation of a single receipt.
#
# Required fields (declared without a default): merchant, address, city,
# state and ITEMS. Every other field falls back to its declared default when
# it is omitted.
class ReceiptInfo(BaseModel):
    # --- Merchant details (required) ---
    merchant: str = Field(description="name of merchant")
    address: str = Field(description="address")
    city: str = Field(description="city")
    state: str = Field(description="state")
    phoneNumber: str = Field(default="<UNKNOWN>", description="phone number")

    # --- Purchase metadata; date and time are kept as plain strings ---
    receiptDate: str = Field(default="1/01/1991", description="purchase date")
    receiptTime: str = Field(default="00:00:00", description="time purchased")
    totalItems: int = Field(default=0, description="number of items")
    diningOptions: str = Field(default="None", description="here or to-go items for consumable items")

    # --- Payment ---
    # The default is the enum member rather than the raw string "cash":
    # pydantic does not validate default values unless asked to, so a string
    # default would leave a plain str in a field typed as PaymentType, while
    # explicitly supplied values are converted to PaymentType members.
    paymentType: PaymentType = Field(default=PaymentType.CASH, description="payment method")
    creditCardType: str = Field(default="<UNKNOWN>", description="credit card type")

    # --- Totals ---
    totalDiscount: float = Field(default=0.00, description="total discount")
    tax: float = Field(default=0.00, description="tax amount")
    total: float = Field(default=0.00, description="total amount paid")

    # Purchased line items (required).
    ITEMS: List[Item]
    
# Fields declared without a default value are REQUIRED: merchant, address, city, state and ITEMS on ReceiptInfo, and description on SubItem. Every other field falls back to its declared default when omitted.

In [7]:
from langchain.output_parsers import PydanticOutputParser

# Build an output parser bound to the ReceiptInfo model.
receiptParser = PydanticOutputParser(pydantic_object=ReceiptInfo)
# Show the formatting instructions (including the model's JSON schema) that
# the parser generates for that model.
print(receiptParser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Item": {"properties": {"description": {"default": "<UNKNOWN>", "description": "name", "title": "Description", "type": "string"}, "predictedDescription": {"default": "<UNKNOWN>", "description": "name", "title": "Predicteddescription", "type": "string"}, "includedItems": {"items": {"$ref": "#/$defs/SubItem"}, "title": "Includeditems", "type": "array"}, "quantity": {"default": 0, "description": "number of items", "title": "Quantity", "type": "integer"}, "unitPrice": {"default": 0.0, "description": "cost per unit", "title": "Unitprice"

In [8]:
# Two sample line items. Fields that are not passed (predictedDescription,
# includedItems) keep their declared defaults.
item_1 = Item(
    description="PEDANTIC PEAS",
    quantity=1,
    unitPrice=5.00,
    totalPrice=5.00,
    discountAmount=0.00,
)
item_2 = Item(
    description="CRAZY CARROTS",
    quantity=1,
    unitPrice=5.00,
    totalPrice=5.00,
    discountAmount=0.00,
)

# Sample receipt built by hand; diningOptions is omitted and so takes its
# default. paymentType is passed as a string and stored as a PaymentType member.
receiptInfo = ReceiptInfo(
    merchant="Walmart",
    address="123 Manoa Rd",
    city="Honolulu",
    state="HI",
    phoneNumber="1234567890",
    receiptDate="01/01/1999",
    receiptTime="12:00am",
    totalItems=2,
    paymentType="credit",
    creditCardType="visa",
    tax=1.00,
    total=10.00,
    totalDiscount=0.01,
    ITEMS=[item_1, item_2],
)
print(receiptInfo)

merchant='Walmart' address='123 Manoa Rd' city='Honolulu' state='HI' phoneNumber='1234567890' receiptDate='01/01/1999' receiptTime='12:00am' totalItems=2 diningOptions='None' paymentType=<PaymentType.CREDIT: 'credit'> creditCardType='visa' totalDiscount=0.01 tax=1.0 total=10.0 ITEMS=[Item(description='PEDANTIC PEAS', predictedDescription='<UNKNOWN>', includedItems=[], quantity=1, unitPrice=5.0, totalPrice=5.0, discountAmount=0.0), Item(description='CRAZY CARROTS', predictedDescription='<UNKNOWN>', includedItems=[], quantity=1, unitPrice=5.0, totalPrice=5.0, discountAmount=0.0)]
