In [1]:
pip install pydantic

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install langchain

Note: you may need to restart the kernel to use updated packages.


In [3]:
from pydantic import BaseModel, Field
from typing import List
from enum import Enum

class SubItem(BaseModel):
    # One entry of an Item's `includedItems` list.
    # `description` declares no default, so it must always be supplied.
    description: str

class PaymentType(Enum):
    # Payment methods a receipt can carry; each member's value is the
    # lowercase string form of its name.
    CREDIT = "credit"
    DEBIT = "debit"
    CASH = "cash"
    
class Item(BaseModel):
    # A single purchased line on a receipt. Every field carries a default,
    # so an Item can be built from whatever subset of values is available.

    # Item name; falls back to a placeholder when not provided.
    description: str = Field(
        default="<UNKNOWN>",
        description="name",
    )
    # Sub-entries belonging to this item; a fresh empty list per instance.
    includedItems: List[SubItem] = Field(default_factory=list)
    quantity: int = Field(
        default=0,
        description="number of items",
    )
    unitPrice: float = Field(
        default=0.00,
        description="cost per unit",
    )
    totalPrice: float = Field(
        default=0.00,
        description="total cost of unit(s) purchased",
    )
    discountAmount: float = Field(
        default=0.00,
        description="discount for item",
    )

class ReceiptInfo(BaseModel):
    # Structured representation of one receipt.
    #
    # Required (no default declared): merchant, address, city, state, ITEMS.
    # Every other field falls back to the placeholder default given below.

    # --- merchant details (required) ---
    merchant: str = Field(description="name of merchant")
    address: str = Field(description="address")
    city: str = Field(description="city")
    state: str = Field(description="state")

    # --- optional receipt metadata ---
    phoneNumber: str = Field(default="<UNKNOWN>", description="phone number")
    receiptDate: str = Field(default="1/01/1991", description="purchase date")
    receiptTime: str = Field(default="00:00:00", description="time purchased")
    totalItems: int = Field(default=0, description="number of items")
    diningOptions: str = Field(default="None", description="here or to-go items for consumable items")

    # --- payment ---
    # The default must be an enum member, not the raw string "cash": pydantic
    # does not validate defaults unless asked to, so a string default would
    # leave a plain `str` in a field annotated as PaymentType.
    paymentType: PaymentType = Field(default=PaymentType.CASH, description="payment method")
    creditCardType: str = Field(default="<UNKNOWN>", description="credit card type")

    # --- amounts ---
    totalDiscount: float = Field(default=0.00, description="total discount")
    tax: float = Field(default=0.00, description="tax amount")
    total: float = Field(default=0.00, description="total amount paid")

    # --- purchased lines (required) ---
    ITEMS: List[Item]
    
# Fields declared without a default value are REQUIRED: merchant, address, city, state and ITEMS on ReceiptInfo (and description on SubItem). Every other field falls back to its default.

In [4]:
from langchain.output_parsers import PydanticOutputParser

# Build an output parser that targets the ReceiptInfo model.
receiptParser = PydanticOutputParser(pydantic_object=ReceiptInfo)
# Show the format instructions (including the JSON schema) the parser
# generates for that model.
print(receiptParser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Item": {"properties": {"description": {"default": "<UNKNOWN>", "description": "the name of the item", "title": "Description", "type": "string"}, "includedItems": {"items": {"$ref": "#/$defs/SubItem"}, "title": "Includeditems", "type": "array"}, "quantity": {"default": 0, "description": "how many of this item was purchased", "title": "Quantity", "type": "integer"}, "unitPrice": {"default": 0.0, "description": "the cost of one unit", "title": "Unitprice", "type": "number"}, "totalPrice": {"default": 0.0, "description": "the total cos

In [5]:
# Two sample line items; includedItems is omitted and therefore takes its
# default value.
item_1 = Item(
    description="PEDANTIC PEAS",
    quantity=1,
    unitPrice=5.00,
    totalPrice=5.00,
    discountAmount=0.00,
)
item_2 = Item(
    description="CRAZY CARROTS",
    quantity=1,
    unitPrice=5.00,
    totalPrice=5.00,
    discountAmount=0.00,
)

# A sample receipt holding both items; diningOptions is not passed, so it
# keeps its default.
receiptInfo = ReceiptInfo(
    merchant="Walmart",
    address="123 Manoa Rd",
    city="Honolulu",
    state="HI",
    phoneNumber="1234567890",
    receiptDate="01/01/1999",
    receiptTime="12:00am",
    totalItems=2,
    paymentType="credit",
    creditCardType="visa",
    tax=1.00,
    total=10.00,
    totalDiscount=0.01,
    ITEMS=[item_1, item_2],
)
print(receiptInfo)

merchant='Walmart' address='123 Manoa Rd' city='Honolulu' state='HI' phoneNumber='1234567890' receiptDate='01/01/1999' receiptTime='12:00am' totalItems=2 diningOptions='None' paymentType=<PaymentType.CREDIT: 'credit'> creditCardType='visa' tax=1.0 total=10.0 totalDiscount=0.01 ITEMS=[Item(description='PEDANTIC PEAS', includedItems=[], quantity=1, unitPrice=5.0, totalPrice=5.0, discountAmount=0.0), Item(description='CRAZY CARROTS', includedItems=[], quantity=1, unitPrice=5.0, totalPrice=5.0, discountAmount=0.0)]
