In [326]:
pip install pydantic

Note: you may need to restart the kernel to use updated packages.


In [135]:
pip install langchain

Note: you may need to restart the kernel to use updated packages.


In [136]:
pip install openai

Note: you may need to restart the kernel to use updated packages.


In [499]:
### Imports ###
from pydantic import BaseModel, Field, field_validator
from typing import List
from enum import Enum
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.chat_models import ChatOpenAI
import os
import openai
import json
import typing

In [400]:
'''
Defining a subitem to be used for the includedItems category in the Item object.
The includedItems fields consists of a list of SubItems and can be empty.
For example, a 'cheesburger' Item might have includedItems:['bun', 'patty', 'cheese']
'''
class SubItem(BaseModel):
    description: str

'''
An enumeration of the types of payment methods for the paymentType category in the ReceiptInfo object.
The default in ReceiptInfo is 'cash'.
'''   
class PaymentType(Enum):
    CREDIT = 'credit'
    DEBIT = 'debit'
    CASH = 'cash'

'''
This object represents a single item (good/service) that was purchased in the receipt text.
'''
class Item(BaseModel):
    description: str=Field(description="item name")
    unabbreviatedDescription: str=Field(default="", description="unabbreviated name of field:description")
    includedItems: List[str]=Field(default_factory = list)
    quantity: int=Field(description="number of items")
    unitPrice: float=Field(description="cost per unit")
    totalPrice: float=Field(description="total cost of unit(s) purchased")
    discountAmount: float=Field(description="discount for item")
    
'''
This object represents the all of the information residing in one receipt text file.
Raw receipt text files are to be parsed into JSON object format for use in later analysis.
'''
class ReceiptInfo(BaseModel):
    merchant: str=Field(description="name of merchant")
    address: str=Field(description="address")
    city: str=Field(description="city")
    state: str=Field(description="state")
    phoneNumber: str=Field(description="phone number")
    receiptDate: str=Field(description="purchase date")
    receiptTime: str=Field(description="time purchased")
    totalItems: int=Field(description="number of items")
    diningOptions: str=Field(default="", description="here or to-go items for consumable items")
    paymentType: PaymentType=Field(default="cash", description="payment method")
    creditCardType: str=Field(default="<UNKNOWN>", description="credit card type")
    totalDiscount: float=Field(default=0.00, description="total discount")
    tax: float=Field(description="tax amount")
    total: float=Field(description="total amount paid")
    ITEMS: List[Item]
    
    @field_validator('paymentType', mode="before")
    def validate_paymentType(cls, paymentType: str) -> PaymentType:
        string = paymentType.lower()
        returnValue = PaymentType.CASH
        if 'credit' in string:
            returnValue = PaymentType.CREDIT
        elif 'debit' in string:
            returnValue = PaymentType.DEBIT
        return returnValue

In [417]:
def make_receiptParser():
    return PydanticOutputParser(pydantic_object=ReceiptInfo)

receiptParser = make_receiptParser()
receiptParser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"$defs": {"Item": {"properties": {"description": {"description": "item name", "title": "Description", "type": "string"}, "unabbreviatedDescription": {"default": "", "description": "unabbreviated name of field:description", "title": "Unabbreviateddescription", "type": "string"}, "includedItems": {"items": {"type": "string"}, "title": "Includeditems", "type": "array"}, "quantity": {"description": "number of items", "title": "Quantity", "type": "integer"}, "unitPrice": {"description": "cost per unit", "title": "Unitprice", "type": "numbe

In [402]:
example_1_input = '''H
MART <UNKNOWN>
http://www.hmart.com
458 Keawe st
Honolulu, <UNKNOWN> 96813
TEL <UNKNOWN>
Your Cashier was
SY RAMEN HOT MULTI
8.99 B
IND MI GORENG NOOD
4.99 B
HT BEEF DUMPLING D
19.99 B
NS SHRIMP HOT SNCK
1.99 B
ME AZUKI ICE CREAM
6.49 B
TAX
2.00
**** BALANCE
44.45
Discover Credit - C
ACCOUNT <UNKNOWN>
***********1153
APPROVAL CODE: 01669R
SEQUENCE NUMBER: 33790
No CVM
Amount:USD $44.45
CARD:Discover CREDIT XXXX1153 EMV
APPROVAL CODE 01669R
AID:A0000001523010
TVR: 0000008000
<UNKNOWN>
TSI: E800
APPLICATION CRYPTOGRAM A433DC30B1FE402
APPLICATION PREFERRED NAME:Discover Cr
APPLICATION LABEL:Discover
<UNKNOWN>
ARC:00
RespDate: 07152023
Resp <UNKNOWN> 201640
<UNKNOWN>
TOTAL AMOUNT: $44.45
RESPONSE CODE: APPROVED
07/15/23 08:16pm 103 2
Discover
44.45
CHANGE
0.00
TOTAL NUMBER OF ITEMS SOLD =
5
07/15/23 08:17pm 81 2 281 103
** RETURN POLICY **
Unused product can be exchanged or
refunded with receipt within 7 days of
purchase unless otherwise noted below
Meat, fish, produce and refrigerated
food items must be returned with
receipt within 24 hours.
Electronics and <UNKNOWN> Must be
returned with receipt within 14 days
of purchase and must be unused.
If you are unable to <UNKNOWN> item to
store. please call customer service.
00008100202812307152017
<UNKNOWN> Thank You and Please Come Again **'''

example_1_output='''{{"merchant":"H MART","address":"458 Keawe st","city":"Honolulu","state":"<UNKNOWN>","phoneNumber":"<UNKNOWN>","tax":2.00,"total":44.45,"receiptDate":"07/15/23","receiptTime":"08:16pm","totalItems":5,"paymentType":"credit","DiningOptions":"<UNKNOWN>","creditCardType":"Discover","totalDiscount":0.00,"ITEMS":[{{"description":"SY RAMEN HOT MULTI","unabbreviatedDescription":"Sy Ramen Hot Multi","includedItems":[],"quantity":1,"unitPrice":8.99,"totalPrice":8.99,"discountAmount":0.00}},{{"description":"IND MI GORENG NOOD","unabbreviatedDescription":"Ind Mi Goreng Noodle","includedItems":[],"quantity":1,"unitPrice":4.99,"totalPrice":4.99,"discountAmount":0.00}},{{"description":"HT BEEF DUMPLING D","unabbreviatedDescription":"Hot Beef Dumpling","includedItems":[],"quantity":1,"unitPrice":19.99,"totalPrice":19.99,"discountAmount":0.00}},{{"description":"NS SHRIMP HOT SNCK","unabbreviatedDescription":"Shrimp Hot Snack","includedItems":[],"quantity":1,"unitPrice":1.99,"totalPrice":1.99,"discountAmount":0.00}},{{"description":"ME AZUKI ICE CREAM","unabbreviatedDescription":"Azuki Ice Cream","includedItems":[],"quantity":1,"unitPrice":6.49,"totalPrice":6.49,"discountAmount":0.00}}]}}'''

In [403]:
example_2_input ='''<UNKNOWN> PURCHASE **
Panda Express #2150
Honclulu, HI
(808)956-7229
6/9/2023 11:02:50 AM
-TO GO-
Order: 260527
Server: LamTan T
1 Plate
10.60
FRIED RICE-1/2
FRIED <UNKNOWN>
STR BN CKN BRST
ORANGE CKN
1 XTRA ENTREE
1.50
VEG SPRING <UNKNOWN>
<UNKNOWN>
12.10
TAX
0.57
Total
12.67
Visa
12.67
Acct XXXXXXXX9212
<UNKNOWN> 060248
*Card details below
EMV: Contactless
APL: VISA DEBIT
AID: A0000000031010
Panda Rewards has arrived!
<UNKNOWN>
Sign Up at <UNKNOWN>
*
* Enter Unique Rewards Code to earn *
PANDA POINTS on this order!
*
*Code valid for 7 days from purchase*
Unique Rewards Code:
558844128417
WE'D LOVE TC HEAR FROM YOU!
Share your thoughts and receive
a Free Small A La Carte Entree
*
w/purchase of a 2-entree Plate.
*
<UNKNOWN> 2 days, go to
*
<UNKNOWN>
<UNKNOWN>
*
<UNKNOWN>
*
*
*
Survey Code:
<UNKNOWN>
*
*
Email <UNKNOWN> quired to receive coupon
*
*
*General Manager with $100K potential
Join the Panda team!
PandaCareers.com'''

example_2_output ='''{{"merchant":"Panda Express # 2150","address":"<UNKNOWN>","city":"Honclulu","state":"HI","phoneNumber":"(808)956-7229","tax":0.57,"total":12.67,"receiptDate":"6/9/2023","receiptTime":"11:02:50 AM","totalItems":2,"paymentType":"Debit","DiningOptions":"TO GO","creditCardType":"Visa","totalDiscount":0.00,"ITEMS":[{{"description":"Plate","unabbreviatedDescription":"Plate","includedItems": ["FRIED RICE-½", "FRIED <UNKNOWN>","STR BN CKN BREAST","ORANGE CKN"],"quantity":1,"unitPrice":10.60,"totalPrice":10.60,"discountAmount":0.00}},{{"description":"XTRA ENTREE","unabbreviatedDescription":"Extra Entree","includedItems":["VEG SPRING <UNKNOWN>"],"quantity":1,"unitPrice":1.50,"totalPrice":1.50,"discountAmount":0.00}}]}}'''

In [404]:
example_3_input='''Longs Drugs <UNKNOWN>
4211 WAIALAE AVE
HONOLULU, HI 96816
808.732.0781
REG#10 TRN#4352 CSHR#0000095 STR#9220
1 CFRIO SF PEG BAG 3Z 4.59B
1 CFRIO SF PEG BAG
3Z
4.59B
1 CFRIO SF PEG BAG
3Z
4.59B
1 CR GYSR SPR WTR
33.8
1.29B
1
BOTTLE DEPOSIT
.05F
1 HI CONTAINER FEE
.01F
1 CR GYSR SPR WTR
33.8
1.29B
1 BOTTLE DEPOSIT
.05F
1 HI CONTAINER FEE
.01F
9 ITEMS
Survey ID #
4378 6100 0784 757 18
SUBTOTAL
16.47
HI 4.712% TAX
.77
TOTAL
17.24
CHARGE
17.24
************2130
RF
CHASE VISA
************2130
APPROVED# 04998D
REF# 103528
TRAN TYPE: SALE
AID: A0000000031010
TC: BD71734C41358DEE
TERMINAL# 84206407
NO SIGNATURE REQUIRED
CVM: 1F0000
<UNKNOWN> 0000000000
TSI(9B): 0000
CHANGE
.00
3509 2203 1844 3521 08
Returns with receipt, subject to
CVS Return Policy, thru 09/01/2023
Refund amount is based on price
after all coupons and discounts.
JULY 3, 2023
2:48 PM
<UNKNOWN> <UNKNOWN> <UNKNOWN> <UNKNOWN> <UNKNOWN> <UNKNOWN> <UNKNOWN>
<UNKNOWN>
GET YOUR CVS EXTRACARE CARD
We would love to hear your feedback
on your recent experience with us.
This survey will take only
1 minute to complete.
Share Your Feedback
<UNKNOWN>
Hablamos español
THANK YOU. SHOP 24 HOURS AT CVS.COM'''

example_3_output='''{{"merchant":"Longs Drugs","address":"4211 WAIALAE AVE","city":"HONOLULU","state":"HI","phoneNumber":"808.732.0781","tax":0.77,"total":17.24,"receiptDate":"JULY 3, 2023","receiptTime":"2:48 PM","totalItems":6,"paymentType":"Credit","DiningOptions":"None","creditCardType":"CHASE VISA","totalDiscount":0.00,"ITEMS":[{{"description":"CFRIO SF PEG BAG","unabbreviatedDescription":"CoffeeRio Peg Bag","includedItems":[],"quantity":3,"unitPrice":4.59,"totalPrice":13.77,"discountAmount":0.00}},{{"description": "CR GYSR SPR WTR","unabbreviatedDescription":"Crystal Geyser Spring Water","includedItems":[],"quantity":2,"unitPrice":1.29,"totalPrice":2.58,"discountAmount":0.00}},{{"description":"BOTTLE DEPOSIT","unabbreviatedDescription":"Bottle Deposit","includedItems":[],"quantity":2,"unitPrice":0.05,"totalPrice":0.10,"discountAmount":0.00}},{{"description":"HI CONTAINER FEE","unabbreviatedDescription":"HI Container Fee","includedItems":[],"quantity":2,"unitPrice":0.01,"totalPrice":0.02,"discountAmount":0.00}}]}}'''

In [405]:
example_4_input='''129
For question comments or concerns
Call McDonald's Hotline
800-683-5587
Now Delivering with
Door Dash
Survey Code:
14616-01290-70423-16249-00085-
McDonald's Restaurant #14616
3549 RUSSETT GREEN E (WM#1985)
MD
ANNE
<UNKNOWN> 20724
TEL# 301-7767980
Thank You Valued Customer
KS# 1
07/04/2023 04:24 PM
Sidel
Order 29
1 Happy Meal Ch Burger
4.39
1 Cheeseburger
NO Pickle
1 Extra Kids Fry
1 Apple Juice
1 S Apl Jc Surcharge
1 ELEMENTAL
1 S Grimace Bday Shake
3.69
1 S Shake Surcharge
Subtotal
8.08
Tax
0.48
Take-Out Total
8.56
Cashless
8.56
Change
0.00
MER# 464239
CARD ISSUER
ACCOUNT#
Visa SALE
<UNKNOWN>
TRANSACTION AMOUNT
8.56
CONTACTLESS
AUTHORIZATION CODE - 03172D
SEQ# 035443
AID: A0000000031010
Now Hiring
Text MD349 To 38000
Sign up for MyMcDonald's rewards
to earn points on future visits'''

example_4_output='''{{"merchant":"McDonald's Restaurant","address":"3549 RUSSETT GREEN E","city":"<UNKNOWN>","state":MD","phoneNumber":"800-683-5587","tax":0.48,"total":8.56,"receiptDate":"07/04/2023","receiptTime":"04:24 PM","totalItems":2,"paymentType":"CREDIT","DiningOptions":"Take-Out","creditCardType":"Visa","totalDiscount":0.00,"ITEMS":[{{"description":"Happy Meal Ch Burger","unabbreviatedDescription":"Happy Meal Cheese Burger","includedItems":["Cheeseburger","NO Pickle","Extra Kids Fry","Apple Juice","S Apl Jc Surcharge","ELEMENTAL"],"quantity":1,"unitPrice":4.39,"totalPrice":4.39,"discountAmount":0.00}},{{"description":"S Grimace Bday Shake","unabbreviatedDescription":"Grimace Birthday Shake","includedItems":["S Shake Surcharge"],"quantity":1,"unitPrice":3.69,"totalPrice":3.69,"discountAmount":0.00}}]}}'''

In [406]:
example_5_input='''Longs Drugs <UNKNOWN>
91-919 FORT WEAVER RD
EWA BEACH, HI 96706
808.689.5860
REG#01 TRN#8020 CSHR#2262192 STR#7356
Helped by: JEFFREY
ExtraCare Card <UNKNOWN>
1858
1 OREO ORIG DBL STFF 14.0
2.93B
ORIGINAL PRICE
6.99
2/9.00
2.49 -
COUPON SAVINGS
1.57 -
1 BAUD WAFER CHOCLT <UNKNOWN>
<UNKNOWN>
ORIGINAL PRICE
2.69
3/5.00
1.02 -
COUPON SAVINGS
.57 -
1 HWNISL TEABG GVGNS <UNKNOWN>
3.60B
ORIGINAL PRICE
5.49
COUPON SAVINGS
1.89 -
COUPONS APPLIED
1 $4 OFF YOUR PURCHASE
4.00 - CVS
1 2% BACK IN <UNKNOWN> R
.03 - CVS
3 ITEMS
SUBTOTAL
7.63
HI 4.712% TAX
.36
TOTAL
7.99
DEBIT
7.99
3476
CH
US DEBIT
3476
APPROVED# 003818
REF# 010205
TRAN <UNKNOWN> SALE
AID: A0000000042203
<UNKNOWN> <UNKNOWN>
TERMINAL# 84198694
PIN VERIFIED ONLINE
CVM: 420300
<UNKNOWN> 0000048000
TSI(9B): E800
CHANGE
00
Returns Return with 0200 12
3507 3563 1998
CVS Refund amount <UNKNOWN> thru subject to
after all coupons is based and discounts. 09/16/2023 on <UNKNOWN>
JULY 18, 2023
7:38 PM
<UNKNOWN> <UNKNOWN>'''

example_5_output='''{{"merchant":"Longs Drugs","address":"91-919 FORT WEAVER RD","city":"EWA BEACH","state":"HI","phoneNumber":"808.689.5860","tax":0.36,"total":7.99,"receiptDate":"JULY 18, 2023","receiptTime":"7:38 PM","totalItems":3,"paymentType":"DEBIT","DiningOptions":"None","creditCardType":"<UNKNOWN>","totalDiscount":7.54,"ITEMS":[{{"description":"OREO ORIG DBL STFF","unabbreviatedDescription":"Oreo Double Stuff","includedItems":[],"quantity":1,"unitPrice":6.99,"totalPrice":2.93,"discountAmount":4.06}},{{"description":"BAUD WAFER CHOCLT","unabbreviatedDescription":"Wafer Chocolate","includedItems":[],"quantity":1,"unitPrice":2.69,"totalPrice":1.10,"discountAmount":1.59}},{{"description":"HWNISL TEABG GVGNS","unabbreviatedDescription":"Teabag","includedItems":[],"quantity":1,"unitPrice":5.49,"totalPrice":3.60,"discountAmount":1.89}}]}}'''

In [407]:
prompt_prefix = '''You are a capable large language model. 
Your task is to extract data from a given receipt and format it into the JSON schema below. 
Use the default values if you're not sure. 
Try to infer a value for the fields: unabbreviatedDescription, totalItems.
The values for the fields: description and unnabbreviatedDescription can not be the same.
Text can be used for multiple fields. 
The field paymentType is an enumeration whose legal values are ["cash", "credit", "debit].
Use double-quotes for all string values.

{format_instructions}

'''

prompt_examples = [
    {"ExampleInput": example_1_input, "ExampleOutput": example_1_output},
    {"ExampleInput": example_2_input, "ExampleOutput": example_2_output},
    {"ExampleInput": example_3_input, "ExampleOutput": example_3_output},
    {"ExampleInput": example_4_input, "ExampleOutput": example_4_output},
    {"ExampleInput": example_5_input, "ExampleOutput": example_5_output},
]
example_prompt = PromptTemplate(
    input_variables=["ExampleInput", "ExampleOutput"],  
    template = "input:\n{ExampleInput}\noutput:\n{ExampleOutput}"
)
example_prompt.format(ExampleInput=example_5_input, ExampleOutput=example_5_output)

execute_fewshot_prompt = FewShotPromptTemplate(
    prefix = prompt_prefix,
    input_variables=["input"], 
    partial_variables={'format_instructions': receiptParser.get_format_instructions()},
    examples= prompt_examples,
    example_prompt = example_prompt,
    example_separator="\n",
    suffix = "input:\n{input}\noutput:\n"
)


In [408]:
data = {"input":
'''129
For question comments or concerns
Call McDonald's Hotline
800-683-5587
Now Delivering with
Door Dash
Survey Code:
14616-01290-70423-16249-00085-
McDonald's Restaurant #14616
3549 RUSSETT GREEN E (WM#1985)
MD
ANNE
<UNKNOWN> 20724
TEL# 301-7767980
Thank You Valued Customer
KS# 1
07/04/2023 04:24 PM
Sidel
Order 29
1 Happy Meal Ch Burger
4.39
1 Cheeseburger
NO Pickle
1 Extra Kids Fry
1 Apple Juice
1 S Apl Jc Surcharge
1 ELEMENTAL
1 S Grimace Bday Shake
3.69
1 S Shake Surcharge
Subtotal
8.08
Tax
0.48
Take-Out Total
8.56
Cashless
8.56
Change
0.00
MER# 464239
CARD ISSUER
ACCOUNT#
Visa SALE
<UNKNOWN>
TRANSACTION AMOUNT
8.56
CONTACTLESS
AUTHORIZATION CODE - 03172D
SEQ# 035443
AID: A0000000031010
Now Hiring
Text MD349 To 38000
Sign up for MyMcDonald's rewards
to earn points on future visits
'''}
print(data)

{'input': "129\nFor question comments or concerns\nCall McDonald's Hotline\n800-683-5587\nNow Delivering with\nDoor Dash\nSurvey Code:\n14616-01290-70423-16249-00085-\nMcDonald's Restaurant #14616\n3549 RUSSETT GREEN E (WM#1985)\nMD\nANNE\n<UNKNOWN> 20724\nTEL# 301-7767980\nThank You Valued Customer\nKS# 1\n07/04/2023 04:24 PM\nSidel\nOrder 29\n1 Happy Meal Ch Burger\n4.39\n1 Cheeseburger\nNO Pickle\n1 Extra Kids Fry\n1 Apple Juice\n1 S Apl Jc Surcharge\n1 ELEMENTAL\n1 S Grimace Bday Shake\n3.69\n1 S Shake Surcharge\nSubtotal\n8.08\nTax\n0.48\nTake-Out Total\n8.56\nCashless\n8.56\nChange\n0.00\nMER# 464239\nCARD ISSUER\nACCOUNT#\nVisa SALE\n<UNKNOWN>\nTRANSACTION AMOUNT\n8.56\nCONTACTLESS\nAUTHORIZATION CODE - 03172D\nSEQ# 035443\nAID: A0000000031010\nNow Hiring\nText MD349 To 38000\nSign up for MyMcDonald's rewards\nto earn points on future visits\n"}


In [409]:

model = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=1.00, openai_api_key="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")
chain = execute_fewshot_prompt | model | receiptParser

In [411]:
response = chain.invoke(data)

response

ReceiptInfo(merchant="McDonald's Restaurant", address='3549 RUSSETT GREEN E', city='<UNKNOWN>', state='MD', phoneNumber='800-683-5587', receiptDate='07/04/2023', receiptTime='04:24 PM', totalItems=2, diningOptions='', paymentType=<PaymentType.CREDIT: 'credit'>, creditCardType='Visa', totalDiscount=0.0, tax=0.48, total=8.56, ITEMS=[Item(description='Happy Meal Ch Burger', unabbreviatedDescription='Happy Meal Cheese Burger', includedItems=['Cheeseburger', 'NO Pickle', 'Extra Kids Fry', 'Apple Juice', 'S Apl Jc Surcharge', 'ELEMENTAL'], quantity=1, unitPrice=4.39, totalPrice=4.39, discountAmount=0.0), Item(description='S Grimace Bday Shake', unabbreviatedDescription='Grimace Birthday Shake', includedItems=['S Shake Surcharge'], quantity=1, unitPrice=3.69, totalPrice=3.69, discountAmount=0.0)])

In [412]:
response.model_dump_json()

'{"merchant":"McDonald\'s Restaurant","address":"3549 RUSSETT GREEN E","city":"<UNKNOWN>","state":"MD","phoneNumber":"800-683-5587","receiptDate":"07/04/2023","receiptTime":"04:24 PM","totalItems":2,"diningOptions":"","paymentType":"credit","creditCardType":"Visa","totalDiscount":0.0,"tax":0.48,"total":8.56,"ITEMS":[{"description":"Happy Meal Ch Burger","unabbreviatedDescription":"Happy Meal Cheese Burger","includedItems":["Cheeseburger","NO Pickle","Extra Kids Fry","Apple Juice","S Apl Jc Surcharge","ELEMENTAL"],"quantity":1,"unitPrice":4.39,"totalPrice":4.39,"discountAmount":0.0},{"description":"S Grimace Bday Shake","unabbreviatedDescription":"Grimace Birthday Shake","includedItems":["S Shake Surcharge"],"quantity":1,"unitPrice":3.69,"totalPrice":3.69,"discountAmount":0.0}]}'

In [413]:
json_objects = []
receipts_folder = "receipts/text"
for filename in os.listdir(receipts_folder):
    if filename.endswith('.txt'):
        with open(os.path.join(receipts_folder, filename)) as f:
            data = f.read()
            print(filename)
            response = chain.invoke({"input": data})
            json_objects.append(response.model_dump_json())

            
with open('json_objects', 'w') as fp:
    for item in json_objects:
        # write each item on a new line
        fp.write("%s\n" % item)
    print('Done')

7dc47e18-a003-45ad-8224-4bd488247e76.txt
50b1e424-ec8c-4179-af9c-0f9c651d0274.txt
4931a7bf-b584-4e3c-8501-f09f838cc946.txt
d3b0cffe-0eb7-43be-aa77-bcd7da1f0fa1.txt
2f4eb93a-8e56-4bef-bde2-5ca54cf88465.txt
e0c6abf4-c472-45a4-84d0-db008deb4ef6.txt
10c4a651-16c4-4721-99ac-a6d53e3f5656.txt
5ca657d1-3e7f-4c13-b9f7-d03e29677db9.txt
30ae33b7-518d-43af-a950-1d22ebef2b3c.txt
376aaf32-f8a8-4d53-b76d-5b3c3af73a64.txt
2a2b0b3f-13cf-4550-bdc4-c0b8251772ad.txt
6a0740b0-4490-423f-b795-4014d7562f40.txt
4c0726db-cd68-499a-9cf6-e52dd57baefa.txt
ab1c2056-0cd1-4e36-940c-fe7687f243a9.txt
Done


In [378]:
json_objects

['{"merchant":"Halal Gyro Kabob House","address":"240 EAST DELAWARE AVENUE","city":"NEWARK","state":"DE","phoneNumber":"4439937029","receiptDate":"06-Jul-2023","receiptTime":"7:57:49P","totalItems":1,"diningOptions":"","paymentType":"credit","creditCardType":"VISA","totalDiscount":0.0,"tax":0.0,"total":16.09,"ITEMS":[{"description":"#18. Lamb Salad","unabbreviatedDescription":"Lamb Salad","includedItems":[],"quantity":1,"unitPrice":13.99,"totalPrice":13.99,"discountAmount":0.0}]}',
 '{"merchant":"WHOLE FOODS MARKET","address":"388 Kamakee St Ste 100","city":"Honolulu","state":"HI","phoneNumber":"808-379-1800","receiptDate":"08/02/2023","receiptTime":"05:03 PM","totalItems":5,"diningOptions":"","paymentType":"credit","creditCardType":"VISA","totalDiscount":0.0,"tax":1.69,"total":37.46,"ITEMS":[{"description":"365WFM OG ITALIAN BAG","unabbreviatedDescription":"365 Whole Foods Market Organic Italian Bread","includedItems":[],"quantity":1,"unitPrice":4.19,"totalPrice":4.19,"discountAmount"

In [323]:
ReceiptInfo(BaseModel=json.loads(response.content))

ValidationError: 11 validation errors for ReceiptInfo
merchant
  Field required [type=missing, input_value={'BaseModel': {'ReceiptIn...iscountAmount': 0.0}]}}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
address
  Field required [type=missing, input_value={'BaseModel': {'ReceiptIn...iscountAmount': 0.0}]}}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
city
  Field required [type=missing, input_value={'BaseModel': {'ReceiptIn...iscountAmount': 0.0}]}}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
state
  Field required [type=missing, input_value={'BaseModel': {'ReceiptIn...iscountAmount': 0.0}]}}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
phoneNumber
  Field required [type=missing, input_value={'BaseModel': {'ReceiptIn...iscountAmount': 0.0}]}}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
receiptDate
  Field required [type=missing, input_value={'BaseModel': {'ReceiptIn...iscountAmount': 0.0}]}}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
receiptTime
  Field required [type=missing, input_value={'BaseModel': {'ReceiptIn...iscountAmount': 0.0}]}}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
totalItems
  Field required [type=missing, input_value={'BaseModel': {'ReceiptIn...iscountAmount': 0.0}]}}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
tax
  Field required [type=missing, input_value={'BaseModel': {'ReceiptIn...iscountAmount': 0.0}]}}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
total
  Field required [type=missing, input_value={'BaseModel': {'ReceiptIn...iscountAmount': 0.0}]}}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
ITEMS
  Field required [type=missing, input_value={'BaseModel': {'ReceiptIn...iscountAmount': 0.0}]}}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing

In [207]:
def make_model(model="gpt-3.5-turbo", temperature=1.00, openai_api_key="INSERT OPENAI API KEY"):
    return ChatOpenAIChatOpenAI(model=model, temperature=temperature, openai_api_key=openai_api_key)

In [208]:
def make_chain(fewshot_prompt, model, receiptParser):
    chain = execute_fewshot_prompt | model | receiptParser
    return chain

In [120]:
### Imports ###
from pydantic import BaseModel, Field
from typing import List
from enum import Enum
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.chat_models import ChatOpenAI
import os
import openai

'''
Defining a subitem to be used for the includedItems category in the Item object.
The includedItems fields consists of a list of SubItems and can be empty.
For example, a 'cheesburger' Item might have includedItems:['bun', 'patty', 'cheese']
'''
class SubItem(BaseModel):
    description: str

'''
An enumeration of the types of payment methods for the paymentType category in the ReceiptInfo object.
The default in ReceiptInfo is 'cash'.
'''   
class PaymentType(Enum):
    CREDIT = 'credit'
    DEBIT = 'debit'
    CASH = 'cash'

'''
This object represents a single item (good/service) that was purchased in the receipt text.
'''
class Item(BaseModel):
    description: str=Field(description="item name")
    unabbreviatedDescription: str=Field(default="", description="unabbreviated name of field:description")
    includedItems: List[SubItem]=Field(default_factory=list)
    quantity: int=Field(description="number of items")
    unitPrice: float=Field(description="cost per unit")
    totalPrice: float=Field(description="total cost of unit(s) purchased")
    discountAmount: float=Field(description="discount for item")
    
'''
This object represents the all of the information residing in one receipt text file.
Raw receipt text files are to be parsed into JSON object format for use in later analysis.
'''
class ReceiptInfo(BaseModel):
    merchant: str=Field(description="name of merchant")
    address: str=Field(description="address")
    city: str=Field(description="city")
    state: str=Field(description="state")
    phoneNumber: str=Field(description="phone number")
    receiptDate: str=Field(description="purchase date")
    receiptTime: str=Field(description="time purchased")
    totalItems: int=Field(description="number of items")
    diningOptions: str=Field(default="", description="here or to-go items for consumable items")
    paymentType: PaymentType=Field(default="cash", description="payment method")
    creditCardType: str=Field(default="<UNKNOWN>", description="credit card type")
    totalDiscount: float=Field(default=0.00, description="total discount")
    tax: float=Field(description="tax amount")
    total: float=Field(description="total amount paid")
    ITEMS: List[Item]

def make_receiptParser():
    return PydanticOutputParser(pydantic_object=ReceiptInfo)

def get_prompt_prefix():
    return '''You are a capable large language model. Your task is to extract data from a given receipt and format it into the JSON schema below. Use the default values if you're not sure. Try to infer a value for the field: unabbreviatedDescription. The values for the fields: description and unnabbreviatedDescription can not be the same. Text can be used for multiple fields.
    
    {format_instructions}
    
    '''

def get_example_prompt(input_variables=["ExampleInput", "ExampleOutput"], template= "input:\n{ExampleInput}\noutput:\n{ExampleOutput}"):
    return (PromptTemplate(input_variables = input_variables, template = template))

def get_suffix():
    return "input:\n{input}\noutput:\n"

def make_fewshot_prompt(receiptParser):
    return (FewShotPromptTemplate(
    prefix = get_prompt_prefix(),
    input_variables=["input"], 
    partial_variables={'format_instructions': receiptParser().get_format_instructions()},
    examples=get_prompt_examples(),
    example_prompt = get_example_prompt(),
    example_separator="\n",
    suffix = get_suffix(),
    ))

def make_model(model="gpt-3.5-turbo", temperature=1.00, openai_api_key="INSERT OPENAI API KEY"):
    return ChatOpenAIChatOpenAI(model=model, temperature=temperature, openai_api_key=openai_api_key)

def make_chain(fewshot_prompt, model, receiptParser):
    chain = fewshot_prompt | model | receiptParser
    return chain

In [159]:
receipts_folder = "receipts/text"
files = os.listdir(receipts_folder)
print(files)
index = 0
for filename in os.listdir(receipts_folder):
    if filename.endswith('.txt') and index <= 1:
        with open(os.path.join(receipts_folder, filename)) as f:
            print(f.read())
            index += 1

['7dc47e18-a003-45ad-8224-4bd488247e76.txt', '50b1e424-ec8c-4179-af9c-0f9c651d0274.txt', '4931a7bf-b584-4e3c-8501-f09f838cc946.txt', 'd3b0cffe-0eb7-43be-aa77-bcd7da1f0fa1.txt', '2f4eb93a-8e56-4bef-bde2-5ca54cf88465.txt', 'e0c6abf4-c472-45a4-84d0-db008deb4ef6.txt', '10c4a651-16c4-4721-99ac-a6d53e3f5656.txt', '5ca657d1-3e7f-4c13-b9f7-d03e29677db9.txt', '30ae33b7-518d-43af-a950-1d22ebef2b3c.txt', '376aaf32-f8a8-4d53-b76d-5b3c3af73a64.txt', '2a2b0b3f-13cf-4550-bdc4-c0b8251772ad.txt', '.ipynb_checkpoints', '6a0740b0-4490-423f-b795-4014d7562f40.txt', '4c0726db-cd68-499a-9cf6-e52dd57baefa.txt', 'ab1c2056-0cd1-4e36-940c-fe7687f243a9.txt']
Halal Gyro Kabob House
240 EAST DELAWARE AVENUE
NEWARK, DE 19711
4439937029
https://www.halalgyrokabobhouse.com
ORDER: 77
For Here
Cashier: Employee
06-Jul-2023 7:57:49P
Transaction 244806
1
#18. Lamb Salad
$13.99
Total
$13.99
Tip
$2.10
CREDIT CARD AUTH
$16.09
VISA 2130
Retain this copy for statement validation
06-Jul-2023 7:57:59P
$16.09 <UNKNOWN> Method: CO

In [262]:
from prompt_examples import get_prompt_examples

In [263]:
get_prompt_examples()

[{'ExampleInput': 'H\nMART <UNKNOWN>\nhttp://www.hmart.com\n458 Keawe st\nHonolulu, <UNKNOWN> 96813\nTEL <UNKNOWN>\nYour Cashier was\nSY RAMEN HOT MULTI\n8.99 B\nIND MI GORENG NOOD\n4.99 B\nHT BEEF DUMPLING D\n19.99 B\nNS SHRIMP HOT SNCK\n1.99 B\nME AZUKI ICE CREAM\n6.49 B\nTAX\n2.00\n**** BALANCE\n44.45\nDiscover Credit - C\nACCOUNT <UNKNOWN>\n***********1153\nAPPROVAL CODE: 01669R\nSEQUENCE NUMBER: 33790\nNo CVM\nAmount:USD $44.45\nCARD:Discover CREDIT XXXX1153 EMV\nAPPROVAL CODE 01669R\nAID:A0000001523010\nTVR: 0000008000\n<UNKNOWN>\nTSI: E800\nAPPLICATION CRYPTOGRAM A433DC30B1FE402\nAPPLICATION PREFERRED NAME:Discover Cr\nAPPLICATION LABEL:Discover\n<UNKNOWN>\nARC:00\nRespDate: 07152023\nResp <UNKNOWN> 201640\n<UNKNOWN>\nTOTAL AMOUNT: $44.45\nRESPONSE CODE: APPROVED\n07/15/23 08:16pm 103 2\nDiscover\n44.45\nCHANGE\n0.00\nTOTAL NUMBER OF ITEMS SOLD =\n5\n07/15/23 08:17pm 81 2 281 103\n** RETURN POLICY **\nUnused product can be exchanged or\nrefunded with receipt within 7 days of\npu

In [418]:
string='''{"merchant":"Foodland","address":"1450 Ala Moana Blvd, Suite 8000","city":"Honolulu","state":"HI","phoneNumber":"808-949-5044","tax":0.46,"total":10.14,"receiptDate":"10/15/23","receiptTime":"09:04pm","totalItems":2,"paymentType":"credit","DiningOptions":"None","creditCardType":"Discover","totalDiscount":0.00,"ITEMS":[{"description":"FOUNTAIN DRNK","unabbreviatedDescription":"Fountain Drink","includedItems":[],"quantity":1,"unitPrice":1.69,"totalPrice":1.69,"discountAmount":0.00},{"description":"AMB BENTO SPCY CKN","unabbreviatedDescription":"Amb Bento Spicy Chicken","includedItems":[],"quantity":1,"unitPrice":<UNKNOWN>,"totalPrice":<UNKNOWN>,"discountAmount":0.00}]}'''
string[600:620]

'ty":1,"unitPrice":<U'

In [440]:
string = "<UNKNOWN>10.0"
string.split()

['<UNKNOWN>10.0']

In [439]:
try:
    for token in string.split():
        try:
            # if this succeeds, you have your (first) float
            print(float(token), "is a float")
        except ValueError:
            print(token, "is something else")
except:
    print(0)

<UNKNOWN>10.0 is something else


In [461]:
string = "DINE".lower()
for_here_terms = ['for', 'here', 'dine', 'in', 'house', 'on', 'stay']
to_go_terms = ['take', 'out', 'carry', 'to', 'go', 'pick', 'up', 'delivery', 'grab', 'away']
[string.__contains__(term) for term in for_here_terms]

[False, False, True, True, False, False, False]

In [462]:
sum([string.__contains__(term) for term in to_go_terms])

0

In [463]:
[string.__contains__(term) for term in for_here_terms]

[False, False, True, True, False, False, False]

In [466]:
returnValue = ''
dine_in_terms = ['for', 'here', 'dine', 'in', 'house', 'on']
to_go_terms = ['take', 'out', 'carry', 'to', 'go', 'pick', 'up', 'delivery', 'grab', 'away']
dine_in_score = sum([string.__contains__(term) for term in dine_in_terms])
to_go_score = sum([string.__contains__(term) for term in to_go_terms])
if (dine_in_score > to_go_score):
    returnValue = 'DINE IN'
elif (dine_in_score < to_go_score):
    returnValue = 'TO GO'
elif ((dine_in_score != 0) and (to_go_score != 0) and (dine_in_score == to_go_score)):
    returnValue = 'TO GO'

In [467]:
returnValue

'DINE IN'

In [502]:
'''
An enumeration of the types of payment methods for the paymentType category in the ReceiptInfo object.
The default in ReceiptInfo is 'cash'.
'''   
class PaymentType(Enum):
    CREDIT = 'credit'
    DEBIT = 'debit'
    CASH = 'cash'

'''
This object represents a single item (good/service) that was purchased in the receipt text.
'''
class Item(BaseModel):
    description: str=Field(description="item name")
    unabbreviatedDescription: str=Field(default="", description="unabbreviated name of field:description")
    includedItems: List[str]=Field(default_factory=list)
    quantity: int=Field(default=0, description="number of items")
    unitPrice: float=Field(default=0.00, description="cost per unit")
    totalPrice: float=Field(deafult=0.00, description="total cost of unit(s) purchased")
    discountAmount: float=Field(default=0.00, description="discount for item")
    
    @field_validator('unitPrice', mode='before')
    @classmethod
    def validate_paymentType(cls, unitPrice: typing.Any) -> float:
        returnValue = 0.00
        if (isinstance(unitPrice, str)):
            try:
                returnValue = (float(string))
            except:
                returnValue = 0.00
        elif (isinstance(unitPrice, int)):
            returnValue = float(unitPrice)
        elif (isinstance(unitPrice, float)):
            returnValue = unitPrice
        return returnValue

    @field_validator('totalPrice', mode='before')
    @classmethod
    def validate_paymentType(cls, totalPrice: typing.Any) -> float:
        returnValue = 0.00
        if (isinstance(totalPrice, str)):
            try:
                returnValue = (float(string))
            except:
                returnValue = 0.00
        elif (isinstance(totalPrice, float)):
            returnValue = int(totalPrice)
        elif (isinstance(totalPrice, int)):
            returnValue = totalPrice
        return returnValue
    
    @field_validator('quantity', mode='before')
    @classmethod
    def validate_paymentType(cls, quantity: typing.Any) -> int:
        returnValue = 0
        if (isinstance(quantity, str)):
            try:
                returnValue = math.ceil(float(string))
            except:
                returnValue = 0
        elif (isinstance(quantity, int)):
            returnValue = quantity
        elif (isinstance(quantity, float)):
            returnValue = math.ceil(quantity)
        else:
            returnValue = 0
        return returnValue

    
'''
This object represents the all of the information residing in one receipt text file.
Raw receipt text files are to be parsed into JSON object format for use in later analysis.
'''
class ReceiptInfo(BaseModel):
    merchant: str=Field(description="name of merchant")
    address: str=Field(description="address")
    city: str=Field(description="city")
    state: str=Field(description="state")
    phoneNumber: str=Field(description="phone number")
    receiptDate: str=Field(description="purchase date")
    receiptTime: str=Field(description="time purchased")
    totalItems: int=Field(description="number of items")
    diningOptions: str=Field(default="", description="here or to-go items for consumable items")
    paymentType: PaymentType=Field(default="cash", description="payment method")
    creditCardType: str=Field(default="<UNKNOWN>", description="credit card type")
    totalDiscount: float=Field(default=0.00, description="total discount")
    tax: float=Field(description="tax amount")
    total: float=Field(description="total amount paid")
    ITEMS: List[Item]
    
    @field_validator('paymentType', mode='before')
    def validate_paymentType(cls, paymentType: str) -> PaymentType:
        string = paymentType.lower()
        returnValue = PaymentType.CASH
        if 'credit' in string:
            returnValue = PaymentType.CREDIT
        elif 'debit' in string:
            returnValue = PaymentType.DEBIT
        return returnValue
    
    @field_validator('diningOptions', mode='before')
    @classmethod
    def validate_diningOptions(cls, diningOptions: str) -> str:
        string = diningOptions.lower()
        returnValue = ''
        dine_in_terms = ['for', 'here', 'dine', 'in', 'house', 'on']
        to_go_terms = ['take', 'out', 'carry', 'to', 'go', 'pick', 'up', 'delivery', 'grab', 'away']
        dine_in_score = sum([string.__contains__(term) for term in dine_in_terms])
        to_go_score = sum([string.__contains__(term) for term in to_go_terms])
        if (dine_in_score > to_go_score):
            returnValue = 'DINE IN'
        elif (dine_in_score < to_go_score):
            returnValue = 'TO GO'
        elif ((dine_in_score != 0) and (to_go_score != 0) and (dine_in_score == to_go_score)):
            returnValue = 'TO GO'
        return returnValue
    
    # @field_validator('merchant', 'address', 'city', 'state', 'phoneNumber','receiptDate', 'receiptTime', mode='before')
    # @classmethod
    # def escape_double_quotes(cls, input_val: str) -> str:
    #     return input_val.replace('"', '\\"')
        


In [508]:
json_string = '''{"merchant":"GAP OUTLET","address":"20 CITY <UNKNOWN> SPACE 1560","city":"ORANGE","state":"CA","phoneNumber":"(714) 938-9970","receiptDate":"03/15/2023","receiptTime":"02:20:54 PM","totalItems":3,"diningOptions":"","paymentType":"cash","creditCardType":"<UNKNOWN>","totalDiscount":17.8,"tax":3.04,"total":42.21,"ITEMS":[{"description":"GapKids <UNKNOWN> Disney Mickey Mouse Graphic T-Shirt","unabbreviatedDescription":"GapKids Disney Mickey Mouse Graphic T-Shirt","includedItems":[],"quantity":1,"unitPrice":"<UNKNOWN>","totalPrice":"<UNKNOWN>","discountAmount":7.5},{"description":"GapKids <UNKNOWN> Disney Mickey Mouse Graphic T-Shirt","unabbreviatedDescription":"GapKids Disney Mickey Mouse Graphic T-Shirt","includedItems":[],"quantity":1,"unitPrice":24.99,"totalPrice":17.49,"discountAmount":7.5},{"description":"Gap <UNKNOWN> Star Wars 4\" Boxers","unabbreviatedDescription":"Gap Star Wars 4\" Boxers","includedItems":[],"quantity":1,"unitPrice":6.99,"totalPrice":4.19,"discountAmount":2.8}]}'''
print(json_string[830:850])

json_obj = json.loads(json_string.replace("Star Wars 4\" Boxer", "Star Wars 4\\\" Boxer"))
print(json_obj)
dictionary = {"merchant": 'GAP OUTLET', 'address': '20 CITY <UNKNOWN> SPACE 1560', 'city': 'ORANGE', 'state': 'CA', 'phoneNumber': '(714) 938-9970', 'receiptDate': '03/15/2023', 'receiptTime': '02:20:54 PM', 'totalItems': 3, 'diningOptions': '', 'paymentType': 'cash', 'creditCardType': '<UNKNOWN>', 'totalDiscount': 17.8, 'tax': 3.04, 'total': 42.21, 'ITEMS': [{'description': 'GapKids <UNKNOWN> Disney Mickey Mouse Graphic T-Shirt', 'unabbreviatedDescription': 'GapKids Disney Mickey Mouse Graphic T-Shirt', 'includedItems': [], 'quantity': 1, 'unitPrice': '<UNKNOWN>', 'totalPrice': '<UNKNOWN>', 'discountAmount': 7.5}, {'description': 'GapKids <UNKNOWN> Disney Mickey Mouse Graphic T-Shirt', 'unabbreviatedDescription': 'GapKids Disney Mickey Mouse Graphic T-Shirt', 'includedItems': [], 'quantity': 1, 'unitPrice': 24.99, 'totalPrice': 17.49, 'discountAmount': 7.5}, {'description': 'Gap <UNKNOWN> Star Wars 4" Boxers', 'unabbreviatedDescription': 'Gap Star Wars 4" Boxers', 'includedItems': [], 'quantity': 1, 'unitPrice': 6.99, 'totalPrice': 4.19, 'discountAmount': 2.8}]}
dictionary
#ReceiptInfo(BaseModel = json_obj)

> Star Wars 4" Boxer
{'merchant': 'GAP OUTLET', 'address': '20 CITY <UNKNOWN> SPACE 1560', 'city': 'ORANGE', 'state': 'CA', 'phoneNumber': '(714) 938-9970', 'receiptDate': '03/15/2023', 'receiptTime': '02:20:54 PM', 'totalItems': 3, 'diningOptions': '', 'paymentType': 'cash', 'creditCardType': '<UNKNOWN>', 'totalDiscount': 17.8, 'tax': 3.04, 'total': 42.21, 'ITEMS': [{'description': 'GapKids <UNKNOWN> Disney Mickey Mouse Graphic T-Shirt', 'unabbreviatedDescription': 'GapKids Disney Mickey Mouse Graphic T-Shirt', 'includedItems': [], 'quantity': 1, 'unitPrice': '<UNKNOWN>', 'totalPrice': '<UNKNOWN>', 'discountAmount': 7.5}, {'description': 'GapKids <UNKNOWN> Disney Mickey Mouse Graphic T-Shirt', 'unabbreviatedDescription': 'GapKids Disney Mickey Mouse Graphic T-Shirt', 'includedItems': [], 'quantity': 1, 'unitPrice': 24.99, 'totalPrice': 17.49, 'discountAmount': 7.5}, {'description': 'Gap <UNKNOWN> Star Wars 4" Boxers', 'unabbreviatedDescription': 'Gap Star Wars 4" Boxers', 'includedIte

{'merchant': 'GAP OUTLET',
 'address': '20 CITY <UNKNOWN> SPACE 1560',
 'city': 'ORANGE',
 'state': 'CA',
 'phoneNumber': '(714) 938-9970',
 'receiptDate': '03/15/2023',
 'receiptTime': '02:20:54 PM',
 'totalItems': 3,
 'diningOptions': '',
 'paymentType': 'cash',
 'creditCardType': '<UNKNOWN>',
 'totalDiscount': 17.8,
 'tax': 3.04,
 'total': 42.21,
 'ITEMS': [{'description': 'GapKids <UNKNOWN> Disney Mickey Mouse Graphic T-Shirt',
   'unabbreviatedDescription': 'GapKids Disney Mickey Mouse Graphic T-Shirt',
   'includedItems': [],
   'quantity': 1,
   'unitPrice': '<UNKNOWN>',
   'totalPrice': '<UNKNOWN>',
   'discountAmount': 7.5},
  {'description': 'GapKids <UNKNOWN> Disney Mickey Mouse Graphic T-Shirt',
   'unabbreviatedDescription': 'GapKids Disney Mickey Mouse Graphic T-Shirt',
   'includedItems': [],
   'quantity': 1,
   'unitPrice': 24.99,
   'totalPrice': 17.49,
   'discountAmount': 7.5},
  {'description': 'Gap <UNKNOWN> Star Wars 4" Boxers',
   'unabbreviatedDescription': 'Ga

In [None]:
example_3_output='''{{"merchant":"Longs Drugs","address":"4211 WAIALAE AVE","city":"HONOLULU","state":"HI","phoneNumber":"808.732.0781","tax":0.77,"total":17.24,"receiptDate":"JULY 3, 2023","receiptTime":"2:48 PM","totalItems":6,"paymentType":"Credit","DiningOptions":"None","creditCardType":"CHASE VISA","totalDiscount":0.00,"ITEMS":[{{"description":"CFRIO SF PEG BAG","unabbreviatedDescription":"CoffeeRio Peg Bag","includedItems":[],"quantity":3,"unitPrice":4.59,"totalPrice":13.77,"discountAmount":0.00}},{{"description": "CR GYSR SPR WTR","unabbreviatedDescription":"Crystal Geyser Spring Water","includedItems":[],"quantity":2,"unitPrice":1.29,"totalPrice":2.58,"discountAmount":0.00}},{{"description":"BOTTLE DEPOSIT","unabbreviatedDescription":"Bottle Deposit","includedItems":[],"quantity":2,"unitPrice":0.05,"totalPrice":0.10,"discountAmount":0.00}},{{"description":"HI CONTAINER FEE","unabbreviatedDescription":"HI Container Fee","includedItems":[],"quantity":2,"unitPrice":0.01,"totalPrice":0.02,"discountAmount":0.00}}]}}'''

In [509]:
?field_validator

[0;31mSignature:[0m
[0mfield_validator[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0m__field[0m[0;34m:[0m [0;34m'str'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0mfields[0m[0;34m:[0m [0;34m'str'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmode[0m[0;34m:[0m [0;34m'FieldValidatorModes'[0m [0;34m=[0m [0;34m'after'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcheck_fields[0m[0;34m:[0m [0;34m'bool | None'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;34m'Callable[[Any], Any]'[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Usage docs: https://docs.pydantic.dev/2.5/concepts/validators/#field-validators

Decorate methods on the class indicating that they should be used to validate fields.

Example usage:
```py
from typing import Any

from pydantic import (
    BaseModel,
    ValidationError,
    field_validator,
)

class Model(BaseModel):
    a: str

    @field_validator('a')
    @classmethod
    de