In [87]:
pip install pydantic

Note: you may need to restart the kernel to use updated packages.


In [88]:
pip install langchain

Note: you may need to restart the kernel to use updated packages.


In [89]:
pip install openai

Note: you may need to restart the kernel to use updated packages.


In [90]:
### Imports ###
from pydantic import BaseModel, Field
from typing import List
from enum import Enum
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.chat_models import ChatOpenAI
import os
import openai

In [92]:
'''
Defining a subitem to be used for the includedItems category in the Item object.
The includedItems field consists of a list of SubItems and can be empty.
For example, a 'cheeseburger' Item might have includedItems: ['bun', 'patty', 'cheese']
'''
class SubItem(BaseModel):
    # One component listed under a parent Item; this model is the element
    # type of Item.includedItems.
    # Free-text name of the component.
    description: str

'''
An enumeration of the types of payment methods for the paymentType category in the ReceiptInfo object.
The default in ReceiptInfo is 'cash'.
'''   
class PaymentType(Enum):
    # Closed set of payment methods used by ReceiptInfo.paymentType.
    # Member values are the lower-case strings 'credit', 'debit' and 'cash'.
    CREDIT = 'credit'
    DEBIT = 'debit'
    CASH = 'cash'

'''
This object represents a single item (good/service) that was purchased in the receipt text.
'''
class Item(BaseModel):
    # Item name; required.
    description: str = Field(description="item name")
    # Expanded form of `description`; falls back to an empty string.
    unabbreviatedDescription: str = Field(
        default="",
        description="unabbreviated name of field:description",
    )
    # Components belonging to this item; each instance gets its own empty list by default.
    includedItems: List[SubItem] = Field(default_factory=list)
    # Pricing fields; all four are required.
    quantity: int = Field(description="number of items")
    unitPrice: float = Field(description="cost per unit")
    totalPrice: float = Field(description="total cost of unit(s) purchased")
    discountAmount: float = Field(description="discount for item")
    
'''
This object represents all of the information residing in one receipt text file.
Raw receipt text files are to be parsed into JSON object format for use in later analysis.
'''
class ReceiptInfo(BaseModel):
    # Top-level model for one parsed receipt: merchant details, payment
    # details, totals, and the list of purchased items.

    # --- Merchant details (all required) ---
    merchant: str = Field(description="name of merchant")
    address: str = Field(description="address")
    city: str = Field(description="city")
    state: str = Field(description="state")
    phoneNumber: str = Field(description="phone number")

    # --- Purchase date/time, kept as free-form strings ---
    receiptDate: str = Field(description="purchase date")
    receiptTime: str = Field(description="time purchased")

    totalItems: int = Field(description="number of items")
    diningOptions: str = Field(default="", description="here or to-go items for consumable items")

    # The default is the enum member, not the raw string "cash": pydantic does
    # not validate default values unless asked to, so a string default would
    # leave `paymentType` as a plain `str` whenever the field is omitted.
    paymentType: PaymentType = Field(default=PaymentType.CASH, description="payment method")
    creditCardType: str = Field(default="<UNKNOWN>", description="credit card type")

    # --- Totals ---
    totalDiscount: float = Field(default=0.00, description="total discount")
    tax: float = Field(description="tax amount")
    total: float = Field(description="total amount paid")

    # Purchased line items; required.
    ITEMS: List[Item]

In [93]:
# Create the Pydantic output parser bound to ReceiptInfo. Later cells use it for two things:
# its get_format_instructions() text is inserted into the prompt, and it is the last
# stage of the chain, where it receives the model's output.
receiptParser = PydanticOutputParser(pydantic_object=ReceiptInfo)

In [94]:
# EXAMPLE 1
# Hand-built ReceiptInfo with two items. Both Items omit unabbreviatedDescription
# and includedItems, so those fields take their declared defaults.
item_1 = Item(description="PEDANTIC PEAS", quantity=1, unitPrice=5.00, totalPrice=5.00, discountAmount=0.00)
item_2 = Item(description="CRAZY CARROTS", quantity=1, unitPrice=5.00, totalPrice=5.00, discountAmount=0.00)

# paymentType is passed as the plain string "credit", which is the value of PaymentType.CREDIT.
receiptInfo = ReceiptInfo(merchant="Walmart", address="123 Manoa Rd", city="Honolulu", state="HI", phoneNumber="1234567890",
                          receiptDate="01/01/1999", receiptTime="12:00am", totalItems=2, paymentType="credit",
                          creditCardType="visa", tax=1.00, total=10.00, totalDiscount=0.01, ITEMS=[item_1, item_2])

In [95]:
# EXAMPLE 2
# Hand-built ReceiptInfo mirroring the H MART receipt text used as example_1_input.
# NOTE: this cell rebinds item_1, item_2 and receiptInfo from the EXAMPLE 1 cell.
item_1 = Item(description="SY RAMEN HOT MULTI", quantity=1, unitPrice=8.99, totalPrice=8.99, discountAmount=0.00)
item_2 = Item(description="IND MI GORENG NOOD", quantity=1, unitPrice=4.99, totalPrice=4.99, discountAmount=0.00)
item_3 = Item(description="HT BEEF DUMPLING D", quantity=1, unitPrice=19.99, totalPrice=19.99, discountAmount=0.00)
item_4 = Item(description="NS SHRIMP HOT SNCK", quantity=1, unitPrice=1.99, totalPrice=1.99, discountAmount=0.00)
item_5 = Item(description="ME AZUKI ICE CREAM", quantity=1, unitPrice=6.49, totalPrice=6.49, discountAmount=0.00)

# Values that are unreadable in the receipt text are recorded as the literal string "<UNKNOWN>".
receiptInfo = ReceiptInfo(merchant="H MART", address="458 Keawe st", city="Honolulu", state="<UNKNOWN>", 
                          phoneNumber="<UNKNOWN>", receiptDate="07/15/23", receiptTime="08:16pm", totalItems=5, 
                          paymentType="credit", creditCardType="Discover", tax=2.00, total=44.45, totalDiscount=0.00, 
                          ITEMS=[item_1, item_2, item_3, item_4, item_5])

In [96]:
example_1_input = '''H
MART <UNKNOWN>
http://www.hmart.com
458 Keawe st
Honolulu, <UNKNOWN> 96813
TEL <UNKNOWN>
Your Cashier was
SY RAMEN HOT MULTI
8.99 B
IND MI GORENG NOOD
4.99 B
HT BEEF DUMPLING D
19.99 B
NS SHRIMP HOT SNCK
1.99 B
ME AZUKI ICE CREAM
6.49 B
TAX
2.00
**** BALANCE
44.45
Discover Credit - C
ACCOUNT <UNKNOWN>
***********1153
APPROVAL CODE: 01669R
SEQUENCE NUMBER: 33790
No CVM
Amount:USD $44.45
CARD:Discover CREDIT XXXX1153 EMV
APPROVAL CODE 01669R
AID:A0000001523010
TVR: 0000008000
<UNKNOWN>
TSI: E800
APPLICATION CRYPTOGRAM A433DC30B1FE402
APPLICATION PREFERRED NAME:Discover Cr
APPLICATION LABEL:Discover
<UNKNOWN>
ARC:00
RespDate: 07152023
Resp <UNKNOWN> 201640
<UNKNOWN>
TOTAL AMOUNT: $44.45
RESPONSE CODE: APPROVED
07/15/23 08:16pm 103 2
Discover
44.45
CHANGE
0.00
TOTAL NUMBER OF ITEMS SOLD =
5
07/15/23 08:17pm 81 2 281 103
** RETURN POLICY **
Unused product can be exchanged or
refunded with receipt within 7 days of
purchase unless otherwise noted below
Meat, fish, produce and refrigerated
food items must be returned with
receipt within 24 hours.
Electronics and <UNKNOWN> Must be
returned with receipt within 14 days
of purchase and must be unused.
If you are unable to <UNKNOWN> item to
store. please call customer service.
00008100202812307152017
<UNKNOWN> Thank You and Please Come Again **'''


# Expected JSON for example_1_input. Braces are doubled so they survive template
# formatting as literal braces. The text is valid JSON shaped like ReceiptInfo:
# straight quotes only, no wrapper key, no trailing commas, schema field names,
# and a lower-case paymentType matching the PaymentType values.
example_1_output='''{{"merchant": "H MART","address": "458 Keawe st","city": "Honolulu","state": "<UNKNOWN>","phoneNumber": "<UNKNOWN>","tax": "2.00","total": "44.45","receiptDate": "07/15/23","receiptTime": "08:16pm","totalItems": "5","paymentType": "credit","diningOptions": "<UNKNOWN>","creditCardType": "Discover","totalDiscount": "0.00","ITEMS":[{{"description": "SY RAMEN HOT MULTI","unabbreviatedDescription": "Sy Ramen Hot Multi","includedItems":[],"quantity": "1","unitPrice": "8.99","totalPrice": "8.99","discountAmount": "0.00"}},{{"description": "IND MI GORENG NOOD","unabbreviatedDescription": "Ind Mi Goreng Noodle","includedItems":[],"quantity": "1","unitPrice": "4.99","totalPrice": "4.99","discountAmount": "0.00"}},{{"description": "HT BEEF DUMPLING D","unabbreviatedDescription": "Hot Beef Dumpling","includedItems":[],"quantity": "1","unitPrice": "19.99","totalPrice": "19.99","discountAmount": "0.00"}},{{"description": "NS SHRIMP HOT SNCK","unabbreviatedDescription": "Shrimp Hot Snack","includedItems":[],"quantity": "1","unitPrice": "1.99","totalPrice": "1.99","discountAmount": "0.00"}},{{"description": "ME AZUKI ICE CREAM","unabbreviatedDescription": "Azuki Ice Cream","includedItems":[],"quantity": "1","unitPrice": "6.49","totalPrice": "6.49","discountAmount": "0.00"}}]}}'''

In [97]:
example_2_input ='''<UNKNOWN> PURCHASE **
Panda Express #2150
Honclulu, HI
(808)956-7229
6/9/2023 11:02:50 AM
-TO GO-
Order: 260527
Server: LamTan T
1 Plate
10.60
FRIED RICE-1/2
FRIED <UNKNOWN>
STR BN CKN BRST
ORANGE CKN
1 XTRA ENTREE
1.50
VEG SPRING <UNKNOWN>
<UNKNOWN>
12.10
TAX
0.57
Total
12.67
Visa
12.67
Acct XXXXXXXX9212
<UNKNOWN> 060248
*Card details below
EMV: Contactless
APL: VISA DEBIT
AID: A0000000031010
Panda Rewards has arrived!
<UNKNOWN>
Sign Up at <UNKNOWN>
*
* Enter Unique Rewards Code to earn *
PANDA POINTS on this order!
*
*Code valid for 7 days from purchase*
Unique Rewards Code:
558844128417
WE'D LOVE TC HEAR FROM YOU!
Share your thoughts and receive
a Free Small A La Carte Entree
*
w/purchase of a 2-entree Plate.
*
<UNKNOWN> 2 days, go to
*
<UNKNOWN>
<UNKNOWN>
*
<UNKNOWN>
*
*
*
Survey Code:
<UNKNOWN>
*
*
Email <UNKNOWN> quired to receive coupon
*
*
*General Manager with $100K potential
Join the Panda team!
PandaCareers.com'''

# Expected JSON for example_2_input. Braces are doubled so they survive template
# formatting as literal braces. The text is valid JSON shaped like ReceiptInfo:
# straight quotes only, no wrapper key, no trailing commas, schema field names,
# a lower-case paymentType, and includedItems written as SubItem-style objects.
example_2_output ='''{{"merchant": "Panda Express # 2150","address": "<UNKNOWN>","city": "Honclulu","state": "HI","phoneNumber": "(808)956-7229","tax": "0.57","total": "12.67","receiptDate": "6/9/2023","receiptTime": "11:02:50 AM","totalItems": "2","paymentType": "debit","diningOptions": "TO GO","creditCardType": "Visa","totalDiscount": "0.00","ITEMS":[{{"description": "Plate","unabbreviatedDescription": "Plate","includedItems": [{{"description": "FRIED RICE-½"}}, {{"description": "FRIED <UNKNOWN>"}}, {{"description": "STR BN CKN BREAST"}}, {{"description": "ORANGE CKN"}}],"quantity": "1","unitPrice": "10.60","totalPrice": "10.60","discountAmount": "0.00"}},{{"description": "XTRA ENTREE","unabbreviatedDescription": "Extra Entree","includedItems": [{{"description": "VEG SPRING <UNKNOWN>"}}],"quantity": "1","unitPrice": "1.50","totalPrice": "1.50","discountAmount": "0.00"}}]}}'''

In [98]:
example_3_input='''Longs Drugs <UNKNOWN>
4211 WAIALAE AVE
HONOLULU, HI 96816
808.732.0781
REG#10 TRN#4352 CSHR#0000095 STR#9220
1 CFRIO SF PEG BAG 3Z 4.59B
1 CFRIO SF PEG BAG
3Z
4.59B
1 CFRIO SF PEG BAG
3Z
4.59B
1 CR GYSR SPR WTR
33.8
1.29B
1
BOTTLE DEPOSIT
.05F
1 HI CONTAINER FEE
.01F
1 CR GYSR SPR WTR
33.8
1.29B
1 BOTTLE DEPOSIT
.05F
1 HI CONTAINER FEE
.01F
9 ITEMS
Survey ID #
4378 6100 0784 757 18
SUBTOTAL
16.47
HI 4.712% TAX
.77
TOTAL
17.24
CHARGE
17.24
************2130
RF
CHASE VISA
************2130
APPROVED# 04998D
REF# 103528
TRAN TYPE: SALE
AID: A0000000031010
TC: BD71734C41358DEE
TERMINAL# 84206407
NO SIGNATURE REQUIRED
CVM: 1F0000
<UNKNOWN> 0000000000
TSI(9B): 0000
CHANGE
.00
3509 2203 1844 3521 08
Returns with receipt, subject to
CVS Return Policy, thru 09/01/2023
Refund amount is based on price
after all coupons and discounts.
JULY 3, 2023
2:48 PM
<UNKNOWN> <UNKNOWN> <UNKNOWN> <UNKNOWN> <UNKNOWN> <UNKNOWN> <UNKNOWN>
<UNKNOWN>
GET YOUR CVS EXTRACARE CARD
We would love to hear your feedback
on your recent experience with us.
This survey will take only
1 minute to complete.
Share Your Feedback
<UNKNOWN>
Hablamos español
THANK YOU. SHOP 24 HOURS AT CVS.COM'''

# Expected JSON for example_3_input. Braces are doubled so they survive template
# formatting as literal braces. The text is valid JSON shaped like ReceiptInfo:
# straight quotes only, no wrapper key, no trailing commas, schema field names,
# and a lower-case paymentType. totalItems is 9: the receipt prints "9 ITEMS"
# and the item quantities below sum to 9 (3 + 2 + 2 + 2).
example_3_output='''{{"merchant": "Longs Drugs","address": "4211 WAIALAE AVE","city": "HONOLULU","state": "HI","phoneNumber": "808.732.0781","tax": "0.77","total": "17.24","receiptDate": "JULY 3, 2023","receiptTime": "2:48 PM","totalItems": "9","paymentType": "credit","diningOptions": "None","creditCardType": "CHASE VISA","totalDiscount": "0.00","ITEMS":[{{"description": "CFRIO SF PEG BAG","unabbreviatedDescription": "CoffeeRio Peg Bag","includedItems":[],"quantity": "3","unitPrice": "4.59","totalPrice": "13.77","discountAmount": "0.00"}},{{"description": "CR GYSR SPR WTR","unabbreviatedDescription": "Crystal Geyser Spring Water","includedItems":[],"quantity": "2","unitPrice": "1.29","totalPrice": "2.58","discountAmount": "0.00"}},{{"description": "BOTTLE DEPOSIT","unabbreviatedDescription": "Bottle Deposit","includedItems":[],"quantity": "2","unitPrice": "0.05","totalPrice": "0.10","discountAmount": "0.00"}},{{"description": "HI CONTAINER FEE","unabbreviatedDescription": "HI Container Fee","includedItems":[],"quantity": "2","unitPrice": "0.01","totalPrice": "0.02","discountAmount": "0.00"}}]}}'''

In [99]:
example_4_input='''129
For question comments or concerns
Call McDonald's Hotline
800-683-5587
Now Delivering with
Door Dash
Survey Code:
14616-01290-70423-16249-00085-
McDonald's Restaurant #14616
3549 RUSSETT GREEN E (WM#1985)
MD
ANNE
<UNKNOWN> 20724
TEL# 301-7767980
Thank You Valued Customer
KS# 1
07/04/2023 04:24 PM
Sidel
Order 29
1 Happy Meal Ch Burger
4.39
1 Cheeseburger
NO Pickle
1 Extra Kids Fry
1 Apple Juice
1 S Apl Jc Surcharge
1 ELEMENTAL
1 S Grimace Bday Shake
3.69
1 S Shake Surcharge
Subtotal
8.08
Tax
0.48
Take-Out Total
8.56
Cashless
8.56
Change
0.00
MER# 464239
CARD ISSUER
ACCOUNT#
Visa SALE
<UNKNOWN>
TRANSACTION AMOUNT
8.56
CONTACTLESS
AUTHORIZATION CODE - 03172D
SEQ# 035443
AID: A0000000031010
Now Hiring
Text MD349 To 38000
Sign up for MyMcDonald's rewards
to earn points on future visits'''

# Expected JSON for example_4_input. Braces are doubled so they survive template
# formatting as literal braces. The text is valid JSON shaped like ReceiptInfo:
# straight quotes only (including the previously unquoted state value), no
# wrapper key, schema field names, a lower-case paymentType, and includedItems
# written as SubItem-style objects.
example_4_output='''{{"merchant": "McDonald's Restaurant","address": "3549 RUSSETT GREEN E","city": "<UNKNOWN>","state": "MD","phoneNumber": "800-683-5587","tax": "0.48","total": "8.56","receiptDate": "07/04/2023","receiptTime": "04:24 PM","totalItems": "2","paymentType": "credit","diningOptions": "Take-Out","creditCardType": "Visa","totalDiscount": "0.00","ITEMS":[{{"description": "Happy Meal Ch Burger","unabbreviatedDescription": "Happy Meal Cheese Burger","includedItems": [{{"description": "Cheeseburger"}}, {{"description": "NO Pickle"}}, {{"description": "Extra Kids Fry"}}, {{"description": "Apple Juice"}}, {{"description": "S Apl Jc Surcharge"}}, {{"description": "ELEMENTAL"}}],"quantity": "1","unitPrice": "4.39","totalPrice": "4.39","discountAmount": "0.00"}},{{"description": "S Grimace Bday Shake","unabbreviatedDescription": "Grimace Birthday Shake","includedItems": [{{"description": "S Shake Surcharge"}}],"quantity": "1","unitPrice": "3.69","totalPrice": "3.69","discountAmount": "0.00"}}]}}'''

In [100]:
example_5_input='''Longs Drugs <UNKNOWN>
91-919 FORT WEAVER RD
EWA BEACH, HI 96706
808.689.5860
REG#01 TRN#8020 CSHR#2262192 STR#7356
Helped by: JEFFREY
ExtraCare Card <UNKNOWN>
1858
1 OREO ORIG DBL STFF 14.0
2.93B
ORIGINAL PRICE
6.99
2/9.00
2.49 -
COUPON SAVINGS
1.57 -
1 BAUD WAFER CHOCLT <UNKNOWN>
<UNKNOWN>
ORIGINAL PRICE
2.69
3/5.00
1.02 -
COUPON SAVINGS
.57 -
1 HWNISL TEABG GVGNS <UNKNOWN>
3.60B
ORIGINAL PRICE
5.49
COUPON SAVINGS
1.89 -
COUPONS APPLIED
1 $4 OFF YOUR PURCHASE
4.00 - CVS
1 2% BACK IN <UNKNOWN> R
.03 - CVS
3 ITEMS
SUBTOTAL
7.63
HI 4.712% TAX
.36
TOTAL
7.99
DEBIT
7.99
3476
CH
US DEBIT
3476
APPROVED# 003818
REF# 010205
TRAN <UNKNOWN> SALE
AID: A0000000042203
<UNKNOWN> <UNKNOWN>
TERMINAL# 84198694
PIN VERIFIED ONLINE
CVM: 420300
<UNKNOWN> 0000048000
TSI(9B): E800
CHANGE
00
Returns Return with 0200 12
3507 3563 1998
CVS Refund amount <UNKNOWN> thru subject to
after all coupons is based and discounts. 09/16/2023 on <UNKNOWN>
JULY 18, 2023
7:38 PM
<UNKNOWN> <UNKNOWN>'''

# Expected JSON for example_5_input. Braces are doubled so they survive template
# formatting as literal braces. The text is valid JSON shaped like ReceiptInfo:
# straight quotes only, no wrapper key, schema field names, a lower-case
# paymentType, and no stray leading space in the last item's description.
example_5_output='''{{"merchant": "Longs Drugs","address": "91-919 FORT WEAVER RD","city": "EWA BEACH","state": "HI","phoneNumber": "808.689.5860","tax": "0.36","total": "7.99","receiptDate": "JULY 18, 2023","receiptTime": "7:38 PM","totalItems": "3","paymentType": "debit","diningOptions": "None","creditCardType": "<UNKNOWN>","totalDiscount": "7.54","ITEMS":[{{"description": "OREO ORIG DBL STFF","unabbreviatedDescription": "Oreo Double Stuff","includedItems":[],"quantity": "1","unitPrice": "6.99","totalPrice": "2.93","discountAmount": "4.06"}},{{"description": "BAUD WAFER CHOCLT","unabbreviatedDescription": "Wafer Chocolate","includedItems":[],"quantity": "1","unitPrice": "2.69","totalPrice": "1.10","discountAmount": "1.59"}},{{"description": "HWNISL TEABG GVGNS","unabbreviatedDescription": "Teabag","includedItems":[],"quantity": "1","unitPrice": "5.49","totalPrice": "3.60","discountAmount": "1.89"}}]}}'''

In [101]:
# Task instructions placed before the few-shot examples. {format_instructions} is
# filled in below from the parser's format-instruction text. The field name
# unabbreviatedDescription is spelled exactly as declared on Item so the
# instruction refers to a field that exists in the schema.
prompt_prefix = '''You are a capable large language model. 
Your task is to extract data from a given receipt and format it into the JSON schema below. 
Use the default values if you're not sure. 
Try to infer a value for the fields: unabbreviatedDescription, totalItems.
The values for the fields: description and unabbreviatedDescription can not be the same.
Text can be used for multiple fields.

{format_instructions}

'''

# One dict per example; the keys match the variables of example_prompt below.
prompt_examples = [
    {"ExampleInput": example_1_input, "ExampleOutput": example_1_output},
    {"ExampleInput": example_2_input, "ExampleOutput": example_2_output},
    {"ExampleInput": example_3_input, "ExampleOutput": example_3_output},
    {"ExampleInput": example_4_input, "ExampleOutput": example_4_output},
    {"ExampleInput": example_5_input, "ExampleOutput": example_5_output},
]

# Layout of a single example: the raw receipt text followed by the expected JSON.
example_prompt = PromptTemplate(
    input_variables=["ExampleInput", "ExampleOutput"],
    template="input:\n{ExampleInput}\noutput:\n{ExampleOutput}",
)

# Full prompt: prefix, then the examples, then the receipt to parse. The suffix
# uses the same "input:/output:" layout as the examples and ends right where
# the model's answer is expected to start.
execute_fewshot_prompt = FewShotPromptTemplate(
    prefix=prompt_prefix,
    input_variables=["input"],
    partial_variables={'format_instructions': receiptParser.get_format_instructions()},
    examples=prompt_examples,
    example_prompt=example_prompt,
    example_separator="\n",
    suffix="input:\n{input}\noutput:\n",
)

In [118]:
data = {"input": '''see's
<UNKNOWN>
CANDIES
2HI001 See's Candies, Inc (2HI001 )
4211 Waialae Ave
Honolulu, HI 96816
808-737-9592
www.sees.com
Store: 701
Register 1
<UNKNOWN> 7/3/23
<UNKNOWN> 2:52 PM
Ticket: 545540
<UNKNOWN> 070100000003
Welcome to our <UNKNOWN>
Item
Qty
Price
Amount
<UNKNOWN>
Chocolate Colored Film
506635
4
1.35
5.40
Subtotal
5.40
HI1 Candy
0.25
Total
5.65
Visa
5.65
************2130
Auth # : 09710D
Transaction Type: Sale
Entry Method: Contactless
Auth <UNKNOWN> 2:52 PM
Trace <UNKNOWN> CREDIT-138012
Card Holder <UNKNOWN> CARDHOLDER/VISA
AID A0000000031010
TVR 0000000000
TSI 0000
ARC 00
APP VISA CREDIT
Change
0.00
For customer support, you can call or
text us at 800.347.7337
Thank you for shopping at See's Candies.
Come back soon.
Sold Item Count = 4
T1131131Y1133931D4AXHLM
Customer Copy'''}
# NOTE: make_fewshot_prompt and make_receiptParser are defined in a later cell of this
# notebook, so that cell must be run before this one. The factory function itself is
# passed (not a parser instance) because make_fewshot_prompt calls its argument.
print(make_fewshot_prompt(make_receiptParser).format(**data))

You are a capable large language model. Your task is to extract data from a given receipt and format it into the JSON schema below. Use the default values if you're not sure. Try to infer a value for the field: unabbreviatedDescription. The values for the fields: description and unnabbreviatedDescription can not be the same. Text can be used for multiple fields.
    
    The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Item": {"properties": {"description": {"description": "item name", "title": "Description", "type": "string"}, "unabbreviatedDescription": {"default": "", "descript

In [103]:
# Read the API key from the OPENAI_API_KEY environment variable so it never has to be
# pasted into the notebook; the original placeholder is kept as the fallback value.
model = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=os.environ.get("OPENAI_API_KEY", "INSERT OPENAI API KEY"),
)
# Pipeline: few-shot prompt -> chat model -> parser bound to ReceiptInfo.
chain = execute_fewshot_prompt | model | receiptParser

In [105]:
def make_model(model="gpt-3.5-turbo", temperature=1.00, openai_api_key="INSERT OPENAI API KEY"):
    """Build the chat model used as the middle stage of the chain.

    Args:
        model: Name of the OpenAI chat model.
        temperature: Sampling temperature forwarded to the model.
        openai_api_key: API key forwarded to the model; the default is only a placeholder.

    Returns:
        A ``ChatOpenAI`` instance configured with the given arguments.
    """
    # The class is ChatOpenAI; the previous doubled name raised NameError on every call.
    return ChatOpenAI(model=model, temperature=temperature, openai_api_key=openai_api_key)

In [None]:
def make_chain(fewshot_prompt, model, receiptParser):
    """Pipe the prompt into the model and the model's output into the parser.

    Args:
        fewshot_prompt: First stage of the pipeline (the few-shot prompt).
        model: Second stage (the chat model).
        receiptParser: Final stage (the output parser instance).

    Returns:
        The object produced by ``fewshot_prompt | model | receiptParser``.
    """
    # Use the prompt that was passed in; the previous body ignored this argument
    # and read the notebook-global `execute_fewshot_prompt` instead.
    return fewshot_prompt | model | receiptParser

In [120]:
### Imports ###
from pydantic import BaseModel, Field
from typing import List
from enum import Enum
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.chat_models import ChatOpenAI
import os
import openai

'''
Defining a subitem to be used for the includedItems category in the Item object.
The includedItems field consists of a list of SubItems and can be empty.
For example, a 'cheeseburger' Item might have includedItems: ['bun', 'patty', 'cheese']
'''
class SubItem(BaseModel):
    # One component listed under a parent Item; this model is the element
    # type of Item.includedItems.
    # Free-text name of the component.
    description: str

'''
An enumeration of the types of payment methods for the paymentType category in the ReceiptInfo object.
The default in ReceiptInfo is 'cash'.
'''   
class PaymentType(Enum):
    # Closed set of payment methods used by ReceiptInfo.paymentType.
    # Member values are the lower-case strings 'credit', 'debit' and 'cash'.
    CREDIT = 'credit'
    DEBIT = 'debit'
    CASH = 'cash'

'''
This object represents a single item (good/service) that was purchased in the receipt text.
'''
class Item(BaseModel):
    # Item name; required.
    description: str = Field(description="item name")
    # Expanded form of `description`; falls back to an empty string.
    unabbreviatedDescription: str = Field(
        default="",
        description="unabbreviated name of field:description",
    )
    # Components belonging to this item; each instance gets its own empty list by default.
    includedItems: List[SubItem] = Field(default_factory=list)
    # Pricing fields; all four are required.
    quantity: int = Field(description="number of items")
    unitPrice: float = Field(description="cost per unit")
    totalPrice: float = Field(description="total cost of unit(s) purchased")
    discountAmount: float = Field(description="discount for item")
    
'''
This object represents all of the information residing in one receipt text file.
Raw receipt text files are to be parsed into JSON object format for use in later analysis.
'''
class ReceiptInfo(BaseModel):
    # Top-level model for one parsed receipt: merchant details, payment
    # details, totals, and the list of purchased items.

    # --- Merchant details (all required) ---
    merchant: str = Field(description="name of merchant")
    address: str = Field(description="address")
    city: str = Field(description="city")
    state: str = Field(description="state")
    phoneNumber: str = Field(description="phone number")

    # --- Purchase date/time, kept as free-form strings ---
    receiptDate: str = Field(description="purchase date")
    receiptTime: str = Field(description="time purchased")

    totalItems: int = Field(description="number of items")
    diningOptions: str = Field(default="", description="here or to-go items for consumable items")

    # The default is the enum member, not the raw string "cash": pydantic does
    # not validate default values unless asked to, so a string default would
    # leave `paymentType` as a plain `str` whenever the field is omitted.
    paymentType: PaymentType = Field(default=PaymentType.CASH, description="payment method")
    creditCardType: str = Field(default="<UNKNOWN>", description="credit card type")

    # --- Totals ---
    totalDiscount: float = Field(default=0.00, description="total discount")
    tax: float = Field(description="tax amount")
    total: float = Field(description="total amount paid")

    # Purchased line items; required.
    ITEMS: List[Item]

def make_receiptParser():
    """Create a fresh output parser bound to the ReceiptInfo model."""
    receipt_parser = PydanticOutputParser(pydantic_object=ReceiptInfo)
    return receipt_parser

def get_prompt_prefix():
    """Return the instruction text placed before the few-shot examples.

    The text contains one template variable, ``{format_instructions}``, which
    make_fewshot_prompt fills in from the parser's format instructions.
    """
    # The field name is spelled "unabbreviatedDescription" in both places so the
    # instruction matches the field declared on Item (it was previously
    # misspelled "unnabbreviatedDescription"). The whitespace around the
    # placeholder is written out explicitly and is unchanged from the original text.
    return (
        "You are a capable large language model. "
        "Your task is to extract data from a given receipt and format it into the JSON schema below. "
        "Use the default values if you're not sure. "
        "Try to infer a value for the field: unabbreviatedDescription. "
        "The values for the fields: description and unabbreviatedDescription can not be the same. "
        "Text can be used for multiple fields.\n"
        "    \n"
        "    {format_instructions}\n"
        "    \n"
        "    "
    )

def get_example_prompt(input_variables=["ExampleInput", "ExampleOutput"], template= "input:\n{ExampleInput}\noutput:\n{ExampleOutput}"):
    """Build the template that renders a single few-shot example.

    Args:
        input_variables: Names of the variables used in ``template``.
        template: Layout of one example; by default the receipt text followed
            by its expected output.

    Returns:
        A ``PromptTemplate`` built from the two arguments.
    """
    single_example_prompt = PromptTemplate(
        input_variables=input_variables,
        template=template,
    )
    return single_example_prompt

def get_suffix():
    """Return the prompt tail: the receipt to parse, then the cue for the model's answer."""
    suffix_template = "input:\n{input}\noutput:\n"
    return suffix_template

def make_fewshot_prompt(receiptParser):
    """Assemble the few-shot prompt from the prefix, the examples and the suffix.

    Args:
        receiptParser: Either a zero-argument callable that returns a parser
            (the original calling convention, e.g. ``make_receiptParser``) or a
            parser instance that already provides ``get_format_instructions()``.

    Returns:
        A ``FewShotPromptTemplate`` whose only remaining input variable is ``input``.
    """
    # make_chain takes a parser *instance* under the same parameter name, so
    # accept an instance here too instead of failing with "object is not callable".
    if hasattr(receiptParser, "get_format_instructions"):
        parser = receiptParser
    else:
        parser = receiptParser()

    return FewShotPromptTemplate(
        prefix=get_prompt_prefix(),
        input_variables=["input"],
        partial_variables={'format_instructions': parser.get_format_instructions()},
        examples=get_prompt_examples(),
        example_prompt=get_example_prompt(),
        example_separator="\n",
        suffix=get_suffix(),
    )

def make_model(model="gpt-3.5-turbo", temperature=1.00, openai_api_key="INSERT OPENAI API KEY"):
    """Build the chat model used as the middle stage of the chain.

    Args:
        model: Name of the OpenAI chat model.
        temperature: Sampling temperature forwarded to the model.
        openai_api_key: API key forwarded to the model; the default is only a placeholder.

    Returns:
        A ``ChatOpenAI`` instance configured with the given arguments.
    """
    # The class is ChatOpenAI; the previous doubled name raised NameError on every call.
    return ChatOpenAI(model=model, temperature=temperature, openai_api_key=openai_api_key)

def make_chain(fewshot_prompt, model, receiptParser):
    """Pipe the prompt into the model and the model's output into the parser.

    Args:
        fewshot_prompt: First stage of the pipeline (the few-shot prompt).
        model: Second stage (the chat model).
        receiptParser: Final stage (the output parser instance).

    Returns:
        The object produced by ``fewshot_prompt | model | receiptParser``.
    """
    prompt_then_model = fewshot_prompt | model
    return prompt_then_model | receiptParser

def get_prompt_examples():
    """Return the five few-shot examples used by make_fewshot_prompt.

    Returns:
        A list of dicts with the keys ``ExampleInput`` (raw receipt text) and
        ``ExampleOutput`` (the JSON expected for that receipt).

    Braces in the outputs are written doubled (``{{`` / ``}}``) so that they
    survive template formatting as literal braces. Each output is valid JSON
    shaped like ReceiptInfo: straight quotes only, no wrapper key, no trailing
    commas, schema field names (``diningOptions``), lower-case ``paymentType``
    values matching PaymentType, and ``includedItems`` entries written as
    objects with a ``description`` key, matching SubItem.
    """
    example_1_input = '''H
MART <UNKNOWN>
http://www.hmart.com
458 Keawe st
Honolulu, <UNKNOWN> 96813
TEL <UNKNOWN>
Your Cashier was
SY RAMEN HOT MULTI
8.99 B
IND MI GORENG NOOD
4.99 B
HT BEEF DUMPLING D
19.99 B
NS SHRIMP HOT SNCK
1.99 B
ME AZUKI ICE CREAM
6.49 B
TAX
2.00
**** BALANCE
44.45
Discover Credit - C
ACCOUNT <UNKNOWN>
***********1153
APPROVAL CODE: 01669R
SEQUENCE NUMBER: 33790
No CVM
Amount:USD $44.45
CARD:Discover CREDIT XXXX1153 EMV
APPROVAL CODE 01669R
AID:A0000001523010
TVR: 0000008000
<UNKNOWN>
TSI: E800
APPLICATION CRYPTOGRAM A433DC30B1FE402
APPLICATION PREFERRED NAME:Discover Cr
APPLICATION LABEL:Discover
<UNKNOWN>
ARC:00
RespDate: 07152023
Resp <UNKNOWN> 201640
<UNKNOWN>
TOTAL AMOUNT: $44.45
RESPONSE CODE: APPROVED
07/15/23 08:16pm 103 2
Discover
44.45
CHANGE
0.00
TOTAL NUMBER OF ITEMS SOLD =
5
07/15/23 08:17pm 81 2 281 103
** RETURN POLICY **
Unused product can be exchanged or
refunded with receipt within 7 days of
purchase unless otherwise noted below
Meat, fish, produce and refrigerated
food items must be returned with
receipt within 24 hours.
Electronics and <UNKNOWN> Must be
returned with receipt within 14 days
of purchase and must be unused.
If you are unable to <UNKNOWN> item to
store. please call customer service.
00008100202812307152017
<UNKNOWN> Thank You and Please Come Again **'''

    example_1_output='''{{"merchant": "H MART","address": "458 Keawe st","city": "Honolulu","state": "<UNKNOWN>","phoneNumber": "<UNKNOWN>","tax": "2.00","total": "44.45","receiptDate": "07/15/23","receiptTime": "08:16pm","totalItems": "5","paymentType": "credit","diningOptions": "<UNKNOWN>","creditCardType": "Discover","totalDiscount": "0.00","ITEMS":[{{"description": "SY RAMEN HOT MULTI","unabbreviatedDescription": "Sy Ramen Hot Multi","includedItems":[],"quantity": "1","unitPrice": "8.99","totalPrice": "8.99","discountAmount": "0.00"}},{{"description": "IND MI GORENG NOOD","unabbreviatedDescription": "Ind Mi Goreng Noodle","includedItems":[],"quantity": "1","unitPrice": "4.99","totalPrice": "4.99","discountAmount": "0.00"}},{{"description": "HT BEEF DUMPLING D","unabbreviatedDescription": "Hot Beef Dumpling","includedItems":[],"quantity": "1","unitPrice": "19.99","totalPrice": "19.99","discountAmount": "0.00"}},{{"description": "NS SHRIMP HOT SNCK","unabbreviatedDescription": "Shrimp Hot Snack","includedItems":[],"quantity": "1","unitPrice": "1.99","totalPrice": "1.99","discountAmount": "0.00"}},{{"description": "ME AZUKI ICE CREAM","unabbreviatedDescription": "Azuki Ice Cream","includedItems":[],"quantity": "1","unitPrice": "6.49","totalPrice": "6.49","discountAmount": "0.00"}}]}}'''

    example_2_input ='''<UNKNOWN> PURCHASE **
Panda Express #2150
Honclulu, HI
(808)956-7229
6/9/2023 11:02:50 AM
-TO GO-
Order: 260527
Server: LamTan T
1 Plate
10.60
FRIED RICE-1/2
FRIED <UNKNOWN>
STR BN CKN BRST
ORANGE CKN
1 XTRA ENTREE
1.50
VEG SPRING <UNKNOWN>
<UNKNOWN>
12.10
TAX
0.57
Total
12.67
Visa
12.67
Acct XXXXXXXX9212
<UNKNOWN> 060248
*Card details below
EMV: Contactless
APL: VISA DEBIT
AID: A0000000031010
Panda Rewards has arrived!
<UNKNOWN>
Sign Up at <UNKNOWN>
*
* Enter Unique Rewards Code to earn *
PANDA POINTS on this order!
*
*Code valid for 7 days from purchase*
Unique Rewards Code:
558844128417
WE'D LOVE TC HEAR FROM YOU!
Share your thoughts and receive
a Free Small A La Carte Entree
*
w/purchase of a 2-entree Plate.
*
<UNKNOWN> 2 days, go to
*
<UNKNOWN>
<UNKNOWN>
*
<UNKNOWN>
*
*
*
Survey Code:
<UNKNOWN>
*
*
Email <UNKNOWN> quired to receive coupon
*
*
*General Manager with $100K potential
Join the Panda team!
PandaCareers.com'''

    example_2_output ='''{{"merchant": "Panda Express # 2150","address": "<UNKNOWN>","city": "Honclulu","state": "HI","phoneNumber": "(808)956-7229","tax": "0.57","total": "12.67","receiptDate": "6/9/2023","receiptTime": "11:02:50 AM","totalItems": "2","paymentType": "debit","diningOptions": "TO GO","creditCardType": "Visa","totalDiscount": "0.00","ITEMS":[{{"description": "Plate","unabbreviatedDescription": "Plate","includedItems": [{{"description": "FRIED RICE-½"}}, {{"description": "FRIED <UNKNOWN>"}}, {{"description": "STR BN CKN BREAST"}}, {{"description": "ORANGE CKN"}}],"quantity": "1","unitPrice": "10.60","totalPrice": "10.60","discountAmount": "0.00"}},{{"description": "XTRA ENTREE","unabbreviatedDescription": "Extra Entree","includedItems": [{{"description": "VEG SPRING <UNKNOWN>"}}],"quantity": "1","unitPrice": "1.50","totalPrice": "1.50","discountAmount": "0.00"}}]}}'''

    example_3_input='''Longs Drugs <UNKNOWN>
4211 WAIALAE AVE
HONOLULU, HI 96816
808.732.0781
REG#10 TRN#4352 CSHR#0000095 STR#9220
1 CFRIO SF PEG BAG 3Z 4.59B
1 CFRIO SF PEG BAG
3Z
4.59B
1 CFRIO SF PEG BAG
3Z
4.59B
1 CR GYSR SPR WTR
33.8
1.29B
1
BOTTLE DEPOSIT
.05F
1 HI CONTAINER FEE
.01F
1 CR GYSR SPR WTR
33.8
1.29B
1 BOTTLE DEPOSIT
.05F
1 HI CONTAINER FEE
.01F
9 ITEMS
Survey ID #
4378 6100 0784 757 18
SUBTOTAL
16.47
HI 4.712% TAX
.77
TOTAL
17.24
CHARGE
17.24
************2130
RF
CHASE VISA
************2130
APPROVED# 04998D
REF# 103528
TRAN TYPE: SALE
AID: A0000000031010
TC: BD71734C41358DEE
TERMINAL# 84206407
NO SIGNATURE REQUIRED
CVM: 1F0000
<UNKNOWN> 0000000000
TSI(9B): 0000
CHANGE
.00
3509 2203 1844 3521 08
Returns with receipt, subject to
CVS Return Policy, thru 09/01/2023
Refund amount is based on price
after all coupons and discounts.
JULY 3, 2023
2:48 PM
<UNKNOWN> <UNKNOWN> <UNKNOWN> <UNKNOWN> <UNKNOWN> <UNKNOWN> <UNKNOWN>
<UNKNOWN>
GET YOUR CVS EXTRACARE CARD
We would love to hear your feedback
on your recent experience with us.
This survey will take only
1 minute to complete.
Share Your Feedback
<UNKNOWN>
Hablamos español
THANK YOU. SHOP 24 HOURS AT CVS.COM'''

    # totalItems is 9 here: the receipt prints "9 ITEMS" and the quantities below sum to 9.
    example_3_output='''{{"merchant": "Longs Drugs","address": "4211 WAIALAE AVE","city": "HONOLULU","state": "HI","phoneNumber": "808.732.0781","tax": "0.77","total": "17.24","receiptDate": "JULY 3, 2023","receiptTime": "2:48 PM","totalItems": "9","paymentType": "credit","diningOptions": "None","creditCardType": "CHASE VISA","totalDiscount": "0.00","ITEMS":[{{"description": "CFRIO SF PEG BAG","unabbreviatedDescription": "CoffeeRio Peg Bag","includedItems":[],"quantity": "3","unitPrice": "4.59","totalPrice": "13.77","discountAmount": "0.00"}},{{"description": "CR GYSR SPR WTR","unabbreviatedDescription": "Crystal Geyser Spring Water","includedItems":[],"quantity": "2","unitPrice": "1.29","totalPrice": "2.58","discountAmount": "0.00"}},{{"description": "BOTTLE DEPOSIT","unabbreviatedDescription": "Bottle Deposit","includedItems":[],"quantity": "2","unitPrice": "0.05","totalPrice": "0.10","discountAmount": "0.00"}},{{"description": "HI CONTAINER FEE","unabbreviatedDescription": "HI Container Fee","includedItems":[],"quantity": "2","unitPrice": "0.01","totalPrice": "0.02","discountAmount": "0.00"}}]}}'''

    example_4_input='''129
For question comments or concerns
Call McDonald's Hotline
800-683-5587
Now Delivering with
Door Dash
Survey Code:
14616-01290-70423-16249-00085-
McDonald's Restaurant #14616
3549 RUSSETT GREEN E (WM#1985)
MD
ANNE
<UNKNOWN> 20724
TEL# 301-7767980
Thank You Valued Customer
KS# 1
07/04/2023 04:24 PM
Sidel
Order 29
1 Happy Meal Ch Burger
4.39
1 Cheeseburger
NO Pickle
1 Extra Kids Fry
1 Apple Juice
1 S Apl Jc Surcharge
1 ELEMENTAL
1 S Grimace Bday Shake
3.69
1 S Shake Surcharge
Subtotal
8.08
Tax
0.48
Take-Out Total
8.56
Cashless
8.56
Change
0.00
MER# 464239
CARD ISSUER
ACCOUNT#
Visa SALE
<UNKNOWN>
TRANSACTION AMOUNT
8.56
CONTACTLESS
AUTHORIZATION CODE - 03172D
SEQ# 035443
AID: A0000000031010
Now Hiring
Text MD349 To 38000
Sign up for MyMcDonald's rewards
to earn points on future visits'''

    example_4_output='''{{"merchant": "McDonald's Restaurant","address": "3549 RUSSETT GREEN E","city": "<UNKNOWN>","state": "MD","phoneNumber": "800-683-5587","tax": "0.48","total": "8.56","receiptDate": "07/04/2023","receiptTime": "04:24 PM","totalItems": "2","paymentType": "credit","diningOptions": "Take-Out","creditCardType": "Visa","totalDiscount": "0.00","ITEMS":[{{"description": "Happy Meal Ch Burger","unabbreviatedDescription": "Happy Meal Cheese Burger","includedItems": [{{"description": "Cheeseburger"}}, {{"description": "NO Pickle"}}, {{"description": "Extra Kids Fry"}}, {{"description": "Apple Juice"}}, {{"description": "S Apl Jc Surcharge"}}, {{"description": "ELEMENTAL"}}],"quantity": "1","unitPrice": "4.39","totalPrice": "4.39","discountAmount": "0.00"}},{{"description": "S Grimace Bday Shake","unabbreviatedDescription": "Grimace Birthday Shake","includedItems": [{{"description": "S Shake Surcharge"}}],"quantity": "1","unitPrice": "3.69","totalPrice": "3.69","discountAmount": "0.00"}}]}}'''

    example_5_input='''Longs Drugs <UNKNOWN>
91-919 FORT WEAVER RD
EWA BEACH, HI 96706
808.689.5860
REG#01 TRN#8020 CSHR#2262192 STR#7356
Helped by: JEFFREY
ExtraCare Card <UNKNOWN>
1858
1 OREO ORIG DBL STFF 14.0
2.93B
ORIGINAL PRICE
6.99
2/9.00
2.49 -
COUPON SAVINGS
1.57 -
1 BAUD WAFER CHOCLT <UNKNOWN>
<UNKNOWN>
ORIGINAL PRICE
2.69
3/5.00
1.02 -
COUPON SAVINGS
.57 -
1 HWNISL TEABG GVGNS <UNKNOWN>
3.60B
ORIGINAL PRICE
5.49
COUPON SAVINGS
1.89 -
COUPONS APPLIED
1 $4 OFF YOUR PURCHASE
4.00 - CVS
1 2% BACK IN <UNKNOWN> R
.03 - CVS
3 ITEMS
SUBTOTAL
7.63
HI 4.712% TAX
.36
TOTAL
7.99
DEBIT
7.99
3476
CH
US DEBIT
3476
APPROVED# 003818
REF# 010205
TRAN <UNKNOWN> SALE
AID: A0000000042203
<UNKNOWN> <UNKNOWN>
TERMINAL# 84198694
PIN VERIFIED ONLINE
CVM: 420300
<UNKNOWN> 0000048000
TSI(9B): E800
CHANGE
00
Returns Return with 0200 12
3507 3563 1998
CVS Refund amount <UNKNOWN> thru subject to
after all coupons is based and discounts. 09/16/2023 on <UNKNOWN>
JULY 18, 2023
7:38 PM
<UNKNOWN> <UNKNOWN>'''

    example_5_output='''{{"merchant": "Longs Drugs","address": "91-919 FORT WEAVER RD","city": "EWA BEACH","state": "HI","phoneNumber": "808.689.5860","tax": "0.36","total": "7.99","receiptDate": "JULY 18, 2023","receiptTime": "7:38 PM","totalItems": "3","paymentType": "debit","diningOptions": "None","creditCardType": "<UNKNOWN>","totalDiscount": "7.54","ITEMS":[{{"description": "OREO ORIG DBL STFF","unabbreviatedDescription": "Oreo Double Stuff","includedItems":[],"quantity": "1","unitPrice": "6.99","totalPrice": "2.93","discountAmount": "4.06"}},{{"description": "BAUD WAFER CHOCLT","unabbreviatedDescription": "Wafer Chocolate","includedItems":[],"quantity": "1","unitPrice": "2.69","totalPrice": "1.10","discountAmount": "1.59"}},{{"description": "HWNISL TEABG GVGNS","unabbreviatedDescription": "Teabag","includedItems":[],"quantity": "1","unitPrice": "5.49","totalPrice": "3.60","discountAmount": "1.89"}}]}}'''

    return [
        {"ExampleInput": example_1_input, "ExampleOutput": example_1_output},
        {"ExampleInput": example_2_input, "ExampleOutput": example_2_output},
        {"ExampleInput": example_3_input, "ExampleOutput": example_3_output},
        {"ExampleInput": example_4_input, "ExampleOutput": example_4_output},
        {"ExampleInput": example_5_input, "ExampleOutput": example_5_output},
    ]