## Model Baca Receipt

In [1]:
import torch
import xmltodict
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image
from pprint import pprint

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Checkpoint identifier handed to both `from_pretrained` calls below.
# NOTE(review): the name suggests a Donut base model fine-tuned on the CORD v2
# receipt dataset — confirm against the model card.
model_name = "naver-clova-ix/donut-base-finetuned-cord-v2"
# The processor and the vision-to-sequence model are loaded from the same
# checkpoint; later cells use `processor` for image preprocessing, tokenizing
# and decoding, and `model` for generation.
processor = AutoProcessor.from_pretrained(model_name)
model = AutoModelForVision2Seq.from_pretrained(model_name)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [3]:
# Read the receipt image and force three-channel RGB before preprocessing.
image_path = "receipt1.jpg"
image = Image.open(image_path).convert("RGB")

# Tokenize the task prompt that seeds the decoder. Asking the tokenizer for
# PyTorch tensors yields the (1, prompt_length) id tensor directly, so the
# name is never bound to a plain Python list first.
task_prompt = "<s_cord-v2>"
prompt_encoding = processor.tokenizer(
    task_prompt,
    add_special_tokens=False,
    return_tensors="pt",
)
decoder_input_ids = prompt_encoding.input_ids

# Run the image through the processor to obtain the model's pixel input.
image_encoding = processor(image, return_tensors="pt")
pixel_values = image_encoding.pixel_values

In [7]:
# Generate the output token sequence for the receipt image, starting from the
# task-prompt ids prepared earlier.
tokenizer = processor.tokenizer
generation_settings = dict(
    decoder_input_ids=decoder_input_ids,
    # Cap the output length at the decoder's position-embedding capacity.
    max_length=model.decoder.config.max_position_embeddings,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
    use_cache=True,
    num_beams=1,
    # Passed as a "bad word" so the unknown token is excluded from the output.
    bad_words_ids=[[tokenizer.unk_token_id]],
    # Return a structured result; the decode cell reads its `.sequences`.
    return_dict_in_generate=True,
)
generation_output = model.generate(pixel_values, **generation_settings)

In [9]:
# Turn the first (only) generated sequence back into text.
raw_sequence = processor.batch_decode(generation_output.sequences)[0]

# Remove the end-of-sequence and padding markers, in that order.
for special_token in (
    processor.tokenizer.eos_token,
    processor.tokenizer.pad_token,
):
    raw_sequence = raw_sequence.replace(special_token, "")

# The text opens with the <s_cord-v2> task tag; append the matching closing
# tag so the whole string has a single root element for the XML parser.
decoded_sequence = raw_sequence + "</s_cord-v2>"
decoded_sequence

'<s_cord-v2><s_menu><s_nm> Bintang Bremer</s_nm><s_cnt> 1</s_cnt><s_price> 59,000</s_price><sep/><s_nm> Chicken H-H</s_nm><s_cnt> 1</s_cnt><s_price> 190,000</s_price><sep/><s_nm> Ades</s_nm><s_cnt> 1</s_cnt><s_price> 10,000</s_price></s_menu><s_sub_total><s_subtotal_price> 259,000</s_subtotal_price><s_discount_price> 19,000</s_discount_price><s_service_price> 9,600</s_service_price><s_tax_price> 52,416</s_tax_price></s_sub_total><s_total><s_total_price> 302,016</s_total_price></s_total></s_cord-v2>'

In [10]:
# Parse the tag-structured model output into nested dictionaries.
#
# xmltodict collapses an element that occurs only once into a plain string and
# only produces a list when the element repeats. For a receipt with a single
# line item that would make `s_nm` / `s_cnt` / `s_price` strings, and any code
# that zips over them would iterate character by character. `force_list`
# keeps these per-item fields as lists regardless of how many items there are.
my_dict = xmltodict.parse(
    decoded_sequence,
    force_list=("s_nm", "s_cnt", "s_price"),
)
pprint(my_dict)

{'s_cord-v2': {'s_menu': {'s_cnt': ['1', '1', '1'],
                          's_nm': ['Bintang Bremer', 'Chicken H-H', 'Ades'],
                          's_price': ['59,000', '190,000', '10,000'],
                          'sep': [None, None]},
               's_sub_total': {'s_discount_price': '19,000',
                               's_service_price': '9,600',
                               's_subtotal_price': '259,000',
                               's_tax_price': '52,416'},
               's_total': {'s_total_price': '302,016'}}}


## Contoh Split Bill Sederhana

In [17]:
def _as_list(value):
    """Return ``value`` as a list.

    The XML parser yields a plain string when a field occurs once and a list
    when it repeats; this normalises both shapes (and a missing value) so the
    caller can always iterate item by item.
    """
    if value is None:
        return []
    return value if isinstance(value, list) else [value]


def _split_bill(item_names, item_prices, orders):
    """Sum, per person, the prices of the receipt items that person ordered.

    Args:
        item_names: list of item names as they appear on the receipt.
        item_prices: list of price strings, positionally matching
            ``item_names``; commas are treated as thousands separators.
        orders: mapping of person name -> collection of item names ordered.

    Returns:
        dict mapping each person in ``orders`` to an integer total.

    Raises:
        ValueError: if the name and price lists differ in length, because
            pairing them positionally would then attach prices to the wrong
            items (a plain ``zip`` would silently truncate instead).
    """
    if len(item_names) != len(item_prices):
        raise ValueError(
            "receipt has %d item names but %d prices; cannot pair them"
            % (len(item_names), len(item_prices))
        )

    totals = {person: 0 for person in orders}
    for item_name, item_price in zip(item_names, item_prices):
        amount = int(item_price.replace(",", ""))
        # An item listed by several people is charged in full to each of them.
        for person, ordered_items in orders.items():
            if item_name in ordered_items:
                totals[person] += amount
    return totals


nama = ["Adi", "Ana"]
menu_adi = ["Chicken H-H"]
menu_ana = ["Bintang Bremer", "Ades"]

# Normalise to lists so a receipt with a single item is handled correctly.
menu = my_dict["s_cord-v2"]["s_menu"]
list_nama_menu = _as_list(menu["s_nm"])
list_harga_menu = _as_list(menu["s_price"])

# Pair every person in `nama` with their ordered items, in the same order.
total_per_nama = _split_bill(
    list_nama_menu,
    list_harga_menu,
    dict(zip(nama, [menu_adi, menu_ana])),
)

# Keep the per-person names that the following cells display.
total_adi = total_per_nama["Adi"]
total_ana = total_per_nama["Ana"]

In [18]:
# Show the summed price of the receipt items listed in `menu_adi`.
total_adi

190000

In [19]:
# Show the summed price of the receipt items listed in `menu_ana`.
total_ana

69000