Environment setup

In [None]:
# Install the packages this notebook uses into the kernel's environment.
# NOTE(review): this line installs a package named `donut`, while the next cell
# installs `donut-python` — confirm that both are really needed.
%pip install donut datasets torch tqdm transformers

In [None]:
# Set the LC_ALL environment variable
LC_ALL="C"

# Install the donut-python package
%pip install -q donut-python

Fetch model & processor

In [None]:
from transformers import DonutProcessor, VisionEncoderDecoderModel

# The processor and the model are both loaded from the same pretrained identifier,
# so it is named once and reused.
checkpoint = "Vasettha/Donut_Cord"

processor = DonutProcessor.from_pretrained(checkpoint)
model = VisionEncoderDecoderModel.from_pretrained(checkpoint)

Inference

In [5]:
import re
import json
import torch
from donut import JSONParseEvaluator
from PIL import Image

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# `model` and `processor` are expected to exist already (created earlier in the notebook).
model.eval()
model.to(device)

# Open the sample image and convert it to RGB.
image_path = "/content/6.JPG"
image = Image.open(image_path).convert("RGB")

# Encoder input: the processor's pixel tensor for the image, placed on `device`.
pixel_values = processor(image, return_tensors="pt").pixel_values.to(device)

# Decoder input: token ids of the task prompt, placed on `device`.
task_prompt = "<s_cord-v2>"
tokenizer = processor.tokenizer
decoder_input_ids = tokenizer(
    task_prompt, add_special_tokens=False, return_tensors="pt"
).input_ids.to(device)

# Generation settings, collected in one place so they are easy to scan.
generation_kwargs = dict(
    decoder_input_ids=decoder_input_ids,
    max_length=model.decoder.config.max_position_embeddings,
    early_stopping=True,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
    use_cache=True,
    num_beams=1,
    bad_words_ids=[[tokenizer.unk_token_id]],
    return_dict_in_generate=True,
)
outputs = model.generate(pixel_values, **generation_kwargs)

# Decode the first generated sequence and strip the EOS and PAD token strings.
decoded = processor.batch_decode(outputs.sequences)[0]
for special_token in (tokenizer.eos_token, tokenizer.pad_token):
    decoded = decoded.replace(special_token, "")

# Drop only the first <...> tag (the task start token), then trim whitespace.
decoded = re.sub(r"<.*?>", "", decoded, count=1).strip()

# Convert the remaining tag sequence into a JSON-like structure; later cells read `seq`.
seq = processor.token2json(decoded)

print("Generated sequence:", seq)


Generated sequence: {'menu': [{'nm': 'Woman', 'cnt': '1', 'price': '0'}, {'nm': 'Ham Cheese', 'cnt': '2', 'price': '74,000'}, {'nm': 'Ice Java Tea', 'cnt': '1', 'price': '16,000'}, {'nm': 'Mineral Water', 'cnt': '1', 'price': '13,000'}, {'nm': 'Black & White', 'cnt': '1', 'price': '72,000'}], 'sub_total': {'subtotal_price': '175,000'}, 'total': {'total_price': '175,000', 'cashprice': '200,000', 'changeprice': '25,000'}}


CSV OUTPUT FOR CATEGORIZATION MODEL

In [9]:
import csv

# Normalise the parsed menu into a list of item dicts.
# `seq` is the parsed model output from the inference cell. The 'menu' entry may be
# a list of items or a single item, and this also guards against a parse that has
# no 'menu' entry at all (previously a KeyError).
raw_menu = seq.get('menu')
if raw_menu is None:
    raw_menu = []
elif not isinstance(raw_menu, list):
    raw_menu = [raw_menu]

# Entries that are not dicts have no 'nm' field to read, so they are skipped
# instead of crashing on `.get`.
menu_items = [item for item in raw_menu if isinstance(item, dict)]

# Writing data to CSV
csv_file_path = 'item_list.csv'

# newline='' lets the csv module control line endings; the explicit UTF-8 encoding
# keeps non-ASCII item names from depending on the platform's default locale.
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['Item']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Header row, then one row per menu item (empty string when 'nm' is missing).
    writer.writeheader()
    writer.writerows({'Item': item.get('nm', '')} for item in menu_items)

print(f'Data has been written to {csv_file_path}')


Data has been written to item_list.csv


JSON OUTPUT FOR APP

In [7]:
import json

# Normalise the parsed menu into a list of item dicts.
# `seq` is the parsed model output from the inference cell. The 'menu' entry may be
# a list of items or a single item, and this also guards against a parse that has
# no 'menu' entry at all (previously a KeyError).
raw_menu = seq.get('menu')
if raw_menu is None:
    raw_menu = []
elif not isinstance(raw_menu, list):
    raw_menu = [raw_menu]

# Entries that are not dicts have no fields to read, so they are skipped.
menu_items = [item for item in raw_menu if isinstance(item, dict)]

# Read the total defensively: fall back to an empty string when the 'total' section
# or its 'total_price' field is absent, mirroring the '' default used for item fields.
total_section = seq.get('total')
if isinstance(total_section, dict):
    total_price = total_section.get('total_price', '')
else:
    total_price = ''

# Build the payload: one entry per menu item, followed by a summary 'total' entry.
data_dict = {
    'menu_items': [
        {
            'Item': item.get('nm', ''),
            'Quantity': item.get('cnt', ''),
            'Price': item.get('price', ''),
        }
        for item in menu_items
    ],
    'total': {'Item': 'Total', 'Quantity': '', 'Price': total_price},
}

# Writing data to JSON
json_file_path = 'menu_data.json'

# Explicit encoding so the file does not depend on the platform's default locale.
with open(json_file_path, 'w', encoding='utf-8') as jsonfile:
    json.dump(data_dict, jsonfile, indent=2)

print(f'Data has been written to {json_file_path}')


Data has been written to menu_data.json
