In [None]:
from transformers import DonutProcessor, VisionEncoderDecoderModel
from PIL import Image
import torch

# --- Configuration -----------------------------------------------------------
# Single checkpoint id shared by the processor and the model so they cannot drift apart.
MODEL_ID = "naver-clova-ix/donut-base-finetuned-cord-v2"
# Receipt image to parse. NOTE(review): looks like a Colab path; adjust when running elsewhere.
IMAGE_PATH = "/content/bill_image_receipt.png"

# --- Load the processor and model --------------------------------------------
processor = DonutProcessor.from_pretrained(MODEL_ID)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)

# Check for GPU and move model to it if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# --- Load the image ----------------------------------------------------------
# The context manager closes the underlying file even if decoding/conversion fails;
# `convert` returns a separate in-memory RGB image that stays usable afterwards.
with Image.open(IMAGE_PATH) as receipt_file:
    image = receipt_file.convert("RGB")

# --- Prepare the model's input -----------------------------------------------
# The task prompt is tokenized without extra special tokens and handed to the
# decoder as its starting input ids.
task_prompt = "<s_cord-v2>"
decoder_input_ids = processor.tokenizer(
    task_prompt, add_special_tokens=False, return_tensors="pt"
).input_ids
pixel_values = processor(image, return_tensors="pt").pixel_values

# Inputs must live on the same device as the model.
pixel_values = pixel_values.to(device)
decoder_input_ids = decoder_input_ids.to(device)

# --- Perform inference -------------------------------------------------------
outputs = model.generate(
    pixel_values,
    decoder_input_ids=decoder_input_ids,
    # Upper bound on generated length taken from the decoder's own config.
    max_length=model.decoder.config.max_position_embeddings,
    pad_token_id=processor.tokenizer.pad_token_id,
    eos_token_id=processor.tokenizer.eos_token_id,
    use_cache=True,
    # Forbid the unknown token from being generated.
    bad_words_ids=[[processor.tokenizer.unk_token_id]],
    return_dict_in_generate=True,
)

# --- Decode the output -------------------------------------------------------
# Take the first (only) decoded sequence, then strip the EOS/PAD markers and the
# task prompt so that only the field tags and their values remain.
sequence = processor.batch_decode(outputs.sequences)[0]
sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
sequence = sequence.replace(task_prompt, "")

# Convert the tagged token sequence into a nested Python structure.
json_output = processor.token2json(sequence)

# TODO: persist the parsed receipt to disk if needed (an earlier draft stubbed
# out writing it to "receipt_text.txt" but never finished the write).

# Bare last expression so the notebook renders the parsed receipt as the cell output.
json_output


output:

In [None]:
{
    "menu": [
        {
            "nm": "The Lone Pine",
            "cnt": "617-3236-6207",
            "price": "Road"
        },
        {
            "nm": "Table",
            "unitprice": "08000008",
            "cnt": "08",
            "price": "12:45"
        },
        {
            "nm": "Carlsberg Bottle",
            "cnt": "2",
            "price": "16.00"
        },
        {
            "nm": "Heineken Draft Standard",
            "cnt": "3",
            "price": "24.60"
        },
        {
            "nm": "Heineken Draft Half Liter",
            "cnt": "1",
            "price": "15.20"
        },
        {
            "nm": "Carlsberg Bucket (5 bottles)",
            "cnt": "2",
            "price": "80.00"
        },
        {
            "nm": "Grilled Chicken Breast",
            "cnt": "4",
            "price": "74.00"
        },
        {
            "nm": "Sirloin Steak",
            "cnt": "3",
            "price": "96.00"
        },
        {
            "nm": "Coke",
            "cnt": "1",
            "price": "3.50"
        },
        {
            "nm": "Ice Cream",
            "cnt": "5",
            "price": "18.00"
        }
    ],
    "sub_total": {
        "subtotal_price": "327.30",
        "discount_price": "32.73",
        "service_price": "16.36",
        "tax_price": "32.73"
    },
    "total": {
        "total_price": "376.40",
        "cashprice": [
            "400.00",
            "23.60"
        ],
        "changeprice": "10",
        "creditcardprice": "10"
    }
}