In [2]:
import re
import pandas as pd

ocr_raw_text = """
WARUNG MAKAN SEDERHANA
Jl. Kebon Jeruk No. 12
--------------------------------
Tanggal : 22-11-2025
--------------------------------
Nasi Goreng Spesial    25.000
Es Teh Manis           5.000
2 x Kerupuk Kaleng     4.000
Ayam Bakar Madu       30.000
--------------------------------
Total                 70.400
"""

def parse_receipt(text):
    lines = text.strip().split('\n')
    items = []
    total_transaction = 0

    price_pattern = re.compile(r'(\d{1,3}(?:\.\d{3})*(?:,\d{2})?)\s*$')
    qty_pattern = re.compile(r'^(\d+)\s*[xX]\s*(.+?)\s+(\d{1,3}(?:\.\d{3})*(?:,\d{2})?)\s*$')

    ignore_keywords = ['TOTAL', 'SUBTOTAL', 'CASH', 'KEMBALI', 'PAJAK']

    print(f"--- MEMULAI PARSING ---")
    
    for line in lines:
        clean_line = line.strip()
        if not clean_line: continue
        
        is_keyword = any(keyword in clean_line.upper() for keyword in ignore_keywords)
        
        qty_match = qty_pattern.match(clean_line)
        if qty_match:
            qty = int(qty_match.group(1))
            name = qty_match.group(2).strip()
            price = float(qty_match.group(3).replace('.', '').replace(',', '.'))
            items.append({'name': name, 'qty': qty, 'price': price})
            continue

        price_match = price_pattern.search(clean_line)
        if price_match and not is_keyword:
            price = float(price_match.group(1).replace('.', '').replace(',', '.'))
            name = clean_line[:price_match.start()].strip()
            if len(name) > 2:
                items.append({'name': name, 'qty': 1, 'price': price})
                continue

    return items

items = parse_receipt(ocr_raw_text)
print("\n=== HASIL PARSING ===")
print(pd.DataFrame(items))

--- MEMULAI PARSING ---

=== HASIL PARSING ===
                  name  qty    price
0  Jl. Kebon Jeruk No.    1     12.0
1    Tanggal : 22-11-2    1     25.0
2  Nasi Goreng Spesial    1  25000.0
3         Es Teh Manis    1   5000.0
4       Kerupuk Kaleng    2   4000.0
5      Ayam Bakar Madu    1  30000.0
