In [17]:
import json # parse the model's JSON reply
import os # folder directory navigation

from paddleocr import PaddleOCR, draw_ocr # main OCR dependencies
from matplotlib import pyplot as plt # plot images
import cv2 #opencv
import ollama

In [18]:
# Build the OCR pipeline once: English language pack, GPU disabled.
ocr_model = PaddleOCR(
    lang='en',
    use_gpu=False,
)


[2025/02/08 18:38:59] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\harsh/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\harsh/.paddleocr/whl\\rec\\en\\en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=

In [19]:
# Receipt image, expected in the current working directory.
img_path = os.path.join(os.curdir, 'Walmart.jpeg')

In [20]:
# Run the ocr method on the ocr model.
# In the recorded output of this notebook, `result` is a nested list whose first
# element holds one [box_corners, (text, confidence)] entry per detected text line.
result = ocr_model.ocr(img_path)

[2025/02/08 18:39:00] ppocr DEBUG: dt_boxes num : 53, elapsed : 0.09740567207336426
[2025/02/08 18:39:03] ppocr DEBUG: rec_res num  : 53, elapsed : 2.142279863357544


In [21]:
# Display the raw OCR output (boxes, recognised text, confidence) for inspection.
result

[[[[[78.0, 39.0], [365.0, 45.0], [364.0, 63.0], [77.0, 56.0]],
   ('Give us feedback @ survey.walmart.com', 0.943331241607666)],
  [[[77.0, 55.0], [289.0, 58.0], [289.0, 76.0], [77.0, 73.0]],
   ('Thank you!ID#:7PB65JWCFZ2', 0.9179755449295044)],
  [[[158.0, 75.0], [305.0, 75.0], [305.0, 107.0], [158.0, 107.0]],
   ('Walmart', 0.9831969141960144)],
  [[[133.0, 107.0], [316.0, 108.0], [316.0, 126.0], [133.0, 125.0]],
   ('949-498-6669 Mgr:MICHAEL', 0.9599562287330627)],
  [[[164.0, 126.0], [288.0, 126.0], [288.0, 139.0], [164.0, 139.0]],
   ('951 AVENIDA PICO', 0.9468727111816406)],
  [[[140.0, 139.0], [303.0, 140.0], [303.0, 154.0], [140.0, 153.0]],
   ('SAN CLEMENTE CA 92673', 0.9396104216575623)],
  [[[77.0, 151.0], [367.0, 153.0], [366.0, 171.0], [77.0, 169.0]],
   ('ST#02527OP#009045 TE#45 TR#06193', 0.9391543865203857)],
  [[[78.0, 168.0], [180.0, 170.0], [180.0, 184.0], [77.0, 182.0]],
   ('GV OATMEAL', 0.9469197988510132)],
  [[[177.0, 170.0], [286.0, 170.0], [286.0, 183.0], [17

In [22]:
def get_strings_simple(data):
    """Collect the recognition entry of every detected line on the first page.

    Args:
        data: OCR output shaped like ``[[ [box, (text, confidence)], ... ]]``;
            only ``data[0]`` (the first page/image) is read.

    Returns:
        A list with the second element of each entry in ``data[0]`` -- for the
        OCR output used in this notebook these are ``(text, confidence)``
        tuples, not bare strings. Returns ``[]`` when ``data`` is empty/None
        or when the first page is empty/None.
    """
    # A page with no detected text may come back as None (or an empty list);
    # treat both as "nothing recognised" instead of raising on iteration.
    if not data or not data[0]:
        return []
    return [item[1] for item in data[0]]


In [23]:
def _loads_lenient(raw):
    """Parse ``raw`` as JSON, retrying on the outermost ``{...}`` span if needed.

    Raises:
        json.JSONDecodeError: if neither the full text nor the brace-delimited
            span is valid JSON.
    """
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # Replies are sometimes wrapped in Markdown fences or surrounded by
        # extra words; fall back to the text between the first '{' and last '}'.
        start, end = raw.find('{'), raw.rfind('}')
        if start == -1 or end <= start:
            raise
        return json.loads(raw[start:end + 1])


def query_qwen2(text, model='qwen2.5:3b'):
    """Send receipt text to a local Ollama model and return the parsed JSON reply.

    Args:
        text: Receipt text to embed in the extraction prompt.
        model: Ollama model tag to query. Defaults to ``'qwen2.5:3b'``.

    Returns:
        The decoded JSON value (a dict with vendor/amount/date/category keys if
        the model follows the prompt), or ``None`` when the reply cannot be
        parsed as JSON. In the failure case the raw reply is printed.
    """
    # NOTE(review): the example values in the prompt are concrete; verify that
    # the model is not simply echoing them for fields it cannot find.
    prompt = f"""
    Extract structured details from the following receipt text. Respond only in JSON format without any extra text.
    **Input Receipt Text:**
    \"\"\"{text}\"\"\"
    **JSON Format (Example Output):**
    {{
        "vendor": "Walmart",
        "amount": "45.67",
        "date": "2024-02-08",
        "category": "Grocery"
    }}
    Now, extract details from the given receipt and return only JSON:
    """
    # format='json' asks Ollama to constrain the reply to valid JSON instead of
    # relying on the prompt wording alone.
    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        format='json',
    )
    json_text = response['message']['content'].strip()
    # Ensure only valid JSON is returned
    try:
        return _loads_lenient(json_text)
    except json.JSONDecodeError:
        print("Error: Model response is not valid JSON.\nResponse:\n", json_text)
        return None

In [24]:
def process_receipt(ocr_text_list):
    """Join OCR text fragments and extract structured receipt details via query_qwen2.

    Args:
        ocr_text_list: Sequence of entries whose first element is the recognised
            text (e.g. ``(text, confidence)`` tuples).

    Returns:
        Whatever ``query_qwen2`` returns for the space-joined text, or ``None``
        when ``ocr_text_list`` is empty/None (the model is not called then).
    """
    if not ocr_text_list:
        # No OCR text: asking the model to "extract" from an empty receipt can
        # only yield made-up values, so skip the call.
        return None
    combined_text = " ".join(entry[0] for entry in ocr_text_list)  # Combine OCR-extracted text
    return query_qwen2(combined_text)  # Send to Qwen model

In [26]:
# Read and process image
# NOTE(review): this repeats the ocr_model.ocr(img_path) call made in an earlier
# cell with the same img_path, so `result` is simply recomputed here.
result = ocr_model.ocr(img_path)

[2025/02/08 18:40:05] ppocr DEBUG: dt_boxes num : 53, elapsed : 0.05650210380554199
[2025/02/08 18:40:06] ppocr DEBUG: rec_res num  : 53, elapsed : 1.6820707321166992


In [28]:
# Extract the recognition entry of every OCR line. Despite the name, `strings`
# holds (text, confidence) tuples, as the printed output shows.
strings = get_strings_simple(result)

print(strings)

[('Give us feedback @ survey.walmart.com', 0.943331241607666), ('Thank you!ID#:7PB65JWCFZ2', 0.9179755449295044), ('Walmart', 0.9831969141960144), ('949-498-6669 Mgr:MICHAEL', 0.9599562287330627), ('951 AVENIDA PICO', 0.9468727111816406), ('SAN CLEMENTE CA 92673', 0.9396104216575623), ('ST#02527OP#009045 TE#45 TR#06193', 0.9391543865203857), ('GV OATMEAL', 0.9469197988510132), ('007874243408F', 0.978279173374176), ('1.760', 0.9943475723266602), ('OT 200Z TUM', 0.9058380126953125), ('081236803115', 0.997002124786377), ('6.74X', 0.9370657205581665), ('M ATHLETICS', 0.9614916443824768), ('019104567781', 0.9976446032524109), ('24.97X', 0.9188626408576965), ('DEXAS 15X20', 0.9481031894683838), ('008429710921', 0.9988482594490051), ('12.97', 0.9962234497070312), ('SUBTOTAL', 0.9964640140533447), ('46.44', 0.9972065687179565), ('TAX1', 0.9896816611289978), ('7.750%', 0.9917407631874084), ('3.46', 0.997167706489563), ('TOTAL', 0.9981451034545898), ('49.90', 0.9983013272285461), ('DEBIT TEND', 

In [29]:
# Process receipt: join the OCR text and ask the model for structured fields.
# NOTE(review): in the recorded output, 'date' and 'category' are identical to
# the example values embedded in the query_qwen2 prompt -- verify against the
# receipt that the model is not echoing the example.
receipt_details = process_receipt(strings)
print("Extracted Receipt Details:", receipt_details)

Extracted Receipt Details: {'vendor': 'Walmart', 'amount': '49.90', 'date': '2024-02-08', 'category': 'Grocery'}
