In [1]:
from ultralytics import YOLO
from paddleocr import PaddleOCR
import cv2
import pandas as pd
import os
import glob
import numpy as np

In [2]:
# Label fields every output row carries, in CSV column order.
_RESULT_FIELDS = ('GTIN', 'SR_NO', 'LOT', 'EXP')


def _crop_box(image, box, pad=2):
    """Return the white-padded axis-aligned crop around one detected box, or None if it is empty."""
    height, width = image.shape[:2]
    # assumes `box` is a (4, 2) array of corner points, as the x/y unpacking below requires
    corners = box.astype(np.int32)
    x_min, y_min = np.min(corners, axis=0)
    x_max, y_max = np.max(corners, axis=0)

    # Clamp to the frame: a negative slice start would be interpreted by NumPy
    # as an offset from the far edge and silently yield the wrong (or an empty) crop.
    x_min, y_min = max(int(x_min), 0), max(int(y_min), 0)
    x_max, y_max = min(int(x_max), width), min(int(y_max), height)
    if x_max <= x_min or y_max <= y_min:
        return None

    roi = image[y_min:y_max, x_min:x_max]
    return cv2.copyMakeBorder(roi, pad, pad, pad, pad,
                              cv2.BORDER_CONSTANT, value=[255, 255, 255])


def _clean_field_text(class_name, text):
    """Apply the class-specific clean-up rules to one OCR string and return the result."""
    if class_name in ('GTIN', 'SR NO'):
        # Both fields are numeric: drop everything that is not a digit.
        return ''.join(c for c in text if c.isdigit())
    if class_name == 'EXP':
        # The 'EXP' prefix is only stripped when the text contains a '/'.
        if '/' in text:
            text = text.replace('EXP', '').strip()
        return text
    if class_name == 'LOT':
        text = text.replace('LOT', '').strip()
        if text.startswith('T'):
            text = 'U' + text[1:]  # Replace T with U for common OCR error
        return text
    return text


def process_images(image_dir, output_csv, yolo_model=None, ocr=None, conf=0.1):
    """Detect label fields in every ``*.jpg`` of a folder, OCR them and write a CSV.

    Each image is run through the YOLO OBB detector; every detected box is
    cropped, padded with a white border, passed to PaddleOCR, and the first
    recognised text line is cleaned according to the box's class name.

    Args:
        image_dir: Folder searched (non-recursively) for ``*.jpg`` files.
            Files whose name contains ``_debug`` are skipped.
        output_csv: Path of the CSV file to write.
        yolo_model: Optional already-loaded detector. When omitted, the
            weights at ``runs/obb/train/weights/best.pt`` are loaded, which
            lets callers that process several folders load the model once.
        ocr: Optional already-constructed PaddleOCR instance. When omitted,
            one is created with the notebook's default settings.
        conf: Detection confidence threshold forwarded to ``predict``.

    Returns:
        pandas.DataFrame with one row per processed image and the columns
        ``GTIN``, ``SR_NO``, ``LOT``, ``EXP`` and ``image_name`` (plus one
        column per additional class name the detector reports). Fields with
        no usable detection are empty strings.
    """
    if yolo_model is None:
        yolo_model = YOLO('runs/obb/train/weights/best.pt')
    if ocr is None:
        # NOTE(review): `rec_thresh` is forwarded unchanged from the original code;
        # confirm PaddleOCR honours it (the score filter it documents is `drop_score`).
        ocr = PaddleOCR(lang='en', use_gpu=False, det_db_thresh=0.3, rec_thresh=0.65)
    results_list = []

    # Sorted so the CSV row order is reproducible between runs.
    for img_path in sorted(glob.glob(os.path.join(image_dir, '*.jpg'))):
        image_name = os.path.basename(img_path)
        # Test the file name only, so a '_debug' directory component does not hide every image.
        if '_debug' in image_name:
            continue

        image_results = {field: '' for field in _RESULT_FIELDS}
        image_results['image_name'] = image_name
        results_list.append(image_results)

        image = cv2.imread(img_path)
        if image is None:
            # imread signals an unreadable file by returning None; keep the blank row and move on.
            print(f"Could not read image: {img_path}")
            continue

        predictions = yolo_model.predict(image, conf=conf)[0]
        obb = predictions.obb
        if obb is None or len(obb) == 0:
            continue

        boxes = obb.xyxyxyxy.cpu().numpy()
        classes = obb.cls.cpu().numpy()
        scores = obb.conf.cpu().numpy()

        # Visit the most confident detections first and never overwrite a field
        # that already holds text, so a weak duplicate cannot clobber a good read.
        for idx in np.argsort(-scores, kind='stable'):
            try:
                class_name = yolo_model.names[int(classes[idx])]
                field = class_name.replace(' ', '_')
                if image_results.get(field):
                    continue

                roi = _crop_box(image, boxes[idx])
                if roi is None:
                    continue

                result = ocr.ocr(roi)
                if not result or not result[0]:
                    continue

                # Only the first recognised line of the crop is used.
                text = result[0][0][1][0].strip()
                cleaned = _clean_field_text(class_name, text)

                image_results.setdefault(field, '')
                if cleaned:
                    image_results[field] = cleaned

            except Exception as e:
                # Best effort: one bad box must not abort the whole batch.
                print(f"Error processing box in {image_name}: {e}")
                continue

    if results_list:
        df = pd.DataFrame(results_list)
    else:
        # Keep the header even when the folder holds no images.
        df = pd.DataFrame(columns=list(_RESULT_FIELDS) + ['image_name'])
    df.to_csv(output_csv, index=False)
    return df

In [3]:
# Usage for each dataset split
# Root folder that holds one sub-folder per split; this is a machine-specific
# path, so edit it here rather than inside the loop.
DATA_ROOT = 'C:/Users/aarya/Videos/New PaddleOCR'
splits = ['test']

# One DataFrame per split, so earlier splits are not lost when more are added.
results_by_split = {}
for split in splits:
    input_dir = f'{DATA_ROOT}/{split}/images'
    output_csv = f'{split}_results.csv'
    # `results` still refers to the most recently processed split, as before.
    results = process_images(input_dir, output_csv)
    results_by_split[split] = results

[2024/11/29 11:37:58] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\aarya/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\aarya/.paddleocr/whl\\rec\\en\\en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=

[2024/11/29 11:38:03] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.006059885025024414
[2024/11/29 11:38:03] ppocr DEBUG: rec_res num  : 1, elapsed : 0.062774658203125
[2024/11/29 11:38:03] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.008015871047973633
[2024/11/29 11:38:03] ppocr DEBUG: rec_res num  : 1, elapsed : 0.09488511085510254
[2024/11/29 11:38:03] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.006537675857543945
[2024/11/29 11:38:03] ppocr DEBUG: rec_res num  : 1, elapsed : 0.07316851615905762
[2024/11/29 11:38:03] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.007201433181762695
[2024/11/29 11:38:03] ppocr DEBUG: rec_res num  : 1, elapsed : 0.0850365161895752

0: 640x640 11.1ms
Speed: 2.0ms preprocess, 11.1ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)
[2024/11/29 11:38:03] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.006215095520019531
[2024/11/29 11:38:03] ppocr DEBUG: rec_res num  : 1, elapsed : 0.07824969291687012
[2024/11/29 11:38:03] ppocr DEBUG: dt_boxes num : 1,

[2024/11/29 11:38:05] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.009526968002319336
[2024/11/29 11:38:06] ppocr DEBUG: rec_res num  : 1, elapsed : 0.06322908401489258
[2024/11/29 11:38:06] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.010524749755859375
[2024/11/29 11:38:06] ppocr DEBUG: rec_res num  : 1, elapsed : 0.06421184539794922
[2024/11/29 11:38:06] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.006911754608154297
[2024/11/29 11:38:06] ppocr DEBUG: rec_res num  : 1, elapsed : 0.08450460433959961

0: 640x640 33.6ms
Speed: 3.7ms preprocess, 33.6ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
[2024/11/29 11:38:06] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.006003856658935547
[2024/11/29 11:38:06] ppocr DEBUG: rec_res num  : 1, elapsed : 0.0767662525177002
[2024/11/29 11:38:06] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.00756525993347168
[2024/11/29 11:38:06] ppocr DEBUG: rec_res num  : 1, elapsed : 0.06878018379211426
[2024/11/29 11:38:06] ppocr DEBUG: dt_boxes num : 1

[2024/11/29 11:38:08] ppocr DEBUG: rec_res num  : 1, elapsed : 0.06671023368835449
[2024/11/29 11:38:08] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.008747577667236328
[2024/11/29 11:38:08] ppocr DEBUG: rec_res num  : 1, elapsed : 0.1202244758605957

0: 640x640 28.0ms
Speed: 7.5ms preprocess, 28.0ms inference, 6.9ms postprocess per image at shape (1, 3, 640, 640)
[2024/11/29 11:38:09] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.006061077117919922
[2024/11/29 11:38:09] ppocr DEBUG: rec_res num  : 1, elapsed : 0.07366299629211426
[2024/11/29 11:38:09] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.006134986877441406
[2024/11/29 11:38:09] ppocr DEBUG: rec_res num  : 1, elapsed : 0.06974411010742188
[2024/11/29 11:38:09] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.008107423782348633
[2024/11/29 11:38:09] ppocr DEBUG: rec_res num  : 1, elapsed : 0.0676422119140625
[2024/11/29 11:38:09] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.006655693054199219
[2024/11/29 11:38:09] ppocr DEBUG: rec_res num  : 1


0: 640x640 34.2ms
Speed: 4.0ms preprocess, 34.2ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 640)
[2024/11/29 11:38:11] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.008510828018188477
[2024/11/29 11:38:12] ppocr DEBUG: rec_res num  : 1, elapsed : 0.09075403213500977
[2024/11/29 11:38:12] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.007517337799072266
[2024/11/29 11:38:12] ppocr DEBUG: rec_res num  : 1, elapsed : 0.06804704666137695
[2024/11/29 11:38:12] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.007515668869018555
[2024/11/29 11:38:12] ppocr DEBUG: rec_res num  : 1, elapsed : 0.07014298439025879
[2024/11/29 11:38:12] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.0070607662200927734
[2024/11/29 11:38:12] ppocr DEBUG: rec_res num  : 1, elapsed : 0.09294939041137695

0: 640x640 31.5ms
Speed: 3.0ms preprocess, 31.5ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)
[2024/11/29 11:38:12] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.010580778121948242
[2024/11/29 11:

[2024/11/29 11:38:14] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.007513761520385742
[2024/11/29 11:38:14] ppocr DEBUG: rec_res num  : 1, elapsed : 0.06628847122192383
[2024/11/29 11:38:14] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.007012128829956055
[2024/11/29 11:38:14] ppocr DEBUG: rec_res num  : 1, elapsed : 0.12591147422790527
[2024/11/29 11:38:14] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.1028909683227539
[2024/11/29 11:38:15] ppocr DEBUG: rec_res num  : 1, elapsed : 0.14222359657287598

0: 640x640 9.5ms
Speed: 2.0ms preprocess, 9.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)
[2024/11/29 11:38:15] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.031568050384521484
[2024/11/29 11:38:15] ppocr DEBUG: rec_res num  : 1, elapsed : 0.10318827629089355
[2024/11/29 11:38:15] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.0335383415222168
[2024/11/29 11:38:15] ppocr DEBUG: rec_res num  : 1, elapsed : 0.07428884506225586
[2024/11/29 11:38:15] ppocr DEBUG: dt_boxes num : 1, el

[2024/11/29 11:38:17] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.006517648696899414
[2024/11/29 11:38:17] ppocr DEBUG: rec_res num  : 1, elapsed : 0.09234261512756348
[2024/11/29 11:38:17] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.005524158477783203
[2024/11/29 11:38:17] ppocr DEBUG: rec_res num  : 1, elapsed : 0.0964958667755127

0: 640x640 36.7ms
Speed: 4.5ms preprocess, 36.7ms inference, 6.3ms postprocess per image at shape (1, 3, 640, 640)
[2024/11/29 11:38:17] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.030622482299804688
[2024/11/29 11:38:17] ppocr DEBUG: rec_res num  : 1, elapsed : 0.06614804267883301
[2024/11/29 11:38:17] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.008195638656616211
[2024/11/29 11:38:18] ppocr DEBUG: rec_res num  : 1, elapsed : 0.08228468894958496
[2024/11/29 11:38:18] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.028588056564331055
[2024/11/29 11:38:18] ppocr DEBUG: rec_res num  : 1, elapsed : 0.06424093246459961
[2024/11/29 11:38:18] ppocr DEBUG: dt_boxes num : 