In [1]:
from ultralytics import YOLO
from PIL import Image, ImageDraw
import pathlib
import glob
import tqdm
import cv2
import numpy as np

In [27]:
def get_yolo_preds(img, docseg_model, thresh, shrink_ht = 1, shrink_wt = 1):
    image = cv2.imread(img, 0)
    orig_image = image.copy()
    # BGR to RGB
    image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB).astype(np.float32)
    # Resize the image
    height, width, _ = image.shape
    dets = []
    results = docseg_model(image, save=True, show_labels=False, show_conf=False, show_boxes=True, conf = conf_thresh)
    #results[0].save(filename = f'/home/dhruv/Projects/TD-Results/YOLO/{dataset}/{mode}/' + img.split('/')[-1])
    for entry in results:
        bboxes = entry.boxes.xyxy.numpy()
        classes = entry.boxes.cls.numpy()
        conf = entry.boxes.conf.numpy()
        for i in range(len(bboxes)):
            box = bboxes[i]
            if conf[i] > thresh:
                dets.append([0, box[1], width, box[3]])
    return dets

In [121]:
# Load the YOLO model
docseg_model = YOLO('model/yolo-row-300.pt')
docseg_model.overrides['iou'] = 0.2  # NMS IoU threshold
conf_thresh = 0.5

### Post-processing

In [122]:
import cv2

In [123]:
def draw_bboxes(img_file, bboxes, color = (255, 0, 255), thickness= 2):
    image = cv2.imread(img_file)
    for b in bboxes:
        start_point = (int(b[0]), int(b[1]))
        end_point = (int(b[2]), int(b[3]))
        image = cv2.rectangle(image, start_point, end_point, color, thickness)
    return image

In [141]:
def post_process_dets(height, width, dets, thresh):
    ys = []
    # First one y is 0
    dets[0][1] = 0
    # Last one y is height
    dets[-1][1] = height 
    for d in dets:
        ys.append(int(d[1]))
        ys.append(int(d[3]))
    ys.sort()
    final_ys = []
    for i in range(len(ys[:-1])):
        if ys[i + 1] - ys[i] > thresh:
            final_ys.append(ys[i])
    final_ys.append(height)
    print(final_ys)
    res = []
    for i in range(len(final_ys[:-1])):
        res.append([0, final_ys[i], width, final_ys[i + 1]])
    print(res)
    return res

In [145]:
# Single Image Function
def get_yolo_rows(img_file):
    dets = get_yolo_preds(img_file, docseg_model, conf_thresh)
    image = cv2.imread(img_file)
    ht, wt, _ = image.shape
    processed_dets = post_process_dets(ht, wt, dets, int(ht * 0.05))
    return processed_dets

In [142]:
# List of sample images to process
img_path = '*.png'
img_list = glob.glob(img_path)
print(len(img_list))

11


In [146]:
get_yolo_rows(img_list[0])


0: 288x640 10 table rows, 34.4ms
Speed: 2.2ms preprocess, 34.4ms inference, 0.8ms postprocess per image at shape (1, 3, 288, 640)
Results saved to [1mruns/detect/predict9[0m
[2, 66, 110, 158, 233, 275, 317, 366, 440, 525, 628]
[[0, 2, 1509, 66], [0, 66, 1509, 110], [0, 110, 1509, 158], [0, 158, 1509, 233], [0, 233, 1509, 275], [0, 275, 1509, 317], [0, 317, 1509, 366], [0, 366, 1509, 440], [0, 440, 1509, 525], [0, 525, 1509, 628]]


[[0, 2, 1509, 66],
 [0, 66, 1509, 110],
 [0, 110, 1509, 158],
 [0, 158, 1509, 233],
 [0, 233, 1509, 275],
 [0, 275, 1509, 317],
 [0, 317, 1509, 366],
 [0, 366, 1509, 440],
 [0, 440, 1509, 525],
 [0, 525, 1509, 628]]

In [144]:
for i in range(len(img_list)):
    img_file = img_list[i]
    dets = get_yolo_preds(img_file, docseg_model, conf_thresh)
    image = cv2.imread(img_file)
    ht, wt, _ = image.shape
    processed_dets = post_process_dets(ht, wt, dets, int(ht * 0.05))
    final_img = draw_bboxes(img_file, processed_dets)
    cv2.imwrite('yolo-rows-' + str(i + 1) + '.jpg', final_img)


0: 288x640 10 table rows, 33.2ms
Speed: 2.5ms preprocess, 33.2ms inference, 1.1ms postprocess per image at shape (1, 3, 288, 640)
Results saved to [1mruns/detect/predict9[0m
[2, 66, 110, 158, 233, 275, 317, 366, 440, 525, 628]
[[0, 2, 1509, 66], [0, 66, 1509, 110], [0, 110, 1509, 158], [0, 158, 1509, 233], [0, 233, 1509, 275], [0, 275, 1509, 317], [0, 317, 1509, 366], [0, 366, 1509, 440], [0, 440, 1509, 525], [0, 525, 1509, 628]]

0: 288x640 12 table rows, 29.8ms
Speed: 2.3ms preprocess, 29.8ms inference, 0.4ms postprocess per image at shape (1, 3, 288, 640)
Results saved to [1mruns/detect/predict9[0m
[2, 61, 110, 156, 232, 273, 316, 363, 440, 525, 574, 635]
[[0, 2, 1533, 61], [0, 61, 1533, 110], [0, 110, 1533, 156], [0, 156, 1533, 232], [0, 232, 1533, 273], [0, 273, 1533, 316], [0, 316, 1533, 363], [0, 363, 1533, 440], [0, 440, 1533, 525], [0, 525, 1533, 574], [0, 574, 1533, 635]]

0: 288x640 10 table rows, 31.2ms
Speed: 4.2ms preprocess, 31.2ms inference, 0.6ms postprocess per i