In [1]:
# https://github.com/clovaai/deep-text-recognition-benchmark#download-lmdb-dataset-for-traininig-and-evaluation-from-here
# https://github.com/clovaai/CRAFT-pytorch

In [1]:
import json
import os
import numpy as np
import urllib.parse
from PIL import Image
import cv2
import types
from collections import namedtuple

In [2]:
# Directory containing the result image and the bounding-box JSON read below.
# NOTE: absolute, machine-specific path -- adjust when running elsewhere.
b_path = '/home/atogni/Desktop/ocr_gradient/craft_model/result/'
# os.path.join is the filesystem-path tool; urljoin follows URL rules
# (e.g. it drops the last component when the base has no trailing slash).
bboxes_path = os.path.join(b_path, 'bboxes_infos.json')

In [3]:
# JSON is UTF-8 by specification; pass the encoding explicitly so the read
# does not depend on the platform's default locale encoding.
with open(bboxes_path, encoding='utf-8') as f:
    boxes = json.load(f)

In [26]:
# Open the sample result image; its size is the denominator of the coverage ratio.
# os.path.join (not urljoin) is used because this is a filesystem path.
im = Image.open(os.path.join(b_path, 'res_CH_JQugBNjc_2.jpg'))
width, height = im.size
im.show()

In [5]:
def extract_4bboxes_area_pxl(list_bboxes):
    """Sum the areas of boxes given as four [x, y] vertices each.

    Width is the x distance between vertices 0 and 1, height the y distance
    between vertices 0 and 3; both are rounded to 2 decimals before being
    multiplied. Returns the total rounded to 0 decimals (0.0 for no boxes).
    """
    total_area = 0.0
    for vertices in list_bboxes:
        first, second, fourth = vertices[0], vertices[1], vertices[3]
        width_px = round(abs(first[0] - second[0]), 2)
        height_px = round(abs(first[1] - fourth[1]), 2)
        total_area += width_px * height_px
    return round(total_area, 0)

def show_cv_to_pil(cv_img):
    """Display an OpenCV image through PIL.

    Auxiliary workaround for opencv's own show not working on linux: the
    array is converted from BGR to RGB channel order and shown with PIL.
    """
    rgb_array = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
    Image.fromarray(rgb_array).show()
    
def box_to_tl_br(box_vertices):
    """Convert a 4-vertex box into a ``Box`` named tuple.

    ``Box = namedtuple("Box", "top_left bottom_right dx dy")``

    top_left / bottom_right are the min / max x and y over all vertices.
    dx is the x distance between vertices 0 and 1 and dy the y distance
    between vertices 0 and 3 (each rounded to 2 decimals), so for a skewed
    quadrilateral they can differ from bottom_right - top_left.
    """
    xs = [vertex[0] for vertex in box_vertices]
    ys = [vertex[1] for vertex in box_vertices]
    top_left = [min(xs), min(ys)]
    bottom_right = [max(xs), max(ys)]

    v0, v1, v3 = box_vertices[0], box_vertices[1], box_vertices[3]
    dx = round(abs(v0[0] - v1[0]), 2)
    dy = round(abs(v0[1] - v3[1]), 2)

    return Box(top_left, bottom_right, dx, dy)


def tl_br_list_to_centers(tl_dr_Idx_Box, round_to=int):
    """Extract box centers from a list of Idx_Box named tuples.

    Each center is computed as ``top_left + 0.5 * (dx, dy)``.

    Args:
        tl_dr_Idx_Box: iterable of objects exposing ``id``, ``top_left``,
            ``dx`` and ``dy``.
        round_to: number of decimals to keep when positive. The default
            (the ``int`` type itself) or any value <= 0 truncates the
            coordinates to integers with ``int()``.

    Returns:
        List of ``(box_id, [center_x, center_y])`` tuples, in input order.
    """
    # Comparing the default `int` with 0 raises TypeError on Python 3, so the
    # type object is handled explicitly as "truncate to int".
    truncate = round_to is int or round_to <= 0

    centers = []
    for box in tl_dr_Idx_Box:
        center_x = box.top_left[0] + 0.5 * box.dx
        center_y = box.top_left[1] + 0.5 * box.dy
        if truncate:
            centers.append((box.id, [int(center_x), int(center_y)]))
        else:
            centers.append((box.id, [round(center_x, round_to), round(center_y, round_to)]))
    return centers


def create_big_box_from_ids(ids, boxes_data):
    """Merge the boxes whose id is in ``ids`` into one enclosing box.

    Args:
        ids: collection of box ids to merge.
        boxes_data: iterable of boxes exposing ``id``, ``top_left`` and
            ``bottom_right`` (each an ``[x, y]`` pair).

    Returns:
        ``[(min_x, min_y), (max_x, max_y)]`` over the selected boxes, with
        every coordinate truncated to ``int``.

    Raises:
        ValueError: if no box in ``boxes_data`` has an id listed in ``ids``.
    """
    to_bound = [box for box in boxes_data if box.id in ids]
    if not to_bound:
        # Fail with an explicit message instead of an IndexError on [0].
        raise ValueError(f"no box in boxes_data matches ids {ids!r}")

    # min/max give the extremes directly; no need to re-sort the list four times.
    tl_x = min(box.top_left[0] for box in to_bound)
    tl_y = min(box.top_left[1] for box in to_bound)
    br_x = max(box.bottom_right[0] for box in to_bound)
    br_y = max(box.bottom_right[1] for box in to_bound)

    return [(int(tl_x), int(tl_y)), (int(br_x), int(br_y))]

def area_of_tl_br_box_list(tl_br_tup_lst):
    """Total area of boxes given as (top_left, bottom_right) corner pairs.

    Per box, width and height are the absolute coordinate differences rounded
    to 0 decimals; their product is truncated with int() before being added
    to a float accumulator, which is returned.
    """
    total = 0.0
    for corners in tl_br_tup_lst:
        first, second = corners[0], corners[1]
        width_px = round(abs(first[0] - second[0]), 0)
        height_px = round(abs(first[1] - second[1]), 0)
        total += int(width_px * height_px)
    return total

In [6]:
# Share of the image area covered by the raw 4-vertex detection boxes.
f"Pct area covered by text boxes: {round((extract_4bboxes_area_pxl(boxes['CH_JQugBNjc_2.jpg']) / (width*height))*100, 2)}%"


'Pct area covered by boxes: 15.82%'

In [7]:
# Box as produced by box_to_tl_br: two corner points plus the dx/dy extents.
Box = namedtuple("Box", "top_left bottom_right dx dy")
# Same fields as Box with a leading `id`, so individual boxes can be referenced.
Idx_Box=namedtuple("Idx_Box", "id top_left bottom_right dx dy")

In [9]:
# Convert every image's 4-vertex boxes to Idx_Box tuples in a single pass;
# the id is the box's position in that image's list.
tldr_boxes = {
    image_name: [
        Idx_Box(box_id, *box_to_tl_br(box_vertices=vertices))
        for box_id, vertices in enumerate(image_boxes)
    ]
    for image_name, image_boxes in boxes.items()
}

In [10]:
# Boxes of the sample image; the grouping and clustering cells below work on this list.
b_list=tldr_boxes['CH_JQugBNjc_2.jpg']

In [11]:
b_list

[Idx_Box(id=0, top_left=[271.4495544433594, 61.836456298828125], bottom_right=[543.1077270507812, 100.0404281616211], dx=270.89, dy=31.68),
 Idx_Box(id=1, top_left=[272.0000305175781, 98.66665649414062], bottom_right=[572.0, 130.66665649414062], dx=300.0, dy=32.0),
 Idx_Box(id=2, top_left=[41.33333206176758, 230.6666717529297], bottom_right=[250.6666717529297, 258.6666564941406], dx=209.33, dy=28.0),
 Idx_Box(id=3, top_left=[40.0, 268.0], bottom_right=[166.6666717529297, 296.0], dx=126.67, dy=28.0),
 Idx_Box(id=4, top_left=[37.33333206176758, 304.0], bottom_right=[202.6666717529297, 336.0], dx=165.33, dy=32.0),
 Idx_Box(id=5, top_left=[37.26239776611328, 331.88360595703125], bottom_right=[181.06959533691406, 371.5979919433594], dx=141.64, dy=29.22),
 Idx_Box(id=6, top_left=[347.3746643066406, 402.5814208984375], bottom_right=[563.505126953125, 440.4438781738281], dx=215.1, dy=30.6),
 Idx_Box(id=7, top_left=[346.6666564941406, 441.3333435058594], bottom_right=[508.0, 469.3333435058594],

In [34]:
def group_closest_boxes(b_list, y_toll):
    """Split off the leading run of boxes that sit on nearby y positions.

    Starting from the first box, consecutive boxes are added to the group for
    as long as the ``top_left`` y of each box differs from that of the
    previous box by less than ``y_toll``.

    Args:
        b_list: sequence of boxes exposing ``id`` and ``top_left``.
        y_toll: maximum y distance (exclusive) between neighbouring boxes of
            the same group.

    Returns:
        ``(ids_grouped, remaining)``: the ids of the leading group and the
        list of boxes that follow it. Both are always lists; an empty
        ``b_list`` yields ``([], [])``.
    """
    if not b_list:
        return [], []

    ids_grouped = [b_list[0].id]
    # Position of the first box that does NOT belong to the group. Tracking
    # the position (rather than reusing box ids as indices) keeps the split
    # correct when b_list is itself a remainder whose ids do not start at 0.
    split_at = len(b_list)
    for pos in range(1, len(b_list)):
        if abs(b_list[pos - 1].top_left[1] - b_list[pos].top_left[1]) < y_toll:
            ids_grouped.append(b_list[pos].id)
        else:
            split_at = pos
            break

    return ids_grouped, list(b_list[split_at:])
        

def iterate_search(bboxes, groups):
    """Run one grouping step and report whether more than one box is left.

    Appends the group returned by ``group_closest_boxes(bboxes, 50)`` to
    ``groups`` (mutated in place) and returns True when the leftover list
    holds more than one box, False otherwise.
    """
    found, leftover = group_closest_boxes(bboxes, 50)
    groups.append(found)
    return len(leftover) > 1
    

In [45]:
# First grouping step on the full list: g = ids of the leading group, r = boxes left over.
g, r = group_closest_boxes(b_list, 50)


In [47]:
# Next step on the leftovers. Rebinds g and r, so each re-run of this cell advances one more step.
g, r = group_closest_boxes(r, 50)

In [48]:
r

[]

In [36]:
# Same step without rebinding g/r, to inspect the returned (ids, leftovers) pair.
group_closest_boxes(r, 50)

([8, 9, 10, 11, 12, 13], [])

In [39]:
# Split b_list into successive groups. Each pass consumes the leading group
# and continues on what is left, so the loop always makes progress;
# max_groups is a hard stop against runaway iteration.
groups = []
max_groups = 10
remaining = b_list
while remaining and len(groups) < max_groups:
    g, remaining = group_closest_boxes(remaining, 50)
    # Accept either a list of ids or a single bare id for the group.
    groups.append(g if isinstance(g, list) else [g])

# One enclosing rectangle per group.
big_boxes = [
    create_big_box_from_ids(group_ids, tldr_boxes['CH_JQugBNjc_2.jpg'])
    for group_ids in groups
]


In [40]:
big_boxes

[[(270.08740234375, 63.959259033203125),
  (573.3157348632812, 132.0764923095703)],
 [(37.26239776611328, 230.66664123535156),
  (251.9999542236328, 371.5979919433594)]]

In [42]:
tldr_boxes['CH_JQugBNjc_2.jpg']


[Idx_Box(id=0, top_left=[328.0, 65.33333587646484], bottom_right=[385.3333435058594, 92.0], dx=57.33, dy=26.67),
 Idx_Box(id=1, top_left=[388.39892578125, 63.959259033203125], bottom_right=[516.8175659179688, 100.00591278076172], dx=127.07, dy=30.4),
 Idx_Box(id=2, top_left=[272.0, 66.66666412353516], bottom_right=[324.0, 92.0], dx=52.0, dy=25.33),
 Idx_Box(id=3, top_left=[518.6666870117188, 68.0], bottom_right=[542.6666870117188, 92.0], dx=24.0, dy=24.0),
 Idx_Box(id=4, top_left=[270.08740234375, 94.49179077148438], bottom_right=[364.83056640625, 132.0764923095703], dx=92.09, dy=29.21),
 Idx_Box(id=5, top_left=[473.0207214355469, 96.74925994873047], bottom_right=[573.3157348632812, 131.62832641601562], dx=98.41, dy=28.32),
 Idx_Box(id=6, top_left=[438.6666564941406, 102.66666412353516], bottom_right=[469.3333435058594, 125.33333587646484], dx=30.67, dy=22.67),
 Idx_Box(id=7, top_left=[369.3333435058594, 104.0], bottom_right=[436.0, 130.6666717529297], dx=66.67, dy=26.67),
 Idx_Box(id=

### Solution: MeanShift clustering

In [62]:
# Idx_Box fields plus a trailing cluster_id; not referenced by the visible cells below.
clustered_box=namedtuple("clustered_box", "id top_left bottom_right dx dy cluster_id") 

In [109]:
from sklearn.cluster import MeanShift
from collections import Counter, defaultdict

In [110]:
# (box id, [center_x, center_y]) per box; round_to=-1 selects integer truncation.
centers = tl_br_list_to_centers(b_list, round_to=-1)

In [111]:
# Feature matrix: one [x, y] center per row, in the same order as `centers`.
X = np.array([center for _box_id, center in centers])
# bandwidth=None leaves the bandwidth choice to scikit-learn (see MeanShift docs).
clst = MeanShift(bandwidth=None).fit(X)

In [112]:
# Pair each box id with the cluster label assigned to its center (same order as `centers`).
cluster_id_to_box_id = list(zip(clst.labels_, [c[0] for c in centers]))
# Only `defaultdict` is imported (not the `collections` module), so use it directly.
clustered_boxes = defaultdict(list)
for cluster_id, box_id in cluster_id_to_box_id:
    clustered_boxes[cluster_id].append(box_id)
# Plain dict: cluster label -> list of box ids.
clustered_boxes_ids = dict(clustered_boxes)

In [113]:
# One enclosing rectangle per cluster, in the dict's iteration order.
B = [
    create_big_box_from_ids(box_ids, tldr_boxes['CH_JQugBNjc_2.jpg'])
    for box_ids in clustered_boxes_ids.values()
]

In [120]:
# Showing results
image_path = os.path.join(b_path, 'res_CH_JQugBNjc_2.jpg')
orig_img = cv2.imread(image_path)
# cv2.imread signals failure by returning None instead of raising.
if orig_img is None:
    raise FileNotFoundError(f"could not read image: {image_path}")
big_boxes_img = orig_img.copy()
# Draw every merged rectangle; unpacking avoids rebinding the notebook-level `r`.
for top_left, bottom_right in B:
    cv2.rectangle(big_boxes_img, top_left, bottom_right, (255, 0, 0), 2)
show_cv_to_pil(big_boxes_img)

In [146]:
B

[[(271, 61), (572, 130)], [(37, 230), (250, 371)], [(346, 402), (563, 540)]]

In [149]:
# Coverage of the merged cluster boxes, relative to the loaded image's size
# (width, height) and rounded to 2 decimals like the raw-box figure.
f"[enhanced] Pct area covered by text boxes: {round(area_of_tl_br_box_list(B) / (width*height) * 100, 2)}%"

'[enhanced] Pct area covered by text boxes: 22.43%'