In [None]:
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import json
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

In [None]:
# Folder the preview cell reads images from.
IMG_FOLDER = '../../download_from_drive/data/ProcessedO7'
# Folder containing the annotation JSON files.
ANNOTATION_FOLDER = '../annotations/Batch1/ds0/ann/'

# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and therefore the resulting table) the same on every run.
ann_paths = [
    os.path.join(ANNOTATION_FOLDER, fn)
    for fn in sorted(os.listdir(ANNOTATION_FOLDER))
    if fn.endswith(".json")
]
ann_paths[:3]

In [None]:
# Colour palette; not used by the extraction below, kept as a module-level name.
colors = [(255, 0, 255), (255, 0, 0), (0,255,0), (0,255,255), (0,0,255), (130,250,0), (250, 130, 0)]

# Every stored box is shifted down by Y_SHIFT_FRAC of its height and widened
# by X_PAD_FRAC of its width on each side.
Y_SHIFT_FRAC = 0.2
X_PAD_FRAC = 0.1

# Errors raised when a file's line annotations do not stack into a regular
# array (ragged point lists, missing coordinates). Such files are skipped.
MALFORMED_LINES = (IndexError, TypeError, ValueError)


def _read_annotation(path):
    """Return the parsed JSON content of the annotation file at ``path``."""
    # Read-only binary mode: json.load detects the encoding itself, and the
    # context manager closes the handle (the file is never written to).
    with open(path, 'rb') as fh:
        return json.load(fh)


def _collect_lines(ann, wanted, point_axis):
    """Return the point lists of the objects whose class title passes ``wanted``.

    The points of each line are sorted along ``point_axis`` (0 = x, 1 = y).
    """
    return [
        sorted(np.array(obj['points']['exterior']), key=lambda pt: pt[point_axis])
        for obj in ann['objects'] if wanted(obj['classTitle'])
    ]


def _order_lines(lines, line_axis):
    """Stack ``lines`` into an array sorted by the ``line_axis`` coordinate of
    each line's first point.

    Raises one of ``MALFORMED_LINES`` when the lines are not regular.
    """
    stacked = np.array(lines)
    return np.array(sorted(stacked, key=lambda line: line[0, line_axis]))


def _grid_polygons(rows, columns, ann_name):
    """Build one rectangle per pair of consecutive rows and consecutive columns.

    ``rows`` and ``columns`` are the sorted line arrays; each line must have
    exactly two points because two weights are applied to its coordinates.
    ``ann_name`` is only used in the diagnostic printed for zero-height cells.
    """
    n_rows = len(rows)
    polygons = []
    for i in range(1, n_rows):
        for j in range(1, len(columns)):
            # Top/bottom y: weighted mean of the row line's two endpoints, the
            # second endpoint weighted (j-1)%4 out of 10.
            # NOTE(review): the period of 4 presumably reflects the table layout — confirm.
            col_weight = (j - 1) % 4
            y_weights = [10. - col_weight, col_weight]
            y1 = np.average(rows[i - 1][:, 1], weights=y_weights)
            y2 = np.average(rows[i][:, 1], weights=y_weights)

            # Left/right x: weighted mean of the column line's two endpoints,
            # the second endpoint weighted (i-1) out of len(rows).
            row_weight = i - 1
            x_weights = [n_rows - row_weight, row_weight]
            x1 = round(np.average(columns[j - 1][:, 0], weights=x_weights))
            x2 = round(np.average(columns[j][:, 0], weights=x_weights))

            if int(y1) == int(y2):
                print('cos', ann_name, rows)
            polygons.append(Polygon([(x1, y1), (x1, y2), (x2, y2), (x2, y1)]))
    return polygons


def _shifted_box(poly):
    """Return ``(x1, y1, x2, y2)`` for ``poly`` after the shift/padding, as Python ints."""
    x1, y1, x2, y2 = np.round(poly.bounds).astype(int)
    height = y2 - y1
    width = x2 - x1
    corners = (
        x1 - X_PAD_FRAC * width,
        y1 + Y_SHIFT_FRAC * height,
        x2 + X_PAD_FRAC * width,
        y2 + Y_SHIFT_FRAC * height,
    )
    # Explicit int() so the stored coordinates are plain Python ints whatever
    # the NumPy version returns from round().
    return tuple(int(round(float(v))) for v in corners)


ann_dict = {'filename':[], 'points':[], 'value':[]}

for ann_name in ann_paths:
    ann = _read_annotation(ann_name)

    # Row lines: points ordered left-to-right, lines ordered by y.
    rows = _collect_lines(ann, lambda title: title == 'row_line', point_axis=0)
    try:
        rows = _order_lines(rows, line_axis=1)
    except MALFORMED_LINES:
        print('row', ann_name)
        continue

    # Column lines: points ordered by y, lines ordered by x.
    columns = _collect_lines(ann, lambda title: title.startswith('line'), point_axis=1)
    try:
        columns = _order_lines(columns, line_axis=0)
    except MALFORMED_LINES:
        print('col', ann_name, columns)
        continue

    # Tagged non-line objects: first exterior point plus the first tag's value,
    # ordered by y.
    cells = sorted(
        (
            {
                'point': np.array(obj['points']['exterior'][0]),
                'value': obj['tags'][0]['value']
            }
            for obj in ann['objects']
            if ('line' not in obj['classTitle']) and (len(obj['tags']) > 0)
        ),
        key=lambda cell: cell['point'][1],
    )

    # Build each Point once instead of once per polygon.
    cell_points = []
    for cell in cells:
        x, y = cell['point']
        cell_points.append(Point(x, y))

    filename = os.path.join(ANNOTATION_FOLDER, os.path.basename(ann_name))
    for poly in _grid_polygons(rows, columns, ann_name):
        box = _shifted_box(poly)
        values = [
            cell['value']
            for cell, point in zip(cells, cell_points)
            if poly.contains(point)
        ]
        # One record per tagged cell inside the box; a box containing no
        # tagged cell gets a single 'none' record (also when the file has no
        # tagged cells at all).
        for value in values or ['none']:
            ann_dict['filename'].append(filename)
            ann_dict['points'].append(box)
            ann_dict['value'].append(value)

ann_df = pd.DataFrame(ann_dict)
ann_df

In [None]:
# Visual sanity check: draw the stored boxes on five randomly picked images.
for _ in range(5):
    rnd_ann_fn = ann_df.sample(1).filename.values[0]
    df = ann_df.query("filename == @rnd_ann_fn")

    # The image name is the annotation file name without its ".json" suffix.
    img_name = os.path.basename(rnd_ann_fn).split(".json")[0]
    img = cv2.imread(os.path.join(IMG_FOLDER, img_name))
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        print('missing image', img_name)
        continue

    for x1, y1, x2, y2 in df.points:
        rnd_color = tuple(np.random.rand() * 255 for _channel in range(3))
        # OpenCV requires integer pixel coordinates.
        cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), rnd_color, 2)

    plt.figure(figsize=(12,12))
    # OpenCV loads images as BGR while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis("off")
    plt.show()

In [None]:
# Persist the annotation table next to the notebook.
pd.to_pickle(ann_df, "cellDetection_annotations.pkl")