In [14]:
from argparse import ArgumentParser
import mmcv
import numpy as np
import os
import os.path as osp
import numpy as np
from shapely.geometry import Polygon, LineString, MultiLineString, GeometryCollection
from shapely.algorithms.polylabel import polylabel
import math
import cv2
import json
import pickle

def load_ann(filename):
    """Parse a ground-truth annotation file into ``[polygon, hard]`` pairs.

    The file text is split on the marker ``'x: [['``; every chunk after the
    first marker is treated as one annotated object. Inside a chunk the first
    comma-separated field is read as whitespace-separated x coordinates, the
    second field as y coordinates, and a fixed slice of the last field is
    compared against ``'#'`` to derive the ``hard`` flag.

    Args:
        filename: Path of the annotation text file.

    Returns:
        list: One ``[shapely Polygon, bool]`` entry per object that parsed
        successfully, in file order. Objects that fail to parse are reported
        with ``print`` and skipped.
    """
    with open(filename) as f:
        texts = f.read().split('x: [[')[1:]
    polygon_list = []
    for raw in texts:
        try:
            fields = raw.split(',')
            x = np.array(fields[0][:-2].split(), dtype=np.float32)
            y = np.array(fields[1][6:-2].split(), dtype=np.float32)
            # Stack the two coordinate vectors into an (N, 2) array of points.
            box = np.hstack((x[:, np.newaxis], y[:, np.newaxis]))
            hard = (fields[-1][20:-3] == '#')
            polygon = Polygon(box)
        except Exception as err:
            print(filename, raw, err)
            # Skip the broken object. Falling through would either hit an
            # unbound ``polygon`` (first object) or silently re-append the
            # previous object's polygon.
            continue
        polygon_list.append([polygon, hard])
    return polygon_list

def load_pred(filename, score_thr=0.995):
    """Load pickled predictions and keep those scoring at least ``score_thr``.

    The pickle is expected to hold an iterable of per-sample collections; each
    prediction is a flat array whose last element is the score and whose
    remaining elements are reshaped to ``(-1, 2)`` points.

    Args:
        filename: Path of the pickle file with the predictions.
        score_thr: Minimum score a prediction needs in order to be kept.
            Defaults to 0.995, the value previously hard-coded here.

    Returns:
        list[list]: For every sample, the list of shapely Polygons built from
        the predictions that passed the score filter (possibly empty).
    """
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only point this at prediction files from a trusted source.
    with open(filename, 'rb') as f:
        array = pickle.load(f)
    pred_polygon_list = []
    for pred_per_sample in array:
        polygon_list = []
        for pred_one_out in pred_per_sample:
            score = float(pred_one_out[-1])
            if score < score_thr:
                continue
            coords = pred_one_out[:-1]
            polygon_list.append(Polygon(coords.reshape((-1, 2))))
        pred_polygon_list.append(polygon_list)
    return pred_polygon_list

def polygon_iou(poly1, poly2):
    """Intersection over union between two shapely polygons.

    Args:
        poly1: First polygon; must provide ``intersects``, ``intersection``
            and ``area``.
        poly2: Second polygon; must provide ``area``.

    Returns:
        float: ``0.0`` when the polygons do not intersect or when their union
        has no area; ``0.5`` when computing the intersection raises (best-effort
        fallback kept from the original code); otherwise
        ``intersection_area / union_area``.
    """
    if not poly1.intersects(poly2):
        return 0.0
    try:
        inter_area = poly1.intersection(poly2).area
        union_area = poly1.area + poly2.area - inter_area
    except Exception:
        # Deliberate best-effort: geometry errors are scored as a borderline
        # overlap instead of aborting the evaluation.
        return 0.5
    if union_area <= 0:
        # Degenerate (zero-area) polygons: IoU is undefined. Previously the
        # ZeroDivisionError was swallowed and reported as 0.5, which counted
        # as a match at the 0.5 threshold.
        return 0.0
    return float(inter_area) / union_area

def evaluation(pred_polygon_list, gt_polygon_list, iou_thr=0.5):
    """Print and return precision, recall and f-measure of the predictions.

    For every sample each ground-truth polygon is compared with every
    predicted polygon via ``polygon_iou``; a pair with IoU of at least
    ``iou_thr`` counts as a match. Ground truths flagged ``hard`` are removed
    from the ground-truth total, and every prediction matching a hard ground
    truth is removed from the prediction total.

    NOTE(review): matches are counted per (ground truth, prediction) pair, so
    a prediction overlapping several ground truths is counted once per pair.
    This mirrors the original counting and is left unchanged.

    Args:
        pred_polygon_list: Per-sample lists of predicted polygons.
        gt_polygon_list: Per-sample lists of ``(polygon, hard)`` pairs.
        iou_thr: Minimum IoU for a match. Defaults to 0.5, the value
            previously hard-coded here.

    Returns:
        tuple: ``(precision, recall, f_measure)`` as fractions. Previously the
        function returned ``None`` although callers stored its result.

    Raises:
        AssertionError: If the two lists do not contain the same number of
            samples.
    """
    assert len(pred_polygon_list)==len(gt_polygon_list), "sample numbers should be same"
    num_gt = 0
    num_pred = 0
    num_gt_correct = 0
    num_pred_correct = 0
    for gt_polygons, pred_polygons in zip(gt_polygon_list, pred_polygon_list):
        num_gt += len(gt_polygons)
        num_pred += len(pred_polygons)
        for gt_polygon, hard in gt_polygons:
            if hard:
                # Hard ground truths are excluded from the recall denominator.
                num_gt -= 1
            gt_correct = False
            for pred_polygon in pred_polygons:
                iou = polygon_iou(gt_polygon, pred_polygon)
                if iou >= iou_thr:
                    if hard:
                        # A prediction on a hard region is neither right nor wrong.
                        num_pred -= 1
                    else:
                        num_pred_correct += 1
                    gt_correct = True
            if gt_correct and not hard:
                num_gt_correct += 1
    # The tiny floor on each denominator avoids division by zero on empty input.
    p = 1.0 * num_pred_correct / max(num_pred, 1e-10)
    r = 1.0 * num_gt_correct / max(num_gt, 1e-10)
    f = 2.0 * p * r / max(p + r, 1e-10)
    print("num_pred correct/all (%d/%d); num_gt correct/all (%d/%d)"%(num_pred_correct, num_pred, num_gt_correct, num_gt))
    print("precision: %f%%"%(p*100.0))
    print("recall: %f%%"%(r*100.0))
    print("f-measure: %f%%"%(f*100.0))
    return p, r, f
        
# Predictions: pickled results, filtered and converted to polygons by load_pred.
pickle_file = '../../experiments/total_33d/total_33d_05022133.pkl'
pred_polygon_list = load_pred(pickle_file)

# Ground truth: one annotation file per image id listed in the test split file.
img_prefix = '/home/wff/curve-text/data/TotalText/'
test_file = osp.join(img_prefix, 'ImageSets/Main/test.txt')
img_ids = mmcv.list_from_file(test_file)
gt_polygon_list = []
for img_id in img_ids:
    ann_file = osp.join(img_prefix, 'Annotations', img_id + '.txt')
    gt_polygon_list.append(load_ann(ann_file))

# Score the predictions against the ground truth.
result = evaluation(pred_polygon_list, gt_polygon_list)

num_pred correct/all (1692/2210); num_gt correct/all (1692/2215)
precision: 76.561086%
recall: 76.388262%
f-measure: 76.474576%


In [1]:
from argparse import ArgumentParser
import mmcv
import numpy as np
import os
import os.path as osp
import numpy as np
from shapely.geometry import Polygon, LineString, MultiLineString, GeometryCollection
from shapely.algorithms.polylabel import polylabel
import math
import cv2
import json
import pickle

# Build MD5 -> image-path lookups for the TotalText test split and the ArT
# train split, so identical image files can be detected across the datasets.
import hashlib
total_prefix = '/home/wff/curve-text/data/TotalText/'
test_file = os.path.join(total_prefix, 'ImageSets/Main/test.txt')
total_ids = mmcv.list_from_file(test_file)
total = dict()
for i in total_ids:
    total_jpg = os.path.join(total_prefix, 'JPGImages', i + '.jpg')
    # Use a context manager so each image file is closed right after hashing.
    with open(total_jpg, 'rb') as img_file:
        file_md5 = hashlib.md5(img_file.read()).hexdigest()
    # Files with identical content share a digest; the last path seen wins.
    total[file_md5] = total_jpg
print('total done', len(total))

art_prefix = '/home/wff/curve-text/data/ArT/'
train_file = os.path.join(art_prefix, 'ImageSets/Main/train.txt')
art_ids = mmcv.list_from_file(train_file)
art = dict()
for j in art_ids:
    art_jpg = os.path.join(art_prefix, 'JPGImages', j + '.jpg')
    with open(art_jpg, 'rb') as img_file:
        file_md5 = hashlib.md5(img_file.read()).hexdigest()
    art[file_md5] = art_jpg
print('art done', len(art))

total done 300
art done 5591


In [6]:
# Report every image in `total` whose MD5 digest is also a key of `art`.
for key in total:
    if key not in art:
        continue
    print(total[key], art[key])
print("compare test done")

compare test done


In [8]:
# Rebuild the MD5 -> path lookup for the TotalText train split and report any
# image whose digest also appears in `art` (built in an earlier cell).
total_prefix = '/home/wff/curve-text/data/TotalText/'
train_file = os.path.join(total_prefix, 'ImageSets/Main/train.txt')
total_ids = mmcv.list_from_file(train_file)
total = dict()
for i in total_ids:
    total_jpg = os.path.join(total_prefix, 'JPGImages', i + '.jpg')
    # Use a context manager so each image file is closed right after hashing.
    with open(total_jpg, 'rb') as img_file:
        file_md5 = hashlib.md5(img_file.read()).hexdigest()
    total[file_md5] = total_jpg
print('total done', len(total))
for key in total:
    if key in art:
        print(total[key], art[key])
print("compare train done")

total done 1255
compare train done


In [None]:
# Same duplicate check for the CTW1500 train split. The `total*` names are
# reused from the TotalText cells, so running this cell overwrites them.
total_prefix = '/home/wff/curve-text/data/CTW1500/'
train_file = os.path.join(total_prefix, 'ImageSets/Main/train.txt')
total_ids = mmcv.list_from_file(train_file)
total = dict()
for i in total_ids:
    total_jpg = os.path.join(total_prefix, 'JPGImages', i + '.jpg')
    # Use a context manager so each image file is closed right after hashing.
    with open(total_jpg, 'rb') as img_file:
        file_md5 = hashlib.md5(img_file.read()).hexdigest()
    total[file_md5] = total_jpg
print('CTW1500 done', len(total))
for key in total:
    if key in art:
        print(total[key], art[key])
print("compare train done")

In [9]:
# Brute-force comparison of every (total, art) pair: when both entries have
# the same .shape and all elements compare equal, print the two ids.
# NOTE(review): in the cells visible in this notebook `total` and `art` are
# dicts keyed by MD5 hex digest with file paths as values, so the integer
# indexing below (`total[i]`, `art[j]`) would raise KeyError. This cell
# presumably expects sequences of decoded images aligned with `total_ids` /
# `art_ids` -- confirm where those come from before running.
for i in range(len(total)):
    for j in range(len(art)):
        # Compare shapes first; the element-wise check only runs on equal shapes.
        if total[i].shape == art[j].shape:
            if (total[i] == art[j]).all():
                print('same picture:',total_ids[i], art_ids[j])

In [10]:
# Read every image listed in the TotalText train split file into memory,
# keeping the results in the same order as `total_train_ids`.
total_prefix = '/home/wff/curve-text/data/TotalText/'
total_train_file = osp.join(total_prefix, 'ImageSets/Main/train.txt')
total_train_ids = mmcv.list_from_file(total_train_file)
total_train = []
for i in total_train_ids:
    total_jpg = osp.join(total_prefix, 'JPGImages', i + '.jpg')
    image = cv2.imread(total_jpg)
    total_train.append(image)
print('total train done')

total train done


In [11]:
# Brute-force comparison of every (total_train, art) pair: when both entries
# have the same .shape and all elements compare equal, print the two ids.
# NOTE(review): `total_train` is a list of cv2.imread results, but in the
# cells visible in this notebook `art` is a dict keyed by MD5 hex digest with
# file paths as values, so `art[j]` with an integer index would raise
# KeyError. This cell presumably expects `art` to be a list of decoded images
# aligned with `art_ids` -- confirm before running.
for i in range(len(total_train)):
    for j in range(len(art)):
        # Compare shapes first; the element-wise check only runs on equal shapes.
        if total_train[i].shape == art[j].shape:
            if (total_train[i] == art[j]).all():
                print('same picture:',total_train_ids[i], art_ids[j])

In [None]:
# Read every image listed in the ArT test split file into memory, keeping the
# results in the same order as `art_test_ids`.
art_prefix = '/home/wff/curve-text/data/ArT/'
art_test_file = osp.join(art_prefix, 'ImageSets/Main/test.txt')
art_test_ids = mmcv.list_from_file(art_test_file)
art_test = []
for j in art_test_ids:
    art_jpg = osp.join(art_prefix, 'JPGImages', j + '.jpg')
    image = cv2.imread(art_jpg)
    art_test.append(image)
print('art test done')
