In [1]:
import os
import scipy.io as io
import json
import numpy as np

### 把totaltext的mat格式的标注转换成common格式

In [22]:
valid_set = set(range(10)) | set('qazwsxedcrfvtgbyhnujmikolp')

def read_mat(path: str) -> tuple:
    assert os.path.exists(path), f'"{path}" not exist'
    raw_data = io.loadmat(path)
    polys, texts = [], []
    num_valid, num_ignore, num_invalid, num_special = 0, 0, 0, 0
    for i, info in enumerate(raw_data['polygt']):
        num_points = info[1].shape[1]
        if num_points > 1:
            polys.append(np.concatenate([info[1], info[3]]).T.tolist())
            if info[5] == '#':
                texts.append('###')
                num_ignore += 1
            else:
                texts.append(''.join(info[4].tolist()))
                num_valid += 1
                if len(set(texts[-1].lower()) - valid_set) > 0:
                    num_special += 1
        else:
            num_invalid += 1
    return polys, texts, num_valid, num_ignore, num_invalid, num_special

In [23]:
ann_root = '../data/total_text/test/Groundtruth/'
with open('test_mat.txt', 'w') as fw:
    total_valid, total_ignore, total_invalid, total_special = 0, 0, 0, 0
    for filename in os.listdir(ann_root):
        polys, texts, num_valid_gt, num_ignore, num_invalid, num_special = read_mat(os.path.join(ann_root, filename))
        total_valid += num_valid_gt
        total_ignore += num_ignore
        total_invalid += num_invalid
        total_special += num_special
        basename = os.path.basename(filename)[8:-4]
        data = []
        for poly, text in zip(polys, texts):
            data.append(dict(transcription=text, points=poly))
        fw.write(f'rgb/{basename}.jpg\t' + json.dumps(data, ensure_ascii=False) + '\n')
    print(f"total valid: {total_valid}, total ignore: {total_ignore}, total invalid(#): {total_invalid}, total special(-,'.): {total_special}")

total valid: 2204, total ignore: 342, total invalid(#): 2, total special(-,'.): 216


  if info[5] == '#':


In [None]:
raw_data = io.loadmat('../data/total_text/test/Groundtruth/poly_gt_img897.mat')
raw_data

### 从icdar2019中提取仅包含英文和数字的图片
注意：icdar标注并非严格按照totaltext格式上下两两配对

In [2]:
raw_labels = json.load(open('../data/ICDAR2019/train/train_labels.json'))

In [3]:
raw_labels['gt_0']

[{'transcription': 'EST',
  'points': [[1004, 689], [1095, 748], [1094, 774], [1007, 721]],
  'language': 'Latin',
  'illegibility': False},
 {'transcription': '1972',
  'points': [[1102, 755], [1191, 811], [1193, 837], [1107, 784]],
  'language': 'Latin',
  'illegibility': False},
 {'transcription': 'PoPeYes',
  'points': [[574, 521],
   [768, 585],
   [1050, 798],
   [1200, 894],
   [1386, 1018],
   [1542, 1118],
   [1599, 1295],
   [1432, 1196],
   [1300, 1114],
   [1112, 1015],
   [896, 901],
   [760, 842]],
  'language': 'Latin',
  'illegibility': False},
 {'transcription': '',
  'points': [[1606, 1308], [1627, 1321], [1630, 1344], [1607, 1333]],
  'language': 'Latin',
  'illegibility': False},
 {'transcription': 'BISCUITS',
  'points': [[1269, 1147],
   [1378, 1204],
   [1513, 1290],
   [1605, 1354],
   [1624, 1449],
   [1515, 1382],
   [1403, 1311],
   [1289, 1237]],
  'language': 'Latin',
  'illegibility': False},
 {'transcription': '###',
  'points': [[1195, 1109], [1258, 1142

In [5]:
subset_en = {}
for img_name, anns in raw_labels.items():
    for ann in anns:
        if ann['language'] != 'Latin':
            break
    else:
        subset_en[img_name] = anns
len(raw_labels), len(subset_en)

(5603, 2846)

In [8]:
with open('det_gt_en.txt', 'w') as fw:
    for img_name in sorted(list(subset_en.keys()), key=lambda x: int(x[3:])):
        fw.write(img_name + ".jpg\t" + json.dumps(subset_en[img_name], ensure_ascii=False) + "\n")