In [1]:
import detectron2
from detectron2.utils.logger import setup_logger

setup_logger()
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# import some common libraries
import numpy as np
import os, json, cv2, random

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from matplotlib import pyplot as plt
import glob
from detectron2.data.datasets.coco import convert_to_coco_json

from vis import *
from dataset_bjtu import *
register_coco_instances("bjtu_train_washed", {}, "BJTU_washed/train.json", ".")
register_coco_instances("bjtu_test_washed", {}, "BJTU_washed/test.json", ".")

In [2]:
dataset_dicts = DatasetCatalog.get("bjtu_train_washed")

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[32m[06/18 23:30:54 d2.data.datasets.coco]: [0mLoaded 562 images in COCO format from BJTU_washed/train.json


In [None]:
import json
with open("RCTW_17/dsdl/dsdl_OCR_full/set-train/train_samples.json", "r") as read_file:
    bjtu = json.load(read_file)

bjtu["samples"][0]

In [16]:
import json
with open("RCTW_17/dsdl/dsdl_OCR_full/set-train/train_samples.json", "r") as read_file:
    data = json.load(read_file)

data["samples"][1]

{'image': {'img_path': 'train_images/image_1.jpg',
  'image_shape': [2448, 2448]},
 'instances': [{'polygon': [[[1275.0, 1139.0],
     [1483.0, 1131.0],
     [1485.0, 1213.0],
     [1277.0, 1220.0]]],
   'text': '川·M',
   'difficult': False},
  {'polygon': [[[1242.0, 1220.0],
     [1534.0, 1213.0],
     [1535.0, 1293.0],
     [1243.0, 1300.0]]],
   'text': 'A07126',
   'difficult': False},
  {'polygon': [[[1187.0, 1359.0],
     [1588.0, 1347.0],
     [1593.0, 1511.0],
     [1192.0, 1523.0]]],
   'text': '大地四海',
   'difficult': False},
  {'polygon': [[[1211.0, 1500.0],
     [1599.0, 1503.0],
     [1599.0, 1600.0],
     [1211.0, 1597.0]]],
   'text': '电动车商城',
   'difficult': False},
  {'polygon': [[[1227.0, 1597.0],
     [1610.0, 1602.0],
     [1609.0, 1649.0],
     [1226.0, 1644.0]]],
   'text': '好品牌应有尽有',
   'difficult': False},
  {'polygon': [[[1236.0, 1640.0],
     [1616.0, 1647.0],
     [1616.0, 1678.0],
     [1235.0, 1671.0]]],
   'text': '资阳西门原松涛车站内',
   'difficult': False},
  {'p

In [5]:
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import BoxMode
import cv2
import os 
import tqdm

def get_rctw_dicts(img_dir):
    with open("RCTW_17/dsdl/dsdl_OCR_full/set-train/train_samples.json", "r") as read_file:
        data = json.load(read_file)
    dataset_dicts = []
    data = data["samples"]
    for idx, entry in tqdm.tqdm(enumerate(data)):
        img_path = entry["image"]["img_path"]
        h, w = cv2.imread(os.path.join("RCTW_17/raw",img_path)).shape[:2]
        record = {}
        record["file_name"] = img_path
        record["image_id"] = idx
        record["height"] = h
        record["width"] = w
        annotations = []
        for objs in entry["instances"]:
            polygon = objs["polygon"][0]
            xs = [point[0] for point in polygon]
            ys = [point[1] for point in polygon]
            x_min, x_max, y_min, y_max = min(xs), max(xs), min(ys), max(ys)
            obj = {
                "bbox": [x_min, y_min, x_max, y_max],
                "bbox_mode": BoxMode.XYXY_ABS,
                "category_id": 0,
            }
            annotations.append(obj)
        record["annotations"] = annotations
        dataset_dicts.append(record)
    return dataset_dicts
DatasetCatalog.register("rctw_train", lambda: get_rctw_dicts("RCTW_17/raw"))
MetadataCatalog.get("rctw_train").set(thing_classes=["text"])

AssertionError: Dataset 'rctw_train' is already registered!

In [6]:
from detectron2.data.datasets.coco import convert_to_coco_json
import json
convert_to_coco_json("rctw_train", "./rctw_train.json",allow_cached=True)


8034it [03:37, 36.95it/s]


In [None]:
def get_bjtu_dicts(img_dir):
    img_ext = ['jpg', 'png', 'jpeg', 'webp']
    img_files=sorted([filename for ext in img_ext for filename in glob.glob(img_dir + '/**/*.' + ext,recursive=True) ])
    json_files = sorted([filename for filename in glob.glob(img_dir + '/**/*.json',recursive=True) ])
    dataset_dicts = []
    for idx, (img_path, json_path) in enumerate(zip(img_files, json_files)):
        assert img_path.split('/')[-1].split('.')[0] == json_path.split('/')[-1].split('.')[0] # json-img 1-1 correspondence
        with open(json_path) as f:
            imgs_anns = json.load(f)
        record = {}
        height, width = cv2.imread(img_path).shape[:2]
        record["file_name"] = img_path
        record["image_id"] = idx
        record["height"] = height
        record["width"] = width
        annotations = []
        
        shapes = imgs_anns['shapes']
        if len(shapes) != 1:
            print(f"{len(shapes)} shapes in {img_path}")
            continue
        if shapes[0]["shape_type"] != "rectangle":
            print(f"{shapes[0]['shape_type']} shape_type in {img_path}")
            continue
        label = shapes[0]["label"]
        points = shapes[0]["points"]
        x_min, x_max = int(min(points[0][0], points[1][0])), int(max(points[0][0], points[1][0]))
        y_min, y_max = int(min(points[0][1], points[1][1])), int(max(points[0][1], points[1][1]))
        obj = {
            "bbox": [x_min, y_min, x_max, y_max],
            "bbox_mode": BoxMode.XYXY_ABS,
            "category_id": label2id[label],
        }
        record["annotations"] = [obj]
        dataset_dicts.append(record)

    return dataset_dicts

In [None]:

detectron2.data.datasets.load_coco_json("RCTW_17/dsdl/dsdl_OCR_full/set-train/train_samples.json", "RCTW_17/raw", "RCTW_train")