In [1]:
import json
import os
import xml.etree.ElementTree as ET
import copy
from tqdm import tqdm
from collections import Counter

In [2]:
def indent(elem, level=0):
    """Pretty-print an ElementTree element in place using tab indentation.

    Whitespace-only (or missing) ``text``/``tail`` values are replaced with a
    newline followed by the appropriate number of tabs; values that contain
    non-whitespace characters are left untouched.

    Recipe reference: https://www.cnblogs.com/muffled/p/3462157.html

    Args:
        elem: The element whose subtree is re-indented (mutated in place).
        level: Nesting depth of ``elem``; 0 for the root.
    """
    def _is_blank(value):
        # True for None, "" and whitespace-only strings.
        return not value or not value.strip()

    pad = "\n" + "\t" * level

    # Leaf element: only its tail may need adjusting, and never for the root.
    if len(elem) == 0:
        if level and _is_blank(elem.tail):
            elem.tail = pad
        return

    # Element with children: the first child starts one level deeper.
    if _is_blank(elem.text):
        elem.text = pad + "\t"
    if _is_blank(elem.tail):
        elem.tail = pad

    for child in elem:
        indent(child, level + 1)

    # The last child's tail precedes this element's closing tag, so it is
    # pulled back to this element's own indentation level.
    last_child = elem[-1]
    if _is_blank(last_child.tail):
        last_child.tail = pad

In [3]:

def json2xml(json_dir: str, xml_dir: str,
             template_path: str = "base.xml",
             label_path: str = "label.txt") -> list:
    """Convert every ``*.json`` annotation in ``json_dir`` into an XML file.

    Each JSON file is expected to provide the keys ``imagePath``,
    ``imageWidth``, ``imageHeight`` and ``shapes`` (each shape having
    ``label`` and ``points``) -- presumably the labelme export layout.
    The XML skeleton is read from ``template_path``, which must contain
    ``filename``, ``size/{width,height,depth}`` and one ``object`` element
    (with ``name`` and ``bndbox/{xmin,ymin,xmax,ymax}``) used as the
    prototype for every box.

    Only the first two points of each shape are used, i.e. shapes are
    treated as rectangles given by two opposite corners.

    Args:
        json_dir: Directory containing the JSON annotation files.
        xml_dir: Directory the XML files are written to; created if missing.
        template_path: Path of the XML template file.
        label_path: Path of the text file that receives one label per line.

    Returns:
        A list of ``(label, count)`` tuples sorted by label.
    """
    # Collect the annotation files in a deterministic order.
    json_list = sorted(name for name in os.listdir(json_dir) if name.endswith(".json"))

    # Create the directory that is actually written to (previously a
    # hard-coded "xml" was created regardless of `xml_dir`).
    if not os.path.exists(xml_dir):
        os.makedirs(xml_dir)
        print(f"make `{xml_dir}` dir")

    with open(template_path, mode="r", encoding="utf-8") as f:
        tree = ET.parse(f)
    root = tree.getroot()
    # Keep a pristine copy of the template <object> to clone for each box.
    base_object = copy.deepcopy(root.find("object"))

    labels = []

    for js in tqdm(json_list, total=len(json_list)):
        # Drop the <object> elements left over from the template / previous file.
        for stale in root.findall("object"):
            root.remove(stale)

        # Read from the directory that was listed (previously hard-coded "json").
        file_path = os.path.join(json_dir, js)
        with open(file_path, encoding="utf-8") as f:
            d: dict = json.load(f)

        root.find("filename").text = d["imagePath"]

        # Image size; depth is fixed at 3 (assumes 3-channel images).
        size = root.find("size")
        size.find("width").text = str(d["imageWidth"])
        size.find("height").text = str(d["imageHeight"])
        size.find("depth").text = str(3)

        for shape in d["shapes"]:
            # Normalise the two corners to (top-left, bottom-right) order.
            (x1, y1), (x2, y2) = shape["points"][0][:2], shape["points"][1][:2]
            xmin = round(min(x1, x2))
            ymin = round(min(y1, y2))
            xmax = round(max(x1, x2))
            ymax = round(max(y1, y2))

            # A fresh copy is required per box; reusing one element would
            # leave only the last box in the output.
            temp_object = copy.deepcopy(base_object)
            temp_object.find("name").text = shape["label"]
            labels.append(shape["label"])

            bndbox = temp_object.find("bndbox")
            bndbox.find("xmin").text = str(xmin)
            bndbox.find("ymin").text = str(ymin)
            bndbox.find("xmax").text = str(xmax)
            bndbox.find("ymax").text = str(ymax)

            root.append(temp_object)

        # Re-indent the tree and write it next to its siblings in xml_dir.
        indent(root)
        xml_path = os.path.join(xml_dir, os.path.splitext(js)[0] + ".xml")
        ET.ElementTree(root).write(xml_path)

    # Tally the labels and order them alphabetically.
    counter = sorted(Counter(labels).items(), key=lambda item: item[0])

    for label, count in counter:
        print(label, "=>", count)

    with open(label_path, "w", encoding="utf-8") as f:
        for label, _ in counter:
            f.write(label + "\n")
    return counter


In [4]:
# Convert the annotations found under ./json, writing the XML files to ./xml,
# and keep the per-label tally returned by the converter.
counter = json2xml(json_dir="json", xml_dir="xml")

100%|██████████| 10/10 [00:00<00:00, 232.55it/s]

mian_hua_lao_ye_huang_hua => 3
mian_hua_ye_pian_lao_hua => 16





In [5]:
# Display the (label, count) pairs returned by json2xml, sorted by label.
counter

[('mian_hua_lao_ye_huang_hua', 3), ('mian_hua_ye_pian_lao_hua', 16)]