In [1]:
import os
import xml.etree.ElementTree as ET
import copy
import math
import random

# 读取标准xml

In [2]:
# Parse base.xml into an ElementTree. The file is opened in text mode and
# decoded as UTF-8 before the parser reads it; the `with` block closes the
# handle once parsing is finished.
with open("base.xml", mode="r", encoding="utf-8") as f:
    tree = ET.parse(f)
# Bare name so the notebook displays the parsed tree object.
tree

<xml.etree.ElementTree.ElementTree at 0x218dfeedc40>

## 获取root

In [3]:
# Fetch the root element of the parsed tree; the bare name displays it.
root = tree.getroot()
root

<Element 'annotation' at 0x00000218DFF2D310>

## 查看root属性

In [4]:
# Tag name of the root element.
root.tag

'annotation'

In [5]:
# Attribute dictionary of the root element.
root.attrib

{}

## 查看root的子节点

In [6]:
# Text content of the first <filename> child of root.
root.find("filename").text

'000001.jpg'

## 查看多级子节点

In [7]:
# Look up <size> once, then print the text of each of its dimension children
# on its own line.
size_node = root.find("size")
for size_tag in ("width", "height", "depth"):
    print(size_node.find(size_tag).text)

353
500
3


## 通过 `root.findall()` 或者 `root.iter()` 获取迭代器

In [8]:
def find_childrens(root):
    """Print one line for every ``<object>`` child of ``root``.

    Each line contains the object's ``name`` text followed by the ``xmin``,
    ``ymin``, ``xmax`` and ``ymax`` texts of its ``bndbox``, separated by
    single spaces.

    Args:
        root: Element whose ``<object>`` children are listed.
    """
    # Original note: findall and iter give a similar result here.
    for obj in root.findall('object'):
        label = obj.find("name").text
        box = obj.find("bndbox")
        corners = [box.find(corner).text
                   for corner in ("xmin", "ymin", "xmax", "ymax")]
        print(label, *corners)

In [9]:
# Print every <object> entry currently attached to root.
find_childrens(root)

dog 48 240 195 371


## 保存子节点

In [10]:
# Keep an independent template of the first <object>, so it stays usable
# after the objects are removed from root.
base_object = copy.deepcopy(root.find("object"))

# Show the template's name followed by its four bounding-box values.
print(base_object.find("name").text)
base_bndbox = base_object.find("bndbox")
for corner_tag in ("xmin", "ymin", "xmax", "ymax"):
    print(base_bndbox.find(corner_tag).text)

dog
48
240
195
371


## 删除子节点

In [11]:
# Note: when root is reused to build several files, clear its <object>
# children every time; otherwise all previously appended objects are still
# attached and end up in the output as well.
# findall() returns a list, so removing children while looping is safe.
for o in root.findall("object"):
    root.remove(o)

## 缩进xml

In [12]:
def indent(elem, level=0):
    """Pretty-print an element tree in place using tab indentation.

    Whitespace-only (or missing) ``text`` and ``tail`` values are replaced
    with a newline plus one tab per nesting level; text or tails that hold
    real content are left untouched.

    Recipe source: https://www.cnblogs.com/muffled/p/3462157.html

    Args:
        elem: Element to indent; its descendants are processed recursively.
        level: Nesting depth of ``elem`` (0 for the top-level call).
    """
    def _is_blank(value):
        # True for None, "" and whitespace-only strings.
        return not value or not value.strip()

    pad = "\n" + "\t" * level

    # Leaf element: only its tail may need fixing, and never at the top level.
    if len(elem) == 0:
        if level and _is_blank(elem.tail):
            elem.tail = pad
        return

    # The first child starts one level deeper than this element.
    if _is_blank(elem.text):
        elem.text = pad + "\t"
    if _is_blank(elem.tail):
        elem.tail = pad

    last_child = None
    for child in elem:
        indent(child, level + 1)
        last_child = child

    # The last child is followed by this element's closing tag, so its tail
    # is pulled back to this element's own depth.
    if _is_blank(last_child.tail):
        last_child.tail = pad

## 插入多个结构相同的子节点，注意使用deepcopy，否则每次append的都是同一个对象，最终所有节点的内容都等于最后一次修改的结果

In [13]:
for i in range(3):
    # root is reused for every output file, so drop the <object> nodes left
    # over from the previous pass before adding new ones.
    for o in root.findall("object"):
        root.remove(o)

    for j in range(2):
        # Use a fresh deep copy for each object; appending the same element
        # repeatedly would attach one shared node, so only the last edit
        # would be visible.
        temp_object = copy.deepcopy(base_object)  # important!!!
        temp_object.find("name").text = f"stu_{i}_{j}"

        # Arbitrary demo coordinates. The tuple is evaluated top to bottom,
        # so random.random() is drawn in xmin, ymin, xmax, ymax order.
        bndbox = temp_object.find("bndbox")
        demo_coords = (
            ("xmin", i + math.pi + j + math.pi + random.random() * 10),
            ("ymin", i - math.pi + j - math.pi + random.random() * 10),
            ("xmax", i * math.pi + j * math.pi + random.random() * 10),
            ("ymax", i / math.pi + j / math.pi + random.random() * 10),
        )
        for coord_tag, coord_value in demo_coords:
            bndbox.find(coord_tag).text = str(coord_value)
        root.append(temp_object)

    # Show what is about to be written.
    find_childrens(root)

    # Re-indent in place so the saved file is human-readable.
    indent(root)

    # Wrap root in a tree object and write one file per outer iteration.
    new_tree = ET.ElementTree(root)
    new_tree.write(f"test_{i}.xml")

stu_0_0 11.509278080496228 -4.879700884122333 3.9897202079244276 8.327216385083045
stu_0_1 8.163189254323486 1.7444920181936343 7.488525272420946 4.9143500677160965
stu_1_0 13.632433870883888 0.33280132462587986 13.07772831935247 3.4828325364739374
stu_1_1 11.221862206525984 -2.645012626159044 14.39651948625655 5.072257749627924
stu_2_0 17.535726234613662 -0.25957426445520415 10.98757110408636 4.534452398453864
stu_2_1 15.481282178068444 2.305244587682415 17.42240426988318 1.5139120978798501
