In [1]:
# Sample document: a <plays> root carrying a "completeness" attribute, one
# child element per genre, and <play year="..."> leaves whose text is the title.
plays_xml = """<plays completeness="incomplete">
<comedies>
    <play year="1602">All's Well That Ends Well</play>
    <play year="1595">Midsummer Night's Dream</play>
</comedies>
<histories>
    <play year="1598">Henry V</play>
    <play year="1592">Richard III</play>
</histories>
<tragedies>
    <play year="1605">Macbeth</play>
    <play year="1593">Titus Andronicus</play>
</tragedies>
</plays>"""

In [2]:
# Length of the raw XML text in characters.
len(plays_xml)

384

In [6]:
# Peek at the start of the raw text: the root tag and the first genre tag.
plays_xml[:45]

'<plays completeness="incomplete">\n<comedies>\n'

In [7]:
import xml.etree.ElementTree as ET

In [10]:
# Parse the XML text; fromstring() returns the root element of the document.
doc = ET.fromstring(plays_xml)
print(doc)

<Element 'plays' at 0x7f0dc84dfc90>


In [11]:
# Confirm the parsed root is an ElementTree Element.
print(type(doc))

<class 'xml.etree.ElementTree.Element'>


In [12]:
# len() of an element counts its direct children (here the three genres).
len(doc)

3

In [13]:
# The tag is the element's name.
print(doc.tag)

plays


In [14]:
# Serialize the tree back to markup; tostring() returns bytes by default.
ET.tostring(doc)[:45]

b'<plays completeness="incomplete">\n<comedies>\n'

In [15]:
# Write the whole tree to standard output for inspection.
ET.dump(doc)

<plays completeness="incomplete">
<comedies>
    <play year="1602">All's Well That Ends Well</play>
    <play year="1595">Midsummer Night's Dream</play>
</comedies>
<histories>
    <play year="1598">Henry V</play>
    <play year="1592">Richard III</play>
</histories>
<tragedies>
    <play year="1605">Macbeth</play>
    <play year="1593">Titus Andronicus</play>
</tragedies>
</plays>


In [16]:
# Attributes of the root element as a dict.
doc.attrib

{'completeness': 'incomplete'}

In [17]:
# Read a single attribute value by name.
doc.get("completeness")

'incomplete'

In [18]:
# Add an attribute to the root. This mutates doc in place, so every later
# cell sees the extra attribute.
doc.set("new-attribute", "Python rules!")
ET.dump(doc)

<plays completeness="incomplete" new-attribute="Python rules!">
<comedies>
    <play year="1602">All's Well That Ends Well</play>
    <play year="1595">Midsummer Night's Dream</play>
</comedies>
<histories>
    <play year="1598">Henry V</play>
    <play year="1592">Richard III</play>
</histories>
<tragedies>
    <play year="1605">Macbeth</play>
    <play year="1593">Titus Andronicus</play>
</tragedies>
</plays>


In [19]:
# Children are reachable by position: index 0 is the first genre.
doc[0]

<Element 'comedies' at 0x7f0dc84df060>

In [20]:
# Negative indices count from the end, as with a list.
doc[-1]

<Element 'tragedies' at 0x7f0dc84df9c0>

In [21]:
# Iterating an element yields its direct children in document order.
for genre in doc:
    print(genre.tag)

comedies
histories
tragedies


In [23]:
# Nested iteration: each genre, then the title text of each play in it.
for genre in doc:
    print(genre.tag)
    for item in genre:
        print(item.text)

comedies
All's Well That Ends Well
Midsummer Night's Dream
histories
Henry V
Richard III
tragedies
Macbeth
Titus Andronicus


In [25]:
# Same walk, with each title tab-indented under its genre.
for genre in doc:
    print(genre.tag)
    for item in genre:
        print("\t" + item.text)

comedies
	All's Well That Ends Well
	Midsummer Night's Dream
histories
	Henry V
	Richard III
tragedies
	Macbeth
	Titus Andronicus


In [27]:
# Print every play as "'<title>' from <year>", tab-indented under its genre tag.
for genre in doc:
    print(genre.tag)
    for item in genre:
        # Format instead of "+" concatenation: a <play> with no text or no
        # "year" attribute yields None, and str + None raises TypeError.
        print("\t'{}' from {}".format(item.text or "", item.get("year", "unknown")))

comedies
	'All's Well That Ends Well' from 1602
	'Midsummer Night's Dream' from 1595
histories
	'Henry V' from 1598
	'Richard III' from 1592
tragedies
	'Macbeth' from 1605
	'Titus Andronicus' from 1593


In [28]:
# Flatten the tree into (title, year, genre) tuples, one per <play>.
data = [
    (item.text, item.get("year"), genre.tag)
    for genre in doc
    for item in genre
]

len(data)

6

In [29]:
# Look the genre up by tag rather than by position, so the cell keeps working
# if the order of the genre elements in the document changes.
histories = doc.find("histories")
ET.dump(histories)

<histories>
    <play year="1598">Henry V</play>
    <play year="1592">Richard III</play>
</histories>


In [30]:
# Build a detached <play> element with the public constructor; the
# ElementTree documentation says not to call Element.makeelement() directly.
new_play = ET.Element("play")
new_play.tag

'play'

In [31]:
# Fill in the new element: "year" as an attribute, the title as its text.
new_play.set("year", "1596")
new_play.text = "King John"

In [33]:
# Attach the new play as the last child of <histories>. It has no tail
# whitespace, so in the dump it sits unindented right before </histories>.
# NOTE: append() mutates the tree in place; re-running this cell adds the
# same element again.
histories.append(new_play)
ET.dump(histories)

<histories>
    <play year="1598">Henry V</play>
    <play year="1592">Richard III</play>
<play year="1596">King John</play></histories>


In [34]:
def list_xml(element):
    """Print the tag and attributes of ``element`` and of all its descendants.

    Each element produces three lines ("Element: ", "Attributes: " and
    "Children: "); descendants follow in document order with no indentation.
    Returns None.
    """
    summary = (
        "Element: " + element.tag,
        "Attributes: " + str(element.attrib),
        "Children: ",
    )
    for line in summary:
        print(line)
    # Depth-first: report each direct child (and its subtree) in turn.
    for node in element:
        list_xml(node)
        
# Walk the <histories> subtree, which by now also holds the appended play.
list_xml(histories)

Element: histories
Attributes: {}
Children: 
Element: play
Attributes: {'year': '1598'}
Children: 
Element: play
Attributes: {'year': '1592'}
Children: 
Element: play
Attributes: {'year': '1596'}
Children: 
