In [1]:
import xml.etree.ElementTree as ET

In [2]:
tree = ET.parse('data.xml')

In [3]:
print(type(tree))

<class 'xml.etree.ElementTree.ElementTree'>


In [4]:
root = tree.getroot()
root

<Element 'data' at 0x7fa534bb5818>

In [6]:
print(root.tag)
print(root.attrib)
print(len(root))

data
{}
3


In [9]:
# First child of root
country1 = root[0]

# First child of the child
rank = country1[0]

# What is the rank of the grandchild
print(rank.tag)

# What is the text inside this grandchild
print(rank.text)

# What are the attributes of the last element?
print(country1[4].attrib)

rank
1
{'name': 'Switzerland', 'direction': 'W'}


In [10]:
# Some info about third child of the root
country3 = root[2]
gdppc = country3[2]
gdppc.text

'13600'

#### Iterating through the file

In [11]:
# find all the child with tag country

for country in root.findall('country'):
    # rank is child of the country
    rank = country.find('rank').text
    
    #name is attribute of the country
    name = country.get('name')
    print(name, rank)

Liechtenstein 1
Singapore 4
Panama 68


In [15]:
for neighbor in root.iter('neighbor'):
    print(neighbor.attrib)

{'name': 'Austria', 'direction': 'E'}
{'name': 'Switzerland', 'direction': 'W'}
{'name': 'Malaysia', 'direction': 'N'}
{'name': 'Costa Rica', 'direction': 'W'}
{'name': 'Colombia', 'direction': 'E'}


#### findall()

In [17]:
# Top-level elements
root.findall('.')

# All 'neighbor' grand-children of 'country' children of the top-level elements
root.findall("./country/neighbor")

# elements with name='Singapore' that have a 'year' child
root.findall(".//year/..[@name='Singapore']")

# 'year' elements that are children of elements with name='Singapore'
root.findall(".//*[@name='Singapore']/year")

# All 'neighbor' elements that are the second child of their parent
root.findall(".//neighbor[2]")

[<Element 'data' at 0x7fa534bb5818>]


In [24]:
for test in root.findall(".//year/..[@name='Singapore']"):
    print(test.tag)

country


### Create Pandas DataFrame

In [28]:
import xml.etree.ElementTree as ET
import pandas as pd

tree = ET.parse('data.xml')  # Load from file
root = tree.getroot()

data = {'name': [],
        'rank': [],
        'year': [],
        'gdppc': []}

for country in root.findall('country'):

    data['name'].append(country.get('name'))
    data['rank'].append(country.find('rank').text)
    data['year'].append(country.find('year').text)
    data['gdppc'].append(country.find('gdppc').text)
    
   
df = pd.DataFrame(data)
df    

Unnamed: 0,name,rank,year,gdppc
0,Liechtenstein,1,2008,141100
1,Singapore,4,2011,59900
2,Panama,68,2011,13600


Note: Because all children of the root are countries therefore for country in root: equals for country in root.findall('country')