In [1]:
## Chapter 13: Web services and XML ##

# Example 1: Parsing XML (with a single tag)

import xml.etree.ElementTree as ET

# Sample XML document: one <person> with a name, a phone number and an email tag
data = '''
<person>
  <name>Chuck</name>
  <phone type="intl">
    +1 734 303 4456
  </phone>
  <email hide="yes" />
</person>'''

# Parse the XML text; fromstring() hands back the root <person> element
tree = ET.fromstring(data)

# Look up the <name> child and show the text it contains
name_node = tree.find('name')
print(f'Name: {name_node.text}')

# Look up the <email> child and show the value of its "hide" attribute
email_node = tree.find('email')
print(f"Attr: {email_node.get('hide')}")

Name: Chuck
Attr: yes


In [2]:
# Example 2: Parsing XML (with multiple tags)

import xml.etree.ElementTree as ET

# XML document describing two users. It is named `users_xml` rather than `input`
# so the built-in input() function is not shadowed for the rest of the kernel session.
users_xml = '''
<stuff>
  <users>
    <user x="2">
      <id>001</id>
      <name>Chuck</name>
    </user>
    <user x="7">
      <id>009</id>
      <name>Brent</name>
    </user>
  </users>
</stuff>'''

# Parse the XML text into its root <stuff> element
stuff = ET.fromstring(users_xml)

# List with all the "user" tags below "users", each with its own child information
lst = stuff.findall('users/user')
print('User count:', len(lst))

# Iterate over each of the tags (users) and retrieve their info:
# the text of the <name> and <id> children plus the "x" attribute of <user> itself
for item in lst:
    print('Name', item.find('name').text)
    print('Id', item.find('id').text)
    print('Attribute', item.get('x'))

User count: 2
Name Chuck
Id 001
Attribute 2
Name Brent
Id 009
Attribute 7


In [19]:
# Assignment: This program reads XML data from a URL, parses it, extracts the comment
# counts and computes the sum of all of them

import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET

# Define URL
url = 'http://python-data.dr-chuck.net/comments_217218.xml'

# Open the URL and read the whole response body.
# urlopen() alone returns a file-like object (an 'http.client.HTTPResponse');
# calling .read() on it returns the response body as BYTES (not str).
# From https://stackoverflow.com/questions/35863595/what-does-read-in-urlopenhttp-read-do-urllib
# The `with` block closes the underlying connection as soon as the body has been
# read, instead of leaving it open until the response object is garbage-collected.
with urllib.request.urlopen(url) as response:
    data = response.read()

# Debugging tip: print(data) here to inspect the raw XML and identify its tags.

# Store the tree retrieved from the XML bytes (ET.fromstring accepts bytes as well as str)
tree = ET.fromstring(data)

# List with all the "comment" tags, each with an associated name and number of counts
structure = tree.findall('comments/comment')

# Add up the <count> child of every comment
total_comments = sum(int(item.find('count').text) for item in structure)

print('Total comments:', total_comments)

# Another option, iterating directly over the counts (child node "count")
structure_2 = tree.findall('comments/comment/count')

# Note the .find method is no longer needed to identify the child node,
# because each item already is a <count> element
total_comments_2 = sum(int(item.text) for item in structure_2)

print('Total comments:', total_comments_2)

Total comments: 2566
Total comments: 2566
