# XML

In [3]:
import xml.etree.ElementTree as et

In [5]:
from lxml import etree

In [31]:
bookStore = et.Element("bookstore")

book1 = et.Element("book", category = "cooking")
bookStore.insert(0, book1)

title1 = et.Element("title")
title1.attrib["lang"] = "en"
title1.text = "Everyday Italian"
book1.append(title1)

et.SubElement(book1, "author").text = "Giada De Laurentiis"
et.SubElement(book1, "year").text = "2005"
et.SubElement(book1, "pricer").text = "30.00"

In [33]:
book2 = et.Element("book", {"category" : "children"})
bookStore.append(book2)

title2 = et.Element("title")
title2.attrib["lang"] = title1.get("lang")
title2.text = "Harry Potter"
book2.append(title2)

et.SubElement(book2, "author").text = "Giada De Laurentiis"
et.SubElement(book2, "year").text = "2005"
et.SubElement(book2, "pricer").text = "30.00"

In [34]:
et.dump(bookStore)

<bookstore><book category="cooking"><title lang="en">Everyday Italian</title><author>Giada De Laurentiis</author><year>2005</year><pricer>30.00</pricer></book><book category="children"><title lang="en">Harry Potter</title><author>Giada De Laurentiis</author><year>2005</year><pricer>30.00</pricer></book></bookstore>


Parsing

In [35]:
root = et.fromstring(et.tostring(bookStore))

In [36]:
type(book)

NoneType

In [37]:
book = root.find("book")
print(book.tag, book.get("category"))

book cooking


In [39]:
bookList = root.find("book")
for book in bookList:
    print(book.tag, book.get("category"))

title None
author None
year None
pricer None


In [40]:
title = root.find(".//title")
print(type(title), title.text)

<class 'xml.etree.ElementTree.Element'> Everyday Italian


In [41]:
titleList = root.findall(".//title")
print([title.text for title in titleList])

['Everyday Italian', 'Harry Potter']


In [42]:
title = root.findtext(".//title")
print(type(title), title)

<class 'str'> Everyday Italian


In [43]:
book = root.find(".//book[@category = 'children']")
print(book, book.tag)

<Element 'book' at 0x000001EB1C696DB8> book


In [45]:
from xml.etree.ElementTree import ElementTree

tree = ElementTree(root)
tree.write("book_xml.xml", encoding = "utf-8", xml_declaration = "utf-8")

In [47]:
from xml.etree.ElementTree import parse

tree = parse("book_xml.xml")
root = tree.getroot()

for node in root.iter():
    print(node.tag, node.text)

bookstore None
book None
title Everyday Italian
author Giada De Laurentiis
year 2005
pricer 30.00
book None
title Harry Potter
author Giada De Laurentiis
year 2005
pricer 30.00


In [49]:
tree = ElementTree(file = "book_xml.xml")
root = tree.getroot()

for node in root.iter():
    print(node.tag, node.text)

bookstore None
book None
title Everyday Italian
author Giada De Laurentiis
year 2005
pricer 30.00
book None
title Harry Potter
author Giada De Laurentiis
year 2005
pricer 30.00


# lxml 

In [51]:
bookStore = etree.Element("bookstore")

book1 = etree.SubElement(bookStore, "book")
book2 = etree.SubElement(bookStore, "book", attrib = {"category":"children"})

book1.attrib["category"] = "cooking"

title1 = etree.Element("title", lang = "en")
title1.text = "Everyday Italian"
book1.append(title1)

etree.SubElement(book1, "author").text = "Giada De Laurentiis"
etree.SubElement(book1, "year").text = "2005"
etree.SubElement(book1, "pricer").text = "30.00"

title2 = etree.Element("title")
title2.set("lang", title1.get("lang"))
title2.text = "Harry Potter"
book2.append(title2)

etree.SubElement(book2, "author").text = "Giada De Laurentiis"
etree.SubElement(book2, "year").text = "2005"
book2.insert(3, etree.Element("price"))

print(len(book2))
book2[-1].text = "30.00"

xmlBytes = etree.tostring(bookStore, encoding = "utf-8", pretty_print = True, xml_declaration = True)
xmlStr = etree.tounicode(bookStore, pretty_print = True)
print(type(xmlBytes), type(xmlStr))
etree.dump(bookStore)

4
<class 'bytes'> <class 'str'>
<bookstore>
  <book category="cooking">
    <title lang="en">Everyday Italian</title>
    <author>Giada De Laurentiis</author>
    <year>2005</year>
    <pricer>30.00</pricer>
  </book>
  <book category="children">
    <title lang="en">Harry Potter</title>
    <author>Giada De Laurentiis</author>
    <year>2005</year>
    <price>30.00</price>
  </book>
</bookstore>


In [53]:
xml = etree.XML(etree.tostring(bookStore))
xmlTree = etree.ElementTree(xml)
xmlRoot = xmlTree.getroot()

print(xmlTree.docinfo.xml_version)
print(xmlTree.docinfo.encoding)
print(xmlTree.docinfo.doctype)
print(xmlTree.docinfo.root_name)

print(len(xmlRoot))
for childNode in xmlRoot:
    print(childNode.tag, childNode.attrib)

1.0
UTF-8

bookstore
2
book {'category': 'cooking'}
book {'category': 'children'}


In [55]:
title = xmlRoot.find(".//title")
print(type(title), title.text)

<class 'lxml.etree._Element'> Everyday Italian


In [56]:
titleList = xmlRoot.findall(".//title")
print([title.text for title in titleList])

['Everyday Italian', 'Harry Potter']


In [57]:
title = xmlRoot.findtext(".//title")
print(type(title), title)

<class 'str'> Everyday Italian


In [58]:
book = xmlRoot.find(".//book[@category = 'children']")
print(book, book.tag)

<Element book at 0x1eb1c65a848> book


In [118]:
import urllib
import xml

In [119]:
url = "http://openapi.airkorea.or.kr/openapi/services/rest/ArpltnInforInqireSvc/getCtprvnRltmMesureDnsty"

params = {
    "serviceKey":"MOiHJUIkHVrIoLVw9SpEA%2BxSOFp%2FqvpS9SiV3wRfNTJ9Rgv7azGbY7miXPVGxJDuswJDDxPpu1Vn3wtD7VsSmw%3D%3D",
    "numOfRows": 10,
    "pageNo": 1,
    "sidoName": "서울",
    "dataTerm": "DAILY",
    "ver": 1.3
         }

In [120]:
params["serviceKey"] = urllib.parse.unquote(params["serviceKey"])
print(params);type(params)

{'serviceKey': 'MOiHJUIkHVrIoLVw9SpEA+xSOFp/qvpS9SiV3wRfNTJ9Rgv7azGbY7miXPVGxJDuswJDDxPpu1Vn3wtD7VsSmw==', 'numOfRows': 10, 'pageNo': 1, 'sidoName': '서울', 'dataTerm': 'DAILY', 'ver': 1.3}


dict

In [121]:
params = urllib.parse.urlencode(params)
print(params);type(params)

serviceKey=MOiHJUIkHVrIoLVw9SpEA%2BxSOFp%2FqvpS9SiV3wRfNTJ9Rgv7azGbY7miXPVGxJDuswJDDxPpu1Vn3wtD7VsSmw%3D%3D&numOfRows=10&pageNo=1&sidoName=%EC%84%9C%EC%9A%B8&dataTerm=DAILY&ver=1.3


str

In [122]:
params = params.encode("utf-8")
print(params);type(params)

b'serviceKey=MOiHJUIkHVrIoLVw9SpEA%2BxSOFp%2FqvpS9SiV3wRfNTJ9Rgv7azGbY7miXPVGxJDuswJDDxPpu1Vn3wtD7VsSmw%3D%3D&numOfRows=10&pageNo=1&sidoName=%EC%84%9C%EC%9A%B8&dataTerm=DAILY&ver=1.3'


bytes

In [123]:
req = urllib.request.Request(url, data=params)
res = urllib.request.urlopen(req)

In [124]:
resByte = res.read()

In [132]:
xmlObj = etree.fromstring(resByte)
type(xmlObj)

lxml.etree._Element

In [134]:
xmlRoot = etree.ElementTree(xmlObj).getroot()
type(xmlRoot)


<Element response at 0x1eb1c6ae9c8>


In [127]:
etree.dump(xmlRoot)

<response>
	<header>
		<resultCode>00</resultCode>
		<resultMsg>NORMAL SERVICE.</resultMsg>
	</header>
	<body>
		<items>
			
				<item>
					<stationName>중구</stationName>
					
                        <mangName>도시대기</mangName>
                    
					<dataTime>2019-07-10 16:00</dataTime>
					<so2Value>0.002</so2Value>
					<coValue>0.3</coValue>
					<o3Value>0.029</o3Value>
					<no2Value>0.021</no2Value>
					<pm10Value>11</pm10Value>
					
                        <pm10Value24>10</pm10Value24>
                    
					
				        <pm25Value>6</pm25Value>
				    
				    
                        <pm25Value24>6</pm25Value24>
                    
					<khaiValue>48</khaiValue>
					<khaiGrade>1</khaiGrade>
					<so2Grade>1</so2Grade>
					<coGrade>1</coGrade>
					<o3Grade>1</o3Grade>
					<no2Grade>1</no2Grade>
					<pm10Grade>1</pm10Grade>
					
				        <pm25Grade>1</pm25Grade>
				    
				    
                        <pm10Grade1h>1</pm10Grade1h>
                        <pm

In [128]:
for node in xmlRoot.iter():
    print(node.tag, node.text)

response 
	
header 
		
resultCode 00
resultMsg NORMAL SERVICE.
body 
		
items 
			
				
item 
					
stationName 중구
mangName 도시대기
dataTime 2019-07-10 16:00
so2Value 0.002
coValue 0.3
o3Value 0.029
no2Value 0.021
pm10Value 11
pm10Value24 10
pm25Value 6
pm25Value24 6
khaiValue 48
khaiGrade 1
so2Grade 1
coGrade 1
o3Grade 1
no2Grade 1
pm10Grade 1
pm25Grade 1
pm10Grade1h 1
pm25Grade1h 1
item 
					
stationName 한강대로
mangName 도로변대기
dataTime 2019-07-10 16:00
so2Value 0.003
coValue 0.3
o3Value 0.018
no2Value 0.026
pm10Value 17
pm10Value24 21
pm25Value 5
pm25Value24 7
khaiValue 43
khaiGrade 1
so2Grade 1
coGrade 1
o3Grade 1
no2Grade 1
pm10Grade 1
pm25Grade 1
pm10Grade1h 1
pm25Grade1h 1
item 
					
stationName 종로구
mangName 도시대기
dataTime 2019-07-10 16:00
so2Value 0.003
coValue 0.4
o3Value 0.027
no2Value 0.012
pm10Value 12
pm10Value24 12
pm25Value 6
pm25Value24 6
khaiValue 45
khaiGrade 1
so2Grade 1
coGrade 1
o3Grade 1
no2Grade 1
pm10Grade 1
pm25Grade 1
pm10Grade1h 1
pm25Grade1h 1
item 
					
stationN

In [129]:
itemList = xmlRoot.findall(".//item")

In [130]:
print(len(itemList))
for item in itemList:
    for i in range(len(item)):
        print(item[i].tag, item[i].text)
        

10
stationName 중구
mangName 도시대기
dataTime 2019-07-10 16:00
so2Value 0.002
coValue 0.3
o3Value 0.029
no2Value 0.021
pm10Value 11
pm10Value24 10
pm25Value 6
pm25Value24 6
khaiValue 48
khaiGrade 1
so2Grade 1
coGrade 1
o3Grade 1
no2Grade 1
pm10Grade 1
pm25Grade 1
pm10Grade1h 1
pm25Grade1h 1
stationName 한강대로
mangName 도로변대기
dataTime 2019-07-10 16:00
so2Value 0.003
coValue 0.3
o3Value 0.018
no2Value 0.026
pm10Value 17
pm10Value24 21
pm25Value 5
pm25Value24 7
khaiValue 43
khaiGrade 1
so2Grade 1
coGrade 1
o3Grade 1
no2Grade 1
pm10Grade 1
pm25Grade 1
pm10Grade1h 1
pm25Grade1h 1
stationName 종로구
mangName 도시대기
dataTime 2019-07-10 16:00
so2Value 0.003
coValue 0.4
o3Value 0.027
no2Value 0.012
pm10Value 12
pm10Value24 12
pm25Value 6
pm25Value24 6
khaiValue 45
khaiGrade 1
so2Grade 1
coGrade 1
o3Grade 1
no2Grade 1
pm10Grade 1
pm25Grade 1
pm10Grade1h 1
pm25Grade1h 1
stationName 청계천로
mangName 도로변대기
dataTime 2019-07-10 16:00
so2Value 0.003
coValue 0.6
o3Value 0.026
no2Value 0.023
pm10Value 14
pm10Value24 14

In [131]:
pm25List = xmlRoot.findall(".//item/pm25Value")
for item in pm25List:
    print(item.tag, item.text)

pm25Value 6
pm25Value 5
pm25Value 6
pm25Value 7
pm25Value 5
pm25Value 6
pm25Value -
pm25Value 5
pm25Value 3
pm25Value 6


In [136]:
stationName = xmlRoot.findall(".//item/stationName")
for item in stationName:
    print(item.tag, item.text)

stationName 중구
stationName 한강대로
stationName 종로구
stationName 청계천로
stationName 종로
stationName 용산구
stationName 광진구
stationName 성동구
stationName 강변북로
stationName 중랑구
