### XML 파싱

In [1]:
import xml.etree.ElementTree as et
# Parse the menu document from disk and keep a handle on its top-level element.
tree = et.parse('menu.xml')
root = tree.getroot()

In [2]:
# Show the tag name of the document's root element.
print(root.tag)

menu


In [3]:
def _show_element(node, prefix=''):
    """Print one element's tag and attribute dict, with an optional leading prefix."""
    print(prefix + 'tag:', node.tag, 'attributes:', node.attrib)


# Two-level walk: each direct child of the root, then that child's own children
# (printed with a leading tab so the nesting is visible).
for child in root:
    _show_element(child)
    for grandchild in child:
        _show_element(grandchild, '\t')

tag: breakfast attributes: {'hours': '7-11'}
	tag: item attributes: {'price': '$6.00'}
	tag: item attributes: {'price': '$4.00'}
tag: lunch attributes: {'hours': '11-3'}
	tag: item attributes: {'price': '$5.00'}
tag: dinner attributes: {'hours': '3-10'}
	tag: item attributes: {'price': '8.00'}


In [4]:
# Number of direct child elements under the root.
len(root)

3

In [5]:
# Number of direct children of the root's first child.
len(root[0])

2

In [6]:
# Inspect the type of a child node obtained by indexing the root.
type(root[0])

xml.etree.ElementTree.Element

In [7]:
# Inspect the root's first child: the element itself, one attribute value looked up
# by name, the attribute names, and the (name, value) pairs -- one per output line.
first = root[0]
print(first, first.get("hours"), first.keys(), first.items(), sep="\n")

<Element 'breakfast' at 0x107ff5c60>
7-11
['hours']
[('hours', '7-11')]


In [8]:
# Look a child up by tag name instead of by position, then show the same four
# views of it: the element, its "hours" attribute, attribute names, attribute pairs.
lunch = root.find("lunch")
for detail in (lunch, lunch.get("hours"), lunch.keys(), lunch.items()):
    print(detail)

<Element 'lunch' at 0x107ff5d50>
11-3
['hours']
[('hours', '11-3')]


In [9]:
# findall returns a list of the first child's direct sub-elements tagged 'item'.
print(root[0].findall('item'))

[<Element 'item' at 0x107ff5c10>, <Element 'item' at 0x107ff5d00>]


In [10]:
# For every 'item' under the first child, print its price attribute and then its
# text content, each on its own line.
for item in root[0].findall('item'):
    print(item.get('price'), item.text, sep='\n')

$6.00
breakfast burritos
$4.00
pancakes


### JSON

In [11]:
import json

# Sample record used by the JSON examples; the bare name on the last line echoes it.
j1 = dict(name="홍길동", birth="0525", age=30)
j1

{'name': '홍길동', 'birth': '0525', 'age': 30}

In [12]:
# Serialize the dict to a JSON string. With the default settings non-ASCII characters
# are written as \uXXXX escapes; json.dumps(..., ensure_ascii=False) would keep them as-is.
json.dumps(j1)

'{"name": "\\ud64d\\uae38\\ub3d9", "birth": "0525", "age": 30}'

In [13]:
# indent=2 pretty-prints the JSON over multiple lines with two-space indentation.
print(json.dumps(j1, indent=2))

{
  "name": "\ud64d\uae38\ub3d9",
  "birth": "0525",
  "age": 30
}


In [14]:
# A Python list serializes to a JSON array.
json.dumps([1,2,3])

'[1, 2, 3]'

In [15]:
# A tuple is also written as a JSON array (JSON has no separate tuple type).
json.dumps((4,5,6))

'[4, 5, 6]'

In [16]:
# Round trip: encode the dict to JSON text, then decode that text back into a dict.
j1 = {
    "name": "홍길동",
    "birth": "0525",
    "age": 30,
}
d1 = json.dumps(j1)
json.loads(d1)

{'name': '홍길동', 'birth': '0525', 'age': 30}

In [17]:
# Load the JSON document from disk. The encoding is given explicitly because
# open() otherwise falls back to the platform's locale encoding, which is not
# UTF-8 everywhere and would fail on (or garble) the non-ASCII text in this file.
with open('./myinfo.json', encoding='utf-8') as f:
    data = json.load(f)

In [18]:
# Check which Python type json.load produced for the file's top-level value.
print(type(data))

<class 'dict'>


In [19]:
# Display the parsed contents of the file.
print(data)

{'name': '홍길동', 'birth': '0525', 'age': 30}


### Web Scraping

In [20]:
from urllib.request import urlopen

In [26]:
# Open the page over HTTP. The result is a file-like response object, not the page
# text itself; the body still has to be pulled out with .read().
html = urlopen("http://pythonscraping.com/pages/page1.html")

In [22]:
# Read the whole response body and print it; read() returns bytes, hence the b'...' form.
print(html.read())

b'<html>\n<head>\n<title>A Useful Page</title>\n</head>\n<body>\n<h1>An Interesting Title</h1>\n<div>\nLorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\n</div>\n</body>\n</html>\n'


In [23]:
from bs4 import BeautifulSoup

In [29]:
# Request the page and parse its body with the "html.parser" backend.
# The context manager closes the HTTP response as soon as the body has been read,
# so the connection is not left open for the rest of the kernel session.
with urlopen("http://pythonscraping.com/pages/page1.html") as html:
    bsObj = BeautifulSoup(html.read(), "html.parser")

In [30]:
# Attribute access on the parsed document returns the first <h1> tag it contains.
print(bsObj.h1)

<h1>An Interesting Title</h1>


In [31]:
# Re-serialize the <html> element with each tag on its own line and nested indentation.
print(bsObj.html.prettify())

<html>
 <head>
  <title>
   A Useful Page
  </title>
 </head>
 <body>
  <h1>
   An Interesting Title
  </h1>
  <div>
   Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
  </div>
 </body>
</html>

