# XML JSON

In [1]:
import requests
import lxml.etree

In [2]:
# Sample XML document held as a string; the parsing cells below read it.
# NOTE(review): the name `input` shadows Python's built-in input() in this kernel.
input = '''
<students>
    <student x="1">
        <id>001</id>
        <name>Kim</name>
    </student>
    <student x="2">
        <id>002</id>
        <name>Lee</name>
    </student>
</students>
'''

In [9]:
%%writefile src/ds_open_hello.xml
<students>
    <student x="1">
        <id>001</id>
        <name>Kim</name>
    </student>
    <student x="2">
        <id>002</id>
        <name>Lee</name>
    </student>
</students>

Writing src/ds_open_hello.xml


In [5]:
# Parse the XML string directly; the result is the root <students> element.
root = lxml.etree.fromstring(input)
root

<Element students at 0x7fe9b9f0bb80>

In [7]:
from io import StringIO
# Wrap the string in StringIO so parse() can read it like a file;
# parse() yields a tree object, and getroot() extracts its root element.
tree = lxml.etree.parse(StringIO(input))
root = tree.getroot()
root

<Element students at 0x7fe9b9eb8a80>

In [11]:
import os
# Same document, this time read from the file produced by the %%writefile cell.
tree = lxml.etree.parse(os.path.join('src', 'ds_open_hello.xml'))
root = tree.getroot()

In [16]:
# Iterating an element yields its direct children.
for e in root:
    print(e.tag) # tags of the elements directly under the root

student
student


In [18]:
# Element.getchildren() is deprecated; an element already behaves as a
# sequence of its direct children, so list(...) and plain iteration give
# the same elements in the same order.
print(list(root))
for ee in root:
    for e in ee:
        # Elements without text fall back to the literal string 'None' so the
        # concatenation below always receives a str.
        text = e.text or 'None'
        print(e.tag + " => " + text)

[<Element student at 0x7fe9b9a27a00>, <Element student at 0x7fe9b9fadf00>]
id => 001
name => Kim
id => 002
name => Lee


In [27]:
root[1].attrib # attributes of the second <student>; indexing replaces the deprecated getchildren()

{'x': '2'}

In [28]:
# Second <student>, first child (<id>): index the elements directly instead of
# going through the deprecated getchildren().
root[1][0].text

'002'

In [29]:
# iter() visits the element itself and then every descendant; it is the
# supported replacement for the deprecated getiterator().
for node in root.iter():
    print('TAG: {} \tATTRIB: {} \t\t TEXT: {}'.format(node.tag, node.attrib, node.text))

TAG: students 	ATTRIB: {} 		 TEXT: 
    
TAG: student 	ATTRIB: {'x': '1'} 		 TEXT: 
        
TAG: id 	ATTRIB: {} 		 TEXT: 001
TAG: name 	ATTRIB: {} 		 TEXT: Kim
TAG: student 	ATTRIB: {'x': '2'} 		 TEXT: 
        
TAG: id 	ATTRIB: {} 		 TEXT: 002
TAG: name 	ATTRIB: {} 		 TEXT: Lee


## XML FIND

In [31]:
# find() returns a single element: here the first <student> (x="1").
std = root.find('student')
# Print the text of each child of that student, then its attributes and tag.
for node in std:
    print(node.text)
print(std.attrib)
print(std.tag)

001
Kim
{'x': '1'}
student


In [32]:
# findall() returns every matching <student>, so both records are printed.
stds = root.findall('student')
for node in stds:
    # Each student's children (<id>, <name>) carry the actual values.
    for item in node:
        print(item.text)

001
Kim
002
Lee


## XPath
- / : 루트 바로 아래 수준만 검색
- // : 계층의 어디에 있든지 상관하지 않고 일치하는 요소 검색
- @ : 속성을 검색

In [37]:
root.xpath('//@x') # values of every `x` attribute found anywhere in the document

['1', '2']

In [39]:
root.xpath('//*[@x="1"]/id/text()') # anywhere in the tree, pick elements whose x attribute is "1" and return the text of their <id> children (likely exam material)

['001']

In [43]:
from lxml.cssselect import CSSSelector

In [46]:
# Build a selector object from the CSS expression 'student';
# its .css attribute holds the expression it was created from.
sel = CSSSelector('student')
print(sel.css)

student


In [48]:
# Re-parse the XML string, then call the selector on the root to get the
# list of matching elements.
root = lxml.etree.fromstring(input)
nodes = sel(root)
nodes

[<Element student at 0x7fe9b9fe3e80>, <Element student at 0x7fe9b9feaa80>]

In [49]:
# get() reads one attribute value from each matched element.
for e in nodes:
    print(e.get('x'))

1
2


In [51]:
# A CSSSelector is tied to one expression, so a new one is created to run a different query.
sel = CSSSelector('id')
nodes = sel(root)
for e in nodes:
    print(e.text)

001
002


# JSON

In [52]:
# JSON sample: an array of two objects whose values are all strings.
# NOTE(review): this rebinds `input`, replacing the XML string used by the cells above.
input = '''[
    { "id" : "001", "x" : "2", "name" : "Chuck"},
    { "id" : "009", "x" : "7", "name" : "Brent"}
]'''

In [53]:
import json

# loads() converts the JSON text into Python objects (here a list of dicts).
info = json.loads(input)

In [54]:
# Each entry of the parsed array is a dict, so fields are read by key.
for item in info:
    print('id: {} \tname: {}'.format(item['id'], item['name']))

id: 001 	name: Chuck
id: 009 	name: Brent


In [56]:
# Serialise a nested Python structure to JSON text: the tuple is written as a
# JSON array and None as null.
nested = ['foo', {'bar': ('baz', None, 1.0, 2)}]
my = json.dumps(nested)

# Show both the resulting type (str) and the serialised text.
print(type(my), my)

<class 'str'> ["foo", {"bar": ["baz", null, 1.0, 2]}]


# URL Encoding

In [57]:
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.parse` is loaded, so the call below would only work when some
# other import had already pulled it in.
import urllib.parse

# urlencode() percent-encodes keys and values: '#' becomes %23 and the Korean
# text is emitted as its UTF-8 bytes.
print(urllib.parse.urlencode({'#q' : '한글'}))
# Passing the value as already-encoded UTF-8 bytes gives the same query string.
print(urllib.parse.urlencode({'#q' : '한글'.encode('utf-8')}))

%23q=%ED%95%9C%EA%B8%80
%23q=%ED%95%9C%EA%B8%80


# mongodb

In [6]:
import pymongo
# No arguments are passed, so the client relies on pymongo's default connection settings.
# NOTE(review): `Client` is capitalised like a class but holds an instance; later cells depend on this name.
Client = pymongo.MongoClient()

In [7]:
# Attribute access selects the database named "myDB".
db = Client.myDB
# myDB is absent from the listing printed here; it only shows up after the insert further down.
print(Client.list_database_names())

['admin', 'config', 'local']


In [8]:
# Field values for one sample record. `_id` is only a local variable name here:
# the value is stored under the key 'id'.
_id = 1
_name = 'js'
_age = 11
_country = 'ko'

# Insert the record into the `kritiasTable` collection of `db`.
# NOTE(review): presumably each re-run of this cell adds another copy of the document — confirm.
db.kritiasTable.insert_one({
    'id': _id,
    'name': _name,
    'age': _age,
    'country': _country
})

<pymongo.results.InsertOneResult at 0x7f850c37cec0>

In [10]:
# After the insert, the listings include both the database and its collection.
print(Client.list_database_names())
print(db.list_collection_names())

['admin', 'config', 'local', 'myDB']
['kritiasTable']


In [11]:
# find() is called without a filter; iterate the results and print each document's name.
results = db.kritiasTable.find()
for r in results:
    print(r['name'])

js
