# Regular Expression

In [1]:
import re

In [2]:
re.search('a[.]{3,}b', 'a......b')

<_sre.SRE_Match object; span=(0, 8), match='a......b'>

In [3]:
pattern = '([0-9]{3}-[0-9]{4})-([0-9]{4})'

In [4]:
re.search(pattern, '010-1111-2222')

<_sre.SRE_Match object; span=(0, 13), match='010-1111-2222'>

In [5]:
re.findall(pattern, '010-1212-3434')

[('010-1212', '3434')]

---

# JSON(JavaScript Object Notation)

In [6]:
import json

In [7]:
%%writefile test.json
{"id": "root", "pw": "1234"}

Overwriting test.json


In [8]:
with open('test.json') as file:
    info = json.load(file)

In [9]:
info

{'id': 'root', 'pw': '1234'}

In [10]:
info = '''
{
    "id": "root",
    "pw": "1234"
}
'''

In [11]:
info

'\n{\n    "id": "root",\n    "pw": "1234"\n}\n'

In [12]:
json.loads(info)

{'id': 'root', 'pw': '1234'}

In [13]:
obj = {"data": [1, 2, 3, 4, '5', True, 'asb']}

In [14]:
with open('test.json', 'w') as file:
    res = json.dump(obj, file)

In [15]:
json.dumps(obj)

'{"data": [1, 2, 3, 4, "5", true, "asb"]}'

In [16]:
with open('test.json') as file:
    info = json.load(file)

In [17]:
info

{'data': [1, 2, 3, 4, '5', True, 'asb']}

In [18]:
uni = ('ㅇ', 'ㅏ', '안녕')

In [19]:
uni

('ㅇ', 'ㅏ', '안녕')

In [20]:
uni_dump = json.dumps(uni)

In [21]:
uni_dump

'["\\u3147", "\\u314f", "\\uc548\\ub155"]'

In [22]:
json.loads(uni_dump)

['ㅇ', 'ㅏ', '안녕']

http://ip.jsontest.com/

In [23]:
with open('Untitled3.ipynb') as file:
    ipynb = json.load(file)

In [24]:
ipynb

{'cells': [{'cell_type': 'code',
   'execution_count': 1,
   'metadata': {},
   'outputs': [{'data': {'text/plain': ["'Hello'"]},
     'execution_count': 1,
     'metadata': {},
     'output_type': 'execute_result'}],
   'source': ["'Hello'"]}],
 'metadata': {'kernelspec': {'display_name': 'Python 3',
   'language': 'python',
   'name': 'python3'},
  'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},
   'file_extension': '.py',
   'mimetype': 'text/x-python',
   'name': 'python',
   'nbconvert_exporter': 'python',
   'pygments_lexer': 'ipython3',
   'version': '3.6.7'},
  'latex_envs': {'LaTeX_envs_menu_present': True,
   'autoclose': False,
   'autocomplete': True,
   'bibliofile': 'biblio.bib',
   'cite_by': 'apalike',
   'current_citInitial': 1,
   'eqLabelWithNumbers': True,
   'eqNumInitial': 1,
   'hotkeys': {'equation': 'Ctrl-E', 'itemize': 'Ctrl-I'},
   'labels_anchors': False,
   'latex_user_defs': False,
   'report_style_numbering': False,
   'user_envs_c

In [25]:
url = 'http://ip.jsontest.com/'

In [26]:
import urllib

In [27]:
obj = {"name": 'NAME', "age": 30}

In [28]:
reqObj = json.dumps(obj)

In [29]:
reqObj = reqObj.encode('utf-8')

---

In [30]:
req = urllib.request.Request(url, data=reqObj, headers={
                             'content-type': 'application/json'})
# resp = urllib.request.urlopen(req) ## 서버 터짐 ..

---

# 공공데이터포털 Open API 사용

In [31]:
url = "http://openapi.airkorea.or.kr/openapi/services/rest/ArpltnInforInqireSvc/getCtprvnRltmMesureDnsty"

In [32]:
url = "http://openapi.airkorea.or.kr/openapi/services/rest/ArpltnInforInqireSvc/getCtprvnRltmMesureDnsty"
params = {
    'ServiceKey': 'eZ7xBqg5C3tQDAwJM8kWRO6XWySXi9yEEPI449XNSbb%2BI9AFpA3wUMeeRVqiXhxpc2BOfeyMump%2FlF0pNYCpNA%3D%3D',
    "numOfRows": 10,
    "pageNo": 1,
    "sidoName": "서울",
    "dataTerm": "DAILY",
    "ver": 1.3,
    "_returnType": "json"
}

In [33]:
params

{'ServiceKey': 'eZ7xBqg5C3tQDAwJM8kWRO6XWySXi9yEEPI449XNSbb%2BI9AFpA3wUMeeRVqiXhxpc2BOfeyMump%2FlF0pNYCpNA%3D%3D',
 'numOfRows': 10,
 'pageNo': 1,
 'sidoName': '서울',
 'dataTerm': 'DAILY',
 'ver': 1.3,
 '_returnType': 'json'}

In [34]:
params['ServiceKey'] = urllib.parse.unquote(params['ServiceKey'])

params = urllib.parse.urlencode(params)
params = params.encode('utf-8')

req = urllib.request.Request(url, data=params)
resp = urllib.request.urlopen(req)

data = resp.read()
data = data.decode('utf-8')
data = json.loads(data)

In [35]:
data

{'list': [{'_returnType': 'json',
   'coGrade': '1',
   'coValue': '0.3',
   'dataTerm': '',
   'dataTime': '2019-07-10 17:00',
   'khaiGrade': '1',
   'khaiValue': '45',
   'mangName': '도시대기',
   'no2Grade': '1',
   'no2Value': '0.018',
   'numOfRows': '10',
   'o3Grade': '1',
   'o3Value': '0.027',
   'pageNo': '1',
   'pm10Grade': '1',
   'pm10Grade1h': '1',
   'pm10Value': '10',
   'pm10Value24': '10',
   'pm25Grade': '1',
   'pm25Grade1h': '1',
   'pm25Value': '6',
   'pm25Value24': '6',
   'resultCode': '',
   'resultMsg': '',
   'rnum': 0,
   'serviceKey': '',
   'sidoName': '',
   'so2Grade': '1',
   'so2Value': '0.002',
   'stationCode': '',
   'stationName': '중구',
   'totalCount': '',
   'ver': ''},
  {'_returnType': 'json',
   'coGrade': '1',
   'coValue': '0.4',
   'dataTerm': '',
   'dataTime': '2019-07-10 17:00',
   'khaiGrade': '1',
   'khaiValue': '48',
   'mangName': '도로변대기',
   'no2Grade': '1',
   'no2Value': '0.029',
   'numOfRows': '10',
   'o3Grade': '1',
   'o3Val

In [36]:
{item['stationName']: item['pm25Value'] for item in data['list']}

{'중구': '6',
 '한강대로': '6',
 '종로구': '5',
 '청계천로': '6',
 '종로': '4',
 '용산구': '4',
 '광진구': '9',
 '성동구': '8',
 '강변북로': '3',
 '중랑구': '4'}

# XML (eXtension Markup Language)

In [37]:
import xml

In [38]:
dir(xml)

['__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'dom']

In [39]:
from xml.etree.ElementTree import ElementTree

In [40]:
%%writefile test.xml
<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

Overwriting test.xml


In [41]:
tree = ElementTree().parse('test.xml')

In [42]:
tree

<Element 'data' at 0x7f236dea39a8>

In [43]:
tags = tree.findall('country')

---

In [44]:
import xml.etree.ElementTree as et
from lxml import etree

In [45]:
bookStore = et.Element('bookstore')

In [46]:
book1 = et.Element('book', category='cooking')
bookStore.insert(0, book1)

In [47]:
et.dump(bookStore)

<bookstore><book category="cooking" /></bookstore>


In [48]:
title1 = et.Element('title')
title1.attrib['lang'] = 'en'
title1.text = 'Everyday Italian'

In [49]:
book1.append(title1)

In [50]:
et.dump(bookStore)

<bookstore><book category="cooking"><title lang="en">Everyday Italian</title></book></bookstore>


In [51]:
book2 = et.Element('book', {'category': 'children'})
bookStore.append(book2)

In [52]:
et.SubElement(book2, 'author').text = 'Giada De Laurentils'

In [53]:
et.dump(bookStore)

<bookstore><book category="cooking"><title lang="en">Everyday Italian</title></book><book category="children"><author>Giada De Laurentils</author></book></bookstore>


In [54]:
bookStore.find('book')

<Element 'book' at 0x7f236d803c28>

In [55]:
bookStore.findall('book')

[<Element 'book' at 0x7f236d803c28>, <Element 'book' at 0x7f236cdfd548>]

In [56]:
bookStore.findall('.//title')

[<Element 'title' at 0x7f236cdf0bd8>]

In [57]:
et.tostring(bookStore)

b'<bookstore><book category="cooking"><title lang="en">Everyday Italian</title></book><book category="children"><author>Giada De Laurentils</author></book></bookstore>'

In [58]:
bookStore = etree.Element('bookStore')

In [59]:
book1 = etree.SubElement(bookStore, 'book')
book2 = etree.SubElement(bookStore, 'book', attrib={'category': 'children'})

In [60]:
book1.attrib['category'] = 'cooking'

In [61]:
title1 = etree.Element('title', lang='en')
title1.text = 'Everyday Italian'
book1.append(title1)

In [62]:
etree.SubElement(book1, 'author').text = 'Giada De Laurentils'
etree.SubElement(book1, 'year').text = '2005'
etree.SubElement(book1, 'price').text = '30.00'

In [63]:
title2 = etree.Element('title')
title2.set('lang', title1.get('lang'))
title2.text = 'Harry Potter'
book2.append(title2)

In [64]:
etree.SubElement(book2, 'author').text = 'Giada De Laurentils'
etree.SubElement(book2, 'year').text = '2005'
book2.insert(3, etree.Element('price'))

In [65]:
xmlBytes = etree.tostring(bookStore, encoding='utf8', pretty_print=True, xml_declaration=True)
xmlStr = etree.tounicode(bookStore, pretty_print=True)

In [66]:
etree.dump(bookStore)

<bookStore>
  <book category="cooking">
    <title lang="en">Everyday Italian</title>
    <author>Giada De Laurentils</author>
    <year>2005</year>
    <price>30.00</price>
  </book>
  <book category="children">
    <title lang="en">Harry Potter</title>
    <author>Giada De Laurentils</author>
    <year>2005</year>
    <price/>
  </book>
</bookStore>


In [67]:
xml = etree.XML(etree.tostring(bookStore))
xmlTree = etree.ElementTree(xml)
xmlRoot = xmlTree.getroot()

In [68]:
xmlTree.docinfo.xml_version

'1.0'

In [69]:
xmlTree.docinfo.encoding

'UTF-8'

In [70]:
for childNode in xmlRoot:
    print(childNode.tag, childNode.attrib)

book {'category': 'cooking'}
book {'category': 'children'}


In [71]:
len(xmlRoot)

2

In [72]:
xmlRoot.find('book')

<Element book at 0x7f236dea51c8>

In [73]:
xmlRoot.findall('book')

[<Element book at 0x7f236dea51c8>, <Element book at 0x7f236ce12fc8>]

In [74]:
xmlRoot.find('.//title')

<Element title at 0x7f236ce1ea08>

In [75]:
xmlRoot.findall('.//title')

[<Element title at 0x7f236ce1ea08>, <Element title at 0x7f236ce1ea48>]

In [76]:
xmlRoot.find(".//book[@category='children']")

<Element book at 0x7f236ce12fc8>

In [77]:
url = "http://openapi.airkorea.or.kr/openapi/services/rest/ArpltnInforInqireSvc/getCtprvnRltmMesureDnsty"
params = {
    'ServiceKey': 'eZ7xBqg5C3tQDAwJM8kWRO6XWySXi9yEEPI449XNSbb%2BI9AFpA3wUMeeRVqiXhxpc2BOfeyMump%2FlF0pNYCpNA%3D%3D',
    "numOfRows": 10,
    "pageNo": 1,
    "sidoName": "서울",
    "dataTerm": "DAILY",
    "ver": 1.3
}

In [78]:
params['ServiceKey'] = urllib.parse.unquote(params['ServiceKey'])

params = urllib.parse.urlencode(params)
params = params.encode('utf-8')

req = urllib.request.Request(url, data=params)
resp = urllib.request.urlopen(req)

data = resp.read()

In [79]:
xmlObj = etree.fromstring(data)
xmlRoot = etree.ElementTree(xmlObj).getroot()

In [80]:
etree.dump(xmlRoot)

<response>
	<header>
		<resultCode>00</resultCode>
		<resultMsg>NORMAL SERVICE.</resultMsg>
	</header>
	<body>
		<items>
			
				<item>
					<stationName>중구</stationName>
					
                        <mangName>도시대기</mangName>
                    
					<dataTime>2019-07-10 17:00</dataTime>
					<so2Value>0.002</so2Value>
					<coValue>0.3</coValue>
					<o3Value>0.027</o3Value>
					<no2Value>0.018</no2Value>
					<pm10Value>10</pm10Value>
					
                        <pm10Value24>10</pm10Value24>
                    
					
				        <pm25Value>6</pm25Value>
				    
				    
                        <pm25Value24>6</pm25Value24>
                    
					<khaiValue>45</khaiValue>
					<khaiGrade>1</khaiGrade>
					<so2Grade>1</so2Grade>
					<coGrade>1</coGrade>
					<o3Grade>1</o3Grade>
					<no2Grade>1</no2Grade>
					<pm10Grade>1</pm10Grade>
					
				        <pm25Grade>1</pm25Grade>
				    
				    
                        <pm10Grade1h>1</pm10Grade1h>
                        <pm

In [81]:
for node in xmlRoot.iter():
    print(node.tag, node.text)

response 
	
header 
		
resultCode 00
resultMsg NORMAL SERVICE.
body 
		
items 
			
				
item 
					
stationName 중구
mangName 도시대기
dataTime 2019-07-10 17:00
so2Value 0.002
coValue 0.3
o3Value 0.027
no2Value 0.018
pm10Value 10
pm10Value24 10
pm25Value 6
pm25Value24 6
khaiValue 45
khaiGrade 1
so2Grade 1
coGrade 1
o3Grade 1
no2Grade 1
pm10Grade 1
pm25Grade 1
pm10Grade1h 1
pm25Grade1h 1
item 
					
stationName 한강대로
mangName 도로변대기
dataTime 2019-07-10 17:00
so2Value 0.004
coValue 0.4
o3Value 0.015
no2Value 0.029
pm10Value 21
pm10Value24 21
pm25Value 6
pm25Value24 7
khaiValue 48
khaiGrade 1
so2Grade 1
coGrade 1
o3Grade 1
no2Grade 1
pm10Grade 1
pm25Grade 1
pm10Grade1h 1
pm25Grade1h 1
item 
					
stationName 종로구
mangName 도시대기
dataTime 2019-07-10 17:00
so2Value 0.003
coValue 0.3
o3Value 0.025
no2Value 0.012
pm10Value 13
pm10Value24 12
pm25Value 5
pm25Value24 6
khaiValue 42
khaiGrade 1
so2Grade 1
coGrade 1
o3Grade 1
no2Grade 1
pm10Grade 1
pm25Grade 1
pm10Grade1h 1
pm25Grade1h 1
item 
					
stationN