# 動態網頁爬蟲 - API Request

* 了解 API Request 用於動態網頁爬蟲的原理
* 能夠使用 API Request 撰寫動態網頁爬蟲






## 作業目標


* 根據範例使用 API 取出 高雄市小港區 2019 年 空氣品質 資料，並用一個適合的資料結構儲存
* Hint: 將所有資料存在一個變數當中


In [6]:
import requests
# Monthly-average air-quality page; the query form data below is POSTed to this same URL.
url = 'https://taqm.epa.gov.tw/taqm/tw/MonthlyAverage.aspx'
str = """ScriptManager1: ctl05$UpdatePanel1|ctl05$btnQuery 
__VIEWSTATE: TFb5vc7pAXqWehzvJXLFe1Ylvwd8xYv0dWo7ElFig0nE/ocOa8YU381qYbUfKwbBhXjufy8xNB1FViCQ/qT87yk1tQJZBcl19R7rOKrfoWj88tKzcqKu7sjEMEbrx0xRwn62Y0uUV9uV4vZ10UI8+x7VWc9tFutaHNY3Fr5sad7rMcVAl+nxzg6Ou4bRtd4kTGpbqyOxA54Z1pR9DshjwXZb6a6W8nXnsWpZERzTvWjjdgTWcj909L7RatIKHLLVScQKvXTocn6k7wYjj2Yhvq2evRB9HTZbYHR2tgi0t56nievRYgj/6DPFdnvagkI4zBYSxe7mjxOgVQzxaqYgyFtvcf/wNvn0gp9K/BrKw+QjIBGhW6OeT/Xmm7W3CcyJJaQgf6WfBTsjCLkAty2QhHJIbUKdlqhbsBj4LdVoxVtSmDfF1Hd6oM+AhevgUCTHwAATdTO1DX3mh1ZxKABEWwCHxlCrnCmFjCvSxY/kfCjusAMUNYK27/HfXzd/aTVIVtE73rdYVcn8A+HSpnS8ou9ZG6J+7fZ/4dDclvoSZSLlOrR7RM867YrCYrMdtvhGWuC1SzNN9q9/nQEkH7wz5/nBxZARHdPQF90fVWgF8ZEdG8N6VReGpsiilz6dzBsJqa8iDcBqLsRtxtQjqF8BExcu7rnSit+anOIaA3ge++3/HBYTFWtE5dcQw3Uf8jbjb9FpUb1BVqPH4EetE+UkgDt6JCYuo45/R1q1m7rbRa8h++K8Rjr2HcEeOOZRNN0FchENNBaSntE1oacUsH1DyfCdiPclFSDwgbnzdB/QVaohLr77nXQd2XeNFFL9LNDrK1IUZq5aMpm0uNmXRMe6eDM5sf7sVszznGnXN0xI3QDK57q+ME5kz0S8Jk8iZBOQex8RQeqkN2kMCjXhzZreRCwYaPNYwdndxV6CEQamX2NfTjqESlNDA6r7OLXfDcZ7Cigw84A9DOVejW6JDQByET/0k7pms5gVV84RWhbrnSFquG9uMDJUBc2PLSUUMY6Swqv0+Ob5i3H5qUX0IYXpnYgn8HggZ1DFfxHIQMrGcVD15/fDfw3pw6we3nqxE5trzNBPnYpuUSqVRiDbxhh/Cd1tXmvGv4GtOyE5DGkxMySTilnooEPxTHNQSTY2Lcmt1ipRrRcgK3c6lsiJ3iSDIbTGO1Xs3DcqzNqkUuynMf6Z9uD3RM9Fjj06f/FPQlSRpzA7SE0DcGsDYkhfGvYkBBKnxL+lVrzHdPSi0gnV1jUc1TTRD4uqWiMpZa/Za9cXUCBv5nmFwG2kUYp5IF2N43LNtkHWaApz50VEFXPfGsf0vUUvmx8I/zUL4Gun7fC2qO2A/2TrPNRx6LSbGJfreFgLLBSg8tphc3WAbluaE/B8zlqZTQPZZd7y3dU+PtycM0nqS5VaUcC4kYgf+h23PqjEw7VAp9ZabgWUcCVU0EVo55xU64dZJ659mJejUW6AQpx0xPqfNmIlBbyMWyHAjLOO2RV0vfB3taRn1eAsbahhDwzuhLSviIB+rxMy8/2JJpwxEh/wOvlheKZMs8sSOXKfKfrtKxapjfvvmx955zV1gu68MifP/8MLokC5yKIRmrVabjQeZHR0hnQHzI4b8ryEQNtoXPlJdeHCdkYM8ysazuxxQSqwTZ71ke4ZcWdIbu+Pt59wlCakJjF3mWluJqdf0LRuydgda5X5rcwHBNaCBFnmwLWmtftmK6rD8+5ysrsgcKdyRL8VvmVBZYtfs5c44dOEu6LKsHv2kd4P6yAb1ifkbeM5hGmha8WS6ViN5nmKvLsrWp/PWCez+wH2d4vy+STigaHP/9b9iVpKo73m56OwHS0D1ay4Vc/9NIcdmoFBkOEOyxsRL98Kj9Y1Ulf2OS07p1cK8n6mvmgHVFKJBQGR0WSwIlg07M7Sb2Hp5UykmddEBCy8HIoeEk6Bkzy9VAlN7ckp/mRJagDar4tyatyR2ipgLh6eLW1OzxRd0IC
ovoqQdum7hWGkdUX03LaEYyX8fC/ksQ0WzRnBA8oHsOFEgR86rmyqwczULPWvJJZNTanbGmMeF5NRIIRAdn/TgMUhcs1r2eXhAMcwEw8RRP4C6njbUJS0k0tvzn/MK9xrdh1ZJ/TXa4P8HdwB8iiHFEk3yrJhKwvM6aH9NU43Cq4fpOsaVN521oty9zcoTmPyGahfIoqPhsVISe04yCkIi6hHBdT/5Q+qtyVgzOCR2xfePgycKRT0ZfTWg834jh2FXaW1JMqpKWqOQf8MOFhBdkyasBhz0BeqHpxfb9hpalciE3sOti/l9fqOIJM4SdeFy5yTkYm3rIYBcvNIaTSEqVT53WBv6gLiEsygDcn/R03+cWQGrq9VwFxgmh9n44KR41oH7NNg9Qa00yTJg1HORo/bNnCB5mAL3vbGt9mYHlB9Rubxb2OkAUgCZnflHDt/0Y6YZI3rXfegosL8ZUnWDASoN+nc6GDx73sCrr+oGY6wmR2seKDjy9HHQRk41Quhk79txNAnManMg8KJFqVLRGF4IlMb/8STNHvfJDvE/35k9b9wYKOS9PM686bo8qVW3FQ+5s+4eSmJlo+9lTYTtJJpihnKjQ1QaTyD6DYIzQDVuc4VVmcOHHs65vgbvVp/rqQ//rlewsLAyBWxrsdMI8aMr0p+qfQIxITfC6d6Rmfgonqf4bjcpLD8IMUU6IkTIQRu968Wki2Rn2NtG67Ff6UMHh+cqeTsvYLLdCbQAEUDn/SYdJ63YyOLgFlDmduc6YazFXFLiGVFr2bt6Cjf5D8ZmTQhC3toyIaSpbEqhzxOcPgfSKJmjgWNPUSSQtnZjLp2oY7O6C8dmH3Hlo0T30kcmps+7QzYhisSB9ISg81/mR9Pmu4aYNgIlIJtV6vGpvukXX903sL0/jZxLuSisEElrJ0Z8FCuwDEsjm0YNy+Wq5gBcgIWYyOWpKYk02w1oANI98o3yAMpkVuvbrpoV92yQR3DhbZgSbWtlAAj5WyMlmEufL49sgryOfxd8Gh7Uio4JlbJJ3KFuR+wYJ3meQOs3S06sPO9SGoVfjd0sRJO5HeEykqKqx96/hO+eh72GtZNAlS5ECnzs/ANXEPwLz0cCsSTYdmCgO6yngF7G1yI
__VIEWSTATEGENERATOR: 0C858E1A
__EVENTVALIDATION: UY7dw8aadBVAUEzxODFLjokf91bawoDH+GPXAHfPsWOzWIrVCor4Cl02Zkid6g8Q0nrvDcBebd6Wzh6XveEHi26ycSJiqd6nMxQQmYifQC9/dL10ucSaBO1HoHFSMWVJvDKtwy6vtnbOsejK7C9o/fKPlECVGtUfn8VddQ6Uk0Rm5hTcCZ9YT1xaP3Yis/K7g87VVqhx41F88bptko0sy9Odu9A6pSSETlbKZPMRlJRC/dIRTiHImE6bCXgxLiZoKG5fFIPlw5DLH5XByMn7yC8o8GTmWSRcQG42uNI1SgYaDNgAN7FTNxvLL0nc2asocQqDTNafxmNfOA5NJtD1g7jQkTgksvOIBO4hyGmtkpU9HbT/hZbRFC2ICivxb6XAr9Qt2EjESfUgbjvfxTJ4vhx81FeqRD7Ox5OPzGianxE+DFm5PyPzW4Wu+iGjO8YcdwUS7Y0fd/lJS6NNKEweYFMWpxzeZhqaXyyLvt6RVB/oYJk+C3XU5C0urNV7UsE3NOqDUl8c2jkT7zvo4zBnNDbp02pSAjAC9eW2Knggt12qzPLiYgav64FkrrSOOZM+XIwq4b8ZjqkiSrwKpWm8TaBlYyR4hpVHsuAdVz5ID83wJab7s1Djd8ZFRCsqvoBh2+w6vPQiuuIqdIp4jBgNf7R1CgHCsl+o1pi4xMaJOYGRu9QihwAsYdR/nAMW4PYn+quzE6uO5kUaZe9mfC8quGPam6D8eMLjTfr+u7q/CBCt2TogxqPJI4eLh+BLtqqQQkKK9/Bo5masTJ3p43w7AnhlZgf3mfHAoSn+SuM4Jr0ILcG6WFlArsevdLP5H+AHcQk+M1z3XII/lS2t5zHFvATLa6HFAA90ly2Vj7yAd0Krn1tKhFtMO6t8Qy4m7eXcGUxxAZoKktXU2T9X6/UTVlUkGl80UHaGbJnbUjoNgvvMud8jkGgb9xiCwsStNBqY7chjKQ==
ctl05$ddlSite: 58
ctl05$ddlYear: 2019
SearchBox1$txtKeyword: 
ctl05$btnQuery: 查詢"""

# Convert the "name: value" lines of the copied form data into the POST payload.
# NOTE(review): `str` is the form-data string assigned earlier in this cell; it
# shadows the builtin and should be renamed together with that assignment.
strList = str.split("\n")
payload = {}
last_key = None
for line in strList:
    # partition() splits on the first ": " only, so a value that itself
    # contains ": " is kept whole, and a line without the separator no longer
    # raises IndexError.
    key, sep, value = line.partition(": ")
    if sep:
        payload[key] = value
        last_key = key
    elif line.endswith(":"):
        # Empty field whose trailing space was trimmed (e.g. by an editor).
        last_key = line[:-1]
        payload[last_key] = ""
    elif line.strip() and last_key is not None:
        # No separator at all: presumably a long token (such as __VIEWSTATE)
        # that was wrapped onto a new line when copied, so re-join it with
        # the field it belongs to.
        payload[last_key] += line

# Without a timeout requests waits forever if the server never answers.
r = requests.post(url, data=payload, timeout=30)
r.status_code

200

![](https://imgur.com/obLkbFe.jpg)

In [7]:
from bs4 import BeautifulSoup

# Parse the response body and pick out the result table, which the page marks
# with the CSS class "TABLE_G"; printing it shows the raw markup to be scraped.
soup = BeautifulSoup(r.text, 'html.parser')
table = soup.find('table', {'class': 'TABLE_G'})
print(table)

<table align="Center" border="1" cellpadding="3" cellspacing="0" class="TABLE_G" id="ctl05_gv" rules="all" style="border-color:Black;border-width:1px;border-style:None;border-collapse:collapse;">
<tr style="color:Black;font-weight:normal;">
<th scope="col">監測項目</th><th scope="col">單位</th><th scope="col">監測日期</th><th scope="col">監測值</th><th scope="col">標註</th>
</tr><tr style="color:Black;">
<td class="no-alt" rowspan="11" style="white-space:nowrap;" valign="top">SO2</td><td class="no-alt" rowspan="11" valign="top">ppb</td><td>2019/01</td><td>4.10</td><td> </td>
</tr><tr class="ALT" style="color:Black;">
<td>2019/02</td><td>3.30</td><td> </td>
</tr><tr style="color:Black;">
<td>2019/03</td><td>3.90</td><td> </td>
</tr><tr class="ALT" style="color:Black;">
<td>2019/04</td><td>3.40</td><td> </td>
</tr><tr style="color:Black;">
<td>2019/05</td><td>3.30</td><td> </td>
</tr><tr class="ALT" style="color:Black;">
<td>2019/06</td><td>4.10</td><td> </td>
</tr><tr style="color:Black;">
<td>2019/07

In [8]:
# Build {"<item>(<unit>)": {"<yyyy/mm>": "<value>"}} from the result table.
#
# The first row of each monitored item carries two extra "no-alt" cells (item
# name and unit) that span the item's remaining rows via rowspan, so:
#   * a row with "no-alt" cells starts a new item; date/value are cells 2 and 3
#   * any other data row belongs to the current item; date/value are cells 0 and 1
# Values are kept as the scraped text, not converted to numbers.
trItem = table.find_all("tr")
d = {}
tag = None
trInfo = None
for tr in trItem:
    cells = tr.find_all("td")
    if not cells:
        # Header row: it only contains <th> cells.
        continue
    label_cells = tr.find_all("td", {"class": "no-alt"})
    if label_cells:
        tag = "{0}({1})".format(label_cells[0].text, label_cells[1].text)
        trInfo = {}
        # Register the item right away so an item with a single row is not
        # lost; later rows fill in this same dict.
        d[tag] = trInfo
        trInfo[cells[2].text] = cells[3].text
    elif trInfo is not None:
        # Continuation row; skipped if no item has been started yet, instead
        # of failing on an undefined name.
        trInfo[cells[0].text] = cells[1].text

d

{'SO2(ppb)': {'2019/01': '4.10',
  '2019/02': '3.30',
  '2019/03': '3.90',
  '2019/04': '3.40',
  '2019/05': '3.30',
  '2019/06': '4.10',
  '2019/07': '4',
  '2019/08': '4.60',
  '2019/09': '3.50',
  '2019/10': '3.20',
  '2019/11': '3.60'},
 'CO(ppm)': {'2019/01': '0.67',
  '2019/02': '0.48',
  '2019/03': '0.50',
  '2019/04': '0.39',
  '2019/05': '0.38',
  '2019/06': '0.24',
  '2019/07': '0.28',
  '2019/08': '0.33',
  '2019/09': '0.32',
  '2019/10': '0.36',
  '2019/11': '0.46'},
 'O3(ppb)': {'2019/01': '24.70',
  '2019/02': '29.40',
  '2019/03': '29.20',
  '2019/04': '30.40',
  '2019/05': '28.60',
  '2019/06': '16.30',
  '2019/07': '15.50',
  '2019/08': '19.70',
  '2019/09': '35.70',
  '2019/10': '44',
  '2019/11': '34.80'},
 'PM10(μg/m3)': {'2019/01': '78',
  '2019/02': '57',
  '2019/03': '52',
  '2019/04': '42',
  '2019/05': '35',
  '2019/06': '20',
  '2019/07': '24',
  '2019/08': '27',
  '2019/09': '35',
  '2019/10': '50',
  '2019/11': '63'},
 'NOx(ppb)': {'2019/01': '38.01',
  '201

In [9]:
import pandas as pd
# Tabulate the scraped data: one column per monitored item, one row per month.
# The cells hold the scraped text values as-is.
pd.DataFrame(data=d)

Unnamed: 0,SO2(ppb),CO(ppm),O3(ppb),PM10(μg/m3),NOx(ppb),NO(ppb),NO2(ppb),THC(ppm),NMHC(ppm),CH4(ppm)
2019/01,4.1,0.67,24.7,78,38.01,7.95,30.06,2.27,0.27,2.0
2019/02,3.3,0.48,29.4,57,25.62,4.76,20.86,2.11,0.17,1.94
2019/03,3.9,0.5,29.2,52,27.84,5.41,22.43,2.12,0.18,1.94
2019/04,3.4,0.39,30.4,42,21.83,4.16,17.68,2.02,0.14,1.88
2019/05,3.3,0.38,28.6,35,22.55,4.58,17.98,2.02,0.16,1.86
2019/06,4.1,0.24,16.3,20,17.15,5.08,12.08,1.92,0.13,1.79
2019/07,4.0,0.28,15.5,24,17.71,5.16,12.54,1.93,0.14,1.8
2019/08,4.6,0.33,19.7,27,19.66,5.17,14.49,1.97,0.15,1.82
2019/09,3.5,0.32,35.7,35,18.66,3.63,15.03,1.98,0.13,1.86
2019/10,3.2,0.36,44.0,50,19.5,3.5,15.99,2.05,0.13,1.92
