## 抓取日本所有都道府縣

In [1]:
# 抓取日本所有都道府縣

import requests
from bs4 import BeautifulSoup

# Download the JMA prefecture-selection page and collect its image-map
# <area> elements; the following cells read the `alt` and `href` of each one.
homeurl = 'https://www.data.jma.go.jp/obd/stats/etrn/select/prefecture00.php?'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
}
payload = {"hl": "ja"}

# Without a timeout requests waits indefinitely on an unresponsive server.
response = requests.get(homeurl, headers=headers, params=payload, timeout=30)
# Fail here with a clear HTTP error instead of parsing an error page below.
response.raise_for_status()
# Decode using the encoding detected from the body rather than the one
# declared in the response headers.
response.encoding = response.apparent_encoding
html = response.text
soup = BeautifulSoup(html, 'lxml')

# The <area> elements are looked up inside <div id="main">.
res = soup.find('div', id="main")
areas = res.select("area")

In [5]:
# 抓取日本所有都道府縣明細

import pandas as pd

# One record per <area> element: the region name comes from its alt text and
# the region code is the value of the prec_no parameter inside its href.
data = [
    {
        '地區': area.get('alt'),
        '代碼': area.get('href').split('prec_no=')[1].split('&')[0],
    }
    for area in areas
]

df_areas = pd.DataFrame(data)

# Preview rows 10-19 of the table.
df_areas[10:20]

Unnamed: 0,地區,代碼
10,胆振地方,21
11,日高地方,22
12,渡島地方,23
13,檜山地方,24
14,青森県,31
15,秋田県,32
16,岩手県,33
17,宮城県,34
18,山形県,35
19,福島県,36


## 抓取各地區所有城市
### 將主要城市存入df

In [263]:
import pandas as pd

# Collect the stations flagged with scale 's' (commented in the original as
# the double-circle "main city" stations) for every region in df_areas.
data_city = []

# The request headers and the language parameter are identical for every
# region, so they are built once outside the loop.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
}
payload = {"hl": "ja"}

# Columns are read by label, not by position, so the loop does not depend on
# how many columns df_areas has or on the order they are in.
for area_name, area_code in zip(df_areas['地區'], df_areas['代碼']):
    area_code = int(area_code)

    # Fetch the station-selection page of this region.
    homeurl = 'https://www.data.jma.go.jp/obd/stats/etrn/select/prefecture.php?prec_no='+str(area_code)+'&block_no=&year=&month=&day=&view='
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(homeurl, headers=headers, params=payload, timeout=30)
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    html = response.text
    soup = BeautifulSoup(html, 'lxml')

    res_city = soup.find('div', id="ncontents2")
    if res_city is None:
        # Report the region instead of failing with an AttributeError on None.
        print('prec_no=' + str(area_code) + ': div#ncontents2 not found, region skipped')
        continue
    area_city = res_city.select("area")

    for city in area_city:
        # <area> elements without an onmouseover attribute carry no station
        # details and are skipped.
        onmouseover = city.get('onmouseover')
        if not onmouseover:
            continue

        # Parse the attribute once. The slice drops a fixed 21-character
        # prefix and 2-character suffix around a comma-separated list of
        # quoted values (presumably the JavaScript call wrapper -- verify
        # against the page markup).
        fields = [part.strip("'") for part in onmouseover[21:-2].strip("'").split(",")]

        scale_city = fields[0]  # station scale
        # Only stations with scale 's' are kept.
        if scale_city != 's':
            continue

        alt_city = city.get('alt')
        href_city = city.get('href')
        try:
            block_no_city = href_city.split('block_no=')[1].split('&')[0]
            # Positions 4-8: latitude degrees, latitude minutes,
            # longitude degrees, longitude minutes, altitude.
            latitude_d, latitude_m, longitude_d, longitude_m, altitude = fields[4:9]
        except (AttributeError, IndexError, ValueError):
            # Missing href, href without block_no=, or too few fields:
            # skip the malformed entry, as the original best-effort loop did.
            continue

        data_city.append({'地區': area_name,
                          '地區代碼': area_code,
                          '城市': alt_city,
                          '城市代碼': block_no_city,
                          '網址': href_city,
                          '規模': scale_city,
                          '緯度': "北緯" + latitude_d + "度" + latitude_m + "分",
                          '經度': "東經" + longitude_d + "度" + longitude_m + "分",
                          '海拔': altitude + "m"
                          })


df_city = pd.DataFrame(data_city)
# Identical rows (the same station collected more than once) are removed;
# the surviving rows keep their original index labels.
df_city.drop_duplicates(inplace=True)

df_city

Unnamed: 0,地區,地區代碼,城市,城市代碼,網址,規模,緯度,經度,海拔
0,宗谷地方,11,稚内,47401,../index.php?prec_no=11&block_no=47401&year=&m...,s,北緯45度24.9分,東經141度40.7分,2.8m
2,宗谷地方,11,北見枝幸,47402,../index.php?prec_no=11&block_no=47402&year=&m...,s,北緯44度56.4分,東經142度35.1分,6.7m
4,上川地方,12,旭川,47407,../index.php?prec_no=12&block_no=47407&year=&m...,s,北緯43度45.4分,東經142度22.3分,119.8m
6,留萌地方,13,羽幌,47404,../index.php?prec_no=13&block_no=47404&year=&m...,s,北緯44度21.8分,東經141度42.0分,7.9m
8,留萌地方,13,留萌,47406,../index.php?prec_no=13&block_no=47406&year=&m...,s,北緯43度56.7分,東經141度37.9分,23.6m
...,...,...,...,...,...,...,...,...,...
310,沖縄県,91,久米島,47929,../index.php?prec_no=91&block_no=47929&year=&m...,s,北緯26度20.2分,東經126度48.2分,4.6m
312,沖縄県,91,西表島,47917,../index.php?prec_no=91&block_no=47917&year=&m...,s,北緯24度25.6分,東經123度45.9分,10.3m
314,沖縄県,91,与那国島,47912,../index.php?prec_no=91&block_no=47912&year=&m...,s,北緯24度28.0分,東經123度00.6分,30m
316,沖縄県,91,南大東（南大東島）,47945,../index.php?prec_no=91&block_no=47945&year=&m...,s,北緯25度49.7分,東經131度13.7分,15.3m


In [264]:
# Save the per-city table to a CSV file.

outputpath = 'japanmaincity.csv'

# The row index is left out and the column names form the first line.
# utf_8_sig prepends a UTF-8 byte-order mark to the file.
df_city.to_csv(
    outputpath,
    sep=',',
    header=True,
    index=False,
    encoding='utf_8_sig',
)