# 猛暑日・真夏日の観測地点数（全国）

* データソース
    * https://www.data.jma.go.jp/obd/stats/etrn/view/summer.php
<br><br>
* 作業内容
    * 過去データを取得する
        * 2012/5~2023/8/7まで
    * 直近データと合わせる
    
* データ更新
    * 基本的に初回のみで、以降は行わない
        * 過去分のcsv, tableは30, 35度以上のみで40度以上の表記などがないため
        * tableデータのスクレイピングは不安定なため
    * 以降の更新は日々＜直近7日＋当日のリアルタイム更新＞のデータから算出
    
* 観測地点数
    * ＜最新の気象データ＞の観測地点数とは合致
        * [最新の気象データ > 気温の状況 > 日最高気温一覧表(詳細)](https://www.data.jma.go.jp/stats/data/mdrr/tem_rct/alltable/mxtemsadext00_rct.csv)
    * ＜過去の気象データ＞とは観測地点数が合致しないのでデータ統合不可
        * [過去の気象データ検索](https://www.data.jma.go.jp/stats/etrn/index.php)

In [1]:
import pandas as pd
from datetime import datetime, date, timedelta

In [2]:
# Past-year data: one CSV per year is read from the JMA site.
start_y = 2012
end_y = date.today().year  # current year; excluded from the range below

# Read every yearly file first and concatenate once at the end, instead of
# re-copying the accumulated frame on each loop iteration.
yearly_frames = [
    pd.read_csv(
        f"https://www.data.jma.go.jp/obd/stats/etrn/view/temp/{year}_hp.csv",
        encoding='shift_jis',
    )
    for year in range(start_y, end_y)
]

# Fall back to an empty frame when the year range is empty (pd.concat rejects
# an empty list), which is what the original accumulator started from.
heatpoint = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

In [3]:
# Replace the CSV header with short English column names.
# Assumes every yearly CSV has exactly three columns in the order
# date, count of stations >= 30 degC, count of stations >= 35 degC — TODO confirm.
heatpoint.columns = ['date', 'over30', 'over35']

In [4]:
# Parse the 'date' column into pandas datetimes before it is used as the index.
past_dates = pd.to_datetime(heatpoint['date'])
heatpoint['date'] = past_dates

In [5]:
# Use the date as the index and put the rows in chronological order.
heatpoint_by_date = heatpoint.set_index('date')
heatpoint = heatpoint_by_date.sort_index()

In [6]:
# Show the past-years frame (indexed by date, columns over30 / over35).
heatpoint

Unnamed: 0_level_0,over30,over35
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-05-01,5.0,
2012-05-02,10.0,
2012-05-03,,
2012-05-04,,
2012-05-05,13.0,
...,...,...
2022-10-27,0.0,0.0
2022-10-28,0.0,0.0
2022-10-29,0.0,0.0
2022-10-30,0.0,0.0


In [7]:
# Current-year data: scrape the monthly tables from the JMA summer page.
start_m = 5
# Exclusive upper bound: the month after yesterday's month, so the loop runs
# through the month that contains yesterday.
end_m = (date.today() - timedelta(days=1)).month + 1

monthly_frames = []
for month in range(start_m, end_m):
    url = f"https://www.data.jma.go.jp/obd/stats/etrn/view/summer.php?month={month}"
    try:
        tables = pd.read_html(url)
        table = tables[2]  # third table returned for the page holds the daily counts
        table.columns = ['day','over30','over35']
        # Build a full date from the year (end_y), the requested month and the day column.
        table['date'] = pd.to_datetime(str(end_y)+'-'+str(month).zfill(2)+'-'+table['day'].astype(str), format='%Y-%m-%d')
    except Exception as exc:
        # Best effort: a month that cannot be fetched or parsed is skipped, but
        # the cause is reported instead of being silently swallowed.
        # KeyboardInterrupt / SystemExit are no longer caught.
        print('Not Available:', month, f'({type(exc).__name__}: {exc})')
    else:
        monthly_frames.append(table)
        print('Retrieved:', month)

# Concatenate once; keep an empty frame when nothing could be retrieved.
heatpoint_cur = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

Retrieved: 5
Retrieved: 6
Retrieved: 7
Retrieved: 8


In [8]:
# Index the current-year rows by date; the day-of-month column is dropped
# because the full 'date' column already carries that information.
cur_without_day = heatpoint_cur.drop(columns='day')
heatpoint_cur = cur_without_day.set_index('date')

In [9]:
# Show the current-year frame (indexed by date, columns over30 / over35).
heatpoint_cur

Unnamed: 0_level_0,over30,over35
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-05-01,0,0
2023-05-02,0,0
2023-05-03,0,0
2023-05-04,1,0
2023-05-05,11,0
...,...,...
2023-08-03,714,229
2023-08-04,780,290
2023-08-05,695,274
2023-08-06,665,223


In [10]:
# Stack past years and the current year, order the rows by their date index,
# then move the index back into a regular column.
stacked = pd.concat([heatpoint, heatpoint_cur])
heatpoint = stacked.sort_index().reset_index()

In [11]:
# Show the combined history (columns date / over30 / over35).
heatpoint

Unnamed: 0,date,over30,over35
0,2012-05-01,5.0,
1,2012-05-02,10.0,
2,2012-05-03,,
3,2012-05-04,,
4,2012-05-05,13.0,
...,...,...,...
2118,2023-08-03,714.0,229.0
2119,2023-08-04,780.0,290.0
2120,2023-08-05,695.0,274.0
2121,2023-08-06,665.0,223.0


In [12]:
# Path of the stored time-series CSV (relative path, read in the next cell).
filepath = './data-maxtemp/timeseries-data/jma-maxtemp-heatpoints-ts.csv'

In [13]:
# Load the stored time series; its 'date' column is parsed in the following cell.
heatpoint_ts = pd.read_csv(filepath)

In [14]:
# Time series of the recent data: parse its 'date' column into datetimes.
stored_dates = pd.to_datetime(heatpoint_ts['date'])
heatpoint_ts['date'] = stored_dates

In [15]:
# Append the scraped history below the stored series. Dates present in both
# sources now appear twice; the stored rows come first.
merge_inputs = [heatpoint_ts, heatpoint]
heatpoint_ts = pd.concat(merge_inputs)

In [16]:
# Sanity check for over30 on dates that occur more than once.
# keep='last' flags the earlier copy of each repeated date, keep='first' flags
# the later copy. Presumably the earlier copy is the stored row and the later
# one the scraped row (each date at most once per source) — verify.
earlier_copy = heatpoint_ts.duplicated(subset='date', keep='last')
later_copy = heatpoint_ts.duplicated(subset='date', keep='first')
earlier_over30 = heatpoint_ts[earlier_copy]['over30'].reset_index(drop=True)
later_over30 = heatpoint_ts[later_copy]['over30'].reset_index(drop=True)
# Compared row by row after renumbering; all zeros means the two copies agree.
earlier_over30 - later_over30

0     0.0
1     0.0
2     0.0
3     0.0
4     0.0
5     0.0
6     0.0
7     0.0
8     0.0
9     0.0
10    0.0
11    0.0
12    0.0
13    0.0
14    0.0
15    0.0
16    0.0
Name: over30, dtype: float64

In [17]:
# Sanity check for over35 on dates that occur more than once.
# keep='last' flags the earlier copy of each repeated date, keep='first' flags
# the later copy. Presumably the earlier copy is the stored row and the later
# one the scraped row (each date at most once per source) — verify.
earlier_copy = heatpoint_ts.duplicated(subset='date', keep='last')
later_copy = heatpoint_ts.duplicated(subset='date', keep='first')
earlier_over35 = heatpoint_ts[earlier_copy]['over35'].reset_index(drop=True)
later_over35 = heatpoint_ts[later_copy]['over35'].reset_index(drop=True)
# Compared row by row after renumbering; all zeros means the two copies agree.
earlier_over35 - later_over35

0     0.0
1     0.0
2     0.0
3     0.0
4     0.0
5     0.0
6     0.0
7     0.0
8     0.0
9     0.0
10    0.0
11    0.0
12    0.0
13    0.0
14    0.0
15    0.0
16    0.0
Name: over35, dtype: float64

In [18]:
# For every repeated date keep only its first occurrence and discard the rest.
heatpoint_ts = heatpoint_ts.drop_duplicates(subset='date', keep='first')

In [22]:
# Put the merged series in chronological order and renumber the rows from 0.
chronological = heatpoint_ts.sort_values(by='date')
heatpoint_ts = chronological.reset_index(drop=True)

In [23]:
# Show the merged, de-duplicated time series.
heatpoint_ts

Unnamed: 0,date,over30,over35,over40,total,null_values
0,2012-05-01,5.0,,,,
1,2012-05-02,10.0,,,,
2,2012-05-03,,,,,
3,2012-05-04,,,,,
4,2012-05-05,13.0,,,,
...,...,...,...,...,...,...
2119,2023-08-04,780.0,290.0,0.0,915.0,1.0
2120,2023-08-05,695.0,274.0,1.0,915.0,1.0
2121,2023-08-06,665.0,223.0,0.0,915.0,1.0
2122,2023-08-07,527.0,100.0,0.0,915.0,1.0


In [24]:
# Saving is disabled (the call below is commented out).
# NOTE(review): the call writes `heatpoint` (date / over30 / over35 only) to
# `filepath`, not the merged `heatpoint_ts`; enabling it as-is would replace
# the stored file with a frame lacking over40 / total / null_values.
# Confirm which frame is intended before re-enabling.
#heatpoint.to_csv(filepath, index=False)