In [1]:
import requests
import pathlib

pathlib.Path('data').mkdir(exist_ok=True)

# 楓葉紅葉日期
if not pathlib.Path('data/maple_foliage.csv').exists():
    response = requests.get('https://www.data.jma.go.jp/sakura/ruinenchi/015.csv')
    response.encoding = 'shift-jis'
    with open('data/maple_foliage.csv', 'w', encoding='utf-8') as file:
        file.write(response.text)


# 楓葉落葉日期
if not pathlib.Path('data/maple_fall.csv').exists():
    response = requests.get('https://www.data.jma.go.jp/sakura/ruinenchi/016.csv')
    response.encoding = 'shift-jis'
    with open('data/maple_shedding.csv', 'w', encoding='utf-8') as file:
        file.write(response.text)

In [2]:
import pandas as pd

foliage_data = pd.read_csv('data/maple_foliage.csv', skiprows=1, encoding='utf-8')

foliage_data.head()

Unnamed: 0,番号,地点名,1953,rm,1954,rm.1,1955,rm.2,1956,rm.3,...,2032,rm.79,平年値,rm.80,最早値,rm.81,最早年,最晩値,rm.82,最晩年
0,401,稚内,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,406,留萌,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,407,旭川,0,0,0,0,0,0,0,0,...,0,0,1023,6,1008,6,1993,1105,6,2002
3,409,網走,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,412,札幌,1008,6,1015,6,1017,6,1024,6,...,0,0,1028,6,926,6,1963,1110,6,2023


In [3]:
def drop_unused_columns(df):
    columns = ['番号', '平年値', '最早値', '最早年', '最晩値', '最晩年']
    remark_columns = [col for col in df.columns if col.startswith('rm')]
    return df.drop(columns=columns + remark_columns)

foliage_data = drop_unused_columns(foliage_data)

foliage_data.head()


Unnamed: 0,地点名,1953,1954,1955,1956,1957,1958,1959,1960,1961,...,2023,2024,2025,2026,2027,2028,2029,2030,2031,2032
0,稚内,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,留萌,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,旭川,0,0,0,0,0,0,0,0,0,...,1027,0,0,0,0,0,0,0,0,0
3,網走,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,札幌,1008,1015,1017,1024,1020,1012,0,0,1013,...,1110,0,0,0,0,0,0,0,0,0


In [4]:
foliage_data['地点名'] = foliage_data['地点名'].str.strip()
location_names = foliage_data['地点名'].unique()
pd.Series(location_names)

0        稚内
1        留萌
2        旭川
3        網走
4        札幌
       ... 
97      宮古島
98      久米島
99       那覇
100      名護
101    南大東島
Length: 102, dtype: object

In [5]:
from time import sleep

def fetch_history_weather_data(latitude, longtitude, start_date, end_date, variables):
    url = (
        f'https://archive-api.open-meteo.com/v1/archive?'
        f'latitude={latitude}&'
        f'&longitude={longtitude}&'
        f'start_date={start_date}&'
        f'end_date={end_date}&'
        f'{variables}&'
        'timezone=Asia/Tokyo'
    )
    for _ in range(3):
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.Timeout:
            print('Timeout, retrying...')
            sleep(5)
        except requests.exceptions.RequestException as e:
            print(f'Request failed: {e}')
            break
    return None
    

In [6]:
from tqdm import tqdm
import json

def download_weather_data(geolocator, location_names, directory):
    for location_name in (pbar := tqdm(location_names)):
        pbar.set_description('Downloading')
        pbar.set_postfix_str(location_name)
        
        pathlib.Path(f'{directory}/{location_name}').mkdir(exist_ok=True, parents=True)

        location = geolocator.geocode(location_name)
        if location is None:
            print(f'{location_name} not found')
            continue

        for year in range(1953, 2024):
            if pathlib.Path(f'{directory}/{location_name}/{year}.json').exists():
                continue
            response = fetch_history_weather_data(
                location.latitude, 
                location.longitude, 
                f'{year}-09-01', 
                f'{year}-12-31', 
                'daily=temperature_2m_max,temperature_2m_min,temperature_2m_mean,daylight_duration,precipitation_sum'
            )

            if response is None:
                print(f'Failed to fetch data for {location_name} in {year}')
                return

            # 確認是否有錯誤 ( 如: API 每小時的限制 )
            if response.get('error'):
                print(response.get('reason'))
                return

            with open(f'{directory}/{location_name}/{year}.json', 'w', encoding='utf-8') as file:
                file.write(json.dumps(response))
            
            sleep(1)

In [7]:
from geopy.geocoders import Nominatim
import numpy as np


geolocator = Nominatim(user_agent="kaede")

download_completed = False

location_names = np.array_split(location_names, 4)

id = 2
if not download_completed:
    download_weather_data(geolocator, location_names[id], 'data/weather')

Downloading: 100%|██████████| 25/25 [00:42<00:00,  1.70s/it, 厳原]  


In [8]:
# 獲取日期在某段時間的 index
def get_date_index(year, date):
    start_date = f'{year}-09-01'
    end_date = f'{year}-12-31'
    date_range = pd.date_range(start_date, end_date)

    return date_range.get_loc(date)

def number_to_date(num):
    return f'{num // 100:02d}-{num % 100:02d}'


In [9]:
data = pd.DataFrame(columns=[
    'latitude', # 緯度
    'daylight', # 日照時間
    'precipitation', # 降水量
    't_max', 't_min', 't_mean', # 氣溫
    'd_1w_max', 'd_1w_min', 'd_1w_mean', # 一周日照時間
    'p_1w_max', 'p_1w_min', 'p_1w_mean', # 一周降水量
    't_1w_max', 't_1w_min', 't_1w_mean', # 一周氣溫
    't_2w_max', 't_2w_min', 't_2w_mean', # 兩周氣溫
    'd_2w_max', 'd_2w_min', 'd_2w_mean', # 兩周日照時間
    'p_2w_max', 'p_2w_min', 'p_2w_mean', # 兩周降水量
])

record_data = foliage_data.to_dict(orient='records')
for record in record_data:
    location_name = record['地点名']
    location = geolocator.geocode(location_name)

    for year in range(1953, 2024):
        weather_data = json.load(open(f'data/weather/{location_name}/{year}.json', 'r', encoding='utf-8'))
        foliage_day = record[str(year)]
        if foliage_day == 0 or len(str(foliage_day)) == 3: # 沒有資料
            continue

        data_index = get_date_index(year, f'{year}-{number_to_date(foliage_day)}')
        def get_data(data, label, start_index, end_index=None, mode=None):
            if end_index is not None:
                if mode == 'mean':
                    return sum(data['daily'][label][start_index:end_index]) / (end_index - start_index + 1)
                elif mode == 'max':
                    return max(data['daily'][label][start_index:end_index])
                elif mode == 'min':
                    return min(data['daily'][label][start_index:end_index])
            else:
                return data['daily'][label][start_index]
        
        new_row = pd.DataFrame({
            'latitude': location.latitude,
            'daylight': get_data(weather_data, 'daylight_duration', data_index),
            'precipitation': get_data(weather_data, 'precipitation_sum', data_index),
            't_max': get_data(weather_data, 'temperature_2m_max', data_index),
            't_min': get_data(weather_data, 'temperature_2m_min', data_index),
            't_mean': get_data(weather_data, 'temperature_2m_mean', data_index),
            'd_1w_max': get_data(weather_data, 'daylight_duration', data_index - 6, data_index, 'max'),
            'd_1w_min': get_data(weather_data, 'daylight_duration', data_index - 6, data_index, 'min'),
            'd_1w_mean': get_data(weather_data, 'daylight_duration', data_index - 6, data_index, 'mean'),
            'p_1w_max': get_data(weather_data, 'precipitation_sum', data_index - 6, data_index, 'max'),
            'p_1w_min': get_data(weather_data, 'precipitation_sum', data_index - 6, data_index, 'min'),
            'p_1w_mean': get_data(weather_data, 'precipitation_sum', data_index - 6, data_index, 'mean'),
            't_1w_max': get_data(weather_data, 'temperature_2m_max', data_index - 6, data_index, 'max'),
            't_1w_min': get_data(weather_data, 'temperature_2m_min', data_index - 6, data_index, 'min'),
            't_1w_mean': get_data(weather_data, 'temperature_2m_mean', data_index - 6, data_index, 'mean'),
            't_2w_max': get_data(weather_data, 'temperature_2m_max', data_index - 13, data_index, 'max'),
            't_2w_min': get_data(weather_data, 'temperature_2m_min', data_index - 13, data_index, 'min'),
            't_2w_mean': get_data(weather_data, 'temperature_2m_mean', data_index - 13, data_index, 'mean'),
            'd_2w_max': get_data(weather_data, 'daylight_duration', data_index - 13, data_index, 'max'),
            'd_2w_min': get_data(weather_data, 'daylight_duration', data_index - 13, data_index, 'min'),
            'd_2w_mean': get_data(weather_data, 'daylight_duration', data_index - 13, data_index, 'mean'),
            'p_2w_max': get_data(weather_data, 'precipitation_sum', data_index - 13, data_index, 'max'),
            'p_2w_min': get_data(weather_data, 'precipitation_sum', data_index - 13, data_index, 'min'),
            'p_2w_mean': get_data(weather_data, 'precipitation_sum', data_index - 13, data_index, 'mean'),
        }, index=[0])
        data = pd.concat([data, new_row], ignore_index=True)
        
data.to_csv('data/data.csv', index=False)

  data = pd.concat([data, new_row], ignore_index=True)
