In [41]:
import requests
import pathlib

pathlib.Path('data').mkdir(exist_ok=True)

# 楓葉紅葉日期
if not pathlib.Path('data/maple/maple_foliage.csv').exists():
    response = requests.get('https://www.data.jma.go.jp/sakura/ruinenchi/015.csv')
    response.encoding = 'shift-jis'
    with open('data/maple/maple_foliage.csv', 'w', encoding='utf-8') as file:
        file.write(response.text)


# 楓葉落葉日期
if not pathlib.Path('data/maple/maple_fall.csv').exists():
    response = requests.get('https://www.data.jma.go.jp/sakura/ruinenchi/016.csv')
    response.encoding = 'shift-jis'
    with open('data/maple/maple_shedding.csv', 'w', encoding='utf-8') as file:
        file.write(response.text)

In [42]:
import pandas as pd

foliage_data = pd.read_csv('data/maple/maple_foliage.csv', skiprows=1, encoding='utf-8')
shedding_data = pd.read_csv('data/maple/maple_shedding.csv', skiprows=1, encoding='utf-8')

foliage_data.head()

Unnamed: 0,番号,地点名,1953,rm,1954,rm.1,1955,rm.2,1956,rm.3,...,2032,rm.79,平年値,rm.80,最早値,rm.81,最早年,最晩値,rm.82,最晩年
0,401,稚内,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,406,留萌,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,407,旭川,0,0,0,0,0,0,0,0,...,0,0,1023,6,1008,6,1993,1105,6,2002
3,409,網走,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,412,札幌,1008,6,1015,6,1017,6,1024,6,...,0,0,1028,6,926,6,1963,1110,6,2023


In [43]:
def drop_unused_columns(df):
    columns = ['番号', '平年値', '最早値', '最早年', '最晩値', '最晩年']
    remark_columns = [col for col in df.columns if col.startswith('rm')]
    return df.drop(columns=columns + remark_columns)

foliage_data = drop_unused_columns(foliage_data)
shedding_data = drop_unused_columns(shedding_data)

foliage_data.head()


Unnamed: 0,地点名,1953,1954,1955,1956,1957,1958,1959,1960,1961,...,2023,2024,2025,2026,2027,2028,2029,2030,2031,2032
0,稚内,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,留萌,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,旭川,0,0,0,0,0,0,0,0,0,...,1027,0,0,0,0,0,0,0,0,0
3,網走,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,札幌,1008,1015,1017,1024,1020,1012,0,0,1013,...,1110,0,0,0,0,0,0,0,0,0


In [44]:
foliage_data['地点名'] = foliage_data['地点名'].str.strip()
location_names = foliage_data['地点名'].unique()
pd.Series(location_names)

0        稚内
1        留萌
2        旭川
3        網走
4        札幌
       ... 
97      宮古島
98      久米島
99       那覇
100      名護
101    南大東島
Length: 102, dtype: object

In [45]:
from time import sleep

def fetch_history_weather_data(latitude, longtitude, start_date, end_date, variables):
    url = (
        f'https://archive-api.open-meteo.com/v1/archive?'
        f'latitude={latitude}&'
        f'&longitude={longtitude}&'
        f'start_date={start_date}&'
        f'end_date={end_date}&'
        f'{variables}&'
        'timezone=Asia/Tokyo'
    )
    for _ in range(3):
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.Timeout:
            print('Timeout, retrying...')
            sleep(5)
        except requests.exceptions.RequestException as e:
            print(f'Request failed: {e}')
            break
    return None
    

In [46]:
from tqdm import tqdm
import json

def download_weather_data(geolocator, location_names, directory):
    for location_name in (pbar := tqdm(location_names)):
        pbar.set_description('Downloading')
        pbar.set_postfix_str(location_name)
        
        pathlib.Path(f'{directory}/{location_name}').mkdir(exist_ok=True, parents=True)

        location = geolocator.geocode(location_name)
        if location is None:
            print(f'{location_name} not found')
            continue

        for year in range(1953, 2024):
            if pathlib.Path(f'{directory}/{location_name}/{year}.json').exists():
                continue
            response = fetch_history_weather_data(
                location.latitude, 
                location.longitude, 
                f'{year}-09-01', 
                f'{year}-12-31', 
                'daily=temperature_2m_max,temperature_2m_min,temperature_2m_mean,daylight_duration,precipitation_sum'
            )

            if response is None:
                print(f'Failed to fetch data for {location_name} in {year}')
                return

            # 確認是否有錯誤 ( 如: API 每小時的限制 )
            if response.get('error'):
                print(response.get('reason'))
                return

            with open(f'{directory}/{location_name}/{year}.json', 'w', encoding='utf-8') as file:
                file.write(json.dumps(response))
            
            sleep(1)

In [47]:
from geopy.geocoders import Nominatim
import numpy as np


geolocator = Nominatim(user_agent="kaede")

download_completed = True

id = 2
if not download_completed:
    location_names = np.array_split(location_names, 4)
    download_weather_data(geolocator, location_names[id], 'data/weather')

In [48]:
from datetime import datetime

def number_to_time(year, num):
    """
    將數字轉換為時間

    Args:
        num (int): 數字
    Returns:
        month (int): 月份
        day (int): 日期
    """
    # 如果是 1XX 代表是下一年
    if num // 100 == 1:
        year += 1
        
    return datetime.strptime(f'{year}-{num//100}-{num%100}', '%Y-%m-%d')

In [49]:
data = pd.DataFrame()
for location_name_index, location_name in enumerate(tqdm(location_names)):  
    for year in range(1953, 2024):
        if not pathlib.Path(f'data/weather/{location_name}/{year}.json').exists():
            continue

        with open(f'data/weather/{location_name}/{year}.json', 'r', encoding='utf-8') as file:
            weather_data = json.load(file)

        status = 0
        for time_index, time in enumerate(weather_data['daily']['time']):
            foliage_date = foliage_data[str(year)].iloc[location_name_index]
            shedding_date = shedding_data[str(year)].iloc[location_name_index]
            if foliage_date == 0 or shedding_date == 0:
                continue
            time = datetime.strptime(time, '%Y-%m-%d')
            if time == number_to_time(year, foliage_date):
                status = 1
            elif time == number_to_time(year, shedding_date):
                status = 2
                
            new_row = pd.DataFrame({
                'year': [year],
                'month': [time.month],
                'day': [time.day],
                'latitude': [weather_data['latitude']],
                'longitude': [weather_data['longitude']],
                'elevation': [weather_data['elevation']],
                'max_temperature': [weather_data['daily']['temperature_2m_max'][time_index]],
                'min_temperature': [weather_data['daily']['temperature_2m_min'][time_index]],
                'mean_temperature': [weather_data['daily']['temperature_2m_mean'][time_index]],
                'daylight_duration': [weather_data['daily']['daylight_duration'][time_index]],
                'precipitation_sum': [weather_data['daily']['precipitation_sum'][time_index]],
                'status': [status],
            })
            data = pd.concat([data, new_row], ignore_index=True)

data.to_csv('data/data.csv', index=False)
        

100%|██████████| 102/102 [30:16<00:00, 17.81s/it]
