In [145]:
import openmeteo_requests
import requests_cache
from retry_requests import retry

from datetime import datetime
import numpy as np
import pandas as pd
from time import sleep
import os

In [2]:
# Load the hazard dataset; the path is relative to the notebook's working directory.
hazard_df = pd.read_csv(filepath_or_buffer='./dataset/ddpm_amphoe_hazard_dataset.csv')

In [76]:
# --- Open-Meteo API client ---------------------------------------------------
# Requests go through an on-disk cache ('.cache', expire_after = -1) wrapped in a
# retrying session (5 retries, backoff factor 0.2).
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# --- Input locations ---------------------------------------------------------
# The relative path only resolves when the notebook runs from the
# GIS_MANIPULATION directory.
hazard_df = pd.read_csv('./dataset/ddpm_amphoe_hazard_dataset.csv')

# --- Requested period (both ends are sent to the API as YYYY-MM-DD) ----------
start_date = datetime(2011, 1, 1)
end_date = datetime(2020, 12, 31)

# --- Endpoint and output location --------------------------------------------
url = "https://archive-api.open-meteo.com/v1/archive"
output_folder = './dataset/open-meteo'

# --- Request parameters ------------------------------------------------------
# "latitude" / "longitude" start empty and are filled per batch by the download
# cell. The order of the "daily" variables matters: the response exposes them by
# position, so downstream cells must read them in this same order.
params = {
    "latitude": [],
    "longitude": [],
    "start_date": start_date.strftime("%Y-%m-%d"),
    "end_date": end_date.strftime("%Y-%m-%d"),
    "daily": [
        "weather_code",
        "temperature_2m_max",
        "temperature_2m_min",
        "temperature_2m_mean",
        "precipitation_sum",
        "rain_sum",
        "precipitation_hours",
        "wind_speed_10m_max",
        "wind_gusts_10m_max",
    ],
    "timezone": "Asia/Bangkok",
}

# _______________ SYSTEM CONFIG ___________________

In [None]:
# Download the daily archive for every row of hazard_df, `step` locations per
# API call. Responses are appended to `responses_list` in row order.
step = 150                 # locations sent in a single request
locations_per_pause = 300  # locations requested between two long pauses
pause_seconds = 65         # long pause that keeps us under the per-minute API limit

responses_list = []
failed_batches = []        # (start, stop) row ranges whose request raised
locations_since_pause = 0

for location_start in range(0, len(hazard_df), step):
    location_stop = min(location_start + step, len(hazard_df))

    # Throttle on the number of locations actually requested since the last
    # pause. Counting attempts (not the length of responses_list) keeps the
    # throttle working after a failed batch or with a step that does not
    # divide the pause threshold.
    if locations_since_pause >= locations_per_pause:
        sleep(pause_seconds)
        locations_since_pause = 0

    batch = hazard_df.iloc[location_start:location_stop]

    # Build a fresh parameter dict per request instead of mutating the shared
    # `params`, and send plain Python lists rather than pandas Series.
    request_params = {
        **params,
        'latitude': batch['latitude'].tolist(),
        'longitude': batch['longitude'].tolist(),
    }
    locations_since_pause += len(batch)

    try:
        responses = openmeteo.weather_api(url, params=request_params)
    except Exception as e:
        # Best effort: keep going, but remember which rows are missing.
        failed_batches.append((location_start, location_stop))
        print(f"Error downloading at point Start Point ({location_start}), Stop Point ({location_stop}): {e}")
        continue

    responses_list.extend(responses)
    sleep(1)

if failed_batches:
    # Downstream cells pair responses with hazard_df rows by position, so a
    # missing batch shifts every later response onto the wrong coordinates.
    print(f"WARNING: {len(failed_batches)} batch(es) failed: {failed_batches}. "
          f"responses_list has {len(responses_list)} entries for {len(hazard_df)} locations; "
          "re-run the failed ranges before aggregating.")

In [149]:
def generate_month_dates(start_date, end_date):
    """Return 'YYYY-MM-DD' labels stepping month by month from start_date to end_date.

    The first label is start_date itself (whatever its day of month); every
    following label is the first day of the next month. Labels are produced
    while they are <= end_date, so an end_date before start_date gives [].
    """
    labels = []
    cursor = start_date
    while cursor <= end_date:
        labels.append(f"{cursor:%Y-%m-%d}")
        # divmod maps December (12) to (1, 0): carry one year, restart at month 1.
        year_carry, month_index = divmod(cursor.month, 12)
        cursor = cursor.replace(year=cursor.year + year_carry, month=month_index + 1, day=1)
    return labels

def generate_last_date_of_year(start_year, end_year):
    """Return the 31 December label ('YYYY-12-31') of every year in [start_year, end_year].

    Both bounds are passed through int(), so numeric strings are accepted.
    An end_year smaller than start_year gives an empty list.
    """
    first_year, last_year = int(start_year), int(end_year)
    return [f"{datetime(year, 12, 31):%Y-%m-%d}" for year in range(first_year, last_year + 1)]

def create_variable_template(df, time_series = 'yearly'):
    """Build one empty output table per requested daily variable.

    Each table is indexed by the (latitude, longitude) pairs of `df` and has one
    column per period between the module-level `start_date` and `end_date`:
    year strings for 'yearly', month labels from generate_month_dates for
    'monthly'. The variable names come from the module-level `params['daily']`.

    Returns a dict mapping variable name -> independent copy of the empty table.
    Raises ValueError when `time_series` is neither 'monthly' nor 'yearly'.
    """
    valid_time_series = ['monthly', 'yearly']
    if time_series not in valid_time_series:
        raise ValueError(f"Invalid time series frequency. Choose from: {', '.join(valid_time_series)}")

    location_index = pd.MultiIndex.from_frame(df[['latitude', 'longitude']])

    if time_series == 'yearly':
        # Keep only the year part of each 'YYYY-12-31' label.
        year_end_labels = generate_last_date_of_year(start_date.year, end_date.year)
        column_labels = [label.split('-')[0] for label in year_end_labels]
    else:
        column_labels = list(generate_month_dates(start_date, end_date))

    export_template = pd.DataFrame(index=location_index, columns=column_labels)

    # Every variable gets its own copy so filling one table never touches another.
    return {variable: export_template.copy() for variable in params['daily']}

def export_csv(variable_dict, start_date=start_date, end_date=end_date, time_series='yearly'):
    """Write every table in `variable_dict` to its own CSV file in `output_folder`.

    Files are named '<variable>_<start>_<end>_<time_series>.csv', with both dates
    formatted as YYYY-MM-DD, and are written with their index. The folder is
    created when it does not exist yet. The default start_date / end_date are the
    module-level values as they were when this function was defined.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)  # first export: create the target folder

    period_start, period_end = (
        datetime.strftime(day, '%Y-%m-%d') for day in (start_date, end_date)
    )

    for variable, table in variable_dict.items():
        output_file = os.path.join(
            output_folder,
            f"{variable}_{period_start}_{period_end}_{time_series}.csv",
        )
        table.to_csv(output_file, index=True)

In [None]:
# Aggregate every downloaded response to monthly medians and store them in the
# per-variable tables, one table row per location.
variable_dict = create_variable_template(hazard_df, time_series='monthly')

# Rows are filled by position (response i -> row i), so a missing batch in the
# download step would silently attach values to the wrong coordinates.
if len(responses_list) != len(hazard_df):
    print(f"WARNING: {len(responses_list)} responses for {len(hazard_df)} locations; "
          "rows are filled by position and may not match their coordinates.")

# enumerate gives the row position directly; looking it up with
# responses_list.index(response) rescans the list on every iteration.
for row_position, response in enumerate(responses_list):

    daily = response.Daily()

    # The API returns unix timestamps in GMT+0 even when a timezone is requested.
    # Adding the UTC offset turns them into local calendar days, so the first
    # local day of a month is not bucketed into the previous month.
    utc_offset_seconds = response.UtcOffsetSeconds()

    daily_data = {"date": pd.date_range(
        start=pd.to_datetime(daily.Time() + utc_offset_seconds, unit="s", utc=True),
        end=pd.to_datetime(daily.TimeEnd() + utc_offset_seconds, unit="s", utc=True),
        freq=pd.Timedelta(seconds=daily.Interval()),
        inclusive="left",
    )}

    # Variables come back in the order they were requested, so read them by
    # position from the same list that was sent to the API.
    for variable_position, variable in enumerate(params['daily']):
        daily_data[variable] = daily.Variables(variable_position).ValuesAsNumpy()

    daily_dataframe = pd.DataFrame(data=daily_data)

    # Monthly median, labelled by month end. The offset object is used instead
    # of the 'M' alias, which newer pandas versions deprecate.
    daily_dataframe = daily_dataframe.groupby(pd.Grouper(key='date', freq=pd.offsets.MonthEnd())).median()

    # Keep only the months whose label falls inside the requested period.
    month_labels = daily_dataframe.index.tz_localize(None)
    daily_dataframe = daily_dataframe[(month_labels >= start_date) & (month_labels <= end_date)]
    daily_dataframe = daily_dataframe.T

    for variable, table in variable_dict.items():
        # Assign raw values: the template columns are month-start strings while
        # these rows are labelled with month-end timestamps, so label alignment
        # would never match. A length mismatch raises instead of writing NaN.
        table.iloc[row_position, :] = daily_dataframe.loc[variable].to_numpy()

    print(f"Inputing {response.Latitude()}°N {response.Longitude()}°E, Elevation {response.Elevation()} m asl")


In [None]:
# Display the filled table for one variable as a quick visual check before exporting.
variable_dict['temperature_2m_mean']

In [150]:
# Write one CSV per variable for the monthly tables.
export_csv(
    variable_dict,
    start_date=start_date,
    end_date=end_date,
    time_series='monthly',
)