Testing data retrieval from Open-Meteo

In [None]:
from datetime import datetime, timezone
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

from cron.jobs.toDataFrame import toDataFrame
from cron.settings import settings

In [None]:
# Map model id -> model name from the `id` and `name` columns of the model
# CSV. The columns are read directly rather than through `iterrows()`, which
# builds one Series per row and may coerce dtypes along the way.
models_df = pd.read_csv(settings.model_ids_path)
models = dict(zip(models_df["id"], models_df["name"]))

# Names of the hourly variables to request, from the `field` column.
hourly_fields_df = pd.read_csv(settings.hourly_fields_path)
hourly_fields = hourly_fields_df["field"].tolist()

In [None]:
# Timezone-aware (UTC) timestamp taken when this cell runs.
utc_dt = datetime.now(tz=timezone.utc)

# Session stack handed to the Open-Meteo client: a cached session named
# ".cache" whose entries expire after 3600 seconds, wrapped with up to
# 5 retries using a backoff factor of 0.2.
cache_session = requests_cache.CachedSession(".cache", expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [None]:
# Request a 16-day hourly forecast for the configured coordinates, asking for
# every model name listed in `models`.
url = "https://api.open-meteo.com/v1/forecast"
params = {
    "latitude": settings.latitude,
    "longitude": settings.longitude,
    "hourly": hourly_fields,
    "timezone": "GMT",
    # Materialise the dict view so the request receives a plain list of
    # model names rather than a live view onto `models`.
    "models": list(models.values()),
    "forecast_days": 16,
}
responses = openmeteo.weather_api(url, params=params)

In [None]:
def extract_model_data(response, hourly_fields: list[str]):
    """Convert one model response to a frame trimmed to its forecast horizon.

    The frame produced by ``toDataFrame`` is shortened in two steps:

    1. Horizon cut: the first row whose number of missing forecast values
       exceeds the previous row's by at least ``diff_threshold`` marks the
       end of the forecast; that row and everything after it are removed.
    2. Trailing cleanup: rows at the end of the remaining frame in which
       every forecast column is NaN are removed. All-NaN rows in the middle
       of the frame are kept.

    Args:
        response: A single model response, passed through to ``toDataFrame``.
        hourly_fields: Names of the forecast columns to inspect for NaNs.

    Returns:
        The trimmed frame (empty if no row holds any forecast value).
    """
    df = toDataFrame(response, hourly_fields)

    missing = df[hourly_fields].isnull()
    # Number of missing forecast values per row.
    nan_counts = missing.sum(axis=1)

    diff_threshold = 10  # threshold for a significant jump in missing values
    # diff() is NaN for the first row, which compares as False, so the first
    # row can never be selected as the horizon.
    jumps = (nan_counts.diff() >= diff_threshold).to_numpy()
    if jumps.any():
        # argmax on a boolean array gives the position of the first True.
        cutoff = int(jumps.argmax())
        # Slice by position so the cut does not rely on integer row labels.
        df = df.iloc[:cutoff]
        missing = missing.iloc[:cutoff]

    # Remove only trailing rows where all forecast columns are NaN.
    has_data = (~missing.all(axis=1)).to_numpy()
    if has_data.any():
        last_valid = len(has_data) - 1 - int(has_data[::-1].argmax())
        df = df.iloc[: last_valid + 1]
    else:
        df = df.iloc[:0]

    return df

In [None]:
old_icon_eu_df = None

# Use the first response whose model id maps to "icon_eu". The frame stays
# None when no response matches.
for response in responses:
    model_name = models.get(response.Model(), "Unknown")
    if model_name == "icon_eu":
        old_icon_eu_df = extract_model_data(response, hourly_fields)
        break

In [None]:
# Preview the first rows of the frame produced by the old extraction method.
old_icon_eu_df.head()

In [None]:
old_forecast_length = len(old_icon_eu_df)
print("Old method forecast length: ", old_forecast_length)
# Count rows that carry no forecast value at all. Only the requested forecast
# columns are checked: any always-populated non-forecast column (such as a
# timestamp, if the frame carries one) would otherwise force this to zero.
old_number_complete_nan_rows = (
    old_icon_eu_df[hourly_fields].isna().all(axis=1).sum()
)
print("Old method number of complete NaN rows: ", old_number_complete_nan_rows)

In [None]:
def new_extract_model_data(
    response,
    hourly_fields: list[str],
    required_fields: tuple[str, ...] = (
        "temperature_2m",
        "relative_humidity_2m",
        "dew_point_2m",
    ),
):
    """Convert one model response to a frame with complete key variables.

    Every row in which at least one of ``required_fields`` is NaN is removed.
    This drops such rows wherever they occur, not only at the end of the
    frame, so the result is not guaranteed to be a contiguous prefix.

    Args:
        response: A single model response, passed through to ``toDataFrame``.
        hourly_fields: Names of the forecast columns to extract.
        required_fields: Columns that must all be present for a row to be
            kept. Defaults to temperature, relative humidity and dew point.

    Returns:
        The frame restricted to rows where all required fields are non-NaN.
    """
    df = toDataFrame(response, hourly_fields)
    # Cut the frame where the most important values are no longer forecast.
    return df.dropna(subset=list(required_fields))

In [None]:
new_icon_eu_df = None

# Use the first response whose model id maps to "icon_eu". The frame stays
# None when no response matches.
for response in responses:
    model_name = models.get(response.Model(), "Unknown")
    if model_name == "icon_eu":
        new_icon_eu_df = new_extract_model_data(response, hourly_fields)
        break

In [None]:
new_forecast_length = len(new_icon_eu_df)
print("New method forecast length: ", new_forecast_length)
# Count rows that carry no forecast value at all. Only the requested forecast
# columns are checked: any always-populated non-forecast column (such as a
# timestamp, if the frame carries one) would otherwise force this to zero.
new_number_complete_nan_rows = (
    new_icon_eu_df[hourly_fields].isna().all(axis=1).sum()
)
print("New method number of complete NaN rows: ", new_number_complete_nan_rows)
# The rows come from an hourly request, so 24 rows make up one day.
new_forecast_length_days = new_forecast_length / 24
print("New method forecast length in days: ", new_forecast_length_days)

In [None]:
# Preview the first 10 rows of the frame produced by the new extraction method.
new_icon_eu_df.head(10)

In [None]:
# Inspect the last 10 rows of the new-method frame, i.e. where it ends.
new_icon_eu_df.tail(10)