In [None]:
import os
import time
import datetime
import pandas as pd

import openmeteo_requests
import requests_cache
from retry_requests import retry

# ─── CONFIGURATION ───────────────────────────────────────────────────────────────
# Lookup table that is read with pandas and filtered on its "CENTRAL" column.
CENTRAL_INFO_CSV = "../data/lookup/central_info.csv"

# Directory that receives one parquet file per plant.
OUTPUT_FOLDER = "../data/raw/open_mete_data"

# Upper bound on successful API calls performed by a single run.
MAX_DAILY_CALLS = 10000

# Hourly variables sent as the "hourly" request parameter.
# NOTE: the order matters — the response is read back by position
# (hourly.Variables(i)), so index i must keep pointing at the same name.
HOURLY_VARS = [
    "temperature_2m",
    "shortwave_radiation",
    "diffuse_radiation",
    "global_tilted_irradiance",
    "shortwave_radiation_instant",
    "diffuse_radiation_instant",
    "global_tilted_irradiance_instant",
    "direct_radiation",
    "direct_normal_irradiance",
    "terrestrial_radiation",
    "direct_radiation_instant",
    "direct_normal_irradiance_instant",
    "terrestrial_radiation_instant",
    "relative_humidity_2m",
    "dew_point_2m",
    "apparent_temperature",
    "precipitation",
    "rain",
    "pressure_msl",
    "surface_pressure",
    "et0_fao_evapotranspiration",
    "vapour_pressure_deficit",
    "cloud_cover",
    "cloud_cover_low",
    "cloud_cover_mid",
    "cloud_cover_high",
    "wind_speed_10m",
    "wind_direction_10m",
    "wind_gusts_10m",
]

# Plant names to keep; they are compared case-insensitively against "CENTRAL".
PLANTS_OF_INTEREST = [
    "parque eolico agua clara",
    "parque eolico de matafongo",
    "parque eolico guanillo",
    "parque eolico larimar",
    "parque eolico larimar ii",
    "parque eolico los guzmancitos",
    "parque eolico los guzmancitos 2",
    "parque fotovoltaico bayasol",
    "parque fotovoltaico calabaza",
    "parque fotovoltaico cumayasa 1",
    "parque fotovoltaico cumayasa 2",
    "parque fotovoltaico la victoria",
    "parque fotovoltaico los negros",
    "parque fotovoltaico maranatha fase i",
    "parque fotovoltaico mata de palma",
    "parque fotovoltaico matrisol",
    "parque fotovoltaico mirasol",
    "parque fotovoltaico montecristi solar 1",
    "parque fotovoltaico sajoma",
    "parque fotovoltaico santanasol",
    "parque fotovoltaico washington capital 2",
    "parque fotovoltaico washington capital 3",
]

# ─── SETUP Open‑Meteo CLIENT ──────────────────────────────────────────────────────
# Build the HTTP stack in three layers: a caching session, a retrying wrapper
# around it, and the Open-Meteo client that issues the actual API requests.

# Responses are cached under the name '.cache'.
# NOTE(review): expire_after=-1 presumably means "never expire" — confirm
# against the requests_cache documentation.
cache = requests_cache.CachedSession('.cache', expire_after=-1)
# Wrap the cached session with retry behaviour (retries=3, backoff_factor=0.2).
session = retry(cache, retries=3, backoff_factor=0.2)
# Client used by the main loop to call the weather API.
client = openmeteo_requests.Client(session=session)

# ─── LOAD AND FILTER CENTRAL INFO ─────────────────────────────────────────────────
# Read the lookup table and keep only the rows whose "CENTRAL" value matches
# one of PLANTS_OF_INTEREST, comparing both sides in lower case.
df = pd.read_csv(CENTRAL_INFO_CSV)
wanted_names = [plant.lower() for plant in PLANTS_OF_INTEREST]
is_wanted = df["CENTRAL"].str.lower().isin(wanted_names)
df_filtered = df[is_wanted].copy()

# The output directory must exist before any parquet file is written.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Run-wide state used by the main loop:
#   today       – end date of every request, as a UTC "YYYY-MM-DD" string
#   calls_today – number of successful API calls made so far
#   pending     – rows that still have to be downloaded
today = datetime.datetime.utcnow().strftime("%Y-%m-%d")
calls_today = 0
pending = df_filtered.copy()

# ─── MAIN LOOP ───────────────────────────────────────────────────────────────────
# For every pending plant, request the hourly archive from the plant's
# "FirstAppearance" date up to `today` and write it to one parquet file in
# OUTPUT_FOLDER.
#
# A plant leaves `pending` when its file has been saved or when the request
# fails with an error that is not a rate limit. Rate-limit errors are retried
# in place after a wait; if the retries are used up the plant stays in
# `pending` and is attempted again on the next pass of the outer loop.

# Pause applied after a per-minute rate-limit error. The API message asks to
# "try again in one minute"; a few extra seconds are added as a safety margin.
MINUTELY_LIMIT_WAIT_SECS = 65

while not pending.empty:
    # iterrows() walks the frame `pending` referred to when the pass started;
    # rebinding `pending` inside the loop does not disturb this iteration.
    for idx, row in pending.iterrows():
        if calls_today >= MAX_DAILY_CALLS:
            print("✅ Daily API call limit reached — stopping.")
            # Emptying `pending` makes the outer while loop terminate too.
            pending = pending.iloc[0:0]
            break

        name = row["CENTRAL"]
        lat, lon = row["Latitud"], row["Longitud"]
        start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")
        print(f"▶️ Processing {name}: {start_date} → {today}")

        params = {
            "latitude": lat,
            "longitude": lon,
            "start_date": start_date,
            "end_date": today,
            "hourly": HOURLY_VARS
        }

        retry_count = 0
        while retry_count <= 5:
            try:
                response = client.weather_api("https://archive-api.open-meteo.com/v1/archive", params=params)[0]
                # Only successful calls are counted.
                calls_today += 1

                # Rebuild the timestamp axis from the response's start, end
                # and interval (all given in seconds).
                hourly = response.Hourly()
                times = pd.date_range(
                    start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                    end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                    freq=pd.Timedelta(seconds=hourly.Interval()), inclusive="left"
                )

                # Variables are read back by position, in HOURLY_VARS order;
                # a missing variable becomes a column of NaN.
                data = {"date": times}
                for i, var in enumerate(HOURLY_VARS):
                    var_obj = hourly.Variables(i)
                    data[var] = var_obj.ValuesAsNumpy() if var_obj else [float("nan")] * len(times)

                df_out = pd.DataFrame(data)
                filename = f"{name.lower().replace(' ','_')}.parquet"
                df_out.to_parquet(os.path.join(OUTPUT_FOLDER, filename), index=False)
                print(f"✅ Saved {filename} — calls today: {calls_today}")
                pending = pending.drop(idx)
                break

            except Exception as e:
                message = str(e)
                if "Hourly API request limit exceeded" in message:
                    # Sleep until just past the top of the next UTC hour.
                    now = datetime.datetime.utcnow()
                    next_hour = (now.replace(minute=0, second=0, microsecond=0) + datetime.timedelta(hours=1))
                    wait_secs = (next_hour - now).total_seconds() + 5
                    print(f"⚠️ Hourly rate limit hit — sleeping {int(wait_secs/60)} minutes until {next_hour} UTC")
                    time.sleep(wait_secs)
                    retry_count += 1
                elif "Minutely API request limit exceeded" in message:
                    # The per-minute limit clears quickly, so wait and retry
                    # instead of discarding the plant.
                    print(f"⚠️ Minutely rate limit hit — sleeping {MINUTELY_LIMIT_WAIT_SECS} seconds before retrying {name}")
                    time.sleep(MINUTELY_LIMIT_WAIT_SECS)
                    retry_count += 1
                else:
                    # Any other failure is not retried: report it and drop
                    # the plant so the loop can move on.
                    print(f"❌ Unrecoverable error for {name}: {message}")
                    pending = pending.drop(idx)
                    break

print("✅ All plants processed.")


  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")
  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")
  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")


▶️ Processing parque eolico agua clara: 2019-02-22 → 2025-03-22
✅ Saved parque_eolico_agua_clara.parquet — calls today: 1
▶️ Processing parque eolico de matafongo: 2019-04-26 → 2025-03-22
✅ Saved parque_eolico_de_matafongo.parquet — calls today: 2
▶️ Processing parque eolico guanillo: 2019-04-26 → 2025-03-22
✅ Saved parque_eolico_guanillo.parquet — calls today: 3
▶️ Processing parque eolico larimar: 2016-07-05 → 2025-03-22
✅ Saved parque_eolico_larimar.parquet — calls today: 4
▶️ Processing parque eolico larimar ii: 2018-10-16 → 2025-03-22


  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")
  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")


✅ Saved parque_eolico_larimar_ii.parquet — calls today: 5
▶️ Processing parque eolico los guzmancitos: 2019-04-26 → 2025-03-22
✅ Saved parque_eolico_los_guzmancitos.parquet — calls today: 6
▶️ Processing parque eolico los guzmancitos 2: 2022-04-11 → 2025-03-22
✅ Saved parque_eolico_los_guzmancitos_2.parquet — calls today: 7
▶️ Processing parque fotovoltaico bayasol: 2021-12-03 → 2025-03-22
✅ Saved parque_fotovoltaico_bayasol.parquet — calls today: 8
▶️ Processing parque fotovoltaico calabaza: 2023-05-26 → 2025-03-22


  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")
  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")
  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")
  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")
  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")


✅ Saved parque_fotovoltaico_calabaza.parquet — calls today: 9
▶️ Processing parque fotovoltaico cumayasa 1: 2023-05-09 → 2025-03-22
✅ Saved parque_fotovoltaico_cumayasa_1.parquet — calls today: 10
▶️ Processing parque fotovoltaico cumayasa 2: 2023-05-09 → 2025-03-22
✅ Saved parque_fotovoltaico_cumayasa_2.parquet — calls today: 11
▶️ Processing parque fotovoltaico la victoria: 2024-11-22 → 2025-03-22
✅ Saved parque_fotovoltaico_la_victoria.parquet — calls today: 12
▶️ Processing parque fotovoltaico los negros: 2023-10-18 → 2025-03-22
✅ Saved parque_fotovoltaico_los_negros.parquet — calls today: 13
▶️ Processing parque fotovoltaico maranatha fase i: 2024-06-17 → 2025-03-22
✅ Saved parque_fotovoltaico_maranatha_fase_i.parquet — calls today: 14
▶️ Processing parque fotovoltaico mata de palma: 2019-04-26 → 2025-03-22
✅ Saved parque_fotovoltaico_mata_de_palma.parquet — calls today: 15
▶️ Processing parque fotovoltaico matrisol: 2023-07-27 → 2025-03-22
✅ Saved parque_fotovoltaico_matrisol.parquet — calls today: 16

  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")
  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")


✅ Saved parque_fotovoltaico_mirasol.parquet — calls today: 17
▶️ Processing parque fotovoltaico montecristi solar 1: 2018-08-28 → 2025-03-22


  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")


✅ Saved parque_fotovoltaico_montecristi_solar_1.parquet — calls today: 18
▶️ Processing parque fotovoltaico sajoma: 2024-08-13 → 2025-03-22


  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")


✅ Saved parque_fotovoltaico_sajoma.parquet — calls today: 19
▶️ Processing parque fotovoltaico santanasol: 2022-05-27 → 2025-03-22


  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")


✅ Saved parque_fotovoltaico_santanasol.parquet — calls today: 20
▶️ Processing parque fotovoltaico washington capital 2: 2024-11-29 → 2025-03-22
❌ Unrecoverable error for parque fotovoltaico washington capital 2: {'error': True, 'reason': 'Minutely API request limit exceeded. Please try again in one minute.'}
▶️ Processing parque fotovoltaico washington capital 3: 2024-11-29 → 2025-03-22


  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")
  start_date = pd.to_datetime(row["FirstAppearance"], dayfirst=True).strftime("%Y-%m-%d")


❌ Unrecoverable error for parque fotovoltaico washington capital 3: {'error': True, 'reason': 'Minutely API request limit exceeded. Please try again in one minute.'}
✅ All plants processed.
