In [None]:
import pandas as pd
from pathlib import Path
from datetime import datetime
from src.power_api import PowerAPI

In [None]:
# Load the raw sightings table into memory.
sightings_csv = 'data/table_mountain_sightings.csv'
df = pd.read_csv(sightings_csv)

In [None]:
# Print a summary of the frame: column names, non-null counts, dtypes and
# memory usage.
df.info()

In [None]:
# Summary statistics, transposed so that each described column becomes a row.
df.describe().transpose()

In [None]:
# Peek at the first five rows.
df.head(n=5)

In [None]:
# Keep only sightings that have an id and both coordinates. Ids are turned
# into strings because they are used in file names and as a join key below.
coordinates_df = (
    df[['id', 'latitude', 'longitude']]
    .dropna()
    .astype({'id': str})
)

# Directory that receives one weather file per observation.
OUTPUT_DIR = Path("data/weather_10yr")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Ten-year window ending at midnight today.
end = pd.Timestamp.today().normalize()
start = end - pd.DateOffset(years=10)

# Compact YYYYMMDD stamps, used in file names and log messages.
start_date_str, end_date_str = (ts.strftime('%Y%m%d') for ts in (start, end))

# Fetch weather data for each coordinate.
# One CSV per observation is written to OUTPUT_DIR. A file that already exists
# for the same id and date range is treated as a cache hit and is not fetched
# again. itertuples is used instead of iterrows so that no Series is built per
# row and the (unused) index is not carried around.
for obs_id, lat, lon in coordinates_df[['id', 'latitude', 'longitude']].itertuples(
    index=False, name=None
):
    output_file = OUTPUT_DIR / f"{obs_id}_{start_date_str}_{end_date_str}.csv"
    if output_file.exists():
        print(f"[{obs_id}] Already exists. Skipping.")
        continue

    try:
        print(f"[{obs_id}] Fetching weather from {start_date_str} to {end_date_str} at ({lat}, {lon})")

        weather = PowerAPI(
            start=start,
            end=end,
            long=lon,
            lat=lat
        )
        weather_df = weather.get_weather()

        if not weather_df.empty:
            # Write under a temporary name and rename once the write has
            # finished. An interrupted run can then never leave a truncated
            # "<id>_<start>_<end>.csv" behind, which the exists() check above
            # would otherwise mistake for a complete download.
            tmp_file = output_file.with_name(output_file.name + ".tmp")
            weather_df.to_csv(tmp_file, sep=";")
            tmp_file.replace(output_file)
            print(f"[{obs_id}] Saved to {output_file}")
        else:
            print(f"[{obs_id}] No weather data returned.")

    except Exception as e:
        # Best effort: report the failure and carry on with the next
        # observation; a later run retries it because no file was created.
        print(f"[{obs_id}] Error: {e}")

# Now combine all files into one DataFrame
INPUT_DIR = OUTPUT_DIR  # Directory where all files are saved
OUTPUT_FILE = Path("data/all_weather_10yr_combined.csv")

# One coordinate row per id, so the left merge below can never multiply
# weather rows when the sightings table repeats an id. The first occurrence is
# kept: it is the one whose coordinates were used for the download, because
# later duplicates hit the "Already exists" check in the fetch loop.
unique_coordinates_df = coordinates_df.drop_duplicates(subset='id')

# File names embed the date range, and that range moves forward every day.
# Only files of the current range are combined; otherwise leftovers from an
# earlier run would add a second, overlapping set of rows for the same id.
range_suffix = f"_{start_date_str}_{end_date_str}"

all_weather_dfs = []

# Sorted so that the row order of the combined file is reproducible.
for file in sorted(INPUT_DIR.glob(f"*{range_suffix}.csv")):
    try:
        weather_df = pd.read_csv(file, sep=';')
        # Names look like "<id>_<start>_<end>.csv"; split from the right so an
        # id that itself contains underscores is recovered intact.
        obs_id = file.stem.rsplit('_', 2)[0]
        weather_df['id'] = obs_id

        # Merge coordinates to each weather data row
        merged_df = weather_df.merge(unique_coordinates_df, on='id', how='left')
        all_weather_dfs.append(merged_df)

    except Exception as e:
        # An unreadable file is reported and skipped; the rest is still combined.
        print(f"Error reading {file.name}: {e}")

if all_weather_dfs:
    final_df = pd.concat(all_weather_dfs, ignore_index=True)
    final_df.to_csv(OUTPUT_FILE, index=False)
    print(f"Combined weather data saved to {OUTPUT_FILE}")
else:
    print("No weather data found to combine.")