In [1]:
import pandas as pd

In [3]:
# Load the Kepler KOI table. Forward slashes are used so the relative path
# resolves on Windows as well as on POSIX systems.
df = pd.read_csv("../data/kepler_KOI_full_dataset.csv")

In [6]:
# Keep only the rows that have a value in the "kepler_name" column.
df = df.dropna(subset=["kepler_name"])

In [9]:
# Count the missing "kepler_name" values that remain in df.
df["kepler_name"].isna().sum()

np.int64(0)

In [None]:
import requests
import os
from typing import List, Tuple, Dict, Any
import pandas as pd

BASE_URL = "https://archive-api.open-meteo.com/v1/archive"

def fetch_point_history(
    latitude: float,
    longitude: float,
    start_date: str,
    end_date: str,
    variables: List[str],
    timezone: str = "auto"
) -> Dict[str, Any]:
    """
    Fetch historical hourly data from Open-Meteo for one location.

    Args:
        latitude: Latitude of the point, sent as the "latitude" parameter.
        longitude: Longitude of the point, sent as the "longitude" parameter.
        start_date: First day of the range, sent as "start_date".
        end_date: Last day of the range, sent as "end_date".
        variables: Variable names; joined with commas into the "hourly"
            parameter.
        timezone: Value of the "timezone" parameter (default "auto").

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: The server answered with an error status. When
            the body carries a "reason" field it is included in the message.
        requests.RequestException: Any other failure raised by
            ``requests.get`` (for example a timeout after 60 seconds).
        RuntimeError: The status was fine but the payload has an "error" key.
    """
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": ",".join(variables),
        "timezone": timezone
    }
    resp = requests.get(BASE_URL, params=params, timeout=60)

    if not resp.ok:
        # Open-Meteo documents failures as HTTP 400 with a JSON body such as
        # {"error": true, "reason": "..."}; surface that reason instead of the
        # generic status text that raise_for_status() would produce.
        try:
            reason = resp.json().get("reason")
        except (ValueError, AttributeError):
            # Body is not JSON, or not a JSON object: no reason to report.
            reason = None
        if reason:
            raise requests.HTTPError(
                f"HTTP {resp.status_code} for ({latitude},{longitude}): {reason}",
                response=resp,
            )
        resp.raise_for_status()

    data = resp.json()
    if "error" in data:
        # Prefer the human-readable "reason"; fall back to the raw flag.
        detail = data.get("reason") or data["error"]
        raise RuntimeError(f"API error for ({latitude},{longitude}): {detail}")
    return data

def data_to_dataframe(
    data: Dict[str, Any],
    variables: List[str]
) -> pd.DataFrame:
    """
    Build a DataFrame from the "hourly" section of one point's JSON result.

    The frame has a "time" column followed by one column per entry of
    ``variables``. A variable that is absent from the payload (or whose value
    is None) becomes a column of None with one entry per timestamp.
    """
    hourly = data.get("hourly", {})
    timestamps = hourly.get("time", [])

    columns: Dict[str, Any] = {"time": timestamps}
    for name in variables:
        series = hourly.get(name)
        # Pad missing variables so every column matches the time axis.
        columns[name] = [None] * len(timestamps) if series is None else series

    return pd.DataFrame(columns)

def save_dataframe_to_csv(
    df: pd.DataFrame,
    filepath: str
) -> None:
    """
    Write ``df`` to ``filepath`` as UTF-8 CSV without the index column.

    Missing parent directories are created first. A bare file name such as
    "out.csv" is written to the current working directory.
    """
    parent = os.path.dirname(filepath)
    # dirname() is "" for a bare file name, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    df.to_csv(filepath, index=False, encoding="utf-8")

def fetch_countries(
    country_points: Dict[str, List[Tuple[str, float, float]]],
    start_date: str,
    end_date: str,
    variables: List[str],
    output_base_dir: str = "output"
) -> Dict[str, Dict[str, pd.DataFrame]]:
    """
    Fetch history for every (city, lat, lon) point and save one CSV per city.

    Each city's data is written to ``<output_base_dir>/<country>/<city>.csv``,
    with spaces in the city name replaced by underscores. Progress is printed
    as the work proceeds. A city whose fetch raises is reported and skipped,
    so it is absent from the result.

    Returns:
        A mapping of country -> city -> DataFrame for the cities fetched.
    """
    results_by_country: Dict[str, Dict[str, pd.DataFrame]] = {}

    for country, points in country_points.items():
        print(f"Processing country: {country}")
        frames_by_city: Dict[str, pd.DataFrame] = {}

        for city, lat, lon in points:
            safe_city = city.replace(" ", "_")
            print(f"  Fetching {city} (lat={lat}, lon={lon}) …")
            try:
                payload = fetch_point_history(lat, lon, start_date, end_date, variables)
            except Exception as exc:
                # Best effort: one failing city must not abort the whole run.
                print(f"    Error for {city}: {exc}")
            else:
                frame = data_to_dataframe(payload, variables)
                csv_path = os.path.join(
                    os.path.join(output_base_dir, country),
                    f"{safe_city}.csv",
                )
                save_dataframe_to_csv(frame, csv_path)
                print(f"    Saved CSV: {csv_path} ({len(frame)} rows)")
                frames_by_city[city] = frame

        results_by_country[country] = frames_by_city

    return results_by_country

if __name__ == "__main__":
    # Points to download, grouped by country: (city, latitude, longitude).
    country_points = {
        "Greece": [
            ("Athens", 37.9838, 23.7275),
            ("Thessaloniki", 40.6401, 22.9444),
            ("Heraklion", 35.3387, 25.1442),
        ],
        "Spain": [
            ("Madrid", 40.4168, -3.7038),
            ("Barcelona", 41.3851, 2.1734),
            ("Seville", 37.3891, -5.9845),
        ],
        "Italy": [
            ("Rome", 41.9028, 12.4964),
            ("Milan", 45.4642, 9.1900),
            ("Naples", 40.8518, 14.2681),
        ],
    }

    # Date range handed to fetch_countries.
    start, end = "2021-01-01", "2021-12-31"

    # Variable names requested for every point.
    variables = [
        "temperature_2m",
        "relative_humidity_2m",
        "wind_speed_10m",
        "wind_direction_10m",
        "vapour_pressure_deficit",
        "precipitation",
        "shortwave_radiation",
        "soil_moisture_0_to_7cm",
        "et0_fao_evapotranspiration",
    ]

    results = fetch_countries(
        country_points,
        start,
        end,
        variables,
        output_base_dir="weather_data",
    )

    # Preview the head of the first fetched city in each country only.
    for country, city_dict in results.items():
        print(f"\nSample data in {country}:")
        for city, df in list(city_dict.items())[:1]:
            print(f"  {city} → first rows:")
            print(df.head())


Processing country: Greece
  Fetching Athens (lat=37.9838, lon=23.7275) …
    Saved CSV: weather_data\Greece\Athens.csv (8760 rows)
  Fetching Thessaloniki (lat=40.6401, lon=22.9444) …
    Saved CSV: weather_data\Greece\Thessaloniki.csv (8760 rows)
  Fetching Heraklion (lat=35.3387, lon=25.1442) …
    Saved CSV: weather_data\Greece\Heraklion.csv (8760 rows)
Processing country: Spain
  Fetching Madrid (lat=40.4168, lon=-3.7038) …
    Saved CSV: weather_data\Spain\Madrid.csv (8760 rows)
  Fetching Barcelona (lat=41.3851, lon=2.1734) …
    Saved CSV: weather_data\Spain\Barcelona.csv (8760 rows)
  Fetching Seville (lat=37.3891, lon=-5.9845) …
    Saved CSV: weather_data\Spain\Seville.csv (8760 rows)
Processing country: Italy
  Fetching Rome (lat=41.9028, lon=12.4964) …
    Saved CSV: weather_data\Italy\Rome.csv (8760 rows)
  Fetching Milan (lat=45.4642, lon=9.19) …
    Saved CSV: weather_data\Italy\Milan.csv (8760 rows)
  Fetching Naples (lat=40.8518, lon=14.2681) …
    Saved CSV: weather_