In [16]:
import pandas as pd
import requests
import time
from datetime import datetime
import os

In [17]:
# Configuration
API_KEY = "51a1a1385amsh8a2062112967694p11c6c0jsncc63f327d026"
BASE_URL = "https://meteostat.p.rapidapi.com/point/monthly"
HEADERS = {
    "X-RapidAPI-Key": API_KEY,
    "X-RapidAPI-Host": "meteostat.p.rapidapi.com"
}

In [18]:
# Coordinates for each country (example - adjust as needed)
LOCATIONS = {
    'France': {'lat': 46.2276, 'lon': 2.2137},
    'Italy': {'lat': 41.8719, 'lon': 12.5674},
    'Spain': {'lat': 40.4637, 'lon': -3.7492}
}

In [19]:
def fetch_weather_chunk(lat, lon, start, end):
    """Fetch one month of hourly data with error handling"""
    params = {
        "lat": lat,
        "lon": lon,
        "start": start,
        "end": end,
        "tz": "UTC",
        "model": "true"  # Fill missing with modeled data
    }
    
    try:
        response = requests.get(BASE_URL, headers=HEADERS, params=params)
        response.raise_for_status()
        data = response.json()
        return pd.DataFrame(data.get("data", []))
    except Exception as e:
        print(f"Failed {start} to {end}: {str(e)}")
        return pd.DataFrame()



In [20]:
def fetch_country_weather(country, lat, lon, start_year, end_year):
    """Fetch all years for one country"""
    all_data = []
    current_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31, 23)
    
    while current_date <= end_date:
        chunk_end = min(current_date + timedelta(days=30), end_date)
        
        chunk = fetch_weather_chunk(
            lat, lon,
            current_date.strftime("%Y-%m-%d"),
            chunk_end.strftime("%Y-%m-%d")
        )
        
        if not chunk.empty:
            all_data.append(chunk)
            print(f"{country}: {current_date.date()} to {chunk_end.date()}")
        else:
            print(f"{country}: Failed {current_date.date()}")
        
        current_date = chunk_end + timedelta(days=1)
        time.sleep(1.2)  # Stay under API rate limits
    
    if all_data:
        df = pd.concat(all_data)
        # Keep only essential columns if they exist
        cols_to_keep = []
        if 'time' in df.columns:
            cols_to_keep.append('time')
        if 'temp' in df.columns:
            cols_to_keep.append('temp')
        if 'rhum' in df.columns:
            cols_to_keep.append('rhum')
        if 'tsun' in df.columns:
            cols_to_keep.append('tsun')
        
        return df[cols_to_keep] if cols_to_keep else pd.DataFrame()
    return pd.DataFrame()



In [21]:
# --- Execution ---
def main():
    os.makedirs("../data/raw/weather", exist_ok=True)
    
    for country, coords in COUNTRIES.items():
        print(f"\n🌍 Fetching {country} (2016-2022)...")
        df = fetch_country_weather(
            country=country,
            lat=coords["lat"],
            lon=coords["lon"],
            start_year=2016,
            end_year=2022
        )
        
        if not df.empty:
            # Rename columns
            column_map = {
                'time': 'datetime',
                'temp': 'temperature_c',
                'rhum': 'humidity_pct',
                'tsun': 'solar_radiation_min'
            }
            df = df.rename(columns={k: v for k, v in column_map.items() if k in df.columns})
            
            # Add country column
            df['country'] = country
            
            # Save raw data
            raw_path = f"../data/raw/weather/{country}_hourly_weather_2016_2022.csv"
            df.to_csv(raw_path, index=False)
            print(f"Saved {len(df)} records to {raw_path}")
            print("Columns saved:", df.columns.tolist())
        else:
            print(f"No data saved for {country}!")

In [22]:
if __name__ == "__main__":
    main()

NameError: name 'COUNTRIES' is not defined