In [16]:
import pandas as pd
import requests
import time
from datetime import datetime
import os

In [17]:
# Configuration
API_KEY = "51a1a1385amsh8a2062112967694p11c6c0jsncc63f327d026"
BASE_URL = "https://meteostat.p.rapidapi.com/point/monthly"
HEADERS = {
    "X-RapidAPI-Key": API_KEY,
    "X-RapidAPI-Host": "meteostat.p.rapidapi.com"
}

In [18]:
# Coordinates for each country (example - adjust as needed)
LOCATIONS = {
    'France': {'lat': 46.2276, 'lon': 2.2137},
    'Italy': {'lat': 41.8719, 'lon': 12.5674},
    'Spain': {'lat': 40.4637, 'lon': -3.7492}
}

In [19]:
def fetch_hourly_weather(lat, lon, start_date, end_date):
    """Fetch hourly weather data with proper column validation"""
    params = {
        "lat": lat,
        "lon": lon,
        "start": start_date,
        "end": end_date,
        "tz": "UTC",
        "model": "true"
    }
    
    try:
        response = requests.get(BASE_URL, headers=HEADERS, params=params)
        response.raise_for_status()
        data = response.json()
        
        if 'data' not in data:
            print(f"No data key in response for {start_date} to {end_date}")
            return pd.DataFrame()
            
        return pd.DataFrame(data['data'])
    except Exception as e:
        print(f"Error fetching {start_date} to {end_date}: {str(e)}")
        return pd.DataFrame()



In [20]:
def process_weather_df(df, country):
    """Process raw weather DataFrame with safe column handling"""
    if df.empty:
        return df
        
    # Available columns in API response
    available_cols = df.columns.tolist()
    
    # Expected columns with their API names
    column_mapping = {
        'time': 'datetime',
        'temp': 'temperature_c',
        'rhum': 'humidity_pct',
        'tsun': 'solar_radiation_min',
        'coco': 'weather_code'
    }
    
    # Only process columns that exist in the response
    valid_columns = {api_col: new_name 
                    for api_col, new_name in column_mapping.items()
                    if api_col in available_cols}
    
    # Rename and select columns
    processed_df = df.rename(columns=valid_columns)[list(valid_columns.values())]
    
    # Add country identifier
    processed_df['country'] = country
    
    # Convert datetime if present
    if 'datetime' in processed_df.columns:
        processed_df['datetime'] = pd.to_datetime(processed_df['datetime'])
    
    return processed_df



In [21]:
def main():
    start_year, end_year = 2016, 2022
    os.makedirs('../data/processed/weather', exist_ok=True)
    
    for country, coords in LOCATIONS.items():
        print(f"\nFetching HOURLY data for {country}...")
        df = fetch_multi_year_hourly(coords['lat'], coords['lon'], start_year, end_year)
        
        processed_df = process_weather_df(df, country)
        
        if processed_df.empty:
            print(f"No valid data processed for {country}!")
            continue
            
        output_path = f'../data/processed/weather/{country}_hourly_weather.csv'
        processed_df.to_csv(output_path, index=False)
        print(f"Saved {len(processed_df)} records to {output_path}")
        print("Columns saved:", processed_df.columns.tolist())



In [22]:
if __name__ == "__main__":
    main()


Fetching HOURLY data for France...
Fetched 2016-01-01 to 2016-01-31
Fetched 2016-02-01 to 2016-03-02
Fetched 2016-03-03 to 2016-04-02
Fetched 2016-04-03 to 2016-05-03
Fetched 2016-05-04 to 2016-06-03
Fetched 2016-06-04 to 2016-07-04
Fetched 2016-07-05 to 2016-08-04
Fetched 2016-08-05 to 2016-09-04
Fetched 2016-09-05 to 2016-10-05
Fetched 2016-10-06 to 2016-11-05
Fetched 2016-11-06 to 2016-12-06
Fetched 2016-12-07 to 2017-01-06
Fetched 2017-01-07 to 2017-02-06
Fetched 2017-02-07 to 2017-03-09
Fetched 2017-03-10 to 2017-04-09
Fetched 2017-04-10 to 2017-05-10
Fetched 2017-05-11 to 2017-06-10
Fetched 2017-06-11 to 2017-07-11
Fetched 2017-07-12 to 2017-08-11
Fetched 2017-08-12 to 2017-09-11
Fetched 2017-09-12 to 2017-10-12
Fetched 2017-10-13 to 2017-11-12
Fetched 2017-11-13 to 2017-12-13
Fetched 2017-12-14 to 2018-01-13
Fetched 2018-01-14 to 2018-02-13
Fetched 2018-02-14 to 2018-03-16
Fetched 2018-03-17 to 2018-04-16
Fetched 2018-04-17 to 2018-05-17
Fetched 2018-05-18 to 2018-06-17
Fetched

  return pd.concat(all_data) if all_data else pd.DataFrame()


Saved 162 records to ../data/processed/weather/France_hourly_weather.csv
Columns saved: ['solar_radiation_min', 'country']

Fetching HOURLY data for Italy...
Fetched 2016-01-01 to 2016-01-31
Fetched 2016-02-01 to 2016-03-02
Fetched 2016-03-03 to 2016-04-02
Fetched 2016-04-03 to 2016-05-03
Fetched 2016-05-04 to 2016-06-03
Fetched 2016-06-04 to 2016-07-04
Fetched 2016-07-05 to 2016-08-04
Fetched 2016-08-05 to 2016-09-04
Fetched 2016-09-05 to 2016-10-05
Fetched 2016-10-06 to 2016-11-05
Fetched 2016-11-06 to 2016-12-06
Fetched 2016-12-07 to 2017-01-06
Fetched 2017-01-07 to 2017-02-06
Fetched 2017-02-07 to 2017-03-09
Fetched 2017-03-10 to 2017-04-09
Fetched 2017-04-10 to 2017-05-10
Fetched 2017-05-11 to 2017-06-10
Fetched 2017-06-11 to 2017-07-11
Fetched 2017-07-12 to 2017-08-11
Fetched 2017-08-12 to 2017-09-11
Fetched 2017-09-12 to 2017-10-12
Fetched 2017-10-13 to 2017-11-12
Fetched 2017-11-13 to 2017-12-13
Fetched 2017-12-14 to 2018-01-13
Fetched 2018-01-14 to 2018-02-13
Fetched 2018-02-1

  return pd.concat(all_data) if all_data else pd.DataFrame()


Saved 162 records to ../data/processed/weather/Italy_hourly_weather.csv
Columns saved: ['solar_radiation_min', 'country']

Fetching HOURLY data for Spain...
Fetched 2016-01-01 to 2016-01-31
Fetched 2016-02-01 to 2016-03-02
Fetched 2016-03-03 to 2016-04-02
Fetched 2016-04-03 to 2016-05-03
Fetched 2016-05-04 to 2016-06-03
Fetched 2016-06-04 to 2016-07-04
Fetched 2016-07-05 to 2016-08-04
Fetched 2016-08-05 to 2016-09-04
Fetched 2016-09-05 to 2016-10-05
Fetched 2016-10-06 to 2016-11-05
Fetched 2016-11-06 to 2016-12-06
Fetched 2016-12-07 to 2017-01-06
Fetched 2017-01-07 to 2017-02-06
Fetched 2017-02-07 to 2017-03-09
Fetched 2017-03-10 to 2017-04-09
Fetched 2017-04-10 to 2017-05-10
Fetched 2017-05-11 to 2017-06-10
Fetched 2017-06-11 to 2017-07-11
Fetched 2017-07-12 to 2017-08-11
Fetched 2017-08-12 to 2017-09-11
Fetched 2017-09-12 to 2017-10-12
Fetched 2017-10-13 to 2017-11-12
Fetched 2017-11-13 to 2017-12-13
Fetched 2017-12-14 to 2018-01-13
Fetched 2018-01-14 to 2018-02-13
Fetched 2018-02-14

  return pd.concat(all_data) if all_data else pd.DataFrame()
