In [2]:
import pandas as pd
import requests
from datetime import datetime, timedelta

# Load the energy consumption data
data_path = 'Pylaia/all_data.csv'
data = pd.read_csv(data_path)

# Define the function to fetch weather data from Open Meteo API
def get_historical_weather_open_meteo(date, latitude=40.6401, longitude=22.9444):
    """
    Get weather data for a specific date using Open Meteo API.

    Args:
        date (datetime.date): The date for which weather data is required.
        latitude (float): Latitude for the location.
        longitude (float): Longitude for the location.

    Returns:
        float: The mean temperature for the day.
        float: Total precipitation for the day (mm).
        bool: Whether the day was sunny or not (cloud cover < 50%).
    """
    url = "https://archive-api.open-meteo.com/v1/era5"
    params = {
        'latitude': latitude,
        'longitude': longitude,
        'start_date': date.strftime('%Y-%m-%d'),
        'end_date': date.strftime('%Y-%m-%d'),
        'daily': 'temperature_2m_mean,precipitation_sum,cloudcover_mean'
    }
    response = requests.get(url, params=params)
    data = response.json()

    if 'daily' in data:
        daily_data = data['daily']
        avg_temp = daily_data.get('temperature_2m_mean', [None])[0]
        total_precipitation = daily_data.get('precipitation_sum', [0])[0]
        avg_cloud_cover = daily_data.get('cloudcover_mean', [100])[0]
        sunny = avg_cloud_cover < 50  # Relaxed criteria for sunny days

        return avg_temp, total_precipitation, sunny
    else:
        return None, None, None

# Add new columns to the dataset
data["Total Rainfall (mm)"] = 0
data["Mean Temperature (°C)"] = 0
data["Total Sunny Days"] = 0

# Fetch weather data for each row in the dataset
for index, row in data.iterrows():
    year = int(row["Year"])
    month = int(row["Month"])
    days_in_month = 28 if month == 2 else 30
    total_temp = 0
    total_rainfall = 0
    total_sunny_days = 0
    valid_days = 0

    print(f"Processing year: {year}, month: {month}")

    for day in range(1, days_in_month + 1):
        date = datetime(year, month, day).date()
        try:
            mean_temp, daily_rainfall, sunny = get_historical_weather_open_meteo(date)
            if mean_temp is not None:
                total_temp += mean_temp
                total_rainfall += daily_rainfall
                total_sunny_days += 1 if sunny else 0
                valid_days += 1
        except Exception as e:
            print(f"Error fetching data for {year}-{month}-{day}: {e}")

    if valid_days > 0:
        data.at[index, "Mean Temperature (°C)"] = total_temp / valid_days
        data.at[index, "Total Rainfall (mm)"] = total_rainfall
        data.at[index, "Total Sunny Days"] = total_sunny_days

# Save the enriched dataset
enriched_data_path = 'all_data_with_weather.csv'
data.to_csv(enriched_data_path, index=False)

print(f"Enriched data saved to {enriched_data_path}")

Processing year: 2014, month: 4
Error fetching data for 2014-4-8: Expecting value: line 1 column 1 (char 0)


  data.at[index, "Mean Temperature (°C)"] = total_temp / valid_days
  data.at[index, "Total Rainfall (mm)"] = total_rainfall


Processing year: 2014, month: 4
Processing year: 2014, month: 4


KeyboardInterrupt: 

In [1]:
import pandas as pd
import requests
from datetime import datetime, timedelta

# Load the energy consumption data
data_path = 'Pylaia/all_data.csv'
data = pd.read_csv(data_path)

# Define the function to fetch weather data from Open Meteo API
def get_historical_weather_open_meteo(date, latitude=40.6401, longitude=22.9444):
    """
    Get weather data for a specific date using Open Meteo API.

    Args:
        date (datetime.date): The date for which weather data is required.
        latitude (float): Latitude for the location.
        longitude (float): Longitude for the location.

    Returns:
        float: The mean temperature for the day.
        float: Total precipitation for the day (mm).
        bool: Whether the day was sunny or not (cloud cover < 50%).
    """
    url = "https://archive-api.open-meteo.com/v1/era5"
    params = {
        'latitude': latitude,
        'longitude': longitude,
        'start_date': date.strftime('%Y-%m-%d'),
        'end_date': date.strftime('%Y-%m-%d'),
        'daily': 'temperature_2m_mean,precipitation_sum,cloudcover_mean'
    }
    response = requests.get(url, params=params)
    data = response.json()

    if 'daily' in data:
        daily_data = data['daily']
        avg_temp = daily_data.get('temperature_2m_mean', [None])[0]
        total_precipitation = daily_data.get('precipitation_sum', [0])[0]
        avg_cloud_cover = daily_data.get('cloudcover_mean', [100])[0]
        sunny = avg_cloud_cover < 50  # Relaxed criteria for sunny days

        return avg_temp, total_precipitation, sunny
    else:
        return None, None, None

# Add new columns to the dataset if they are not already present
if "Total Rainfall (mm)" not in data.columns:
    data["Total Rainfall (mm)"] = 0
if "Mean Temperature (°C)" not in data.columns:
    data["Mean Temperature (°C)"] = 0
if "Total Sunny Days" not in data.columns:
    data["Total Sunny Days"] = 0

# Dictionary to store computed weather data for month-year pairs
weather_cache = {}

# Fetch weather data for each row in the dataset
for index, row in data.iterrows():
    year = int(row["Year"])
    month = int(row["Month"])
    month_year_key = (year, month)

    # Check if the data for this month and year has already been computed
    if month_year_key in weather_cache:
        mean_temp, total_rainfall, total_sunny_days = weather_cache[month_year_key]
    else:
        days_in_month = 28 if month == 2 else 30
        total_temp = 0
        total_rainfall = 0
        total_sunny_days = 0
        valid_days = 0

        print(f"Processing year: {year}, month: {month}")

        for day in range(1, days_in_month + 1):
            date = datetime(year, month, day).date()
            try:
                mean_temp, daily_rainfall, sunny = get_historical_weather_open_meteo(date)
                if mean_temp is not None:
                    total_temp += mean_temp
                    total_rainfall += daily_rainfall
                    total_sunny_days += 1 if sunny else 0
                    valid_days += 1
            except Exception as e:
                print(f"Error fetching data for {year}-{month}-{day}: {e}")

        if valid_days > 0:
            mean_temp = total_temp / valid_days
        else:
            mean_temp = 0

        # Store calculated values in the cache
        weather_cache[month_year_key] = (mean_temp, total_rainfall, total_sunny_days)

    # Assign cached or newly computed values to the dataset
    data.at[index, "Mean Temperature (°C)"] = mean_temp
    data.at[index, "Total Rainfall (mm)"] = total_rainfall
    data.at[index, "Total Sunny Days"] = total_sunny_days

# Save the enriched dataset
enriched_data_path = 'all_data_with_weather.csv'
data.to_csv(enriched_data_path, index=False)

print(f"Enriched data saved to {enriched_data_path}")


Processing year: 2014, month: 4


  data.at[index, "Mean Temperature (°C)"] = mean_temp
  data.at[index, "Total Rainfall (mm)"] = total_rainfall


Processing year: 2014, month: 8
Processing year: 2014, month: 12
Processing year: 2014, month: 2
Processing year: 2014, month: 1
Processing year: 2014, month: 7
Processing year: 2014, month: 6
Processing year: 2014, month: 3
Processing year: 2014, month: 5
Processing year: 2014, month: 11
Processing year: 2014, month: 10
Processing year: 2014, month: 9
Processing year: 2015, month: 4
Processing year: 2015, month: 8
Processing year: 2015, month: 12
Processing year: 2015, month: 2
Processing year: 2015, month: 1
Processing year: 2015, month: 7
Processing year: 2015, month: 6
Processing year: 2015, month: 3
Processing year: 2015, month: 5
Processing year: 2015, month: 11
Processing year: 2015, month: 10
Processing year: 2015, month: 9
Processing year: 2016, month: 4
Processing year: 2016, month: 8
Processing year: 2016, month: 12
Processing year: 2016, month: 2
Processing year: 2016, month: 1
Processing year: 2016, month: 7
Processing year: 2016, month: 6
Processing year: 2016, month: 3
P