In [2]:
# Import necessary packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import json
plt.rcParams["figure.figsize"] = (16,5)
import shap
shap.initjs()
from IPython.display import Latex
from IPython.display import Image
from helper_functions import Helper
from agents import GreenEnergy_Recommendation_Agent
from copy import deepcopy
import interpret
from interpret.glassbox import ExplainableBoostingClassifier 
from interpret import show
import warnings
warnings.filterwarnings("ignore")
import os
from datetime import datetime
helper = Helper()

In [None]:
# Identifiers of the shiftable devices handed to the agent and the emissions helpers.
shiftable_devices = [
    'predicted_consumption_washing_machine',
    'predicted_consumption_dish_washer',
]

In [5]:
# Load the pickled input frames (paths are relative to the working directory).
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling —
# only load these files if they come from a trusted source.
activity_df = pd.read_pickle('activity_df.pkl')
load_df = pd.read_pickle('load_df.pkl')
usage_df = pd.read_pickle('usage_df.pkl')
price_df = pd.read_pickle('price_df.pkl')

In [6]:
# The generation mix dataset, read from CSV in the working directory.
# Later cells read its 'DATETIME' and 'CARBON_INTENSITY' columns.
generation_mix_file = pd.read_csv("df_fuel_ckan.csv")

In [7]:
# Parse the timestamp column in place; errors='coerce' turns unparseable
# values into NaT instead of raising, so bad rows pass silently.
generation_mix_file['DATETIME'] = pd.to_datetime(generation_mix_file['DATETIME'],  errors='coerce')

In [9]:
# Instantiate the new recommendation agent from the frames loaded above.
# NOTE(review): price_df is loaded earlier but is not passed to this agent.
green_energy_agent = GreenEnergy_Recommendation_Agent(
    activity_input=activity_df, 
    usage_input=usage_df, 
    load_input=load_df, 
    energy_data=generation_mix_file,  # generation-mix frame read from df_fuel_ckan.csv
    shiftable_devices=shiftable_devices
)




In [181]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def generate_date_range(start_date_str, end_date_str):
    """Return every calendar day from start to end (both included) as 'YYYY-MM-DD' strings.

    Both arguments must be 'YYYY-MM-DD' strings. An end date that lies before
    the start date yields an empty list.
    """
    day_format = '%Y-%m-%d'
    first_day = datetime.strptime(start_date_str, day_format).date()
    last_day = datetime.strptime(end_date_str, day_format).date()

    # Number of days in the closed interval; zero or negative gives an empty range.
    span_in_days = (last_day - first_day).days + 1
    return [
        (first_day + timedelta(days=offset)).strftime(day_format)
        for offset in range(span_in_days)
    ]



def generate_recommendations(
    agent,
    start_date: str,
    end_date: str,
    activity_prob_threshold: float = 0.5,
    usage_prob_threshold: float = 0.5,
    output_file: str = "recommendations.csv"
):
    """
    Run the agent once per day of a date range and collect what it recommends.

    Days for which the agent raises are reported on stdout and skipped; days
    for which it returns None are skipped silently. When at least one day
    produced a result, the combined frame is also written to `output_file`.

    Parameters:
        agent: Object exposing `pipeline(date=..., activity_prob_threshold=...,
            usage_prob_threshold=...)`; `date` is passed as a 'YYYY-MM-DD' string.
        start_date (str): First day of the range, 'YYYY-MM-DD'.
        end_date (str): Last day of the range (inclusive), 'YYYY-MM-DD'.
        activity_prob_threshold (float): Forwarded unchanged to the agent.
        usage_prob_threshold (float): Forwarded unchanged to the agent.
        output_file (str): Path of the CSV file to write (without index).

    Returns:
        pd.DataFrame: All daily results concatenated with a fresh index, or an
        empty DataFrame (and no file) when nothing was produced.
    """
    collected = []

    for day in pd.date_range(start=start_date, end=end_date):
        day_label = day.strftime('%Y-%m-%d')
        try:
            daily_result = agent.pipeline(
                date=day_label,
                activity_prob_threshold=activity_prob_threshold,
                usage_prob_threshold=usage_prob_threshold
            )
        except Exception as e:
            # Best effort: one failing day must not abort the whole range.
            print(f"Error generating recommendations for {day_label}: {e}")
            continue

        if daily_result is not None:
            collected.append(daily_result)

    if not collected:
        print("No recommendations generated.")
        return pd.DataFrame()

    final_recommendations = pd.concat(collected, ignore_index=True)
    final_recommendations.to_csv(output_file, index=False)
    return final_recommendations


def calculate_hourly_co2_emissions(green_energy_agent, date, device, load_df, shiftable_devices, energy_data, evaluation=False, verbose=True):
    """
    Calculate the CO2 emissions of one device on one day, hour by hour.

    The day's carbon-intensity readings are averaged per hour, divided by 1000
    and multiplied element-wise (by position) with the device's load values.
    NOTE(review): the kg result assumes CARBON_INTENSITY is in g CO2/kWh and
    the load in kWh — confirm against the data sources.

    Parameters:
        green_energy_agent: object whose `Load_Agent.pipeline(load_df, date,
            shiftable_devices)` returns a frame indexable with `.loc[device]`.
            Only used when `evaluation` is falsy.
        date: str or datetime-like; converted with `pd.to_datetime`.
        device: label looked up with `.loc[device]` in the load frame.
        load_df: load input data, passed through to the load pipeline.
        shiftable_devices: list of devices, passed through to the load pipeline.
        energy_data: DataFrame with 'DATETIME' and 'CARBON_INTENSITY' columns.
            It is not modified. Passing an already-parsed datetime column
            avoids re-parsing on every call.
        evaluation: False (default) to query the load agent, otherwise a
            mapping where `evaluation["load"][date]` is the load frame for the
            day (`date` being the converted timestamp).
        verbose: when True (default) print the intermediate arrays and total.

    Returns:
        hourly_emissions: DataFrame with columns 'Hour' and 'CO2_Emissions_kg'.
        total_co2_emissions: sum of the hourly emissions.
        Both are None when `energy_data` has no rows for the requested day.

    Raises:
        ValueError: if the number of load values differs from the number of
            hourly carbon-intensity values, since positional multiplication
            would then be meaningless.
    """
    date = pd.to_datetime(date)

    # Step 1: Select the day's readings. Timestamps are parsed into a local
    # Series so the caller's frame keeps its original 'DATETIME' column.
    timestamps = pd.to_datetime(energy_data['DATETIME'])
    on_requested_day = timestamps.dt.date == date.date()

    if not on_requested_day.any():
        print(f"No energy data available for {date}. Cannot calculate CO2 emissions.")
        return None, None

    # Step 2: Average the carbon intensity per hour. Only that column is
    # aggregated, so non-numeric columns in energy_data cannot break mean().
    hourly_data = (
        pd.DataFrame(
            {'CARBON_INTENSITY': energy_data.loc[on_requested_day, 'CARBON_INTENSITY'].to_numpy()},
            index=pd.DatetimeIndex(timestamps[on_requested_day], name='DATETIME'),
        )
        .resample('h')
        .mean()
        .reset_index()
    )

    # Step 3: Hourly carbon intensity as a column vector, scaled by 1/1000.
    carbon_intensity_kwh = hourly_data['CARBON_INTENSITY'].to_numpy().reshape(-1, 1) / 1000

    # Step 4: Get the device load for the day.
    if not evaluation:
        device_load = green_energy_agent.Load_Agent.pipeline(load_df, date, shiftable_devices).loc[device]
    else:
        device_load = evaluation["load"][date].loc[device]
    device_load = device_load.values.reshape(-1, 1)  # column vector, same layout as the intensity

    # Guard against silent broadcasting (e.g. a length-1 load) and give a
    # clearer message than numpy's shape error when hours are missing.
    if device_load.shape[0] != carbon_intensity_kwh.shape[0]:
        raise ValueError(
            f"Cannot align device load ({device_load.shape[0]} values) with hourly carbon "
            f"intensity ({carbon_intensity_kwh.shape[0]} values) for {device} on {date}."
        )

    # Step 5: Calculate hourly CO2 emissions and the daily total.
    co2_emissions = device_load * carbon_intensity_kwh
    hourly_emissions = pd.DataFrame({
        'Hour': hourly_data['DATETIME'].dt.hour,
        'CO2_Emissions_kg': co2_emissions.flatten()
    })
    total_co2_emissions = np.sum(co2_emissions)

    if verbose:
        print(f"Hourly Carbon Intensity (kg CO₂/kWh):\n{carbon_intensity_kwh}")
        print(f"Hourly Device Load (kWh):\n{device_load}")
        print(f"Hourly CO2 Emissions (kg):\n{hourly_emissions}")
        print(f"Total CO2 Emissions for {device} on {date}: {total_co2_emissions} kg")

    return hourly_emissions, total_co2_emissions

def process_emissions_for_date_range(start_date, end_date, device, green_energy_agent, load_df, shiftable_devices, energy_data):
    """Collect the hourly emissions of `device` for every day between two dates.

    Each day from `start_date` to `end_date` (inclusive, 'YYYY-MM-DD' strings)
    is passed to `calculate_hourly_co2_emissions` with `evaluation=False`.
    Days without an hourly result are reported on stdout and skipped.

    Returns a long-format DataFrame with one row per day and hour and the
    columns 'Date', 'Hour' and 'Hourly CO2 Emissions (kg)'; the frame is empty
    when no day produced data.
    """
    records = []

    for day in generate_date_range(start_date, end_date):
        # The daily total is not needed here, only the hourly breakdown.
        hourly_emissions, _ = calculate_hourly_co2_emissions(
            green_energy_agent=green_energy_agent,
            date=day,
            device=device,
            load_df=load_df,
            shiftable_devices=shiftable_devices,
            energy_data=energy_data,
            evaluation=False
        )

        if hourly_emissions is None:
            print(f"No emissions data found for {day}.")
            continue

        records.extend(
            {
                'Date': day,
                'Hour': int(entry.Hour),  # keep the hour an integer
                'Hourly CO2 Emissions (kg)': entry.CO2_Emissions_kg,
            }
            for entry in hourly_emissions.itertuples(index=False)
        )

    return pd.DataFrame(records)


# Evaluation window, device and thresholds for this run.
# NOTE(review): start_date is 2013-06-05 but output_file below is named
# "..._2013-06-01_..." — confirm which of the two is intended.
start_date = '2013-06-05'
end_date = '2014-12-05' 
device = "predicted_consumption_washing_machine"
activity_threshold = 0.5
usage_threshold = 0.5
output_file = "recommendations_2013-06-01_to_2014-12-05.csv"


# One agent.pipeline call per day of the window; the combined frame is also
# written to output_file by generate_recommendations.
final_recommendations = generate_recommendations(
    agent=green_energy_agent,
    start_date=start_date,
    end_date=end_date,
    activity_prob_threshold=activity_threshold,
    usage_prob_threshold=usage_threshold,
    output_file=output_file
)
# Hourly emissions of `device` over the same window (one row per day and hour).
emissions_df = process_emissions_for_date_range(start_date, end_date, device, green_energy_agent, load_df, shiftable_devices, generation_mix_file)
# NOTE(review): the file name contains typos ("washine_maching") and has no
# .csv extension, and the row index is written as an extra column because
# index=False is not passed. Confirm nothing else reads this path before renaming.
emissions_df.to_csv("emissions_washine_maching_corrected")


Hourly Carbon Intensity (kg CO₂/kWh):
[[0.5335]
 [0.5305]
 [0.536 ]
 [0.533 ]
 [0.5425]
 [0.529 ]
 [0.508 ]
 [0.491 ]
 [0.487 ]
 [0.482 ]
 [0.4815]
 [0.4825]
 [0.4885]
 [0.489 ]
 [0.4915]
 [0.49  ]
 [0.492 ]
 [0.496 ]
 [0.5025]
 [0.506 ]
 [0.5095]
 [0.5195]
 [0.531 ]
 [0.527 ]]
Hourly Device Load (kWh):
[[0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]]
Hourly CO2 Emissions (kg):
    Hour  CO2_Emissions_kg
0      0               0.0
1      1               0.0
2      2               0.0
3      3               0.0
4      4               0.0
5      5               0.0
6      6               0.0
7      7               0.0
8      8               0.0
9      9               0.0
10    10               0.0
11    11               0.0
12    12               0.0
13    13               0.0
14    14               0.0
15    15               0.0
16    16               0.0
17    17               0.0
18    18               0.0
19    1

In [274]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta



def compare_recommended_to_average(emissions_df, final_recommendations, device_to_analyze='predicted_consumption_washing_machine'):
    """Compares CO2 emissions at the recommended hour to the daily average.

    Args:
        emissions_df: DataFrame with columns 'Date', 'Hour' and
            'Hourly CO2 Emissions (kg)'. 'Date' may hold strings or datetimes;
            the frame itself is not modified.
        final_recommendations: DataFrame with columns 'device',
            'recommendation_date' and 'best_launch_hour'.
        device_to_analyze: value of the 'device' column to keep. Defaults to
            the washing machine, which this function previously hard-coded.

    Returns:
        DataFrame with one row per (date, recommendation) at the recommended
        hour, holding the merged columns plus 'Daily_Average_CO2',
        'Emissions_at_Recommendation', 'Difference_from_Average' and
        'Percentage_Difference'. An empty DataFrame is returned when no
        emissions row matches a recommendation date for the device.
        'Percentage_Difference' is NaN or infinite on days whose average is 0.
    """
    # Keep only the requested device; copy so the caller's frame is untouched.
    device_recommendations = final_recommendations[final_recommendations['device'] == device_to_analyze].copy()
    device_recommendations['recommendation_date'] = pd.to_datetime(device_recommendations['recommendation_date'])

    # Both merge keys must be datetimes. Convert on a copy instead of writing
    # back into the caller's emissions_df.
    emissions = emissions_df.assign(Date=pd.to_datetime(emissions_df['Date']))

    merged_df = pd.merge(emissions, device_recommendations, left_on=['Date'], right_on=['recommendation_date'], how='inner')
    if merged_df.empty:
        return pd.DataFrame()

    # Daily average over all hourly rows of the same date.
    merged_df['Daily_Average_CO2'] = merged_df.groupby(['Date'])['Hourly CO2 Emissions (kg)'].transform('mean')

    # Keep days that actually carry a recommended launch hour.
    recommended_df = merged_df[merged_df['best_launch_hour'].notna()].copy()
    recommended_df['best_launch_hour'] = recommended_df['best_launch_hour'].astype(int)

    # Select the row of the recommended hour with a boolean mask. Unlike a
    # row-wise apply, this also works when no rows are left.
    at_recommended_hour = recommended_df['Hour'] == recommended_df['best_launch_hour']
    recommended_df = recommended_df[at_recommended_hour].copy()
    recommended_df['Emissions_at_Recommendation'] = recommended_df['Hourly CO2 Emissions (kg)']
    recommended_df = recommended_df.dropna(subset=['Emissions_at_Recommendation'])

    # Difference between the recommended hour and the daily average.
    recommended_df['Difference_from_Average'] = recommended_df['Emissions_at_Recommendation'] - recommended_df['Daily_Average_CO2']
    recommended_df['Percentage_Difference'] = (recommended_df['Difference_from_Average'] / recommended_df['Daily_Average_CO2']) * 100

    return recommended_df




# Build the comparison table from emissions_df and final_recommendations.
# NOTE(review): compare_recommended_to_average is defined twice in this
# notebook; which definition runs here depends on the cell execution order.
comparison_df = compare_recommended_to_average(emissions_df, final_recommendations)
   

In [249]:
# Washing machine: every recommendation is followed.
followed_recommendations = comparison_df

# Report the mean relative change only when there is something to average.
if not followed_recommendations.empty:
    average_percentage_difference = followed_recommendations['Percentage_Difference'].mean()
    # The label names the subset actually used (all rows, not every other one).
    print(f"Average percentage difference (every recommendation): {average_percentage_difference:.2f}%")
else:
    print("No recommendations followed")

Average percentage difference (every other): -28.16%


In [248]:
# Washing machine: only every other recommendation is followed.
followed_recommendations = comparison_df.iloc[::2]  # rows 0, 2, 4, ...

# Nothing to average when the selection is empty.
if followed_recommendations.empty:
    print("No recommendations followed")
else:
    average_percentage_difference = followed_recommendations['Percentage_Difference'].mean()
    print(f"Average percentage difference (every other): {average_percentage_difference:.2f}%")

Average percentage difference (every other): -14.92%


In [250]:
# Switch the emissions input to a file read from disk (presumably the dish-washer run).
# NOTE(review): this rebinds emissions_df, which earlier held the frame computed
# in this notebook, and reads a file that no cell shown here writes — confirm
# how "emissions_dishwasher_corrected" is produced.
emissions_df = pd.read_csv("emissions_dishwasher_corrected")

def compare_recommended_to_average(emissions_df, final_recommendations,  device_to_analyze='predicted_consumption_dish_washer'):
    """Compares CO2 emissions at the recommended hour to the daily average.

    Rows whose emissions at the recommended hour are exactly 0 are dropped, so
    only recommendations with non-zero emissions at that hour enter the result.

    Args:
        emissions_df: DataFrame with columns 'Date', 'Hour' and
            'Hourly CO2 Emissions (kg)'. 'Date' may hold strings (e.g. straight
            from read_csv) or datetimes; the frame itself is not modified.
        final_recommendations: DataFrame with columns 'device',
            'recommendation_date' and 'best_launch_hour'.
        device_to_analyze: value of the 'device' column to keep.

    Returns:
        DataFrame with the merged columns plus 'Daily_Average_CO2',
        'Emissions_at_Recommendation', 'Difference_from_Average_kg' and
        'Percentage_Difference'; a plain empty DataFrame when nothing is left
        after any filtering step.
    """
    # Filter final_recommendations for the specified device
    device_recommendations = final_recommendations[final_recommendations['device'] == device_to_analyze].copy()
    device_recommendations['recommendation_date'] = pd.to_datetime(device_recommendations['recommendation_date'])

    # Both merge keys must share a dtype. 'Date' read from CSV is a string
    # column, so convert it (on a copy) before merging against datetimes.
    emissions = emissions_df.assign(Date=pd.to_datetime(emissions_df['Date']))

    merged_df = pd.merge(emissions, device_recommendations, left_on=['Date'], right_on=['recommendation_date'], how='inner')
    if merged_df.empty:
        return pd.DataFrame()

    # Daily average over all hourly rows of the same date.
    merged_df['Daily_Average_CO2'] = merged_df.groupby(['Date'])['Hourly CO2 Emissions (kg)'].transform('mean')

    # Filter for rows with recommendations
    recommended_df = merged_df[merged_df['best_launch_hour'].notna()].copy()
    if recommended_df.empty:
        return pd.DataFrame()

    # Keep only the row of the recommended hour for each recommendation.
    recommended_df['best_launch_hour'] = recommended_df['best_launch_hour'].astype(int)
    at_recommended_hour = recommended_df['Hour'] == recommended_df['best_launch_hour']
    recommended_df = recommended_df[at_recommended_hour].copy()
    recommended_df['Emissions_at_Recommendation'] = recommended_df['Hourly CO2 Emissions (kg)']
    recommended_df = recommended_df.dropna(subset=['Emissions_at_Recommendation'])
    if recommended_df.empty:
        return pd.DataFrame()

    # Remove zeroes from Emissions_at_Recommendation
    recommended_df = recommended_df[recommended_df['Emissions_at_Recommendation'] != 0].copy()
    if recommended_df.empty:
        return pd.DataFrame()

    # Calculate the difference between recommended emissions and daily average
    recommended_df['Difference_from_Average_kg'] = recommended_df['Emissions_at_Recommendation'] - recommended_df['Daily_Average_CO2']
    recommended_df['Percentage_Difference'] = (recommended_df['Difference_from_Average_kg'] / recommended_df['Daily_Average_CO2']) * 100

    return recommended_df

# Rebuild comparison_df from the emissions frame read from disk; no device is
# passed, so the default device_to_analyze ('predicted_consumption_dish_washer') applies.
# NOTE(review): this overwrites the comparison_df built earlier in the notebook.
comparison_df = compare_recommended_to_average(emissions_df, final_recommendations)

In [252]:
# Dish washer: every recommendation is followed.
followed_recommendations = comparison_df  # all rows, no subsampling

# Report the mean relative change only when there is something to average.
if not followed_recommendations.empty:
    average_percentage_difference = followed_recommendations['Percentage_Difference'].mean()
    # The label names the subset actually used (all rows, not every other one).
    print(f"Average percentage difference (every recommendation): {average_percentage_difference:.2f}%")
else:
    print("No recommendations followed")

Average percentage difference (every other): -19.67%


In [258]:
# Dish washer: only every other recommendation is followed.
followed_recommendations = comparison_df.iloc[::2]  # rows 0, 2, 4, ...

# Nothing to average when the selection is empty.
if followed_recommendations.empty:
    print("No recommendations followed")
else:
    average_percentage_difference = followed_recommendations['Percentage_Difference'].mean()
    print(f"Mean Percentage Difference (every other): {average_percentage_difference:.2f}%")


Mean Percentage Difference (Subset): -7.82%
