In [None]:
import os

import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# --- Input locations --------------------------------------------------------
# Root folder of the measurement run; each variant lives in its own sub-folder.
BASE_DIR = "2023-12-03"
WITH_SCHEDULER_DIR = os.path.join(BASE_DIR, "with_scheduler")
WITHOUT_SCHEDULER_DIR = os.path.join(BASE_DIR, "without_scheduler")

# --- Analysis parameters ----------------------------------------------------
SEASONS = ["spring", "summer", "fall", "winter"]
# Column of the seasonal emissions CSVs that is read as the carbon intensity.
EMISSIONS_KEY = "Carbon Intensity gCO₂eq/kWh (LCA) mean"
# Arrival times as (hour, minute) tuples: every four hours, on the hour.
ARRIVAL_TIMES = [(hour, 0) for hour in range(0, 24, 4)]
# Single-arrival-time alternative kept from the original notebook:
# ARRIVAL_TIMES = [(4, 0)]

# --- Plot appearance --------------------------------------------------------
sns.set_theme(context="notebook")
sns.set(font="Verdana", font_scale=1.5)

matplotlib.rcParams.update({
    "figure.figsize": (16, 9),
    "figure.dpi": 200,
    "lines.linewidth": 2.5,
})

# Seed NumPy's global random generator so any random draws are repeatable.
np.random.seed(62)

In [None]:
# Load the four seasonal emissions profiles; each frame is indexed by
# (hour, minute) so a single time of day can be looked up with a tuple.
(
    fall_emissions_df,
    spring_emissions_df,
    summer_emissions_df,
    winter_emissions_df,
) = (
    pd.read_csv(csv_name, index_col=["hour", "minute"])
    for csv_name in (
        "2021_fall_emissions.csv",
        "2021_spring_emissions.csv",
        "2021_summer_emissions.csv",
        "2021_winter_emissions.csv",
    )
)

# Smallest carbon-intensity value found in each season's profile.
(
    fall_lowest_emissions,
    spring_lowest_emissions,
    summer_lowest_emissions,
    winter_lowest_emissions,
) = (
    season_frame[EMISSIONS_KEY].min()
    for season_frame in (
        fall_emissions_df,
        spring_emissions_df,
        summer_emissions_df,
        winter_emissions_df,
    )
)

In [None]:
def _load_run(run_dir):
    """Read ``data.csv`` and ``kwh.csv`` from ``run_dir``.

    Returns a ``(data, kwh)`` tuple: ``data`` is indexed by ``source_file_id``
    with ``start_time``/``end_time`` parsed as dates, ``kwh`` has ``read_time``
    parsed as dates.
    """
    data = pd.read_csv(
        os.path.join(run_dir, "data.csv"),
        parse_dates=["start_time", "end_time"],
        index_col=["source_file_id"],
    )
    kwh = pd.read_csv(os.path.join(run_dir, "kwh.csv"), parse_dates=["read_time"])
    return data, kwh


without_scheduler, without_scheduler_kwh = _load_run(WITHOUT_SCHEDULER_DIR)

# TODO: both variants currently come from the same (without-scheduler) run and
# only the times are set differently, which is why WITHOUT_SCHEDULER_DIR is
# read here as well.
with_scheduler, with_scheduler_kwh = _load_run(WITHOUT_SCHEDULER_DIR)

In [None]:
def _verify_kwh_totals(run_df, kwh_df, tolerance=1e-6):
    """Verify each row's ``total_kwh_used`` against the readings in ``kwh_df``.

    Adds two columns to ``run_df`` in place:
    ``summed_kwh`` (sum of ``kwh`` per ``metric`` group, aligned on the index of
    ``run_df``) and ``kwh_difference`` (absolute difference to ``total_kwh_used``).

    Raises:
        ValueError: if a row could not be compared (the difference is NaN) or
            if a row's difference exceeds ``tolerance``.
    """
    # Column assignment aligns on index labels: a row of run_df whose label has
    # no matching "metric" group receives NaN.
    run_df["summed_kwh"] = kwh_df.groupby("metric")["kwh"].sum()
    run_df["kwh_difference"] = np.abs(run_df["total_kwh_used"] - run_df["summed_kwh"])

    # NaN > tolerance evaluates to False, so rows without a comparable sum
    # would otherwise pass the check without having been verified at all.
    unverified_rows = run_df.index[run_df["kwh_difference"].isna()]
    if len(unverified_rows) > 0:
        raise ValueError(
            f"kwh_difference could not be computed for {len(unverified_rows)} row(s), "
            f"first row: {unverified_rows[0]}"
        )

    mismatched = run_df.loc[run_df["kwh_difference"] > tolerance, "kwh_difference"]
    if not mismatched.empty:
        raise ValueError(f"kwh_difference is not 0, it is {mismatched.iloc[0]} for row {mismatched.index[0]}")


# Verify that for every row of each run, total_kwh_used is the sum of the
# values in the corresponding kwh.csv file.
_verify_kwh_totals(without_scheduler, without_scheduler_kwh)
_verify_kwh_totals(with_scheduler, with_scheduler_kwh)

In [None]:
def _total_seconds(duration):
    """Return the length of a single timedelta value in seconds."""
    return duration.total_seconds()


def _synthetic_test_date(file_name):
    """Parse the text before the first "_" (and before the first ".") of a file name as a date."""
    return pd.to_datetime(file_name.split("_")[0].split(".")[0])


# Normalise both run frames in place: order the tests by start time, number
# them from 0, convert the duration-like columns and add the derived columns
# used by the tables and plots below.
for df in (without_scheduler, with_scheduler):
    df.sort_values("start_time", inplace=True)
    df.reset_index(inplace=True, drop=True)
    df.index.names = ["Test number"]

    # These columns already exist in the frame and are converted to timedeltas.
    for timedelta_column in (
        "expected_duration_at_schedule_time",
        "duration",
        "difference_with_deadline",
    ):
        df[timedelta_column] = pd.to_timedelta(df[timedelta_column])

    df["Test duration (s)"] = df["duration"].apply(_total_seconds).astype(float)
    df["File name"] = df["file_path"].apply(os.path.basename)
    df["Synthetic test date"] = df["File name"].apply(_synthetic_test_date)
    # Running total in the sorted (start time) order established above.
    df["Total kWh consumed (cumulative)"] = df["total_kwh_used"].cumsum()

In [None]:
# Build one long-form table with a copy of the run data for every combination
# of season, arrival time and scheduler setting, each with its own emissions.

# Per-season inputs: (emissions profile, lowest carbon intensity of the profile).
# An explicit mapping replaces the previous eval()-based variable lookup.
season_emissions = {
    "spring": (spring_emissions_df, spring_lowest_emissions),
    "summer": (summer_emissions_df, summer_lowest_emissions),
    "fall": (fall_emissions_df, fall_lowest_emissions),
    "winter": (winter_emissions_df, winter_lowest_emissions),
}

# Table rows recorded in an earlier revision of this cell (their provenance is
# not visible in this notebook — verify before reusing them):
# 04:00 & Spring & 442.76 & 452.38 & +9.62  & +2.17 \\
# 04:00 & Summer & 435.06 & 456.61 & +21.55 & +4.95 \\
# 04:00 & Fall   & 464.43 & 461.52 & -2.91  & -0.63 \\
# 04:00 & Winter & 443.25 & 475.19 & +31.95 & +7.21 \\

scenario_frames = []
for season in SEASONS:
    emissions_df, lowest_emissions = season_emissions[season]

    for arrival_time in ARRIVAL_TIMES:
        # Every row of a scenario shares the same arrival time, so the carbon
        # intensity at that time is looked up once instead of once per row.
        arrival_intensity = emissions_df.loc[arrival_time, EMISSIONS_KEY]
        hh_mm = f"{arrival_time[0]:02d}:{arrival_time[1]:02d}"

        # Without the scheduler the tests are charged at the intensity of the
        # arrival time; with the scheduler at the season's lowest intensity.
        for using_scheduler, carbon_intensity in (
            ("Without scheduler", arrival_intensity),
            ("With scheduler", lowest_emissions),
        ):
            # Both settings start from the without-scheduler run data.
            new_df = without_scheduler.copy()
            new_df["Season"] = season.capitalize()
            new_df["Arrival time"] = [arrival_time] * len(new_df)
            new_df["Arrival time (HH:mm)"] = hh_mm
            new_df["Using scheduler"] = using_scheduler
            new_df["Emissions (gCO₂eq)"] = new_df["total_kwh_used"] * carbon_intensity
            new_df["Cumulative emissions (gCO₂eq)"] = new_df["Emissions (gCO₂eq)"].cumsum()
            scenario_frames.append(new_df)

# Concatenate once at the end; concatenating inside the loop re-copies every
# previously collected row on each iteration.
without_scheduler_per_arrival_time_df = (
    pd.concat(scenario_frames) if scenario_frames else pd.DataFrame()
)

In [None]:
def _last_cumulative_emissions(arrival_time, season_label, scheduler_label):
    """Return the last cumulative-emissions value of one scenario.

    The scenario is selected from ``without_scheduler_per_arrival_time_df`` by
    its arrival time tuple, its "Season" label and its "Using scheduler" label.
    """
    scenarios = without_scheduler_per_arrival_time_df
    matching = scenarios[
        (scenarios["Arrival time"] == arrival_time)
        & (scenarios["Season"] == season_label)
        & (scenarios["Using scheduler"] == scheduler_label)
    ]
    return matching["Cumulative emissions (gCO₂eq)"].iloc[-1]


# Print one LaTeX table row per (arrival time, season):
# arrival time, season, total without scheduler, total with scheduler,
# absolute difference and difference relative to the without-scheduler total.
for arrival_time in ARRIVAL_TIMES:
    arrival_time_hh_mm = f"{arrival_time[0]:02d}:{arrival_time[1]:02d}"
    for season in SEASONS:
        season_label = season.capitalize()
        without_scheduler_emissions = _last_cumulative_emissions(
            arrival_time, season_label, "Without scheduler"
        )
        with_scheduler_emissions = _last_cumulative_emissions(
            arrival_time, season_label, "With scheduler"
        )
        difference = with_scheduler_emissions - without_scheduler_emissions
        percentage_difference = (difference / without_scheduler_emissions) * 100
        print(f"{arrival_time_hh_mm} & "
              f"{season.capitalize().ljust(6)} & "
              f"{without_scheduler_emissions:>5.2f} & "
              f"{with_scheduler_emissions:>5.2f} & "
              f"{difference:>7.2f} & "
              f"{percentage_difference:>6.2f} \\\\")

In [None]:
# One panel per arrival time, two panels per row; all panels share both axes.
facet_options = dict(
    col="Arrival time (HH:mm)",
    col_wrap=2,
    sharey=True,
    sharex=True,
    height=5,
    aspect=1,
)
g = sns.FacetGrid(data=without_scheduler_per_arrival_time_df, **facet_options)

# Colour encodes the season, line style encodes the scheduler setting.
lineplot_options = dict(
    x="Synthetic test date",
    y="Cumulative emissions (gCO₂eq)",
    style="Using scheduler",
    hue="Season",
    units=None,
    errorbar=None,
    lw=2.5,
)
g.map_dataframe(sns.lineplot, **lineplot_options)

# Ten evenly spaced ticks over a fixed date window, labelled as month-day.
xticks = pd.date_range(start="2018-03-01", end="2018-06-25", periods=10)
xtick_labels = xticks.strftime("%m-%d")

for ax in g.axes:
    ax.set_xticks(xticks)
    ax.set_xticklabels(xtick_labels, rotation=45, horizontalalignment="right")

g.fig.tight_layout()
g.add_legend()

In [None]:
# Per-season totals and cumulative-emission curves, without vs. with scheduler.
#
# The "<Season> Cumulative Emissions (gCO₂eq)" columns plotted here are not
# created by any active code elsewhere in the notebook, so they are computed in
# this cell (on local copies) to keep it runnable on a fresh kernel.
season_inputs = {
    "spring": (spring_emissions_df, spring_lowest_emissions),
    "summer": (summer_emissions_df, summer_lowest_emissions),
    "fall": (fall_emissions_df, fall_lowest_emissions),
    "winter": (winter_emissions_df, winter_lowest_emissions),
}

# Arrival time charged to the without-scheduler tests.
# NOTE(review): the last configured arrival time is used — confirm this is the
# arrival time the summary is meant to report.
summary_arrival_time = ARRIVAL_TIMES[-1]

without_scheduler_summary = without_scheduler.copy()
with_scheduler_summary = with_scheduler.copy()

fig, axs = plt.subplots(ncols=2, sharey=True, figsize=(15, 5))
for season in SEASONS:
    emissions_df, lowest_emissions = season_inputs[season]
    cumulative_column = f"{season.capitalize()} Cumulative Emissions (gCO₂eq)"

    # Without scheduler: every test is charged at the arrival-time intensity.
    arrival_intensity = emissions_df.loc[summary_arrival_time, EMISSIONS_KEY]
    without_scheduler_summary[cumulative_column] = (
        without_scheduler_summary["total_kwh_used"] * arrival_intensity
    ).cumsum()
    # With scheduler: every test is charged at the season's lowest intensity.
    with_scheduler_summary[cumulative_column] = (
        with_scheduler_summary["total_kwh_used"] * lowest_emissions
    ).cumsum()

    without_scheduler_total_emissions = without_scheduler_summary[cumulative_column].iloc[-1]
    with_scheduler_total_emissions = with_scheduler_summary[cumulative_column].iloc[-1]
    print(f"{season.capitalize()} total emissions without scheduler:", without_scheduler_total_emissions)
    print(f"{season.capitalize()} total emissions with scheduler:", with_scheduler_total_emissions)
    print(f"{season.capitalize()} total emissions saved:", without_scheduler_total_emissions - with_scheduler_total_emissions)
    print(f"{season.capitalize()} total emissions saved (%)", ((without_scheduler_total_emissions - with_scheduler_total_emissions) / without_scheduler_total_emissions) * 100)
    print()

    # Left panel: without scheduler; right panel: with scheduler.
    for panel, summary_df in zip(axs, (without_scheduler_summary, with_scheduler_summary)):
        sns.lineplot(
            data=summary_df,
            x="Synthetic test date",
            y=cumulative_column,
            ax=panel,
            label=season.capitalize(),
            errorbar=None,
        )

# Ten evenly spaced ticks over a fixed date window, labelled as month-day.
xticks = pd.date_range(start="2018-03-01", end="2018-06-25", periods=10)

for ax in axs:
    ax.set_xticks(xticks)
    ax.set_xticklabels(xticks.strftime("%m-%d"), rotation=45, horizontalalignment='right')
    ax.set_ylabel("Cumulative emissions (gCO₂eq)")

axs[0].set_title("Without scheduler")
axs[1].set_title("With scheduler")
