In [None]:
import os

import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# --- Input locations ---------------------------------------------------------
# Root directory of the run data, with one sub-directory per scheduler setup.
BASE_DIR = "2023-12-03"
WITH_SCHEDULER_DIR = os.path.join(BASE_DIR, "with_scheduler")
WITHOUT_SCHEDULER_DIR = os.path.join(BASE_DIR, "without_scheduler")

# --- Analysis constants ------------------------------------------------------
SEASON_FOR_BIG_PLOTS = "summer"
SEASONS = ["spring", "summer", "fall", "winter"]
# Name of the carbon-intensity column read from the seasonal emissions tables.
EMISSIONS_KEY = "Carbon Intensity gCO₂eq/kWh (LCA) mean"

# --- Plot styling ------------------------------------------------------------
sns.set_theme(context="notebook")
sns.set_theme(font="Verdana", font_scale=1.5)

matplotlib.rcParams.update({
    "figure.figsize": (16, 9),
    "figure.dpi": 200,
    "lines.linewidth": 2.5,
})

# Seed NumPy's global generator so random draws are repeatable between runs.
np.random.seed(62)

In [None]:
# Each seasonal table is indexed by (hour, minute).
_EMISSIONS_INDEX_COLS = ["hour", "minute"]

fall_emissions_df = pd.read_csv("2021_fall_emissions.csv", index_col=_EMISSIONS_INDEX_COLS)
spring_emissions_df = pd.read_csv("2021_spring_emissions.csv", index_col=_EMISSIONS_INDEX_COLS)
summer_emissions_df = pd.read_csv("2021_summer_emissions.csv", index_col=_EMISSIONS_INDEX_COLS)
winter_emissions_df = pd.read_csv("2021_winter_emissions.csv", index_col=_EMISSIONS_INDEX_COLS)

# Smallest carbon-intensity value found in each seasonal table.
fall_lowest_emissions = fall_emissions_df.loc[:, EMISSIONS_KEY].min()
spring_lowest_emissions = spring_emissions_df.loc[:, EMISSIONS_KEY].min()
summer_lowest_emissions = summer_emissions_df.loc[:, EMISSIONS_KEY].min()
winter_lowest_emissions = winter_emissions_df.loc[:, EMISSIONS_KEY].min()

In [None]:
# TODO, currently using the same run, just setting times differently.
def _load_run(run_dir):
    """Read the two CSV files stored in ``run_dir``.

    Returns a ``(data, kwh)`` pair: ``data.csv`` indexed by ``source_file_id``
    with ``start_time``/``end_time`` parsed as dates, and ``kwh.csv`` with
    ``read_time`` parsed as dates.
    """
    data = pd.read_csv(
        os.path.join(run_dir, "data.csv"),
        parse_dates=["start_time", "end_time"],
        index_col=["source_file_id"],
    )
    kwh = pd.read_csv(
        os.path.join(run_dir, "kwh.csv"),
        parse_dates=["read_time"],
    )
    return data, kwh


without_scheduler, without_scheduler_kwh = _load_run(WITHOUT_SCHEDULER_DIR)
with_scheduler, with_scheduler_kwh = _load_run(WITH_SCHEDULER_DIR)

In [None]:
# Verify that for every row of each run table, total_kwh_used equals the sum of
# the matching readings in that run's kwh.csv.
def _add_kwh_check_columns(runs, kwh_readings):
    """Add ``summed_kwh`` and ``kwh_difference`` columns to ``runs`` in place.

    ``summed_kwh`` is the per-"metric" sum of ``kwh_readings["kwh"]``; assigning
    it as a column aligns those sums on the index of ``runs``.
    """
    # NOTE(review): this relies on the "metric" values being the same ids as
    # the index of `runs` -- confirm. Rows without a matching sum become NaN.
    runs["summed_kwh"] = kwh_readings.groupby("metric")["kwh"].sum()
    runs["kwh_difference"] = np.abs(runs["total_kwh_used"] - runs["summed_kwh"])


def _check_kwh_difference(runs, tolerance=1e-6):
    """Raise ``ValueError`` unless every ``kwh_difference`` is within ``tolerance``.

    A NaN difference is treated as a failure: ``NaN > tolerance`` is False, so a
    plain threshold comparison would silently accept rows that were never
    actually compared.
    """
    differences = runs["kwh_difference"]

    unverifiable = differences[differences.isna()]
    if not unverifiable.empty:
        raise ValueError(
            "kwh_difference could not be computed (missing kWh sum or total_kwh_used) "
            f"for rows {list(unverifiable.index)}"
        )

    offenders = differences[differences > tolerance]
    if not offenders.empty:
        # Report the first offending row, in table order.
        raise ValueError(f"kwh_difference is not 0, it is {offenders.iloc[0]} for row {offenders.index[0]}")


# Compute the check columns for both runs first, then validate both.
_add_kwh_check_columns(without_scheduler, without_scheduler_kwh)
_add_kwh_check_columns(with_scheduler, with_scheduler_kwh)

_check_kwh_difference(without_scheduler)
_check_kwh_difference(with_scheduler)

In [None]:
def _total_seconds(delta):
    """Return the length of a timedelta in seconds."""
    return delta.total_seconds()


def _date_from_file_name(file_name):
    """Parse the text before the first "_" (and before any ".") as a date."""
    date_text = file_name.split("_")[0].split(".")[0]
    return pd.to_datetime(date_text)


# Both run tables get the same in-place clean-up, so later cells can treat them
# interchangeably.
for df in (without_scheduler, with_scheduler):
    # Order tests by start time and renumber them 0..n-1.
    df.sort_values("start_time", inplace=True)
    df.reset_index(inplace=True, drop=True)
    df.index.names = ["Test number"]

    # Convert the duration-like columns to timedeltas.
    df["expected_duration_at_schedule_time"] = pd.to_timedelta(df["expected_duration_at_schedule_time"])
    df["duration"] = pd.to_timedelta(df["duration"])
    df["difference_with_deadline"] = pd.to_timedelta(df["difference_with_deadline"])

    # Derived columns used by the plots.
    df["Test duration (s)"] = df["duration"].apply(_total_seconds).astype(float)
    df["File name"] = df["file_path"].apply(os.path.basename)
    df["Synthetic test date"] = df["File name"].apply(_date_from_file_name)
    df["Total kWh consumed (cumulative)"] = df["total_kwh_used"].cumsum()

In [None]:
# Explicit per-season lookups. These replace building variable names as strings
# and eval()-ing them, which hides the dependency on those variables and breaks
# silently if one is renamed.
emissions_by_season = {
    "spring": spring_emissions_df,
    "summer": summer_emissions_df,
    "fall": fall_emissions_df,
    "winter": winter_emissions_df,
}
lowest_emissions_by_season = {
    "spring": spring_lowest_emissions,
    "summer": summer_lowest_emissions,
    "fall": fall_lowest_emissions,
    "winter": winter_lowest_emissions,
}

emissions_index_options = summer_emissions_df.index
# Pick a random emissions index for each row
emissions_index = np.random.choice(emissions_index_options, size=len(without_scheduler))
# To pin every arrival to one fixed time instead (e.g. while debugging), use:
# emissions_index = [(8,0) for _ in range(len(without_scheduler))]
without_scheduler["Without scheduler arrival time"] = emissions_index
# Render each (hour, minute) arrival as "HH:MM" and parse it as a datetime.
without_scheduler["Without scheduler arrival time HH:mm"] = pd.to_datetime(
    without_scheduler["Without scheduler arrival time"].apply(lambda x: f"{x[0]:02d}:{x[1]:02d}"),
    format="%H:%M"
)

for season in SEASONS:
    season_emissions_df = emissions_by_season[season]
    emissions_column = f"{season.capitalize()} Emissions (gCO₂eq)"
    cumulative_column = f"{season.capitalize()} Cumulative Emissions (gCO₂eq)"

    # Without scheduler: each test is charged at the carbon intensity of its
    # randomly drawn arrival time.
    arrival_intensity = without_scheduler["Without scheduler arrival time"].apply(
        lambda arrival: season_emissions_df.loc[arrival, EMISSIONS_KEY]
    )
    without_scheduler[emissions_column] = arrival_intensity * without_scheduler["total_kwh_used"]
    without_scheduler[cumulative_column] = without_scheduler[emissions_column].cumsum()

    # With scheduler: every test is charged at the season's lowest carbon intensity.
    with_scheduler[emissions_column] = with_scheduler["total_kwh_used"] * lowest_emissions_by_season[season]
    with_scheduler[cumulative_column] = with_scheduler[emissions_column].cumsum()

In [None]:
# Histogram of test durations for the run with the scheduler, with a KDE overlay.
fig, ax = plt.subplots()
sns.histplot(x="Test duration (s)", data=with_scheduler, kde=True, ax=ax)

In [None]:
# Test-duration histograms for both runs, side by side on shared axes.
fig, axs = plt.subplots(ncols=2, sharex=True, sharey=True, figsize=(15, 5))

# Left panel: without scheduler; right panel: with scheduler.
for panel_ax, panel_runs in zip(axs, (without_scheduler, with_scheduler)):
    sns.histplot(data=panel_runs, x="Test duration (s)", ax=panel_ax)

axs[0].set_title("Without scheduler")
axs[1].set_title("With scheduler")

In [None]:
# Test duration against synthetic test date, one line per run.
fig, ax = plt.subplots()

duration_series = (
    ("Without scheduler", without_scheduler),
    ("With scheduler", with_scheduler),
)
for series_label, series_runs in duration_series:
    sns.lineplot(
        data=series_runs,
        x="Synthetic test date",
        y="Test duration (s)",
        label=series_label,
        ax=ax,
    )

# Ten evenly spaced ticks over the plotted date range, labelled month-day.
xticks = pd.date_range(start="2018-03-01", end="2018-06-25", periods=10)
ax.set_xticks(xticks)
ax.set_xticklabels(xticks.strftime("%m-%d"), rotation=45, ha='right')

In [None]:
# Cumulative kWh consumed against synthetic test date, one line per run.
fig, ax = plt.subplots(figsize=(16, 5))
# Draw on `ax` explicitly rather than relying on pyplot's implicit "current
# axes", so the lines land on this figure even if another figure is active.
sns.lineplot(
    data=without_scheduler,
    x="Synthetic test date",
    y="Total kWh consumed (cumulative)",
    label="Without concurrency",
    ax=ax,
)
sns.lineplot(
    data=with_scheduler,
    x="Synthetic test date",
    y="Total kWh consumed (cumulative)",
    label="With concurrency",
    ax=ax,
)
# Ten evenly spaced ticks over the plotted date range, labelled month-day.
xticks = pd.date_range(start="2018-03-01", end="2018-06-25", periods=10)
ax.set_xticks(xticks)
ax.set_xticklabels(xticks.strftime("%m-%d"), rotation=45, horizontalalignment='right')
ax.set_ylabel("kWh consumed (cumulative)")

In [None]:
# Cumulative emissions per season: left panel without the scheduler, right
# panel with it. Season totals and savings are printed along the way.
fig, axs = plt.subplots(ncols=2, sharey=True, figsize=(15, 5))
for season in SEASONS:
    season_name = season.capitalize()
    cumulative_column = f"{season_name} Cumulative Emissions (gCO₂eq)"

    # The last value of a cumulative column is the season total.
    without_scheduler_total_emissions = without_scheduler[cumulative_column].iloc[-1]
    with_scheduler_total_emissions = with_scheduler[cumulative_column].iloc[-1]
    emissions_saved = without_scheduler_total_emissions - with_scheduler_total_emissions

    print(f"{season_name} total emissions without scheduler:", without_scheduler_total_emissions)
    print(f"{season_name} total emissions with scheduler:", with_scheduler_total_emissions)
    print(f"{season_name} total emissions saved:", emissions_saved)
    print(f"{season_name} total emissions saved (%)", (emissions_saved / without_scheduler_total_emissions) * 100)
    print()

    for panel_ax, panel_runs in zip(axs, (without_scheduler, with_scheduler)):
        sns.lineplot(
            data=panel_runs,
            x="Synthetic test date",
            y=cumulative_column,
            ax=panel_ax,
            label=season_name,
            errorbar=None,
        )

# Ten evenly spaced ticks over the plotted date range, labelled month-day.
xticks = pd.date_range(start="2018-03-01", end="2018-06-25", periods=10)

for ax in axs:
    ax.set_xticks(xticks)
    ax.set_xticklabels(xticks.strftime("%m-%d"), rotation=45, ha='right')
    ax.set_ylabel("Cumulative emissions (gCO₂eq)")

axs[0].set_title("Without scheduler and no concurrency")
axs[1].set_title("With scheduler and concurrency")
