In [None]:
import os
from datetime import timedelta

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt, dates

# Folder containing the CSV exports that the following cells load.
BASE_DIR = "2023-11-29_23-04-26"

# Single theme call: "notebook" context with the Verdana font. `sns.set` is an alias of
# `sns.set_theme` and every call re-applies the whole theme, so one call carrying both
# options leaves the same final settings as the two separate calls did.
sns.set_theme(context="notebook", font="Verdana")

# Default figure size (width, height) in inches for figures that do not set their own.
matplotlib.rcParams.update({"figure.figsize": (16, 9)})

In [None]:
# Read files.csv from the run folder and preview its first rows.
files_csv = os.path.join(BASE_DIR, "files.csv")
files = pd.read_csv(files_csv)
files.head()

In [None]:
# Load the with-scheduler results: the two timestamp columns are parsed at read time and
# the three duration-like columns are converted to timedeltas right after.
with_scheduler = pd.read_csv(
    os.path.join(BASE_DIR, "with_scheduler.csv"),
    parse_dates=["start_time", "end_time"],
).assign(
    expected_duration_at_schedule_time=lambda frame: pd.to_timedelta(
        frame["expected_duration_at_schedule_time"]
    ),
    duration=lambda frame: pd.to_timedelta(frame["duration"]),
    difference_with_deadline=lambda frame: pd.to_timedelta(frame["difference_with_deadline"]),
)
with_scheduler.head()

In [None]:
# Carbon-intensity samples for the with-scheduler run, ordered by timestamp
# (pd.merge_asof, used later in this notebook, needs its key column sorted).
with_scheduler_carbon_intensity = pd.read_csv(
    os.path.join(BASE_DIR, "with_scheduler_carbon_intensity.csv"),
    parse_dates=["datetime"],
).sort_values("datetime")
with_scheduler_carbon_intensity.head()

In [None]:
# Energy readings for the with-scheduler run, ordered by the time they were read.
with_scheduler_kwh = pd.read_csv(
    os.path.join(BASE_DIR, "with_scheduler_kwh.csv"),
    parse_dates=["read_time"],
).sort_values("read_time")
with_scheduler_kwh.head()

In [None]:
# Baseline run (no scheduler): job results, carbon-intensity samples and energy readings.
# Loaded the same way as the with-scheduler data so both result sets end up with the
# same dtypes and ordering.
without_scheduler = pd.read_csv(
    os.path.join(BASE_DIR, "without_scheduler.csv"),
    # Parse the timestamps like the with-scheduler load does; before this they were
    # left as plain strings for the baseline only.
    # NOTE(review): assumes without_scheduler.csv has the same start_time/end_time
    # columns as with_scheduler.csv — confirm.
    parse_dates=["start_time", "end_time"],
).assign(
    expected_duration_at_schedule_time=lambda frame: pd.to_timedelta(
        frame["expected_duration_at_schedule_time"]
    ),
    duration=lambda frame: pd.to_timedelta(frame["duration"]),
    difference_with_deadline=lambda frame: pd.to_timedelta(frame["difference_with_deadline"]),
)

# Sorted by timestamp because pd.merge_asof requires sorted key columns.
without_scheduler_carbon_intensity = pd.read_csv(
    os.path.join(BASE_DIR, "without_scheduler_carbon_intensity.csv"),
    parse_dates=["datetime"],
).sort_values("datetime")

without_scheduler_kwh = pd.read_csv(
    os.path.join(BASE_DIR, "without_scheduler_kwh.csv"),
    parse_dates=["read_time"],
).sort_values("read_time")

In [None]:
# Count distinct file names; dropna=False keeps a missing name counted as one value,
# exactly like len(Series.unique()).
print("Unique files:", files["file_name"].nunique(dropna=False))

In [None]:
def calculate_emissions(kwh_df: pd.DataFrame, carbon_intensity_df: pd.DataFrame) -> pd.DataFrame:
    """Pair every energy reading with the nearest carbon-intensity sample and derive emissions.

    Args:
        kwh_df: Readings with a ``read_time`` column and a ``kwh`` column.
        carbon_intensity_df: Samples with a ``datetime`` column and a
            ``Carbon Intensity gCO₂eq/kWh (LCA) median`` column.

    Returns:
        A new frame ordered by ``read_time`` containing the columns of both inputs
        (minus ``datetime``) plus:

        * ``time_error`` - absolute gap between a reading and its matched sample,
        * ``emissions (gCO₂eq)`` - ``kwh`` multiplied by the matched intensity,
        * ``emissions (gCO₂eq) cumsum`` - running total of the emissions column.

        Neither input frame is modified.
    """
    # pd.merge_asof raises ValueError when a key column is not sorted. Sort here instead
    # of relying on every caller having done it; the stable sort keeps the row order of
    # already-sorted input unchanged.
    readings = kwh_df.sort_values("read_time", kind="stable")
    intensity = carbon_intensity_df.sort_values("datetime", kind="stable")

    emissions_df = pd.merge_asof(
        readings,
        intensity,
        left_on="read_time",
        right_on="datetime",
        direction="nearest",
    )
    # Record how far away the matched sample is so poor matches can be spotted.
    emissions_df["time_error"] = (emissions_df["read_time"] - emissions_df["datetime"]).abs()
    emissions_df = emissions_df.drop(columns=["datetime"])
    emissions_df["emissions (gCO₂eq)"] = (
        emissions_df["kwh"] * emissions_df["Carbon Intensity gCO₂eq/kWh (LCA) median"]
    )
    emissions_df["emissions (gCO₂eq) cumsum"] = emissions_df["emissions (gCO₂eq)"].cumsum()
    return emissions_df

In [None]:
# Emissions per energy reading for the run that used the scheduler.
with_scheduler_emissions = calculate_emissions(
    kwh_df=with_scheduler_kwh,
    carbon_intensity_df=with_scheduler_carbon_intensity,
)
with_scheduler_emissions

In [None]:
# Emissions per energy reading for the baseline run without the scheduler.
without_scheduler_emissions = calculate_emissions(
    kwh_df=without_scheduler_kwh,
    carbon_intensity_df=without_scheduler_carbon_intensity,
)
without_scheduler_emissions

In [None]:
def resample_to_24h(carbon_intensity_df: pd.DataFrame, df: pd.DataFrame) -> pd.DataFrame:
    """Rescale ``read_time`` linearly onto one 24-hour day.

    The span between the earliest and latest ``datetime`` in ``carbon_intensity_df``
    is mapped onto the 24 hours that begin at midnight of the earliest timestamp's
    day. The three timeframe values are printed along the way.

    Args:
        carbon_intensity_df: Frame whose ``datetime`` column defines the timeframe.
        df: Frame with a ``read_time`` column to rescale.

    Returns:
        A copy of ``df`` with a ``resampled_read_time`` column; ``df`` itself is
        left untouched.
    """
    intensity_times = carbon_intensity_df["datetime"]
    timeframe_start, timeframe_end = intensity_times.min(), intensity_times.max()
    timeframe_range = timeframe_end - timeframe_start

    print("timeframe_start", timeframe_start)
    print("timeframe_end", timeframe_end)
    print("timeframe_range", timeframe_range)

    # Midnight of the day on which the timeframe starts.
    day_start = timeframe_start.to_period("D").to_timestamp()

    # Fraction of the timeframe elapsed at each reading (0 at the start, 1 at the end).
    elapsed_fraction = (df["read_time"] - timeframe_start) / timeframe_range

    return df.assign(
        resampled_read_time=pd.to_datetime(day_start + elapsed_fraction * pd.Timedelta(hours=24))
    )


# Put both runs on a common 24-hour axis, each scaled by its own carbon-intensity timeframe.
with_scheduler_emissions_24h = resample_to_24h(
    carbon_intensity_df=with_scheduler_carbon_intensity,
    df=with_scheduler_emissions,
)
without_scheduler_emissions_24h = resample_to_24h(
    carbon_intensity_df=without_scheduler_carbon_intensity,
    df=without_scheduler_emissions,
)

with_scheduler_emissions_24h


In [None]:
# Side-by-side emissions over the rescaled day: baseline on the left, scheduler on the right.
fig, axs = plt.subplots(1, 2, figsize=(12, 4), sharex=True, sharey=True)

panels = (
    (axs[0], without_scheduler_emissions_24h, "Without scheduler"),
    (axs[1], with_scheduler_emissions_24h, "With scheduler"),
)

# NOTE(review): the displayed day is hard-coded; presumably it is the day that
# resample_to_24h maps the readings onto for this data set — confirm when BASE_DIR changes.
x_min = pd.to_datetime("2023-11-12 00:00:00")
x_max = pd.to_datetime("2023-11-13 00:00:00")

for ax, emissions_24h, title in panels:
    sns.lineplot(emissions_24h, x="resampled_read_time", y="emissions (gCO₂eq)", ax=ax)
    ax.set_title(title)
    ax.set_xlabel("Time")
    ax.set_xlim(x_min, x_max)
    ax.xaxis.set_major_formatter(dates.DateFormatter('%H:%M'))
    # Style the existing tick labels rather than calling set_xticklabels(): that method
    # freezes the label texts without fixing the tick positions, which discards the
    # DateFormatter set above and can leave labels attached to the wrong ticks.
    plt.setp(ax.get_xticklabels(), rotation=45, horizontalalignment="right")

# Render explicitly so the cell does not end with a Text(...) repr.
plt.show()

In [None]:
# Scratch check: reindex three hourly UTC readings onto a full day of hourly slots.
# Slots without a reading come out as NaN.
test_df = pd.DataFrame(
    [
        {"read_time": pd.to_datetime("2023-01-01T14:00:00Z"), "kwh": 1},
        {"read_time": pd.to_datetime("2023-01-01T15:00:00Z"), "kwh": 2},
        {"read_time": pd.to_datetime("2023-01-01T16:00:00Z"), "kwh": 3},
    ]
).set_index("read_time")

# Lowercase "h" is the hourly alias; the uppercase "H" spelling was deprecated in pandas 2.2.
new_index = pd.date_range(
    pd.to_datetime("2023-01-01T00:00:00Z"),
    pd.to_datetime("2023-01-02T00:00:00Z"),
    freq="1h",
)

test_df = test_df.reindex(new_index)

test_df