In [7]:
import pandas as pd
import os

directory = '../results/simulation/different_lengths'

work_phase_index_to_name = {
    '0': 'balanced',
    '1': 'long-high',
    '2': 'short-high',
}

columns = [
    'filename',
    'scheduling_policy',
    'work_type',
    'work_phases',
    'startup_length',
    'startup_power',
    'waiting_time',
    'id',
    'arrival_time',
    'length',
    'carbon_cost',
    'start_time',
    'waiting_time',
    'exit_time'
]

all_results = pd.DataFrame([])

print(len(os.listdir(directory)))

def read_trace(filename: str): 
        # this is based on the mapping in generate_evaluation_jobs.sh
        parameters = filename.split('_')

        parameter_dict = {
            'scheduling_policy': parameters[0],
            'work_type': parameters[1],
            'work_phases': work_phase_index_to_name[parameters[2]],
            'startup_length': parameters[3],
            'startup_power': parameters[4],
            'waiting_time': parameters[5],
        }

        df = pd.read_csv(f"{directory}/{filename}")
        df = df.drop(df.index[-1])

        if (len(df) < 9):
            print(filename)

        for key, value in parameter_dict.items():
            df[key] = value

        df['filename'] = filename

        return df


# List all files in the directory
for filename in os.listdir(directory):
    if os.path.isfile(os.path.join(directory, filename)):
        parameters = filename.split('_')

        if parameters[-1] != 'details':
            continue

        df = read_trace(filename)

        all_results = pd.concat([
            all_results, 
            df
        ], ignore_index=True)

print(all_results.head())
print(f"Read {len(all_results)} entries")

768
   ID  arrival_time  length  cpus length_class  resource_class  carbon_cost  \
0   0             0    3600     1          0-2             1.0       8.4135   
1   2             0    9600     1          2-6             1.0      11.4845   
2   1             0    7200     1          0-2             1.0      11.4845   
3   8          3600    9600     1          2-6             1.0      11.4845   
4   6          3600    3600     1          0-2             1.0       8.4135   

   dollar_cost  start_time waiting_time  exit_time     reason  \
0       0.0624       28800           12      32400  completed   
1       0.1664       28800           12      38400  completed   
2       0.1248       28800           12      36000  completed   
3       0.1664       28800           12      38400  completed   
4       0.0624       28800           12      32400  completed   

  scheduling_policy        work_type work_phases startup_length startup_power  \
0            carbon  periodic-phases  short-high 

In [10]:
import plotly.express as px

"""
Let's first compare the same job across different scheduling approaches,
deducing how much carbon is emitted under each scheduler
"""

same_job_different_schedulers = all_results.groupby(["work_type", "work_phases", "ID", "arrival_time", "waiting_time", "startup_length", "startup_power"])

index = 0

missing_jobs = 0

same_job_different_schedulers_plot_df = pd.DataFrame([])

for category, group_df in same_job_different_schedulers:

    group_df["job_index"] = index

    carbon_cost_non_interrupted = group_df[group_df["scheduling_policy"] == "carbon"]["carbon_cost"].sum()
    carbon_cost_suspend_resume = group_df[group_df["scheduling_policy"] == "suspend-resume"]["carbon_cost"].sum()

    if (abs(carbon_cost_non_interrupted - carbon_cost_suspend_resume) < 1):
        #print(f"{category} had no savings")

        same_job_different_schedulers_plot_df = pd.concat([
            same_job_different_schedulers_plot_df,
            pd.DataFrame([{"job_index": index, "carbon_cost":carbon_cost_non_interrupted, "scheduling_policy": "same"}])
        ])

    else:
        print(f"{category} had savings")

        same_job_different_schedulers_plot_df = pd.concat([
            same_job_different_schedulers_plot_df,
            group_df
        ])

    index += 1
# print(same_job_different_schedulers_plot_df)

"""
Scrolling through by eye, there are some cases where the suspend-resume strategy performed 
better but there are also some cases where it performed worse.

Lets do a graph just plotting each experiment, with the carbon emissions on the y axis
"""


same_job_different_schedulers_fig = px.scatter(
    same_job_different_schedulers_plot_df, x="job_index", y="carbon_cost", color="scheduling_policy",
    hover_data=same_job_different_schedulers_plot_df.columns
    )
same_job_different_schedulers_fig.show()



('constant-from-periodic-phases', 'balanced', np.int64(6), np.int64(3600), '12', '0', '100') had savings
('constant-from-periodic-phases', 'balanced', np.int64(6), np.int64(3600), '12', '0', '200') had savings
('constant-from-periodic-phases', 'balanced', np.int64(6), np.int64(3600), '12', '1800', '100') had savings
('constant-from-periodic-phases', 'balanced', np.int64(6), np.int64(3600), '12', '1800', '200') had savings
('constant-from-periodic-phases', 'balanced', np.int64(6), np.int64(3600), '12', '300', '100') had savings
('constant-from-periodic-phases', 'balanced', np.int64(6), np.int64(3600), '12', '300', '200') had savings
('constant-from-periodic-phases', 'balanced', np.int64(6), np.int64(3600), '12', '600', '100') had savings
('constant-from-periodic-phases', 'balanced', np.int64(6), np.int64(3600), '12', '600', '200') had savings
('constant-from-periodic-phases', 'balanced', np.int64(6), np.int64(3600), '24', '0', '100') had savings
('constant-from-periodic-phases', 'balanc

In [17]:
from plotly.subplots import make_subplots
from datetime import datetime
import pytz
import sys

sys.path.append('../src/')

import carbon

"""
For debugging purposes, plot some schedules, to see whats up
"""

traces_to_plot = ["suspend-resume_periodic-phases_2_0_200_4_details", "carbon_periodic-phases_2_0_200_4_details"]

carbon_trace = carbon.get_carbon_model("AU-SA", 7000, extra_columns=True)
start_date_in_carbon_trace_as_timestamp = carbon_trace.df.iloc[0]["timestamp"]

fig_carbon = px.scatter(carbon_trace.df, x='datetime', y="carbon_intensity_avg", color="carbon_intensity_avg", color_continuous_scale=px.colors.sequential.speed)

def time_to_dates(seconds_since_simulation_start) -> str:
    adjusted_timestamp = seconds_since_simulation_start + start_date_in_carbon_trace_as_timestamp
    date = datetime.fromtimestamp(adjusted_timestamp, pytz.timezone('UTC'))
    return date 

def plot_trace(filename: str):
    df = read_trace(filename)

    df["start_time_date"] = df["start_time"].apply(time_to_dates)
    df["submission_date"] = df["arrival_time"].apply(time_to_dates)
    # df["deadline"] = (df["arrival_time"] + (int(waiting_time) * 3600)).apply(time_to_dates)
    df["exit_time_date"] = df["exit_time"].apply(time_to_dates)

    min_date_in_trace = time_to_dates(df["start_time"].min())
    max_date_in_trace = time_to_dates(df["exit_time"].max())

    fig_gantt = px.timeline(df, x_start="start_time_date", x_end="exit_time_date", y="ID", hover_data=["start_time", "arrival_time"])

    submission_markers = []

    for row in df.itertuples(index=False):
        submission_markers.append({'type': 'line', 'x0': row.submission_date, 'x1': row.start_time_date, 'y0': row.ID, 'y1': row.ID, 'xref': 'x1', 'yref':'y1', 'line': dict(color="MediumPurple", width=2, dash="dot")})

    fig = make_subplots(rows=2, cols=1, shared_xaxes=True)

    fig.add_trace(fig_gantt.data[0], row=1, col=1)
    fig.add_trace(fig_carbon.data[0], row=2, col=1)

    fig.update_layout(
        title_text = f"schedule of {filename}",
        xaxis=dict(
            type='date',
        ),
        xaxis2=dict(
            type='date'
        ), 
        shapes=submission_markers
    )
    fig.update_xaxes(title_text="Date", range=[min_date_in_trace, max_date_in_trace])

    fig.update_yaxes(title_text="Job ID", fixedrange=True, row=1, col=1)
    fig.update_yaxes(title_text="Carbon intensity in gCO₂eq/kWh", fixedrange=True, row=2, col=1)

    yaxis2 = fig.layout.yaxis2

    fig.update_layout({'yaxis': {'range': [-0.5,df['ID'].max() + 1], 'tickmode': 'linear'}})
    fig.update_layout({'yaxis2': {'range': [0,0.5]}})
    fig.show()

for sussy_files in traces_to_plot:
    plot_trace(sussy_files)