In [14]:
import pandas as pd

# Human-readable display labels for each policy combination.
# Keys have the form "<scheduling_policy>_<carbon_policy>" (this is how
# load_task_details builds its lookup key). A "\n" inside a value is a literal
# newline in the label text — presumably to wrap long labels in plots.
# NOTE(review): "Wait AWhile" and "Spot-First-\nWaitAwhile" spell the same name
# differently — confirm which spelling is intended before relying on the labels.
carbon_policy_labels = {
    "carbon_waiting": "Lowest-\nWindow",
    "carbon_oracle":  "Lowest-\nWindow*",
    "carbon_lowest": "Lowest-\nSlot",
    "suspend-resume_oracle": "Wait AWhile",
    "suspend-resume-threshold_oracle": "Ecovisor",
    "carbon_cst_oracle": "Carbon-\nTime*",
    "carbon_cst_average": "Carbon-\nTime",
    "cost_oracle": "AllWait-\nThreshold",
    "carbon-cost_cst_average": "RES-First-\nCarbon-Time",
    "carbon-cost_waiting": "RES-First-\nLowest-\nWindow",
    "carbon-spot_cst_average": "Spot-First-\nCarbon-Time",
    "suspend-resume-spot_oracle": "Spot-First-\nWaitAwhile",
    "suspend-resume-spot-threshold_oracle": "Spot-First-\nEcovisor",
    "carbon-cost-spot_cst_average": "SPOT-RES-\nCarbon-Time"
}

# Loader taken from the existing GAIA notebooks.

def load_task_details(cluster_type, task_trace, scheduling_policy, carbon_start_index, carbon_policy, carbon_trace, reserved, waiting_times_str):
    """Read one per-task results CSV and tag its rows with the run's parameters.

    The file is looked up under ``../results/<cluster_type>/<task_trace>/``;
    its name is assembled from the remaining arguments, with a
    ``slurm-details-`` prefix when ``cluster_type`` is ``"slurm"`` and a
    ``details-`` prefix otherwise.

    Args:
        cluster_type: Results sub-directory; ``"slurm"`` selects the slurm file prefix.
        task_trace: Task-trace name (directory component, stored in ``task_trace``).
        scheduling_policy: Scheduling policy name (file-name component, stored
            in ``scheduling_policy``).
        carbon_start_index: Carbon start index (file-name component, stored in
            ``start_index``).
        carbon_policy: Carbon policy name (file-name component).
        carbon_trace: Carbon trace name (file-name component only).
        reserved: Reserved value (file-name component only).
        waiting_times_str: Waiting-time string (file-name component only).

    Returns:
        A DataFrame with the CSV contents plus the columns ``carbon_policy``
        (display label), ``scheduling_policy``, ``start_index`` and
        ``task_trace``, without the rows whose ``ID`` equals -1.

    Raises:
        KeyError: If ``"<scheduling_policy>_<carbon_policy>"`` has no entry in
            ``carbon_policy_labels``.
    """
    if cluster_type != "slurm":
        csv_path = f"../results/{cluster_type}/{task_trace}/details-{scheduling_policy}-{carbon_start_index}-{carbon_policy}-{carbon_trace}-{reserved}-{waiting_times_str}.csv"
    else:
        csv_path = f"../results/{cluster_type}/{task_trace}/slurm-details-{scheduling_policy}-{carbon_start_index}-{carbon_policy}-{carbon_trace}-{reserved}-{waiting_times_str}.csv"

    # The file is read first and the label looked up afterwards, so a missing
    # file is reported before a missing label.
    tagged_tasks = pd.read_csv(csv_path).assign(
        carbon_policy=carbon_policy_labels[scheduling_policy + "_" + carbon_policy],
        scheduling_policy=scheduling_policy,
        start_index=carbon_start_index,
        task_trace=task_trace,
    )

    # Drop the rows carrying the placeholder ID -1.
    return tagged_tasks[tagged_tasks["ID"] != -1]

In [15]:
import plotly.express as px
from datetime import datetime
from carbon import get_carbon_model, CarbonModel
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pytz

# Task traces to plot; each name is used as a directory component of the
# results path built by load_task_details.
traces = ['phased'] # , "pai_200"

# Waiting-time settings; each value is passed to load_task_details as the last
# component of the results file name and shown in the figure title.
waiting_times = ["48", "6x24", "4"]

# (scheduling_policy, carbon_policy) pairs to plot. Un-comment an entry to
# include it.
# NOTE(review): the ("suspend-resume-threshold", "oracle") entry is listed twice.
scheduling_policies = [ 
   #  ("carbon", "lowest"),
    ("carbon", "oracle"),
   #  ("carbon", "cst_average"),
    # ("suspend-resume-threshold", "oracle"),
    # ("suspend-resume-threshold", "oracle"),
    # ("suspend-resume", "oracle"),
]


# Carbon model shared by every figure.
# NOTE(review): "AU-SA" and 7000 are hard-coded again in the load_task_details
# call inside the plotting loop — the two places must be kept in sync.
carbon_trace = get_carbon_model("AU-SA", 7000)
# Timestamp of the first row of the carbon trace; time_to_dates adds it to
# simulation-relative offsets to obtain absolute times.
start_date_in_carbon_trace_as_timestamp = carbon_trace.df.iloc[0]["timestamp"]

# Scatter of the carbon intensity over time, coloured by intensity. The
# plotting loop reuses this figure's first trace in every per-run figure.
fig_carbon = px.scatter(carbon_trace.df, x='datetime', y="carbon_intensity_avg", color="carbon_intensity_avg", color_continuous_scale=px.colors.sequential.speed)

def time_to_dates(seconds_since_simulation_start) -> datetime:
    """Convert a simulation-relative offset into a timezone-aware UTC datetime.

    The offset is added to ``start_date_in_carbon_trace_as_timestamp`` (the
    timestamp of the first row of the carbon trace) and the sum is interpreted
    as a POSIX timestamp.

    Args:
        seconds_since_simulation_start: Offset from the start of the
            simulation, in seconds.

    Returns:
        The corresponding ``datetime`` with its timezone set to UTC. (The
        function was previously annotated ``-> str`` although it has always
        returned a ``datetime``.)
    """
    absolute_timestamp = seconds_since_simulation_start + start_date_in_carbon_trace_as_timestamp
    return datetime.fromtimestamp(absolute_timestamp, pytz.timezone('UTC'))

# Render one two-row figure per (trace, policy, waiting time): a Gantt chart of
# the jobs on top and the carbon-intensity trace below, sharing the time axis.
for trace in traces:

    for policy in scheduling_policies:
        scheduling_policy = policy[0]
        carbon_policy = policy[1]

        # The title only depends on the policy, so build it once per policy.
        # Unknown policy combinations fall back to an empty label.
        title_key = f"{scheduling_policy}_{carbon_policy}"
        title = f"{scheduling_policy}_{carbon_policy} ({carbon_policy_labels.get(title_key, '')})"

        for waiting_time in waiting_times:
            df = load_task_details(
                "simulation", trace, scheduling_policy, 7000, carbon_policy, "AU-SA", 0, waiting_time
            ).sort_values(by=["start_time", "length"])

            # Convert simulation-relative times into UTC datetimes for plotting.
            df["start_time_date"] = df["start_time"].apply(time_to_dates)
            df["submission_date"] = df["arrival_time"].apply(time_to_dates)
            # NOTE: int(waiting_time) cannot parse values such as "6x24", so the
            # deadline column stays disabled.
            # df["deadline"] = (df["arrival_time"] + (int(waiting_time) * 3600)).apply(time_to_dates)
            df["exit_time_date"] = df["exit_time"].apply(time_to_dates)

            # Initial x-range: first job start to last job exit.
            # NOTE(review): waiting lines of jobs submitted before the first
            # start begin to the left of this range — confirm this is intended.
            min_date_in_trace = time_to_dates(df["start_time"].min())
            max_date_in_trace = time_to_dates(df["exit_time"].max())

            fig_gantt = px.timeline(df, x_start="start_time_date", x_end="exit_time_date", y="ID", hover_data=["start_time", "arrival_time"])

            # One dotted line per job, from its submission to its start, drawn
            # on the Gantt subplot (first x/y axes).
            submission_markers = [
                {
                    'type': 'line',
                    'x0': row.submission_date,
                    'x1': row.start_time_date,
                    'y0': row.ID,
                    'y1': row.ID,
                    'xref': 'x1',
                    'yref': 'y1',
                    'line': dict(color="MediumPurple", width=2, dash="dot"),
                }
                for row in df.itertuples(index=False)
            ]

            fig = make_subplots(rows=2, cols=1, shared_xaxes=True)

            fig.add_trace(fig_gantt.data[0], row=1, col=1)
            fig.add_trace(fig_carbon.data[0], row=2, col=1)

            fig.update_layout(
                title_text=f"{trace}'s scheduling via {title}, {waiting_time}",
                # px.timeline bars are only positioned correctly on a date axis.
                xaxis=dict(type='date'),
                xaxis2=dict(type='date'),
                shapes=submission_markers,
            )
            # The carbon trace only *references* a colour axis; the colour scale
            # chosen for fig_carbon lives in that figure's layout. Copy it over,
            # otherwise the subplot silently falls back to the default scale.
            fig.update_layout(coloraxis=fig_carbon.layout.coloraxis)

            fig.update_xaxes(title_text="Date", range=[min_date_in_trace, max_date_in_trace])

            fig.update_yaxes(
                title_text="Job ID",
                fixedrange=True,
                range=[-0.5, df['ID'].max() + 1],
                tickmode='linear',
                row=1,
                col=1,
            )
            # NOTE(review): the axis title says gCO₂eq/kWh but the range is
            # 0–0.5 — confirm the unit of carbon_intensity_avg.
            fig.update_yaxes(
                title_text="Carbon intensity in gCO₂eq/kWh",
                fixedrange=True,
                range=[0, 0.5],
                row=2,
                col=1,
            )

            fig.show()