In [29]:
import pandas as pd
import os

directory = '../results/simulation/different_lengths'

work_phase_index_to_name = {
    '0': 'balanced',
    '1': 'long-high',
    '2': 'short-high',
}

columns = [
    'filename',
    'scheduling_policy',
    'work_type',
    'work_phases',
    'startup_length',
    'startup_power',
    'waiting_time',
    'id',
    'arrival_time',
    'length',
    'carbon_cost',
    'start_time',
    'waiting_time',
    'exit_time'
]

all_results = pd.DataFrame([])

# List all files in the directory
for filename in os.listdir(directory):
    if os.path.isfile(os.path.join(directory, filename)):
        # ths is based on the mapping in generate_evaluation_jobs.sh
        parameters = filename.split('_')

        if parameters[-1] != 'details':
            continue

        parameter_dict = {
            'scheduling_policy': parameters[0],
            'work_type': parameters[1],
            'work_phases': work_phase_index_to_name[parameters[2]],
            'startup_length': parameters[3],
            'startup_power': parameters[4],
            'waiting_time': parameters[5],
        }

        df = pd.read_csv(f"{directory}/{filename}")
        df = df.drop(df.index[-1])

        for key, value in parameter_dict.items():
            df[key] = value

        df['filename'] = filename

        all_results = pd.concat([
            all_results, 
            df
        ], ignore_index=True)

print(all_results.head())
print(f"Read {len(all_results)} entries")

   ID  arrival_time  length  cpus length_class  resource_class  carbon_cost  \
0   0             0    3600     1          0-2             1.0       8.4135   
1   2             0    9600     1          2-6             1.0      11.4845   
2   1             0    7200     1          0-2             1.0      11.4845   
3   8          3600    9600     1          2-6             1.0      11.4845   
4   6          3600    3600     1          0-2             1.0       8.4135   

   dollar_cost  start_time waiting_time  exit_time     reason  \
0       0.0624       28800           12      32400  completed   
1       0.1664       28800           12      38400  completed   
2       0.1248       28800           12      36000  completed   
3       0.1664       28800           12      38400  completed   
4       0.0624       28800           12      32400  completed   

  scheduling_policy        work_type work_phases startup_length startup_power  \
0            carbon  periodic-phases  short-high     

In [49]:
import plotly.express as px

"""
Let's first compare the same job across different scheduling approaches,
deducing how much carbon is emitted under each scheduler
"""

same_job_different_schedulers = all_results.groupby(["work_type", "work_phases", "length", "arrival_time", "waiting_time", "startup_length", "startup_power"])

index = 0

same_job_different_schedulers_plot_df = pd.DataFrame([])

for category, group_df in same_job_different_schedulers:

    group_df["job_index"] = index

    same_job_different_schedulers_plot_df = pd.concat([
        same_job_different_schedulers_plot_df,
        group_df
    ])

    index += 1

    if len(group_df ==1):
        print(category)

# print(same_job_different_schedulers_plot_df)

"""
Scrolling through by eye, there are some cases where the suspend-resume strategy performed 
better but there are also some cases where it performed worse.

Lets do a graph just plotting each experiment, with the carbon emissions on the y axis
"""


same_job_different_schedulers_fig = px.scatter(
    same_job_different_schedulers_plot_df, x="job_index", y="carbon_cost", color="scheduling_policy",
    hover_data=same_job_different_schedulers_plot_df.columns
    )
same_job_different_schedulers_fig.show()




('constant-from-periodic-phases', 'balanced', np.int64(2400), np.int64(0), '24', '1800', '100')
('constant-from-periodic-phases', 'balanced', np.int64(2400), np.int64(0), '24', '1800', '200')
('constant-from-periodic-phases', 'balanced', np.int64(2400), np.int64(0), '24', '300', '100')
('constant-from-periodic-phases', 'balanced', np.int64(2400), np.int64(0), '24', '300', '200')
('constant-from-periodic-phases', 'balanced', np.int64(2400), np.int64(0), '48', '0', '100')
('constant-from-periodic-phases', 'balanced', np.int64(2400), np.int64(0), '48', '0', '200')
('constant-from-periodic-phases', 'balanced', np.int64(2400), np.int64(0), '48', '300', '100')
('constant-from-periodic-phases', 'balanced', np.int64(2400), np.int64(0), '48', '300', '200')
('constant-from-periodic-phases', 'balanced', np.int64(2400), np.int64(1800), '24', '1800', '100')
('constant-from-periodic-phases', 'balanced', np.int64(2400), np.int64(1800), '24', '1800', '200')
('constant-from-periodic-phases', 'balanced'