In [1]:
import os
from pathlib import Path
from typing import cast

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
from IPython.display import display

import modfs.utils.plotting as plot
from modfs.data.result_load import add_derived_columns, load_results

ROOT_PATH = Path("../../..").resolve()
PATH_DATA = ROOT_PATH / "data"
FIGS_DIR = PATH_DATA / "figs/paper/distributed_scheduling/analysis"
FIGS_DIR.mkdir(exist_ok=True, parents=True)

results_path = PATH_DATA / "run/"

df_original = load_results(results_path)
df_original.loc[
    (df_original["modular_algorithm"] == "cocktail") & (df_original["algorithm"] == "simple"),
    "modular_algorithm",
] = "Cocktail (simple)"
df_original.loc[
    (df_original["modular_algorithm"] == "broadcast") & (df_original["algorithm"] == "simple"),
    "modular_algorithm",
] = "Broadcast (simple)"
df_original.replace(
    {
        "modular_algorithm": {
            "cocktail": "Cocktail",
            "broadcast": "Broadcast",
            "constraint": "Constraint",
        }
    },
    inplace=True,
)
df_original.loc[
    (df_original["modular_algorithm"] == "Constraint")
    & (df_original["error"] == "")
    & (df_original["solved"] == False),
    "error",
] = "time-out"

# Import heterogeneous bookletA and bookletAB as computational-orig to be able to compare
# with computational that has a run time of 1 hour instead of 10 minutes. We add os.sep at the end
# to avoid matching with bookletABUniform or bookletAUniform.
generic_names = [f"generic/mixed/duplex/booklet{a}" for a in ["A", "AB"]]
vals = tuple(str(results_path / p) + os.sep for p in generic_names)
df_tmp = df_original[df_original["run_file_path"].str.startswith(vals)].copy()
df_tmp["run_file_path"] = df_tmp["run_file_path"].str.replace(
    str(Path("generic/mixed/duplex")), "computational"
)
df_tmp["original_path"] = df_tmp["original_path"].str.replace(
    "generic/mixed/duplex", "computational"
)

df_original = pd.concat([df_original, df_tmp], ignore_index=True)

df_original_p = add_derived_columns(
    df_original,
    groups={
        "homogeneous": {"generic/printer_cases"},
        "heterogeneous": {"generic/mixed"},
        "computational": {"computational"},
    },
    gen_subpath="data/gen",
    group_source="run_file_path",
    run_subpath=results_path,
)

selector = df_original_p["original_path"].str.startswith(("printer_cases", "mixed"))
df_original_p.loc[selector, "original_path"] = (
    "generic/" + df_original_p.loc[selector, "original_path"]
)

df_normal = df_original_p.copy()
df_solved = df_normal.query("solved").copy()
df_comp = df_normal.query("group == 'computational'").copy()

df_lb = df_solved.query("modular_algorithm == 'Constraint'")[
    ["group", "original_path", "file_id", "lower_bound"]
]

df_bounds = df_normal.merge(
    df_lb, on=["group", "original_path", "file_id"], how="left", suffixes=("_x", None)
).drop(columns=["lower_bound_x"])
df_bounds["optimality_gap"] = (df_bounds["makespan"] - df_bounds["lower_bound"]) / np.maximum(
    1e-10, df_bounds["makespan"]
)
df_bounds["optimality_ratio"] = df_bounds["makespan"] / np.maximum(1e-10, df_bounds["lower_bound"])

df_grouped = df_bounds.groupby(
    ["group", "modular_algorithm", "algorithm", "time_limit"], as_index=False, sort=False
)

print("Original")
display(df_original_p.dtypes)

print("Normal")
display(df_normal.dtypes)

print("Bounds")
display(df_bounds.dtypes)


figs = {}

Looking for files to load


Loading info files:   0%|          | 0/90 [00:00<?, ?it/s]

All files loaded!
Original


group                 object
run_file_path         object
original_path         object
file_id                int64
modules                int64
jobs                   int64
modular_algorithm     object
algorithm             object
time_limit             int64
deadline             float64
iterations           float64
total_time           float64
time_per_job         float64
makespan             float64
optimality_gap       float64
lower_bound          float64
timeout                 bool
solved                  bool
optimal                 bool
error                 object
dtype: object

Normal


group                 object
run_file_path         object
original_path         object
file_id                int64
modules                int64
jobs                   int64
modular_algorithm     object
algorithm             object
time_limit             int64
deadline             float64
iterations           float64
total_time           float64
time_per_job         float64
makespan             float64
optimality_gap       float64
lower_bound          float64
timeout                 bool
solved                  bool
optimal                 bool
error                 object
dtype: object

Bounds


group                 object
run_file_path         object
original_path         object
file_id                int64
modules                int64
jobs                   int64
modular_algorithm     object
algorithm             object
time_limit             int64
deadline             float64
iterations           float64
total_time           float64
time_per_job         float64
makespan             float64
optimality_gap       float64
timeout                 bool
solved                  bool
optimal                 bool
error                 object
lower_bound          float64
optimality_ratio     float64
dtype: object

In [2]:
fig = plot.plot_boxy(
    df_grouped["makespan"].mean(),
    x="group",
    y="makespan",
    color="modular_algorithm",
    pattern_shape="time_limit",
    barmode="group",
)
figs["makespan_by_group_algorithm"] = fig
fig.show()

In [6]:
df_compared = plot.baseline_compare(df_bounds, "Constraint", "makespan")

df_compared.rename(columns={"makespan": "relative_makespan"}, inplace=True)

display(
    pd.DataFrame(
        df_compared.groupby(["group", "modular_algorithm", "time_limit"])[
            "relative_makespan"
        ].mean()
    )
)

fig = plot.plot_boxy(
    df_compared,
    x="modular_algorithm",
    y="relative_makespan",
    color="time_limit",
    facet_col="group",
    plot_type=plot.PlotType.BOX,
)
fig.show()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,relative_makespan
group,modular_algorithm,time_limit,Unnamed: 3_level_1
computational,Broadcast,600,1.463653
computational,Broadcast,3600,1.468301
computational,Broadcast (simple),600,3.3825
computational,Broadcast (simple),3600,3.3825
computational,Cocktail,600,1.377604
computational,Cocktail,3600,1.377495
computational,Cocktail (simple),600,3.3825
computational,Cocktail (simple),3600,3.3825
heterogeneous,Broadcast,600,1.238687
heterogeneous,Broadcast (simple),600,4.143947


In [None]:
fig = plot.plot_boxy(
    df_grouped["total_time"].mean(),
    x="group",
    y="total_time",
    color="modular_algorithm", 
    pattern_shape="time_limit",
    barmode="group",
    title="Average run time per benchmark group and algorithm"
)
figs["time_by_group_algorithm"] = fig
fig.show()

In [None]:
fig = plot.plot_boxy(
    df_grouped["optimality_gap"].mean(),
    x="group",
    y="optimality_gap",
    color="modular_algorithm",
    pattern_shape="time_limit",
    barmode="group",
    title="Optimality gap<br><sup>(Makespan - Lower bound) / Makespan</sup>"
)
figs["optimality_gap"] = fig
fig.show()

fig = plot.plot_boxy(
    df_grouped["optimality_ratio"].mean(),
    x="group",
    y="optimality_ratio",
    color="modular_algorithm",
    pattern_shape="time_limit",
    barmode="group",
    title="Optimality ratio<br><sup>Makespan / Lower bound</sup>",
)
figs["optimality_ratio"] = fig
fig.show()

In [None]:
fig = plot.plot_boxy(
    df_grouped["solved"].mean(),
    x="group",
    y="solved",
    color="modular_algorithm",
    pattern_shape="time_limit",
    barmode="group",
    title="Solved ratio<br><sup>Solved / Total</sup>",
)
fig.show()

In [None]:
# display(df_bounds.groupby(["group", "modular_algorithm", "algorithm", "time_limit", "error"],as_index=True)["error"].count())
df_tmp = df_bounds.value_counts(subset=["group", "modular_algorithm", "algorithm", "time_limit", "error"],sort=False).reset_index()
df_tmp.sort_values(by=list(df_tmp.columns))
display(df_tmp)

Unnamed: 0,group,modular_algorithm,algorithm,time_limit,error,count
0,computational,Broadcast,bhcs,600,local-scheduler,39
1,computational,Broadcast,bhcs,600,none,953
2,computational,Broadcast,bhcs,600,time-out,88
3,computational,Broadcast,bhcs,3600,local-scheduler,43
4,computational,Broadcast,bhcs,3600,none,1030
5,computational,Broadcast,bhcs,3600,time-out,7
6,computational,Broadcast (simple),simple,600,none,1080
7,computational,Broadcast (simple),simple,3600,none,1080
8,computational,Cocktail,bhcs,600,local-scheduler,49
9,computational,Cocktail,bhcs,600,none,1028


In [None]:
df_tcomp = df_bounds.query("group == 'computational'")

df_tmp = df_tcomp.query("optimal==True")[
    ["group", "original_path", "file_id", "makespan"]
]

df_tmp = df_tcomp.merge(
    df_tmp, on=["group", "original_path", "file_id"], how="left", suffixes=(None, "_optimal")
)
df_tmp2 = df_tmp[["group", "modular_algorithm", "original_path", "file_id", "algorithm", "time_limit", "optimal", "makespan", "makespan_optimal"]].copy()
df_tmp2.sort_values(by=["group", "original_path", "file_id", "modular_algorithm", "algorithm", "time_limit"], inplace=True)
display(df_tmp)

# fig = plot.plot_boxy(
#     x="group",
#     y="solved",
#     color="modular_algorithm",
#     pattern_shape="time_limit",
#     barmode="group",
#     title="Solved ratio<br><sup>Solved / Total</sup>",
# )
# fig.show()

Unnamed: 0,group,run_file_path,original_path,file_id,modules,jobs,modular_algorithm,algorithm,time_limit,deadline,...,time_per_job,makespan,optimality_gap,timeout,solved,optimal,error,lower_bound,optimality_ratio,makespan_optimal
0,computational,computational-orig/bookletA/broadcast/bhcs/tim...,generic/mixed/duplex/bookletA,0,2,5,Broadcast,bhcs,600,,...,0.200000,5.903746e+07,,False,True,False,none,,,
1,computational,computational-orig/bookletA/broadcast/bhcs/tim...,generic/mixed/duplex/bookletA,1,3,5,Broadcast,bhcs,600,,...,0.400000,8.398253e+07,,False,True,False,none,,,
2,computational,computational-orig/bookletA/broadcast/bhcs/tim...,generic/mixed/duplex/bookletA,2,4,5,Broadcast,bhcs,600,,...,0.400000,1.089276e+08,,False,True,False,none,,,
3,computational,computational-orig/bookletA/broadcast/bhcs/tim...,generic/mixed/duplex/bookletA,3,5,5,Broadcast,bhcs,600,,...,0.600000,1.338727e+08,,False,True,False,none,,,
4,computational,computational-orig/bookletA/broadcast/bhcs/tim...,generic/mixed/duplex/bookletA,4,6,5,Broadcast,bhcs,600,,...,0.600000,1.588177e+08,,False,True,False,none,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10795,computational,computational/bookletAB/constraint,computational/bookletAB,535,6,300,Constraint,,3600,,...,12038.813866,1.031108e+09,0.405458,True,True,False,none,613036646.0,1.681967,
10796,computational,computational/bookletAB/constraint,computational/bookletAB,536,7,300,Constraint,,3600,,...,12030.741950,1.069578e+09,0.403520,True,True,False,none,637981718.0,1.676503,
10797,computational,computational/bookletAB/constraint,computational/bookletAB,537,8,300,Constraint,,3600,,...,13246.448827,1.133420e+09,0.414598,True,True,False,none,663506088.0,1.708228,
10798,computational,computational/bookletAB/constraint,computational/bookletAB,538,9,300,Constraint,,3600,,...,12038.945068,1.210787e+09,0.431402,True,True,False,none,688451160.0,1.758712,


In [None]:
FIGS_DIR.mkdir(exist_ok=True)
for path, fig in figs.items():
    print(path)
    pio.write_html(fig, str(FIGS_DIR / f"{path}.html"))
    # fig.update_layout(font={"size": 20})
    # pio.write_image(fig, str(FIGS_DIR / f"{path}.pdf"), width=1600, height=900)


makespan_by_group_algorithm
time_by_group_algorithm
optimality_gap
optimality_ratio
