## Init

In [1]:
import os
import sys
from pathlib import Path

# Unless the kernel already runs inside a folder named `backend`, expose the
# current folder, the folder two levels up (`repo`) and its `drafts` subfolder
# on the import path, then switch the working directory to `src/backend`.
cwd = Path(os.getcwd())
if not cwd.name == "backend":
    repo = cwd.parent.parent
    sys.path.extend(str(entry) for entry in (cwd, repo, repo / "drafts"))
    os.chdir(repo / "src" / "backend")

from data.evaluation.src.jupyter_utils.preamble import *
from importlib import reload

import src.eva_util as eva

### List available stats

In [None]:
reload(export)
reload(eva)

# Every parameter filter is left open (None); `load=False` presumably restricts the
# query to metadata, since only the second return value is used below.
all_params = dict.fromkeys(
    ["ocel_key", "target_otypes", "rule", "graph_mode", "remove_otype_loops"]
)
kwargs = {
    **all_params,
    "load": False,
    # "min_timestamp": datetime.strptime("20240823-140000", "%Y%m%d-%H%M%S"),
    "user": "jannes",
}
_, _tes_meta, _ = export.load_ocel_stats(name="alloc_target_emissions", **kwargs)  # type: ignore
# ("alloc_report" stats are currently not listed.)
_, _evs_meta, _ = export.load_ocel_stats(name="alloc_event_stats", **kwargs)  # type: ignore
_, _objs_meta, _ = export.load_ocel_stats(name="alloc_object_stats", **kwargs)  # type: ignore

# Abbreviate the rule names in place.
for meta in (_tes_meta, _evs_meta, _objs_meta):
    meta["rule"] = meta["rule"].replace(eva.RULE_ABBRS)


def _count_frames(meta, label):
    """Count metadata rows per (ocel, target_otypes), one column per rule, grouped under `label`."""
    counts = meta.groupby(["ocel", "target_otypes", "rule"], dropna=False).size().unstack()
    return pd_util.prepend_level(counts, label, axis=1)


# Show number of DataFrames loaded
_frame_counts = pd.concat(
    [
        _count_frames(_tes_meta, "target_emissions"),
        _count_frames(_evs_meta, "ev_stats"),
        _count_frames(_objs_meta, "obj_stats"),
    ],
    axis=1,
)
_column_order = pd_util.index_order(
    ["target_emissions", "ev_stats", "obj_stats", "alloc_reports", "AT", "PT", "CT"]
)
display(
    _frame_counts.sort_index(key=_column_order, axis=1)
    .style.format(precision=0, na_rep="---")
    .pipe(pd_style.style_multiindex)
)
# meta["graph_mode"].value_counts(dropna=False)

### Load Data

In [None]:
# Reload the project modules so that edits made on disk are picked up.
reload(export)
reload(eva)
reload(pd_util)
reload(ag)

# No parameter filter is set (all None); rule names are abbreviated via `replace`.
all_params = dict(
    ocel_key=None,
    target_otypes=None,
    rule=None,
    graph_mode=None,
    remove_otype_loops=None,
    replace={"rule": {"AllTargets": "AT", "ParticipatingTargets": "PT", "ClosestTargets": "CT"}},
)
kwargs = dict(
    **all_params,
    load=True,
)
# Each call returns (data, metadata, (tmin, tmax)); the tmin/tmax pairs are only
# referenced by the commented-out prints below.
_tes, _tes_meta, (tmin1, tmax1) = export.load_ocel_stats(
    name="alloc_target_emissions",
    **kwargs,  # type: ignore
)  # type: ignore
_evs, _evs_meta, (tmin2, tmax2) = export.load_ocel_stats(
    name="alloc_event_stats",
    **kwargs,  # type: ignore
)
_objs, _objs_meta, (tmin3, tmax3) = export.load_ocel_stats(
    name="alloc_object_stats",
    **kwargs,  # type: ignore
)

# print(tmin1)
# print(tmax1)
# print(tmin2)
# print(tmax2)
# print(tmin3)
# print(tmax3)

# Add a per-run parameter label; every metadata column is forwarded as a keyword argument.
for meta in [_tes_meta, _evs_meta, _objs_meta]:
    meta["param_str"] = meta.apply(lambda row: eva.get_param_str(**row, mode="column"), axis=1)

# Flatten the nested result dicts; target emissions are reduced to one Series per run,
# indexed by object id.
_tes_list = {
    t: te.set_index("ocel:oid")["ocean:object_emissions"]
    for t, te in util.unnest_dict(_tes).items()
}
_evs_list = {t: evs for t, evs in util.unnest_dict(_evs).items()}
_objs_list = {t: objs for t, objs in util.unnest_dict(_objs).items()}
# _reports_list = util.unnest_dict(_reports)
# Target emissions and event stats must cover the same runs; object stats may only
# exist for runs whose third key component (the rule) is "CT".
assert _tes_list.keys() == _evs_list.keys()
assert set(_objs_list.keys()).issubset([t for t in _evs_list.keys() if t[2] == "CT"])

In [None]:
# Show the keys of all loaded event-stat runs.
list(_evs_list)

### Specify param ordering

Also collect the HU and resource object types of each OCEL from the metadata.

In [5]:
reload(util)

# Parameter columns identifying one run, and the order in which OCELs are listed.
params = ["ocel", "target_otypes", "rule", "graph_mode", "remove_otype_loops"]
ocel_order = ["orderManagementWithDistances", "containerLogistics", "p2p", "hinge"]

all_meta = pd.concat([_tes_meta, _evs_meta])
hu_hu_meta = all_meta[all_meta["graph_mode"] == "HU-HU"]

# Within one OCEL, all HU-HU runs must share a single `hu_otypes` value.
_hu_partitions_per_ocel = hu_hu_meta.groupby("ocel")["hu_otypes"].nunique()
assert (
    _hu_partitions_per_ocel == 1
).all(), "HU/Resource partitions differentiate within runs with HU-HU mode!"
# A comma in `target_otypes` marks several target types, which is not supported here.
if all_meta["target_otypes"].apply(lambda tots: "," in tots).any():
    raise NotImplementedError("Evaluation only supporting single target otypes!")

# Target object types per OCEL.
otypes = all_meta.groupby("ocel")["target_otypes"].apply(lambda tots: set(tots)).to_dict()

# HU and resource object types per OCEL, parsed from the first HU-HU run of each OCEL.
_hu_hu_by_ocel = hu_hu_meta.groupby("ocel")
hu_otypes = _hu_hu_by_ocel["hu_otypes"].first().apply(util.set_from_str).to_dict()
resource_otypes = _hu_hu_by_ocel["resource_otypes"].first().apply(util.set_from_str).to_dict()
assert all(
    [otypes[ocel] == (hu_otypes[ocel] | resource_otypes[ocel]) for ocel in otypes]
), "HU/Resource not complete!"
assert all(
    [hu_otypes[ocel].isdisjoint(resource_otypes[ocel]) for ocel in otypes]
), "HU/Resource not disjoint!"

# otype_order_funcs = {ocel: ocel_util.get_default_otype_order_func(
# TODO
# )
# Order otypes by HU/Resource and alphabetically
otype_order = [
    otype
    for ocel in ocel_order
    for otype in (*sorted(hu_otypes[ocel]), *sorted(resource_otypes[ocel]))
]
param_order = ["AT", "PT", "CT", "CTx", "CTHU", "CTHUx"]

In [None]:
# Display the list of parameter column names.
params

## Compute stats

In [None]:
# Inspect the `.meta` attribute attached to the object stats of one example run.
_objs_list[("p2p", "material", "CT", "HU-HU", True)].meta

In [None]:
reload(pd_util)
reload(eva)

# One result frame per registered statistic, pivoted into one column per statistic.
stat_results = [
    eva.compute_alloc_stat(
        stat,
        params=params,
        _target_emissions=_tes_list,
        _event_stats=_evs_list,
        _object_stats=_objs_list,
    )
    for stat in eva.STATS.values()
]
stats = pd.concat(stat_results).pivot(index=[*params, "param_str"], columns="stat", values="value")
# Keep only runs with more than one target object.
stats = stats[stats["num_targets"] > 1]

ix = stats.index.to_frame()
_is_ct = ix["rule"] == "CT"
_is_p2p = ix["ocel"] == "p2p"
ctstats_nop2p = stats[_is_ct & ~_is_p2p]
# nonredundant: Remove P2P full graph (same as HU)
ctstats_nonredundant = stats[_is_ct & ~(_is_p2p & (ix["graph_mode"] == "Obj-Obj"))]

_overview_funcs = [
    eva.num_targets,
    eva.te_variation_coeff,
    eva.median_targets_per_event,
    # eva.median_rel_targets_per_event,
    eva.num_events_uniquely_allocated,
    eva.num_events_uniformly_allocated,
    # eva.num_events_properly_allocated,
    eva.max_object_distance,
    eva.max_event_distance,
    eva.median_degree,
    eva.max_degree,
    eva.median_target_degree,
    eva.max_target_degree,
    # eva.og_nodes,
    eva.og_edges,
    eva.og_components,
    eva.total_time,
]
results = eva.extract_stats(
    stats,
    funcs=_overview_funcs,
    param_order=param_order,
    otype_order=otype_order,
    ocel_order=ocel_order,
)

# results[results["num_targets"] > 1]
display(eva.style_stats(pd_style.style_multiindex(results)))

### View for Experiment 1 (Allocation Results)

In [None]:
from ocel.default_ocel import DEFAULT_OCELS

reload(pd_util)
reload(eva)

# Statistics shown in the Experiment 1 table.
_experiment1_funcs = [
    eva.num_targets,
    eva.te_variation_coeff,
    eva.median_targets_per_event,
    eva.num_events_uniquely_allocated,
    eva.num_events_uniformly_allocated,
    eva.max_event_distance,
]
results1 = eva.extract_stats(
    stats,
    funcs=_experiment1_funcs,
    param_order=param_order,
    otype_order=otype_order,
    ocel_order=ocel_order,
)


def apply_style(style, columns):
    """Apply the multi-index styling, then the statistic styling in LaTeX mode."""
    styled = style.pipe(pd_style.style_multiindex)
    return styled.pipe(eva.style_stats, columns=columns, latex=True)


# LaTeX column headers, keyed by statistic name.
col_renamer = {
    "num_targets": "$|\\Omega|$",
    "te_variation_coeff": "$\\cv$",
    "median_targets_per_event": "$|\\alpha(e)|_{50}$",
    "num_events_uniquely_allocated": "\\# Ev. uniquely [\\%]",
    "num_events_uniformly_allocated": "\\# Ev. uniformly [\\%]",
    "max_event_distance": "$\\dist{\\max}^E$",
}
def postprocess_table(latex: str):
    """Rewrite a generated LaTeX table: abbreviate labels and convert percentages.

    The substitutions are plain substring replacements applied in the listed
    order. Afterwards a percentage in parentheses such as ``(3 %)`` becomes
    ``(\\qty{3}{\\percent})`` and any remaining percentage such as ``12.5%``
    becomes ``\\num{12.5}`` (the percent sign is dropped).

    Args:
        latex: LaTeX source of the table.

    Returns:
        The rewritten LaTeX source.
    """
    substitutions = (
        ("containerLogistics", "\\makecell[l]{Ctr.\\\\Log.}"),
        ("hinge", "\\makecell[l]{Hinge}"),
        ("orderManagementWithDistances", "\\makecell[l]{Ord.\\\\Mgmt.}"),
        ("p2p", "\\makecell[l]{P2P}"),
        ("target_otypes", "$\\OT_\\Omega$"),
        ("ocel", "$L$"),
        (" AT ", " \\scriptsize AT "),
        (" PT ", " \\scriptsize PT "),
        (" CT ", " \\scriptsize CT "),
        (" CTHU ", " \\scriptsize\\hspace*{-.5em}CTHU\\hspace*{-.5em} "),
    )
    for needle, replacement in substitutions:
        latex = latex.replace(needle, replacement)

    # Bracketed percentages first, so that the generic pattern below only sees the rest.
    bracketed_percent = re.compile(r"\((\-?(?:\d*\.)?(?:\d+)) ?%\)")
    plain_percent = re.compile(r"(\-?(?:\d*\.)?(?:\d+)) ?%")
    latex = bracketed_percent.sub(r"(\\qty{\1}{\\percent})", latex)
    return plain_percent.sub(r"\\num{\1}", latex)


# Export the Experiment 1 table to LaTeX, using the styling callback, the column
# renamer and the output post-processor defined in this cell.
pd_style.df_to_latex(
    results1,
    caption="Results of the allocation evaluation. AT always returns a uniform emission distribution, hence $\\cv=0$ and \\qty{100}{\\percent} uniformly allocated events. PT also allocates most events uniformly. CT allocates to less targets per event while yielding higher target emission variation. CTHU allocates uniquely to most object types. Shortest distances used for allocation vary across the datasets, with CTHU on \\ocelkey{hinge} having the longest paths.",
    label="tab:eva-results-alloc",
    apply_style=apply_style,
    convert_css=True,
    postprocess_output=postprocess_table,
    auto_col_renaming=False,
    col_renamer=col_renamer,
    escape_columns=False,
    siunitx=False,
    hrules="multiindex-midrules",
    fontsize="footnote",
)

### View for Experiment 2 (Runtime)

In [None]:
# Flat copy of the CT runs: parameters become ordinary columns and rows are renumbered.
# (Alternative filter: additionally require `remove_otype_loops` to be True.)
stats2 = (
    stats.reset_index()
    .rename_axis(None, axis="columns")
    .loc[lambda frame: frame["rule"] == "CT"]
    .reset_index(drop=True)
)
stats2.head()

In [None]:
# Summarize the runtimes of the P2P runs via `pd_util.mmmmstr`.
stats2[stats2["ocel"] == "p2p"]["total_time"].pipe(pd_util.mmmmstr)

In [None]:
# Dimensions (rows, columns) of the overview table.
results.shape

**Explanation of parameters and stats**

Params:
- `AT`: AllTargets
- `PT`: ParticipatingTargets
- `CT`: ClosestTargets (full graph)
- `CTx`: ClosestTargets (full graph, w/o object type loops)
- `CTHU`: ClosestTargets (HU-HU graph)
- `CTHUx`: ClosestTargets (HU-HU graph, w/o object type loops)

Stats:
- `te_variation_coeff`: Variation coefficient (std/mean) of target object emissions.
- `median_targets_per_event`: Median per event: *to how many targets is the event allocated?*
- `median_rel_targets_per_event`: Median per event: *to how many targets (%) is the event allocated?*
- `num_events_...`
  - `...uniquely_allocated`: How many events get allocated to **EXACTLY ONE** target object?
  - `...uniformly_allocated`: How many events get allocated to **ALL** target objects?
  - `...properly_allocated`: How many events get allocated to **LESS THAN ALL** target objects?
- `max_event_distance`
- `max_object_distance`
- `median_degree`, `max_degree`: Degrees of all objects contained in graph
- `median_target_degree`, `max_target_degree`: Degrees of target objects
- `og_nodes`, `og_edges`, `og_components`: Number of nodes/edges/components in graph
- `total_time`: Runtime \[s\] for allocation

### Find equal columns in `results`

In [None]:
names = list(dict.fromkeys(results.columns.get_level_values(0)))

# Collect pairs of columns with different first-level names whose values agree on every row.
# NOTE: element-wise `==` treats NaN as unequal, so columns containing NaN never match.
equalities = []

for (name1, p1), (name2, p2) in itertools.combinations(results.columns, 2):
    if name1 == name2:
        continue
    res1, res2 = results[(name1, p1)], results[(name2, p2)]
    if (res1 == res2).all():
        equalities.append(((name1, p1), (name2, p2)))

# Columns are nodes, equalities are edges; each maximal clique is a group of mutually equal columns.
EQG = nx.from_edgelist(equalities)
# graph_util.nx_to_graphviz(EQG)
cliques = list(nx.find_cliques(EQG))

# `pd.concat` raises on an empty list, so fall back to an empty frame when no columns are equal.
equal_columns = (
    pd.concat([results[C].pipe(pd_util.prepend_level, f"eq{i}") for i, C in enumerate(cliques)], axis=1)
    if cliques
    else pd.DataFrame(index=results.index)
)
equal_columns

## 2-Stats-Scatter

In [6]:
# HU_COLOR = RWTH_ORANGE
# # THU_COLOR = RWTH_RED
# RESOURCE_COLOR = RWTH_TURQUOISE

# OCEL_ABBRS = {
#     "orderManagementWithDistances": "OrdMgmt.",
#     "containerLogistics": "CtrLog.",
#     "p2p": "P2P",
#     "hinge": "Hinge",
# }
# OCEL_COLORS = {
#     "orderManagementWithDistances": RWTH_TEAL,
#     "containerLogistics": RWTH_MAYGREEN,
#     "p2p": RWTH_PURPLE,
#     "hinge": "black",
# }

# GM_COLORS = {"HU-HU": HU_COLOR, "Obj-Obj": RESOURCE_COLOR}
# Marker keyword arguments per graph mode.
GM_MARKERS = {
    "HU-HU": {"marker": ">"},
    "Obj-Obj": {"marker": "o"},
}

# Keyword arguments shared by all `plt.grid` calls.
GRID_ARGS = {"color": "gray", "alpha": 0.5, "linewidth": 0.4}
# GM_LABELS = {"HU-HU": "CTHU", "Obj-Obj": "CT"}

In [23]:
from visualization.constants import *

In [None]:
from visualization import plots
reload(plots)
reload(export)

import matplotlib.ticker as mticker

# Color per OCEL, used by all scatter plots below.
OCEL_COLORS = {
    "orderManagementWithDistances": RWTH_RED,
    "containerLogistics": RWTH_MAYGREEN,
    "p2p": RWTH_BLUE,
    "hinge": "black",
}

# Global matplotlib settings: font family and plain hyphen for minus signs.
plt.rc("font", family="CMU Sans Serif")
plt.rc("axes", unicode_minus=False)

# Two panels sharing the y axis (runtime). The statement order matters: every `plt.*`
# call below acts on the subplot selected by the preceding `plt.subplot(...)`.
fig, (w, h) = plots.figure(aspect=2, width=1)
fig.subplots(ncols=2, sharey=True)
# Left panel: runtime by number of edges.
ax1 = plt.subplot(1, 2, 1)
ax1.set_axisbelow(True)
plt.title("Runtime by number of edges")
plt.grid(**GRID_ARGS)
plots.scatter_stats(
    ctstats_nonredundant,
    eva.og_edges,
    eva.total_time,
    # label_col="graph_mode",
    marker_col="graph_mode",
    marker_map=GM_MARKERS,
    color_col="ocel",
    color_map=OCEL_COLORS,
    label_map={**OCEL_ABBRS, **GM_LABELS},
    label_order=[*ocel_order, "Obj-Obj", "HU-HU"],
    # color_legend=True,
    # legends="above",
    # legend_x0=0,
    # legend_x1=1.05,
    # label="CT, CTx",
    # c=RESOURCE_COLOR,
    alpha=0.85,
)
# plt.xlabel("$|\\mathrm{E}(\\mathit{OG})|$")
plt.xlabel("Number of edges")
plt.ylabel("Runtime")
plt.yscale("log")
plots.set_log_runtime_ticks(axis="y")
plt.gca().xaxis.set_major_formatter(plots.thousands_tick_formatter)

# Right panel: runtime by maximum degree (log-scaled x axis; carries both legends).
ax2 = plt.subplot(1, 2, 2)
ax2.set_axisbelow(True)
plt.title("Runtime by maximum degree")
plt.grid(**GRID_ARGS)
plots.scatter_stats(
    ctstats_nonredundant,
    eva.max_degree,
    eva.total_time,
    marker_col="graph_mode",
    marker_map=GM_MARKERS,
    color_col="ocel",
    color_map=OCEL_COLORS,
    label_map={**OCEL_ABBRS, **GM_LABELS},
    label_order=[*ocel_order, "Obj-Obj", "HU-HU"],
    color_legend=True,
    marker_legend=True,
    alpha=.85,
)
# plt.xlabel("$|\\mathrm{E}(\\mathit{OG})|$")
plt.xlabel("Maximum degree")
plt.ylabel("")
plt.xscale("log")
plt.gca().xaxis.set_major_formatter(mticker.ScalarFormatter())
# plt.yscale("log")
plots.set_log_runtime_ticks(axis="y")

# NOTE(review): called with dry=True — presumably nothing is written to disk; confirm.
export.save_ocel_plt(
    fig,
    width=1,
    # subfigure=True,
    name="eva-scatter-edges-degree-runtime",
    ocel_key=None,
    caption="Runtime of \\allocrule{ClosestTargets} in relation to the number of edges in the object graph (left) and the maximum degree (right). Different colors indicate the OCELs used for evaluation, different marker shapes the two graph versions.",
    label="fig:eva-scatter-edges-degree-runtime",
    dry=True,
)

## Runtime Analysis

In [None]:
# Largest runtime anywhere in the overview table, formatted by `eva.minutes_and_seconds`.
eva.minutes_and_seconds(results["total_time"].max(axis=None))

In [None]:
# Sorted by descending runtime, so `.tail(30)` shows the 30 runs with the smallest runtime.
stats2.sort_values("total_time", ascending=False)[
    [*params, "param_str", "total_time", "og_edges", "max_event_distance"]
].tail(30).reset_index(drop=True)

### Runtime comparison: `graph_mode`

In [None]:
comp_gm.sort_values(by=("total_time", "HU-HU / Obj-Obj"), ascending=False)

In [None]:
reload(eva)
reload(export)
reload(pd_util)

param_cols = ["ocel", "target_otypes", "graph_mode", "remove_otype_loops"]
comp_gm = eva.analyze(
    ctstats_nop2p,
    param_cols=param_cols,
    col="graph_mode",
    target=["total_time", "og_edges"],
    add_dependent=True,
)
# (1 - comp_gm[("total_time", "HU-HU / Obj-Obj")]).describe()
print()
reload(pd_util)

print("Runtime reduction (full -> HU graph)")
print("abs:", comp_gm[("total_time", "Obj-Obj - HU-HU")].pipe(pd_util.mmmmstr, format=".2f", unit="\\second"))
print("rel:", (1 - comp_gm[("total_time", "HU-HU / Obj-Obj")]).pipe(pd_util.mmmmstr, format=".1%"))
print("Edge number reduction (full -> HU graph)")
print("abs:", comp_gm[("og_edges", "Obj-Obj - HU-HU")].pipe(pd_util.mmmmstr, dtype=int))
print("rel:", (1 - comp_gm[("og_edges", "HU-HU / Obj-Obj")]).pipe(pd_util.mmmmstr, format=".1%"))

print("Runtime reduction (full -> HU graph)")
display(comp_gm[("total_time", "Obj-Obj - HU-HU")].pipe(pd_util.mmmm).to_frame().style.format(precision=2))
display((1 - comp_gm[("total_time", "HU-HU / Obj-Obj")]).pipe(pd_util.mmmm).to_frame().style.format("{:.1%}"))
print("Edge number reduction (full -> HU graph)")
display(comp_gm[("og_edges", "Obj-Obj - HU-HU")].pipe(pd_util.mmmm).to_frame())
display((1 - comp_gm[("og_edges", "HU-HU / Obj-Obj")]).pipe(pd_util.mmmm).to_frame().style.format("{:.1%}"))

In [None]:
# Absolute edge reduction (Obj-Obj minus HU-HU), summarized per OCEL via `pd_util.mmmm`.
comp_gm.groupby("ocel")[[("og_edges", "Obj-Obj - HU-HU")]].apply(pd_util.mmmm).unstack()

In [None]:
# Compare degree statistics between the two graph modes on the CT runs without P2P.
_degree_stats = ["median_target_degree", "max_target_degree", "median_degree", "max_degree"]
comp_gm_deg = eva.analyze(
    ctstats_nop2p,
    param_cols=param_cols,
    col="graph_mode",
    target=list(_degree_stats),
    add_dependent=True,
)
# Per statistic: value on the full graph, value on the HU graph, and their difference.
_degree_columns = [
    (stat, variant)
    for stat in _degree_stats
    for variant in ("Obj-Obj", "HU-HU", "Obj-Obj - HU-HU")
]
comp_gm_deg[_degree_columns].agg(pd_util.mmmm).transpose()

In [None]:
# Degree statistics of the CT runs on the order management OCEL.
ctstats_nop2p.loc[
    ctstats_nop2p.index.to_frame()["ocel"] == "orderManagementWithDistances",
    ["median_target_degree", "max_target_degree", "median_degree", "max_degree"],
]

In [87]:
# Legend entries shared by the scatter plots: OCELs first, then the two graph modes.
LABEL_ORDER = [*ocel_order, "Obj-Obj", "HU-HU"]
# Display label per legend key (OCEL abbreviations merged with graph mode labels).
LABEL_MAP = {**OCEL_ABBRS, **GM_LABELS}

In [None]:
import matplotlib.ticker as mtick
from matplotlib.figure import Figure
from matplotlib.axes import Axes

reload(plots)


def plot_gm_effects(
    fig: Figure | None = None,
    ax: Axes | None = None,
    *,
    create_fig: bool = True,
    save: bool = False,
):
    """Scatter the relative edge reduction against the relative runtime reduction of the HU graph.

    Reads the notebook-level `comp_gm` frame: x is ``1 - og_edges ratio`` and y is
    ``1 - total_time ratio`` (HU-HU / Obj-Obj). Points are colored by OCEL.

    Args:
        fig: Figure to draw into; taken from `ax` when omitted.
        ax: Axes to draw into; `fig.gca()` when omitted.
        create_fig: Create a figure via `plots.figure` when neither `fig` nor `ax` is given.
        save: Pass the figure to `export.save_ocel_plt` (called with ``dry=True``).

    Raises:
        AssertionError: If neither `fig` nor `ax` is given and `create_fig` is False.
    """
    if fig is None and ax is None and create_fig:
        fig, _ = plots.figure(subfigures=2, aspect=5 / 6)
    assert fig is not None or ax is not None, "Pass `fig` or `ax`, or keep `create_fig=True`."
    if ax is None:
        ax = fig.gca()
    if fig is None:
        fig = ax.figure  # type: ignore
    # Make `ax` the pyplot-current Axes. The scatter helper below is not handed an Axes,
    # so it presumably draws on the current one; without this, a non-current `ax` was ignored.
    plt.sca(ax)

    x = 1 - comp_gm[("og_edges", "HU-HU / Obj-Obj")]
    y = 1 - comp_gm[("total_time", "HU-HU / Obj-Obj")]
    ax.set_title("Effects of using HU graph")
    # ax.grid(axis="x", **GRID_ARGS)
    ax.grid(**GRID_ARGS)
    ax.set_axisbelow(True)
    # plt.hlines(y=[0, 0.25, .5, .75, 1], xmin=-0.5, xmax=1.5, **GRID_ARGS)
    ocel_labels = comp_gm.index.to_frame()["ocel"]
    plots.colored_marked_scatter(
        # fig,
        # ax,
        x=x,
        y=y,
        color_labels=ocel_labels,
        # All points share one dummy marker class, i.e. the same marker shape.
        marker_labels=ocel_labels.apply(lambda _ocel: "xxx"),
        color_map=OCEL_COLORS,
        marker_map={"xxx": dict(marker="o")},
        label_map=LABEL_MAP,
        label_order=LABEL_ORDER,
        # color_legend=True,
        alpha=0.85,
        # marker="o",
    )
    ax.set_xlabel("Reduction in number of edges")
    ax.set_ylabel("Reduction in runtime")
    ax.set_xlim(-0.025, x.max() + 0.025)
    ax.xaxis.set_major_formatter(mtick.PercentFormatter(1, decimals=0))
    ax.yaxis.set_major_formatter(mtick.PercentFormatter(1, decimals=0))
    # ax.legend(
    #     loc="lower right",
    #     bbox_to_anchor=(1, 1 + plots.LEGEND_ABOVE_TITLE_OFFSET),
    #     ncol=3,
    #     handletextpad=0.4,
    #     columnspacing=0.6,
    # )
    if save:
        export.save_ocel_plt(
            fig,
            subfigure=True,
            name="eva-scatter-gm-reduction",
            ocel_key=None,
            caption="Relative reduction in number of edges and runtime when using the HU allocation graph.",
            label="fig:eva-gm-effects",
            dry=True,
        )


plot_gm_effects(None, None)

In [None]:
# comp_gm

### Runtime comparison: `remove_otype_loops`

In [None]:
# Reload the project modules so that edits made on disk are picked up.
reload(eva)
reload(export)
reload(pd_util)

param_cols = ["ocel", "target_otypes", "rule", "graph_mode", "remove_otype_loops"]

# All CT runs (P2P included), compared along `remove_otype_loops` for runtime and edge count.
ctstats = stats[stats.index.to_frame()["rule"] == "CT"]
comp_otl = eva.analyze(
    ctstats.reset_index(),
    param_cols=param_cols,
    col="remove_otype_loops",
    target=["total_time", "og_edges"],
    add_dependent=True,
)
# comp_otl

In [None]:
import matplotlib.ticker as mtick
import matplotlib.figure as mfig
reload(plots)


def plot_otl_effects(
    fig: Figure | None = None,
    ax: Axes | None = None,
    *,
    create_fig: bool = True,
    save: bool = False,
):
    """Scatter the relative edge reduction against the relative runtime reduction of removing OT self-loops.

    Reads the notebook-level `comp_otl` frame: x is ``1 - og_edges ratio`` and y is
    ``1 - total_time ratio`` (True / False). Points are colored by OCEL and marked by graph mode.

    Args:
        fig: Figure to draw into; taken from `ax` when omitted.
        ax: Axes to draw into; `fig.gca()` when omitted.
        create_fig: Create a figure via `plots.figure` when neither `fig` nor `ax` is given.
        save: Pass the figure to `export.save_ocel_plt` (called with ``dry=True``).

    Raises:
        AssertionError: If neither `fig` nor `ax` is given and `create_fig` is False.
    """
    if fig is None and ax is None and create_fig:
        fig, _ = plots.figure(width=1, aspect=10 / 6)
    assert fig is not None or ax is not None, "Pass `fig` or `ax`, or keep `create_fig=True`."
    if ax is None:
        ax = fig.gca()
    if fig is None:
        fig = ax.figure  # type: ignore
    # Make `ax` the pyplot-current Axes. The scatter helper below is not handed an Axes,
    # so it presumably draws on the current one; without this, a non-current `ax` was ignored.
    plt.sca(ax)

    x = 1 - comp_otl[("og_edges", "True / False")]
    y = 1 - comp_otl[("total_time", "True / False")]
    ax.set_title("Effects of removing OT self-loops")
    # plt.vlines(x=[0, 0.25], ymin=-0.5, ymax=1.5, **AXLINE_STYLE)
    # ax.grid(axis="x", **GRID_ARGS)
    ax.grid(**GRID_ARGS)
    ax.set_axisbelow(True)
    # plt.hlines(y=[0, 0.25], xmin=-0.5, xmax=1.5, **GRID_ARGS)
    run_params = comp_otl.index.to_frame()
    plots.colored_marked_scatter(
        # fig,
        # ax,
        x=x,
        y=y,
        marker_labels=run_params["graph_mode"],
        color_map=OCEL_COLORS,
        color_labels=run_params["ocel"],
        marker_map=GM_MARKERS,
        label_map=LABEL_MAP,
        label_order=LABEL_ORDER,
        alpha=0.85,
        color_legend=True,
        marker_legend=True,
    )
    ax.set_xlabel("Reduction in number of edges")
    ax.set_ylabel("Reduction in runtime")
    ax.set_xlim(-0.025, x.max() + 0.025)
    ax.xaxis.set_major_formatter(mtick.PercentFormatter(1, decimals=0))
    ax.yaxis.set_major_formatter(mtick.PercentFormatter(1, decimals=0))
    # ax.legend(
    #     loc="lower right",
    #     bbox_to_anchor=(1, 1 + plots.LEGEND_ABOVE_TITLE_OFFSET),
    #     ncol=2,
    #     handletextpad=0.4,
    #     columnspacing=0.6,
    # )

    if save:
        export.save_ocel_plt(
            fig,
            subfigure=True,
            name="eva-scatter-otloops-reduction",
            ocel_key=None,
            caption="Relative reduction in number of edges and runtime when removing object type self-loops. Only in few executions, the runtime increases (by up to \\qty{2.4}{\\percent}). Especially in HU graphs, edge reduction is high (up to \\qty{70.8}{\\percent}). In total, execution time is \\qty{2}{\\percent} lower.",
            label="fig:eva-otselfloops-effects",
            dry=True,
        )


plot_otl_effects(None, None)

In [None]:
# Combined figure: graph-mode effects on the left, OT self-loop effects on the right.
nrows = 1
ncols = 2

fig, (w, h) = plots.figure(width=1, aspect=1.8)
# axs = fig.subplots(nrows, ncols)

# Left panel; tick spacing is set before the plot function draws into the axes.
ax = plt.subplot(nrows, ncols, 1)
ax.xaxis.set_major_locator(mtick.MultipleLocator(0.2))
ax.yaxis.set_major_locator(mtick.MultipleLocator(0.25))
plot_gm_effects(fig=fig, ax=ax, create_fig=False)

# Right panel, with additional minor ticks on the y axis.
ax = plt.subplot(nrows, ncols, 2)
ax.xaxis.set_major_locator(mtick.MultipleLocator(0.2))
ax.yaxis.set_major_locator(mtick.MultipleLocator(0.25))
ax.yaxis.set_minor_locator(mtick.MultipleLocator(0.05))
plot_otl_effects(fig=fig, ax=ax, create_fig=False)

fig.tight_layout()
# fig.show()
# NOTE(review): unlike the other save calls in this notebook, this one passes dry=False —
# presumably it actually writes the figure; confirm this is intended.
export.save_ocel_plt(
    fig,
    # subfigure=True,
    name="eva-scatter-both-effects",
    ocel_key=None,
    caption="Relative reduction in number of edges and runtime when using the HU graph over the full graph (left) and removing OT self-loops (right). Three OCELs are distinguished by colors, the P2P OCEL is omitted on the left as it only contains HUs.",
    label="fig:eva-effects",
    width=1,
    dry=False,
)

In [None]:
# NOTE(review): acts on whatever figure is current when this cell runs; it looks like a
# leftover from interactive work — confirm it is still needed.
plt.tight_layout()

In [None]:
reload(pd_util)

# Text summaries of the absolute and relative reductions achieved by removing OT loops.
print("Runtime reduction (removing OT loops)")
print("abs:", comp_otl[("total_time", "False - True")].pipe(pd_util.mmmmstr, format=".2f", unit="\\second"))
print("rel:", (1 - comp_otl[("total_time", "True / False")]).pipe(pd_util.mmmmstr, format=".1%"))
print("Edge number reduction (removing OT loops)")
print("abs:", comp_otl[("og_edges", "False - True")].pipe(pd_util.mmmmstr, dtype=int))
print("rel:", (1 - comp_otl[("og_edges", "True / False")]).pipe(pd_util.mmmmstr, format=".1%"))

In [None]:
# Overall relative runtime reduction: 1 - (summed runtime with True) / (summed runtime with False).
# NOTE(review): `comp` is not defined in the cells above — presumably an earlier name of
# `comp_otl`; confirm before running on a fresh kernel.
1 - comp[("total_time", True)].sum() / comp[("total_time", False)].sum()

In [None]:
# FIXME: unfinished draft, disabled because it cannot run. `stats[]` is a syntax error,
# and `colored_scatter`, `y`, `comp`, `HU_COLOR` and `RESOURCE_COLOR` are not defined in
# the cells above (unless a star import provides them). Complete the selection and
# restore the missing names before re-enabling.
# x = stats[]
#
# colored_scatter(
#     x=x,
#     y=y,
#     c=comp.index.to_frame()["graph_mode"],
#     color_map={"HU-HU": HU_COLOR, "Obj-Obj": RESOURCE_COLOR},
#     label_map={"HU-HU": "CTHU", "Obj-Obj": "CT"},
# )

In [None]:
# NOTE(review): `comp` is not defined in the cells above, and the summaries earlier in this
# notebook read the column "False - True" rather than "True - False" — confirm both.
comp[("total_time", "True - False")].describe()

In [None]:
# 2x3 grid: rows are absolute / relative changes; columns are a scatter of both
# quantities ("corr") followed by one histogram per quantity.
# NOTE(review): `comp` is not defined in the cells above (presumably an earlier name of
# `comp_otl`), and the "True - False" columns differ from the "False - True" columns read
# elsewhere in this notebook — confirm before running on a fresh kernel.
cols = ["og_edges", "total_time"]

for i, (mode, col) in enumerate(itertools.product(["abs", "rel"], ["corr", 0, 1])):
    plt.subplot(2, 3, i + 1)

    if col == "corr":
        if mode == "rel":
            # Relative reductions, both axes formatted as percentages.
            plt.scatter(
                x=1 - comp[(cols[0], "True / False")],
                y=1 - comp[(cols[1], "True / False")],
            )
            plt.hlines(y=0, xmin=0, xmax=1, color="gray", alpha=.5)
            plt.xlim(0, 1)
            plt.gca().xaxis.set_major_formatter(mtick.PercentFormatter(1, decimals=0))
            plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1, decimals=0))
        elif mode == "abs":
            x, y = comp[(cols[0], "True - False")], comp[(cols[1], "True - False")]
            plt.scatter(x=x, y=y)
            xmin, xmax = x.min(), x.max()
            ymin, ymax = y.min(), y.max()
            # Draw zero lines only where zero lies inside the data range.
            if xmin <= 0 <= xmax:
                plt.vlines(x=0, ymin=ymin, ymax=ymax, color="gray", alpha=0.5)
            if ymin <= 0 <= ymax:
                plt.hlines(y=0, xmin=xmin, xmax=xmax, color="gray", alpha=0.5)

        plt.xlabel(f"Reduction in {cols[0]}")
        plt.ylabel(f"Reduction in {cols[1]}")

    else:
        # `col` is 0 or 1 here and selects the quantity for the histogram.
        colname = cols[col]  # type: ignore
        s = "True / False" if mode == "rel" else "True - False"
        if mode == "rel":
            x = 1 - comp[(colname, s)]
        else:
            x = comp[(colname, s)]
        plt.hist(x, bins=15, rwidth=.8)
        if mode == "rel":
            plt.gca().xaxis.set_major_formatter(mtick.PercentFormatter(1, decimals=0))
        plt.xlabel(f"Δ {colname} ({mode}.)")
        plt.ylabel("number of runs")

plt.suptitle(f"Effects of removing object type loops")
plt.tight_layout()
plt.show()

## Verify `remove_otype_loops` has no influence

In [None]:
reload(export)

# Reload the target emissions of the ClosestTargets runs only.
# NOTE: this rebinds `_tes` and `_tes_meta`. No `replace` is passed here, so the rule is
# addressed by its full name "ClosestTargets" in the cells below, while `_tes_list`
# (built from the earlier load) keeps using the abbreviation "CT".
kwargs = dict(
    ocel_key=None,
    target_otypes=None,
    rule="ClosestTargets",
    graph_mode=None,
    remove_otype_loops=None,
)
_tes, _tes_meta, _ = export.load_ocel_stats(
    name="alloc_target_emissions",
    **kwargs,  # type: ignore
)  # type: ignore

In [None]:
# Top-level keys of the nested target-emissions dict.
_tes.keys()

In [None]:
# Inspect the `.meta` attribute of one example run (nesting: ocel, target otype, rule,
# graph mode, remove_otype_loops).
_tes["p2p"]["material"]["ClosestTargets"]["HU-HU"][True].meta

In [None]:
reload(eva)
# Difference in `max_object_distance` between runs with and without OT loop removal,
# summarized with `describe()`.
eva.analyze(
    ctstats.reset_index(),
    param_cols=params,
    col="remove_otype_loops",
    target="max_object_distance",
    rel=False,
)["True - False"].describe()

In [None]:
# Distinct parameter tuples without the trailing `remove_otype_loops` flag, in first-seen order.
ps = list(dict.fromkeys(t[:-1] for t in _tes_list))
# print(ps)
for p in ps:
    if p[2] == "CT":
        # Pair each object's emissions without (True, "_wo") and with (False, "_w") OT loops
        # and print the share of objects whose two values differ.
        te = pd.merge(
            _tes_list[(*p, True)],
            _tes_list[(*p, False)],
            on="ocel:oid",
            suffixes=("_wo", "_w"),
        )
        print((te["ocean:object_emissions_wo"] != te["ocean:object_emissions_w"]).sum() / len(te))

In [None]:
# Number of distinct `remove_otype_loops` values per (ocel, target otype, graph mode);
# a value of 2 means the run exists both with and without OT loops.
both_available = _tes_meta.groupby(["ocel", "target_otypes", "graph_mode"])["remove_otype_loops"].nunique()

# One tuple per comparable run: its key followed by the two summary tuples.
_ot_loops_stats = []

for ocel_key, tot, graph_mode_str in both_available[both_available == 2].index.to_list():
    # _w: with OT loops (remove_otype_loops=False); _wo: without (remove_otype_loops=True).
    te_w: pd.DataFrame = _tes[ocel_key][tot]["ClosestTargets"][graph_mode_str][False]  # type: ignore
    te_wo: pd.DataFrame = _tes[ocel_key][tot]["ClosestTargets"][graph_mode_str][True]  # type: ignore

    te_w = te_w.set_index("ocel:oid")["ocean:object_emissions"].rename("em_w")  # type: ignore
    te_wo = te_wo.set_index("ocel:oid")["ocean:object_emissions"].rename("em_wo")  # type: ignore
    both = pd.concat([te_w, te_wo], axis=1)

    # print(ocel_key, tot, graph_mode_str)

    # Zero emissions are not handled (the relative deviation below divides by `em_w`).
    if (both == 0).any().any():
        raise NotImplementedError

    both["diff"] = both["em_wo"] - both["em_w"]
    both["deviation"] = both["em_wo"] / both["em_w"] - 1

    # Both variants must distribute the same total amount of emissions.
    assert np.isclose(te_w.sum(), te_wo.sum())
    assert np.isclose(both["diff"].sum(), 0)

    em_sum = te_w.sum()
    avg_em = te_w.mean()
    abs_diff = pd.Series(np.abs(both["diff"]) / avg_em)
    abs_dev = pd.Series(np.abs(both["deviation"]))

    # NOTE(review): `pd_util.mmmm` presumably yields four values matching `mmmm_cols`
    # below (mean, min, 50%, max) — confirm.
    agg_abs_diff = abs_diff.pipe(pd_util.mmmm)
    agg_abs_dev = abs_dev.pipe(pd_util.mmmm)

    _ot_loops_stats.append((ocel_key, tot, graph_mode_str, *agg_abs_diff, *agg_abs_dev))

    # print(
    #     f"Absolute difference (rel.) -- average: {abs_diff.mean() / avg_em:.3f}, max: {abs_diff.max() / avg_em:.3f}"
    # )
    # print(f"Absolute rel. deviation -- average: {abs_dev.mean():.3%}, max: {abs_dev.max()}")
    # print()
    # display(both)

# Assemble the summary table: the first three tuple entries form the index, the
# remaining ones the (measure, aggregate) columns.
ot_loops_stats = pd.DataFrame(_ot_loops_stats)
ot_loops_stats.set_index([0, 1, 2], inplace=True)
ot_loops_stats.index.names = ["ocel", "tot", "graph_mode"]
mmmm_cols = ["mean", "min", "50%", "max"]
ot_loops_stats.columns = pd.MultiIndex.from_tuples(itertools.product(["abs_diff", "abs_dev"], mmmm_cols))
# All columns are displayed as percentages with one decimal.
ot_loops_stats_formatters = {
    **{("abs_diff", col): "{:.1%}" for col in mmmm_cols},
    **{("abs_dev", col): "{:.1%}" for col in mmmm_cols},
}
ot_loops_stats.style.format(ot_loops_stats_formatters)  # type: ignore
# abs_diff: Absolute difference of a target's two emission values, relative to the average emissions per target object.
# abs_dev: Absolute deviation (%) of a target's emission value w/o ot loops relative to w/ ot loops.

In [None]:
# OCELs that appear in the OT-loop comparison table.
set(ot_loops_stats.index.get_level_values("ocel"))

In [None]:
# Only zeros for (ocel)
# Per OCEL and column: True if every value is zero. (The lambda receives one column of
# one group at a time, despite its parameter name `df`.)
all_zero_ocel = ot_loops_stats.groupby("ocel").agg(lambda df: (df == 0).all())
# OCELs for which every column is all-zero; their rows are dropped from the detailed table.
ocel_all_zero = all_zero_ocel[all_zero_ocel.all(axis=1)].index.tolist()
print("ocel:", ocel_all_zero)
ot_loops_stats1 = ot_loops_stats[~ot_loops_stats.index.to_frame()["ocel"].isin(ocel_all_zero)]
# ot_loops_stats

# Only zeros for (ocel, tot)
# Same check one level deeper, on the rows that remain.
all_zero_ocel_tot = ot_loops_stats1.groupby(["ocel", "tot"]).agg(lambda df: (df == 0).all())
ocel_tot_all_zero = all_zero_ocel_tot[all_zero_ocel_tot.all(axis=1)].index.tolist()
print("ocel, tot:", ocel_tot_all_zero)
ot_loops_stats1 = ot_loops_stats1[~ot_loops_stats1.index.to_frame().apply(lambda row: (row["ocel"], row["tot"]) in ocel_tot_all_zero, axis=1)]
# ot_loops_stats1

ALL = "(ALL)"

def sort_keys(keys: pd.Index):
    """Translate the labels of one index level into sort ranks for ``sort_index(key=...)``.

    pandas applies the key function to each index level separately and sorts by
    the returned values, so the result must contain, position by position, the
    sort key of the corresponding input label (not the labels in sorted order).

    Ranking per level, selected through ``keys.name``:

    * ``"ocel"``: logs listed in ``ocel_all_zero`` first, then logs that occur in
      ``ocel_tot_all_zero``, then every log of ``ot_loops_stats``; each group is
      sorted ascending so the order is reproducible across kernel restarts.
    * ``"tot"`` / ``"graph_mode"``: the ``ALL`` placeholder first, then ascending.

    Args:
        keys: Labels of a single index level; ``keys.name`` is the level name.

    Returns:
        An index of integer ranks with the same length as ``keys``, or ``keys``
        itself for a level name that is not handled above.
    """
    if keys.name == "ocel":
        ordered = [
            *sorted(ocel_all_zero),
            *sorted({ocel for (ocel, _) in ocel_tot_all_zero}),
            *sorted(set(ot_loops_stats.index.get_level_values("ocel"))),
        ]
    elif keys.name in ("tot", "graph_mode"):
        ordered = sorted(set(keys), key=lambda k: (0 if k == ALL else 1, k))
    else:
        return keys

    # A log can appear in more than one group; its first position decides the rank.
    rank = {}
    for position, label in enumerate(ordered):
        rank.setdefault(label, position)
    return pd.Index([rank[label] for label in keys], name=keys.name)

# Every group that was dropped for being all zero is represented by ONE summary row
# whose finer index levels carry the ALL placeholder:
#   (ocel, ALL, ALL) for an all-zero log, (ocel, tot, ALL) for an all-zero (ocel, tot) pair.
placeholder_keys = [
    *((ocel, ALL, ALL) for ocel in ocel_all_zero),
    *((ocel, tot, ALL) for ocel, tot in ocel_tot_all_zero),
]

frames = [ot_loops_stats1]
if placeholder_keys:
    # The scalar 0 is broadcast to one row per placeholder key, so this works for
    # any number of all-zero groups (a single literal row only fits exactly one key).
    frames.insert(
        0,
        pd.DataFrame(
            0,
            index=pd.MultiIndex.from_tuples(
                placeholder_keys, names=ot_loops_stats.index.names
            ),
            columns=ot_loops_stats.columns,
        ),
    )

ot_loops_stats2 = pd.concat(frames).sort_index(na_position="first", key=sort_keys)

ot_loops_stats2.style.format(ot_loops_stats_formatters)  # type: ignore

## Plot target emission distributions

In [None]:
# Load the stored target-emission tables without restricting any parameter
# (all filters are None); rule names are replaced by their abbreviations on load.
_tes, _tes_meta, _ = export.load_ocel_stats(
    name="alloc_target_emissions",
    ocel_key=None,
    target_otypes=None,
    rule=None,
    graph_mode=None,
    remove_otype_loops=None,
    # user="jannes",
    replace={"rule": {"AllTargets": "AT", "ParticipatingTargets": "PT", "ClosestTargets": "CT"}},
)

# Flat lookup: (ocel_key, tot, rule, graph_mode, remove_otype_loops) -> emissions per object id.
_tes_list = {
    params: table.set_index("ocel:oid")["ocean:object_emissions"]
    for params, table in util.unnest_dict(_tes).items()
}


def _series_by_rule_column(ocel_key, tot):
    """Collect the emission series of one (ocel_key, tot), keyed by the parameter column label.

    Parameter combinations whose ``remove_otype_loops`` flag is ``True`` are left out.
    """
    series_by_column = {}
    for rule in _tes[ocel_key][tot]:
        for graph_mode in _tes[ocel_key][tot][rule]:
            for remove_otype_loops in _tes[ocel_key][tot][rule][graph_mode]:
                if remove_otype_loops is True:
                    continue
                column = eva.get_param_str(rule, graph_mode, remove_otype_loops, mode="column")
                series_by_column[column] = _tes_list[
                    (ocel_key, tot, rule, graph_mode, remove_otype_loops)
                ]
    return series_by_column


# ocel_key -> target object type (ordered by `otype_order`) -> column label -> emission series
_plot_tes = {
    ocel_key: {
        tot: _series_by_rule_column(ocel_key, tot)
        for tot in sorted(otypes[ocel_key], key=otype_order.index)
    }
    for ocel_key in _tes
}

In [None]:
# Show the OCEL keys for which plot data has been assembled in `_plot_tes`.
_plot_tes.keys()

In [None]:
import visualization.plots as plots
import math
import matplotlib.patches as mpatches # type: ignore
import matplotlib.ticker as mticker # type: ignore

# Plot variant used by plot_target_emission_distributions:
# "plot_sorted", "plot_sorted_cum" or "hist".
mode = "plot_sorted_cum"

# Colour per plotted rule label; the key order also fixes the legend order.
RULE_COLORS = dict(
    PT=RWTH_MAGENTA,
    CT=RWTH_TURQUOISE,
    CTHU=RWTH_ORANGE,
    AT="gray",
)


def annotate(s: str, annot_count: int, color):
    """Write ``s`` into the current axes, stacked below earlier annotations.

    The text is anchored in axes-fraction coordinates at x = 0.05; the first
    annotation sits at y = 0.9 and each further one 0.05 lower.

    Args:
        s: Text to draw.
        annot_count: Number of annotations already placed in these axes.
        color: Text colour handed to matplotlib.
    """
    anchor = (0.05, 0.9 - 0.05 * annot_count)
    plt.annotate(s, xy=anchor, xycoords="axes fraction", color=color)


def _same_emissions(left: pd.Series, right: pd.Series) -> bool:
    """Tell whether two emission series agree, object by object, up to float tolerance.

    The series are matched on their index labels rather than on row position, so
    a different row order does not matter. Series of different length or with
    different labels count as not equal instead of raising.
    """
    if len(left) != len(right):
        return False
    if (
        not left.index.equals(right.index)
        and left.index.is_unique
        and right.index.is_unique
    ):
        if len(left.index.difference(right.index)) > 0:
            return False
        right = right.reindex(left.index)
    return bool(np.allclose(left.to_numpy(), right.to_numpy()))


def plot_target_emission_distributions(ocel_key: str, ncols: int):
    """Plot the target emission distribution of one OCEL, one subplot per target object type.

    Reads the notebook-level ``mode``, ``RULE_COLORS``, ``otypes``, ``otype_order``
    and ``_plot_tes``. For every target object type (ordered by ``otype_order``)
    each available rule series is drawn in its rule colour:

    * ``mode == "plot_sorted"``: emissions sorted ascending,
    * ``mode == "plot_sorted_cum"``: sorted ascending and cumulated, starting at 0
      (the subplots then share their y axis),
    * ``mode == "hist"``: a 10-bin histogram.

    In the two sorted modes the coefficient of variation (std / mean) of the
    non-zero emissions is annotated per rule. Groups of rules whose emission
    values coincide are annotated as ``A = B``.

    Args:
        ocel_key: Key of the OCEL in ``otypes`` and ``_plot_tes``.
        ncols: Number of subplot columns; the row count follows from the number
            of target object types.

    Returns:
        The matplotlib figure holding all subplots.
    """
    tots = sorted(otypes[ocel_key], key=otype_order.index)
    plot_rules = list(RULE_COLORS.keys())
    sorted_modes = ("plot_sorted", "plot_sorted_cum")

    nrows = math.ceil(len(tots) / ncols)
    fig, axs = plt.subplots(
        nrows,
        ncols,
        figsize=(4 * ncols, 4 * nrows),
        sharey=mode == "plot_sorted_cum",
        squeeze=False,  # always a 2-D array, also for a single row or column
    )

    for ax, tot in zip(axs.flat, tots):
        # plots.filled_plot, annotate and the plt.* calls below draw into the current axes.
        plt.sca(ax)
        rule_series = _plot_tes[ocel_key][tot]
        annot_count = 0
        ntargets = 0  # stays 0 if no series exists for this target type
        for rule, target_emissions in rule_series.items():
            target_emissions_nonzero = target_emissions[target_emissions != 0]
            variation_nonzero_target_emissions = (
                target_emissions_nonzero.std() / target_emissions_nonzero.mean()
            )
            ntargets = len(target_emissions)
            color = RULE_COLORS[rule]
            fill = rule != "AT"
            if mode in sorted_modes:
                target_emissions_sorted = target_emissions.sort_values(ascending=True)
                if mode == "plot_sorted_cum":
                    # One extra leading point so the cumulated curve starts at zero.
                    xs = np.arange(ntargets + 1)
                    target_emissions_sorted = pd.concat(
                        [pd.Series([0]), target_emissions_sorted.cumsum()]
                    )
                else:
                    xs = np.arange(ntargets)
                plots.filled_plot(
                    xs,
                    target_emissions_sorted.values,
                    label=rule,
                    color=color,
                    fill_alpha=0.2 if fill else 0,
                )
                annotate(
                    rf"$\mathrm{{CV}}_\text{{{rule}}} = {variation_nonzero_target_emissions:.2f}$",
                    annot_count,
                    color=color,
                )
                annot_count += 1
            elif mode == "hist":
                plt.hist(
                    target_emissions,
                    bins=10,
                    rwidth=0.8,
                    alpha=0.75,
                    label=rule,
                    color=color,
                )

        # Annotate groups of rules that produce the same emissions. Only rules that
        # actually have a series for this target type can be compared.
        present_rules = [rule for rule in plot_rules if rule in rule_series]
        eq_rules = [
            (r1, r2)
            for r1, r2 in itertools.combinations(present_rules, 2)
            if _same_emissions(rule_series[r1], rule_series[r2])
        ]
        if eq_rules:
            eq_rule_graph = nx.from_edgelist(eq_rules)
            for eq_rule_group in nx.find_cliques(eq_rule_graph):
                annotate(
                    " = ".join(sorted(eq_rule_group, key=plot_rules.index)),
                    annot_count,
                    color="black",
                )
                annot_count += 1

        plt.title(tot)

        if mode == "hist":
            plt.xlabel("Target object emissions [kgCO2e]")
            plt.ylabel("Number of target objects")
        if mode in sorted_modes:
            # Fewer x ticks the more digits the target count has (between 5 and 10 bins).
            # max(..., 1) keeps log10 defined when there are no targets.
            ax.xaxis.set_major_locator(
                mticker.MaxNLocator(
                    integer=True,
                    nbins=min(10, max(5, 12 - 2 * int(math.log10(max(ntargets, 1))))),
                )
            )
            plt.margins(0)
            plt.xlabel("Target objects")
            if mode == "plot_sorted":
                plt.ylabel("Emissions [kgCO2e]")
            if mode == "plot_sorted_cum":
                plt.ylabel("Emissions [kgCO2e] (cum.)")

    # One legend listing every rule colour, attached to the last subplot drawn.
    plt.legend(
        handles=[mpatches.Patch(color=RULE_COLORS[rule], label=rule) for rule in plot_rules],
        loc="upper right" if mode == "hist" else "lower right",
    )
    plt.tight_layout()
    return fig

# Draw the distribution figure for every loaded OCEL and hand it to the exporter
# together with its LaTeX label and caption.
# NOTE(review): dry=True is passed to export.save_ocel_plt -- presumably nothing is written; confirm.
for ocel_key in _tes:
    fig = plot_target_emission_distributions(ocel_key, ncols=3)

    te_distr_caption = f"""Target emission distribution for the \\ocelkey{{{ocel_key}}} log after applying different allocation rules.
    In each plot, the objects of a different type has been set as target objects.
    The target emission values are sorted ascendingly and cumulated. Additionally, the variation coefficient ($\\CV$) is given.
    The AllTargets rule (gray) always distributes emissions uniformly among all targets.
    The ClosestTargets rule using the HU-HU allocation graph always yields the highest variation.
    """
    export.save_ocel_plt(
        fig,
        dry=True,
        name="eva-target-emission-distribution",
        ocel_key=ocel_key,
        label=f"fig:eva-{ocel_key}-te-distr",
        caption=te_distr_caption,
        position="t",
        width=1,
    )