In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ast
import os
import regex as re
from itertools import combinations

In [2]:
unclean = pd.read_csv(
    'artifacts\\evaluations_first_run.csv', index_col=None
)

unclean_before = unclean.iloc[:1125]
unclean_after = unclean.iloc[1125:]

unclean_before = unclean_before.drop(unclean_before.columns[0], axis=1)
unclean_before = unclean_before.drop(unclean_before.columns[-1], axis=1)

unclean_after = unclean_after.drop(unclean_after.columns[-2:], axis=1)
unclean_after.columns = unclean_before.columns

cleaned = pd.concat(
    [
        unclean_before, 
        unclean_after
    ]
)
cleaned.to_csv(
    "artifacts/all_runs_cleaned.csv", index=None
)
all_runs = pd.read_csv(
    "artifacts/all_runs_cleaned.csv"
)

all_runs["Timeout"] = pd.to_numeric(all_runs["Timeout"], errors='coerce')
all_runs["StartSol_Profit"] = pd.to_numeric(all_runs["StartSol_Profit"], errors='coerce')
all_runs["MH_Profit"] = pd.to_numeric(all_runs["MH_Profit"], errors='coerce')
all_runs["Runtime"] = pd.to_numeric(all_runs["Runtime"], errors='coerce')
all_runs["Improvement"] = all_runs["MH_Profit"] - all_runs["StartSol_Profit"]

all_runs["AlgoConfig"] = all_runs.AlgoConfig.apply(lambda x: x.replace("array","").replace("(","").replace(")",""))
all_runs['AlgoConfig_parsed'] = all_runs['AlgoConfig'].apply(ast.literal_eval)
normAC = pd.json_normalize(all_runs["AlgoConfig_parsed"]).add_prefix("AlgoConfig.")
all_runs = all_runs.join(normAC)

all_runs["Parameters"] = all_runs.Parameters.apply(lambda x: x.replace("array","").replace("(","").replace(")",""))
all_runs['Parameters_parsed'] = all_runs['Parameters'].apply(ast.literal_eval)
normAC = pd.json_normalize(all_runs["Parameters_parsed"]).add_prefix("Parameters.")
all_runs = all_runs.join(normAC)

all_runs = all_runs.loc[all_runs["AlgoConfig.n"] != 6] ### Remove example.in

In [18]:
print(
    all_runs.iloc[:, 9].unique(),
    all_runs.iloc[:, 10].unique(),
    all_runs.iloc[:, 11].unique(),
    all_runs.iloc[:, 12].unique(),
)

[100  10  20  50] [0.666] [4] [2]


In [3]:
mh_names = {
    "lns" : "LNS",
    "simulated_annealing" : "SA",
    "large_neighborhood_search_simulated_annealing" : "LNS+SA",
    "tabu_search" : "TS",
    "tabu_search_lns" : "TS+LNS",
    "lns_ts_simulated_annealing" : "LNS+TS+SA",
    "reactive_tabu_search" : "RTS",
    # "lns_gc" : "LNS+GC"
}


# @article{gramazio-2017-ccd,
#   author={Gramazio, Connor C. and Laidlaw, David H. and Schloss, Karen B.},
#   journal={IEEE Transactions on Visualization and Computer Graphics},
#   title={Colorgorical: creating discriminable and preferable color palettes for information visualization},
#   year={2017}
# }
mh_colors = {
    "lns" : "#256676", 
    "simulated_annealing" : "#63ef85", 
    "large_neighborhood_search_simulated_annealing" : "#eb1241", 
    "tabu_search" : "#20d8fd", 
    "tabu_search_lns" : "#9c3190", 
    "lns_ts_simulated_annealing" : "#afc6fe",
    "reactive_tabu_search" : "#4d57a8", 
    # "lns_gc" : "#ffa8ff",   
}

In [12]:
def find_best_overall_configuration(data, meta_name, decision_argument):
    assert meta_name in mh_names.keys(), "This metaheuristic isn't available, yet"
    assert decision_argument in ["MH_Profit", "Runtime"], "You can only decide by MH_Profit or Runtime"


    algorithm_data = data.loc[data.MetaName == meta_name].copy()            # Filter for the specified algorithm
    algorithm_data['Parameters'] = algorithm_data['Parameters'].apply(ast.literal_eval)         # Parse the Parameters column to extract parameter values
    algorithm_data['Parameters_tuple'] = algorithm_data['Parameters'].apply(lambda x: tuple(sorted(x.items()))) # Convert the Parameters column from dictionaries to frozensets for hashing

    # Count the frequency of each configuration achieving the best profit
    if decision_argument == "MH_Profit":
        algorithm_data['best'] = algorithm_data.groupby('AlgoConfig')[f'{decision_argument}'].transform('max') == algorithm_data[f'{decision_argument}']
    if decision_argument == "Runtime":
        algorithm_data['best'] = algorithm_data.groupby('AlgoConfig')[f'{decision_argument}'].transform('min') == algorithm_data[f'{decision_argument}']
    counts = algorithm_data[algorithm_data['best']].groupby('Parameters_tuple').size().reset_index(name='count')

    best = counts.loc[counts['count'].idxmax()]     # Identify the best configuration based on the frequency

    return dict(best['Parameters_tuple'])

def pd_inner_json(ccc):
    return tuple(sorted(ast.literal_eval(ccc).items()))

def plot_comparison(data, meta1, meta2, decision_variable="MH_Profit"):
    assert (meta1 != meta2), "You can only compare different metaheuritics"
    assert (meta1 in mh_names.keys()) and (meta2 in mh_names.keys()), f"At least one of your metaheuristics is not in {mh_names.keys()}"

    output_dir = 'evaluation_plots/PerformaceProfiles'
    os.makedirs(output_dir, exist_ok=True)

    conf1 = find_best_overall_configuration(data, meta1, decision_variable)
    conf2 = find_best_overall_configuration(data, meta2, decision_variable)    
    
    runs1 = data.loc[
        (data.MetaName == meta1) & 
        (data['Parameters'].apply(pd_inner_json) == tuple(sorted(conf1.items())))
    ].copy()
    runs2 = data.loc[
        (data.MetaName == meta2) & 
        (data['Parameters'].apply(pd_inner_json) == tuple(sorted(conf2.items())))
    ].copy()


    runs1.sort_values(by=["AlgoConfig"], inplace=True)
    runs2.sort_values(by=["AlgoConfig"], inplace=True)

    unique_colors = runs1['AlgoConfig.n'].unique()
    unique_colors = sorted(unique_colors)
    color_map = {val: idx for idx, val in enumerate(unique_colors)}
    colors1 = runs1['AlgoConfig.n'].map(color_map)
    colors2 = runs2['AlgoConfig.n'].map(color_map)
    palette = plt.get_cmap("tab20b", len(unique_colors))

    fig, axes = plt.subplots(1,2, figsize=(12, 6))

    for ii, var in enumerate(["Improvement", "Runtime"]):
        ax_min = np.minimum(runs1[var].min(), runs2[var].min())
        ax_max = np.maximum(runs1[var].max(), runs2[var].max())

        scatter = axes[ii].scatter(runs1[var], runs2[var], c=colors1, cmap=palette, marker='x')
        axes[ii].plot(
            [ax_min, ax_max],
            [ax_min, ax_max], 
            'r--'
        )
        axes[ii].set_xlabel(f'{var} ({mh_names[meta1]})', fontsize=9)
        axes[ii].set_ylabel(f'{var} ({mh_names[meta2]})', fontsize=9)
        axes[ii].set_title(f'Compare ({mh_names[meta1]} vs {mh_names[meta2]})', fontsize=11)
        axes[ii].grid(True)

    handles, labels = scatter.legend_elements(prop="colors")
    legend_labels = [f"{label}" for label in unique_colors]
    axes[1].legend(handles, legend_labels, title="#Teams", bbox_to_anchor=(1.25, 1.02))

    plot_filename = os.path.join(output_dir, f'pps_{mh_names[meta1]}_{mh_names[meta2]}.png')
    plt.savefig(plot_filename, dpi=300, format="png", bbox_inches="tight")
    plt.close()
    # plt.show()

for m1, m2 in list(combinations(mh_names.keys(), 2)):
    plot_comparison(all_runs, m1, m2)

In [15]:
def boxplots_per_n(data, decision_variable="MH_Profit"):
    output_dir = 'evaluation_plots/Boxplots'
    os.makedirs(output_dir, exist_ok=True)

    unique_n_values = sorted(data['AlgoConfig.n'].unique())
    df_best_confs = None #pd.DataFrame(columns=data.columns)
    for meta in mh_names.keys():
        conf = find_best_overall_configuration(data, meta, decision_variable)    
        runs_meta = data.loc[
            (data.MetaName == meta) & 
            (data['Parameters'].apply(pd_inner_json) == tuple(sorted(conf.items())))
        ].copy()
        df_best_confs = pd.concat(
            [df_best_confs, runs_meta]
        )


    # Für jeden einzigartigen Wert von AlgoConfig.n eine eigene Grafik erstellen und speichern
    for n_value in unique_n_values:

        plt.figure(figsize=(9, 6))
        ax = sns.boxplot(x='AlgoConfig.n', y='Improvement', hue='MetaName',  data=df_best_confs[df_best_confs['AlgoConfig.n'] == n_value], showfliers=False, width=0.8, palette=mh_colors)
        plt.title(f'Boxplot of improvement by metaheuristic', fontsize=14)
        plt.xlabel('')
        plt.xticks(ticks=[], labels=[])
        plt.ylabel('Improvement', fontsize=11)
        plt.grid(axis='y')

        handles, labels = ax.get_legend_handles_labels()
        labels = [mh_names[label] for label in labels]
        plt.legend(handles=handles, labels=labels, title='Metaheuristic', bbox_to_anchor=(1.25, 1.02))
        

        # Speichern der Grafik
        plot_filename = os.path.join(output_dir, f'boxplot_AlgoConfig_n_{n_value}.png')
        plt.savefig(plot_filename, dpi=300, format="png", bbox_inches="tight")
        plt.close()
        # plt.show()

boxplots_per_n(all_runs)

In [6]:
variable = "Runtime"
n = 10

result = all_runs.loc[
    all_runs["AlgoConfig.n"] == n
].groupby(
    ["MetaName", "AlgoConfig.n"]
)[[variable]].agg(
    Min=(variable, lambda x: x.min()),
    Max=(variable, lambda x: x.max()),
    Q25=(variable, lambda x: x.quantile(0.25)),
    Q50=(variable, lambda x: x.quantile(0.50)),
    Q75=(variable, lambda x: x.quantile(0.75)),   
).copy()

result = result.rename_axis(index={'MetaName': 'Meta Name', 'AlgoConfig.n': 'n'}).reset_index()
result["Meta Name"] = result["Meta Name"].replace(mh_names)

sort_order = ["LNS","SA","TS","LNS+SA","TS+LNS","LNS+TS+SA","RTS"]
result['Meta Name'] = pd.Categorical(result['Meta Name'], categories=sort_order, ordered=True)
result = result.sort_values('Meta Name')

cols = [col for col in result.columns if col != "n"]
result = result[cols]

# Funktion zum Markieren der größten Werte
def highlight_max(s):
    is_max = s == s.min()
    return ['\\textbf{' + '{:.1f}'.format(v) + '}' if m else '{:.1f}'.format(v) for v, m in zip(s, is_max)]

# Markiere die größten Werte in den relevanten Spalten
result['Min'] = highlight_max(result['Min'])
result['Max'] = highlight_max(result['Max'])
result['Q25'] = highlight_max(result['Q25'])
result['Q50'] = highlight_max(result['Q50'])
result['Q75'] = highlight_max(result['Q75'])


latex_table = result.to_latex(
        index=False,
        formatters={"Meta Name": str.upper},
        float_format="{:.1f}".format,
)

latex_table_with_title = r"""\begin{table}[ht]
\centering
""" + latex_table + r"""\caption{Results of """ + variable + f" with {n} teams" + "}\n" + r"\end{table}"

print(latex_table_with_title)

\begin{table}[ht]
\centering
\begin{tabular}{llllll}
\toprule
Meta Name & Min & Max & Q25 & Q50 & Q75 \\
\midrule
LNS & 0.7 & 2.3 & 0.8 & 0.9 & 1.2 \\
SA & 0.5 & 3.8 & 0.9 & 1.3 & 2.0 \\
TS & 0.7 & 2.5 & 0.9 & 1.1 & 1.4 \\
LNS+SA & 29.8 & 29.9 & 29.8 & 29.8 & 29.8 \\
TS+LNS & 0.5 & \textbf{1.5} & \textbf{0.7} & \textbf{0.8} & \textbf{1.0} \\
LNS+TS+SA & 29.8 & 29.9 & 29.8 & 29.8 & 29.8 \\
RTS & \textbf{0.4} & 3.2 & 0.9 & 1.1 & 1.3 \\
\bottomrule
\end{tabular}
\caption{Results of Runtime with 10 teams}
\end{table}


##### Vielleicht noch nützlich

In [7]:
decision_variable="MH_Profit"
data = test.copy()

for mh in mh_names.keys():
    conf = find_best_overall_configuration(test, mh, decision_variable)
    runs = data.loc[
        (data.MetaName == mh) & 
        (data['Parameters'].apply(get_configs) == tuple(sorted(conf.items())))
    ].copy()

    fig, axes = plt.subplots(1,1, figsize=(10, 5))
    step = axes.step(runs.Runtime, runs.Improvement)

    

NameError: name 'test' is not defined