In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ast
from itertools import combinations

In [2]:
# Short display labels for the metaheuristics covered by this analysis, keyed
# by the identifier that the runs carry in their MetaName column.
# Currently disabled entries (kept for reference):
#   "tabu_search"                -> "TS"
#   "tabu_search_lns"            -> "TS+LNS"
#   "lns_ts_simulated_annealing" -> "LNS+TS+SA"
mh_names = dict(
    lns="LNS",
    simulated_annealing="SA",
    large_neighborhood_search_simulated_annealing="LNS+SA",
)

In [3]:
# Load every evaluation run. The path is written with a forward slash so it
# resolves on Windows, Linux and macOS alike; a backslash is a path separator
# on Windows only. No column names or dtypes are passed: the file's own header
# row is used, and it already provides the "Improvement" column.
all_runs = pd.read_csv("artifacts/evaluations_first_run.csv")

# Display the loaded frame to verify the columns.
all_runs

Unnamed: 0.1,Unnamed: 0,MetaName,AlgoConfig,Timeout,Parameters,StartSol_Profit,MH_Profit,Runtime,Improvement
0,0,simulated_annealing,"{'n': 100, 't': 0.666, 's': 4, 'r': 2, 'p': ar...",30.0,"{'alpha': 0.85, 'epsilon': 0.001, 'neighborhoo...",388243.0,398345.000000,28.218750,10102.0
1,1,simulated_annealing,"{'n': 100, 't': 0.666, 's': 4, 'r': 2, 'p': ar...",30.0,"{'alpha': 0.95, 'epsilon': 0.001, 'neighborhoo...",388243.0,388516.000000,27.812500,273.0
2,2,simulated_annealing,"{'n': 100, 't': 0.666, 's': 4, 'r': 2, 'p': ar...",30.0,"{'alpha': 0.95, 'epsilon': 0.0001, 'neighborho...",388243.0,396649.000000,29.281250,8406.0
3,3,simulated_annealing,"{'n': 100, 't': 0.666, 's': 4, 'r': 2, 'p': ar...",30.0,"{'alpha': 0.95, 'epsilon': 0.0001, 'neighborho...",388243.0,388417.000000,27.765625,174.0
4,4,simulated_annealing,"{'n': 100, 't': 0.666, 's': 4, 'r': 2, 'p': ar...",30.0,"{'alpha': 0.95, 'epsilon': 0.001, 'neighborhoo...",388243.0,396243.000000,28.093750,8000.0
...,...,...,...,...,...,...,...,...,...
1539,tabu_search,"{'n': 50, 't': 0.666, 's': 4, 'r': 2, 'p': arr...",30.0,"{'max_size_tabu_list': 3, 'neighborhood': 'sel...",96745,100024.0,29.531250,,
1540,tabu_search,"{'n': 50, 't': 0.666, 's': 4, 'r': 2, 'p': arr...",30.0,"{'max_size_tabu_list': 5, 'neighborhood': 'ran...",96745,100624.0,29.812500,,
1541,tabu_search,"{'n': 50, 't': 0.666, 's': 4, 'r': 2, 'p': arr...",30.0,"{'max_size_tabu_list': 5, 'neighborhood': 'sel...",96745,99806.0,29.734375,,
1542,tabu_search,"{'n': 50, 't': 0.666, 's': 4, 'r': 2, 'p': arr...",30.0,"{'max_size_tabu_list': 7, 'neighborhood': 'ran...",96745,100783.0,29.750000,,


In [4]:
# Keep only the runs whose MetaName is registered in `mh_names`, so this filter
# cannot drift from the set of metaheuristics the comparison helpers accept.
# The "Unnamed: 0" column then becomes the row index.
test = (
    all_runs
    .loc[all_runs["MetaName"].isin(list(mh_names))]
    .set_index("Unnamed: 0")
)

In [5]:
def find_best_overall_configuration(data, meta_name, decision_argument):
    """Return the parameter configuration that most often achieves the best result.

    Within each group of rows sharing the same ``AlgoConfig`` value, every row
    whose ``decision_argument`` equals the group's best value is marked as a
    winner (ties all count). The parameter configuration with the highest
    number of winning rows is returned; if several configurations share that
    number, the first one in the grouped order is taken.

    Parameters
    ----------
    data : pandas.DataFrame
        Runs with the columns ``MetaName``, ``AlgoConfig``, ``Parameters``
        (a dict literal stored as a string) and the column named by
        ``decision_argument``.
    meta_name : str
        A key of ``mh_names`` selecting the metaheuristic to analyse.
    decision_argument : str
        ``"MH_Profit"`` (the maximum per group wins) or ``"Runtime"``
        (the minimum per group wins).

    Returns
    -------
    dict
        The winning parameter configuration.

    Raises
    ------
    AssertionError
        If ``meta_name`` or ``decision_argument`` is not supported.
    ValueError
        If ``data`` contains no run of ``meta_name``, or none of its runs can
        be marked as best (e.g. all values are missing).
    """
    assert meta_name in mh_names.keys(), "This metaheuristic isn't available, yet"
    assert decision_argument in ["MH_Profit", "Runtime"], "You can only decide by MH_Profit or Runtime"

    # Which per-group aggregate defines "best" for each supported criterion.
    best_aggregate = {"MH_Profit": "max", "Runtime": "min"}[decision_argument]

    # Filter for the specified algorithm; work on a copy so `data` is untouched.
    algorithm_data = data.loc[data.MetaName == meta_name].copy()
    if algorithm_data.empty:
        raise ValueError(f"No runs found for metaheuristic '{meta_name}'")

    # Parse the dict literals, then turn each dict into a sorted tuple of
    # (key, value) pairs: tuples are hashable and independent of key order,
    # so they can serve as group keys.
    algorithm_data['Parameters'] = algorithm_data['Parameters'].apply(ast.literal_eval)
    algorithm_data['Parameters_tuple'] = algorithm_data['Parameters'].apply(lambda x: tuple(sorted(x.items())))

    # Mark every row that reaches the best value of its AlgoConfig group.
    group_best = algorithm_data.groupby('AlgoConfig')[decision_argument].transform(best_aggregate)
    algorithm_data['best'] = group_best == algorithm_data[decision_argument]

    # Count how often each configuration is among the winners.
    counts = algorithm_data[algorithm_data['best']].groupby('Parameters_tuple').size().reset_index(name='count')
    if counts.empty:
        # Without this guard idxmax() fails with an unhelpful message.
        raise ValueError(
            f"No best run could be determined for metaheuristic '{meta_name}' by {decision_argument}"
        )

    # Identify the best configuration based on the frequency.
    best = counts.loc[counts['count'].idxmax()]

    return dict(best['Parameters_tuple'])

def get_configs(ccc):
    """Convert a dict literal given as a string into a sorted tuple of its items.

    ``ccc`` is parsed with ``ast.literal_eval``; the resulting dict's
    ``(key, value)`` pairs are sorted and returned as a tuple, so two dicts
    with the same content yield equal tuples regardless of key order.
    """
    parsed = ast.literal_eval(ccc)
    pairs = list(parsed.items())
    pairs.sort()
    return tuple(pairs)

def plot_comparison(data, meta1, meta2, decision_variable="MH_Profit"):
    """Scatter two metaheuristics against each other using their best configurations.

    For each metaheuristic the configuration returned by
    ``find_best_overall_configuration`` is determined, and the runs using that
    configuration are selected. Two panels are drawn, one for ``Improvement``
    and one for ``Runtime``, with ``meta1`` on the x-axis, ``meta2`` on the
    y-axis and a dashed red diagonal marking equal values.

    Parameters
    ----------
    data : pandas.DataFrame
        Runs with the columns ``MetaName``, ``Parameters``, ``Improvement``
        and ``Runtime``, plus whatever ``find_best_overall_configuration``
        requires.
    meta1, meta2 : str
        Two different keys of ``mh_names``.
    decision_variable : str, optional
        Criterion forwarded to ``find_best_overall_configuration``.

    Returns
    -------
    None
        The figure is shown via ``plt.show()``. If the two selections differ
        in shape, their names and shapes are printed and nothing is plotted.

    Raises
    ------
    AssertionError
        If ``meta1 == meta2`` or either name is not a key of ``mh_names``.
    """
    assert (meta1 != meta2), "You can only compare different metaheuritics"
    assert (meta1 in mh_names.keys()) and (meta2 in mh_names.keys()), f"At least one of your metaheuristics is not in {mh_names.keys()}"

    conf1 = find_best_overall_configuration(data, meta1, decision_variable)
    conf2 = find_best_overall_configuration(data, meta2, decision_variable)

    # Parse the Parameters column once and reuse it for both selections.
    run_configs = data['Parameters'].apply(get_configs)

    def _select_runs(meta, conf):
        """Return the rows of `data` run by `meta` with exactly the configuration `conf`."""
        wanted = tuple(sorted(conf.items()))
        # Compare row by row: this does not depend on whether pandas treats a
        # tuple operand as a single value or as a sequence of values.
        uses_conf = run_configs.apply(lambda candidate: candidate == wanted)
        return data.loc[(data.MetaName == meta) & uses_conf].copy()

    runs1 = _select_runs(meta1, conf1)
    runs2 = _select_runs(meta2, conf2)

    # The scatter pairs rows by position, so both selections must line up.
    if runs1.shape != runs2.shape:
        print(meta1, meta2)
        print(runs1.shape, runs2.shape)
        return

    # NOTE(review): pairing is purely positional; this presumably relies on both
    # metaheuristics having been evaluated in the same row order - confirm.
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    label1, label2 = mh_names[meta1], mh_names[meta2]

    for ax, var in zip(axes, ["Improvement", "Runtime"]):
        # Common range of both series, used for the diagonal reference line.
        ax_min = np.minimum(runs1[var].min(), runs2[var].min())
        ax_max = np.maximum(runs1[var].max(), runs2[var].max())

        ax.scatter(runs1[var], runs2[var], marker='x')
        ax.plot([ax_min, ax_max], [ax_min, ax_max], 'r--')
        ax.set_xlabel(f'{var} ({label1})', fontsize=8)
        ax.set_ylabel(f'{var} ({label2})', fontsize=8)
        ax.set_title(f'Compare ({label1} vs {label2})', fontsize=8)
        ax.grid(True)

    plt.show()



In [None]:
# plot_comparison(all_runs, 'lns', 'simulated_annealing')

# Draw one comparison figure for every unordered pair of registered
# metaheuristics; the pair is printed first so each figure can be identified.
for m1, m2 in combinations(mh_names, 2):
    print(m1, m2)
    plot_comparison(test, m1, m2)