In [None]:
import sys
import pandas as pd
import numpy as np
from scipy.stats import kendalltau
import matplotlib.pyplot as plt
import seaborn as sns
import os
from collections import defaultdict

In [None]:
# Define colors for consistency
# Maps each solver name (the values found in the results' 'Solver' column) to
# the hex color the plotting functions below use for that solver.
COLORS = {
    'Random': '#ff7f0e',  # Orange
    'Random Walk': '#1f77b4',  # Blue
    'Greedy': '#2ca02c',  # Green
    'Steepest': '#d62728',  # Red
    'Heuristic': '#9467bd'   # Purple
}

In [None]:
def clean_solution(solution_str):
    """Parse a bracketed list of integers stored as text into a list of ints.

    Accepts whitespace-separated (``"[3 1 2]"``) as well as comma-separated
    (``"[3, 1, 2]"``) values, with optional surrounding whitespace.

    Args:
        solution_str: Text representation of the solution.

    Returns:
        list[int]: The parsed values, in order. ``"[]"`` yields an empty list.

    Raises:
        ValueError: If any token is not an integer literal.
    """
    # Strip outer whitespace first so the brackets are really at the ends,
    # then treat commas like whitespace so both list formats tokenize alike.
    tokens = solution_str.strip().strip('[]').replace(',', ' ').split()
    return [int(token) for token in tokens]

# Path of the results export to analyze; change this to your CSV path
csv_path = "results/results_2025-04-03T09_23.csv"

df = pd.read_csv(csv_path)
# The 'Solution' column arrives as text; turn every entry into a list of ints
df["Solution"] = df["Solution"].map(clean_solution)
df.head()

In [None]:
# Show the results frame, including the parsed 'Solution' column
df

In [None]:
def load_optimal_solution(instance_name):
    """Read the reference solution stored next to an instance file.

    The file ``instances/<base>.sln`` is read, where ``<base>`` is
    ``instance_name`` without its extension. The first line must contain two
    integers, of which the second is taken as the reference fitness; all
    integers on the remaining lines are concatenated into the solution.

    Args:
        instance_name: Instance file name, e.g. as found in the 'Instance' column.

    Returns:
        tuple[int, list[int]] | None: ``(fitness, solution)``, or ``None`` when
        the file cannot be opened or does not have the expected layout.
    """
    base_name = os.path.splitext(instance_name)[0]
    sln_path = f"instances/{base_name}.sln"

    try:
        with open(sln_path, 'r') as f:
            lines = f.readlines()
    except OSError:
        # Missing or unreadable file: there is simply no reference solution.
        return None

    try:
        _, optimal_fitness = map(int, lines[0].split())
        optimal_solution = [int(token) for line in lines[1:] for token in line.split()]
    except (IndexError, ValueError):
        # Empty file, wrong number of header fields, or a non-integer token.
        # Only these are treated as "no solution"; anything else (including
        # KeyboardInterrupt) propagates instead of being silently swallowed.
        return None

    return optimal_fitness, optimal_solution

# Build two lookups keyed by instance name: reference fitness and reference
# solution. Instances for which the loader returns nothing are left out.
instances = df['Instance'].unique()
best_solution_fitness = {}
best_solutions = {}

for instance in instances:
    loaded = load_optimal_solution(instance)
    if not loaded:
        continue
    reference_fitness, reference_solution = loaded
    best_solution_fitness[instance] = reference_fitness
    best_solutions[instance] = reference_solution


In [None]:
# NOTE(review): the same function is also defined in an earlier cell of this
# notebook; this later definition is the one in effect after a full run.
# Consider keeping a single copy.
def load_optimal_solution(instance_name):
    """Read the reference solution stored next to an instance file.

    The file ``instances/<base>.sln`` is read, where ``<base>`` is
    ``instance_name`` without its extension. The first line must contain two
    integers, of which the second is taken as the reference fitness; all
    integers on the remaining lines are concatenated into the solution.

    Args:
        instance_name: Instance file name, e.g. as found in the 'Instance' column.

    Returns:
        tuple[int, list[int]] | None: ``(fitness, solution)``, or ``None`` when
        the file cannot be opened or does not have the expected layout.
    """
    base_name = os.path.splitext(instance_name)[0]
    sln_path = f"instances/{base_name}.sln"

    try:
        with open(sln_path, 'r') as f:
            lines = f.readlines()
    except OSError:
        # Missing or unreadable file: there is simply no reference solution.
        return None

    try:
        _, optimal_fitness = map(int, lines[0].split())
        optimal_solution = [int(token) for line in lines[1:] for token in line.split()]
    except (IndexError, ValueError):
        # Empty file, wrong number of header fields, or a non-integer token.
        # Only these are treated as "no solution"; anything else (including
        # KeyboardInterrupt) propagates instead of being silently swallowed.
        return None

    return optimal_fitness, optimal_solution

# Rebuild the per-instance lookups of reference fitness and reference solution.
# Instances for which the loader returns nothing get no entry in either dict.
instances = df['Instance'].unique()
best_solution_fitness = {}
best_solutions = {}

for instance in instances:
    loaded = load_optimal_solution(instance)
    if not loaded:
        continue
    reference_fitness, reference_solution = loaded
    best_solution_fitness[instance] = reference_fitness
    best_solutions[instance] = reference_solution

In [None]:
def _gap_to_best(row):
    """Percent difference between a run's final fitness and its instance's reference."""
    final = row['FinalFitness']
    # Without a reference for the instance, the run's own fitness is used,
    # which makes the resulting gap 0.
    reference = best_solution_fitness.get(row['Instance'], final)
    return (final - reference) / reference * 100


df['GapToBest'] = df.apply(_gap_to_best, axis=1)

# Relative fitness reduction achieved by the run, in percent
fitness_drop = df['InitialFitness'] - df['FinalFitness']
df['ImprovementPercent'] = (fitness_drop / df['InitialFitness']) * 100

# TimeMs is converted to seconds; the epsilon keeps the divisor non-zero
elapsed_seconds = (df['TimeMs'] / 1000) + 1e-9
df['EvalsPerSecond'] = df['Evaluations'] / elapsed_seconds

df['Efficiency'] = df['EvalsPerSecond'] / (df['FinalFitness'] + 1e-9)
df.head()

In [None]:
# Inspect the reference solutions loaded from the .sln files, keyed by instance name
best_solutions

In [None]:
def analyze_qap_results(df):
    """Add the derived metric columns to the results frame.

    Columns written (in place, on the frame that was passed in):
      * ``GapToBest``: percent difference between ``FinalFitness`` and the
        reference fitness loaded for the row's instance.
      * ``ImprovementPercent``: percent reduction from ``InitialFitness`` to
        ``FinalFitness``.
      * ``EvalsPerSecond``: ``Evaluations`` divided by the run time in seconds.

    Instances for which ``load_optimal_solution`` returns ``None`` no longer
    raise; like the standalone metrics cell of this notebook, they fall back
    to the run's own final fitness as reference, i.e. a gap of 0.

    Args:
        df: Results frame with 'Instance', 'InitialFitness', 'FinalFitness',
            'Evaluations' and 'TimeMs' columns.

    Returns:
        The same frame, with the columns above added or overwritten.
    """
    # Reference fitness per instance; only instances with a loadable solution
    # get an entry.
    best_solution_fitness = {}
    for instance in df['Instance'].unique():
        loaded = load_optimal_solution(instance)
        if loaded is not None:
            best_solution_fitness[instance] = loaded[0]

    def gap_to_best(row):
        final = row['FinalFitness']
        reference = best_solution_fitness.get(row['Instance'], final)
        return (final - reference) / reference * 100

    # Add gap to best known solution
    df['GapToBest'] = df.apply(gap_to_best, axis=1)

    # Add improvement percentage
    df['ImprovementPercent'] = ((df['InitialFitness'] - df['FinalFitness']) / df['InitialFitness'] * 100)

    # Add evaluations per second (epsilon keeps the divisor non-zero)
    df['EvalsPerSecond'] = df['Evaluations'] / ((df['TimeMs'] / 1000) + 1e-9)

    return df
df = analyze_qap_results(df)

In [None]:
def plot_gap_to_best(df):
    """Plot the mean gap to the reference solution per instance, one line per solver.

    Instances are placed on the x axis in order of increasing mean
    ``GapToBest``; each solver gets a dashed line with a shaded band of one
    standard deviation around it. The y axis is logarithmic. The figure is
    saved to ``new_plots/gap_to_best.png`` and shown.

    The frame passed in is only read: the instance ordering is kept in a local
    list instead of being written back into the 'Instance' column.

    Args:
        df: Results frame with 'Instance', 'Solver' and 'GapToBest' columns.
            Every solver name must be a key of ``COLORS``.
    """
    plt.figure(figsize=(14, 8))

    # Order instances by mean GapToBest
    instance_order = df.groupby("Instance")["GapToBest"].mean().sort_values().index
    instances = list(instance_order)

    # Calculate mean and std by instance and solver
    gap_stats = df.groupby(['Instance', 'Solver'])['GapToBest'].agg(['mean', 'std']).reset_index()
    solvers = df['Solver'].unique()
    x = np.arange(len(instances))

    for solver in solvers:
        # One row per instance in plotting order; instances the solver never
        # ran on become NaN rows.
        solver_stats = (
            gap_stats[gap_stats["Solver"] == solver]
            .set_index("Instance")
            .reindex(instances)
        )

        means = solver_stats["mean"].to_numpy(dtype=float)
        # Non-positive means cannot be drawn on a log axis; nudge them up.
        means = np.where(means > 0, means, means + 1e-2)

        # std is NaN for missing pairs and for groups with a single run;
        # treat both as "no spread" so the band stays continuous.
        stds = solver_stats["std"].fillna(0).to_numpy(dtype=float)

        lower = means - stds
        upper = means + stds

        # Ensure all y values are positive for log scale
        lower = np.where(lower <= 0, 1e-3, lower)

        plt.plot(x, means, label=solver, color=COLORS[solver], linestyle='--', marker='o')
        plt.fill_between(x, lower, upper, color=COLORS[solver], alpha=0.2)

    plt.xlabel('Instance')
    plt.ylabel('Gap to Best Known Solution (%)')
    plt.title('Average Gap to Best Known Solution by Solver and Instance')
    plt.xticks(x, instances, rotation=45)
    plt.yscale("log")
    plt.legend()
    plt.tight_layout()
    # savefig does not create missing directories
    os.makedirs('new_plots', exist_ok=True)
    plt.savefig('new_plots/gap_to_best.png')
    plt.show()


plot_gap_to_best(df)

In [None]:
def plot_initial_vs_final_grid(df):
    """Draw, per instance, a grid of initial-vs-final fitness scatter plots.

    One figure is produced for every instance, with one panel per solver
    (the "Heuristic" solver is excluded). Each figure is saved as
    ``new_plots/initial_vs_final_<instance>.png`` and shown.

    The grid has two columns and at least two rows; further rows are added
    when there are more than four solvers, so every solver gets a panel.

    Args:
        df: Results frame with 'Instance', 'Solver', 'InitialFitness' and
            'FinalFitness' columns. Every plotted solver name must be a key
            of ``COLORS``.
    """
    solvers = [s for s in df['Solver'].unique() if s != "Heuristic"]
    instances = df['Instance'].unique()

    n_cols = 2
    # Ceiling division; never fewer than the 2x2 layout.
    n_rows = max(2, -(-len(solvers) // n_cols))

    # savefig does not create missing directories
    os.makedirs('new_plots', exist_ok=True)

    for instance in instances:
        instance_df = df[df['Instance'] == instance]
        fig, axs = plt.subplots(n_rows, n_cols, figsize=(12, 5 * n_rows))
        axs = axs.flatten()  # Flatten 2D array to 1D for easier indexing

        for ax, solver in zip(axs, solvers):
            solver_df = instance_df[instance_df['Solver'] == solver]

            ax.scatter(solver_df['InitialFitness'], solver_df['FinalFitness'],
                       label=solver, alpha=0.7, color=COLORS[solver])
            ax.set_title(solver)
            ax.set_xlabel("Initial Fitness")
            ax.set_ylabel("Final Fitness")
            ax.legend()

        # Hide any panels that have no solver assigned
        for unused_ax in axs[len(solvers):]:
            fig.delaxes(unused_ax)

        instance_name = instance.split(sep=".")[0]

        fig.suptitle(f'Initial vs Final Fitness for {instance_name}', fontsize=16)
        plt.tight_layout(rect=[0, 0, 1, 0.96])  # Adjust for suptitle
        plt.savefig(f'new_plots/initial_vs_final_{instance_name}.png')
        plt.show()
        # Release the figure so a long instance list does not accumulate them
        plt.close(fig)

plot_initial_vs_final_grid(df)


In [None]:
def plot_efficiencies(df):
    """Plot per-instance averages of several run metrics, one figure per metric.

    For each of EvalsPerSecond, Evaluations, TimeMs, FinalFitness and
    Efficiency (EvalsPerSecond / FinalFitness) a log-scale figure is drawn
    with one dashed line per solver and a band of one standard deviation.
    Figures are saved under ``new_plots/`` and shown.

    The 'Efficiency' column is computed on a local copy; the frame passed in
    is not modified.

    Args:
        df: Results frame with 'Instance', 'Solver', 'EvalsPerSecond',
            'Evaluations', 'TimeMs' and 'FinalFitness' columns. Every solver
            name must be a key of ``COLORS``.
    """
    df = df.assign(Efficiency=df['EvalsPerSecond'] / (df['FinalFitness'] + 1e-9))
    instances = df['Instance'].unique()
    solvers = df['Solver'].unique()
    x = np.arange(len(instances))

    # (output file, column, figure title, y-axis label)
    plot_specs = [
        ("evaluations_per_second.png", "EvalsPerSecond",
         "Average Number of Evaluations per Second", "Evaluations per Second"),
        ("evaluations_count.png", "Evaluations",
         "Average Number of Function Evaluations", "Number of Evaluations"),
        ("execution_time.png", "TimeMs",
         "Average Execution Time", "Time (ms)"),
        ("best_final.png", "FinalFitness",
         "Average Final Fitness", "Fitness"),
        ("efficiency.png", "Efficiency",
         "Average Efficiency", "Efficiency (EvalsPerSecond / FinalFitness)"),
    ]

    # savefig does not create missing directories
    os.makedirs("new_plots", exist_ok=True)

    for filename, metric, title, y_label in plot_specs:
        plt.figure(figsize=(14, 8))

        for solver in solvers:
            solver_df = df[df["Solver"] == solver]
            means = []
            stds = []

            for instance in instances:
                values = solver_df.loc[solver_df["Instance"] == instance, metric]
                if values.empty:
                    means.append(np.nan)
                    stds.append(0)
                    continue
                # Small offset on every mean; presumably keeps zero averages
                # drawable on the log axis.
                means.append(values.mean() + 1e-2)
                std_val = values.std()
                # std is NaN for a single run; treat that as no spread
                stds.append(0 if np.isnan(std_val) else std_val)

            means = np.array(means)
            stds = np.array(stds)

            lower = means - stds
            upper = means + stds
            lower = np.where(lower <= 0, 1e-3, lower)  # Avoid log(0)

            plt.plot(x, means, label=solver, color=COLORS[solver], linestyle='--', marker='o')
            plt.fill_between(x, lower, upper, color=COLORS[solver], alpha=0.2)

        plt.xlabel('Instance')
        plt.ylabel(y_label)
        plt.title(title)
        plt.xticks(x, instances, rotation=45)
        plt.yscale('log')
        plt.legend()
        plt.tight_layout()
        plt.savefig(f"new_plots/{filename}")
        plt.show()

plot_efficiencies(df)

In [None]:
def plot_improvement_distribution(df):
    """Bar chart of the mean ``ImprovementPercent`` per instance and solver.

    Instances are drawn in sorted order, each as a group of bars (one per
    solver, sorted by ascending improvement) separated by a one-slot gap.
    The figure is saved to ``new_plots/improvement_by_instance.png`` and shown.

    Tick positions and labels are collected while the bars are drawn, so each
    label sits under the centre of its own group even when the frame's
    instance order is not sorted or a solver is missing for some instance.

    Args:
        df: Results frame with 'Instance', 'Solver' and 'ImprovementPercent'
            columns. Every solver name must be a key of ``COLORS``.
    """
    plt.figure(figsize=(14, 8))

    # Group by instance and solver
    imp_data = df.groupby(['Instance', 'Solver'])['ImprovementPercent'].mean().reset_index()

    instances = df['Instance'].unique()
    solvers = df['Solver'].unique()
    # Bars sit one unit apart, so anything wider than 1 would overlap.
    width = min(1.0, 5 / max(len(solvers), 1))
    offset = 0

    tick_positions = []
    tick_labels = []
    labelled_solvers = set()  # solvers that already have a legend entry

    for instance in sorted(instances):
        # Sort solvers within the instance by ImprovementPercent
        instance_data = (
            imp_data[imp_data['Instance'] == instance]
            .sort_values(by="ImprovementPercent", ascending=True)
        )
        if instance_data.empty:
            continue

        bars = zip(instance_data['Solver'], instance_data['ImprovementPercent'])
        for i, (solver, value) in enumerate(bars):
            # Label each solver once, whichever group it first appears in.
            label = "" if solver in labelled_solvers else solver
            labelled_solvers.add(solver)
            plt.bar(offset + i, value, width=width, label=label, color=COLORS[solver])

        tick_positions.append(offset + (len(instance_data) - 1) / 2)
        tick_labels.append(instance)
        offset += len(instance_data) + 1

    plt.xlabel('Instance')
    plt.ylabel('Average Improvement (%)')
    plt.title('Average Solution Improvement by Instance and Solver')
    plt.xticks(tick_positions, tick_labels, rotation=45)

    plt.legend()
    plt.tight_layout()
    # savefig does not create missing directories
    os.makedirs('new_plots', exist_ok=True)
    plt.savefig('new_plots/improvement_by_instance.png')
    plt.show()

plot_improvement_distribution(df)

In [None]:
# Create the 'optimal_solution' column with None in every row; the following
# cell fills it from best_solutions.
df["optimal_solution"] = None

In [None]:
def _as_lookup_key(value):
    """Return ``value`` in a form usable as a key of ``best_solutions``."""
    # A list is turned into its string form before the lookup; other values
    # are used unchanged.
    return str(value) if isinstance(value, list) else value


# Attach the reference solution of each row's instance; rows whose instance
# has no entry in best_solutions get None.
for index, instance_value in df['Instance'].items():
    lookup_key = _as_lookup_key(instance_value)
    df.at[index, 'optimal_solution'] = best_solutions.get(lookup_key)

In [None]:
def normalized_kendalltau(row):
    """Kendall's tau between a run's solution and its instance's reference solution.

    Args:
        row: Mapping-like row providing 'Solution' and 'optimal_solution'.

    Returns:
        float: The tau statistic returned by ``scipy.stats.kendalltau``
        (already in [-1, 1]), or NaN when the row has no reference solution.
    """
    solution = row['Solution']
    optimal_solution = row['optimal_solution']

    # Rows whose instance has no reference solution hold None (or NaN) here;
    # there is nothing to correlate against, so report NaN instead of raising.
    no_reference = optimal_solution is None or (
        isinstance(optimal_solution, float) and np.isnan(optimal_solution)
    )
    if no_reference:
        return np.nan

    tau, _p_value = kendalltau(solution, optimal_solution)
    return tau

# Compute Kendall's tau row by row (solution vs. the instance's reference
# solution) and store it in a new 'normalized_kendalltau' column
df['normalized_kendalltau'] = df.apply(normalized_kendalltau, axis=1)

In [None]:
# NOTE(review): this definition reuses the name of the improvement-percentage
# plot defined in an earlier cell and replaces it once this cell runs.
# Consider giving it its own name.
def plot_improvement_distribution(df):
    """Bar chart of the mean ``normalized_kendalltau`` per instance and solver.

    Instances are drawn in sorted order, each as a group of bars (one per
    solver, sorted by ascending tau) separated by a one-slot gap. The figure
    is saved to ``new_plots/quality.png`` and shown.

    Tick positions and labels are collected while the bars are drawn, so each
    label sits under the centre of its own group even when the frame's
    instance order is not sorted or a solver is missing for some instance.

    Args:
        df: Results frame with 'Instance', 'Solver' and
            'normalized_kendalltau' columns. Every solver name must be a key
            of ``COLORS``.
    """
    plt.figure(figsize=(14, 8))

    # Group by instance and solver
    imp_data = df.groupby(['Instance', 'Solver'])['normalized_kendalltau'].mean().reset_index()

    instances = df['Instance'].unique()
    solvers = df['Solver'].unique()
    # Bars sit one unit apart, so anything wider than 1 would overlap.
    width = min(1.0, 5 / max(len(solvers), 1))
    offset = 0

    tick_positions = []
    tick_labels = []
    labelled_solvers = set()  # solvers that already have a legend entry

    for instance in sorted(instances):
        # Sort solvers within the instance by their mean Kendall tau
        instance_data = (
            imp_data[imp_data['Instance'] == instance]
            .sort_values(by="normalized_kendalltau", ascending=True)
        )
        if instance_data.empty:
            continue

        bars = zip(instance_data['Solver'], instance_data['normalized_kendalltau'])
        for i, (solver, value) in enumerate(bars):
            # Label each solver once, whichever group it first appears in.
            label = "" if solver in labelled_solvers else solver
            labelled_solvers.add(solver)
            plt.bar(offset + i, value, width=width, label=label, color=COLORS[solver])

        tick_positions.append(offset + (len(instance_data) - 1) / 2)
        tick_labels.append(instance)
        offset += len(instance_data) + 1

    plt.xlabel('Instance')
    plt.ylabel('Average Normalized Quality (Kendall Tau)')
    plt.title('Average Normalized Quality (Kendall Tau)')
    plt.xticks(tick_positions, tick_labels, rotation=45)

    plt.legend()
    plt.tight_layout()
    # savefig does not create missing directories
    os.makedirs('new_plots', exist_ok=True)
    plt.savefig('new_plots/quality.png')
    plt.show()

plot_improvement_distribution(df)

In [None]:
def plot_best_found(df):
    """Plot the best (minimum) final fitness each solver reached on each instance.

    One dashed line per solver on a logarithmic y axis; instances appear in
    the order they occur in the frame. The figure is saved under
    ``new_plots/`` and shown.

    Args:
        df: Results frame with 'Instance', 'Solver' and 'FinalFitness'
            columns. Every solver name must be a key of ``COLORS``.
    """
    solvers = df['Solver'].unique()
    # Taken from the frame itself rather than from a notebook-level variable,
    # so the function does not depend on which cells ran before it.
    instances = df['Instance'].unique()
    x = np.arange(len(instances))

    filename = "best_found"
    plt.figure(figsize=(14, 8))

    for solver in solvers:
        solver_df = df[df["Solver"] == solver]
        best_values = []

        for instance in instances:
            fitness = solver_df.loc[solver_df["Instance"] == instance, "FinalFitness"]
            if fitness.empty:
                best_values.append(np.nan)
            else:
                # Small offset; presumably keeps a zero fitness drawable on
                # the log axis.
                best_values.append(fitness.min() + 1e-2)

        plt.plot(x, np.array(best_values), label=solver, color=COLORS[solver],
                 linestyle='--', marker='o')

    plt.xlabel('Instance')
    plt.ylabel("Best fitness")
    plt.title("Best Final Solution Found Per Instance Per Algorithm")
    plt.xticks(x, instances, rotation=45)
    plt.yscale('log')
    plt.legend()
    plt.tight_layout()
    # savefig does not create missing directories
    os.makedirs("new_plots", exist_ok=True)
    plt.savefig(f"new_plots/{filename}")
    plt.show()

# Call the function defined in this cell (the cell previously re-ran
# plot_efficiencies, so this plot was never produced).
plot_best_found(df)