In [1]:
import pandas as pd
import numpy as np
from sklearn.utils import resample
from collections import defaultdict
from IPython.display import display

# Load the CSV file (whitespace-delimited)
csv_path = "C:\\Users\\aditi\\Downloads\\dbmopp_dataset_perf.csv"
df = pd.read_csv(csv_path, sep='\s+')

# Features to analyze
features = ['n_var', 'n_obj', 'nonident_ps', 'var_density', 'n_discon_ps', 'n_local_fronts', 'n_resist_regions']


# Budgets to analyze
budgets = [5000, 10000, 30000, 50000]

# Parameters
n_iterations = 1000
noise_std = 1e-4

# Store all results
all_feature_results = []

# Loop over each budget level
for budget_level in budgets:
    df_budget = df[df['budget'] == budget_level]

    print(f"\n Analyzing budget = {budget_level}...\n")

# Store results
all_feature_results = []

# Loop over each feature
for feature in features:
    # Define median split groups
    median_val = df[feature].median()
    df['feature_group'] = np.where(df[feature] <= median_val, f'Low {feature}', f'High {feature}')

    for group_name, group_df in df.groupby('feature_group'):
        if len(group_df) < 10:
            continue  # skip if too small

        algo_rank_counts = defaultdict(lambda: [0] * 4)

        for _ in range(n_iterations):
            sample = resample(group_df, replace=True, n_samples=len(group_df))
            sample = sample.copy()
            sample['hypervolume'] += np.random.normal(0, noise_std, size=len(sample))
            mean_scores = sample.groupby('algo')['hypervolume'].mean().reset_index()
            ranked = mean_scores.sort_values(by='hypervolume', ascending=False).reset_index(drop=True)
            for i, row in ranked.iterrows():
                algo_rank_counts[row['algo']][i] += 1

        # Format results
        algorithms = sorted(algo_rank_counts.keys())
        result = {
            'Feature': [feature] * len(algorithms),
            'Group': [group_name] * len(algorithms),
            'Algorithm': algorithms
        }
        for rank in range(4):
            result[f'Rank {rank+1} Frequency'] = [
                algo_rank_counts[algo][rank] / n_iterations for algo in algorithms
            ]

        all_feature_results.append(pd.DataFrame(result))

# Combine all feature benchmarking results and clean up
final_df = pd.concat(all_feature_results, ignore_index=True)
final_df.drop_duplicates(inplace=True)
final_df.reset_index(drop=True, inplace=True)

# Group by feature and feature group, and display results
for feature in sorted(final_df['Feature'].unique()):
    for group in sorted(final_df[final_df['Feature'] == feature]['Group'].unique()):
        print(f"\n Results for Feature = {feature}, Group = {group}")
        display(final_df[(final_df['Feature'] == feature) & (final_df['Group'] == group)])


 Analyzing budget = 5000...


 Analyzing budget = 10000...


 Analyzing budget = 30000...


 Analyzing budget = 50000...


 Results for Feature = n_discon_ps, Group = High n_discon_ps


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
28,n_discon_ps,High n_discon_ps,IBEA,1.0,0.0,0.0,0.0
29,n_discon_ps,High n_discon_ps,MOEAD,0.0,0.0,1.0,0.0
30,n_discon_ps,High n_discon_ps,NSGAII,0.0,1.0,0.0,0.0
31,n_discon_ps,High n_discon_ps,Random,0.0,0.0,0.0,1.0



 Results for Feature = n_discon_ps, Group = Low n_discon_ps


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
32,n_discon_ps,Low n_discon_ps,IBEA,1.0,0.0,0.0,0.0
33,n_discon_ps,Low n_discon_ps,MOEAD,0.0,0.0,0.949,0.051
34,n_discon_ps,Low n_discon_ps,NSGAII,0.0,1.0,0.0,0.0
35,n_discon_ps,Low n_discon_ps,Random,0.0,0.0,0.051,0.949



 Results for Feature = n_local_fronts, Group = High n_local_fronts


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
36,n_local_fronts,High n_local_fronts,IBEA,1.0,0.0,0.0,0.0
37,n_local_fronts,High n_local_fronts,MOEAD,0.0,0.0,0.0,1.0
38,n_local_fronts,High n_local_fronts,NSGAII,0.0,1.0,0.0,0.0
39,n_local_fronts,High n_local_fronts,Random,0.0,0.0,1.0,0.0



 Results for Feature = n_local_fronts, Group = Low n_local_fronts


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
40,n_local_fronts,Low n_local_fronts,IBEA,1.0,0.0,0.0,0.0
41,n_local_fronts,Low n_local_fronts,MOEAD,0.0,0.0,1.0,0.0
42,n_local_fronts,Low n_local_fronts,NSGAII,0.0,1.0,0.0,0.0
43,n_local_fronts,Low n_local_fronts,Random,0.0,0.0,0.0,1.0



 Results for Feature = n_obj, Group = High n_obj


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
8,n_obj,High n_obj,IBEA,1.0,0.0,0.0,0.0
9,n_obj,High n_obj,MOEAD,0.0,0.0,0.085,0.915
10,n_obj,High n_obj,NSGAII,0.0,1.0,0.0,0.0
11,n_obj,High n_obj,Random,0.0,0.0,0.915,0.085



 Results for Feature = n_obj, Group = Low n_obj


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
12,n_obj,Low n_obj,IBEA,1.0,0.0,0.0,0.0
13,n_obj,Low n_obj,MOEAD,0.0,0.0,1.0,0.0
14,n_obj,Low n_obj,NSGAII,0.0,1.0,0.0,0.0
15,n_obj,Low n_obj,Random,0.0,0.0,0.0,1.0



 Results for Feature = n_resist_regions, Group = High n_resist_regions


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
44,n_resist_regions,High n_resist_regions,IBEA,1.0,0.0,0.0,0.0
45,n_resist_regions,High n_resist_regions,MOEAD,0.0,0.0,1.0,0.0
46,n_resist_regions,High n_resist_regions,NSGAII,0.0,1.0,0.0,0.0
47,n_resist_regions,High n_resist_regions,Random,0.0,0.0,0.0,1.0



 Results for Feature = n_resist_regions, Group = Low n_resist_regions


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
48,n_resist_regions,Low n_resist_regions,IBEA,1.0,0.0,0.0,0.0
49,n_resist_regions,Low n_resist_regions,MOEAD,0.0,0.0,0.2,0.8
50,n_resist_regions,Low n_resist_regions,NSGAII,0.0,1.0,0.0,0.0
51,n_resist_regions,Low n_resist_regions,Random,0.0,0.0,0.8,0.2



 Results for Feature = n_var, Group = High n_var


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
0,n_var,High n_var,IBEA,1.0,0.0,0.0,0.0
1,n_var,High n_var,MOEAD,0.0,0.0,1.0,0.0
2,n_var,High n_var,NSGAII,0.0,1.0,0.0,0.0
3,n_var,High n_var,Random,0.0,0.0,0.0,1.0



 Results for Feature = n_var, Group = Low n_var


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
4,n_var,Low n_var,IBEA,1.0,0.0,0.0,0.0
5,n_var,Low n_var,MOEAD,0.0,0.0,0.0,1.0
6,n_var,Low n_var,NSGAII,0.0,1.0,0.0,0.0
7,n_var,Low n_var,Random,0.0,0.0,1.0,0.0



 Results for Feature = nonident_ps, Group = High nonident_ps


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
16,nonident_ps,High nonident_ps,IBEA,1.0,0.0,0.0,0.0
17,nonident_ps,High nonident_ps,MOEAD,0.0,0.0,1.0,0.0
18,nonident_ps,High nonident_ps,NSGAII,0.0,1.0,0.0,0.0
19,nonident_ps,High nonident_ps,Random,0.0,0.0,0.0,1.0



 Results for Feature = nonident_ps, Group = Low nonident_ps


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
20,nonident_ps,Low nonident_ps,IBEA,1.0,0.0,0.0,0.0
21,nonident_ps,Low nonident_ps,MOEAD,0.0,0.0,1.0,0.0
22,nonident_ps,Low nonident_ps,NSGAII,0.0,1.0,0.0,0.0
23,nonident_ps,Low nonident_ps,Random,0.0,0.0,0.0,1.0



 Results for Feature = var_density, Group = Low var_density


Unnamed: 0,Feature,Group,Algorithm,Rank 1 Frequency,Rank 2 Frequency,Rank 3 Frequency,Rank 4 Frequency
24,var_density,Low var_density,IBEA,1.0,0.0,0.0,0.0
25,var_density,Low var_density,MOEAD,0.0,0.0,1.0,0.0
26,var_density,Low var_density,NSGAII,0.0,1.0,0.0,0.0
27,var_density,Low var_density,Random,0.0,0.0,0.0,1.0
