## Dataset selection

In [14]:
import re
import pandas as pd
from pathlib import Path

In [15]:
from utils import construct_instance_log_dict
# --- Experiment selection ---------------------------------------------------
# Results are read from ../wkdir/<dataset>/<partition>/...
partition = "test"
dataset = "MVC"
INCLUDE_CONPAS = True
base_path = f"../wkdir/{dataset}/{partition}"

methods_paths = []
# Each entry: (trust-region method, network directory, display name).
configurations = [
    ("thresholded_expected_error","graph_with_literals_8_GTR", "backpas"),
    ("fixed_three_ratios","graph_with_literals_8_GTR", "backpas-net"),
    ("thresholded_expected_error","graph_with_variables_2_GCN", "backpas-param"),
    ("fixed_three_ratios","graph_with_variables_2_GCN", "backpas-v0"),
]

# Maps a run directory name (which encodes the hyperparameters) to its display name.
method_to_name = {}

for method,network, name in configurations:
    backpas_network_method_paths = Path(f"{base_path}/{network}/trust_region_{method}")
    # Keep only run directories; sibling "*_log" directories are excluded here.
    methods_to_add = [
        method_path
        for method_path in backpas_network_method_paths.iterdir()
        if method_path.is_dir() and not method_path.name.endswith("_log")
    ]
    methods_paths += methods_to_add
    if len(methods_to_add) == 1:
        method_to_name[methods_to_add[0].name] = name
    else:
        # A display name can only be attached unambiguously to a single run
        # directory; report the situation instead of skipping it silently.
        print(
            f"Warning: expected exactly one run directory in {backpas_network_method_paths}, "
            f"found {len(methods_to_add)}; no display name registered for '{name}'."
        )

print(f"Methods paths: {methods_paths}")
baseline_path = Path(f"{base_path}/baseline")
conpas_path = Path(f"{base_path}/conpas_paper")

if INCLUDE_CONPAS:
    methods_paths.append(conpas_path)

instance_log_dict = construct_instance_log_dict(
    baseline_path=baseline_path,
    methods_paths=methods_paths
)

Methods paths: [PosixPath('../wkdir/MVC/test/graph_with_literals_8_GTR/trust_region_thresholded_expected_error/threshold_0.9958418844977528_alpha_-0.92825330964769'), PosixPath('../wkdir/MVC/test/graph_with_literals_8_GTR/trust_region_fixed_three_ratios/k_0.5024084501989196_value_0_0.2415110713475371_delta_0.0009228807242832'), PosixPath('../wkdir/MVC/test/graph_with_variables_2_GCN/trust_region_thresholded_expected_error/threshold_0.9470682374978162_alpha_-0.0458712083257581'), PosixPath('../wkdir/MVC/test/graph_with_variables_2_GCN/trust_region_fixed_three_ratios/k_0.4169123225132681_value_0_0.4346804150117205_delta_0.1184103259973196')]
Method threshold_0.9958418844977528_alpha_-0.92825330964769 has 0 instances with timeout.
Method k_0.5024084501989196_value_0_0.2415110713475371_delta_0.0009228807242832 has 0 instances with timeout.
Method threshold_0.9470682374978162_alpha_-0.0458712083257581 has 0 instances with timeout.
Method k_0.4169123225132681_value_0_0.4346804150117205_delta

## Selected test hyperparameters

In [16]:
# Number of nodes per instance, used to convert the ratios encoded in the run
# directory names into absolute sizes. None means there is no fixed node count.
if dataset== "MIS" or dataset == "MVC":
    n_nodes = 6000
elif dataset == "CA":
    n_nodes = 4000
elif dataset == "MIS-mixed-train":
    n_nodes = None # No fixed number of nodes
else:
    raise ValueError(f"Dataset {dataset} not recognized.")

for key, value in method_to_name.items():
    if key.startswith("threshold"):
        # Directory name layout: threshold_<threshold>_alpha_<alpha>
        key_parts = key.split("_")
        print(f"{value} : $({key_parts[1]}, {key_parts[3]})$")
    elif key.startswith("k_0"):
        # Directory name layout: k_<k>_value_0_<value_0>_delta_<delta>
        key_parts = key.split("_")
        k = float(key_parts[1])
        value_0 = float(key_parts[4])
        delta = float(key_parts[6])
        if n_nodes is None:
            # Without a fixed node count the ratios cannot be turned into
            # absolute sizes (k * None would raise TypeError), so report the
            # raw ratios instead.
            print(f"{value} : $({k}, {value_0}, {delta})$ (ratios; no fixed number of nodes)")
        else:
            Delta = int(k * n_nodes * delta)
            K_0 = int(k * n_nodes * value_0)
            K_1 = int(k * n_nodes * (1 - value_0))
            print(f"{value} : $({K_0}, {K_1}, {Delta})$")
    else:
        raise ValueError(f"Method name {key} not recognized.")

backpas : $(0.9958418844977528, -0.92825330964769)$
backpas-net : $(728, 2286, 2)$
backpas-param : $(0.9470682374978162, -0.0458712083257581)$
backpas-v0 : $(1087, 1414, 296)$


## Primal integral and statistical tests

### Checking missing logs

In [17]:
# One record per (instance, method) whose log file does not exist on disk.
# The instance path is rebuilt from the log path by dropping "_log" from the
# log directory name and ".log" from the file name.
missing_logs = [
    (
        instance_name,
        method_name,
        log_file_path,
        log_file_path.parent.parent
        / log_file_path.parent.name.replace("_log", "")
        / log_file_path.name.replace(".log", ""),
    )
    for instance_name, instance_log in instance_log_dict.items()
    for method_name, log_file_path in instance_log.items()
    if not log_file_path.exists()
]

In [18]:
# Tabulate the missing logs and show how many each method has, largest first.
missing_log_columns = ["instance", "method", "log_file_path","instance_path"]
df_aux = pd.DataFrame(missing_logs, columns=missing_log_columns)
missing_per_method = df_aux.groupby("method")["log_file_path"].count()
missing_per_method.sort_values(ascending=False)

Series([], Name: log_file_path, dtype: int64)

In [19]:
from utils import create_temp_file_list
# When logs are missing, write the list of affected instance paths to
# <base_path>/step_<step>.txt after an interactive confirmation.
if len(missing_logs)>0:
    # De-duplicate: the same instance path may be missing for several methods.
    file_paths = list({instance_path for _, _, _, instance_path in missing_logs})
    aux = input("Enter step")
    output_filename = Path(base_path) / f"step_{aux}.txt"
    aux = input(f"The file {output_filename} will be created with {len(file_paths)} instances. Press OK to confirm")
    # The prompt asks for "OK", so accept the answer regardless of case or
    # surrounding whitespace (the previous exact match on "ok" rejected "OK").
    if aux.strip().lower() == "ok":
        create_temp_file_list(file_paths, output_filename=str(output_filename))
    else:
        raise Exception("File not created due to lack of confirmation")
else:
    print("You can continue")

You can continue


### Primal integral

In [20]:
from utils import get_all_logs_for_instance
# Optimization direction of each supported dataset.
objective_by_dataset = {
    "MIS": "max",
    "MIS-mixed-train": "max",
    "CA": "max",
    "MVC": "min",
}
if dataset not in objective_by_dataset:
    raise ValueError(f"Dataset {dataset} not recognized.")
objective = objective_by_dataset[dataset]

# Returns two frames: one used below for the primal integral, and `df`, which
# is used below for the per-instance primal gap.
df_primal_integral,df = get_all_logs_for_instance(instance_log_dict, objective=objective)

In [21]:
# Descriptive statistics of the primal integral, one row per method.
primal_integral_by_method = df_primal_integral.groupby("method")["primal_integral"]
primal_integral_by_method.describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
baseline,100.0,7.791648,1.150872,4.626144,7.147265,7.776652,8.566974,10.656632
conpas_paper,100.0,6.831431,2.532578,1.242482,5.23397,6.406065,7.634079,18.479718
k_0.4169123225132681_value_0_0.4346804150117205_delta_0.1184103259973196,100.0,2.195695,0.436552,1.006312,2.069017,2.132363,2.199714,4.029137
k_0.5024084501989196_value_0_0.2415110713475371_delta_0.0009228807242832,100.0,1.73676,0.48899,0.027397,1.02525,2.02512,2.032725,2.094765
threshold_0.9470682374978162_alpha_-0.0458712083257581,100.0,1.496491,0.446727,0.141569,1.221912,1.411368,1.77356,2.773978
threshold_0.9958418844977528_alpha_-0.92825330964769,100.0,0.489605,0.504363,0.0,0.000133,0.043654,1.000154,1.109847


In [22]:
# Smallest primal gap recorded for every (method, instance) pair.
df_primal_gap = (
    df.groupby(["method", "instance"])["primal_gap"]
    .min()
    .reset_index()
    .copy()
)

In [23]:
# Sanity check: for every column, the largest per-(instance, method) non-null
# count must be exactly 1, i.e. no pair appears more than once.
rows_per_pair = df_primal_integral.groupby(["instance", "method"]).count()
assert (rows_per_pair.max() == 1).all()

In [24]:
def method_renamed(x):
    """Translate a raw method identifier into its display name.

    Lookup order: an explicit entry in ``method_to_name``; then the prefixes
    "conpas" -> "conpas" and "baseline" -> "gurobi"; otherwise ``x`` is
    returned unchanged.
    """
    try:
        return method_to_name[x]
    except KeyError:
        pass
    for prefix, label in (("conpas", "conpas"), ("baseline", "gurobi")):
        if x.startswith(prefix):
            return label
    return x
def create_df_per_instance(df_param,metric):
    """Pivot a long (method, instance, metric) frame into one row per instance.

    The input frame is not modified. Methods are renamed with
    ``method_renamed`` and each becomes a column called ``<metric>_<name>``;
    when a pair occurs more than once the first value is kept. ``instance``
    is returned as a regular column.
    """
    labelled = df_param.assign(
        method_renamed=df_param["method"].apply(method_renamed)
    )
    selected = labelled[["method_renamed", "instance", metric]]
    wide = selected.pivot_table(
        values=metric,
        index="instance",
        columns="method_renamed",
        aggfunc="first",
    )
    wide = wide.add_prefix(f"{metric}_")
    return wide.reset_index()

# Wide per-instance tables (one column per method) for both metrics.
df_primal_gap_per_instance = create_df_per_instance(
    df_param=df_primal_gap, metric="primal_gap"
)
df_primal_integral_per_instance = create_df_per_instance(
    df_param=df_primal_integral, metric="primal_integral"
)

In [25]:
# Display the wide table: one row per instance and one
# primal_integral_<method> column per method.
df_primal_integral_per_instance

method_renamed,instance,primal_integral_backpas,primal_integral_backpas-net,primal_integral_backpas-param,primal_integral_backpas-v0,primal_integral_conpas,primal_integral_gurobi
0,trust_region_test_mvc_instance_0.lp,1.018240,2.021722,1.357915,2.139779,9.123724,6.545733
1,trust_region_test_mvc_instance_1.lp,0.005215,2.084252,1.408061,2.236411,5.237675,9.485464
2,trust_region_test_mvc_instance_10.lp,1.000002,2.031217,1.935568,2.070396,5.324901,7.156622
3,trust_region_test_mvc_instance_11.lp,1.002089,1.002228,1.541172,2.179274,5.608167,9.020447
4,trust_region_test_mvc_instance_12.lp,0.000114,2.048086,1.512979,2.187317,10.119668,8.788531
...,...,...,...,...,...,...,...
95,trust_region_test_mvc_instance_95.lp,1.000002,2.022458,1.136701,2.079888,9.613414,7.232484
96,trust_region_test_mvc_instance_96.lp,0.033581,2.019601,1.014620,2.030125,6.935843,7.297394
97,trust_region_test_mvc_instance_97.lp,0.000328,1.007682,1.540096,2.151575,6.280839,7.369351
98,trust_region_test_mvc_instance_98.lp,1.000000,2.032556,1.042953,2.187381,6.909693,7.876011


In [26]:
# Descriptive statistics per method column (rounded to 2 decimals), shown with
# the largest mean primal integral first.
summary = df_primal_integral_per_instance.describe().transpose().round(2)
summary.sort_values(by="mean", ascending=False)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
method_renamed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
primal_integral_gurobi,100.0,7.79,1.15,4.63,7.15,7.78,8.57,10.66
primal_integral_conpas,100.0,6.83,2.53,1.24,5.23,6.41,7.63,18.48
primal_integral_backpas-v0,100.0,2.2,0.44,1.01,2.07,2.13,2.2,4.03
primal_integral_backpas-net,100.0,1.74,0.49,0.03,1.03,2.03,2.03,2.09
primal_integral_backpas-param,100.0,1.5,0.45,0.14,1.22,1.41,1.77,2.77
primal_integral_backpas,100.0,0.49,0.5,0.0,0.0,0.04,1.0,1.11


### Statistical tests

In [27]:
import pandas as pd
from scipy.stats import wilcoxon, ttest_rel



def run_tests(name_a, name_b, col_a, col_b, dataset_name,metric, alpha=0.05):
    """Compare two paired samples with one-sided Wilcoxon and paired t-tests.

    Each test is run in both directions. "A better" means the alternative
    ``col_a < col_b`` is accepted (``alternative='less'``); "B better" means
    ``col_a > col_b`` is accepted (``alternative='greater'``).

    Parameters
    ----------
    name_a, name_b : str
        Labels of the two methods, used in the conclusion strings.
    col_a, col_b : array-like
        Paired measurements (same length, same instance order).
    dataset_name : str
        Stored verbatim under the "dataset" key.
    metric : str
        Stored verbatim under the "metric" key.
    alpha : float, default 0.05
        Significance level applied to every p-value.

    Returns
    -------
    dict
        Statistic, p-value, boolean decision and textual conclusion for each
        test and direction, plus the identifying fields.
    """
    import numpy as np  # local import: numpy is not imported at notebook level

    # With the default zero_method, wilcoxon cannot handle a sample whose
    # paired differences are all zero (older SciPy versions raise ValueError).
    # In that case there is no evidence either way, so report NaN, which
    # compares as "not significant" below.
    differences = np.asarray(col_a, dtype=float) - np.asarray(col_b, dtype=float)
    if np.any(differences != 0):
        w_stat_a_b, w_pval_a_b = wilcoxon(col_a, col_b, alternative='less')
        w_stat_b_a, w_pval_b_a = wilcoxon(col_a, col_b, alternative='greater')
    else:
        w_stat_a_b = w_pval_a_b = w_stat_b_a = w_pval_b_a = float("nan")

    # Paired t-tests
    t_stat_a_b, t_pval_a_b = ttest_rel(col_a, col_b, alternative='less')
    t_stat_b_a, t_pval_b_a = ttest_rel(col_a, col_b, alternative='greater')

    def _conclusion(winner, pvalue):
        # NaN p-values compare False and therefore read as "not significant".
        return f"{winner} better" if pvalue < alpha else "not significant"

    return {
        "dataset": dataset_name,
        "metric":metric,
        "Method A": name_a,
        "Method B": name_b,

        # Wilcoxon values
        "wilcoxon_stat_A_better": w_stat_a_b,
        "wilcoxon_pvalue_A_better": w_pval_a_b,
        "wilcoxon_is_A_better": w_pval_a_b < alpha,
        "wilcoxon_conclusion_A_better": _conclusion(name_a, w_pval_a_b),

        "wilcoxon_stat_B_better": w_stat_b_a,
        "wilcoxon_pvalue_B_better": w_pval_b_a,
        "wilcoxon_is_B_better": w_pval_b_a < alpha,
        "wilcoxon_conclusion_B_better": _conclusion(name_b, w_pval_b_a),

        # t-test values
        "ttest_stat_A_better": t_stat_a_b,
        "ttest_pvalue_A_better": t_pval_a_b,
        "ttest_is_A_better": t_pval_a_b < alpha,
        "ttest_conclusion_A_better": _conclusion(name_a, t_pval_a_b),

        "ttest_stat_B_better": t_stat_b_a,
        "ttest_pvalue_B_better": t_pval_b_a,
        "ttest_is_B_better": t_pval_b_a < alpha,
        "ttest_conclusion_B_better": _conclusion(name_b, t_pval_b_a),
    }

def run_all_comparisons(df_per_instance, dataset_name,metric, methods=None):
    """Run ``run_tests`` on every unordered pair of methods.

    Parameters
    ----------
    df_per_instance : pandas.DataFrame
        Wide frame with one ``<metric>_<method>`` column per method.
    dataset_name : str
        Forwarded to ``run_tests``.
    metric : str
        Column prefix selecting the metric, also forwarded to ``run_tests``.
    methods : list of str, optional
        Method names to compare, in the order used for pairing. Defaults to
        the six methods of this study.

    Returns
    -------
    list of dict
        One ``run_tests`` result per pair (A precedes B in ``methods``).
    """
    from itertools import combinations  # local import keeps the cell self-contained

    if methods is None:
        methods = ["backpas-v0", "backpas-net", 'backpas-param', "backpas", "conpas", "gurobi"]

    # A method may legitimately be absent from the frame (for example when it
    # was not loaded); skip it instead of failing with a KeyError.
    available = [m for m in methods if f"{metric}_{m}" in df_per_instance.columns]
    skipped = [m for m in methods if m not in available]
    if skipped:
        print(f"Skipping methods without a '{metric}' column: {skipped}")

    # combinations() yields pairs in the same order as the nested i < j loops.
    return [
        run_tests(
            method_a, method_b,
            df_per_instance[f'{metric}_{method_a}'],
            df_per_instance[f'{metric}_{method_b}'],
            dataset_name,
            metric
        )
        for method_a, method_b in combinations(available, 2)
    ]
print("dataset:", dataset)

# Pairwise tests for both metrics, stacked into a single table.
results_primal_integral = run_all_comparisons(
    df_primal_integral_per_instance, dataset, "primal_integral"
)
results_primal_gap = run_all_comparisons(
    df_primal_gap_per_instance, dataset, "primal_gap"
)
df_test_results = pd.DataFrame([*results_primal_integral, *results_primal_gap])

# Persist the full table in the notebook's working directory.
csv_path = f"test_results_{dataset}_statistical_ablation.csv"
df_test_results.to_csv(csv_path, index=False)


dataset: MVC


In [28]:
# Show only the identifying fields and the textual conclusions of each test.
conclusion_columns = [
    'dataset',
    'metric',
    'Method A',
    'Method B',
    'wilcoxon_conclusion_A_better',
    'wilcoxon_conclusion_B_better',
    'ttest_conclusion_A_better',
    'ttest_conclusion_B_better',
]
df_test_results[conclusion_columns]

Unnamed: 0,dataset,metric,Method A,Method B,wilcoxon_conclusion_A_better,wilcoxon_conclusion_B_better,ttest_conclusion_A_better,ttest_conclusion_B_better
0,MVC,primal_integral,backpas-v0,backpas-net,not significant,backpas-net better,not significant,backpas-net better
1,MVC,primal_integral,backpas-v0,backpas-param,not significant,backpas-param better,not significant,backpas-param better
2,MVC,primal_integral,backpas-v0,backpas,not significant,backpas better,not significant,backpas better
3,MVC,primal_integral,backpas-v0,conpas,backpas-v0 better,not significant,backpas-v0 better,not significant
4,MVC,primal_integral,backpas-v0,gurobi,backpas-v0 better,not significant,backpas-v0 better,not significant
5,MVC,primal_integral,backpas-net,backpas-param,not significant,backpas-param better,not significant,backpas-param better
6,MVC,primal_integral,backpas-net,backpas,not significant,backpas better,not significant,backpas better
7,MVC,primal_integral,backpas-net,conpas,backpas-net better,not significant,backpas-net better,not significant
8,MVC,primal_integral,backpas-net,gurobi,backpas-net better,not significant,backpas-net better,not significant
9,MVC,primal_integral,backpas-param,backpas,not significant,backpas better,not significant,backpas better
