In [1]:
import os

# Work from the parent of the directory the kernel started in.
# The starting directory is remembered on first execution so that re-running
# this cell stays put instead of climbing one more level on every run.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))

In [2]:
import glob
import pandas as pd
import time
import numpy as np
from scipy import stats

In [3]:
# Directory (relative to the working directory) searched for evaluation CSVs.
resultDir = 'results'

# Problem families to summarise.
# choices=['setcover', 'cauctions', 'facilities', 'indset']
problems = [
    'setcover',
    'cauctions',
    'facilities',
]

# Sampling strategies reported in the summary tables, in display order.
# choices: uniform5, depthK, depthK2, depthK3
sampling_Strategies = [
    'uniform5',
    'depthK',
    'depthK2',
]

# Seed values 0..4.
seeds = list(range(5))

In [4]:
# Problem family picked for ad-hoc inspection.
# NOTE(review): no cell visible in this notebook reads this variable directly — confirm it is still needed.
problem = 'facilities'

In [5]:
_METRICS = ['nnodes', 'stime']
_BASELINE_STRATEGY = 'uniform5'


def _gmean_1shifted(x):
    """Return the 1-shifted geometric mean of ``x``: ``gmean(x + 1) - 1``."""
    return stats.gmean(np.asarray(x, dtype=float) + 1.0) - 1.0


def _normalize_by_baseline(grouped, baseline=_BASELINE_STRATEGY):
    """Divide every (size, strategy) row by the baseline-strategy row of the same size."""
    sizes = grouped.index.get_level_values(0).unique()
    parts = [grouped.loc[size] / grouped.loc[(size, baseline)] for size in sizes]
    return pd.concat(parts, keys=sizes)


def get_summary_df_per_problem(problem, result_dir=None, strategies=None, n_obs=100):
    """Build the formatted per-strategy summary table for one problem family.

    Reads one evaluation CSV matching ``{result_dir}/{problem}_*.csv``, keeps
    the 'small' and 'medium' rows, drops the 'internal:relpscost' policy and
    aggregates the remaining rows per (type, sampling_strategy).

    Parameters
    ----------
    problem : str
        Prefix of the evaluation CSV file name.
    result_dir : str, optional
        Directory to search. Defaults to the notebook-level ``resultDir``.
    strategies : sequence of str, optional
        Sampling strategies to report, in output order. Defaults to the
        notebook-level ``sampling_Strategies``.
    n_obs : int, optional
        Sample size assumed for both groups in the t-test (default 100).
        NOTE(review): presumably the number of evaluated instances — confirm
        it matches the data.

    Returns
    -------
    pandas.DataFrame
        Indexed by (type, sampling_strategy) with columns 'nnodes', 'stime',
        'wins' and 't-stats (p-val)':

        * 'nnodes' / 'stime': 1-shifted geometric mean normalised by the
          'uniform5' value of the same type, followed by the mean
          per-instance std/mean ratio as a percentage.
        * 'wins': number of (type, instance, seed) groups in which the
          strategy has the smallest 'stime'.
        * 't-stats (p-val)': t-test on 'stime' of the normalised arithmetic
          mean against the 'uniform5' reference (mean 1).

    Raises
    ------
    FileNotFoundError
        If no CSV file matches the pattern.
    """
    if result_dir is None:
        result_dir = resultDir
    strategies = sampling_Strategies if strategies is None else list(strategies)

    # glob returns files in arbitrary order; sort so the pick is deterministic.
    # NOTE(review): this takes the lexicographically last file, presumably the
    # most recent run if file names carry a timestamp — confirm.
    pattern = f'{result_dir}/{problem}_*.csv'
    eval_files = sorted(glob.glob(pattern))
    if not eval_files:
        raise FileNotFoundError(f'no evaluation file matches {pattern!r}')
    eval_file = eval_files[-1]

    df = pd.read_csv(eval_file)
    df = pd.concat([df[df['type'] == 'small'], df[df['type'] == 'medium']])
    df = df.astype({'nlps': float, 'nnodes': float})

    df_gcnns = df[df['policy'] != 'internal:relpscost'].copy()

    # Aggregate per (type, strategy), then express everything relative to the baseline.
    by_strategy = df_gcnns.groupby(['type', 'sampling_strategy'])[_METRICS]
    gmean_norm = _normalize_by_baseline(by_strategy.agg(_gmean_1shifted))
    mean_norm = _normalize_by_baseline(by_strategy.mean())

    # Per-instance std/mean ratio, averaged over instances. Only the metric
    # columns are selected so non-numeric columns never reach std()/mean().
    by_instance = df_gcnns.groupby(['type', 'sampling_strategy', 'instance'])[_METRICS]
    cv_per_instance = by_instance.std() / by_instance.mean()
    std_norm = cv_per_instance.groupby(level=['type', 'sampling_strategy'])[_METRICS].mean()

    # t-test of each strategy's normalised mean against the baseline, whose
    # normalised mean is 1 by construction.
    ttest_rows = []
    for size, strategy in mean_norm.index:
        row = []
        for metric in _METRICS:
            mean1 = mean_norm.loc[(size, strategy), metric]
            std1 = std_norm.loc[(size, strategy), metric] * mean1
            std2 = std_norm.loc[(size, _BASELINE_STRATEGY), metric]
            t_stats, p_value = stats.ttest_ind_from_stats(mean1, std1, n_obs, 1, std2, n_obs)
            row.extend([t_stats, p_value])
        ttest_rows.append(row)
    ttest_res = pd.DataFrame(
        ttest_rows,
        index=mean_norm.index,
        columns=pd.MultiIndex.from_product((_METRICS, ['t_stats', 'p_value'])),
    )

    # A "win" is the row with the smallest solving time within one (type, instance, seed).
    winner_idx = df_gcnns.groupby(['type', 'instance', 'seed'])['stime'].idxmin()
    df_gcnns['wins'] = df_gcnns.index.isin(winner_idx.dropna()).astype(int)
    wins = df_gcnns.groupby(['type', 'sampling_strategy'])['wins'].sum()

    output_idx = pd.MultiIndex.from_product(
        (['small', 'medium'], strategies), names=['type', 'sampling_strategy']
    )
    out_mean = gmean_norm.reindex(output_idx)
    out_std = std_norm.reindex(output_idx)
    out_ttest = ttest_res.reindex(output_idx)

    summary = pd.DataFrame(index=output_idx)
    for metric in _METRICS:
        summary[metric] = [
            f"{m:.4f}r ± {s * 100:.2f}%" for m, s in zip(out_mean[metric], out_std[metric])
        ]
    summary['wins'] = wins.reindex(output_idx)
    summary['t-stats (p-val)'] = [
        f"{t:.2f} ({p:.4f})"
        for t, p in zip(out_ttest[('stime', 't_stats')], out_ttest[('stime', 'p_value')])
    ]
    summary.columns.name = 'metrics'
    return summary

In [6]:
# One formatted summary table per problem family, keyed by the family name.
df_prob_summaries = dict(
    (prob_name, get_summary_df_per_problem(prob_name))
    for prob_name in problems
)

In [7]:
def _sizes_to_columns(prob_df):
    """Move the 'type' index level of one summary table into an outer column level."""
    sizes = prob_df.index.get_level_values('type').unique()
    return pd.concat([prob_df.loc[size] for size in sizes], axis=1, keys=sizes)


# Built with a comprehension so no loop variable rebinds notebook-level names
# such as `problem`.
df_probs_dict = {
    prob_name: _sizes_to_columns(summary)
    for prob_name, summary in df_prob_summaries.items()
}

# Stack the per-problem tables; the problem name becomes the outermost row level.
df_summaries_allProbs = pd.concat(df_probs_dict)

In [12]:
# Display the combined table: rows are (problem, sampling_strategy), columns are (type, metric).
df_summaries_allProbs

Unnamed: 0_level_0,type,small,small,small,small,medium,medium,medium,medium
Unnamed: 0_level_1,metrics,nnodes,stime,wins,t-stats (p-val),nnodes,stime,wins,t-stats (p-val)
Unnamed: 0_level_2,sampling_strategy,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
setcover,uniform5,1.0000r ± 8.06%,1.0000r ± 4.04%,37,0.00 (1.0000),1.0000r ± 7.86%,1.0000r ± 7.01%,45,0.00 (1.0000)
setcover,depthK,1.0156r ± 8.56%,1.0057r ± 4.75%,29,4.15 (0.0000),1.1279r ± 13.48%,1.1200r ± 11.59%,12,11.78 (0.0000)
setcover,depthK2,1.0041r ± 8.44%,0.9937r ± 4.91%,34,-0.86 (0.3911),1.0257r ± 13.47%,1.0258r ± 11.65%,43,2.24 (0.0259)
cauctions,uniform5,1.0000r ± 18.39%,1.0000r ± 11.74%,29,0.00 (1.0000),1.0000r ± 12.18%,1.0000r ± 10.22%,33,0.00 (1.0000)
cauctions,depthK,1.0220r ± 16.46%,0.9917r ± 9.76%,25,-1.02 (0.3100),0.9775r ± 11.95%,0.9678r ± 8.51%,41,-2.37 (0.0187)
cauctions,depthK2,0.9698r ± 14.76%,0.9753r ± 8.72%,46,-2.08 (0.0386),0.9890r ± 11.08%,0.9895r ± 8.85%,26,-1.52 (0.1308)
facilities,uniform5,1.0000r ± 22.13%,1.0000r ± 15.13%,25,0.00 (1.0000),1.0000r ± 14.78%,1.0000r ± 15.13%,33,0.00 (1.0000)
facilities,depthK,0.9955r ± 23.31%,0.9842r ± 14.66%,39,-1.00 (0.3209),1.0070r ± 16.22%,1.0043r ± 14.29%,25,-0.26 (0.7949)
facilities,depthK2,1.0032r ± 23.29%,0.9950r ± 14.72%,36,0.17 (0.8618),1.0375r ± 16.26%,1.0090r ± 15.23%,42,-0.89 (0.3725)


In [8]:
# Base path of the combined summary export, without a file extension.
output_filepath = "{}/eval_allProbs".format(resultDir)
# CSV export is currently disabled:
# df_summaries_allProbs.to_csv(output_filepath)

In [9]:
# Display the summary table of the 'facilities' problem family on its own.
df_prob_summaries['facilities']

Unnamed: 0_level_0,metrics,nnodes,stime,wins,t-stats (p-val)
type,sampling_strategy,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
small,uniform5,1.0000r ± 22.13%,1.0000r ± 15.13%,25,0.00 (1.0000)
small,depthK,0.9955r ± 23.31%,0.9842r ± 14.66%,39,-1.00 (0.3209)
small,depthK2,1.0032r ± 23.29%,0.9950r ± 14.72%,36,0.17 (0.8618)
medium,uniform5,1.0000r ± 14.78%,1.0000r ± 15.13%,33,0.00 (1.0000)
medium,depthK,1.0070r ± 16.22%,1.0043r ± 14.29%,25,-0.26 (0.7949)
medium,depthK2,1.0375r ± 16.26%,1.0090r ± 15.23%,42,-0.89 (0.3725)


In [10]:
# Copy the 'facilities' summary table to the system clipboard.
# NOTE(review): presumably fails on a headless machine without a clipboard backend — confirm before running unattended.
df_prob_summaries['facilities'].to_clipboard()

In [11]:
# Write the combined summary table to `<output_filepath>.xls`.
# NOTE(review): the legacy .xls suffix needs a matching writer engine in pandas — confirm one is available, or consider .xlsx.
df_summaries_allProbs.to_excel(output_filepath+".xls")

In [71]:
# Load a CSV version of the combined summary for inspection.
# NOTE(review): no cell visible in this notebook writes `eval_allProbs.csv` — confirm the file exists before running.
df_test = pd.read_csv(f"{resultDir}/eval_allProbs.csv")

In [78]:
# Write the combined summary table to `<output_filepath>.xls` (same call as an earlier export cell).
# NOTE(review): the legacy .xls suffix needs a matching writer engine in pandas — confirm one is available, or consider .xlsx.
df_summaries_allProbs.to_excel(output_filepath+".xls")

In [79]:
# Read the exported workbook back in; this rebinds `df_test`.
df_test = pd.read_excel(output_filepath+".xls")