In [1]:
from os import listdir
from os.path import isfile, join

import pickle
import numpy as np
import scipy as sp
import pandas as pd

from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model

from sklearn.neighbors import DistanceMetric

from benchmark.cfg import OHConverter

from constants.var_types import VAR_TYPES

In [2]:
import os
import tensorflow as tf
tf.config.set_visible_devices([], 'GPU')

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

if tf.test.gpu_device_name():
    print('GPU found')
else:
    print("No GPU found")

GPU found


In [3]:
# Custom Realistic Scores

In [4]:
def internetadv_ratio_contraint(dsName, df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter):
    df_cf_found, df_fc_found = get_cleaned_processed(df_cf, df_fc, cat_feats, model, converter)
    if df_cf_found.shape[0] > 0:
        scaler = load_scaler(dsName)
        df_num_scaled = pd.DataFrame(scaler.inverse_transform(df_cf_found[['1555', '1556', '1557']]))

        output = ((df_num_scaled[1] / df_num_scaled[0]).map(lambda x: round(x, 2)) - df_num_scaled[2].map(
            lambda x: round(x, 2)) < 0.01).map(int)
        output.index = df_cf_found.index

        return output

    return np.nan

In [5]:
def pbc_multivariate_constraints(dsName, df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter):
    df_cf_found, df_fc_found = get_cleaned_processed(df_cf, df_fc, cat_feats, model, converter)
    if df_cf_found.shape[0] > 0:
        scaler = load_scaler(dsName)
        cf_scaled = pd.DataFrame(scaler.inverse_transform(df_cf_found))

        x1 = (cf_scaled[2].map(lambda x: round(x, 4)) == (cf_scaled[0] * cf_scaled[1]).map(lambda x: round(x, 4))).map(
            int)

        x2 = (cf_scaled[3].map(lambda x: round(x, 3)) - (cf_scaled[1] / cf_scaled[0]).map(
            lambda x: round(x, 3)) < 0.01).map(int)

        x3 = (cf_scaled[4].map(lambda x: round(x, 3)) - (cf_scaled[7] / cf_scaled[2]).map(
            lambda x: round(x, 3)) < 0.01).map(int)

        x4 = (cf_scaled[5].map(lambda x: round(x, 3)) - (cf_scaled[8] / cf_scaled[2]).map(
            lambda x: round(x, 3)) < 0.01).map(int)

        x5 = (cf_scaled[6].map(lambda x: round(x, 3)) - (cf_scaled[7] / cf_scaled[9]).map(
            lambda x: round(x, 3)) < 0.01).map(int)

        output = (x1 + x2 + x3 + x4 + x5).map(lambda x: x == 5).map(int)
        output.index = df_cf_found.index

        return output

    return np.nan

In [6]:
def bcw_radius_area_constraint(dsName, df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter):
    df_cf_found, df_fc_found = get_cleaned_processed(df_cf, df_fc, cat_feats, model, converter)
    if df_cf_found.shape[0] > 0:
        scaler = load_scaler(dsName)
        cf_scaled = pd.DataFrame(scaler.inverse_transform(df_cf_found))

        output = (cf_scaled[1] ** 2 * np.pi - cf_scaled[4] < cf_scaled[4] * 0.10).map(int)
        output.index = df_cf_found.index

        return output

    return np.nan

In [7]:
dict_functions = {
    'BCW': {'ruc': [], 'rmc': [bcw_radius_area_constraint]},
    'PBC': {'ruc': [], 'rmc': [pbc_multivariate_constraints]},
    'InternetAdv': {'ruc': [], 'rmc': [internetadv_ratio_contraint]}
}

In [8]:
# Verify if offset between the x and o classes is, at maximum 1
def tictactoe_constraint(dsName, df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter):
    df_cf_found, df_fc_found = get_cleaned_processed(df_cf, df_fc, cat_feats, model, converter)
    if df_cf_found.shape[0] > 0:
        if cat_feats:

            df_ohi = check_one_hot_integrity(df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter)
            df_cb = check_binary_categorical(df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter)

            dfc_converted_array = []
            for idx in range(df_cf_found.shape[0]):
                if df_ohi.iloc[idx] == 1 and df_cb.iloc[idx] == 1:
                    dfc_converted_array.append(converter.convert(df_cf_found.iloc[int(idx):int(idx) + 1].to_numpy())[0])
                else:
                    dfc_converted_array.append(['x'] * df_cf_found.shape[1])
            dfc_converted = pd.DataFrame(dfc_converted_array)

            dict_ttt = {'x': 1, 'o': -1, 'b': 0}
            for col in list(dfc_converted.columns):
                dfc_converted[col] = dfc_converted[col].apply(lambda x: dict_ttt[x])

            output = dfc_converted.sum(axis=1).map(lambda x: int(x in [0, 1, -1]))
            output.index = df_cf_found.index

            return output

    return np.nan

In [9]:
# Realistic Scores

In [10]:
def check_outside_num_range(dsName, df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter):
    df_cf_found, df_fc_found = get_cleaned_processed(df_cf, df_fc, cat_feats, model, converter)
    if df_cf_found.shape[0] > 0:
        if len(num_feats) > 0:
            scaler = load_scaler(dsName)
            scaled_ds = pd.DataFrame(scaler.inverse_transform(df_oh[num_feats]))

            num_feats_idx = []

            for n_f in num_feats:
                num_feats_idx.append(str(list(df_oh.columns).index(n_f)))
            scaled_cf = pd.DataFrame(scaler.inverse_transform(df_cf_found[num_feats_idx]))

            for i in range(scaled_ds.shape[1]):
                scaled_cf[i] = scaled_cf[i].map(lambda x: scaled_ds[i].min() <= x and x <= scaled_ds[i].max())

            output = scaled_cf.T.sum().map(lambda x: x == scaled_cf.shape[1]).map(int)
            output.index = df_cf_found.index

            return output
    return np.nan

In [11]:
def load_scaler(dsName):
    return pickle.load(open(f'./scalers/{dsName}.sc', 'rb'))

In [12]:
def verify_one_hot(df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter):
    oh_i = check_one_hot_integrity(df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter)
    oh_b = check_binary_categorical(df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter)

    return (oh_i * oh_b).sum() / oh_b.shape[0]

In [13]:
def check_one_hot_integrity(df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter):
    df_cf_found, df_fc_found = get_cleaned_processed(df_cf, df_fc, cat_feats, model, converter)
    bin_feats = converter.binary_cats

    if df_cf_found.shape[0] > 0:
        if len(cat_feats) > 0:
            cat_oh_feats = list(set(cat_feats) - set(bin_feats))

            if len(cat_oh_feats) > 0:
                array_cat_int = []
                for cat_oh_f in cat_oh_feats:
                    array_cat_int.append(
                        df_cf_found.iloc[:, list(converter.dict_feat_idx[cat_oh_f].values())].apply(lambda x: x > 0,
                                                                                                    axis=1).sum(
                            axis=1).map(lambda x: x == 1))

                df_cat_int = pd.concat(array_cat_int, axis=1)

                output = df_cat_int.sum(axis=1).map(lambda x: x == df_cat_int.shape[1]).map(int)
                output.index = df_cf_found.index

                return output

    return np.nan

In [14]:
def check_binary_categorical(df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter):
    df_cf_found, df_fc_found = get_cleaned_processed(df_cf, df_fc, cat_feats, model, converter)

    if df_cf_found.shape[0] > 0:

        if len(cat_feats) > 0:
            num_feats_idx = [str(list(df_oh.columns).index(n_f)) for n_f in num_feats]

            cat_feats_idx = list(set(df_cf_found.columns) - set(num_feats_idx))

            factual_cat_cols = df_fc_found.loc[:, cat_feats_idx].apply(
                lambda x: x.map(lambda y: y not in [1.0, 0.0, 1, 0]), axis=1).T.sum()

            assert factual_cat_cols.sum() == 0

            output = df_cf_found.loc[:, cat_feats_idx].apply(lambda x: x.map(lambda y: y in [1.0, 0.0, 1, 0]),
                                                             axis=1).apply(sum, axis=1).map(
                lambda x: int(x == len(cat_feats_idx)))
            output.index = df_cf_found.index

            return output

    return np.nan

In [15]:
def get_bin_feats_idx(df_oh, converter):
    bin_cols = converter.binary_cats
    bin_feats = []
    for b_c in bin_cols:
        try:
            bin_feats.append(list(df_oh.columns).index(b_c))
        except ValueError:
            bin_feats.append(list(df_oh.columns).index('0'*(4-len(b_c))+b_c))
    return bin_feats

In [16]:
# General Scores

In [17]:
def md(df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter):
    df_cf_found, df_fc_found = get_cleaned_processed(df_cf, df_fc, cat_feats, model, converter)

    if df_cf_found.shape[0] > 0:
        output_result = []
        for idx in range(df_cf_found.shape[0]):
            m_dis = sp.spatial.distance.mahalanobis(df_cf_found.iloc[idx].to_numpy(),
                                                    df_fc_found.drop(columns=['output']).iloc[idx].to_numpy(),
                                                    df_oh.drop(columns=['output']).cov().to_numpy())
            output_result.append(m_dis)

        out_array = output_result
        output_results = [np.nan] * df_cf.shape[0]
        for idx_result, idxFound in enumerate(list(df_fc_found.index)):
            output_results[idxFound] = out_array[idx_result]

        return output_results

    return [np.nan]*df_cf.shape[0]

In [18]:
# Mean can give misleading values because very high values on MAD
def madd(df_oh, df_cf, df_fc, cat_feats, num_feats, model, converter):
    df_cf_found, df_fc_found = get_cleaned_processed(df_cf, df_fc, cat_feats, model, converter)

    if df_cf_found.shape[0] > 0:
        df_mad = df_cf_found.iloc[:0]

        df_oh_c = df_oh.copy()
        df_oh_c.columns = df_fc_found.columns
        num_feats_idx = [str(list(df_oh.columns).index(n_f)) for n_f in num_feats]

        cat_feats_idx = list(set(df_cf_found.columns) - set(num_feats_idx))

        mad_num = {}
        for n_feat_idx in num_feats_idx:
            # 1e-8 added to avoid 0 and, then, division by zero
            mad_num[n_feat_idx] = sp.stats.median_abs_deviation(df_oh_c[n_feat_idx]) + 1e-8

            df_mad[n_feat_idx] = abs(df_cf_found[n_feat_idx] - df_fc_found[n_feat_idx]) / mad_num[n_feat_idx]

        for c_feat_idx in cat_feats_idx:
            df_mad[c_feat_idx] = (df_cf_found[c_feat_idx] != df_fc_found[c_feat_idx]).map(int)

        output_result = [0]*df_cf.shape[0]
        if len(cat_feats_idx) > 0:
            add_output_result = df_mad[cat_feats_idx].mean(axis=1)
            for null_row in list(set([*range(len(output_result))])-set(df_mad.index)):
                add_output_result.loc[null_row] = np.nan
            add_output_result = add_output_result.sort_index()
            output_result = np.add(output_result, add_output_result.tolist())
        if len(num_feats_idx) > 0:
            add_output_result = df_mad[num_feats_idx].mean(axis=1)
            for null_row in list(set([*range(len(output_result))])-set(df_mad.index)):
                add_output_result.loc[null_row] = np.nan
            add_output_result = add_output_result.sort_index()
            output_result = np.add(output_result, add_output_result.tolist())

        out_array = output_result.tolist()
        output_results = [np.nan] * df_cf.shape[0]
        for idx_result, idxFound in enumerate(list(df_fc_found.index)):
            output_results[idxFound] = out_array[idx_result]

        return output_results

    return [np.nan]*df_cf.shape[0]

In [19]:
def get_cleaned_processed(df_cf, df_fc, cat_feats, model, converter):
    df_fc_pro = get_df_fc_pro(df_cf, df_fc, cat_feats, converter)
    df_cf_c, df_fc_pro_c = get_correct(df_cf, df_fc_pro, model)

    return df_cf_c, df_fc_pro_c

In [20]:
def get_correct(df_cf, df_fc, model):
    df_cf_found, df_fc_found = remove_na(df_cf, df_fc)
    if df_cf_found.shape[0] > 0:
        labels_cf = model.predict(df_cf_found.to_numpy()).round()
        return df_cf_found[labels_cf.reshape(-1, 1) != df_fc_found['output'].to_numpy().reshape(-1, 1)], df_fc_found[
            labels_cf.reshape(-1, 1) != df_fc_found['output'].to_numpy().reshape(-1, 1)]

    return df_cf_found.iloc[:0], df_fc.iloc[:0]

In [21]:
def validity_total(df_cf, df_fc, model):
    df_cf_found, df_fc_found = remove_na(df_cf, df_fc)
    if df_cf_found.shape[0] > 0:

        out_array = (model.predict(df_cf_found.to_numpy()).round().reshape(1,-1)!=df_fc_found['output'].to_numpy())[0].tolist()
        output_results = [np.nan]*df_cf.shape[0]
        for idx_result, idxFound in enumerate(list(df_fc_found.index)):
            output_results[idxFound] = out_array[idx_result]

        return output_results
    return [False]*df_cf.shape[0]

In [22]:
def validity_found(df_cf, df_fc, model):
    df_cf_found, df_fc_found = remove_na(df_cf, df_fc)
    if df_cf_found.shape[0] > 0:

        out_array = (model.predict(df_cf_found.to_numpy()).round().reshape(1,-1)!=df_fc_found['output'].to_numpy())[0].tolist()
        output_results = [np.nan] * df_cf.shape[0]
        for idx_result, idxFound in enumerate(list(df_fc_found.index)):
            output_results[idxFound] = out_array[idx_result]

        return output_results
    return [False]*df_cf.shape[0]

In [23]:
def remove_na(df_cf, df_fc):
    return df_cf.dropna(), df_fc[0==df_cf.isna().sum(axis=1)]

In [24]:
def get_df_fc_pro(df_cf, df_fc, cat_feats, converter):
    array_df_fc_pro = []
    if len(cat_feats) > 0:
        for idx, row in df_fc.iterrows():
            row_fc_pro = converter.convert_to_oh(pd.DataFrame(row).T.drop(columns=['output']).to_numpy().tolist()[0])
            array_df_fc_pro.append(row_fc_pro)
        df_fc_pro = pd.DataFrame(array_df_fc_pro)
        df_fc_pro['output'] = df_fc['output']
    else:
        df_fc_pro = df_fc.copy()

    df_fc_pro.columns = list(df_cf.columns) + ['output']

    return df_fc_pro

In [25]:
def sparsity(df_cf, df_fc, cat_feats, model, converter):
    df_cf_found, df_fc_found = get_cleaned_processed(df_cf, df_fc, cat_feats, model, converter)

    if df_cf_found.shape[0] > 0:
        df_fc_found = df_fc_found.copy().drop(columns=['output'])

        scores = []
        for i in range(df_cf_found.shape[0]):
            scores.append((df_cf_found.iloc[i].round(4) == df_fc_found.iloc[i].round(4)).sum() / df_cf_found.shape[1])

        out_array = scores
        output_results = [np.nan] * df_cf.shape[0]
        for idx_result, idxFound in enumerate(list(df_fc_found.index)):
            output_results[idxFound] = out_array[idx_result]

        return output_results

    return [np.nan]*df_cf.shape[0]

In [26]:
def l2(df_cf, df_fc, cat_feats, model, converter):
    df_cf_found, df_fc_found = get_cleaned_processed(df_cf, df_fc, cat_feats, model, converter)

    if df_cf_found.shape[0] > 0:
        df_fc_found = df_fc_found.copy().drop(columns=['output'])
        scores = []
        for i in range(df_cf_found.shape[0]):
            scores.append(np.linalg.norm(
                df_cf_found.iloc[i].round(4).to_numpy().reshape(-1, 1) -
                df_fc_found.iloc[i].round(4).to_numpy().reshape(-1, 1)))

        out_array = scores
        output_results = [np.nan] * df_cf.shape[0]
        for idx_result, idxFound in enumerate(list(df_fc_found.index)):
            output_results[idxFound] = out_array[idx_result]

        return output_results

    return [np.nan]*df_cf.shape[0]

In [27]:
cat_ds = []
num_ds = []
mix_ds = []

for dsName, dsData in VAR_TYPES.items():

    if len(dsData['categorical']) > 0 and len(dsData['numerical']):
        mix_ds.append(dsName)
    elif len(dsData['categorical']) > 0:
        cat_ds.append(dsName)
    else:
        num_ds.append(dsName)

In [28]:
onlyfiles = [f for f in listdir('./cfoutput') if isfile(join('./cfoutput', f))]

In [29]:
def _nn_ohe(input_shape, hidden_layers_ws, output_number):
    x_in = Input(shape=(input_shape,))
    x = Dense(hidden_layers_ws, activation='relu')(x_in)
    x_out = Dense(2, activation='softmax')(x)
    if output_number == 1:
        x_bin = Dense(1, activation='linear')(x_out)
        nn = Model(inputs=x_in, outputs=x_bin)
    if output_number == 2:
        nn = Model(inputs=x_in, outputs=x_out)

    nn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    return nn

In [30]:
dict_results = {}
for file in onlyfiles:
    if file.split('_')[0] != 'TIME':
        n = file.split('_')[0]
        if n == 'STABILITY':
            continue
        dsName = file.split('_')[1]
        framework = file.split('_')[2].split('.')[0]

        cat_feats = VAR_TYPES[dsName]['categorical']
        num_feats = VAR_TYPES[dsName]['numerical']

        model_keras = load_model(f'./models/{dsName}.h5')

        # Get architecture info and pass to our NN architecture
        input_shape = model_keras.get_weights()[0].shape[0]
        hidden_layers_ws = model_keras.get_weights()[0].shape[1]

        adapted_nn = _nn_ohe(input_shape, hidden_layers_ws, 1)
        adapted_nn.build(input_shape=input_shape)

        adapted_nn.layers[1].set_weights(model_keras.layers[0].get_weights())
        adapted_nn.layers[2].set_weights(model_keras.layers[1].get_weights())

        if n == 1:
            # If the tested class is the minority, revert output order
            second_layer_w = []
            for w_row in model_keras.layers[1].get_weights()[0]:
                second_layer_w.append(np.flip(w_row).tolist())

            second_layer_b = np.flip(model_keras.layers[1].get_weights()[1])

            second_layer = [np.array(second_layer_w, dtype=np.float32),
                            np.array(second_layer_b, dtype=np.float32)]
            adapted_nn.layers[2].set_weights(second_layer)

        adapted_nn.layers[3].set_weights(
            [np.array([[0.0], [1.0]], dtype=np.float32), np.array([0.0], dtype=np.float32)])

        if len(num_feats) > 0:
            df = pd.read_csv(f'./data/NORM_{dsName}.csv')
        else:
            df = pd.read_csv(f'./data/{dsName}.csv')

        if len(cat_feats) > 0 and len(num_feats) > 0:
            df_oh = pd.read_csv(f'./data/OH_NORM_{dsName}.csv')
        elif len(cat_feats) > 0:
            df_oh = pd.read_csv(f'./data/OH_{dsName}.csv')
        else:
            df_oh = pd.read_csv(f'./data/NORM_{dsName}.csv')

        df_cf = pd.read_csv(f'./cfoutput/{n}_{dsName}_{framework}.csv')
        df_train = pd.read_csv(f'./experiments_data/{dsName}_TRAINDATASET.csv')
        df_fc = pd.read_csv(f'./experiments_data/{dsName}_CFDATASET_{n}.csv').drop(columns=['Unnamed: 0'])

        converter = OHConverter.Converter(df, cat_feats, list(df_oh.columns))

        print(n, dsName, framework)

        scoreValidityTotal = validity_total(df_cf, df_fc, adapted_nn)
        scoreValidityFound = validity_found(df_cf, df_fc, adapted_nn)
        scoreSparsity = sparsity(df_cf, df_fc, cat_feats, adapted_nn, converter)
        scoreL2 = l2(df_cf, df_fc, cat_feats, adapted_nn, converter)
        scoreMadd = madd(df_oh, df_cf, df_fc, cat_feats, num_feats, adapted_nn, converter)
        scoreMd = md(df_oh, df_cf, df_fc, cat_feats, num_feats, adapted_nn, converter)

        df_cf_found, df_fc_found = get_cleaned_processed(df_cf, df_fc, cat_feats, adapted_nn, converter)

        ruc = []
        rmc = []

        scoreCheckBinCat = check_binary_categorical(df_oh, df_cf, df_fc, cat_feats, num_feats, adapted_nn, converter)
        if scoreCheckBinCat is not np.nan:
            ruc.append(scoreCheckBinCat)

        scoreCheckOutsideNumRange = check_outside_num_range(dsName, df_oh, df_cf, df_fc, cat_feats, num_feats, adapted_nn,
                                                            converter)
        if scoreCheckOutsideNumRange is not np.nan:
            ruc.append(scoreCheckOutsideNumRange)

        scoreCheckOneHotIntegrity = check_one_hot_integrity(df_oh, df_cf, df_fc, cat_feats, num_feats, adapted_nn, converter)
        if scoreCheckOneHotIntegrity is not np.nan:
            rmc.append(scoreCheckOneHotIntegrity)

        if dsName in list(dict_functions.keys()):
            custom_ruc_functions = dict_functions[dsName]['ruc']
            custom_rmc_functions = dict_functions[dsName]['rmc']

            for c_ruc_function in custom_ruc_functions:
                output_ruc = c_ruc_function(dsName, df_oh, df_cf, df_fc, cat_feats, num_feats, adapted_nn, converter)
                if output_ruc is not np.nan:
                    ruc.append(output_ruc)
                else:
                    assert len(ruc) == 0

            for c_rmc_function in custom_rmc_functions:
                output_rmc = c_rmc_function(dsName, df_oh, df_cf, df_fc, cat_feats, num_feats, adapted_nn, converter)
                if output_rmc is not np.nan:
                    rmc.append(output_rmc)
                else:
                    assert len(rmc) == 0

        if len(ruc) > 0:
            scoreRUC = (sum(ruc) / len(ruc) == 1).map(int).tolist()

            out_array = scoreRUC
            output_results = [np.nan] * df_cf.shape[0]
            for idx_result, idxFound in enumerate(list(df_fc_found.index)):
                output_results[idxFound] = out_array[idx_result]

            scoreRUC = output_results

        else:
            scoreRUC = [np.nan]*df_cf.shape[0]

        if len(rmc) > 0:
            scoreRMC = (sum(rmc) / len(rmc) == 1).map(int).tolist()

            out_array = scoreRMC
            output_results = [np.nan] * df_cf.shape[0]
            for idx_result, idxFound in enumerate(list(df_fc_found.index)):
                output_results[idxFound] = out_array[idx_result]

            scoreRMC = output_results

        else:
            scoreRMC = [np.nan]*df_cf.shape[0]

        if dsName not in dict_results.keys():
            dict_results[dsName] = {}

        if n not in dict_results[dsName].keys():
            dict_results[dsName][n] = []

        result_entry = {
            'framework': framework,
            'n': n,
            'validity': scoreValidityTotal,
            'validityFound': scoreValidityFound,
            'sparsity': scoreSparsity,
            'L2': scoreL2,
            'RUC': scoreRUC,
            'RMC': scoreRMC,
            'MAD': scoreMadd,
            'MD': scoreMd
        }

        dict_results[dsName][n].append(result_entry)

1 StatlogGC SEDC




1 SoybeanSmall GROWINGSPHERES4
1 PBC GROWINGSPHERES4




1 SDD LORE




0 DefaultOfCCC MLEXPLAIN




1 SDD ALIBIC
0 BCW LORE




1 CMSC CADEX




1 SDD GROWINGSPHERES4




0 Ecoli ALIBIC




0 CarEvaluation ALIBICNOGRAD
0 SoybeanSmall SEDC
1 Lymphography SYNAS
0 StudentPerf GROWINGSPHERES3




1 StatlogGC DICE




0 BalanceScale ALIBIC
1 BCW CADEX




1 PBC ALIBICNOGRAD




0 HayesRoth SEDC
0 Wine ALIBICNOGRAD




1 HayesRoth GROWINGSPHERES3
0 MagicGT SYNAS




0 BalanceScale ALIBICNOGRAD
0 Ecoli SEDC




0 PBC LORE




1 Ecoli CADEX




1 HayesRoth SEDC
0 Lenses GROWINGSPHERES3
0 Ecoli SYNAS




0 SoybeanSmall MLEXPLAIN
1 StatlogGC CADEX




1 SoybeanSmall DICE
0 DefaultOfCCC DICE




1 HayesRoth GROWINGSPHERES4
0 Lymphography ALIBICNOGRAD
1 MagicGT CADEX




0 StatlogGC SEDC




1 CMSC SEDC




0 TicTacToe ALIBICNOGRAD
0 Lymphography ALIBIC
0 Lymphography SEDC
0 SoybeanSmall ALIBICNOGRAD
1 Chess MLEXPLAIN
1 StatlogGC GROWINGSPHERES4




1 Nursery MLEXPLAIN
0 BalanceScale GROWINGSPHERES4
1 SoybeanSmall MACE
0 Nursery GROWINGSPHERES4
1 Nursery DICE
1 Adult CADEX




0 StudentPerf SEDC




0 HayesRoth GROWINGSPHERES4
0 TicTacToe MLEXPLAIN
1 BalanceScale CADEX
0 PBC ALIBICNOGRAD




1 MagicGT ALIBICNOGRAD




1 Wine ALIBICNOGRAD




0 CarEvaluation LORE
0 Chess DICE
0 InternetAdv LORE




1 TicTacToe SEDC
1 PBC DICE




1 Iris SYNAS




0 TicTacToe DICE
0 BCW SEDC




0 StatlogGC SYNAS




0 SDD DICE
0 StudentPerf MACE
1 DefaultOfCCC MLEXPLAIN




1 CMSC DICE




1 Ecoli ALIBIC




0 HayesRoth MACE
0 Lymphography GROWINGSPHERES4
1 BalanceScale ALIBIC
0 BalanceScale SEDC
0 Adult GROWINGSPHERES3




1 TicTacToe GROWINGSPHERES4
0 Iris MLEXPLAIN




0 Adult ALIBIC




0 SoybeanSmall CADEX
0 BCW MLEXPLAIN




0 Adult GROWINGSPHERES4




1 SDD ALIBICNOGRAD
0 Nursery ALIBICNOGRAD
0 Chess SEDC
0 Nursery SEDC
1 MagicGT MLEXPLAIN




1 BalanceScale SYNAS
0 StatlogGC CADEX




0 DefaultOfCCC ALIBICNOGRAD




0 Iris ALIBICNOGRAD




1 SoybeanSmall CADEX
1 BalanceScale ALIBICNOGRAD
0 MagicGT GROWINGSPHERES4




1 Chess ALIBIC
0 CMSC SEDC




1 MagicGT ALIBIC




0 Lymphography GROWINGSPHERES3
1 DefaultOfCCC CADEX




1 Adult ALIBICNOGRAD




0 ISOLET SEDC




1 Chess ALIBICNOGRAD
1 InternetAdv SEDC




0 Iris GROWINGSPHERES4




0 CarEvaluation DICE
0 StudentPerf SYNAS




1 BalanceScale SEDC
0 InternetAdv GROWINGSPHERES4




0 SoybeanSmall SYNAS
0 StatlogGC GROWINGSPHERES3




1 CMSC SYNAS




0 Adult DICE




1 Lenses SYNAS
1 ISOLET GROWINGSPHERES3
1 Ecoli SEDC




0 DefaultOfCCC GROWINGSPHERES4




0 SoybeanSmall GROWINGSPHERES4
1 PBC SEDC




0 HayesRoth GROWINGSPHERES3
1 StudentPerf LORE
0 DefaultOfCCC SEDC




1 Ecoli GROWINGSPHERES3




1 Chess SYNAS
0 StudentPerf LORE
0 StudentPerf ALIBIC




0 Nursery MLEXPLAIN
0 Iris DICE




0 BCW GROWINGSPHERES4




1 Nursery SYNAS
0 PBC CADEX




1 Adult GROWINGSPHERES3




1 InternetAdv GROWINGSPHERES4




1 DefaultOfCCC LORE
1 Iris CADEX




0 ISOLET MLEXPLAIN
0 Chess GROWINGSPHERES4
0 MagicGT ALIBICNOGRAD




1 InternetAdv DICE
0 Lymphography LORE
1 ISOLET ALIBICNOGRAD




1 Ecoli MACE
0 SDD SYNAS
0 BalanceScale DICE
0 Chess LORE
0 CMSC DICE




0 SoybeanSmall MACE
1 Chess MACE
1 Iris ALIBIC




0 MagicGT MACE




0 BCW SYNAS




1 BalanceScale GROWINGSPHERES4
1 SDD GROWINGSPHERES3




1 Lenses GROWINGSPHERES4
1 Adult ALIBIC




0 SDD GROWINGSPHERES4




1 TicTacToe MLEXPLAIN
1 Ecoli SYNAS




1 Ecoli GROWINGSPHERES4




0 Lymphography SYNAS
1 Nursery LORE
1 HayesRoth DICE
0 SDD MLEXPLAIN
1 CMSC MLEXPLAIN




0 PBC GROWINGSPHERES4




0 ISOLET GROWINGSPHERES4




1 SoybeanSmall ALIBICNOGRAD
0 Wine GROWINGSPHERES3




1 Wine ALIBIC




1 ISOLET SEDC




1 Wine CADEX




0 Adult MLEXPLAIN




0 Nursery SYNAS
1 Adult MLEXPLAIN




0 SoybeanSmall LORE
1 SoybeanSmall SEDC
1 MagicGT LORE




0 Wine SEDC




1 Nursery CADEX
0 HayesRoth MLEXPLAIN
0 Lenses SYNAS
1 TicTacToe LORE
0 TicTacToe SEDC
1 Adult DICE




1 Lymphography MACE
0 InternetAdv MLEXPLAIN




0 Wine MLEXPLAIN
1 HayesRoth LORE
0 Lymphography MACE
0 BalanceScale MLEXPLAIN
0 SoybeanSmall ALIBIC
1 Lymphography GROWINGSPHERES4
1 StatlogGC SYNAS




0 Ecoli DICE




0 CarEvaluation MACE
0 Lenses MACE
1 PBC GROWINGSPHERES3




0 Ecoli ALIBICNOGRAD




1 Nursery ALIBICNOGRAD
1 TicTacToe ALIBIC
0 StatlogGC DICE




0 Lenses MLEXPLAIN
1 Adult SYNAS




0 BalanceScale CADEX
1 ISOLET ALIBIC




1 BCW DICE




0 TicTacToe GROWINGSPHERES3
0 InternetAdv GROWINGSPHERES3




1 TicTacToe ALIBICNOGRAD
0 Lenses LORE
0 MagicGT LORE




1 Nursery SEDC
1 TicTacToe CADEX
1 DefaultOfCCC SEDC




1 StatlogGC LORE
1 CMSC ALIBIC




0 BalanceScale SYNAS
1 Ecoli ALIBICNOGRAD




0 PBC GROWINGSPHERES3




1 StudentPerf GROWINGSPHERES3




1 DefaultOfCCC DICE




1 MagicGT GROWINGSPHERES4




0 BalanceScale LORE
1 CMSC LORE




1 HayesRoth MACE
1 MagicGT SEDC




0 CarEvaluation GROWINGSPHERES3
1 BalanceScale DICE
0 Iris LORE




1 SoybeanSmall MLEXPLAIN
1 Lymphography CADEX
1 Adult SEDC




1 InternetAdv CADEX
1 CMSC GROWINGSPHERES4




0 HayesRoth CADEX
1 Lenses CADEX
0 SoybeanSmall GROWINGSPHERES3
0 Iris SYNAS




1 PBC MACE
1 CarEvaluation ALIBICNOGRAD
0 CMSC MACE




0 Lenses CADEX
0 MagicGT CADEX




0 Adult CADEX




1 SDD DICE




0 ISOLET ALIBIC




1 CarEvaluation SYNAS
0 BCW CADEX




0 CarEvaluation GROWINGSPHERES4
0 Nursery DICE
0 DefaultOfCCC ALIBIC




0 StudentPerf DICE




1 BalanceScale LORE
1 StudentPerf MLEXPLAIN




1 SoybeanSmall ALIBIC
0 SDD SEDC
1 Lenses SEDC
0 TicTacToe CADEX
0 Lymphography CADEX
0 ISOLET GROWINGSPHERES3
1 StudentPerf CADEX




0 PBC DICE




1 ISOLET DICE
0 StatlogGC LORE
0 Ecoli CADEX




1 InternetAdv MLEXPLAIN




1 CarEvaluation LORE
1 CarEvaluation DICE
0 InternetAdv DICE




0 InternetAdv ALIBIC




0 Lenses SEDC
0 Wine MACE
1 CarEvaluation GROWINGSPHERES3
1 HayesRoth ALIBICNOGRAD
1 Ecoli DICE




1 CarEvaluation ALIBIC
0 BCW ALIBICNOGRAD




0 Lenses DICE
0 Chess CADEX
1 CMSC ALIBICNOGRAD




0 TicTacToe SYNAS
1 BCW LORE




1 StudentPerf ALIBICNOGRAD




0 TicTacToe GROWINGSPHERES4
0 PBC SYNAS




0 PBC SEDC




0 DefaultOfCCC CADEX




1 SDD MLEXPLAIN
0 Iris GROWINGSPHERES3




1 MagicGT MACE




0 Wine GROWINGSPHERES4




0 HayesRoth LORE
0 Ecoli MACE




1 StatlogGC GROWINGSPHERES3




0 DefaultOfCCC SYNAS




0 BCW GROWINGSPHERES3




1 Lymphography ALIBIC
1 SoybeanSmall GROWINGSPHERES3
1 Lymphography LORE
1 Ecoli MLEXPLAIN




0 StatlogGC ALIBICNOGRAD




1 StatlogGC ALIBICNOGRAD




1 Lymphography SEDC
0 CMSC GROWINGSPHERES3




0 TicTacToe MACE
0 CMSC SYNAS




0 InternetAdv SEDC




0 Nursery ALIBIC
0 Ecoli GROWINGSPHERES3




0 SDD CADEX




0 PBC ALIBIC




1 Nursery GROWINGSPHERES4
0 Wine CADEX




1 HayesRoth ALIBIC
1 Iris LORE




0 MagicGT GROWINGSPHERES3




1 BCW MLEXPLAIN




1 Chess CADEX
1 BCW SYNAS




0 Wine DICE




0 Ecoli LORE
1 BCW SEDC




1 BCW GROWINGSPHERES4




1 Nursery GROWINGSPHERES3
1 PBC LORE




1 Iris GROWINGSPHERES4




1 MagicGT GROWINGSPHERES3




1 Wine SYNAS




0 Iris MACE
1 Chess LORE
1 Wine MACE




0 Iris ALIBIC




1 PBC ALIBIC




0 StatlogGC MLEXPLAIN
1 SDD SYNAS
0 CMSC LORE




0 Chess SYNAS
1 Iris DICE
0 SDD ALIBIC
0 CarEvaluation CADEX
0 Iris SEDC
1 StudentPerf GROWINGSPHERES4




1 Lenses DICE
0 PBC MACE




0 HayesRoth DICE
1 PBC MLEXPLAIN




1 Wine GROWINGSPHERES4




0 Wine ALIBIC




1 SoybeanSmall SYNAS
1 Iris ALIBICNOGRAD




0 Nursery LORE
0 StudentPerf GROWINGSPHERES4




1 TicTacToe SYNAS
1 Chess GROWINGSPHERES3
1 SoybeanSmall LORE
1 CarEvaluation CADEX
1 DefaultOfCCC GROWINGSPHERES3




0 Lenses GROWINGSPHERES4
1 Wine MLEXPLAIN
1 Lenses GROWINGSPHERES3
0 MagicGT DICE




1 Chess DICE
1 StudentPerf ALIBIC




0 MagicGT SEDC




0 Lymphography DICE
1 Lymphography DICE
0 Chess MACE
1 TicTacToe MACE
1 InternetAdv LORE




1 BCW GROWINGSPHERES3




1 PBC CADEX




1 HayesRoth MLEXPLAIN
0 SDD ALIBICNOGRAD
1 TicTacToe DICE
0 Ecoli MLEXPLAIN
0 CarEvaluation MLEXPLAIN
1 StudentPerf MACE




0 Adult ALIBICNOGRAD




1 StudentPerf SYNAS




1 DefaultOfCCC SYNAS




0 CMSC GROWINGSPHERES4




0 CarEvaluation SYNAS
1 CMSC GROWINGSPHERES3




1 Lymphography MLEXPLAIN
0 SDD GROWINGSPHERES3




1 CarEvaluation SEDC
0 SoybeanSmall DICE
0 Adult SEDC




0 Iris CADEX




0 StatlogGC GROWINGSPHERES4




1 SDD SEDC




1 Nursery ALIBIC
1 Lenses MACE
0 CMSC ALIBIC




1 PBC SYNAS




0 Nursery CADEX
1 ISOLET MLEXPLAIN
0 HayesRoth ALIBIC
0 TicTacToe ALIBIC
1 InternetAdv ALIBIC




1 Lenses ALIBIC
1 BalanceScale MLEXPLAIN
1 StudentPerf DICE




1 Adult LORE




0 ISOLET ALIBICNOGRAD




1 BCW ALIBICNOGRAD




0 BCW DICE




0 Wine SYNAS




0 HayesRoth ALIBICNOGRAD
0 MagicGT ALIBIC




0 Chess ALIBICNOGRAD
1 Lymphography GROWINGSPHERES3
1 StatlogGC ALIBIC




0 Chess ALIBIC
1 Lenses MLEXPLAIN
0 CarEvaluation SEDC
1 MagicGT SYNAS




1 DefaultOfCCC GROWINGSPHERES4




1 CarEvaluation MLEXPLAIN
0 DefaultOfCCC GROWINGSPHERES3




0 Wine LORE
1 BalanceScale GROWINGSPHERES3
0 Nursery GROWINGSPHERES3
0 Adult SYNAS




0 MagicGT MLEXPLAIN




0 CMSC ALIBICNOGRAD




1 Lenses LORE
0 Lymphography MLEXPLAIN
1 TicTacToe GROWINGSPHERES3
0 ISOLET DICE




1 Wine DICE




0 BCW ALIBIC




1 BCW ALIBIC




0 Adult LORE
1 Iris MACE




0 CarEvaluation ALIBIC
1 StudentPerf SEDC




1 HayesRoth CADEX
1 HayesRoth SYNAS
1 Wine SEDC




0 BalanceScale MACE
0 Lenses ALIBIC
0 Nursery MACE
0 ISOLET CADEX




1 Wine GROWINGSPHERES3




0 Ecoli GROWINGSPHERES4




1 ISOLET GROWINGSPHERES4




1 Iris GROWINGSPHERES3




1 Iris MLEXPLAIN




1 DefaultOfCCC ALIBICNOGRAD




0 PBC MLEXPLAIN




1 StatlogGC MLEXPLAIN
1 Adult GROWINGSPHERES4




1 Chess GROWINGSPHERES4
0 InternetAdv CADEX




1 Chess SEDC
0 Chess GROWINGSPHERES3
0 BalanceScale GROWINGSPHERES3
1 Iris SEDC




1 BalanceScale MACE
0 SDD LORE




1 Lymphography ALIBICNOGRAD
0 CMSC MLEXPLAIN




1 CarEvaluation MACE
1 CarEvaluation GROWINGSPHERES4
1 ISOLET CADEX




0 DefaultOfCCC LORE
0 StatlogGC ALIBIC




1 MagicGT DICE




0 CMSC CADEX




1 Lenses ALIBICNOGRAD
0 StudentPerf ALIBICNOGRAD




0 Chess MLEXPLAIN
1 Nursery MACE
0 StudentPerf CADEX
0 StudentPerf MLEXPLAIN
0 TicTacToe LORE
1 DefaultOfCCC ALIBIC




0 Lenses ALIBICNOGRAD
0 HayesRoth SYNAS
1 SDD CADEX




In [32]:
cvnt001 = { 2: 2.576, 3: 2.913, 4: 3.113, 5: 3.255, 6: 3.364, 7: 3.452, 8: 3.526, 9: 3.590, 10: 3.646, 11: 3.696,
            12: 3.741, 13: 3.781, 14: 3.818, 15: 3.853, 16: 3.884, 17: 3.914, 18: 3.941, 19: 3.967, 20: 3.992,
            21: 4.015, 22: 4.037, 23: 4.057, 24: 4.077, 25: 4.096, 26: 4.114, 27: 4.132, 28: 4.148, 29: 4.164,
            30: 4.179, 31: 4.194, 32: 4.208, 33: 4.222, 34: 4.236, 35: 4.249, 36: 4.261, 37: 4.273, 38: 4.285,
            39: 4.296, 40: 4.307, 41: 4.318, 42: 4.329, 43: 4.339, 44: 4.349, 45: 4.359, 46: 4.368, 47: 4.378,
            48: 4.387, 49: 4.395, 50: 4.404, }

cvnt005 = { 2: 1.960, 3: 2.344, 4: 2.569, 5: 2.728, 6: 2.850, 7: 2.948, 8: 3.031, 9: 3.102, 10: 3.164, 11: 3.219,
            12: 3.268, 13: 3.313, 14: 3.354, 15: 3.391, 16: 3.426, 17: 3.458, 18: 3.489, 19: 3.517, 20: 3.544,
            21: 3.569, 22: 3.593, 23: 3.616, 24: 3.637, 25: 3.658, 26: 3.678, 27: 3.696, 28: 3.714, 29: 3.732,
            30: 3.749, 31: 3.765, 32: 3.780, 33: 3.795, 34: 3.810, 35: 3.824, 36: 3.837, 37: 3.850, 38: 3.863,
            39: 3.876, 40: 3.888, 41: 3.899, 42: 3.911, 43: 3.922, 44: 3.933, 45: 3.943, 46: 3.954, 47: 3.964,
            48: 3.973, 49: 3.983, 50: 3.992, }

cvnt010 = { 2: 1.645, 3: 2.052, 4: 2.291, 5: 2.460, 6: 2.589, 7: 2.693, 8: 2.780, 9: 2.855, 10: 2.920, 11: 2.978,
            12: 3.030, 13: 3.077, 14: 3.120, 15: 3.159, 16: 3.196, 17: 3.230, 18: 3.261, 19: 3.291, 20: 3.319,
            21: 3.346, 22: 3.371, 23: 3.394, 24: 3.417, 25: 3.439, 26: 3.459, 27: 3.479, 28: 3.498, 29: 3.516,
            30: 3.533, 31: 3.550, 32: 3.567, 33: 3.582, 34: 3.597, 35: 3.612, 36: 3.626, 37: 3.640, 38: 3.653,
            39: 3.666, 40: 3.679, 41: 3.691, 42: 3.703, 43: 3.714, 44: 3.726, 45: 3.737, 46: 3.747, 47: 3.758,
            48: 3.768, 49: 3.778, 50: 3.788, }

In [59]:
framework_names = ['ALIBIC', 'ALIBICNOGRAD', 'CADEX', 'DICE', 'GROWINGSPHERES3', 
                   'GROWINGSPHERES4', 'LORE', 'MACE', 'MLEXPLAIN', 'SEDC', 'SYNAS']

In [60]:
dsNames = np.sort(list(VAR_TYPES.keys())).tolist()

In [162]:
scores_parameters = {
    'validity': 'max',
    'sparsity': 'max',
    'L2': 'min',
    'RUC': 'max',
    'RMC': 'max',
    'MAD': 'min',
    'MD': 'min'
}

In [163]:
def calculate_rankings(row, d):
    best_values = row.dropna().unique()

    if d == 'max':
        best_values[::-1].sort()
    if d == 'min':
        best_values.sort()

    ranking_row = {}

    counter_score = 1
    for bv in best_values:
        sfw = list(row[row == bv].index)
        score = sum([*range(counter_score, counter_score + len(sfw))])/len(sfw)
        counter_score += len(sfw)
        for fw in sfw:
            ranking_row[fw] = score

    nan_fws = list(set(framework_names) - set(ranking_row.keys()))
    if len(nan_fws) > 0:
        nan_score = sum([*range(counter_score, counter_score + len(nan_fws))]) / len(nan_fws)

        for fw in nan_fws:
            ranking_row[fw] = nan_score

    return ranking_row

In [164]:
def get_mean_ranking(dsName, n, parameter, d):
    data_results = dict_results[dsName][n]
    total_parameter = []
    names_frameworks = []
    number_of_rows = -1
    for data in data_results:
        names_frameworks.append(data['framework'])
        total_parameter.append(data[parameter])
        if number_of_rows == -1:
            number_of_rows = len(data[parameter])
        else:
            assert len(data[parameter]) == number_of_rows

    df_param = pd.DataFrame(total_parameter).T
    df_param.columns = names_frameworks

    ranking_all_rows = []
    for idx, row in df_param.iterrows():

        ranking_row = calculate_rankings(row, d)

        ranking_all_rows.append(ranking_row)

    return ranking_all_rows

In [165]:
result_rows_best = []
for dsName in dict_results.keys():
    for n in dict_results[dsName].keys():
        # Just to get the frameworks sorted
        frameworks_sorted = np.sort(framework_names).tolist()

        for score_name, score_order in scores_parameters.items():
            result_best = get_mean_ranking(dsName, n, score_name, score_order)
            Ndataset = len(result_best)
            df_best = pd.DataFrame(result_best)[frameworks_sorted]
            df_best = df_best.mean().to_frame(0).T
            df_best['N'] = Ndataset
            
            df_best['dataset'] = dsName
            df_best['n'] = n
            df_best['score'] = score_name

            df_best.index = [f'{dsName}_{n}_{score_name}_{score_order}']

            result_rows_best.append(df_best)

df_mean_ranking = pd.concat(result_rows_best)

In [166]:
df_mean_ranking.loc[list(df_mean_ranking.index)]

Unnamed: 0,ALIBIC,ALIBICNOGRAD,CADEX,DICE,GROWINGSPHERES3,GROWINGSPHERES4,LORE,MACE,MLEXPLAIN,SEDC,SYNAS,N,dataset,n,score
StatlogGC_1_validity_max,8.580,4.01,4.010,4.010,4.080,4.010,8.98,10.97,8.98,4.150,4.220,100,StatlogGC,1,validity
StatlogGC_1_sparsity_max,9.085,2.77,6.975,5.775,5.295,3.500,9.51,9.51,9.51,2.765,1.305,100,StatlogGC,1,sparsity
StatlogGC_1_L2_min,9.130,4.09,6.620,5.270,1.070,2.410,9.51,9.51,9.51,3.075,5.805,100,StatlogGC,1,L2
StatlogGC_1_RUC_max,8.980,3.54,4.175,4.175,4.230,4.140,9.51,9.51,9.51,4.140,4.090,100,StatlogGC,1,RUC
StatlogGC_1_RMC_max,8.960,2.32,2.320,5.365,5.875,5.685,9.51,9.51,9.51,4.415,2.530,100,StatlogGC,1,RMC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ISOLET_1_L2_min,6.410,6.27,1.020,7.190,7.190,2.180,7.19,7.19,7.19,6.980,7.190,100,ISOLET,1,L2
ISOLET_1_RUC_max,6.400,6.40,2.145,7.190,7.190,1.145,7.19,7.19,7.19,6.770,7.190,100,ISOLET,1,RUC
ISOLET_1_RMC_max,6.000,6.00,6.000,6.000,6.000,6.000,6.00,6.00,6.00,6.000,6.000,100,ISOLET,1,RMC
ISOLET_1_MAD_min,6.870,6.85,2.040,7.030,7.030,1.030,7.03,7.03,7.03,7.030,7.030,100,ISOLET,1,MAD


In [167]:
df_mean_ranking[
    (df_mean_ranking['dataset']=='SDD')&
    (df_mean_ranking['n']=='0')
].drop(columns=['N', 'dataset', 'n', 'score'])

Unnamed: 0,ALIBIC,ALIBICNOGRAD,CADEX,DICE,GROWINGSPHERES3,GROWINGSPHERES4,LORE,MACE,MLEXPLAIN,SEDC,SYNAS
SDD_0_validity_max,5.645,5.645,4.705,5.645,8.815,1.395,6.965,10.25,5.645,5.645,5.645
SDD_0_sparsity_max,6.895,6.895,3.88,6.895,5.88,1.15,6.825,6.895,6.895,6.895,6.895
SDD_0_L2_min,6.895,6.895,3.53,6.895,5.97,1.38,6.855,6.895,6.895,6.895,6.895
SDD_0_RUC_max,6.895,6.895,3.615,6.895,6.005,1.275,6.84,6.895,6.895,6.895,6.895
SDD_0_RMC_max,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
SDD_0_MAD_min,6.73,6.73,4.63,6.73,6.53,1.0,6.73,6.73,6.73,6.73,6.73
SDD_0_MD_min,6.895,6.895,3.24,6.895,5.87,1.77,6.855,6.895,6.895,6.895,6.895


In [168]:
best_fws = []
for name, df in df_mean_ranking.drop(columns=['dataset', 'n', 'score']).iterrows():

    # No of treatments = No of frameworks / No of tests = No of rows

    N = df['N']
    k = (df.shape[0]-1)

    spx2f = sum([(R-(k+1)/2)**2 for R in df.drop('N').values])

    x2f = 12*N/(k*(k+1))*spx2f

    ff = (N-1)*x2f/(N*(k-1)-x2f)

    # 95% of confidence
    critical_f_value = sp.stats.f.ppf(q=1-.05, dfn=k-1, dfd=N-1)
    
    reject_h0 = True if ff >= critical_f_value else False

    cd = cvnt005[k]*(k*(k+1)/(6*N))**0.5



    unique_ranks = df.drop('N').unique().tolist()

    best_result = min(unique_ranks)

    groups = []
    for rc in unique_ranks:
        rc_group = [rc]
        for rt in list(set(unique_ranks)-set([rc])):
            if abs(rt-rc) < cd:
                rc_group.append(rt)
        dupe = False
        for gp in groups:
            if set(gp) == set(rc_group):
                dupe = True
        if not dupe:

            groups.append(rc_group)

    best_group = []
    for gp in groups:
        if best_result in gp:
            best_group += gp

    best_fw = {}
    for fw, rank in df.drop('N').items():
        if rank in best_group:
            best_fw[fw] = 1
        else:
            best_fw[fw] = 0

    res_out = pd.DataFrame([best_fw])[framework_names]
    res_out.index = [name]
    
    res_out['H0_Reject'] = reject_h0
    
    res_out['dataset'] = name.split('_')[0]
    res_out['n'] = name.split('_')[1]
    res_out['score'] = name.split('_')[2]

    best_fws.append(res_out)

df_best_fws = pd.concat(best_fws)

In [169]:
def highlight_best(s):
    '''
    highlight the statistically significant with gray.
    '''
    
    has_best = df_best_fws.loc[s.name]['H0_Reject']
    
    if has_best:
        
        bests = df_best_fws.loc[s.name].drop(['H0_Reject', 'dataset', 'n', 'score'])
        bests = bests[bests==1]
        bests_fws = list(bests[bests==1].index)
        
        return ['background-color: gray' if v in bests_fws else '' for v in list(s.index)]
    is_max = s == s.max()
    
    return ['']*len(s)

In [173]:
df_mean_ranking[
    (df_mean_ranking['n']=='0')
].drop(columns=['N', 'dataset', 'n', 'score']).style.apply(highlight_max, axis=1).to_excel('analysis.xlsx')