In [55]:
'''
This contains code for generating the table "Comparison to state-of-the art counterfactual example generation techniques in terms of explanation time t, sparsity s, L1-Norm 𝛿1, L2-Norm 𝛿2, and validity %. (*) denotes datasets where RFOCSE necessitated uncapped explanation time."

A CSV and a TEX version will be generated; adjustments to the LaTeX table font size and table width may be necessary.

Experiment results files needed: CompareMethods

Results used in the paper are provided in "../results/final". If generating new results, run each experiment and update the results paths below.
'''
# path to each result file
results_path = "../results/compare_methods_tab4.csv"

# path to output the figure
export_figures = True
output_dir = "./reproducibility/"
table_save_name = "compare_methods_gbc_tab4"

In [56]:
import os
import re
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

sys.path.append(os.path.abspath("../"))
#from dataset import DS_NAMES
#from utilities.figure_tools import (get_latest_results_directory, load_results,
                                    #make_fig_directory)

cmap = plt.get_cmap("Set1")
colors = cmap.colors
if export_figures and not os.path.isdir(output_dir):
    os.makedirs(output_dir)

In [57]:
# Load the raw experiment results and keep only the runs with 100 trees.
# .copy() gives an independent frame, so the relabelling below never writes
# into a slice of the raw data (avoids pandas' SettingWithCopyWarning).
raw_results = pd.read_csv(results_path)
all_results = raw_results[raw_results["n_trees"] == 100].copy()

########### rename the FACET variations to match the paper ###########
# FACET on random forest --> FCT-RF
rf_rows = all_results["model_type"] == "RandomForest"
all_results.loc[rf_rows, "explainer"] = "FCT-RF"
# FACET on gradient boosting ensemble with complete ensemble intersection --> FCT-GB1
gb1_rows = (all_results["model_type"]=="GradientBoostingClassifier") & (all_results["gbc_intersection"]=="CompleteEnsemble")
all_results.loc[gb1_rows, "explainer"] = "FCT-GB1"
# FACET on gradient boosting ensemble with minimal worst guess intersection --> FCT-GB2
gb2_rows = (all_results["model_type"]=="GradientBoostingClassifier") & (all_results["gbc_intersection"]=="MinimalWorstGuess")
all_results.loc[gb2_rows, "explainer"] = "FCT-GB2"

# group by the new names and average every numeric column.
# numeric_only=True is required: the frame still holds string columns
# (model_type, gbc_intersection) and pandas >= 2.0 raises a TypeError when
# asked to average them instead of silently dropping them.
all_results = all_results.groupby(["dataset", "explainer"]).mean(numeric_only=True).reset_index()

found_expl = list(all_results["explainer"].unique())
found_ds = list(all_results["dataset"].unique())
print("explainers:", found_expl)
print("datasets", found_ds)
# row order used when the tables are written
expl_order = ["FCT-RF", "FCT-GB1", "FCT-GB2"]
all_results.head()

explainers: ['FCT-GB1', 'FCT-GB2', 'FCT-RF']
datasets ['adult', 'cancer', 'compas', 'credit', 'glass', 'magic', 'spambase', 'vertebral']


Unnamed: 0,dataset,explainer,n_trees,max_depth,iteration,accuracy,precision,recall,f1,per_valid,avg_dist,avg_manhattan,avg_length,prep_time,explain_time,sample_time,n_explain
0,adult,FCT-GB1,100.0,3.0,0.0,0.862797,0.782532,0.605442,0.68269,1.0,0.186042,0.269632,2.75,20.484443,7.188049,0.359402,20.0
1,adult,FCT-GB2,100.0,3.0,0.0,0.862797,0.782532,0.605442,0.68269,1.0,0.139133,0.204595,2.65,30.272103,5.874154,0.293708,20.0
2,adult,FCT-RF,100.0,3.0,0.0,0.829961,0.829871,0.380499,0.521766,1.0,1.08724,2.180795,4.0,20.71936,5.323026,0.266151,20.0
3,cancer,FCT-GB1,100.0,3.0,0.0,0.95614,1.0,0.883721,0.938272,1.0,0.15664,0.338974,7.45,16.921963,0.446038,0.022302,20.0
4,cancer,FCT-GB2,100.0,3.0,0.0,0.95614,1.0,0.883721,0.938272,1.0,0.152684,0.323263,7.15,39.217728,0.435002,0.02175,20.0


In [58]:
# Per-dataset optimum of every metric; used to decide which cells are set in bold.
opt_vals = {}
# Set to True to bold the best value per dataset in the LaTeX table.
bold_opt = False
# Direction of each metric: True -> the minimum is optimal, False -> the maximum is optimal.
domin = {
    "sample_time": True,
    "avg_length": True,
    "avg_dist": True,
    "avg_manhattan": True,
    "per_valid": False,
}
metrics = ["sample_time", "avg_length", "avg_dist", "avg_manhattan", "per_valid"]


for ds in all_results["dataset"].unique():
    ds_rows = all_results[all_results["dataset"] == ds]
    opt_vals[ds] = {
        m: (ds_rows[m].min() if domin[m] else ds_rows[m].max())
        for m in metrics
    }

# LaTeX column headers. Raw strings keep the backslashes literal; the previous
# plain strings relied on invalid escape sequences such as "\d" and "\%",
# which newer Python versions warn about and will eventually reject.
metric_latex = {
    "sample_time": r"$t\downarrow$",
    "avg_length": r"$\delta_0\downarrow$",
    "avg_manhattan": r"$\delta_1\downarrow$",
    "avg_dist": r"$\delta_2\downarrow$",
    "per_valid": r"$\%\uparrow$",
}
# Display name of each explainer in the generated tables.
pretty_names = {
    "FCT-RF" : "FCT-RF",
    "FCT-GB1" : "FCT-GB1",
    "FCT-GB2" : "FCT-GB2",
}
# Column order of the metrics in the generated tables.
all_metrics = ["sample_time", "avg_length", "avg_manhattan", "avg_dist", "per_valid"]

In [59]:
def df_to_csv_latex(df_source, expls, metrics, fname, include_row_label=True, ds_names=None):
    """Write a results table with one row per dataset as both CSV and LaTeX.

    Columns are grouped by explainer; every group holds one column per metric.
    The files are written to ``output_dir + fname + ".csv"`` / ``".tex"``.

    Reads the notebook-level names ``output_dir``, ``pretty_names``,
    ``metric_latex``, ``bold_opt`` and ``opt_vals``.

    Parameters
    ----------
    df_source : DataFrame with "dataset" and "explainer" columns plus one
        column per requested metric.
    expls : explainer names, in column-group order. Each must be a key of
        ``pretty_names``.
    metrics : metric column names, in column order within each group. Each
        must be a key of ``metric_latex``.
    fname : file name stem (no extension) of the two output files.
    include_row_label : when True the first column holds the dataset name.
    ds_names : datasets to emit, in row order. Defaults to
        ``["compas", "glass", "vertebral"]`` (the previously hard-coded list).

    Raises
    ------
    KeyError
        If a (dataset, explainer) pair or metric is missing from ``df_source``.
        "avg_dist" and "avg_manhattan" additionally need an "FCT-RF" row for
        the dataset because they are reported relative to it.
    """
    if ds_names is None:
        ds_names = ["compas", "glass", "vertebral"]
    # set_index returns a new frame, so the caller's frame is left untouched
    df = df_source.set_index(["dataset", "explainer"])
    n_metrics = len(metrics)

    def format_cell(ds, expl, m):
        """Return (csv_text, latex_text) for one table cell."""
        raw = df.loc[(ds, expl), m]
        if m == "per_valid":
            # stored as a fraction, reported as a percentage
            text = "{:0.1f}".format(raw * 100)
        elif m == "avg_length":
            text = "{:0.2f}".format(raw)
        elif m in ("avg_dist", "avg_manhattan"):
            # distances are reported relative to FCT-RF on the same dataset
            text = "{:0.2f}".format(raw / df.loc[(ds, "FCT-RF"), m])
        elif m == "sample_time":
            text = "{:0.4f}".format(raw)
        else:
            # unknown metrics are written unformatted and never bolded
            return str(raw), str(raw)
        # optimality is judged on the raw value, not on the formatted one
        if bold_opt and raw == opt_vals[ds][m]:
            return text, "\\textbf{" + text + "}"
        return text, text

    csv_path = os.path.join(output_dir, fname + ".csv")
    tex_path = os.path.join(output_dir, fname + ".tex")
    with open(csv_path, "w") as csv_file, open(tex_path, "w") as tex_file:
        # ---- LaTeX preamble and column specification
        col_spec = ("|X" if include_row_label else "") + "|"
        col_spec += ("c" * n_metrics + "|") * len(expls)
        tex_file.write(
            "\\begin{table*}[t]\n\\small\n\\centering\n"
            "\\begin{tabularx}{0.95\\textwidth}{" + col_spec + "}\n\\hline"
        )

        # ---- header row one: one group per explainer
        group_cells = [
            "\\multicolumn{" + str(n_metrics) + "}{c|}{\\textbf{" + pretty_names[expl] + "}}"
            for expl in expls
        ]
        if include_row_label:
            group_cells.insert(0, "\\textbf{Dataset}")
        tex_file.write(" & ".join(group_cells) + " \\\\\n")
        csv_file.write(
            ("dataset," if include_row_label else "")
            + "".join((pretty_names[expl] + ",") * n_metrics + "," for expl in expls)
            + "\n"
        )

        # ---- header row two: metric names, repeated for every group
        header_two = "".join("& " + metric_latex[m] + " " for _ in expls for m in metrics)
        if not include_row_label:
            # without a label column the leading "& " would add a surplus cell
            header_two = header_two[2:]
        tex_file.write(header_two + "\\\\\n\\hline\n")
        csv_file.write(
            ("," if include_row_label else "")
            + "".join("".join(m + "," for m in metrics) + "," for _ in expls)
            + "\n"
        )

        # ---- body: one row per dataset
        for ds in ds_names:
            csv_row = (ds + ",") if include_row_label else ""
            tex_cells = [ds] if include_row_label else []
            for expl in expls:
                for m in metrics:
                    csv_text, tex_text = format_cell(ds, expl, m)
                    csv_row += csv_text + ","
                    tex_cells.append(tex_text)
                # empty CSV column separates the explainer groups
                csv_row += ","
            csv_file.write(csv_row + "\n")
            tex_file.write(" & ".join(tex_cells) + " \\\\\n")

        # ---- LaTeX close
        tex_file.write("\\hline\n\\end{tabularx}\n")
        tex_file.write("\\caption{Comparison to state-of-the art counterfactual example generation techniques in terms of explanation time $t$, explanation distance $\\delta$, and percent of instances successfully explained. ($\\ast$) denotes cases which necessitated uncapped explanation time.}\n")
        tex_file.write("\\label{tab.compare_methods}\n\\vspace{-7mm}\n\\end{table*}\n")

In [60]:
def df_to_csv_latex_transponse(df_source, expls, ds_names, metrics, fname, include_row_label=True):
    """Write a results table with one row per explainer as both CSV and LaTeX.

    Columns are grouped by dataset; every group holds one column per metric.
    Explainers and datasets that do not occur in ``df_source`` are skipped.
    The lists that are actually used are printed. The files are written to
    ``output_dir + fname + ".csv"`` / ``".tex"``.

    Reads the notebook-level names ``output_dir``, ``pretty_names``,
    ``metric_latex``, ``bold_opt`` and ``opt_vals``.

    Parameters
    ----------
    df_source : DataFrame with "dataset" and "explainer" columns plus one
        column per requested metric.
    expls : explainer names, in row order. Each one that is used must be a
        key of ``pretty_names``.
    ds_names : dataset names, in column-group order.
    metrics : metric column names, in column order within each group. Each
        must be a key of ``metric_latex``.
    fname : file name stem (no extension) of the two output files.
    include_row_label : when True the first column holds the explainer name.

    Raises
    ------
    KeyError
        If a (dataset, explainer) pair or metric is missing from ``df_source``.
        "avg_dist" and "avg_manhattan" additionally need an "FCT-RF" row for
        the dataset because they are reported relative to it.
    """
    # Filter against the frame that was actually passed in rather than
    # against notebook-level lists, so the function has no hidden state.
    known_expls = set(df_source["explainer"].unique())
    known_ds = set(df_source["dataset"].unique())
    valid_expls = [expl for expl in expls if expl in known_expls]
    valid_ds = [ds for ds in ds_names if ds in known_ds]
    print(valid_expls)
    print(valid_ds)

    # set_index returns a new frame, so the caller's frame is left untouched
    df = df_source.set_index(["dataset", "explainer"])
    n_metrics = len(metrics)

    def format_cell(ds, expl, m):
        """Return (csv_text, latex_text) for one table cell."""
        raw = df.loc[(ds, expl), m]
        if m == "per_valid":
            # stored as a fraction, reported as a percentage
            text = "{:0.1f}".format(raw * 100)
        elif m == "avg_length":
            text = "{:0.2f}".format(raw)
        elif m in ("avg_dist", "avg_manhattan"):
            # distances are reported relative to FCT-RF on the same dataset
            text = "{:0.2f}".format(raw / df.loc[(ds, "FCT-RF"), m])
        elif m == "sample_time":
            text = "{:0.4f}".format(raw)
        else:
            # unknown metrics are written unformatted and never bolded
            return str(raw), str(raw)
        # optimality is judged on the raw value, not on the formatted one
        if bold_opt and raw == opt_vals[ds][m]:
            return text, "\\textbf{" + text + "}"
        return text, text

    csv_path = os.path.join(output_dir, fname + ".csv")
    tex_path = os.path.join(output_dir, fname + ".tex")
    with open(csv_path, "w") as csv_file, open(tex_path, "w") as tex_file:
        # ---- LaTeX preamble and column specification
        col_spec = ("|X" if include_row_label else "") + "|"
        col_spec += ("c" * n_metrics + "|") * len(valid_ds)
        tex_file.write(
            "\\begin{table*}[t]\n\\small\n\\centering\n"
            "\\begin{tabularx}{0.95\\textwidth}{" + col_spec + "}\n\\hline"
        )

        # ---- header row one: one group per dataset
        # NOTE(review): the corner cell reads "Dataset" although the rows are
        # explainers; kept as-is so previously generated tables are reproduced.
        group_cells = [
            "\\multicolumn{" + str(n_metrics) + "}{c|}{\\textbf{" + ds.upper() + "}}"
            for ds in valid_ds
        ]
        if include_row_label:
            group_cells.insert(0, "\\textbf{Dataset}")
        tex_file.write(" & ".join(group_cells) + " \\\\\n")
        csv_file.write(
            ("dataset," if include_row_label else "")
            + "".join((ds.upper() + ",") * n_metrics + "," for ds in valid_ds)
            + "\n"
        )

        # ---- header row two: metric names, repeated for every group
        header_two = "".join("& " + metric_latex[m] + " " for _ in valid_ds for m in metrics)
        if not include_row_label:
            # without a label column the leading "& " would add a surplus cell
            header_two = header_two[2:]
        tex_file.write(header_two + "\\\\\n\\hline\n")
        csv_file.write(
            ("," if include_row_label else "")
            + "".join("".join(m + "," for m in metrics) + "," for _ in valid_ds)
            + "\n"
        )

        # ---- body: one row per explainer
        for expl in valid_expls:
            csv_row = (pretty_names[expl] + ",") if include_row_label else ""
            tex_cells = [pretty_names[expl]] if include_row_label else []
            for ds in valid_ds:
                for m in metrics:
                    csv_text, tex_text = format_cell(ds, expl, m)
                    csv_row += csv_text + ","
                    tex_cells.append(tex_text)
                # empty CSV column separates the dataset groups
                csv_row += ","
            csv_file.write(csv_row + "\n")
            tex_file.write(" & ".join(tex_cells) + " \\\\\n")

        # ---- LaTeX close
        tex_file.write("\\hline\n\\end{tabularx}\n")
        tex_file.write("\\caption{Comparison to state-of-the art counterfactual example generation techniques in terms of explanation time $t$, explanation distance $\\delta$, and percent of instances successfully explained. ($\\ast$) denotes cases which necessitated uncapped explanation time.}\n")
        tex_file.write("\\label{tab.compare_methods}\n\\vspace{-7mm}\n\\end{table*}\n")

In [61]:
# Display the aggregated results (one row per dataset/explainer pair).
all_results

Unnamed: 0,dataset,explainer,n_trees,max_depth,iteration,accuracy,precision,recall,f1,per_valid,avg_dist,avg_manhattan,avg_length,prep_time,explain_time,sample_time,n_explain
0,adult,FCT-GB1,100.0,3.0,0.0,0.862797,0.782532,0.605442,0.68269,1.0,0.186042,0.269632,2.75,20.484443,7.188049,0.359402,20.0
1,adult,FCT-GB2,100.0,3.0,0.0,0.862797,0.782532,0.605442,0.68269,1.0,0.139133,0.204595,2.65,30.272103,5.874154,0.293708,20.0
2,adult,FCT-RF,100.0,3.0,0.0,0.829961,0.829871,0.380499,0.521766,1.0,1.08724,2.180795,4.0,20.71936,5.323026,0.266151,20.0
3,cancer,FCT-GB1,100.0,3.0,0.0,0.95614,1.0,0.883721,0.938272,1.0,0.15664,0.338974,7.45,16.921963,0.446038,0.022302,20.0
4,cancer,FCT-GB2,100.0,3.0,0.0,0.95614,1.0,0.883721,0.938272,1.0,0.152684,0.323263,7.15,39.217728,0.435002,0.02175,20.0
5,cancer,FCT-RF,100.0,3.0,0.0,0.947368,0.95122,0.906977,0.928571,1.0,0.337952,1.001537,12.95,31.826928,1.943134,0.097157,20.0
6,compas,FCT-GB1,100.0,3.0,0.0,0.635417,0.586066,0.609808,0.597701,1.0,0.2175,0.2175,1.0,11.940747,0.050001,0.0025,20.0
7,compas,FCT-GB2,100.0,3.0,0.0,0.635417,0.586066,0.609808,0.597701,1.0,0.2175,0.2175,1.0,23.936827,0.058017,0.002901,20.0
8,compas,FCT-RF,100.0,3.0,0.0,0.652462,0.619159,0.565032,0.590858,1.0,0.07125,0.07125,1.0,19.87771,0.049001,0.00245,20.0
9,credit,FCT-GB1,100.0,3.0,0.0,0.808776,0.829094,0.951146,0.885936,1.0,0.104433,0.159743,4.3,18.594934,2.414137,0.120707,20.0


In [62]:
# Display the dataset names present in the aggregated results.
found_ds

['adult',
 'cancer',
 'compas',
 'credit',
 'glass',
 'magic',
 'spambase',
 'vertebral']

In [63]:
# create the table from the main paper
main_paper_ds = ["adult", "cancer", "credit", "magic", "spambase"]
df_to_csv_latex_transponse(all_results, expl_order, main_paper_ds, all_metrics, table_save_name, True)

['FCT-RF', 'FCT-GB1', 'FCT-GB2']
['adult', 'cancer', 'credit', 'magic', 'spambase']


In [64]:
# create the table from the appendix
apdx_ds = ["compas", "glass", "vertebral"]
df_to_csv_latex_transponse(all_results, expl_order, apdx_ds, all_metrics, table_save_name + "_apdx", True)

['FCT-RF', 'FCT-GB1', 'FCT-GB2']
['compas', 'glass', 'vertebral']


In [65]:
# Validity per dataset (rows) and explainer (columns); pivot only reads the three columns named here.
all_results.pivot(index=["dataset"], columns=["explainer"], values=["per_valid"])

Unnamed: 0_level_0,per_valid,per_valid,per_valid
explainer,FCT-GB1,FCT-GB2,FCT-RF
dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
adult,1.0,1.0,1.0
cancer,1.0,1.0,1.0
compas,1.0,1.0,1.0
credit,1.0,1.0,1.0
glass,1.0,1.0,1.0
magic,1.0,1.0,1.0
spambase,1.0,1.0,1.0
vertebral,1.0,1.0,1.0


In [66]:
# Average distance per dataset (rows) and explainer (columns); pivot only reads the three columns named here.
all_results.pivot(index=["dataset"], columns=["explainer"], values=["avg_dist"])

Unnamed: 0_level_0,avg_dist,avg_dist,avg_dist
explainer,FCT-GB1,FCT-GB2,FCT-RF
dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
adult,0.186042,0.139133,1.08724
cancer,0.15664,0.152684,0.337952
compas,0.2175,0.2175,0.07125
credit,0.104433,0.103868,0.836277
glass,0.133535,0.127987,0.197129
magic,0.163413,0.163384,0.20704
spambase,0.035813,0.033596,0.058023
vertebral,0.052877,0.052158,0.082985


In [67]:
# Per-sample explanation time per dataset (rows) and explainer (columns); pivot only reads the three columns named here.
all_results.pivot(index=["dataset"], columns=["explainer"], values=["sample_time"])

Unnamed: 0_level_0,sample_time,sample_time,sample_time
explainer,FCT-GB1,FCT-GB2,FCT-RF
dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
adult,0.359402,0.293708,0.266151
cancer,0.022302,0.02175,0.097157
compas,0.0025,0.002901,0.00245
credit,0.120707,0.1046,0.100255
glass,0.003551,0.00275,0.01375
magic,0.0121,0.0116,0.012276
spambase,0.063502,0.061578,0.026
vertebral,0.00115,0.00105,0.00235
