# Imports + class

In [2]:
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import numpy as np
from z3 import *
from xgboost import XGBClassifier
from pmlb import fetch_data
# Global Z3 option, also re-applied in XGBoostExplainer.__init__.
# NOTE(review): presumably makes Z3 render rationals as decimals - confirm in the Z3 docs.
set_option(rational_to_decimal=True)

# NOTE(review): pandas and train_test_split are already imported above.
import re
import time
import pandas as pd
from sklearn.model_selection import train_test_split
# Global NumPy print setting (suppress=True); affects how arrays are displayed only.
np.set_printoptions(suppress=True)

In [3]:
class XGBoostExplainer:
    """Abductive explanations for a binary XGBoost classifier, encoded in Z3.

    The trained trees are translated once into a Z3 formula (``self.T``).
    For each instance, feature equalities are dropped greedily while the
    predicted class (``explain``) or a margin threshold (``explain_prob``)
    is still guaranteed by the remaining constraints.
    """

    def __init__(self, model, data):
        """Store the model and build the Z3 encoding of its trees.

        Args:
            model: fitted XGBClassifier (must expose ``get_booster``).
            data: pandas DataFrame; its columns name the Z3 feature variables.
        """
        self.model = model
        self.data = data.values
        self.columns = data.columns
        self.max_categories = 2

        set_option(rational_to_decimal=True)
        self.categoric_features = self.get_categoric_features(self.data)
        self.T_model = self.model_trees_expression(self.model)
        self.T = self.T_model

    def explain(self, instance, reorder="asc"):
        """Return the feature equalities kept as the explanation of ``instance``.

        A feature is dropped when the tree formula plus the remaining
        equalities still imply the predicted class.
        """
        self.I = self.instance_expression(instance)
        self.D, self.D_add = self.decision_function_expression(self.model, [instance])
        return self.explain_expression(self.I, And(self.T, self.D_add), self.D, self.model, reorder)

    def explain_prob(self, instance, reorder="asc", threshold_margin=0, target_threshold=None):
        """Explain ``instance`` while keeping the margin beyond a threshold.

        Args:
            instance: feature values, ordered like ``self.columns``.
            reorder: "asc"/"desc" to process features by model importance.
            threshold_margin: percentage of the predicted margin to preserve
                (used only when ``target_threshold`` is falsy).
            target_threshold: absolute margin threshold to preserve.

        Returns:
            The kept feature equalities, or ``[]`` if any exception occurred
            (the error is printed, not raised).
        """
        try:
            self.I = self.instance_expression(instance)
            self.D, self.D_add = self.decision_function_expression(self.model, [instance])
            return self.explain_expression_prob(self.I, And(self.T, self.D_add), self.D, self.model, reorder, threshold_margin, target_threshold)
        except Exception as e:
            print("Error in explain_prob:", e)
            return []

    def get_categoric_features(self, data: np.ndarray):
        """Return the column names having at most ``self.max_categories`` distinct values."""
        return [
            self.columns[i]
            for i in range(data.shape[1])
            if len(np.unique(data[:, i])) <= self.max_categories
        ]

    def feature_constraints(self, constraints=None):
        """TODO: not implemented yet.

        Expected to receive user-provided feature bounds.
        Planned format: matrix/dataframe of [feature, min/max, value].
        constraint_expression = "constraint_df_to_feature()"
        """
        return

    def _path_conditions(self, tree_df, node_id):
        """Return the split conditions from the root of a tree down to ``node_id``.

        Walks parent links upward (a parent is the row whose "Yes" or "No"
        column points at the current node) and returns conditions root-first.
        """
        conditions = []
        current_id = node_id
        while not tree_df[tree_df["ID"] == current_id].empty:
            parents = tree_df[
                (tree_df["Yes"] == current_id) | (tree_df["No"] == current_id)
            ]
            if parents.empty:
                break  # reached the root
            parent = parents.iloc[0]
            x = Real(parent["Feature"])
            split_value = parent["Split"]
            # The "Yes" child is the branch taken when feature < split.
            if parent["Yes"] == current_id:
                conditions.append(x < split_value)
            else:
                conditions.append(x >= split_value)
            current_id = parent["ID"]
        conditions.reverse()
        return conditions

    def model_trees_expression(self, model):
        """Encode every tree of ``model`` as a Z3 formula.

        Each tree ``t`` gets a real variable ``o_<t>_0``; every leaf adds an
        implication "path conditions => o == leaf value". Split values are
        rounded to 4 decimals. The booster dataframe is kept in
        ``self.booster_df``.
        """
        booster = model.get_booster()
        df = booster.trees_to_dataframe()
        if booster.feature_names is None:
            # Map the generic f0, f1, ... names to the training column names.
            feature_map = {f"f{i}": col for i, col in enumerate(self.columns)}
            df["Feature"] = df["Feature"].replace(feature_map)

        df["Split"] = df["Split"].round(4)
        self.booster_df = df
        class_index = 0  # only the binary case is encoded
        all_tree_formulas = []

        for tree_index in df["Tree"].unique():
            tree_df = df[df["Tree"] == tree_index]
            o = Real(f"o_{tree_index}_{class_index}")

            # A single-node tree is just a constant leaf.
            if len(tree_df) == 1 and tree_df.iloc[0]["Feature"] == "Leaf":
                leaf_value = tree_df.iloc[0]["Gain"]
                all_tree_formulas.append(And(o == leaf_value))
                continue

            path_formulas = []
            for _, node in tree_df[tree_df["Feature"] == "Leaf"].iterrows():
                # For leaf rows the "Gain" column is read as the leaf output.
                conditions = self._path_conditions(tree_df, node["ID"])
                path_formulas.append(Implies(And(*conditions), o == node["Gain"]))

            all_tree_formulas.append(And(*path_formulas))
        return And(*all_tree_formulas)

    def get_init_value(self, model, x, estimator_variables):
        """Return the offset between the model margin and the sum of tree outputs.

        Solves ``self.I`` and ``self.T`` to read each tree output for the
        current instance, then subtracts their sum from the model's predicted
        margin. Also stores ``self.predicted_margin`` and ``self.init_value``.
        """
        estimator_pred = Solver()
        estimator_pred.add(self.I)
        estimator_pred.add(self.T)
        if estimator_pred.check() == sat:
            solvermodel = estimator_pred.model()
            total_sum = sum(
                float(solvermodel.eval(var).as_fraction()) for var in estimator_variables
            )
        else:
            total_sum = 0
            print("estimator error")
        self.predicted_margin = model.predict(x, output_margin=True)[0]
        init_value = self.predicted_margin - total_sum
        self.init_value = init_value
        return init_value

    def decision_function_expression(self, model, x):
        """Build the Z3 decision constraint for the class predicted on ``x``.

        Returns:
            (final_equation, definitions): ``final_equation`` is
            ``decision < 0`` for class 0 and ``decision > 0`` otherwise;
            ``definitions`` ties ``decision`` to the tree output variables.
        """
        n_classes = 1 if model.n_classes_ <= 2 else model.n_classes_
        predicted_class = model.predict(x)[0]
        self.predicted_class = predicted_class
        n_estimators = int(len(model.get_booster().get_dump()) / n_classes)
        # The "_0" suffix is only valid for binary classification.
        estimator_variables = [Real(f"o_{i}_0") for i in range(n_estimators)]
        self.estimator_variables = estimator_variables
        init_value = self.get_init_value(model, x, estimator_variables)

        estimator_sum = Real("estimator_sum")
        decision = Real("decision")
        equation_list = [
            estimator_sum == Sum(estimator_variables),
            decision == estimator_sum + init_value,
        ]

        if predicted_class == 0:
            final_equation = decision < 0
        else:
            final_equation = decision > 0

        return final_equation, And(equation_list)

    def instance_expression(self, instance):
        """Return one ``feature == value`` Z3 equality per value of ``instance``."""
        return [Real(self.columns[i]) == value for i, value in enumerate(instance)]

    def _order_instance_expression(self, I, model, reorder):
        """Return a copy of ``I`` ordered by model feature importance.

        With ``reorder`` "asc" or "desc", features with zero importance are
        dropped and the rest sorted accordingly; any other value returns an
        unmodified copy.
        """
        i_expression = I.copy()
        importances = model.feature_importances_
        non_zero_indices = np.where(importances != 0)[0]

        if reorder == "asc":
            order = non_zero_indices[np.argsort(importances[non_zero_indices])]
        elif reorder == "desc":
            order = non_zero_indices[np.argsort(-importances[non_zero_indices])]
        else:
            return i_expression
        return [i_expression[i] for i in order]

    def explain_expression(self, I, T_s, D_s, model, reorder):
        """Greedily drop equalities from ``I`` while ``T_s`` still implies ``D_s``."""
        i_expression = self._order_instance_expression(I, model, reorder)

        for feature in i_expression.copy():
            i_expression.remove(feature)
            # Put the feature back when the decision is no longer guaranteed.
            if not self.is_proved(Implies(And(And(i_expression), T_s), D_s)):
                i_expression.append(feature)
        return i_expression

    def explain_expression_prob(self, I, T_s, D_s, model, reorder, threshold_margin, target_threshold):
        """Greedily drop equalities from ``I`` while the margin stays beyond a threshold.

        The threshold is ``target_threshold`` when truthy, else
        ``threshold_margin`` percent of the predicted margin, else 0.
        ``self.xai_predicted_margin`` ends up holding the last accepted
        worst-case margin (initially the predicted margin).
        """
        i_expression = self._order_instance_expression(I, model, reorder)

        threshold = 0
        if target_threshold:
            threshold = target_threshold
        elif threshold_margin != 0:
            threshold = self.predicted_margin * threshold_margin / 100
        self.xai_predicted_margin = self.predicted_margin

        for feature in i_expression.copy():
            i_expression.remove(feature)
            # Put the feature back when the threshold can be crossed without it.
            if not self.is_proved_sat(And(And(i_expression), T_s), threshold):
                i_expression.append(feature)
        return i_expression

    def is_proved(self, decision_exp):
        """Return True when ``decision_exp`` is valid (its negation is unsat)."""
        s = Solver()
        s.add(Not(decision_exp))
        return s.check() == unsat

    def is_proved_sat(self, decision_exp, threshold):
        """Return True when the worst-case margin cannot cross ``threshold``.

        For predicted class 0 the ``decision`` variable is maximized and must
        not exceed ``threshold``; for class 1 it is minimized and must not
        fall below it. On success the bound is stored in
        ``self.xai_predicted_margin`` and the optimizer model in
        ``self.solvermodel``.

        Returns False (not proved) when the optimizer does not report ``sat``
        or when the optimum cannot be converted to a float.
        """
        predicted_class = self.predicted_class
        if predicted_class not in (0, 1):
            # Only binary classes are handled; nothing is checked otherwise.
            return True

        decision = Real("decision")
        debug = Real("debug") == 0
        minimizing = predicted_class == 1
        label = "min" if minimizing else "max"

        optimizer = Optimize()
        optimizer.add(decision_exp)
        optimizer.add(debug)
        if minimizing:
            objective = optimizer.minimize(decision)
        else:
            objective = optimizer.maximize(decision)

        if optimizer.check() != sat:
            # No model is available here, so the claim cannot be established.
            print("error")
            return False

        try:
            bound = float(objective.value().as_fraction())
        except Exception:
            # e.g. an unbounded or non-rational optimum.
            print(f"error {label} =", objective.value())
            return False

        crosses_threshold = bound < threshold if minimizing else bound > threshold
        if crosses_threshold:
            return False  # can change class

        self.xai_predicted_margin = bound
        self.solvermodel = optimizer.model()
        return True
  
def generate_results(explainer, X_test, y_pred, classification, path, reorder="asc"):
    """Compare explanation sizes before and after nudging the margin threshold.

    For every test row predicted as ``classification``, an explanation is
    computed with the default threshold and then again with the threshold
    moved 0.01 past the first worst-case margin (negative step for class 0).

    Args:
        explainer: object exposing ``explain_prob`` and ``xai_predicted_margin``.
        X_test: DataFrame of test rows.
        y_pred: predicted classes aligned with ``X_test``.
        classification: class whose rows are explained.
        path: currently unused (CSV export of the results is disabled).
        reorder: feature ordering forwarded to ``explain_prob``.

    Returns:
        DataFrame with one row per explained sample.
    """
    step = -0.01 if classification == 0 else 0.01

    rows = []
    selected = X_test[y_pred == classification]
    for idx in selected.index:
        features = X_test.loc[idx].values

        first_exp = explainer.explain_prob(features, reorder=reorder)
        margin_before = explainer.xai_predicted_margin
        size_before = len(first_exp)

        second_exp = explainer.explain_prob(
            features, reorder=reorder, target_threshold=margin_before + step
        )
        margin_after = explainer.xai_predicted_margin
        size_after = len(second_exp)

        # Equal (rounded) margins: report the second size for both runs.
        if round(margin_before, 2) == round(margin_after, 2):
            size_before = size_after

        rows.append({
            "index": idx,
            "class": classification,
            "xaiprob_initial": round(margin_before, 2),
            "len_xai_initial": size_before,
            "xaiprob_final": round(margin_after, 2),
            "len_xai_final": size_after,
        })

    return pd.DataFrame(rows)

# Prepare datasets and models

In [4]:
def prepare_dataset_model_explainer(dataset_name, dataset_params):
    """Fetch a dataset, train an XGBClassifier on it and build its explainer.

    Args:
        dataset_name: name passed to ``fetch_data``; also the key into
            ``dataset_params``.
        dataset_params: mapping from dataset name to XGBClassifier kwargs.

    Returns:
        Tuple ``(model, X, y, X_test, y_test, explainer)``. Accuracy and F1 on
        the 30% stratified test split are printed.
    """
    frame = fetch_data(dataset_name)
    y = frame['target']
    X = frame.drop(columns='target')

    split = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = split

    # Train the model with the per-dataset hyper-parameters.
    model = XGBClassifier(**dataset_params[dataset_name])
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Report dataset shape and test metrics.
    print("Dataset size:", len(X))
    print("Columns:", len(X.columns))
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("F1-score:", f1_score(y_test, y_pred))

    # The explainer is built from the training split.
    return model, X, y, X_test, y_test, XGBoostExplainer(model, X_train)

def prepare_all_datasets(dataset_names, dataset_params):
    """Prepare every dataset in ``dataset_names`` and collect the results.

    Returns:
        Dict mapping each name to ``(model, X, y, X_test, y_test, explainer)``.
    """
    prepared = {}
    for dataset in dataset_names:
        print(f"\n--- Preparing {dataset} ---")
        model, X, y, X_test, y_test, explainer = prepare_dataset_model_explainer(
            dataset, dataset_params
        )
        prepared[dataset] = (model, X, y, X_test, y_test, explainer)
    return prepared

In [5]:
# Datasets evaluated in this notebook.
dataset_names = ["magic", "adult", "mushroom", "spambase"]

# Every dataset is trained with the same XGBoost hyper-parameters.
dataset_params = {
    name: {"n_estimators": 30, "max_depth": 3} for name in dataset_names
}

# name -> (model, X, y, X_test, y_test, explainer)
dataset_context = prepare_all_datasets(dataset_names, dataset_params)


--- Preparing magic ---
Dataset size: 19020
Columns: 10
Accuracy: 0.8755695758850333
F1-score: 0.8060109289617486

--- Preparing adult ---
Dataset size: 48842
Columns: 14
Accuracy: 0.8652835596806114
F1-score: 0.9144491635607177

--- Preparing mushroom ---
Dataset size: 8124
Columns: 22
Accuracy: 1.0
F1-score: 1.0

--- Preparing spambase ---
Dataset size: 4601
Columns: 57
Accuracy: 0.9471397538015931
F1-score: 0.9322191272051996


In [6]:
def count_classes(dataset_name):
    """Print and return how many test rows are predicted as class 0 and class 1.

    Reads the prepared model and test split from ``dataset_context``.
    """
    model, _, _, X_test, _, _ = dataset_context[dataset_name]
    predictions = model.predict(X_test)
    counts = tuple((predictions == label).sum() for label in (0, 1))
    print(f"Dataset: {dataset_name}, Class 0: {counts[0]}, Class 1: {counts[1]}")
    return counts

# Print the predicted-class balance of each prepared dataset.
# The value of the last call is what the notebook cell displays.
count_classes("magic")
count_classes("adult")
count_classes("mushroom")
count_classes("spambase")

Dataset: magic, Class 0: 4052, Class 1: 1654
Dataset: adult, Class 0: 2726, Class 1: 11927
Dataset: mushroom, Class 0: 1263, Class 1: 1175
Dataset: spambase, Class 0: 848, Class 1: 533


(np.int64(848), np.int64(533))

# Check explanations

In [7]:
def get_explain_results(dataset_name, print_exp = False):
    """Print explanation sizes and worst-case margins for one sample per class.

    For each predicted class, the first test sample with ``|margin| > 1.0`` is
    explained with the default threshold and then with thresholds at 25%, 50%
    and 75% of its predicted margin.

    Args:
        dataset_name: key into the module-level ``dataset_context``.
        print_exp: when True, also print the explanations themselves.
    """
    model, X, y, X_test, y_test, explainer = dataset_context[dataset_name]
    y_pred = model.predict(X_test)

    # (printed label, fraction of the predicted margin used as threshold).
    # The labels keep their historical spelling ("0.25%" denotes the fraction
    # 0.25, i.e. 25% of the margin) so the output matches earlier runs.
    threshold_levels = (("0.25%", 0.25), ("0.5%", 0.5), ("0.75%", 0.75))

    def explain_for_class(classification):
        """Report explanations for the first confident sample of one class."""
        indices = X_test[y_pred == classification].index
        if len(indices) == 0:
            print(f"No samples predicted as class {classification}.")
            return

        # Pick the first sample whose margin is clearly away from zero.
        for i in indices:
            sample = X_test.loc[i].values
            margin = model.predict([sample], output_margin=True)[0]
            if abs(margin) > 1.0:
                break
        else:
            print(f"No samples with |margin| > 1.0 for class {classification}.")
            return

        # Initial explanation (default threshold).
        print(f"\nClass {classification} - Margin: {margin}, Sample index: {i}")

        xai = explainer.explain_prob(sample)
        if print_exp:
            print("Initial abductive explanation:", xai)

        xaiprob_initial = explainer.xai_predicted_margin
        print("Initial predicted margin:", xaiprob_initial)

        # Explanations that must preserve a growing share of the margin.
        for label, fraction in threshold_levels:
            xai_confidence = explainer.explain_prob(sample, target_threshold=margin * fraction)
            xaiprob_final = explainer.xai_predicted_margin
            print(f"\n{label} exp len:", len(xai_confidence))
            if print_exp:
                print(f"{label} explanation:", xai_confidence)
            print(f"{label} predicted margin:", xaiprob_final)

    # Explain for both classes
    explain_for_class(0)
    explain_for_class(1)


In [8]:
# Per-class explanation report for the "magic" dataset.
get_explain_results("magic")


Class 0 - Margin: -1.5318019390106201, Sample index: 13519
Initial predicted margin: -0.305143745439

0.25% exp len: 8
0.25% predicted margin: -0.842865952239

0.5% exp len: 8
0.5% predicted margin: -0.842865952239

0.75% exp len: 9
0.75% predicted margin: -1.438409154139

Class 1 - Margin: 5.3488240242004395, Sample index: 14526
Initial predicted margin: 0.298496220521

0.25% exp len: 3
0.25% predicted margin: 3.056817650122

0.5% exp len: 3
0.5% predicted margin: 3.056817650122

0.75% exp len: 4
0.75% predicted margin: 4.514383172382


In [9]:
# Per-class explanation report for the "adult" dataset.
get_explain_results("adult")


Class 0 - Margin: -1.0879830121994019, Sample index: 3829
Initial predicted margin: -0.201337004746

0.25% exp len: 8
0.25% predicted margin: -0.623709311386

0.5% exp len: 8
0.5% predicted margin: -0.623709311386

0.75% exp len: 10
0.75% predicted margin: -0.875320690366

Class 1 - Margin: 4.173804759979248, Sample index: 30437
Initial predicted margin: 0.063445309674

0.25% exp len: 5
0.25% predicted margin: 1.69386289997

0.5% exp len: 6
0.5% predicted margin: 2.10872710087

0.75% exp len: 7
0.75% predicted margin: 3.172982332594


In [10]:
# Per-class explanation report for the "mushroom" dataset.
get_explain_results("mushroom")


Class 0 - Margin: -7.027366638183594, Sample index: 7176
Initial predicted margin: -4.6045556678

0.25% exp len: 3
0.25% predicted margin: -4.6045556678

0.5% exp len: 3
0.5% predicted margin: -4.6045556678

0.75% exp len: 5
0.75% predicted margin: -5.6530501859

Class 1 - Margin: 8.294289588928223, Sample index: 4961
Initial predicted margin: 0.0284762922

0.25% exp len: 4
0.25% predicted margin: 3.9462458302

0.5% exp len: 5
0.5% predicted margin: 5.1567087872

0.75% exp len: 7
0.75% predicted margin: 6.4058943492


In [11]:
# Per-class explanation report for the "spambase" dataset.
get_explain_results("spambase")


Class 0 - Margin: -6.460454940795898, Sample index: 4538
Initial predicted margin: -0.04732684392

0.25% exp len: 13
0.25% predicted margin: -1.68991961052

0.5% exp len: 17
0.5% predicted margin: -3.317425186

0.75% exp len: 19
0.75% predicted margin: -4.8820868495

Class 1 - Margin: 4.380174160003662, Sample index: 1685
Initial predicted margin: 0.1421553674

0.25% exp len: 15
0.25% predicted margin: 1.1584000099

0.5% exp len: 18
0.5% predicted margin: 2.21234623721

0.75% exp len: 21
0.75% predicted margin: 3.2869784274


# Check threshold datasets

## functions

In [None]:
# def get_explain_results_n(dataset_name, n_samples):
#     model, X, y, X_test, y_test, explainer = dataset_context[dataset_name]
#     y_pred = model.predict(X_test)

#     # Prepare result DataFrame
#     results = []

#     # Loop over both classes
#     for classification in [0, 1]:
#         # Get indices for predicted samples of this class
#         indices = X_test[y_pred == classification].index[:n_samples]
#         if len(indices) == 0:
#             print(f"No samples predicted as class {classification}.")
#             continue

#         for idx in indices:
#             sample = X_test.loc[idx].values

#             # Initial explanation
#             margin = model.predict([sample], output_margin=True)[0]
#             xai_initial = explainer.explain_prob(sample)
#             xaiprob_initial = explainer.xai_predicted_margin
#             exp_len_initial = len(xai_initial)

#             # Explanation with adjusted threshold (half the margin)
#             xai_confidence_case1 = explainer.explain_prob(sample, target_threshold=margin / 2)
#             xaiprob_case1 = explainer.xai_predicted_margin
#             exp_len_case1 = len(xai_confidence_case1)
            
#             increase_prob = -0.01 if margin < 0 else 0.01
#             xai_confidence_case2 = explainer.explain_prob(sample, target_threshold= xaiprob_initial + increase_prob)
#             xaiprob_case2 = explainer.xai_predicted_margin
#             exp_len_case2 = len(xai_confidence_case2)

#             # Append to results
#             results.append({
#                 'sample_id': idx,
#                 'class': classification,
#                 'pred_margin': round(margin, 4),
#                 'exp_len_inicial': exp_len_initial,
#                 'xaiprob_inicial': round(xaiprob_initial, 4),
#                 'exp_len_case1': exp_len_case1,
#                 'xaiprob_case1': round(xaiprob_case1, 4),
#                 'exp_len_case2': exp_len_case2,
#                 'xaiprob_case2': round(xaiprob_case2, 4),
#             })

#     # Convert to DataFrame
#     df_results = pd.DataFrame(results)
    
#     return df_results

# def summarize_explanations(df_results):
#     # Agrupar por classe e calcular as métricas
#     summary = df_results.groupby('class').agg(
#         pred_margin_mean=('pred_margin', 'mean'),
#         pred_margin_std=('pred_margin', 'std'),
#         exp_len_inicial_mean=('exp_len_inicial', 'mean'),
#         exp_len_inicial_std=('exp_len_inicial', 'std'),
#         xaiprob_inicial_mean=('xaiprob_inicial', 'mean'),
#         xaiprob_inicial_std=('xaiprob_inicial', 'std'),
#         exp_len_case1_mean=('exp_len_case1', 'mean'),
#         exp_len_case1_std=('exp_len_case1', 'std'),
#         xaiprob_case1_mean=('xaiprob_case1', 'mean'),
#         xaiprob_case1_std=('xaiprob_case1', 'std'),
#         exp_len_case2_mean=('exp_len_case2', 'mean'),
#         exp_len_case2_std=('exp_len_case2', 'std'),
#         xaiprob_case2_mean=('xaiprob_case2', 'mean'),
#         xaiprob_case2_std=('xaiprob_case2', 'std'),
#     ).reset_index()

#     # Função para combinar mean ± std com 2 casas decimais
#     def format_mean_std(mean, std):
#         return f"{mean:.2f} ± {std:.2f}"

#     # Aplicar a formatação em cada par de colunas
#     formatted = pd.DataFrame()
#     formatted['class'] = summary['class']
#     formatted['Pred Margin'] = summary.apply(lambda row: format_mean_std(row['pred_margin_mean'], row['pred_margin_std']), axis=1)
#     formatted['Exp Len Inicial'] = summary.apply(lambda row: format_mean_std(row['exp_len_inicial_mean'], row['exp_len_inicial_std']), axis=1)
#     formatted['MCT Inicial'] = summary.apply(lambda row: format_mean_std(row['xaiprob_inicial_mean'], row['xaiprob_inicial_std']), axis=1)
#     formatted['Exp Len case1'] = summary.apply(lambda row: format_mean_std(row['exp_len_case1_mean'], row['exp_len_case1_std']), axis=1)
#     formatted['MCT case1'] = summary.apply(lambda row: format_mean_std(row['xaiprob_case1_mean'], row['xaiprob_case1_std']), axis=1)
#     formatted['Exp Len case2'] = summary.apply(lambda row: format_mean_std(row['exp_len_case2_mean'], row['exp_len_case2_std']), axis=1)
#     formatted['MCT case2'] = summary.apply(lambda row: format_mean_std(row['xaiprob_case2_mean'], row['xaiprob_case2_std']), axis=1)

#     return formatted


In [50]:
def get_explain_results_n(dataset_name, n_samples):
    """Collect explanation sizes and worst-case margins for several samples.

    For each predicted class, the first ``n_samples`` test rows are explained
    with the default threshold and with thresholds at 25%, 50% and 75% of the
    sample's predicted margin.

    Args:
        dataset_name: key into the module-level ``dataset_context``.
        n_samples: maximum number of samples explained per class.

    Returns:
        DataFrame with one row per explained sample.
    """
    model, X, y, X_test, y_test, explainer = dataset_context[dataset_name]
    y_pred = model.predict(X_test)

    # (column suffix, fraction of the predicted margin used as threshold)
    threshold_levels = ((25, 0.25), (50, 0.5), (75, 0.75))

    results = []

    # Loop over both classes
    for classification in [0, 1]:
        # Indices of the first n_samples rows predicted as this class
        indices = X_test[y_pred == classification].index[:n_samples]
        if len(indices) == 0:
            print(f"No samples predicted as class {classification}.")
            continue

        for idx in indices:
            sample = X_test.loc[idx].values

            # Initial explanation (default threshold)
            margin = model.predict([sample], output_margin=True)[0]
            xai_initial = explainer.explain_prob(sample)
            # Read right away: the next explain_prob call overwrites it.
            xaiprob_initial = explainer.xai_predicted_margin

            row = {
                'sample_id': idx,
                'class': classification,
                'pred_margin': round(margin, 4),
                'exp_len_inicial': len(xai_initial),
                'xaiprob_inicial': round(xaiprob_initial, 4),
            }

            # Explanations that must preserve 25%, 50% and 75% of the margin
            for suffix, fraction in threshold_levels:
                xai_confidence = explainer.explain_prob(sample, target_threshold=margin * fraction)
                row[f'exp_len_{suffix}'] = len(xai_confidence)
                row[f'xaiprob_{suffix}'] = round(explainer.xai_predicted_margin, 4)

            results.append(row)

    # Convert to DataFrame
    df_results = pd.DataFrame(results)

    return df_results

def summarize_explanations(df_results):
    """Summarize per-sample results as "mean ± std" strings for each class.

    Args:
        df_results: DataFrame with a 'class' column plus the metric columns
            listed below.

    Returns:
        DataFrame with one row per class and one formatted column per metric.
    """
    # (source column, display label), in output order.
    metrics = [
        ('pred_margin', 'Pred Margin'),
        ('exp_len_inicial', 'Exp Len Inicial'),
        ('xaiprob_inicial', 'MCT Inicial'),
        ('exp_len_25', 'Exp Len 25'),
        ('xaiprob_25', 'MCT 25'),
        ('exp_len_50', 'Exp Len 50'),
        ('xaiprob_50', 'MCT 50'),
        ('exp_len_75', 'Exp Len 75'),
        ('xaiprob_75', 'MCT 75'),
    ]

    # Group by class and compute mean and std of every metric.
    named_aggs = {}
    for column, _ in metrics:
        named_aggs[f"{column}_mean"] = (column, 'mean')
        named_aggs[f"{column}_std"] = (column, 'std')
    stats = df_results.groupby('class').agg(**named_aggs).reset_index()

    def render(column):
        """Format one metric as "mean ± std" with 2 decimals, row by row."""
        mean_key, std_key = f"{column}_mean", f"{column}_std"
        return stats.apply(
            lambda row: f"{row[mean_key]:.2f} ± {row[std_key]:.2f}", axis=1
        )

    table = pd.DataFrame()
    table['class'] = stats['class']
    for column, label in metrics:
        table[label] = render(column)

    return table


# datasets

In [None]:
# Number of samples per predicted class passed to get_explain_results_n below.
instances_to_explain = 10

In [52]:
# Threshold summary for the "magic" dataset.
# NOTE(review): the variable names say "saheart", but the dataset explained here is "magic".
df_saheart = get_explain_results_n("magic", instances_to_explain)
df_summary_saheart = summarize_explanations(df_saheart)
df_summary_saheart

Unnamed: 0,class,Pred Margin,Exp Len Inicial,MCT Inicial,Exp Len 25,MCT 25,Exp Len 50,MCT 50,Exp Len 75,MCT 75
0,0,-2.18 ± 1.00,6.40 ± 1.35,-0.28 ± 0.31,6.90 ± 1.29,-0.89 ± 0.42,7.70 ± 1.06,-1.33 ± 0.68,8.40 ± 0.70,-1.92 ± 0.91
1,1,2.19 ± 1.98,3.40 ± 0.70,0.18 ± 0.13,4.20 ± 0.63,0.92 ± 0.86,4.50 ± 0.71,1.41 ± 1.20,5.10 ± 0.74,1.86 ± 1.65


In [53]:
# Threshold summary for the "adult" dataset.
df_adult = get_explain_results_n("adult", instances_to_explain)
df_summary_adult = summarize_explanations(df_adult)
df_summary_adult

Unnamed: 0,class,Pred Margin,Exp Len Inicial,MCT Inicial,Exp Len 25,MCT 25,Exp Len 50,MCT 50,Exp Len 75,MCT 75
0,0,-1.28 ± 1.45,8.10 ± 2.28,-0.16 ± 0.18,8.60 ± 2.01,-0.47 ± 0.44,9.40 ± 1.78,-0.72 ± 0.74,10.50 ± 1.18,-1.04 ± 1.12
1,1,2.16 ± 1.32,4.60 ± 1.26,0.08 ± 0.04,5.20 ± 0.92,0.92 ± 0.54,5.60 ± 1.07,1.24 ± 0.67,6.50 ± 0.85,1.70 ± 1.00


In [54]:
# Threshold summary for the "mushroom" dataset.
df_mushroom = get_explain_results_n("mushroom", instances_to_explain)
df_summary_mushroom = summarize_explanations(df_mushroom)
df_summary_mushroom

Unnamed: 0,class,Pred Margin,Exp Len Inicial,MCT Inicial,Exp Len 25,MCT 25,Exp Len 50,MCT 50,Exp Len 75,MCT 75
0,0,-6.24 ± 2.11,3.60 ± 1.43,-2.89 ± 2.22,4.00 ± 1.76,-3.55 ± 1.73,4.30 ± 2.11,-3.77 ± 1.41,6.60 ± 1.65,-4.82 ± 1.63
1,1,6.75 ± 1.68,2.90 ± 0.88,0.69 ± 0.61,4.10 ± 0.57,2.49 ± 1.10,5.50 ± 0.85,3.95 ± 1.16,7.30 ± 1.49,5.35 ± 1.33


In [55]:
# Threshold summary for the "spambase" dataset.
# NOTE(review): the variable names say "sonar", but the dataset explained here is "spambase".
df_sonar = get_explain_results_n("spambase", instances_to_explain)
df_summary_sonar = summarize_explanations(df_sonar)
df_summary_sonar

Unnamed: 0,class,Pred Margin,Exp Len Inicial,MCT Inicial,Exp Len 25,MCT 25,Exp Len 50,MCT 50,Exp Len 75,MCT 75
0,0,-3.31 ± 1.62,13.30 ± 2.87,-0.05 ± 0.05,15.40 ± 2.27,-0.89 ± 0.41,16.70 ± 1.70,-1.73 ± 0.82,18.50 ± 1.27,-2.54 ± 1.20
1,1,3.63 ± 0.98,12.00 ± 1.94,0.06 ± 0.04,14.20 ± 1.14,0.97 ± 0.26,16.50 ± 1.51,1.87 ± 0.47,18.60 ± 1.43,2.78 ± 0.72


# Margin - Anchor - Lime - Explainer

In [19]:
def generate_samples_from_explanation(X, explanation, base_sample, n_samples=100, random_state=None):
    """Draw random rows from ``X`` and pin the explained features to ``base_sample``.

    Args:
        X: DataFrame to sample rows from (with replacement).
        explanation: iterable of conditions; ``str(cond.arg(0))`` of each one
            must be a column name of ``X``.
        base_sample: mapping/Series providing the value of each explained feature.
        n_samples: number of synthetic rows to generate.
        random_state: optional seed forwarded to ``DataFrame.sample`` so the
            draw can be reproduced; ``None`` keeps the unseeded behaviour.

    Returns:
        DataFrame with ``n_samples`` rows and a fresh 0..n-1 index.
    """
    explained_features = {str(cond.arg(0)) for cond in explanation}

    # Generate random samples
    synthetic_samples = X.sample(
        n=n_samples, replace=True, random_state=random_state
    ).reset_index(drop=True)

    # Overwrite the explained features with the base sample's values
    for feature in explained_features:
        synthetic_samples[feature] = base_sample[feature]

    return synthetic_samples


In [52]:
def check_margin_similar_samples(dataset_name, explainer_name, n_per_class=5, n_synthetic=100):
    """Measure model margins on synthetic samples that share an explanation.

    For up to ``n_per_class`` test samples of each predicted class, an
    explanation is computed, ``n_synthetic`` synthetic rows are generated with
    the explained features pinned to the sample's values, and the model's
    margins on those rows are summarized.

    Args:
        dataset_name: key into the module-level ``dataset_context``.
        explainer_name: "explainer" (class-preserving explanation) or
            "explainer_prob" (threshold at half the predicted margin).
        n_per_class: number of samples explained per predicted class.
        n_synthetic: synthetic rows generated per explained sample.

    Returns:
        DataFrame with per-class averages of the mean, min and max margins.

    Raises:
        NotImplementedError: for "anchor" and "lime", which are placeholders.
        ValueError: for any other unknown ``explainer_name``.
    """
    # Fail early: previously these names left `explanation` unbound (or stale).
    if explainer_name in ("anchor", "lime"):
        raise NotImplementedError(f"Explainer '{explainer_name}' is not implemented yet.")
    if explainer_name not in ("explainer", "explainer_prob"):
        raise ValueError(f"Unknown explainer_name: {explainer_name!r}")

    model, X, y, X_test, y_test, explainer = dataset_context[dataset_name]
    summary_columns = ["class", "avg_mean_margin", "avg_min_margin", "avg_max_margin"]

    # One batch prediction instead of one model call per test row.
    y_pred = model.predict(X_test)

    results = []
    for target_class in [0, 1]:
        positions = np.flatnonzero(y_pred == target_class)[:max(n_per_class, 0)]
        for position in positions:
            sample = X_test.iloc[position]

            if explainer_name == "explainer":
                explanation = explainer.explain(sample, reorder="asc")
            else:  # "explainer_prob"
                margin = model.predict([sample], output_margin=True)[0]
                explanation = explainer.explain_prob(sample, reorder="asc", target_threshold=margin / 2)

            synthetic_df = generate_samples_from_explanation(X, explanation, sample, n_samples=n_synthetic)
            margins = model.predict(synthetic_df, output_margin=True)

            results.append({
                "original_class": target_class,
                "mean_margin": np.mean(margins),
                "min_margin": np.min(margins),
                "max_margin": np.max(margins),
                "explainer": explainer_name,
            })

    if not results:
        # Nothing was explained; return an empty summary instead of failing in groupby.
        return pd.DataFrame(columns=summary_columns)

    results = pd.DataFrame(results)

    summary = results.groupby("original_class").agg({
        "mean_margin": "mean",
        "min_margin": "mean",  # average of the minimums
        "max_margin": "mean"   # average of the maximums
    }).reset_index()

    summary.columns = summary_columns

    return summary


## magic

In [57]:
# df_results = check_margin_similar_samples("magic", "anchor")
# df_results

In [58]:
# df_results = check_margin_similar_samples("magic", "lime")
# df_results

In [59]:
# "magic": margins of synthetic samples built from class-preserving explanations.
df_results = check_margin_similar_samples("magic", "explainer")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-2.285163,-3.097516,-0.991033
1,1,2.111,0.317251,5.645457


In [60]:
# "magic": margins of synthetic samples built from half-margin threshold explanations.
df_results = check_margin_similar_samples("magic", "explainer_prob")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-2.34782,-2.748309,-1.438401
1,1,3.084973,1.469171,5.575043


## adult

In [61]:
# df_results = check_margin_similar_samples("adult", "anchor")
# df_results

In [62]:
# df_results = check_margin_similar_samples("adult", "lime")
# df_results

In [63]:
# "adult": margins of synthetic samples built from class-preserving explanations.
df_results = check_margin_similar_samples("adult", "explainer")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-1.30076,-1.810563,-0.752324
1,1,2.71532,0.448645,5.117767


In [64]:
# "adult": margins of synthetic samples built from half-margin threshold explanations.
df_results = check_margin_similar_samples("adult", "explainer_prob")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-1.386895,-1.744472,-0.983192
1,1,2.710778,1.266646,4.813052


## mushroom

In [65]:
# df_results = check_margin_similar_samples("mushroom", "anchor")
# df_results

In [66]:
# df_results = check_margin_similar_samples("mushroom", "lime")
# df_results

In [67]:
# "mushroom": margins of synthetic samples built from class-preserving explanations.
df_results = check_margin_similar_samples("mushroom", "explainer")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-6.253671,-7.884968,-4.884665
1,1,5.768359,2.51256,7.677604


In [68]:
# "mushroom": margins of synthetic samples built from half-margin threshold explanations.
df_results = check_margin_similar_samples("mushroom", "explainer_prob")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-6.284242,-7.838942,-4.838922
1,1,6.375678,4.408938,7.594402


## spambase

In [69]:
# df_results = check_margin_similar_samples("spambase", "anchor")
# df_results

In [70]:
# df_results = check_margin_similar_samples("spambase", "lime")
# df_results

In [71]:
# "spambase": margins of synthetic samples built from class-preserving explanations.
df_results = check_margin_similar_samples("spambase", "explainer")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-3.900222,-7.022644,-1.25408
1,1,3.77184,2.247439,5.551502


In [72]:
# "spambase": margins of synthetic samples built from half-margin threshold explanations.
df_results = check_margin_similar_samples("spambase", "explainer_prob")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-4.635751,-7.269703,-3.002661
1,1,4.056298,2.830221,5.517482
