# Imports + class

In [8]:
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import numpy as np
from z3 import *
from xgboost import XGBClassifier
from pmlb import fetch_data
set_option(rational_to_decimal=True)

import re
import time
import pandas as pd
from sklearn.model_selection import train_test_split
np.set_printoptions(suppress=True)

In [9]:
class XGBoostExplainer:
    def __init__(self, model, data):
        self.model = model
        self.data = data.values
        self.columns = data.columns
        self.max_categories = 2

        set_option(rational_to_decimal=True)
        self.categoric_features = self.get_categoric_features(self.data)
        self.T_model = self.model_trees_expression(self.model)
        self.T = self.T_model

    def explain(self, instance, reorder="asc"):
        self.I = self.instance_expression(instance)
        self.D, self.D_add = self.decision_function_expression(self.model, [instance])
        return self.explain_expression(self.I, And(self.T, self.D_add), self.D, self.model, reorder)

    def explain_prob(self, instance, reorder="asc", threshold_margin=0, target_threshold=None):
        try:
            self.I = self.instance_expression(instance)
            self.D, self.D_add = self.decision_function_expression(self.model, [instance])
            return self.explain_expression_prob(self.I, And(self.T, self.D_add), self.D, self.model, reorder, threshold_margin, target_threshold)
        except Exception as e:
            print("Error in explain_prob:", e)
            return []

    def get_categoric_features(self, data: np.ndarray):
        categoric_features = []
        for i in range(data.shape[1]):
            feature_values = data[:, i]
            unique_values = np.unique(feature_values)
            if len(unique_values) <= self.max_categories:
                categoric_features.append(self.columns[i])

        return categoric_features

    def feature_constraints(self, constraints=[]):
        """TODO
        esperado receber limites das features pelo usuário
        formato previso: matriz/dataframe [feaature, min/max, valor]
        constraaint_expression = "constraaint_df_to_feature()"
        """
        return

    def model_trees_expression(self, model):
        df = model.get_booster().trees_to_dataframe()
        if model.get_booster().feature_names == None:
            feature_map = {f"f{i}": col for i, col in enumerate(self.columns)}
            df["Feature"] = df["Feature"].replace(feature_map)

        df["Split"] = df["Split"].round(4)
        self.booster_df = df
        class_index = 0  # if model.n_classes_ == 2:
        all_tree_formulas = []

        for tree_index in df["Tree"].unique():
            tree_df = df[df["Tree"] == tree_index]
            o = Real(f"o_{tree_index}_{class_index}")

            if len(tree_df) == 1 and tree_df.iloc[0]["Feature"] == "Leaf":
                leaf_value = tree_df.iloc[0]["Gain"]
                all_tree_formulas.append(And(o == leaf_value))
                continue
            path_formulas = []

            def get_conditions(node_id):
                conditions = []
                current_node = tree_df[tree_df["ID"] == node_id]
                if current_node.empty:
                    return conditions

                parent_node = tree_df[
                    (tree_df["Yes"] == node_id) | (tree_df["No"] == node_id)
                ]
                if not parent_node.empty:
                    parent_data = parent_node.iloc[0]
                    feature = parent_data["Feature"]
                    split_value = parent_data["Split"]
                    x = Real(feature)
                    if parent_data["Yes"] == node_id:
                        conditions.append(x < split_value)
                    else:
                        conditions.append(x >= split_value)
                    conditions = get_conditions(parent_data["ID"]) + conditions

                return conditions

            for _, node in tree_df[tree_df["Feature"] == "Leaf"].iterrows():
                leaf_value = node["Gain"]
                leaf_id = node["ID"]
                conditions = get_conditions(leaf_id)
                path_formula = And(*conditions)
                implication = Implies(path_formula, o == leaf_value)
                path_formulas.append(implication)

            all_tree_formulas.append(And(*path_formulas))
        return And(*all_tree_formulas)

    def get_init_value(self, model, x, estimator_variables):
        estimator_pred = Solver()
        estimator_pred.add(self.I)
        estimator_pred.add(self.T)
        if estimator_pred.check() == sat:
            solvermodel = estimator_pred.model()
            total_sum = sum(
                float(solvermodel.eval(var).as_fraction()) for var in estimator_variables
            )
        else:
            total_sum = 0
            print("estimator error")
        self.predicted_margin = model.predict(x, output_margin=True)[0]
        init_value = self.predicted_margin - total_sum
        self.init_value = init_value
        return init_value

    def decision_function_expression(self, model, x):
        n_classes = 1 if model.n_classes_ <= 2 else model.n_classes_
        predicted_class = model.predict(x)[0]
        self.predicted_class = predicted_class
        n_estimators = int(len(model.get_booster().get_dump()) / n_classes)
        estimator_variables = [Real(f"o_{i}_0") for i in range(n_estimators)] # _0 only for binary classification
        self.estimator_variables = estimator_variables
        init_value = self.get_init_value(model, x, estimator_variables)
        # print("init:", round(init_value, 2))

        equation_list = []

        estimator_sum = Real("estimator_sum")
        equation_o = estimator_sum == Sum(estimator_variables)
        equation_list.append(equation_o)

        decision = Real("decision")
        equation_list.append(decision == estimator_sum + init_value)

        if predicted_class == 0:
            final_equation = decision < 0
        else:
            final_equation = decision > 0

        return final_equation, And(equation_list)

    def instance_expression(self, instance):
        formula = [Real(self.columns[i]) == value for i, value in enumerate(instance)]
        return formula

    def explain_expression(self, I, T_s, D_s, model, reorder):
        i_expression = I.copy()

        importances = model.feature_importances_
        non_zero_indices = np.where(importances != 0)[0]

        if reorder == "asc":
            sorted_feature_indices = non_zero_indices[
                np.argsort(importances[non_zero_indices])
            ]
            i_expression = [i_expression[i] for i in sorted_feature_indices]
        elif reorder == "desc":
            sorted_feature_indices = non_zero_indices[
                np.argsort(-importances[non_zero_indices])
            ]
            i_expression = [i_expression[i] for i in sorted_feature_indices]

        for feature in i_expression.copy():
            # print("\n---removed", feature)
            i_expression.remove(feature)

            # prove(Implies(And(And(i_expression), T), D))
            if self.is_proved(Implies(And(And(i_expression), T_s), D_s)):
                continue
                # print('proved')
            else:
                # print('not proved')
                i_expression.append(feature)
        # print(self.is_proved(Implies(And(And(i_expression), T_s), D_s)))
        return i_expression

    def explain_expression_prob(self, I, T_s, D_s, model, reorder, threshold_margin, target_threshold):
        i_expression = I.copy()

        importances = model.feature_importances_
        non_zero_indices = np.where(importances != 0)[0]

        if reorder == "asc":
            sorted_feature_indices = non_zero_indices[
                np.argsort(importances[non_zero_indices])
            ]
            i_expression = [i_expression[i] for i in sorted_feature_indices]
        elif reorder == "desc":
            sorted_feature_indices = non_zero_indices[
                np.argsort(-importances[non_zero_indices])
            ]
            i_expression = [i_expression[i] for i in sorted_feature_indices]

        threshold = 0

        if target_threshold:
            threshold = target_threshold
        elif threshold_margin != 0:
            threshold = self.predicted_margin * threshold_margin/100
            # print("margin:", self.predicted_margin, "accepted margin:", threshold)
        self.xai_predicted_margin = self.predicted_margin

        for feature in i_expression.copy():
            # print("\n---removed", feature)
            i_expression.remove(feature)

            if self.is_proved_sat(And(And(i_expression), T_s), threshold):
                # print('proved')
                continue
            else:
                # print('not proved -- added back')
                i_expression.append(feature)
        return i_expression


    def is_proved(self, decision_exp):
        s = Solver()
        s.add(Not(decision_exp))
        if s.check() == unsat:
            return True
        else:
            # print(s.model())
            return False

    def is_proved_sat(self, decision_exp, threshold):
      decision = Real("decision")

      debug = Real("debug") == 0
      predicted_class = self.predicted_class

      if predicted_class == 0:
        estmax = Optimize()
        estmax.add(decision_exp)
        estmax.add(debug)
        maxvalue = estmax.maximize(decision)
        if estmax.check() == sat:
            # print("\nmax sat", maxvalue.value())
            try:
              if float(maxvalue.value().as_fraction()) > threshold:
                  return False # can change class
              else:
                  self.xai_predicted_margin = float(maxvalue.value().as_fraction())
            except:
              print("error max =", maxvalue.value())
              return False
        else:
            print("error")

      if predicted_class == 1:
        estmin = Optimize()
        estmin.add(decision_exp)
        estmin.add(debug)
        minvalue = estmin.minimize(decision)
        if estmin.check() == sat:
            # print("\nmin sat", minvalue.value())
            try:
              if float(minvalue.value().as_fraction()) < threshold:
                  return False # can change class
              else:
                  self.xai_predicted_margin = float(minvalue.value().as_fraction())
            except:
              print("error min =", minvalue.value())
              return False
        else:
            print("error")

      if predicted_class == 0:
        self.solvermodel = estmax.model()
      if predicted_class == 1:
        self.solvermodel = estmin.model()
      return True
  
def generate_results(explainer, X_test, y_pred, classification, path, reorder="asc"):
    results = []
    if classification == 0:
        increase_prob = -0.01
    else:
        increase_prob = 0.01
    for i in X_test[y_pred == classification].index:
        sample = X_test.loc[i].values
        xai = explainer.explain_prob(sample, reorder=reorder)
        xaiprob_initial = explainer.xai_predicted_margin
        len_xai_initial = len(xai)

        xai = explainer.explain_prob(sample, reorder=reorder, target_threshold=xaiprob_initial + increase_prob)
        xaiprob_final = explainer.xai_predicted_margin
        len_xai_final = len(xai)

        if round(xaiprob_initial, 2) == round(xaiprob_final, 2):
            len_xai_initial = len_xai_final
        results.append({
            "index": i,
            "class": classification,
            "xaiprob_initial": round(xaiprob_initial, 2),
            "len_xai_initial": len_xai_initial,
            "xaiprob_final": round(xaiprob_final, 2),
            "len_xai_final": len_xai_final
        })
    df_results = pd.DataFrame(results)
    return df_results
    # df_results.to_csv(f'{path}/results_{classification}_{reorder}.csv', index=False) 

# Prepare datasets and models

In [16]:
def prepare_dataset_model_explainer(dataset_name, dataset_params):
    # Fetch data
    dataset = fetch_data(dataset_name)
    X = dataset.drop('target', axis=1)
    y = dataset['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
    
    params = dataset_params[dataset_name]

    # Train model
    model = XGBClassifier(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Metrics
    print("Dataset size:", len(X))
    print("Columns:", len(X.columns))
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("F1-score:", f1_score(y_test, y_pred))

    # Explainer
    explainer = XGBoostExplainer(model, X_train)

    return model, X, y, X_test, y_test, explainer

def prepare_all_datasets(dataset_names, dataset_params):
    context = {}
    for name in dataset_names:
        print(f"\n--- Preparing {name} ---")
        model, X, y, X_test, y_test, explainer = prepare_dataset_model_explainer(name, dataset_params)
        context[name] = (model, X, y, X_test, y_test, explainer)
    return context

In [17]:
dataset_names = [
                "magic", 
                "adult", 
                "mushroom", 
                "spambase",
                 ]

dataset_params = {
    "magic": {"n_estimators": 30, "max_depth": 3},
    "adult": {"n_estimators": 30, "max_depth": 3},
    "mushroom": {"n_estimators": 30, "max_depth": 3},
    "spambase": {"n_estimators": 30, "max_depth": 3},
}

dataset_context = prepare_all_datasets(dataset_names, dataset_params)


--- Preparing magic ---
Dataset size: 19020
Columns: 10
Accuracy: 0.8755695758850333
F1-score: 0.8060109289617486

--- Preparing adult ---
Dataset size: 48842
Columns: 14
Accuracy: 0.8652835596806114
F1-score: 0.9144491635607177

--- Preparing mushroom ---
Dataset size: 8124
Columns: 22
Accuracy: 1.0
F1-score: 1.0

--- Preparing spambase ---
Dataset size: 4601
Columns: 57
Accuracy: 0.9471397538015931
F1-score: 0.9322191272051996


In [18]:
def count_classes(dataset_name):
    model, X_test, y_test, explainer = dataset_context[dataset_name]
    y_pred = model.predict(X_test)
    class_0_count = np.sum(y_pred == 0)
    class_1_count = np.sum(y_pred == 1)
    print(f"Dataset: {dataset_name}, Class 0: {class_0_count}, Class 1: {class_1_count}")
    return class_0_count, class_1_count

count_classes("magic")
count_classes("adult")
count_classes("mushroom")
count_classes("spambase")

ValueError: too many values to unpack (expected 4)

# Check explanations

In [63]:
def get_explain_results(dataset_name):
    model, X_test, y_test, explainer = dataset_context[dataset_name]
    y_pred = model.predict(X_test)

    # Function to explain a sample of a given class
    def explain_for_class(classification):
        # Get a sample of the specified class in predictions
        indices = X_test[y_pred == classification].index
        if len(indices) == 0:
            print(f"No samples predicted as class {classification}.")
            return
        
        for i in indices:
            sample = X_test.loc[i].values
            margin = model.predict([sample], output_margin=True)[0]
            if abs(margin) > 1.0:
                break
        else:
            print(f"No samples with |margin| > 1.0 for class {classification}.")
            return

        # Initial explanation
        margin = model.predict([sample], output_margin=True)[0]
        print(f"\nClass {classification} - Margin: {margin}, Sample index: {i}")

        xai = explainer.explain_prob(sample)
        print("Initial abductive explanation:", xai)
        
        xaiprob_initial = explainer.xai_predicted_margin
        print("Initial predicted margin:", xaiprob_initial)

        # Adjust margin direction
        # increase_prob = -0.01 if classification == 0 else 0.01

        # Explanation with adjusted threshold
        xai_confidence = explainer.explain_prob(sample, target_threshold= (margin / 2))
        xaiprob_final = explainer.xai_predicted_margin
        print("Confidence-aware abductive explanation:", xai_confidence)
        print("Final predicted margin:", xaiprob_final)

    # Explain for both classes
    explain_for_class(0)
    explain_for_class(1)


In [20]:
get_explain_results("magic")


Class 0 - Margin: -1.5318019390106201, Sample index: 13519
Initial abductive explanation: [FConc1 == 0.2305, FSize == 2.4793, FDist == 54.4322, FM3Long == 24.5614, FWidth == 16.14, FLength == 26.7476, FAlpha == 39.463]
Initial predicted margin: -0.305143745439
Confidence-aware abductive explanation: [FConc1 == 0.2305, FConc == 0.4378, FSize == 2.4793, FDist == 54.4322, FM3Long == 24.5614, FWidth == 16.14, FLength == 26.7476, FAlpha == 39.463]
Final predicted margin: -0.842865952239

Class 1 - Margin: 5.3488240242004395, Sample index: 14526
Initial abductive explanation: [FDist == 258.2984, FLength == 37.6176, FAlpha == 70.2243]
Initial predicted margin: 0.298496220521
Confidence-aware abductive explanation: [FSize == 2.9171, FWidth == 7.7810000000?, FAlpha == 70.2243]
Final predicted margin: 3.056817650122


In [66]:
get_explain_results("adult")


Class 0 - Margin: -1.0879830121994019, Sample index: 3829
Initial abductive explanation: [capital-loss == 0, occupation == 4, age == 40, hours-per-week == 46, capital-gain == 0, education-num == 14, relationship == 0]
Initial predicted margin: -0.201337004746
Confidence-aware abductive explanation: [capital-loss == 0, occupation == 4, age == 40, hours-per-week == 46, capital-gain == 0, marital-status == 2, education-num == 14, relationship == 0]
Final predicted margin: -0.623709311386

Class 1 - Margin: 4.173804759979248, Sample index: 30437
Initial abductive explanation: [occupation == 8, age == 28, capital-gain == 0, education-num == 9, relationship == 2]
Initial predicted margin: 0.063445309674
Confidence-aware abductive explanation: [capital-loss == 0, hours-per-week == 40, capital-gain == 0, marital-status == 4, education-num == 9, relationship == 2]
Final predicted margin: 2.10872710087


In [22]:
get_explain_results("mushroom")


Class 0 - Margin: -7.027366638183594, Sample index: 7176
Initial abductive explanation: [odor == 6, gill-size == 0, spore-print-color == 1]
Initial predicted margin: -4.6045556678
Confidence-aware abductive explanation: [odor == 6, gill-size == 0, spore-print-color == 1]
Final predicted margin: -4.6045556678

Class 1 - Margin: 8.294289588928223, Sample index: 4961
Initial abductive explanation: [odor == 4, spore-print-color == 3]
Initial predicted margin: 0.0284762922
Confidence-aware abductive explanation: [odor == 4, gill-spacing == 0, gill-size == 0, veil-color == 2, spore-print-color == 3]
Final predicted margin: 5.1567087872


In [32]:
get_explain_results("spambase")


Class 0 - Margin: -6.460454940795898, Sample index: 4538
Initial abductive explanation: [27 == 0, 22 == 0, 54 == 1.47, 15 == 0, 45 == 3.03, 4 == 0, 55 == 5, 6 == 0, 51 == 0, 52 == 0]
Initial predicted margin: -0.04732684392
Confidence-aware abductive explanation: [44 == 3.03, 56 == 25, 16 == 0, 41 == 3.03, 27 == 0, 36 == 3.03, 22 == 0, 54 == 1.47, 15 == 0, 26 == 0, 45 == 3.03, 4 == 0, 23 == 0, 55 == 5, 6 == 0, 51 == 0, 52 == 0]
Final predicted margin: -3.317425186

Class 1 - Margin: 4.380174160003662, Sample index: 1685
Initial abductive explanation: [56 == 886, 41 == 0, 22 == 0.53, 25 == 0, 54 == 19.26, 15 == 0.53, 26 == 0, 45 == 0, 4 == 0.53, 23 == 0.26, 20 == 0, 24 == 0, 55 == 107, 51 == 0.555]
Initial predicted margin: 0.1421553674
Confidence-aware abductive explanation: [42 == 0, 28 == 0, 11 == 0.26, 44 == 0, 56 == 886, 41 == 0, 36 == 0, 25 == 0, 54 == 19.26, 15 == 0.53, 26 == 0, 45 == 0, 4 == 0.53, 23 == 0.26, 20 == 0, 24 == 0, 55 == 107, 51 == 0.555]
Final predicted margin: 2.2

# Check threshold datasets

## functions

In [33]:
def get_explain_results_n(dataset_name, n_samples):
    model, X_test, y_test, explainer = dataset_context[dataset_name]
    y_pred = model.predict(X_test)

    # Prepare result DataFrame
    results = []

    # Loop over both classes
    for classification in [0, 1]:
        # Get indices for predicted samples of this class
        indices = X_test[y_pred == classification].index[:n_samples]
        if len(indices) == 0:
            print(f"No samples predicted as class {classification}.")
            continue

        for idx in indices:
            sample = X_test.loc[idx].values

            # Initial explanation
            margin = model.predict([sample], output_margin=True)[0]
            xai_initial = explainer.explain_prob(sample)
            xaiprob_initial = explainer.xai_predicted_margin
            exp_len_initial = len(xai_initial)

            # Explanation with adjusted threshold (half the margin)
            xai_confidence_case1 = explainer.explain_prob(sample, target_threshold=margin / 2)
            xaiprob_case1 = explainer.xai_predicted_margin
            exp_len_case1 = len(xai_confidence_case1)
            
            increase_prob = -0.01 if margin < 0 else 0.01
            xai_confidence_case2 = explainer.explain_prob(sample, target_threshold= xaiprob_initial + increase_prob)
            xaiprob_case2 = explainer.xai_predicted_margin
            exp_len_case2 = len(xai_confidence_case2)

            # Append to results
            results.append({
                'sample_id': idx,
                'class': classification,
                'pred_margin': round(margin, 4),
                'exp_len_inicial': exp_len_initial,
                'xaiprob_inicial': round(xaiprob_initial, 4),
                'exp_len_case1': exp_len_case1,
                'xaiprob_case1': round(xaiprob_case1, 4),
                'exp_len_case2': exp_len_case2,
                'xaiprob_case2': round(xaiprob_case2, 4),
            })

    # Convert to DataFrame
    df_results = pd.DataFrame(results)
    
    return df_results

def summarize_explanations(df_results):
    # Agrupar por classe e calcular as métricas
    summary = df_results.groupby('class').agg(
        pred_margin_mean=('pred_margin', 'mean'),
        pred_margin_std=('pred_margin', 'std'),
        exp_len_inicial_mean=('exp_len_inicial', 'mean'),
        exp_len_inicial_std=('exp_len_inicial', 'std'),
        xaiprob_inicial_mean=('xaiprob_inicial', 'mean'),
        xaiprob_inicial_std=('xaiprob_inicial', 'std'),
        exp_len_case1_mean=('exp_len_case1', 'mean'),
        exp_len_case1_std=('exp_len_case1', 'std'),
        xaiprob_case1_mean=('xaiprob_case1', 'mean'),
        xaiprob_case1_std=('xaiprob_case1', 'std'),
        exp_len_case2_mean=('exp_len_case2', 'mean'),
        exp_len_case2_std=('exp_len_case2', 'std'),
        xaiprob_case2_mean=('xaiprob_case2', 'mean'),
        xaiprob_case2_std=('xaiprob_case2', 'std'),
    ).reset_index()

    # Função para combinar mean ± std com 2 casas decimais
    def format_mean_std(mean, std):
        return f"{mean:.2f} ± {std:.2f}"

    # Aplicar a formatação em cada par de colunas
    formatted = pd.DataFrame()
    formatted['class'] = summary['class']
    formatted['Pred Margin'] = summary.apply(lambda row: format_mean_std(row['pred_margin_mean'], row['pred_margin_std']), axis=1)
    formatted['Exp Len Inicial'] = summary.apply(lambda row: format_mean_std(row['exp_len_inicial_mean'], row['exp_len_inicial_std']), axis=1)
    formatted['ECM Inicial'] = summary.apply(lambda row: format_mean_std(row['xaiprob_inicial_mean'], row['xaiprob_inicial_std']), axis=1)
    formatted['Exp Len case1'] = summary.apply(lambda row: format_mean_std(row['exp_len_case1_mean'], row['exp_len_case1_std']), axis=1)
    formatted['ECM case1'] = summary.apply(lambda row: format_mean_std(row['xaiprob_case1_mean'], row['xaiprob_case1_std']), axis=1)
    formatted['Exp Len case2'] = summary.apply(lambda row: format_mean_std(row['exp_len_case2_mean'], row['exp_len_case2_std']), axis=1)
    formatted['ECM case2'] = summary.apply(lambda row: format_mean_std(row['xaiprob_case2_mean'], row['xaiprob_case2_std']), axis=1)

    return formatted


# datasets

In [25]:
df_saheart = get_explain_results_n("magic", 100)
df_summary_saheart = summarize_explanations(df_saheart)
df_summary_saheart

Unnamed: 0,class,Pred Margin,Exp Len Inicial,ECM Inicial,Exp Len case1,ECM case1,Exp Len case2,ECM case2
0,0,-2.00 ± 1.03,6.48 ± 1.01,-0.25 ± 0.27,7.68 ± 0.89,-1.20 ± 0.59,7.12 ± 1.03,-0.52 ± 0.37
1,1,2.32 ± 1.61,3.52 ± 1.12,0.39 ± 0.43,4.38 ± 1.00,1.45 ± 0.99,4.43 ± 1.45,0.72 ± 0.51


In [46]:
df_adult = get_explain_results_n("adult", 100)
df_summary_adult = summarize_explanations(df_adult)
df_summary_adult

Unnamed: 0,class,Pred Margin,Exp Len Inicial,ECM Inicial,Exp Len case1,ECM case1,Exp Len case2,ECM case2
0,0,-1.79 ± 1.64,7.35 ± 2.46,-0.19 ± 0.16,8.89 ± 1.94,-1.03 ± 0.90,8.16 ± 2.65,-0.43 ± 0.30
1,1,2.65 ± 1.60,4.86 ± 1.02,0.33 ± 0.32,5.60 ± 0.86,1.53 ± 0.90,5.40 ± 1.52,0.80 ± 0.54


In [47]:
df_mushroom = get_explain_results_n("mushroom", 100)
df_summary_mushroom = summarize_explanations(df_mushroom)
df_summary_mushroom

Unnamed: 0,class,Pred Margin,Exp Len Inicial,ECM Inicial,Exp Len case1,ECM case1,Exp Len case2,ECM case2
0,0,-6.56 ± 1.48,3.21 ± 1.03,-2.88 ± 2.10,4.01 ± 1.80,-4.04 ± 1.05,4.14 ± 0.85,-3.87 ± 1.70
1,1,7.12 ± 1.52,3.30 ± 1.71,0.56 ± 0.60,6.03 ± 1.62,4.08 ± 1.01,4.29 ± 1.68,1.62 ± 0.59


In [48]:
df_sonar = get_explain_results_n("spambase", 100)
df_summary_sonar = summarize_explanations(df_sonar)
df_summary_sonar

Unnamed: 0,class,Pred Margin,Exp Len Inicial,ECM Inicial,Exp Len case1,ECM case1,Exp Len case2,ECM case2
0,0,-3.70 ± 1.49,12.45 ± 2.37,-0.10 ± 0.09,16.74 ± 1.82,-1.91 ± 0.75,13.01 ± 2.44,-0.22 ± 0.14
1,1,3.33 ± 1.47,13.25 ± 2.39,0.08 ± 0.07,17.45 ± 1.45,1.74 ± 0.73,13.51 ± 2.36,0.19 ± 0.12


# Test robustness noise

In [103]:
def get_robustness_results_n(dataset_name, n_samples, n_extra_samples, target_class=1,
                              mult_margin=0):
    model, X_test, y_test, explainer = dataset_context[dataset_name]

    dataset = fetch_data(dataset_name)
    X = dataset.drop('target', axis=1)
    
    # Armazena os resultados em lista
    results = []

    count = 0
    for i in range(len(X_test)):
        if count >= n_samples:
            break

        sample = X_test.iloc[i]
        pred_class = model.predict([sample])[0]
        if pred_class != target_class:
            continue

        margin = model.predict([sample], output_margin=True)[0]
        explanation = explainer.explain_prob(sample, reorder="asc", target_threshold=margin * mult_margin)

        satisfying_df = generate_samples_for_conditions(X, explanation, n_samples=n_extra_samples)
        noisy_df = apply_noise_to_samples(X, satisfying_df)
        noisy_pred = model.predict(noisy_df)

        class_0 = np.sum(noisy_pred == 0)
        class_1 = np.sum(noisy_pred == 1)

        results.append({
            "amostra": i,
            "classe_prevista": int(pred_class),
            "classe_0_com_ruido": int(class_0),
            "classe_1_com_ruido": int(class_1),
        })

        count += 1

    # Converte para DataFrame
    results_df = pd.DataFrame(results)

    # Cálculo de média ± desvio padrão para cada coluna
    mean_0 = results_df["classe_0_com_ruido"].mean()
    std_0 = results_df["classe_0_com_ruido"].std()

    mean_1 = results_df["classe_1_com_ruido"].mean()
    std_1 = results_df["classe_1_com_ruido"].std()

    mean_results = pd.DataFrame([{
        "explanation class": int(pred_class),
        "noise samples": n_extra_samples,
        "classified 0": f"{mean_0:.2f} ± {std_0:.2f}",
        "classified 1": f"{mean_1:.2f} ± {std_1:.2f}"
    }])

    return mean_results


def generate_samples_for_conditions(df, conditions, n_samples=10, random_state=42):
    """
    Gera amostras aleatórias que atendem a um conjunto de condições, variando apenas as outras features.
    """
    np.random.seed(random_state)
    fixed_values = {str(cond.arg(0)): float(cond.arg(1).as_fraction()) for cond in conditions}

    df_variation = df.drop(columns=fixed_values.keys())
    samples = {col: np.random.uniform(df[col].min(), df[col].max(), n_samples) for col in df_variation.columns}
    samples = {key: np.round(value, 2) for key, value in samples.items()}

    for feature, value in fixed_values.items():
        samples[feature] = [value] * n_samples

    generated_df = pd.DataFrame(samples)
    return generated_df[df.columns]


def apply_noise_to_samples(X, df):
    """
    Aplica ruído proporcional aos valores originais de cada amostra.
    O noise_level controla a intensidade do ruído (ex: 0.05 = ±5% do valor original).
    """
    noise_level=0.1
    df_noisy = df.copy()
    for col in df.columns:
        original = df[col]
        noise = original * np.random.uniform(-noise_level, noise_level, size=len(df))
        df_noisy[col] = original + noise
    return df_noisy


In [104]:
configs = [
    {"target_class": 0, "mult_margin": 0},
    {"target_class": 0, "mult_margin": 0.5},
    {"target_class": 1, "mult_margin": 0},
    {"target_class": 1, "mult_margin": 0.5},
]

In [106]:
all_results = []
for config in configs:
    df = get_robustness_results_n(
        "magic", 
        n_samples=100, 
        n_extra_samples=100, 
        target_class=config["target_class"], 
        mult_margin=config["mult_margin"], 
    )

    df["exp. threshold"] = "0%" if config["mult_margin"] == 0 else "50%"
    all_results.append(df)

results_df = pd.concat(all_results, ignore_index=True)
results_df

Unnamed: 0,explanation class,noise samples,classified 0,classified 1,exp. threshold
0,0,100,90.35 ± 13.31,9.65 ± 13.31,0%
1,0,100,91.97 ± 12.70,8.03 ± 12.70,50%
2,1,100,1.80 ± 5.31,98.20 ± 5.31,0%
3,1,100,1.98 ± 6.10,98.02 ± 6.10,50%


In [107]:
all_results = []
for config in configs:
    df = get_robustness_results_n(
        "adult", 
        n_samples=100,
        n_extra_samples=100, 
        target_class=config["target_class"], 
        mult_margin=config["mult_margin"], 
    )

    df["exp. threshold"] = "0%" if config["mult_margin"] == 0 else "50%"
    all_results.append(df)

results_df = pd.concat(all_results, ignore_index=True)
results_df

Unnamed: 0,explanation class,noise samples,classified 0,classified 1,exp. threshold
0,0,100,66.61 ± 29.65,33.39 ± 29.65,0%
1,0,100,69.13 ± 28.33,30.87 ± 28.33,50%
2,1,100,0.29 ± 1.12,99.71 ± 1.12,0%
3,1,100,0.08 ± 0.42,99.92 ± 0.42,50%


In [108]:
all_results = []
for config in configs:
    df = get_robustness_results_n(
        "mushroom", 
        n_samples=100,
        n_extra_samples=100, 
        target_class=config["target_class"], 
        mult_margin=config["mult_margin"], 
    )

    df["exp. threshold"] = "0%" if config["mult_margin"] == 0 else "50%"
    all_results.append(df)

results_df = pd.concat(all_results, ignore_index=True)
results_df

Unnamed: 0,explanation class,noise samples,classified 0,classified 1,exp. threshold
0,0,100,88.75 ± 21.40,11.25 ± 21.40,0%
1,0,100,89.55 ± 19.89,10.45 ± 19.89,50%
2,1,100,33.14 ± 30.43,66.86 ± 30.43,0%
3,1,100,31.45 ± 28.81,68.55 ± 28.81,50%


In [109]:
all_results = []
for config in configs:
    df = get_robustness_results_n(
        "spambase", 
        n_samples=100,
        n_extra_samples=100, 
        target_class=config["target_class"], 
        mult_margin=config["mult_margin"], 
    )

    df["exp. threshold"] = "0%" if config["mult_margin"] == 0 else "50%"
    all_results.append(df)

results_df = pd.concat(all_results, ignore_index=True)
results_df

Unnamed: 0,explanation class,noise samples,classified 0,classified 1,exp. threshold
0,0,100,100.00 ± 0.00,0.00 ± 0.00,0%
1,0,100,100.00 ± 0.00,0.00 ± 0.00,50%
2,1,100,0.00 ± 0.00,100.00 ± 0.00,0%
3,1,100,0.00 ± 0.00,100.00 ± 0.00,50%


# robustness similar

In [88]:
def generate_synthetic_samples(X, explanation, base_sample, n_samples=100, noise_std=0.1):
    feature_names = [str(cond.arg(0)) for cond in explanation]
    explained_features = set(feature_names)

    # Determina features categóricas com base em nunique
    categorical_cols = {col for col in X.columns if X[col].nunique() < 50}

    synthetic_samples = []
    for _ in range(n_samples):
        new_sample = {}

        for col in X.columns:
            is_categorical = col in categorical_cols

            if col in explained_features:
                value = base_sample[col]
                if not is_categorical:
                    # Adiciona pequeno ruído
                    new_sample[col] = float(value) + np.random.normal(0, noise_std)
                else:
                    # Mantém valor categórico da explicação
                    new_sample[col] = value
            else:
                if not is_categorical:
                    col_min, col_max = X[col].min(), X[col].max()
                    new_sample[col] = np.random.uniform(col_min, col_max)
                else:
                    new_sample[col] = np.random.choice(X[col].dropna().unique())

        synthetic_samples.append(new_sample)

    return pd.DataFrame(synthetic_samples)

def get_robustness_similarity(dataset_name, n_samples, n_extra_samples, target_class=1, mult_margin=0):
    model, X_test, y_test, explainer = dataset_context[dataset_name]
    dataset = fetch_data(dataset_name)
    X = dataset.drop("target", axis=1)

    results = []
    count = 0

    for i in range(len(X_test)):
        if count >= n_samples:
            break

        sample = X_test.iloc[i]
        pred_class = model.predict([sample])[0]
        if pred_class != target_class:
            continue

        margin = model.predict([sample], output_margin=True)[0]

        explanation_default = explainer.explain_prob(sample, reorder="asc")
        explanation_threshold = explainer.explain_prob(sample, reorder="asc", target_threshold=margin * mult_margin)

        for label, explanation in [("0%", explanation_default), ("50%", explanation_threshold)]:
            synthetic_df = generate_synthetic_samples(X, explanation, sample, n_samples=n_extra_samples)
            preds = model.predict(synthetic_df)

            class_0 = np.sum(preds == 0)
            class_1 = np.sum(preds == 1)

            results.append({
                "amostra": i,
                "classe_prevista": int(pred_class),
                "pred class 0": int(class_0),
                "pred class 1": int(class_1),
                "exp. threshold": label,
            })

        count += 1

    # Agrupa os resultados por tipo de explicação
    results_df = pd.DataFrame(results)
    grouped = results_df.groupby("exp. threshold").agg({
        "pred class 0": ["mean", "std"],
        "pred class 1": ["mean", "std"],
    }).reset_index()

    grouped.columns = ["exp. threshold", "classified 0 mean", "classified 0 std", "classified 1 mean", "classified 1 std"]

    grouped["classified 0"] = grouped["classified 0 mean"].round(2).astype(str) + " ± " + grouped["classified 0 std"].round(2).astype(str)
    grouped["classified 1"] = grouped["classified 1 mean"].round(2).astype(str) + " ± " + grouped["classified 1 std"].round(2).astype(str)
    grouped["explanation class"] = target_class

    return grouped[["explanation class", "classified 0", "classified 1", "exp. threshold"]]


In [89]:
configs = [
    {"target_class": 0, "mult_margin": 0.5},
    {"target_class": 1, "mult_margin": 0.5},
]

In [90]:
all_results = []
for config in configs:
    df = get_robustness_similarity(
        "magic", 
        n_samples=100, 
        n_extra_samples=100, 
        target_class=config["target_class"], 
        mult_margin=config["mult_margin"], 
    )

    all_results.append(df)

results_df = pd.concat(all_results, ignore_index=True)
results_df

Unnamed: 0,explanation class,classified 0,classified 1,exp. threshold
0,0,92.58 ± 10.41,7.42 ± 10.41,0%
1,0,94.36 ± 9.81,5.64 ± 9.81,50%
2,1,0.62 ± 2.34,99.38 ± 2.34,0%
3,1,0.86 ± 3.56,99.14 ± 3.56,50%


In [91]:
all_results = []
for config in configs:
    df = get_robustness_similarity(
        "adult", 
        n_samples=100, 
        n_extra_samples=100, 
        target_class=config["target_class"], 
        mult_margin=config["mult_margin"], 
    )

    all_results.append(df)

results_df = pd.concat(all_results, ignore_index=True)
results_df

Unnamed: 0,explanation class,classified 0,classified 1,exp. threshold
0,0,97.91 ± 9.22,2.09 ± 9.22,0%
1,0,98.37 ± 8.19,1.63 ± 8.19,50%
2,1,0.0 ± 0.0,100.0 ± 0.0,0%
3,1,0.0 ± 0.0,100.0 ± 0.0,50%


In [92]:
all_results = []
for config in configs:
    df = get_robustness_similarity(
        "mushroom", 
        n_samples=100, 
        n_extra_samples=100, 
        target_class=config["target_class"], 
        mult_margin=config["mult_margin"], 
    )

    all_results.append(df)

results_df = pd.concat(all_results, ignore_index=True)
results_df

Unnamed: 0,explanation class,classified 0,classified 1,exp. threshold
0,0,100.0 ± 0.0,0.0 ± 0.0,0%
1,0,100.0 ± 0.0,0.0 ± 0.0,50%
2,1,0.0 ± 0.0,100.0 ± 0.0,0%
3,1,0.0 ± 0.0,100.0 ± 0.0,50%


In [93]:
all_results = []
for config in configs:
    df = get_robustness_similarity(
        "spambase", 
        n_samples=100, 
        n_extra_samples=100, 
        target_class=config["target_class"], 
        mult_margin=config["mult_margin"], 
    )

    all_results.append(df)

results_df = pd.concat(all_results, ignore_index=True)
results_df

Unnamed: 0,explanation class,classified 0,classified 1,exp. threshold
0,0,99.84 ± 0.83,0.16 ± 0.83,0%
1,0,100.0 ± 0.0,0.0 ± 0.0,50%
2,1,4.27 ± 5.74,95.73 ± 5.74,0%
3,1,2.07 ± 3.18,97.93 ± 3.18,50%


# Margin - Anchor - Lime - Explainer

In [19]:
def generate_samples_from_explanation(X, explanation, base_sample, n_samples=100):
    feature_names = [str(cond.arg(0)) for cond in explanation]
    explained_features = set(feature_names)

    # Gera amostras aleatórias
    synthetic_samples = X.sample(n=n_samples, replace=True).reset_index(drop=True)

    # Substitui valores das features explicadas pelos valores da base_sample
    for feature in explained_features:
        synthetic_samples[feature] = base_sample[feature]

    return synthetic_samples


In [52]:
def check_margin_similar_samples(dataset_name, explainer_name, n_per_class=5, n_synthetic=100):
    model, X, y, X_test, y_test, explainer = dataset_context[dataset_name]

    results = []

    for target_class in [0, 1]:
        count = 0
        for i in range(len(X_test)):
            if count >= n_per_class:
                break

            sample = X_test.iloc[i]
            pred = model.predict([sample])[0]
            if pred != target_class:
                continue

            if explainer_name == "anchor":
                #anchor exp
                pass
            elif explainer_name == "lime":
                # lime exp
                pass
            elif explainer_name == "explainer":
                explanation = explainer.explain(sample, reorder="asc")
            elif explainer_name == "explainer_prob":
                margin = model.predict([sample], output_margin=True)[0]
                explanation = explainer.explain_prob(sample, reorder="asc", target_threshold=margin / 2)

            synthetic_df = generate_samples_from_explanation(X, explanation, sample, n_samples=n_synthetic)
            margins = model.predict(synthetic_df, output_margin=True)

            results.append({
                "original_class": target_class,
                "mean_margin": np.mean(margins),
                "min_margin": np.min(margins),
                "max_margin": np.max(margins),
                "explainer": explainer_name,
            })

            count += 1
    
    results = pd.DataFrame(results)

    summary = results.groupby("original_class").agg({
        "mean_margin": "mean",
        "min_margin": "mean",  # média dos mínimos
        "max_margin": "mean"   # média dos máximos
    }).reset_index()

    summary.columns = ["class", "avg_mean_margin", "avg_min_margin", "avg_max_margin"]

    return summary


## magic

In [57]:
# df_results = check_margin_similar_samples("magic", "anchor")
# df_results

In [58]:
# df_results = check_margin_similar_samples("magic", "lime")
# df_results

In [59]:
df_results = check_margin_similar_samples("magic", "explainer")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-2.285163,-3.097516,-0.991033
1,1,2.111,0.317251,5.645457


In [60]:
df_results = check_margin_similar_samples("magic", "explainer_prob")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-2.34782,-2.748309,-1.438401
1,1,3.084973,1.469171,5.575043


## adult

In [61]:
# df_results = check_margin_similar_samples("adult", "anchor")
# df_results

In [62]:
# df_results = check_margin_similar_samples("adult", "lime")
# df_results

In [63]:
df_results = check_margin_similar_samples("adult", "explainer")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-1.30076,-1.810563,-0.752324
1,1,2.71532,0.448645,5.117767


In [64]:
df_results = check_margin_similar_samples("adult", "explainer_prob")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-1.386895,-1.744472,-0.983192
1,1,2.710778,1.266646,4.813052


## mushroom

In [65]:
# df_results = check_margin_similar_samples("mushroom", "anchor")
# df_results

In [66]:
# df_results = check_margin_similar_samples("mushroom", "lime")
# df_results

In [67]:
df_results = check_margin_similar_samples("mushroom", "explainer")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-6.253671,-7.884968,-4.884665
1,1,5.768359,2.51256,7.677604


In [68]:
df_results = check_margin_similar_samples("mushroom", "explainer_prob")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-6.284242,-7.838942,-4.838922
1,1,6.375678,4.408938,7.594402


## spambase

In [69]:
# df_results = check_margin_similar_samples("spambase", "anchor")
# df_results

In [70]:
# df_results = check_margin_similar_samples("spambase", "lime")
# df_results

In [71]:
df_results = check_margin_similar_samples("spambase", "explainer")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-3.900222,-7.022644,-1.25408
1,1,3.77184,2.247439,5.551502


In [72]:
df_results = check_margin_similar_samples("spambase", "explainer_prob")
df_results

Unnamed: 0,class,avg_mean_margin,avg_min_margin,avg_max_margin
0,0,-4.635751,-7.269703,-3.002661
1,1,4.056298,2.830221,5.517482
