In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris, load_wine
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from z3 import *
from fractions import Fraction
from pmlb import fetch_data

In [2]:
class XGBoostExplainer():
    """SMT-based explainer for a fitted XGBoost classifier.

    The booster's trees are encoded once as Z3 implications over one real
    variable per feature. ``explain`` then greedily drops feature equalities
    of a sample while Z3 can still prove the predicted class, and
    ``explain_range`` widens the remaining equalities into intervals.
    """

    def __init__(self, model, data):
        """Encode ``model`` so that individual samples can be explained later.

        Args:
            model (XGBClassifier): fitted xgboost classifier.
            data (DataFrame): feature frame (X or X_train). Supplies the column
                names, the categorical-feature detection and the dataset
                bounds used by ``explain_range``.
        """
        self.model = model
        self.data = data.values
        self.columns = data.columns
        # Columns with at most this many distinct values are treated as categorical.
        self.max_categories = 2
        # Collapsed to 1 for <= 2 classes so every tree is grouped under class index 0.
        self.n_classes = 1 if model.n_classes_ <= 2 else model.n_classes_

        self.categoric_features = self.get_categoric_features(self.data)
        self.trees_expression = self.get_model_trees_expression(self.model, self.n_classes)

    def _bind_sample(self, sample):
        """Set ``sample_expression``, ``D`` and ``D_add`` for ``sample``."""
        self.sample_expression = self.get_sample_expression(sample)
        if self.n_classes > 2:
            self.D, self.D_add = self.get_decision_function_multiclass(self.model, [sample])
        else:
            self.D, self.D_add = self.get_decision_function_binary(self.model, [sample])

    def explain(self, sample, reorder="asc"):
        """Return the feature equalities kept as the explanation of ``sample``.

        Args:
            sample: 1-D sequence of feature values, ordered like ``self.columns``.
            reorder: ``"asc"`` / ``"desc"`` to try removing features ordered by
                model importance; any other value keeps the column order.

        Returns:
            list[str]: entries formatted as ``"<feature> == <value>"``.
        """
        self._bind_sample(sample)
        return self.explain_expression(
            self.sample_expression, self.trees_expression, self.D, self.D_add, self.model, reorder
        )

    def get_categoric_features(self, data: np.ndarray):
        """Return names of columns with at most ``self.max_categories`` distinct values."""
        categoric_features = []
        for i in range(data.shape[1]):
            if len(np.unique(data[:, i])) <= self.max_categories:
                categoric_features.append(self.columns[i])
        return categoric_features

    def get_sample_expression(self, sample):
        """Build one ``Real(column) == value`` equality per feature of ``sample``.

        Also stores the list on ``self.sample_exp``.
        """
        sample_exp = [Real(self.columns[i]) == value for i, value in enumerate(sample)]
        self.sample_exp = sample_exp
        return sample_exp

    def _path_conditions(self, tree_df, node_id):
        """Return the root-to-node split constraints leading to ``node_id`` in ``tree_df``."""
        conditions = []
        if tree_df[tree_df["ID"] == node_id].empty:
            return conditions

        parent_node = tree_df[(tree_df["Yes"] == node_id) | (tree_df["No"] == node_id)]
        if not parent_node.empty:
            parent_data = parent_node.iloc[0]
            x = Real(parent_data["Feature"])
            split_value = parent_data["Split"]
            # The "Yes" child is reached when feature < split, the "No" child otherwise.
            if parent_data["Yes"] == node_id:
                conditions.append(x < split_value)
            else:
                conditions.append(x >= split_value)
            conditions = self._path_conditions(tree_df, parent_data["ID"]) + conditions

        return conditions

    def get_model_trees_expression(self, model, n_class_trees):
        """Encode every tree of the booster as Z3 implications.

        Each tree gets an output variable ``o_<estimator>_<class>``; every leaf
        contributes ``Implies(And(path conditions), o == leaf value)``. The
        annotated tree dataframe is stored on ``self.booster_df``.

        Args:
            model: fitted classifier exposing ``get_booster()``.
            n_class_trees: number of class groups to iterate over.

        Returns:
            A Z3 conjunction over all class groups and trees.
        """
        df = model.get_booster().trees_to_dataframe()
        if model.get_booster().feature_names is None:
            # Without stored names the dataframe uses f0, f1, ...; map them to column names.
            feature_map = {f"f{i}": col for i, col in enumerate(self.columns)}
            df["Feature"] = df["Feature"].replace(feature_map)

        # NOTE(review): thresholds are rounded to 4 decimals, so the encoded split
        # may differ slightly from the booster's - confirm this is intended.
        df["Split"] = df["Split"].round(4)
        # Trees are assigned to class groups round-robin by tree index.
        df["Class"] = df["Tree"] % self.n_classes
        self.booster_df = df

        all_tree_formulas = []
        for class_index in range(n_class_trees):
            class_tree_formulas = []
            class_tree_df = df[df["Class"] == class_index]
            estimator_number = 0
            for tree_index in class_tree_df["Tree"].unique():
                tree_df = class_tree_df[class_tree_df["Tree"] == tree_index]
                o = Real(f"o_{estimator_number}_{class_index}")
                estimator_number += 1

                # A tree consisting of a single leaf has a constant output.
                if len(tree_df) == 1 and tree_df.iloc[0]["Feature"] == "Leaf":
                    leaf_value = tree_df.iloc[0]["Gain"]
                    class_tree_formulas.append(And(o == leaf_value))
                    continue

                path_formulas = []
                for _, node in tree_df[tree_df["Feature"] == "Leaf"].iterrows():
                    # For leaf rows the code reads the output value from the "Gain" column.
                    leaf_value = node["Gain"]
                    conditions = self._path_conditions(tree_df, node["ID"])
                    path_formulas.append(Implies(And(*conditions), o == leaf_value))

                class_tree_formulas.append(And(*path_formulas))
            all_tree_formulas.append(And(*class_tree_formulas))
        return And(all_tree_formulas)

    def get_init_value(self, model, x, estimator_variables):
        """Return the per-class offset between the model margin and the encoded leaf sums.

        Solves the tree encoding for the current ``self.sample_expression`` to
        obtain each ``sum_class_<j>`` and subtracts it from
        ``model.predict(x, output_margin=True)[0]``. The margin is stored on
        ``self.predicted_margin`` and the result on ``self.init_value``.
        """
        estimator_pred = Solver()
        estimator_pred.add(self.sample_expression)
        estimator_pred.add(self.trees_expression)
        estimator_pred.add(And(estimator_variables))

        if estimator_pred.check() == sat:
            solvermodel = estimator_pred.model()
            total_sum = [
                float(solvermodel.eval(Real(f"sum_class_{j}")).as_fraction())
                for j in range(self.n_classes)
            ]
        else:
            total_sum = 0
            print("estimator error")

        self.predicted_margin = model.predict(x, output_margin=True)[0]
        init_value = self.predicted_margin - total_sum
        self.init_value = init_value
        return init_value

    def get_decision_function_multiclass(self, model, x):
        """Build the Z3 decision constraints for a multiclass prediction.

        Returns:
            tuple: ``(D, D_add)`` where ``D`` states that the predicted class's
            decision value exceeds every other class's, and ``D_add`` defines
            the ``decision_class_<j>`` and ``sum_class_<j>`` variables.
        """
        predicted_class = model.predict(x)[0]
        self.predicted_class = predicted_class
        n_estimators = int(len(model.get_booster().get_dump()) / self.n_classes)

        estimator_variables = []
        for j in range(self.n_classes):
            class_est_exp = [Real(f"o_{i}_{j}") for i in range(n_estimators)]
            estimator_variables.append(Real(f"sum_class_{j}") == Sum(class_est_exp))
        self.estimator_variables = estimator_variables

        init_value = self.get_init_value(model, x, estimator_variables)

        equation_list = []
        for j in range(self.n_classes):
            decision = Real(f"decision_class_{j}")
            equation_list.append(decision == Real(f"sum_class_{j}") + init_value[j])

        decision_list = []
        for class_number in range(self.n_classes):
            if class_number != self.predicted_class:
                decision_list.append(
                    Real(f"decision_class_{self.predicted_class}")
                    - Real(f"decision_class_{class_number}") > 0
                )
        decision_exp = And(decision_list)
        return And(decision_exp), And(And(equation_list), And(estimator_variables))

    def get_decision_function_binary(self, model, x):
        """Build the Z3 decision constraint for a binary prediction.

        Returns:
            tuple: ``(D, True)`` where ``D`` is ``sum(o_i_0) + init_value < 0``
            for predicted class 0 and ``> 0`` otherwise. ``self.init_value`` is
            set to the margin minus the summed leaf values for the sample.
        """
        predicted_class = model.predict(x)[0]
        n_estimators = len(model.get_booster().get_dump())

        estimator_pred = Solver()
        estimator_pred.add(self.sample_expression)
        estimator_pred.add(self.trees_expression)
        variables = [Real(f"o_{i}_0") for i in range(n_estimators)]
        if estimator_pred.check() == sat:
            solvermodel = estimator_pred.model()
            total_sum = sum(
                float(solvermodel.eval(var).as_fraction()) for var in variables
            )
        else:
            total_sum = 0
            print("estimator error")
        init_value = model.predict(x, output_margin=True)[0] - total_sum
        self.init_value = init_value

        estimator_list = [
            Real(f"o_{estimator_number}_0")
            for estimator_number in range(
                int(len(model.get_booster().get_dump()) / self.n_classes)
            )
        ]
        equation_o = Sum(estimator_list) + init_value

        if predicted_class == 0:
            final_equation = equation_o < 0
        else:
            final_equation = equation_o > 0

        return final_equation, True

    def explain_expression(self, sample_expression, trees_expression, decision_expression, estimators_expression, model, reorder):
        """Greedily drop feature equalities that are not needed to prove the decision.

        With ``reorder`` set to ``"asc"`` or ``"desc"`` only features whose
        ``model.feature_importances_`` entry is non-zero are considered, sorted
        by importance; otherwise all features are tried in column order.

        Returns:
            list[str]: the retained equalities as ``"<feature> == <value>"``,
            the value rendered with ``as_decimal(4)``.
        """
        i_expression = sample_expression.copy()

        importances = model.feature_importances_
        non_zero_indices = np.where(importances != 0)[0]

        if reorder == "asc":
            sorted_feature_indices = non_zero_indices[np.argsort(importances[non_zero_indices])]
            i_expression = [i_expression[i] for i in sorted_feature_indices]
        elif reorder == "desc":
            sorted_feature_indices = non_zero_indices[np.argsort(-importances[non_zero_indices])]
            i_expression = [i_expression[i] for i in sorted_feature_indices]

        for feature in i_expression.copy():
            i_expression.remove(feature)

            premises = And(And(And(i_expression), trees_expression), estimators_expression)
            if not self.is_proved_sat(premises, decision_expression):
                # The decision is no longer entailed without this feature: keep it.
                i_expression.append(feature)

        return [
            f"{expression.arg(0)} == {expression.arg(1).as_decimal(4)}"
            for expression in i_expression
        ]

    def is_proved_sat(self, expressions, decision):
        """Return True when ``expressions`` entails ``decision`` (negated implication is unsat)."""
        opt = Optimize()
        opt.add(Not(Implies(expressions, decision)))
        return opt.check() == unsat

    def get_deltas(self, exp):
        """Compute how far each numeric feature of an explanation can move.

        Args:
            exp: explanation as Z3 equalities or as ``"<feature> == <value>"``
                strings (the format returned by ``explain``).

        Returns:
            list: ``[expression, delta_lower, delta_upper]`` per numeric
            feature; a delta of ``None`` means no bound was found on that side.
            Categorical equalities are moved to ``self.caterogic_expressions``.
        """
        if exp and isinstance(exp[0], str):
            expz3 = []
            for token in exp:
                name, value = token.rsplit(" == ", 1)
                # as_decimal() marks truncated values with a trailing "?", which Z3 cannot parse.
                expz3.append(Real(name) == value.rstrip("?"))
            exp = expz3

        numeric_expressions = []
        for expression in exp:
            if str(expression.arg(0)) in self.categoric_features:
                self.caterogic_expressions.append(expression)
            else:
                self.cumulative_range_expresson.append(expression)
                numeric_expressions.append(expression)

        delta_list = []
        for expression in numeric_expressions:
            # Replace this feature's equality by the range found below, so later
            # features are widened under the ranges already granted.
            self.cumulative_range_expresson = [
                expr for expr in self.cumulative_range_expresson if not expr.eq(expression)
            ]
            lower_min, upper_min = self.optimize_delta(expression)

            if lower_min is not None:
                delta_value_lower = self.get_delta_value(str(lower_min.value()))
                self.cumulative_range_expresson.append(
                    expression.arg(0) >= expression.arg(1) - delta_value_lower
                )
            else:
                # unsat: open range on the lower side
                delta_value_lower = None

            if upper_min is not None:
                delta_value_upper = self.get_delta_value(str(upper_min.value()))
                self.cumulative_range_expresson.append(
                    expression.arg(0) <= expression.arg(1) + delta_value_upper
                )
            else:
                # unsat: open range on the upper side
                delta_value_upper = None

            delta_list.append([expression, delta_value_lower, delta_value_upper])

        self.delta_list = delta_list
        return delta_list

    def get_delta_value(self, value):
        """Convert the string form of an optimizer objective value into a safe delta.

        Args:
            value (str): ``"<q> + epsilon"``, ``"epsilon"``, ``"0"`` or a plain
                rational such as ``"3/2"``.

        Returns:
            A non-negative number. A plain rational is reduced by 0.01 and
            rounded to 2 decimals; the result is clamped at 0 so the range can
            never exclude the sample's own value.
        """
        if "+ epsilon" in value:
            delta_value = float(Fraction(value.split(" + ")[0]))
        elif "epsilon" == value:
            delta_value = 0
        elif "0" == value:
            print("ERROR: delta == 0, explanation is incorrect")
            delta_value = 0
        else:
            delta_value = max(0, round(float(Fraction(value)) - 0.01, 2))

        return delta_value

    def optimize_delta(self, expression):
        """Find the smallest lower/upper deviation of a feature that admits a counterexample.

        Minimises ``delta_lower`` (with ``delta_upper == 0``) and ``delta_upper``
        (with ``delta_lower == 0``) subject to the tree encoding, the current
        cumulative ranges, the categorical equalities and ``Not(self.D)``.

        Returns:
            tuple: ``(lower_min, upper_min)`` objective handles, each ``None``
            when the corresponding problem is not satisfiable.
        """
        delta_upper = Real("delta_upper")
        delta_lower = Real("delta_lower")

        self.delta_features = []

        delta_expressions = [
            expression.arg(0) >= expression.arg(1) - delta_lower,
            expression.arg(0) <= expression.arg(1) + delta_upper,
        ]
        self.delta_expressions = delta_expressions

        expression_list = [
            And(self.cumulative_range_expresson),
            And(self.caterogic_expressions),
            And(self.delta_expressions),
            self.trees_expression,
            self.D_add,
            Not(self.D),
            delta_upper >= 0,
            delta_lower >= 0,
        ]

        opt_lower = Optimize()
        opt_lower.add(And(expression_list))
        opt_lower.add(delta_upper == 0)
        lower_min = opt_lower.minimize(delta_lower)
        if opt_lower.check() != sat:
            lower_min = None

        opt_upper = Optimize()
        opt_upper.add(And(expression_list))
        opt_upper.add(delta_lower == 0)
        upper_min = opt_upper.minimize(delta_upper)
        if opt_upper.check() != sat:
            upper_min = None

        return lower_min, upper_min

    def explain_range(self, sample, reorder="asc", dataset_bounds=True, exp=None):
        """Explain ``sample`` with feature ranges instead of exact equalities.

        Args:
            sample: 1-D sequence of feature values, ordered like ``self.columns``.
            reorder: forwarded to ``explain`` when ``exp`` is not given.
            dataset_bounds: clip each range to the min/max seen in ``self.data``.
            exp: optional precomputed explanation (as returned by ``explain``).
                The solver state is always rebuilt for ``sample``, so this may
                be called without a prior ``explain`` call.

        Returns:
            list[str]: range constraints for numeric features followed by the
            categorical equalities; the empty explanation is returned unchanged.
        """
        self.cumulative_range_expresson = []
        self.caterogic_expressions = []
        self.range_metric = 0
        if exp is None:
            exp = self.explain(sample, reorder)
        else:
            # Bind the encoding to *this* sample; the decision-function helpers
            # read self.sample_expression, which would otherwise be missing or stale.
            self._bind_sample(sample)
            self.I = self.sample_expression  # kept for backward compatibility

        if exp == []:
            return exp

        delta_list = self.get_deltas(exp)
        range_exp = []
        for expression, delta_lower, delta_upper in delta_list:
            expname = str(expression.arg(0))

            expvalue = float(expression.arg(1).as_fraction())
            lower = None
            upper = None
            if delta_lower is not None:
                lower = round(expvalue - delta_lower, 2)
            if delta_upper is not None:
                upper = round(expvalue + delta_upper, 2)

            if dataset_bounds:
                idx = list(self.columns).index(expname)
                min_idx = np.min(self.data[:, idx])
                max_idx = np.max(self.data[:, idx])
                if lower is not None and lower < min_idx:
                    lower = min_idx
                if upper is not None and upper > max_idx:
                    upper = max_idx

            if lower == upper:
                range_exp.append(f"{expression.arg(0)} == {expression.arg(1)}")
            elif lower is None:
                range_exp.append(f"{expname} <= {upper}")
            elif upper is None:
                range_exp.append(f"{expname} >= {lower}")
            else:
                range_exp.append(f"{lower} <= {expname} <= {upper}")

        for expression in self.caterogic_expressions:
            range_exp.append(f"{expression.arg(0)} == {expression.arg(1)}")

        return range_exp

In [3]:
# Load iris as a DataFrame so the feature names are available as column names.
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target

# Fixed random_state keeps the train/test split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Small 3-class model (10 boosting rounds, depth 3) used for the multiclass examples below.
model_iris = XGBClassifier(n_estimators=10, max_depth=3, learning_rate=0.1, objective='multi:softmax')
model_iris.fit(X_train, y_train)

In [4]:
# Sanity check: display the fitted model's predictions on the training split.
model_iris.predict(X_train)

array([0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0, 0, 1, 2, 2, 1, 2, 1, 2,
       1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 0, 2, 2,
       1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 1, 0, 2, 0, 0, 2, 1, 2, 2, 2, 2, 1,
       0, 0, 2, 2, 0, 0, 0, 1, 2, 0, 2, 2, 0, 1, 1, 2, 1, 2, 0, 2, 1, 2,
       1, 1, 1, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 2, 0, 1, 2, 2, 1, 2,
       1, 1, 2, 2, 0, 1, 1, 0, 1, 2], dtype=int32)

In [5]:
# Build the explainer from the full feature frame X (not only X_train); it supplies
# the column names and the per-feature min/max used as dataset bounds.
explainer_iris = XGBoostExplainer(model_iris, X)

In [6]:
# Pick one held-out row (as a plain array of feature values) to explain.
sample = X_test.values[2]
print(sample)

[7.7 2.6 6.9 2.3]


In [7]:
# explain() also sets explainer_iris.init_value as a side effect, so it is printed afterwards.
print(explainer_iris.explain(sample, reorder="asc"))
print(explainer_iris.init_value)

['petal length (cm) == 6.9']
[0.49999997 0.49999999 0.50000013]


In [8]:
# Hand-written explanation in the "<feature> == <value>" string format that explain() returns.
sample_exp = ["petal length (cm) == 6.9", 
            #   "petal length (cm) == 6.9",
              ]

In [9]:
# Widen the hand-written explanation for the same sample into feature ranges.
print(explainer_iris.explain_range(sample, reorder="asc", exp=sample_exp))

['petal length (cm) >= 5.1']


In [10]:
# Explain every held-out sample, then widen that same explanation into ranges.
# Passing the explanation via `exp=` stops explain_range() from re-running
# explain() internally, which would repeat all solver calls for identical output.
for i in range(X_test.shape[0]):
    test_sample = X_test.values[i]
    explanation = explainer_iris.explain(test_sample, reorder="asc")
    print(explanation)
    print(explainer_iris.explain_range(test_sample, reorder="asc", exp=explanation))
    print("\n-----------------------------")

['petal width (cm) == 1.2', 'petal length (cm) == 4.7']
['petal width (cm) <= 1.79', '3.0 <= petal length (cm) <= 4.99']

-----------------------------
['petal length (cm) == 1.7']
['petal length (cm) <= 2.99']

-----------------------------
['petal length (cm) == 6.9']
['petal length (cm) >= 5.1']

-----------------------------
['petal width (cm) == 1.5', 'petal length (cm) == 4.5']
['petal width (cm) <= 1.79', '3.0 <= petal length (cm) <= 4.99']

-----------------------------
['petal width (cm) == 1.4', 'petal length (cm) == 4.8']
['petal width (cm) <= 1.79', '3.0 <= petal length (cm) <= 4.99']

-----------------------------
['petal length (cm) == 1.5']
['petal length (cm) <= 2.99']

-----------------------------
['petal width (cm) == 1.3', 'petal length (cm) == 3.6']
['petal width (cm) <= 1.79', '3.0 <= petal length (cm) <= 4.99']

-----------------------------
['petal length (cm) == 5.1']
['petal length (cm) >= 5.1']

-----------------------------
['petal width (cm) == 1.5', 'petal

# check binary compatibility

In [11]:
# Reload iris and turn it into a binary problem.
# NOTE(review): this rebinds X, y and the train/test names used by the multiclass cells above.
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target
# Merge class 2 into class 0; y is iris.target itself (no copy), so this edits it in place.
y[y == 2] = 0

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Binary model with shallower trees (depth 2); no objective is passed explicitly.
model_iris_bin = XGBClassifier(n_estimators=10, max_depth=2, learning_rate=0.1)
model_iris_bin.fit(X_train, y_train)
model_iris_bin.predict(X_train)

array([0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 1, 1, 0, 1, 0])

In [12]:
# Explainer for the binary model, built from the full feature frame.
explainer_iris_bin = XGBoostExplainer(model_iris_bin, X)

In [13]:
# Pick the row at position 2 of the current test split to explain with the binary model.
sample = X_test.values[2]
print(sample)

[7.7 2.6 6.9 2.3]


In [14]:
# For the binary model init_value is a single scalar rather than one value per class.
print(explainer_iris_bin.explain(sample, reorder="asc"))
print(explainer_iris_bin.init_value)

['petal width (cm) == 2.3']
-0.6333333


In [15]:
# Inspect the Z3 encoding of the trees (one implication per leaf path).
print(explainer_iris_bin.trees_expression)

And(And(And(Implies(And(petal length (cm) < 3),
                    o_0_0 == -27573207/200000000),
            Implies(And(petal length (cm) >= 3,
                        petal width (cm) < 9/5),
                    o_0_0 == 54488387/250000000),
            Implies(And(petal length (cm) >= 3,
                        petal width (cm) >= 9/5),
                    o_0_0 == -62367577/500000000)),
        And(Implies(And(petal length (cm) < 3),
                    o_1_0 == -131089211/1000000000),
            Implies(And(petal length (cm) >= 3,
                        petal length (cm) < 24/5),
                    o_1_0 == 107927017/500000000),
            Implies(And(petal length (cm) >= 3,
                        petal length (cm) >= 24/5),
                    o_1_0 == -444295369/5000000000)),
        And(Implies(And(petal length (cm) < 3),
                    o_2_0 == -6265267/50000000),
            Implies(And(petal length (cm) >= 3,
                        petal width (cm) < 9/5),
     

In [16]:
# Hand-written explanation for the binary model; the second entry is left disabled.
sample_exp = ["petal width (cm) == 2.3", 
            #   "petal length (cm) == 6.9",
              ]

In [17]:
# Widen the hand-written explanation for the same sample into feature ranges.
print(explainer_iris_bin.explain_range(sample, reorder="asc", exp=sample_exp))

['petal width (cm) >= 1.8']


In [18]:
# Explain every held-out sample with the binary model, then widen that same
# explanation into ranges. Passing it via `exp=` stops explain_range() from
# re-running explain() internally, which would repeat all solver calls.
for i in range(X_test.shape[0]):
    test_sample = X_test.values[i]
    explanation = explainer_iris_bin.explain(test_sample, reorder="asc")
    print(explanation)
    print(explainer_iris_bin.explain_range(test_sample, reorder="asc", exp=explanation))
    print("\n-----------------------------")

['petal length (cm) == 4.7', 'petal width (cm) == 1.2']
['3.0 <= petal length (cm) <= 4.99', 'petal width (cm) <= 1.79']

-----------------------------
['petal length (cm) == 1.7']
['petal length (cm) <= 2.99']

-----------------------------
['petal width (cm) == 2.3']
['petal width (cm) >= 1.8']

-----------------------------
['petal length (cm) == 4.5', 'petal width (cm) == 1.5']
['3.0 <= petal length (cm) <= 4.99', 'petal width (cm) <= 1.79']

-----------------------------
['petal length (cm) == 4.8', 'petal width (cm) == 1.4']
['3.0 <= petal length (cm) <= 4.99', 'petal width (cm) <= 1.79']

-----------------------------
['petal length (cm) == 1.5']
['petal length (cm) <= 2.99']

-----------------------------
['petal length (cm) == 3.6', 'petal width (cm) == 1.3']
['3.0 <= petal length (cm) <= 4.99', 'petal width (cm) <= 1.79']

-----------------------------
['petal width (cm) == 2.3']
['petal width (cm) >= 1.8']

-----------------------------
['petal length (cm) == 4.5', 'petal wi

# test datasets

In [19]:
# Fetch the 'shuttle' dataset through pmlb and preview its shape and first rows.
shuttle_data = fetch_data('shuttle')
print(shuttle_data.shape)
shuttle_data.head()

(58000, 10)


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,target
0,50.0,21.0,77.0,0.0,28.0,0.0,27.0,48.0,22.0,2
1,55.0,0.0,92.0,0.0,0.0,26.0,36.0,92.0,56.0,4
2,53.0,0.0,82.0,0.0,52.0,-5.0,29.0,30.0,2.0,1
3,37.0,0.0,76.0,0.0,28.0,18.0,40.0,48.0,8.0,1
4,37.0,0.0,79.0,0.0,34.0,-26.0,43.0,46.0,2.0,1


In [20]:
# List the distinct target labels to see how they are numbered.
set(shuttle_data.target)

{1, 2, 3, 4, 5, 6, 7}

In [21]:
# Larger multiclass model: 100 boosting rounds, depth 3.
xgb_shuttle = XGBClassifier(n_estimators=100, max_depth=3)

X_shuttle = shuttle_data.drop(columns=['target'])
y_shuttle = shuttle_data['target'].values
# Shift the labels down by one so they start at 0 (the raw targets shown above start at 1).
y_shuttle = y_shuttle - 1
X_shuttle_train, X_shuttle_test, y_shuttle_train, y_shuttle_test = train_test_split(
    X_shuttle, y_shuttle, test_size=0.5, random_state=101)
xgb_shuttle.fit(X_shuttle_train, y_shuttle_train)

# The explainer is built from the full feature frame, not only the training half.
shuttle_explainer = XGBoostExplainer(xgb_shuttle, X_shuttle)

In [22]:
# Explain the first shuttle row with the default importance ordering (reorder="asc").
sample = X_shuttle.values[0]
print(shuttle_explainer.explain(sample))
print(shuttle_explainer.init_value)

['A2 == 21', 'A9 == 22', 'A7 == 27', 'A1 == 50']
[0.49999954 0.49999958 0.49999924 0.49999869 0.49999988 0.4999997
 0.50000034]


## allbp dataset

In [23]:
# Fetch the 'allbp' dataset through pmlb and preview its shape and first rows.
allbp_data = fetch_data('allbp')
print(allbp_data.shape)
allbp_data.head()

(3772, 30)


Unnamed: 0,age,sex,on thyroxine,query on thyroxine,on antithyroid medication,sick,pregnant,thyroid surgery,I131 treatment,query hypothyroid,...,TT4 measured,TT4,T4U measured,T4U,FTI measured,FTI,TBG measured,TBG,referral source,target
0,34,1,0,0,0,0,0,0,0,0,...,1,28,1,72,1,10,0,0,1,2
1,15,1,0,0,0,0,0,0,0,0,...,1,3,0,146,0,234,0,0,4,2
2,40,2,0,0,0,0,0,0,0,0,...,1,10,1,48,1,22,0,0,4,2
3,67,1,1,0,0,0,0,0,0,0,...,1,83,0,146,0,234,0,0,4,2
4,67,1,0,0,0,0,0,0,0,0,...,1,201,1,44,1,199,0,0,3,2


In [24]:
# Summary statistics per column (value ranges, constant columns, etc.).
allbp_data.describe()

Unnamed: 0,age,sex,on thyroxine,query on thyroxine,on antithyroid medication,sick,pregnant,thyroid surgery,I131 treatment,query hypothyroid,...,TT4 measured,TT4,T4U measured,T4U,FTI measured,FTI,TBG measured,TBG,referral source,target
count,3772.0,3772.0,3772.0,3772.0,3772.0,3772.0,3772.0,3772.0,3772.0,3772.0,...,3772.0,3772.0,3772.0,3772.0,3772.0,3772.0,3772.0,3772.0,3772.0,3772.0
mean,46.353924,1.26299,0.123012,0.013256,0.0114,0.038971,0.014051,0.014051,0.015642,0.062036,...,0.938759,121.097296,0.897402,66.144486,0.897932,110.733563,0.0,0.0,3.279427,1.953075
std,20.843843,0.522908,0.328494,0.114382,0.106174,0.193552,0.117716,0.117716,0.124101,0.241253,...,0.239803,98.638463,0.303473,32.692626,0.302778,97.524103,0.0,0.0,1.092085,0.228385
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,28.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,23.0,1.0,46.0,1.0,17.0,0.0,0.0,3.0,2.0
50%,49.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,85.0,1.0,58.0,1.0,58.0,0.0,0.0,4.0,2.0
75%,63.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,226.0,1.0,72.0,1.0,224.0,0.0,0.0,4.0,2.0
max,93.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,241.0,1.0,146.0,1.0,234.0,0.0,0.0,4.0,2.0


In [25]:
# List the distinct target labels to see how they are numbered.
set(allbp_data.target)

{0, 1, 2}

In [26]:
# 20 boosting rounds, depth 3, using the "exact" tree method.
xgb_allbp = XGBClassifier(n_estimators=20, max_depth=3,tree_method="exact")

X_allbp = allbp_data.drop(columns=['target'])
# Labels are used as-is (no shift is applied here, unlike the shuttle cell).
y_allbp = allbp_data['target'].values
X_allbp_train, X_allbp_test, y_allbp_train, y_allbp_test = train_test_split(
    X_allbp, y_allbp, test_size=0.5, random_state=101)
xgb_allbp.fit(X_allbp_train, y_allbp_train)

# The explainer is built from the full feature frame, not only the training half.
allbp_explainer = XGBoostExplainer(xgb_allbp, X_allbp)

In [27]:
# Explain the first allbp row and show the per-class init_value computed for it.
sample = X_allbp.values[0]
print(allbp_explainer.explain(sample))
print(allbp_explainer.init_value)

['T3 == 27', 'T4U == 72']
[0.49999979 0.50000021 0.50000005]


In [28]:
# Explain the second allbp row and show the per-class init_value computed for it.
sample = X_allbp.values[1]
print(allbp_explainer.explain(sample))
print(allbp_explainer.init_value)

['T3 == 22', 'T4U == 146', 'FTI == 234']
[0.49999979 0.5        0.50000026]


## 