In [1]:
import os
import sys

import numpy as np
import sklearn
import sklearn.ensemble
import sklearn.metrics

# Make the notebook's parent directory importable (presumably where the local
# Anchor package lives -- confirm against the repository layout). The membership
# check keeps sys.path free of duplicates when this cell is re-run.
module_path = os.path.abspath('..')
if module_path not in sys.path:
    sys.path.append(module_path)

from Anchor.anchor import (
    Anchor,
    Tasktype,
    AnchorCandidate,
)

In [2]:
# Load the comma-separated Titanic table: the last column is used as the label,
# every other column as a feature.
# NOTE(review): np.genfromtxt yields NaN for fields it cannot parse as numbers,
# so this assumes the file is already numerically encoded -- confirm.
data = np.genfromtxt('../datasets/titanic.txt', delimiter=',')
y_train = data[:, -1]
X_train = data[:, :-1]

# Fix the forest's seed so the model being explained is identical on every
# Restart & Run All. (Only the classifier is seeded here; the explainer's own
# sampling/search is not.)
SEED = 42
c = sklearn.ensemble.RandomForestClassifier(
    n_estimators=100,
    n_jobs=5,
    random_state=SEED,
)
c.fit(X_train, y_train)

# Accuracy measured on the same rows the model was fitted on (no held-out split).
print('Train', sklearn.metrics.accuracy_score(y_train, c.predict(X_train)))

# Explainer configured for tabular data.
explainer = Anchor(Tasktype.TABULAR)

def optim_func(candidate: AnchorCandidate) -> float:
    """
    Custom cost for an anchor candidate; smac minimizes the returned value.

    The cost is the mean of two terms:
      * ``1 - candidate.precision``
      * ``len(candidate.feature_mask)`` divided by the number of columns of
        the notebook-level ``X_train``.

    Args:
        candidate (AnchorCandidate): Candidate being evaluated. Only its
            ``precision`` and ``feature_mask`` attributes are read here.

    Returns:
        (float): The averaged cost.
    """
    # Per the notebook author, this mirrors the library's default objective
    # (not verifiable from this notebook alone).
    precision_term = 1 - candidate.precision
    size_term = len(candidate.feature_mask) / X_train.shape[1]
    return (precision_term + size_term) / 2

# Task-specific settings: the data set handed to the explainer plus one display
# name per feature column.
task_paras = dict(
    dataset=X_train,
    column_names=[
        "PcClass",
        "Name",
        "Sex",
        "Age",
        "SibSp",
        "Parch",
        "Ticket",
        "Fare",
        "Cabin",
        "Embarked",
    ],
)

# Search-method settings. "optim" is optional and supplies the custom cost
# defined above.
# NOTE(review): run_time is presumably a budget in seconds (5 minutes) -- confirm.
method_paras = dict(run_time=5 * 60, optim=optim_func)

# Explain the model's prediction for row 759, passed as a single-row 2-D array.
instance = X_train[759].reshape(1, -1)
anchor = explainer.explain_instance(
    input=instance,
    predict_fn=c.predict,
    method="smac",
    task_specific=task_paras,
    method_specific=method_paras,
    num_coverage_samples=100,
)

INFO:root: Start Sampling
INFO:root: Start SMAC Search
INFO:smac.utils.io.cmd_reader.CMDReader:Output to smac3-output_2022-02-14_22:51:28_973444
INFO:smac.facade.smac_bb_facade.SMAC4BB:Optimizing a deterministic scenario for quality without a tuner timeout - will make SMAC deterministic and only evaluate one configuration per iteration!
INFO:smac.initial_design.sobol_design.SobolDesign:Running initial design for 80 configurations
INFO:smac.facade.smac_bb_facade.SMAC4BB:<class 'smac.facade.smac_bb_facade.SMAC4BB'>
INFO:smac.optimizer.smbo.SMBO:Running initial design
INFO:smac.intensification.intensification.Intensifier:First run, no incumbent provided; challenger is assumed to be the incumbent


Train 1.0


INFO:smac.intensification.intensification.Intensifier:First run, no incumbent provided; challenger is assumed to be the incumbent
INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 0.2500
INFO:smac.intensification.intensification.Intensifier:Challenger (0.2000) is better than incumbent (0.2500) on 1 runs.
INFO:smac.intensification.intensification.Intensifier:Changes in incumbent:
INFO:smac.intensification.intensification.Intensifier:  1 : 0 -> 1
INFO:smac.intensification.intensification.Intensifier:  3 : 1 -> 0
INFO:smac.intensification.intensification.Intensifier:  5 : 1 -> 0
INFO:smac.intensification.intensification.Intensifier:  8 : 1 -> 0
INFO:smac.intensification.intensification.Intensifier:  9 : 0 -> 1
INFO:smac.intensification.intensification.Intensifier:Challenger (0.1500) is better than incumbent (0.2000) on 1 runs.
INFO:smac.intensification.intensification.Intensifier:Changes in incumbent:
INFO:smac.intensification.intensifica

In [5]:
# Display the candidate returned by explainer.explain_instance.
# NOTE(review): the recorded output shows n_samples=0 and positive_samples=0
# next to precision=1.0 -- confirm that is expected.
anchor

AnchorCandidate(feature_mask=[0, 2], precision=1.0, n_samples=0, positive_samples=0, coverage=0.1)

In [6]:
# Render the anchor for the explained row (index 759) as text and print it.
explained_row = X_train[759]
visu = explainer.visualize(anchor, explained_row)
print(visu)

PcClass = 1.0 AND Sex = 1.0
