Testing GLocalX on the Adult dataset

In [1]:
import numpy as np
import logzero
import pickle

from glocalx import GLocalX, shut_up_tensorflow
from rule_loaders.lore_to_glocalx import lore_to_glocalx

from lore_explainer.datamanager import prepare_adult_dataset, prepare_dataset

# Set log profile: INFO for normal logging, DEBUG for verbosity
logzero.loglevel(logzero.logging.DEBUG)
shut_up_tensorflow()

# Load black box: optional! Use black_box = None to use the dataset labels
model_file = "RandomForestClassifier_for_adult"
# Load the model. The file handle gets its own name so that `model_file`
# keeps holding the model's base name instead of being rebound to a
# (closed) file object once the `with` block exits.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# model files that come from a trusted source.
with open(f"data/models/{model_file}.pkl", 'rb') as model_handle:
    black_box = pickle.load(model_handle)

In [2]:
# Load data and header

# data = np.genfromtxt('data/dummy/dummy_dataset.csv', delimiter=',', names=True)
# features_names = data.dtype.names

data_filename = 'adult'
# Prepare data: only the "adult" dataset has a preparation routine here.
if data_filename == "adult":
    df, class_name = prepare_adult_dataset(f'data/{data_filename}.csv')
else:
    # Name the rejected dataset so the failure is self-explanatory.
    raise NotImplementedError(
        f"No preparation routine is available for dataset '{data_filename}'")
# prepare_dataset returns the processed frame and the feature names first;
# any further return values are not needed here and are discarded.
df, feature_names, *_ = prepare_dataset(
    df, class_name)

In [3]:
# Rebuild the feature matrix row by row: every row of the frame's array is
# turned into a plain list, and the lists are stacked back into an ndarray.
row_lists = [list(row) for row in df.to_numpy()]
data = np.array(row_lists)
data

array([[   39,  2174,     0, ...,     0,     0,     0],
       [   50,     0,     0, ...,     0,     0,     0],
       [   38,     0,     0, ...,     0,     0,     0],
       ...,
       [   58,     0,     0, ...,     0,     0,     0],
       [   22,     0,     0, ...,     0,     0,     0],
       [   52, 15024,     0, ...,     0,     0,     1]])

In [4]:
# Load local explanations
lore_rules_file = "lore_rules_adult_30"
info_file = "adult_info"

# Resolve both input paths first, then convert the LORE rules for GLocalX.
lore_rules_path = f"data/lore_rules/{lore_rules_file}.pkl"
info_path = f"data/info_files/{info_file}.json"
glocal_rules = lore_to_glocalx(lore_rules_path, info_path)
print(glocal_rules)

[{26: (-inf, 0.5), 27: (0.5, inf), 3: (-inf, 39.0)}-> 0, {1: (-inf, 5591.656005859375), 57: (0.5, inf), 3: (-inf, 48.5)}-> 0, {3: (-inf, 30.0), 35: (-inf, 0.5)}-> 0, {60: (-inf, 0.5), 53: (-inf, 0.5), 32: (-inf, 0.5), 0: (22.5, inf), 21: (-inf, 0.5), 1: (-inf, 5190.6376953125)}-> 0, {1: (-inf, 3844.0), 59: (0.5, inf)}-> 0, {1: (-inf, 4804.0)}-> 0, {32: (-inf, 0.5), 37: (-inf, 0.5)}-> 1, {1: (-inf, 12111.0), 32: (0.5, inf)}-> 0, {60: (-inf, 0.5), 27: (-inf, 0.5), 21: (-inf, 0.5), 59: (0.5, inf)}-> 0, {0: (-inf, 40.38766098022461), 38: (0.5, inf), 17: (-inf, 0.5)}-> 0, {1: (-inf, 5381.0), 16: (0.5, inf)}-> 0, {32: (-inf, 0.5), 3: (-inf, 38.5), 49: (-inf, 0.5), 27: (-inf, 0.5)}-> 1, {5: (-inf, 0.5), 0: (-inf, 41.5), 1: (-inf, 5819.0), 27: (-inf, 0.5)}-> 0, {50: (-inf, 0.5), 3: (37.5, 41.0), 0: (-inf, 34.5), 40: (-inf, 0.5)}-> 1, {3: (-inf, 39.5)}-> 0, {1: (-inf, 6625.64453125), 21: (-inf, 0.5), 54: (-inf, 0.5)}-> 0, {1: (-inf, 9228.0), 60: (0.5, inf)}-> 0, {1: (-inf, 3662.5), 38: (-inf, 0

In [14]:
# Build a GLocalX instance wrapping `black_box`
glocalx = GLocalX(
    name='sample_name',
    model_ai=black_box,
)
# Fit on the local rules and the data; use batch_size=128 for larger datasets
glocalx.fit(
    glocal_rules,
    data,
    batch_size=128,
)

[D 240725 20:17:23 glocalx:398] sample_name | ************************ Iteration 1 with num of theories: 30
[D 240725 20:17:23 glocalx:404] Computing distances
[D 240725 20:17:23 glocalx:410] sample_name|  sorting candidates queue
[D 240725 20:17:24 glocalx:423] sample_name creating fine boundary
[D 240725 20:17:24 glocalx:437] sample_name merged candidate 0
[D 240725 20:17:24 evaluators:226] Log likelihood: 0.7830840576149382 | Complexity: 0.019417475728155338
[D 240725 20:17:24 glocalx:447] sample_name Merged candidate
[D 240725 20:17:24 glocalx:398] sample_name | ************************ Iteration 2 with num of theories: 29
[D 240725 20:17:24 glocalx:404] Computing distances
[D 240725 20:17:24 glocalx:410] sample_name|  sorting candidates queue
[D 240725 20:17:24 glocalx:423] sample_name creating fine boundary
[D 240725 20:17:24 glocalx:437] sample_name merged candidate 0
[D 240725 20:17:24 evaluators:226] Log likelihood: 0.7702773256349621 | Complexity: 0.014563106796116504
[D 2407

In [15]:
# Inspect the `boundary` attribute of the fitted GLocalX instance
# (in the recorded output it is a list of rule sets).
glocalx.boundary

[{{3: (-inf, 44.0), 0: (-inf, 53.5), 2: (-inf, 957.5)}-> 0,
  {1: (-inf, 12111.0)}-> 0,
  {1: (-inf, 5381.0), 16: (0.5, inf)}-> 0,
  {60: (-inf, 0.5), 21: (-inf, 0.5)}-> 0},
 {{1: (-inf, 9228.0), 60: (0.5, inf)}-> 0, {3: (-inf, 39.5)}-> 0}]

In [16]:
# Inspect the `fine_boundary` attribute of the fitted GLocalX instance
# (in the recorded output it is a single set of rules).
glocalx.fine_boundary

{{3: (-inf, 44.0), 0: (-inf, 53.5), 2: (-inf, 957.5)}-> 0,
 {1: (-inf, 12111.0)}-> 0,
 {1: (-inf, 5381.0), 16: (0.5, inf)}-> 0,
 {1: (-inf, 9228.0), 60: (0.5, inf)}-> 0,
 {60: (-inf, 0.5), 21: (-inf, 0.5)}-> 0,
 {3: (-inf, 39.5)}-> 0}

In [18]:
# Retrieve global explanations by fidelity percentile
# `alpha` is handed to get_fine_boundary_alpha together with `data`.
# NOTE(review): presumably a fractional alpha is read as a percentile and an
# integer alpha as a rule count — confirm against the GLocalX documentation.
alpha = 0.5
global_explanations = glocalx.get_fine_boundary_alpha(alpha, data)
global_explanations

[{1: (-inf, 6155.5), 27: (0.5, inf)}-> 0,
 {3: (-inf, 48.5)}-> 0,
 {3: (47.5, 52.5), 0: (38.5, inf)}-> 1,
 {2: (1789.5, inf)}-> 1]

In [29]:
# Retrieve exactly `alpha` global explanations
# NOTE(review): the recorded output for alpha = 4 lists 6 rules, two of which
# have an empty premise ({}-> 0 and {}-> 1) — confirm whether those
# empty-premise rules are meant to be excluded from the `alpha` count.
# This cell rebinds `alpha` and `global_explanations` from the previous cell.
alpha = 4
global_explanations = glocalx.get_fine_boundary_alpha(alpha, data)
global_explanations

[{}-> 0,
 {3: (-inf, 48.5)}-> 0,
 {1: (-inf, 6155.5), 27: (0.5, inf)}-> 0,
 {}-> 1,
 {3: (47.5, 52.5), 0: (38.5, inf)}-> 1,
 {2: (1789.5, inf)}-> 1]

In [26]:
# Score the fine boundary on `data` with the evaluator's `bic` method.
# NOTE(review): presumably a BIC-style score combining fit and complexity — confirm.
glocalx.evaluator.bic(glocalx.fine_boundary, data)

-0.7702767789885829

In [None]:
# Call the evaluator's `binary_fidelity_model` on the fine boundary.
# NOTE(review): this cell has no execution count or output in the saved
# notebook — confirm it runs (and whether it needs a data argument).
glocalx.evaluator.binary_fidelity_model(glocalx.fine_boundary)

In [30]:
# Inspect the evaluator's `perfect_coverages` attribute
# (an empty dict in the recorded output).
glocalx.evaluator.perfect_coverages

{}