In [1]:
import numpy as np
import pandas as pd
import sklearn.ensemble

from xaibenchmark import load_adult as la
from xaibenchmark.explainer import AnchorsExplainer, LimeExplainer

# Seed NumPy's global random generator so reruns start from the same state.
np.random.seed(seed=1)

Load Dataset

In [2]:
# Load the 'adult' dataset through the project loader, reading from ../data.
data = la.load_csv_data(
    'adult',
    root_path='../data',
)

def preprocess(*data_df):
    """One-hot encode each given frame and give them all the same column layout.

    Reads the notebook-level ``data`` object: the keys of
    ``data.categorical_features`` name the columns to encode, the values
    listed under each key define the expected dummy columns
    (``<column>_<value>``), and ``data.data.keys()`` supplies the full set of
    input column names.

    Args:
        *data_df: Frames to encode.

    Returns:
        list: One encoded frame per argument, in argument order.
    """
    def encode_frame(frame):
        categorical = data.categorical_features

        # Every dummy column the encoded frame is expected to expose.
        dummy_cols = []
        for feature in categorical:
            for value in categorical[feature]:
                dummy_cols.append(feature + '_' + str(value))

        encoded = pd.get_dummies(frame, columns=categorical.keys())

        # A listed value that never occurs in this frame yields no dummy
        # column, so add it as an all-zero column.
        present = set(encoded.columns)
        for col in dummy_cols:
            if col not in present:
                encoded[col] = 0

        # Non-categorical columns first, then the dummy columns.
        # NOTE(review): this set difference has no guaranteed iteration
        # order, so the position of the non-categorical columns may differ
        # between interpreter runs — confirm nothing depends on it.
        passthrough_cols = list(set(data.data.keys()) - set(categorical.keys()))
        return encoded[passthrough_cols + dummy_cols]

    return [encode_frame(frame) for frame in data_df]

# Encode the three splits through the same preprocess() call.
train, dev, test = preprocess(
    data.data,
    data.data_dev,
    data.data_test,
)

# Targets are taken from the loaded dataset object unchanged.
labels_train = data.target
labels_dev = data.target_dev
labels_test = data.target_test

Train a Random Forest Model and Explain It with Anchors

In [18]:
from anchor import utils
from xaibenchmark import explainer

# Adult data as returned by anchor's own loader, requested with
# balance=True and discretize=True.
dataset_folder = '../data/'
adult_dataset = utils.load_dataset(
    'adult',
    balance=True,
    dataset_folder=dataset_folder,
    discretize=True,
)

# Fit the forest on the loader's training split and wrap it in the explainer.
rf = sklearn.ensemble.RandomForestClassifier(n_estimators=50, n_jobs=5)
rf.fit(adult_dataset.train, adult_dataset.labels_train)
exp = explainer.AnchorsExplainer(rf, adult_dataset)

# Explain the first test instance, then collect the explainer's metric report.
explanation = exp.explain_instance(
    exp.dataset.test[0],
    "test",
    threshold=0.65,
)
anchor_reports = exp.report()
anchor_reports

{('accuracy', 0.9414893617021277),
 ('area', 0.010416666666666666),
 ('balance_data', 0.8563829787234043),
 ('balance_explanation', 1),
 ('balance_model', 0.5319148936170213),
 ('coverage', 0.1183),
 ('precision', 0.872865275142315)}

Train a Random Forest Model and Explain It with LIME

In [19]:
# Second forest, fitted on the one-hot encoded training frame from preprocess().
rf2 = sklearn.ensemble.RandomForestClassifier(n_estimators=100)
rf2.fit(train, labels_train.to_numpy().ravel())

lime_exp = explainer.LimeExplainer(
    data,
    rf2,
    discretize_continuous=False,
)
# iloc[[0]] selects the first row as a one-row frame rather than a Series.
lime_exp.explain_instance(data.data.iloc[[0]], num_features=10)
lime_reports = lime_exp.report()
lime_reports

{('accuracy', 0.8031413153161058),
 ('area', 2.589477453233665e+139),
 ('balance', 0.05944807616373448),
 ('coverage', 4.412848867733944e-05),
 ('furthest_distance', 0.14855435371005044)}

Normalization

In [41]:
def normalize(lst):
    """Rescale metric scores that fall outside the closed interval [0, 1].

    Scores already inside [0, 1] (both bounds included) are treated as
    relative metrics and are only rounded. Every other score is min-max
    scaled against the smallest and largest score found in ``lst``.

    Args:
        lst: Iterable of ``(metric, score)`` pairs.

    Returns:
        list: ``(metric, score)`` pairs sorted by metric name, each score a
        float rounded to 4 decimal places. Empty input yields an empty list.
    """
    pairs = list(lst)
    if not pairs:
        # max()/min() raise ValueError on an empty sequence.
        return []

    values = [score for _, score in pairs]
    low, high = min(values), max(values)
    span = high - low

    new_lst = []
    for metric, score in pairs:
        if 0 <= score <= 1:
            # Exact 0 and 1 are valid relative scores; rescaling them against
            # an unrelated, much larger metric would distort them.
            scaled = score
        elif span == 0:
            # All scores are identical, so there is no range to scale
            # against; map to 0.0 instead of dividing by zero.
            scaled = 0.0
        else:
            scaled = (score - low) / span
        new_lst.append((metric, round(float(scaled), 4)))
    return sorted(new_lst, key=lambda x: x[0])


Get metric figures from both explainers

In [42]:
# Show the Anchors report after normalization.
normalize([*anchor_reports])

[('accuracy', 0.9415),
 ('area', 0.0104),
 ('balance_data', 0.8564),
 ('balance_explanation', 1.0),
 ('balance_model', 0.5319),
 ('coverage', 0.1183),
 ('precision', 0.8729)]

In [43]:
# Show the LIME report after normalization.
normalize([*lime_reports])

[('accuracy', 0.8031),
 ('area', 1.0),
 ('balance', 0.0594),
 ('coverage', 0.0),
 ('furthest_distance', 0.1486)]

## Visualizing
Bar chart plus Table

In [44]:
import plotly.graph_objects as go

# Split the normalized Anchors report into one list per table column.
anchors = normalize(list(anchor_reports))
metrics = [name for name, _ in anchors]
scores = [value for _, value in anchors]

# Two-column table: metric names on the left, Anchor scores on the right.
fig = go.Figure(
    data=[
        go.Table(
            header={'values': ['metric', 'Anchor'], 'align': 'left'},
            cells={'values': [metrics, scores], 'align': 'left'},
        )
    ]
)

fig.show()

In [45]:
import plotly.express as px

# Bar chart of the normalized Anchor scores, one bar per metric.
fig = px.bar(
    data_frame=pd.DataFrame(anchors, columns=['metric', 'score']),
    x='metric',
    y='score',
)
fig.show()

In [46]:
# Split the normalized LIME report into one list per table column.
limes = normalize(list(lime_reports))
metrics = [name for name, _ in limes]
scores = [value for _, value in limes]

# Two-column table: metric names on the left, LIME scores on the right.
fig = go.Figure(
    data=[
        go.Table(
            header={'values': ['metric', 'LIME'], 'align': 'left'},
            cells={'values': [metrics, scores], 'align': 'left'},
        )
    ]
)

fig.show()

In [47]:
# Bar chart of the normalized LIME scores, one bar per metric.
fig = px.bar(
    data_frame=pd.DataFrame(limes, columns=['metric', 'score']),
    x='metric',
    y='score',
)
fig.show()