In [1]:
from data import *
from explainers import *
from metrics import *

import numpy as np
import pandas as pd

from itertools import product
import random
from time import time

# Load the data
n_features = 10
imdb = Imdb(n_features=n_features)




In [2]:
random.seed(42)
np.random.seed(42)

explainer_lst = [
    # AnchorExplainer,
    LimeExplainer,
    ShapExplainer,
    RandomFeatureImportanceExplainer,
    RandomRuleExplainer,
    SurrogateDecisionTreeExplainer,
    SurrogateLogisticRegressionExplainer,
    SurrogateMultinomialNBExplainer,
    # SageExplainer,
    # C45Explainer,
    # ID3Explainer,
    # CHAIDExplainer,
    # QIIExplainer,
]

metric_lst = [
    FidelityMetric,
    NumberRulesMetric,
    AvgRuleLengthMetric,
    RuleCoverageMetric,
    RuleOverlapMetric,
    ComplexityMetric,
    AvgCorrectRuleMetric,
    FaithfulnessCorrelationMetric,
    SensitivityMetric,
    ChangeSeedMetric,
    ExecutionTimeMetric,
]


def run_experiment(data):
    # Initialize the results list
    results = []

    # Evaluate all explainers and metrics
    for exp in explainer_lst:
        exp_start = time()
        # Initialize the explainer and metric
        exp = exp(data)
        exp_timer = time() - exp_start
        for metric in metric_lst:
            if isinstance(exp, AnchorExplainer) and data.n_features > 100:
                continue
            if isinstance(exp, ShapExplainer) and data.n_features > 1000:
                continue
            if isinstance(exp, SageExplainer) and data.n_features > 100:
                continue
            if isinstance(exp, SageExplainer) and isinstance(metric, FidelityMetric):
                continue
            if isinstance(exp, C45Explainer) and data.n_features > 100:
                continue
            if isinstance(exp, ID3Explainer) and data.n_features > 100:
                continue
            if isinstance(exp, CHAIDExplainer) and data.n_features > 100:
                continue
            # Start the timer
            metric_start = time()

            try:
                metric = metric(data, exp)
            except ValueError as e:
                # Skip incompatible metrics
                # Metrics raise a ValueError if they are incompatible with the explainer
                continue
            print(
                f'Running: {exp} - {metric} ({exp.scope.value}, {exp.explanation_type.value})')

            try:
                # Evaluate the metric and obtain a numeric value
                m = metric()
            except Exception as e:
                print('!!! Error !!!')
                print(f'Error: {e}')
                continue

            # Stop the timer
            metric_timer = time() - metric_start
            print(f'Metric value: {m}')

            # Create a dict with the results
            results_tuple = {
                'metric_category': metric.metric_category.value,
                'scope': exp.scope.value,
                'explanation_type': exp.explanation_type.value,
                'explainer': str(exp),
                'metric': str(metric),
                'n_features': data.n_features,
                'value': m,
                'time': metric_timer + exp_timer,
            }
            # Append the results to the list
            results.append(results_tuple)

    # Create a DataFrame with the results
    results = pd.DataFrame(results)
    return results

In [3]:
def runall():
    for n in [10]:  # [10, 100, 1000, 10000]
        print(f'Running experiment with {n} features')
        data = Imdb(n_features=n)
        results = run_experiment(data)
        results.to_csv(f'results/results_{n}.csv', index=False)
        display(results)


# runall()
# run_experiment(imdb)

In [4]:
random.seed(42)
np.random.seed(42)

explainer_lst = [
    SageExplainer,
    C45Explainer,
    ID3Explainer,
    CHAIDExplainer,
    QIIExplainer,
]


def runextra():
    for n in [10, 100, 1000, 10000]:  #
        print(f'Running experiment with {n} features')
        data = Imdb(n_features=n)
        results = run_experiment(data)
        results.to_csv(f'results/results_{n}_extra.csv', index=False)
        display(results)


runextra()

Running experiment with 10 features


  0%|          | 0/1 [00:00<?, ?it/s]

Running: SageExplainer - FidelityMetric (global, feature_importance)
!!! Error !!!
Error: axis 1 is out of bounds for array of dimension 1
Running: SageExplainer - ComplexityMetric (global, feature_importance)
Metric value: 0.18893919453632235
Running: SageExplainer - SensitivityMetric (global, feature_importance)


Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.


Metric value: 1.0
Running: SageExplainer - ChangeSeedMetric (global, feature_importance)
Metric value: 1.0
Running: SageExplainer - ExecutionTimeMetric (global, feature_importance)


Liblinear failed to converge, increase the number of iterations.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Metric value: 3.362572503089905
[INFO]:  4 CPU cores will be allocated in parallel running
That's why, the algorithm is set to Regression to handle the data set.
Regression  tree is going to be built...
-------------------------
finished in  56.88160824775696  seconds
-------------------------
Evaluate  train set
-------------------------
MAE:  0.12605666666666668
MSE:  0.1259583333333333
RMSE:  0.354906090865363
RAE:  0.4736124430228618
RRSE:  0.7152504078847485
Mean:  0.56154
MAE / Mean:  22.448385986157117 %
RMSE / Mean:  63.202281380732096 %
Running: C45Explainer - FidelityMetric (global, rule_based)
Metric value: 0.889228767821669
Running: C45Explainer - NumberRulesMetric (global, rule_based)
Metric value: 295
Running: C45Explainer - AvgRuleLengthMetric (global, rule_based)
!!! Error !!!
Error: unsupported operand type(s) for +: 'dict' and 'dict'
Running: C45Explainer - RuleCoverageMetric (global, rule_based)
Metric value: 0.2529
Running: C45Explainer - RuleOverlapMetric (global, 

Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.


Metric value: 1.0
Running: C45Explainer - ChangeSeedMetric (global, rule_based)
Metric value: 1.0
Running: C45Explainer - ExecutionTimeMetric (global, rule_based)
[INFO]:  4 CPU cores will be allocated in parallel running
Regression  tree is going to be built...
-------------------------
finished in  60.085758447647095  seconds
-------------------------
Evaluate  train set
-------------------------
MAE:  0.12605666666666668
MSE:  0.1259583333333333
RMSE:  0.354906090865363
RAE:  0.4736124430228618
RRSE:  0.7152504078847485
Mean:  0.56154
MAE / Mean:  22.448385986157117 %
RMSE / Mean:  63.202281380732096 %
Metric value: 62.25426530838013
[INFO]:  4 CPU cores will be allocated in parallel running
That's why, the algorithm is set to Regression to handle the data set.
Regression  tree is going to be built...
-------------------------
finished in  60.30548691749573  seconds
-------------------------
Evaluate  train set
-------------------------
MAE:  0.12605666666666668
MSE:  0.125958333333

Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.


Metric value: 1.0
Running: ID3Explainer - ChangeSeedMetric (global, rule_based)
Metric value: 1.0
Running: ID3Explainer - ExecutionTimeMetric (global, rule_based)
[INFO]:  4 CPU cores will be allocated in parallel running
Regression  tree is going to be built...
-------------------------
finished in  60.24894380569458  seconds
-------------------------
Evaluate  train set
-------------------------
MAE:  0.12605666666666668
MSE:  0.1259583333333333
RMSE:  0.354906090865363
RAE:  0.4736124430228618
RRSE:  0.7152504078847485
Mean:  0.56154
MAE / Mean:  22.448385986157117 %
RMSE / Mean:  63.202281380732096 %
Metric value: 62.383721351623535
[INFO]:  4 CPU cores will be allocated in parallel running
That's why, the algorithm is set to Regression to handle the data set.
Regression  tree is going to be built...
-------------------------
finished in  60.645246505737305  seconds
-------------------------
Evaluate  train set
-------------------------
MAE:  0.12605666666666668
MSE:  0.12595833333

Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.


Metric value: 1.0
Running: CHAIDExplainer - ChangeSeedMetric (global, rule_based)
Metric value: 1.0
Running: CHAIDExplainer - ExecutionTimeMetric (global, rule_based)
[INFO]:  4 CPU cores will be allocated in parallel running
Regression  tree is going to be built...
-------------------------
finished in  60.60457491874695  seconds
-------------------------
Evaluate  train set
-------------------------
MAE:  0.12605666666666668
MSE:  0.1259583333333333
RMSE:  0.354906090865363
RAE:  0.4736124430228618
RRSE:  0.7152504078847485
Mean:  0.56154
MAE / Mean:  22.448385986157117 %
RMSE / Mean:  63.202281380732096 %
Metric value: 62.762168645858765
Running: QIIExplainer - ComplexityMetric (local, feature_importance)
Percent: [####################] 100% Count: 600 Approx: 72.23 | Done...
Metric value: 0.023873843067749534
Running: QIIExplainer - FaithfulnessCorrelationMetric (local, feature_importance)
Percent: [####################] 100% Count: 600 Approx: 72.23 | Done...
Percent: [###########

An input array is constant; the correlation coefficient is not defined.


Percent: [####################] 100% Count: 600 Approx: 72.23 | Done...
Percent: [####################] 100% Count: 600 Approx: 79.39 | Done...
Metric value: 0.9881777777777778
Running: QIIExplainer - ChangeSeedMetric (local, feature_importance)
Percent: [####################] 100% Count: 600 Approx: 72.23 | Done...
Percent: [####################] 100% Count: 600 Approx: 75.74 | Done...
Metric value: 0.9986722222222223
Running: QIIExplainer - ExecutionTimeMetric (local, feature_importance)
Percent: [####################] 100% Count: 600 Approx: 72.23 | Done...
Percent: [####################] 100% Count: 600 Approx: 76.25 | Done...
Percent: [####################] 100% Count: 600 Approx: 81.50 | Done...
Percent: [####################] 100% Count: 600 Approx: 114.02 | Done...
Percent: [####################] 100% Count: 600 Approx: 105.13 | Done...
Percent: [####################] 100% Count: 600 Approx: 112.20 | Done...
Percent: [####################] 100% Count: 600 Approx: 125.71 | Done.

Unnamed: 0,metric_category,scope,explanation_type,explainer,metric,n_features,value,time
0,parsimony,global,feature_importance,SageExplainer,ComplexityMetric,10,0.188939,5.183185
1,consistency,global,feature_importance,SageExplainer,SensitivityMetric,10,1.0,75.139812
2,stability,global,feature_importance,SageExplainer,ChangeSeedMetric,10,1.0,5.183185
3,parsimony,global,feature_importance,SageExplainer,ExecutionTimeMetric,10,3.362573,38.809406
4,soundness,global,rule_based,C45Explainer,FidelityMetric,10,0.889229,60.970622
5,parsimony,global,rule_based,C45Explainer,NumberRulesMetric,10,295.0,58.93802
6,contextfulness,global,rule_based,C45Explainer,RuleCoverageMetric,10,0.2529,60.664591
7,contextfulness,global,rule_based,C45Explainer,RuleOverlapMetric,10,0.0,66.650301
8,consistency,global,rule_based,C45Explainer,SensitivityMetric,10,1.0,113.457188
9,stability,global,rule_based,C45Explainer,ChangeSeedMetric,10,1.0,59.072436


Running experiment with 100 features


  0%|          | 0/1 [00:00<?, ?it/s]

Running: SageExplainer - FidelityMetric (global, feature_importance)
!!! Error !!!
Error: axis 1 is out of bounds for array of dimension 1
Running: SageExplainer - ComplexityMetric (global, feature_importance)
Metric value: 0.0515009019591276
Running: SageExplainer - SensitivityMetric (global, feature_importance)


Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.


Metric value: 1.0
Running: SageExplainer - ChangeSeedMetric (global, feature_importance)
Metric value: 1.0
Running: SageExplainer - ExecutionTimeMetric (global, feature_importance)


Liblinear failed to converge, increase the number of iterations.


  0%|          | 0/1 [00:00<?, ?it/s]

Metric value: 517.0670754909515
[INFO]:  4 CPU cores will be allocated in parallel running
That's why, the algorithm is set to Regression to handle the data set.
Regression  tree is going to be built...


In [None]:
# explainer_lst = [
# ]
# metric_lst = [
#     ExecutionTimeMetric,
# ]


# def runtimes():
#     for n in [10, 100, 1000, 10000]:
#         print(f'Running experiment with {n} features')
#         data = Imdb(n_features=n)
#         results = run_experiment(data)
#         results.to_csv(f'results/results_times_{n}.csv', index=False)
#         display(results)


# runtimes()

# Visualise Results


In [None]:
import os
# Load the results for each file in the results folder
df = {}
for file in os.listdir('results'):
    if file.endswith('.csv'):
        df[file] = pd.read_csv(f'results/{file}')
df = pd.concat(df.values(), ignore_index=True)
df.head()

Unnamed: 0,metric_category,scope,explanation_type,explainer,metric,n_features,value,time
0,stability,local,feature_importance,LimeExplainer,ChangeSeedMetric,10,1.0,0.925535
1,consistency,local,feature_importance,LimeExplainer,SensitivityMetric,10,0.999998,0.942897
2,completeness,local,feature_importance,LimeExplainer,FaithfulnessCorrelationMetric,10,-0.022409,4.028512
3,parsimony,local,feature_importance,LimeExplainer,ComplexityMetric,10,0.994487,0.453343
4,stability,local,feature_importance,ShapExplainer,ChangeSeedMetric,10,0.982367,0.058032
