In [1]:
import autosklearn.classification
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import sklearn.metrics

In [2]:
dataset = pd.read_csv('../Datasets/Hepatitis/hepatitis_csv.csv')

In [3]:
dataset.drop('protime', axis=1, inplace=True)

In [4]:
dataset.isna().sum()/len(dataset)

age                0.000000
sex                0.000000
steroid            0.006452
antivirals         0.000000
fatigue            0.006452
malaise            0.006452
anorexia           0.006452
liver_big          0.064516
liver_firm         0.070968
spleen_palpable    0.032258
spiders            0.032258
ascites            0.032258
varices            0.032258
bilirubin          0.038710
alk_phosphate      0.187097
sgot               0.025806
albumin            0.103226
histology          0.000000
class              0.000000
dtype: float64

In [5]:
continuous_features = ['age', 'bilirubin', 'alk_phosphate', 'sgot', 'albumin']
for column in continuous_features:
    dataset[column] = dataset[column].fillna(dataset[column].mean())
for column in dataset.columns.drop(continuous_features):
    dataset[column] = dataset[column].fillna(dataset[column].mode().sample(1, random_state=1).values[0])
for column in dataset.select_dtypes('bool'):
    dataset[column] = dataset[column].astype(np.int)
dataset['sex'] = dataset['sex'].replace({'female': 0,'male': 1})

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  import sys


In [6]:
dataset['class'] = dataset['class'].replace({'live': 0,'die': 1})

In [7]:
X, y = dataset.drop('class', axis=1), dataset['class']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# AUTOML

In [19]:
automl = autosklearn.classification.AutoSklearnClassifier()
automl.fit(X_train, y_train)
y_hat = automl.predict(X_test)

In [20]:
sklearn.metrics.accuracy_score(y_test, y_hat)

0.7692307692307693

In [21]:
import pickle
with open('./models/hepatitis_automl.pkl', 'wb') as f:
    pickle.dump(automl, f)

In [8]:
import pickle
with open('./models/hepatitis_automl.pkl', 'rb') as f:
    automl = pickle.load(f)

len(automl.show_models())

FileNotFoundError: [Errno 2] No such file or directory: './models/hepatitis_automl.pkl'

# RANDOM FOREST

In [8]:
import sklearn.ensemble

model = sklearn.ensemble.RandomForestClassifier(n_estimators=512, random_state=42)
model.fit(X_train, y_train)
y_hat = model.predict(X_test)

In [9]:
sklearn.metrics.accuracy_score(y_test, y_hat)

0.8717948717948718

In [10]:
import pickle
with open('./models/hepatitis_rf2.pkl', 'wb') as f:
    pickle.dump(model, f)

In [8]:
import sklearn.ensemble
import pickle
with open('./models/hepatitis_rf2.pkl', 'rb') as f:
    model = pickle.load(f)

In [9]:
y_hat = model.predict(X_test)
sklearn.metrics.accuracy_score(y_test, y_hat)

0.8717948717948718

# LIME

In [58]:
import lime
import lime.lime_tabular
import tqdm

In [59]:
continuous_features = ['age', 'bilirubin', 'alk_phosphate', 'sgot', 'albumin']
categorical_features = X_train.columns.drop(continuous_features).tolist()
explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names=X_train.columns.tolist(), class_names=['No', 'Yes'], categorical_features=categorical_features, discretize_continuous=True)

In [13]:
test_x = X_test.values

In [14]:
exp_fn = lambda i: explainer.explain_instance(X_test.iloc[i], model.predict_proba, num_features=len(X_test.columns))
def exp_fn_blk(xtest, exp_fn):
    exp1 = []
    for i in tqdm.tqdm(range(len(xtest))):
        exp = exp_fn(i)
        exp1.append(exp.as_map()[exp.available_labels()[0]])
    return np.array(exp1)
exp_fn_wrap = lambda x: np.array(exp_fn_blk(x, exp_fn))

In [None]:
exp1 = exp_fn_wrap(test_x)
exp2 = exp_fn_wrap(test_x)

In [None]:
np.save('./hepatitis_lime1.npy', exp1)
np.save('./hepatitis_lime2.npy', exp2)

# LIME Global

In [60]:
from lime import submodular_pick
import time

start_time = time.time()
exp1 = submodular_pick.SubmodularPick(explainer, X_test.values, model.predict_proba, sample_size=500, num_features=len(X_test.columns), num_exps_desired=5)
print("--- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
exp2 = submodular_pick.SubmodularPick(explainer, X_test.values, model.predict_proba, sample_size=500, num_features=len(X_test.columns), num_exps_desired=5)
print("--- %s seconds ---" % (time.time() - start_time))

                              size of input data. Using all data
  size of input data. Using all data""")


--- 133.02945971488953 seconds ---


                              size of input data. Using all data
  size of input data. Using all data""")


--- 158.08447742462158 seconds ---


In [78]:
def get_feature_imp(sp_obj):
    W_pick=pd.DataFrame([dict(this.as_list(this.available_labels()[0])) for this in sp_obj.sp_explanations]).fillna(0)
    W_pick['prediction'] = [this.available_labels()[0] for this in sp_obj.sp_explanations]
    W=pd.DataFrame([dict(this.as_list(this.available_labels()[0])) for this in sp_obj.explanations]).fillna(0)
    W['prediction'] = [this.available_labels()[0] for this in sp_obj.explanations]
    np.abs(W.drop("prediction", axis=1)).mean(axis=0).sort_values(ascending=False).head(25).sort_values(ascending=True)
    grped_coeff = W.groupby("prediction").mean()
    grped_coeff = grped_coeff.T
    return grped_coeff[0].values

In [79]:
feat_imp1 = get_feature_imp(exp1)
feat_imp2 = get_feature_imp(exp2)

In [80]:
feat_imp1

array([ 0.18744669,  0.08023318,  0.09831124,  0.01976555,  0.03615287,
        0.0331906 , -0.02994793,  0.00493468, -0.02086241,  0.01185868,
        0.01147735, -0.00334327,  0.00638221, -0.0063429 ,  0.00145957,
       -0.00027922, -0.0022545 ,  0.        , -0.01809966, -0.02058307,
       -0.03281419, -0.02034057,  0.01025667, -0.01015185, -0.00488674,
       -0.00157352, -0.00114962, -0.00223699,  0.02531155, -0.01491504,
        0.0022497 , -0.02231687, -0.00936111,  0.01108054, -0.0095815 ,
        0.00523939,  0.00042867,  0.00086861,  0.00369731,  0.00962114,
        0.01251484,  0.00195863,  0.00095824,  0.00034751, -0.00839321])

In [81]:
def global_identity(feat_imp1, feat_imp2):
    sum = 0
    for i in range(len(feat_imp1)):
        if(feat_imp1[i] == feat_imp2[i]):
            sum += 1
    return sum/len(feat_imp1)

In [82]:
i = global_identity(feat_imp1, feat_imp2)
i

0.022222222222222223

In [83]:
def normal_fi(feat_imp):
    return np.abs(feat_imp) / np.sum(np.abs(feat_imp))

In [84]:
normal_feat_imp = normal_fi(feat_imp1 + 1e-9)

In [85]:
#Entropy Ratio
Ser = np.sum(normal_feat_imp*np.log(normal_feat_imp))/np.log(1/len(normal_feat_imp))

# Kullback-Leibler Divergence
Skl = np.sum(normal_feat_imp*np.log(normal_feat_imp/(1/len(normal_feat_imp))))

In [86]:
def calc_gini(pfi):
    sum = 0
    for i in range(len(pfi)):
        sum_curr = 0
        for j in range(len(pfi)):
            sum_curr += np.abs(pfi[i]-pfi[j])
        sum += sum_curr
    
    return sum/(2*len(pfi)**2)*(np.sum(pfi)/len(pfi))

In [87]:
Sg = calc_gini(normal_feat_imp)

In [88]:
Ser, Skl, Sg

(0.7797565416242837, 0.83839251161613, 0.00032078091530043587)

In [89]:
def calc_alpha_fi(normal_pfi, alpha):
    j_inst = 0
    sum = 0
    for i in range(len(normal_pfi)-1, -1, -1):
        sum += normal_pfi[i]
        if sum<=alpha:
            j_inst = i
        else:
            break
    return 1- (j_inst/len(normal_pfi))

In [90]:
calc_alpha_fi(normal_feat_imp, 0.8)

0.9777777777777777

In [91]:
temp =explainer.explain_instance(X_test.iloc[0], model.predict_proba, num_features=5)

In [92]:
def get_feature_imp_all(sp_obj):
    W_pick=pd.DataFrame([dict(this.as_list(this.available_labels()[0])) for this in sp_obj.sp_explanations]).fillna(0)
    W_pick['prediction'] = [this.available_labels()[0] for this in sp_obj.sp_explanations]
    W=pd.DataFrame([dict(this.as_list(this.available_labels()[0])) for this in sp_obj.explanations]).fillna(0)
    W['prediction'] = [this.available_labels()[0] for this in sp_obj.explanations]
    np.abs(W.drop("prediction", axis=1)).mean(axis=0).sort_values(ascending=False).head(25).sort_values(ascending=True)
    grped_coeff = W.groupby("prediction").mean()
    grped_coeff = grped_coeff.T
    return grped_coeff

In [93]:
class1_feat_imp, class2_feat_imp = get_feature_imp_all(exp1)[0].values, get_feature_imp_all(exp1)[1].values
normal_class1_fi, normal_class2_fi = normal_fi(class1_feat_imp), normal_fi(class2_feat_imp)

In [94]:
np.linalg.norm(normal_class1_fi - normal_class2_fi, ord=2)

0.2672899583401707

# CIU

In [11]:
from ciu import determine_ciu
import tqdm

In [12]:
feat_list = X_train.columns.tolist()

In [13]:
def exp_fn_blk(xtest):
    exp1 = []
    for i in tqdm.tqdm(range(len(xtest))):
        exp = determine_ciu(X_test.iloc[i:i+1], model.predict_proba, X_train.to_dict('list'), samples = 1000, prediction_index = 1)
        exp_list = [[feat_list.index(i), exp.ci[i]] for i in exp.ci]
        exp1.append(exp_list)
    return np.array(exp1)

In [14]:
exp1 = exp_fn_blk(X_test)
exp2 = exp_fn_blk(X_test)

100%|██████████| 39/39 [01:18<00:00,  2.02s/it]
100%|██████████| 39/39 [00:54<00:00,  1.40s/it]


In [15]:
np.save('./explanations/hepatitis_ciu1.npy', exp1)
np.save('./explanations/hepatitis_ciu2.npy', exp2)

In [29]:
import metrics

In [31]:
def enc_exp(exp, feature_num):
    enc_exp = np.zeros((len(exp),feature_num))
    for i in range(len(exp)):
        for j in range(len(exp[i])):
            enc_exp[i][int(exp[i,j,0])] = exp[i,j,1]
    return enc_exp

In [32]:
i = metrics.calc_identity(exp1, exp2)
s = metrics.calc_separability(exp1)
enc1 = enc_exp(exp1, len(feat_list))
sb = metrics.calc_stability(enc1, y_test)

  self._check_params(X)


In [33]:
i, s, sb

((17.94871794871795, 32, 39), (0, 39, 1521, 0.0), (14, 39))

In [34]:
X_test_norm = metrics.normalize_test(X_train, X_test)
sim = metrics.calc_similarity(exp1, X_test_norm)

In [35]:
sim

0.6829878898876597

In [36]:
list_monotonicity = []
list_non_sensitivity = []
list_effective_complexity = []

for i in tqdm.tqdm(range(len(X_test))):
    atr = exp1[i]
    sorted_atr = [j for i,j in atr]
    sorted_feat = [i for i,j in atr]
    y = np.zeros(2, dtype=int)
    np.put(y, y_test.iloc[i], 1)
    example = metrics.FeatureAttribution(model, X_test.to_numpy()[i], y, sorted_atr)
    list_monotonicity.append(example.monotonicity())
    list_non_sensitivity.append(example.non_sensitivity())
    list_effective_complexity.append(example.effective_complexity(sorted_feat, 0.1))

100%|██████████| 39/39 [01:17<00:00,  1.99s/it]


In [37]:
print(np.mean(list_monotonicity))
print(np.mean(list_non_sensitivity))
print(np.mean(list_effective_complexity))

print(np.median(list_monotonicity))
print(np.median(list_non_sensitivity))
print(np.median(list_effective_complexity))

0.20520679866470337
0.48717948717948717
8.23076923076923
0.21164522230736263
0.0
0.0


In [38]:
metrics.calc_trust_score(model, X_test.to_numpy(), exp1, 3, X_train.columns.to_list())

100%|██████████| 39/39 [03:28<00:00,  5.36s/it]


0.3034188034188034

# RULEFIT

In [8]:
from skrules import SkopeRules
import metrics_rules
import time

In [9]:
clf = SkopeRules(max_depth_duplication=2,
                    n_estimators=512,
                    precision_min=0.3,
                    recall_min=0.1,
                    feature_names=X_train.columns.tolist())

In [10]:
start_time = time.time()
clf.fit(X_train, y_train)
print("--- %s seconds ---" % (time.time() - start_time))

--- 11.780784368515015 seconds ---


In [11]:
start_time = time.time()
top_rules1 = clf.score_top_rules(X_test)
top_rules2 = clf.score_top_rules(X_test)
print("--- %s seconds ---" % (time.time() - start_time))

--- 0.07526111602783203 seconds ---


In [12]:
i = metrics_rules.calc_identity_rules(top_rules1, top_rules2)
print(i)

s = metrics_rules.calc_separability_rules(top_rules1)
print(s)

enc_rules = metrics_rules.exp_enc(clf, top_rules1)
sb = metrics_rules.calc_stability_rules(enc_rules, y_test)
print(sb)

(0.0, 39, 39)
(332, 39, 1521, 21.82774490466798)
(17, 39)


  self._check_params(X)


In [65]:
X_test_norm = metrics_rules.normalize_test(X_train, X_test)
sim = metrics_rules.calc_similarity(exp1, X_test_norm)
print(sim)

0.9688432163003059


# RULEMATRIX

In [10]:
continuous_features = ['age', 'bilirubin', 'alk_phosphate', 'sgot', 'albumin']
categorical_features = X_train.columns.drop(continuous_features).tolist()

In [11]:
import rulematrix
import time
import metrics_rules

In [12]:
is_continuous = [True if i in continuous_features else False for i in X_train.columns.tolist()]
is_categorical = [True if i in categorical_features else False for i in X_train.columns.tolist()]

In [24]:
surrogate = rulematrix.surrogate.rule_surrogate(
    model.predict,
    X_train,
    sampling_rate=4,
    is_continuous=is_continuous,
    is_categorical=is_categorical,
    seed=42
)

In [25]:
test_x = X_test.to_numpy()

In [92]:
def exp_fn_blk(xtest):
    exp1 = []
    for i in range(len(xtest)):
        queried_rules = np.arange(surrogate.student.n_rules)[surrogate.student.decision_path(test_x[i].reshape(1,-1)).reshape(-1)]
        exp1.append(queried_rules[-1])
    return np.array(exp1)
exp_fn_wrap = lambda x: np.array(exp_fn_blk(x))

In [93]:
start_time = time.time()
exp1 = exp_fn_blk(test_x)
exp2 = exp_fn_blk(test_x)
print("--- %s seconds ---" % (time.time() - start_time))

--- 0.01602959632873535 seconds ---


In [94]:
def enc_exp(exp, n_features):
    enc = []
    for i in range(exp.shape[0]):
        new = np.zeros(n_features)
        for j in surrogate.student.rule_list[exp[i]].clauses:
            new[j.feature_idx] = 1
        enc.append(new)
    return np.array(enc)

In [95]:
enc_exp = enc_exp(exp1, X_train.shape[1])

In [96]:
i = metrics_rules.calc_identity_rules(exp1, exp2)
print(i)

s = metrics_rules.calc_separability_rules(exp1)
print(s)

sb = metrics_rules.calc_stability_rules(enc_exp, y_test)
print(sb)

(0.0, 39, 39)
(440, 39, 1521, 28.928336620644313)
(15, 39)


  self._check_params(X)


In [97]:
X_test_norm = metrics_rules.normalize_test(X_train, X_test)
sim = metrics_rules.calc_similarity(enc_exp, X_test_norm)

In [98]:
sim

1.1212315681063432

# RuleMatrix Global

In [31]:
test_x.shape

(39, 18)

In [35]:
surrogate.student.rule_list

[Rule(clauses=[Clause(feature_idx=1, category=1)], output=array([0.97727273, 0.02272727])),
 Rule(clauses=[Clause(feature_idx=16, category=0), Clause(feature_idx=4, category=1)], output=array([0.02173913, 0.97826087])),
 Rule(clauses=[Clause(feature_idx=13, category=2)], output=array([0.04347826, 0.95652174])),
 Rule(clauses=[Clause(feature_idx=10, category=0), Clause(feature_idx=11, category=0)], output=array([0.99196787, 0.00803213])),
 Rule(clauses=[Clause(feature_idx=6, category=1)], output=array([0.76, 0.24])),
 Rule(clauses=[Clause(feature_idx=13, category=1)], output=array([0.1, 0.9])),
 Rule(clauses=[Clause(feature_idx=8, category=1)], output=array([0.93617021, 0.06382979])),
 Rule(clauses=[], output=array([0.38888889, 0.61111111]))]

# ANCHOR Global


In [10]:
from anchor import anchor_tabular
import anchor_utils

In [11]:
explainer = anchor_tabular.AnchorTabularExplainer(
    y_train.unique().tolist(),
    X_train.columns.tolist(),
    X_train.values
)

In [13]:
# Feature Importance using Anchor
def calc_fi(X_test, model, explainer):
    all_exps = []
    for i in range(len(X_test)):
        exp = explainer.explain_instance(X_test.values[i], model.predict, threshold=0.95)
        all_exps.append(exp.exp_map)
    fi = anchor_utils.greedy_pick_anchor(all_exps, X_test.values, k = len(X_test.columns))
    return fi
        

In [14]:
exp1 = calc_fi(X_test, model, explainer)
exp2 = calc_fi(X_test, model, explainer)

0 0.4358974358974359
1 0.5897435897435898
2 0.6666666666666666
3 0.6923076923076923
4 0.717948717948718
5 0.7435897435897436
6 0.7692307692307693
7 0.7948717948717948
8 0.8205128205128205
9 0.8461538461538461
10 0.8717948717948718
11 0.8974358974358975
12 0.9230769230769231
13 0.9487179487179487
14 0.9743589743589743
15 1.0
16 1.0
17 1.0
0 0.4358974358974359
1 0.5897435897435898
2 0.6153846153846154
3 0.6410256410256411
4 0.6666666666666666
5 0.6923076923076923
6 0.717948717948718
7 0.7435897435897436
8 0.7692307692307693
9 0.7948717948717948
10 0.8205128205128205
11 0.8461538461538461
12 0.8717948717948718
13 0.8974358974358975
14 0.9230769230769231
15 0.9487179487179487
16 0.9743589743589743
17 1.0


In [21]:
def normal_fi(feat_imp):
    feat_imp = np.array(feat_imp) + 1e-9
    return np.abs(feat_imp) / np.sum(np.abs(feat_imp))

In [22]:
normal_feat_imp1 = normal_fi(exp1)
normal_feat_imp2 = normal_fi(exp2)

In [24]:
def global_identity(feat_imp1, feat_imp2):
    sum = 0
    for i in range(len(feat_imp1)):
        if(feat_imp1[i] == feat_imp2[i]):
            sum += 1
    return sum/len(feat_imp1)

i = global_identity(normal_feat_imp1, normal_feat_imp2)
i


0.0

In [27]:
#Entropy Ratio
Ser = np.sum(normal_feat_imp1*np.log(normal_feat_imp1))/np.log(1/len(normal_feat_imp1))

# Kullback-Leibler Divergence
Skl = np.sum(normal_feat_imp1*np.log(normal_feat_imp1/(1/len(normal_feat_imp1))))

In [28]:
def calc_gini(pfi):
    sum = 0
    for i in range(len(pfi)):
        sum_curr = 0
        for j in range(len(pfi)):
            sum_curr += np.abs(pfi[i]-pfi[j])
        sum += sum_curr
    
    return sum/(2*len(pfi)**2)*(np.sum(pfi)/len(pfi))

In [29]:
Sg = calc_gini(normal_feat_imp1)

In [30]:
Ser, Skl, Sg

(0.8728877639924251, 0.3674016170393269, 0.001338302340968637)

In [31]:
def calc_alpha_fi(normal_pfi, alpha):
    j_inst = 0
    sum = 0
    for i in range(len(normal_pfi)-1, -1, -1):
        sum += normal_pfi[i]
        if sum<=alpha:
            j_inst = i
        else:
            break
    return 1- (j_inst/len(normal_pfi))

In [32]:
calc_alpha_fi(normal_feat_imp1, 0.8)

0.6111111111111112

# Global

In [37]:
# Permutation Feature Importance
def calc_pfi(model, X_test, y_test, feat):
    pfi = []
    for i in range(len(feat)):
        X_test_copy = X_test.copy()
        X_test_copy[feat] = np.random.permutation(X_test_copy[feat])
        y_hat = model.predict(X_test_copy)
        loss_perm = sklearn.metrics.accuracy_score(y_test, y_hat)
        loss_orig = sklearn.metrics.accuracy_score(y_test, model.predict(X_test))
        pfi.append(loss_perm-loss_orig)
    
    sum_pfi = np.sum(np.abs(pfi))
    normal_pfi = np.abs(pfi)/sum_pfi
    return np.array(normal_pfi)

In [38]:
normal_pfi = calc_pfi(model, X_test, y_test, X_test.columns.tolist())

In [68]:
#Entropy Ratio
Ser = np.sum(normal_pfi*np.log(normal_pfi))/np.log(1/len(normal_pfi))

# Kullback-Leibler Divergence
Skl = np.sum(normal_pfi*np.log(normal_pfi/(1/len(normal_pfi))))

In [69]:
def calc_gini(pfi):
    sum = 0
    for i in range(len(pfi)):
        sum_curr = 0
        for j in range(len(pfi)):
            sum_curr += np.abs(pfi[i]-pfi[j])
        sum += sum_curr
    
    return sum/(2*len(pfi)**2)*(np.sum(pfi)/len(pfi))

In [70]:
Sg = calc_gini(normal_pfi)

In [71]:
Ser, Skl, Sg

(0.9860372710737533, 0.040357477351582974, 0.0004198003878719077)

In [60]:
# Prediction Groups Contrasts (PCG)

groups = []
# Get 4 quartiles based on output
quartiles = np.quantile(model.predict_proba(X_test)[:,1], [0.25, 0.5, 0.75])

# Group 1: 0-0.25
# Group 2: 0.25-0.5
# Group 3: 0.5-0.75
# Group 4: 0.75-1

groups.append(np.where(model.predict_proba(X_test)[:,1] <= quartiles[0])[0])
groups.append(np.where((model.predict_proba(X_test)[:,1] > quartiles[0]) & (model.predict_proba(X_test)[:,1] <= quartiles[1]))[0])
groups.append(np.where((model.predict_proba(X_test)[:,1] > quartiles[1]) & (model.predict_proba(X_test)[:,1] <= quartiles[2]))[0])
groups.append(np.where(model.predict_proba(X_test)[:,1] > quartiles[2])[0])

In [63]:
y_group0 = y_test.iloc[groups[0]]

In [65]:
x_group0 = X_test.iloc[groups[0]]

In [82]:
def calc_pgc(model, X_test, normal_pfi):
    groups = []
    quartiles = np.quantile(model.predict_proba(X_test)[:,1], [0.25, 0.5, 0.75])
    groups.append(np.where(model.predict_proba(X_test)[:,1] <= quartiles[0])[0])
    groups.append(np.where((model.predict_proba(X_test)[:,1] > quartiles[0]) & (model.predict_proba(X_test)[:,1] <= quartiles[1]))[0])
    groups.append(np.where((model.predict_proba(X_test)[:,1] > quartiles[1]) & (model.predict_proba(X_test)[:,1] <= quartiles[2]))[0])
    groups.append(np.where(model.predict_proba(X_test)[:,1] > quartiles[2])[0])
    
    group_pfi = []
    for i in range(len(groups)):
        group_pfi.append(calc_pfi(model, X_test.iloc[groups[i]], y_test.iloc[groups[i]], X_test.columns.tolist()))
    
    c_score = 0
    for i in range(len(groups)):
        print(i)
        for k in range(X_test.columns.shape[0]):
            print(k)
            ik = np.argsort(normal_pfi, axis=0)[-k:]
            ikg = np.argsort(group_pfi[i], axis=0)[-k:]
            # print(ik, ikg)
            position_overlap = 0
            for j in range(k):
                if ik[j] == ikg[j]:
                    position_overlap += 1
            c_score += position_overlap
    
    return c_score/(len(groups)*X_test.columns.shape[0])

In [50]:
def calc_alpha_fi(normal_pfi, alpha):
    j_inst = 0
    sum = 0
    for i in range(len(normal_pfi)-1, -1, -1):
        sum += normal_pfi[i]
        if sum<=alpha:
            j_inst = i
        else:
            break
    return 1- (j_inst/len(normal_pfi))

In [51]:
calc_alpha_fi(normal_pfi, 0.8)

0.7222222222222222

In [11]:
from anchor import utils
from anchor import anchor_tabular

In [23]:
explainer = anchor_tabular.AnchorTabularExplainer(
    y_train.unique().tolist(),
    X_train.columns.tolist(),
    X_train.values
)

In [33]:
# Feature Importance using Anchor
all_exps = []
for i in range(len(X_test)):
    exp = explainer.explain_instance(X_test.values[i], model.predict, threshold=0.95)
    all_exps.append(exp.exp_map)

In [31]:
temp = explainer.explain_instance(X_test.values[0], model.predict, threshold=0.95)

In [32]:
temp.exp_map

{'feature': [7, 11, 13],
 'mean': [0.21712671158443816, 0.8010204081632653, 1.0],
 'precision': [0.21712671158443816, 0.8010204081632653, 1.0],
 'coverage': [1.0, 0.121, 0.0578],
 'examples': [{'covered': array([[ 72.        ,   0.        ,   1.        ,   1.        ,
             1.        ,   0.        ,   0.        ,   1.        ,
             1.        ,   0.        ,   0.        ,   0.        ,
             0.        ,   1.        , 115.        ,  52.        ,
             3.4       ,   1.        ],
          [ 45.        ,   0.        ,   0.        ,   0.        ,
             1.        ,   0.        ,   0.        ,   1.        ,
             1.        ,   1.        ,   0.        ,   0.        ,
             0.        ,   1.2       ,  81.        ,  65.        ,
             3.        ,   0.        ],
          [ 32.        ,   0.        ,   1.        ,   1.        ,
             1.        ,   0.        ,   0.        ,   1.        ,
             1.        ,   0.        ,   1.     

In [41]:
num_feats = len(X_train.columns)
imp = np.zeros(num_feats).astype(int)
for i in range(len(X_test)):
    exp = explainer.explain_instance(X_test.values[i], model.predict, threshold=0.95).features()
    for j in exp:
        imp[j] = imp[j] + 1

In [17]:
import anchor_utils

In [36]:
p = anchor_utils.greedy_pick_anchor(all_exps, X_test.values, k = len(X_test.columns))

0 0.4358974358974359
1 0.5897435897435898
2 0.6666666666666666
3 0.6923076923076923
4 0.717948717948718
5 0.7435897435897436
6 0.7692307692307693
7 0.7948717948717948
8 0.8205128205128205
9 0.8461538461538461
10 0.8717948717948718
11 0.8974358974358975
12 0.9230769230769231
13 0.9487179487179487
14 0.9743589743589743
15 1.0
16 1.0
17 1.0


In [37]:
p

[18, 1, 20, 0, 3, 5, 6, 7, 13, 14, 15, 16, 19, 26, 29, 32, 0, 0]

In [42]:
all_exps2 = []
for i in range(len(X_test)):
    exp = explainer.explain_instance(X_test.values[i], model.predict, threshold=0.95)
    all_exps2.append(exp)

In [42]:
imp

array([ 8,  3,  1,  0,  8,  2,  0,  0,  1,  2, 17,  5,  2, 13,  4,  2, 21,
       11])

In [1]:
from anchor import anchor_tabular

In [14]:
anc_exp = anchor_tabular.AnchorTabularExplainer(class_names=y_train.unique().tolist(), feature_names=X_train.columns.values.tolist(), train_data=X_train.values)

In [25]:
anc_exp.explain_instance(X_test.iloc[0].values, model.predict)

<anchor.anchor_explanation.AnchorExplanation at 0x7f19903b9eb8>

In [None]:
anchor_utils.greedy_pick_anchor()