In [1]:
import autosklearn.classification
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import sklearn.metrics

In [2]:
dataset = pd.read_csv('../Datasets/Thyroid/thyroidDF.csv')

In [3]:
diagnoses = {'-': 'negative',
             'A': 'hyperthyroid', 
             'C': 'hyperthyroid', 
             'B': 'hyperthyroid', 
             'D': 'hyperthyroid',
             'E': 'hypothyroid', 
             'F': 'hypothyroid', 
             'G': 'hypothyroid', 
             'H': 'hypothyroid'}

dataset['target'] = dataset['target'].map(diagnoses) # re-mapping
dataset.dropna(subset=['target'], inplace=True)

In [4]:
dataset.drop(['TSH_measured', 'T3_measured', 'TT4_measured', 'T4U_measured', 'FTI_measured', 'TBG_measured', 'patient_id', 'referral_source', 'TBG'], axis=1, inplace=True)

In [5]:
dataset[dataset['age']>100]

dataset.drop(dataset[dataset['age']>100].index, inplace=True)

In [6]:
dataset['sex'] = np.where((dataset.sex.isnull()) & (dataset.pregnant == 't'), 'F', dataset.sex)

In [7]:
dataset.drop('T3', axis=1, inplace=True)

In [8]:
dataset.dropna(inplace=True)

In [9]:
dataset.replace('f', 0, inplace=True)
dataset.replace('t', 1, inplace=True)
dataset.replace('M', 0, inplace=True)
dataset.replace('F', 1, inplace=True)

In [10]:
diagnoses = {'negative': 0,
             'hypothyroid': 1, 
             'hyperthyroid': 2}

dataset['target'] = dataset['target'].map(diagnoses) # re-mapping

# train and test split --> stratified
X = dataset.drop('target', axis=1).copy()
y = dataset['target'].copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# TRAIN MODEL

In [11]:
automl = autosklearn.classification.AutoSklearnClassifier(time_left_for_this_task=120)
automl.fit(X_train.values, y_train)
y_hat = automl.predict(X_test.values)

In [12]:
sklearn.metrics.accuracy_score(y_test, y_hat)

0.9839743589743589

In [13]:
import pickle
with open('./models/thyroid_automl.pkl', 'wb') as f:
    pickle.dump(automl, f)

# LOAD MODEL

In [11]:
import pickle
with open('./models/thyroid_automl.pkl', 'rb') as f:
    automl = pickle.load(f)

In [12]:
len(automl.show_models())

8

# LIME

In [13]:
import lime
import lime.lime_tabular
import tqdm 

In [14]:
continuous_features = ['age', 'TSH', 'TT4', 'T4U', 'FTI']
categorical_features = X_train.columns.drop(continuous_features).tolist()
explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names=X_train.columns.tolist(), class_names=['negative', 'hypothyroid', 'hyperthyroid'], categorical_features=categorical_features, discretize_continuous=True)

In [15]:
text_x = X_test.values

In [16]:
exp_fn = lambda i: explainer.explain_instance(X_test.iloc[i], automl.predict_proba, num_features=len(X_test.columns))
def exp_fn_blk(xtest, exp_fn):
    exp1 = []
    for i in tqdm.tqdm(range(len(xtest))):
        exp = exp_fn(i)
        exp1.append(exp.as_map()[exp.available_labels()[0]])
    return np.array(exp1)
exp_fn_wrap = lambda x: np.array(exp_fn_blk(x, exp_fn))

In [17]:
import metrics

In [None]:
exp1 = exp_fn_wrap(text_x)
exp2 = exp_fn_wrap(text_x)

In [19]:
np.save('./explanations/thyroid1.npy', exp1)
np.save('./explanations/thyroid2.npy', exp2)

In [18]:
exp1 = np.load('./explanations/thyroid1.npy')
exp2 = np.load('./explanations/thyroid2.npy')

In [19]:
def enc_exp(exp, feature_num):
    enc_exp = np.zeros((len(exp),feature_num))
    for i in range(len(exp)):
        for j in range(len(exp[i])):
            enc_exp[i][int(exp[i,j,0])] = exp[i,j,1]
    return enc_exp

In [20]:
i = metrics.calc_identity(exp1, exp2)
s = metrics.calc_separability(exp1)
enc1 = enc_exp(exp1, len(X_test.columns))
sb = metrics.calc_stability(enc1, y_test)

  self._check_params(X)


In [21]:
i, s, sb

((100.0, 0, 1560), (0, 1560, 2433600, 0.0), (138, 1560))

In [22]:
X_test_norm = metrics.normalize_test(X_train, X_test)
sim = metrics.calc_similarity(exp1, X_test_norm)

In [23]:
sim

0.1767860825689674

In [24]:
list_monotonicity = []
list_non_sensitivity = []
list_effective_complexity = []

for i in tqdm.tqdm(range(len(text_x))):
    atr = exp1[i]
    sorted_atr = [j for i,j in atr]
    sorted_feat = [i for i,j in atr]
    y = np.zeros(3, dtype=int)
    np.put(y, y_test.iloc[i], 1)
    example = metrics.FeatureAttribution(automl, text_x[i], y, sorted_atr)
    list_monotonicity.append(example.monotonicity())
    list_non_sensitivity.append(example.non_sensitivity())
    list_effective_complexity.append(example.effective_complexity(sorted_feat, 0.1))

100%|██████████| 1560/1560 [1:51:13<00:00,  4.28s/it]


In [25]:
print(np.mean(list_monotonicity))
print(np.mean(list_non_sensitivity))
print(np.mean(list_effective_complexity))

print(np.median(list_monotonicity))
print(np.median(list_non_sensitivity))
print(np.median(list_effective_complexity))

-0.14397518127304595
1.0
0.0
-0.16240601503759397
1.0
0.0


In [26]:
metrics.calc_trust_score(automl, text_x, exp1, 3, X_train.columns.to_list())

100%|██████████| 1560/1560 [5:29:17<00:00, 12.67s/it]  


0.2674145299145299

# LIME Global

In [15]:
from lime import submodular_pick
import time

start_time = time.time()
exp1 = submodular_pick.SubmodularPick(explainer, X_test.values, automl.predict_proba, sample_size=200, num_features=len(X_test.columns), num_exps_desired=5)
print("--- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
exp2 = submodular_pick.SubmodularPick(explainer, X_test.values, automl.predict_proba, sample_size=200, num_features=len(X_test.columns), num_exps_desired=5)
print("--- %s seconds ---" % (time.time() - start_time))

--- 798.0019969940186 seconds ---
--- 921.5835075378418 seconds ---


In [21]:
def get_feature_imp(sp_obj):
    W_pick=pd.DataFrame([dict(this.as_list(this.available_labels()[0])) for this in sp_obj.sp_explanations]).fillna(0)
    W_pick['prediction'] = [this.available_labels()[0] for this in sp_obj.sp_explanations]
    W=pd.DataFrame([dict(this.as_list(this.available_labels()[0])) for this in sp_obj.explanations]).fillna(0)
    W['prediction'] = [this.available_labels()[0] for this in sp_obj.explanations]
    np.abs(W.drop("prediction", axis=1)).mean(axis=0).sort_values(ascending=False).head(25).sort_values(ascending=True)
    grped_coeff = W.groupby("prediction").mean()
    grped_coeff = grped_coeff.T
    return grped_coeff[0].values

In [22]:
feat_imp1 = get_feature_imp(exp1)
feat_imp2 = get_feature_imp(exp2)

In [23]:
feat_imp2.shape, feat_imp1.shape

((48,), (49,))

In [24]:
def global_identity(feat_imp1, feat_imp2):
    sum = 0
    for i in range(len(feat_imp1)):
        if(feat_imp1[i] == feat_imp2[i]):
            sum += 1
    return sum/len(feat_imp1)

In [25]:
i = global_identity(feat_imp1, feat_imp2)
i

IndexError: index 48 is out of bounds for axis 0 with size 48

In [26]:
def normal_fi(feat_imp):
    return np.abs(feat_imp) / np.sum(np.abs(feat_imp))

In [27]:
normal_feat_imp = normal_fi(feat_imp1 + 1e-9)

In [28]:
normal_fi(feat_imp1 + 1e-9)

array([7.99609784e-02, 3.07692817e-02, 5.81832837e-03, 1.05394971e-02,
       7.28577068e-02, 2.01550467e-02, 9.78516120e-02, 5.04075938e-03,
       2.54064964e-02, 1.49685471e-04, 3.16700788e-03, 1.59606400e-02,
       4.40884326e-03, 3.02853236e-03, 2.59310827e-04, 4.13526321e-03,
       3.82956894e-02, 4.70571569e-02, 5.20951781e-03, 4.20392383e-09,
       1.38929367e-01, 2.99447122e-03, 2.08153803e-04, 1.74962344e-03,
       5.40776972e-03, 1.62262688e-03, 4.18557848e-03, 7.82336695e-05,
       1.19731763e-01, 1.49543194e-02, 1.29952279e-04, 2.05063585e-01,
       1.93435006e-03, 5.14887112e-03, 5.72761692e-03, 1.43034196e-03,
       7.18727223e-03, 7.80007110e-04, 4.65038938e-03, 6.12246955e-04,
       8.20207231e-04, 1.11158042e-03, 1.03041430e-04, 3.62825910e-03,
       1.82480028e-04, 2.79196546e-04, 4.69179338e-04, 3.99714859e-04,
       4.08442981e-04])

In [29]:
#Entropy Ratio
Ser = np.sum(normal_feat_imp*np.log(normal_feat_imp))/np.log(1/len(normal_feat_imp))

# Kullback-Leibler Divergence
Skl = np.sum(normal_feat_imp*np.log(normal_feat_imp/(1/len(normal_feat_imp))))

In [30]:
def calc_gini(pfi):
    sum = 0
    for i in range(len(pfi)):
        sum_curr = 0
        for j in range(len(pfi)):
            sum_curr += np.abs(pfi[i]-pfi[j])
        sum += sum_curr
    
    return sum/(2*len(pfi)**2)*(np.sum(pfi)/len(pfi))

In [31]:
Sg = calc_gini(normal_feat_imp)

In [32]:
Ser, Skl, Sg

(0.683249720430941, 1.2327351674590799, 0.00032479863749558166)

In [33]:
def calc_alpha_fi(normal_pfi, alpha):
    j_inst = 0
    sum = 0
    for i in range(len(normal_pfi)-1, -1, -1):
        sum += normal_pfi[i]
        if sum<=alpha:
            j_inst = i
        else:
            break
    return 1- (j_inst/len(normal_pfi))

In [34]:
calc_alpha_fi(normal_feat_imp, 0.8)

0.8775510204081632

In [35]:
def get_feature_imp_all(sp_obj):
    W_pick=pd.DataFrame([dict(this.as_list(this.available_labels()[0])) for this in sp_obj.sp_explanations]).fillna(0)
    W_pick['prediction'] = [this.available_labels()[0] for this in sp_obj.sp_explanations]
    W=pd.DataFrame([dict(this.as_list(this.available_labels()[0])) for this in sp_obj.explanations]).fillna(0)
    W['prediction'] = [this.available_labels()[0] for this in sp_obj.explanations]
    np.abs(W.drop("prediction", axis=1)).mean(axis=0).sort_values(ascending=False).head(25).sort_values(ascending=True)
    grped_coeff = W.groupby("prediction").mean()
    grped_coeff = grped_coeff.T
    return grped_coeff

In [36]:
class1_feat_imp, class2_feat_imp, class3_feat_imp = get_feature_imp_all(exp1)[0].values, get_feature_imp_all(exp1)[1].values, get_feature_imp_all(exp1)[2]
normal_class1_fi, normal_class2_fi, normal_class3_fi = normal_fi(class1_feat_imp), normal_fi(class2_feat_imp), normal_fi(class3_feat_imp)

In [38]:
min({np.linalg.norm(normal_class1_fi - normal_class2_fi, ord=2), np.linalg.norm(normal_class1_fi - normal_class3_fi, ord=2), np.linalg.norm(normal_class3_fi - normal_class2_fi, ord=2)})

0.4736830707585958

# CIU

In [13]:
from ciu import determine_ciu
import tqdm
import metrics

In [14]:
def enc_exp(exp, feature_num):
    enc_exp = np.zeros((len(exp),feature_num))
    for i in range(len(exp)):
        for j in range(len(exp[i])):
            enc_exp[i][int(exp[i,j,0])] = exp[i,j,1]
    return enc_exp

In [15]:
feat_list = X_train.columns.tolist()

In [16]:
def exp_fn_blk(xtest):
    exp1 = []
    for i in tqdm.tqdm(range(len(xtest))):
        exp = determine_ciu(X_test.iloc[i:i+1], automl.predict_proba, X_train.to_dict('list'), samples = 1000, prediction_index = 1)
        exp_list = [[feat_list.index(i), exp.ci[i]] for i in exp.ci]
        exp1.append(exp_list)
    return np.array(exp1)

In [31]:
exp1 = exp_fn_blk(X_test)
exp2 = exp_fn_blk(X_test)

  0%|          | 0/1560 [00:00<?, ?it/s]

100%|██████████| 1560/1560 [1:28:05<00:00,  3.39s/it]
100%|██████████| 1560/1560 [1:20:04<00:00,  3.08s/it]


In [32]:
np.save('./explanations/thyroid_ciu1.npy', exp1)
np.save('./explanations/thyroid_ciu2.npy', exp2)

In [17]:
exp1 = np.load('./explanations/thyroid_ciu1.npy')
exp2 = np.load('./explanations/thyroid_ciu2.npy')

In [33]:
i = metrics.calc_identity(exp1, exp2)
s = metrics.calc_separability(exp1)
enc1 = enc_exp(exp1, len(feat_list))
sb = metrics.calc_stability(enc1, y_test)

  self._check_params(X)


In [34]:
i, s, sb

((100.0, 0, 1560), (0, 1560, 2433600, 0.0), (-659, 1560))

In [35]:
X_test_norm = metrics.normalize_test(X_train, X_test)
sim = metrics.calc_similarity(exp1, X_test_norm)

In [36]:
sim

0.0996306921268756

In [38]:
list_monotonicity = []
list_non_sensitivity = []
list_effective_complexity = []

for i in tqdm.tqdm(range(len(X_test))):
    atr = exp1[i]   
    sorted_atr = [j for i,j in atr]
    sorted_feat = [i for i,j in atr]
    y = np.zeros(3, dtype=int)
    np.put(y, y_test.iloc[i], 1)
    example = metrics.FeatureAttribution(automl, X_test.to_numpy()[i], y, sorted_atr)
    list_monotonicity.append(example.monotonicity())
    list_non_sensitivity.append(example.non_sensitivity())
    list_effective_complexity.append(example.effective_complexity(sorted_feat, 0.1))

100%|██████████| 1560/1560 [2:08:29<00:00,  4.94s/it] 


In [39]:
print(np.mean(list_monotonicity))
print(np.mean(list_non_sensitivity))
print(np.mean(list_effective_complexity))

print(np.median(list_monotonicity))
print(np.median(list_non_sensitivity))
print(np.median(list_effective_complexity))

0.5781865931413919
0.9993589743589744
0.0
0.6314404283531938
1.0
0.0


In [18]:
metrics.calc_trust_score(automl, X_test.to_numpy(), exp1, 3, X_train.columns.to_list())

100%|██████████| 1560/1560 [5:50:29<00:00, 13.48s/it]  


0.28237179487179487

# RULEFIT

In [13]:
from skrules import SkopeRules
import metrics_rules
import time

In [14]:
clf = SkopeRules(max_depth_duplication=2,
                    n_estimators=512,
                    precision_min=0.3,
                    recall_min=0.1,
                    feature_names=X_train.columns.tolist())

In [15]:
start_time = time.time()
clf.fit(X_train, y_train)
print("--- %s seconds ---" % (time.time() - start_time))

  % set(self.classes_))


--- 46.34715509414673 seconds ---


In [16]:
start_time = time.time()
top_rules1 = clf.score_top_rules(X_test)
top_rules2 = clf.score_top_rules(X_test)
print("--- %s seconds ---" % (time.time() - start_time))

--- 0.06779360771179199 seconds ---


In [17]:
i = metrics_rules.calc_identity_rules(top_rules1, top_rules2)
print(i)

s = metrics_rules.calc_separability_rules(top_rules1)
print(s)

enc_rules = metrics_rules.exp_enc(clf, top_rules1)
sb = metrics_rules.calc_stability_rules(enc_rules, y_test)
print(sb)

(0.0, 1560, 1560)


(1928974, 1560, 2433600, 79.26421761998685)
(50, 1560)


  self._check_params(X)


In [18]:
X_test_norm = metrics_rules.normalize_test(X_train, X_test)
sim = metrics_rules.calc_similarity(enc_rules, X_test_norm)
print(sim)

0.0


# RULEMATRIX

In [13]:
continuous_features = ['age', 'TSH', 'TT4', 'T4U', 'FTI']
categorical_features = X_train.columns.drop(continuous_features).tolist()

In [14]:
import rulematrix
import time
import metrics_rules

In [15]:
is_continuous = [True if i in continuous_features else False for i in X_train.columns.tolist()]
is_categorical = [True if i in categorical_features else False for i in X_train.columns.tolist()]

In [16]:
surrogate = rulematrix.surrogate.rule_surrogate(
    automl.predict,
    X_train,
    sampling_rate=4,
    is_continuous=is_continuous,
    is_categorical=is_categorical,
    seed=42
)

In [17]:
test_x = X_test.to_numpy()

In [18]:
def exp_fn_blk(xtest):
    exp1 = []
    for i in range(len(xtest)):
        queried_rules = np.arange(surrogate.student.n_rules)[surrogate.student.decision_path(test_x[i].reshape(1,-1)).reshape(-1)]
        exp1.append(queried_rules[-1])
    return np.array(exp1)
exp_fn_wrap = lambda x: np.array(exp_fn_blk(x))

In [19]:
start_time = time.time()
exp1 = exp_fn_blk(test_x)
exp2 = exp_fn_blk(test_x)
print("--- %s seconds ---" % (time.time() - start_time))

--- 0.8688592910766602 seconds ---


In [20]:
def enc_exp(exp, n_features):
    enc = []
    for i in range(exp.shape[0]):
        new = np.zeros(n_features)
        for j in surrogate.student.rule_list[exp[i]].clauses:
            new[j.feature_idx] = 1
        enc.append(new)
    return np.array(enc)

In [21]:
enc_exp = enc_exp(exp1, X_train.shape[1])

In [22]:
i = metrics_rules.calc_identity_rules(exp1, exp2)
print(i)

s = metrics_rules.calc_separability_rules(exp1)
print(s)

sb = metrics_rules.calc_stability_rules(enc_exp, y_test)
print(sb)

(0.0, 1560, 1560)
(1265948, 1560, 2433600, 52.01955950032873)
(101, 1560)


  self._check_params(X)


In [23]:
X_test_norm = metrics_rules.normalize_test(X_train, X_test)
sim = metrics_rules.calc_similarity(enc_exp, X_test_norm)

In [24]:
sim

0.0

# ANCHOR Global


In [13]:
from anchor import anchor_tabular
import anchor_utils
import tqdm

In [14]:
explainer = anchor_tabular.AnchorTabularExplainer(
    y_train.unique().tolist(),
    X_train.columns.tolist(),
    X_train.values
)

In [15]:
# Feature Importance using Anchor
def calc_fi(X_test, model, explainer):
    all_exps = []
    for i in tqdm.tqdm(range(len(X_test))):
        exp = explainer.explain_instance(X_test.values[i], model.predict, threshold=0.95)
        all_exps.append(exp.exp_map)
    fi = anchor_utils.greedy_pick_anchor(all_exps, X_test.values, k = len(X_test.columns))
    return fi
        

In [20]:
X_test_sample, _, y_test_sample, _ = train_test_split(X_test, y_test, test_size=0.6, random_state=42, stratify=y_test)

In [22]:
exp1 = calc_fi(X_test_sample, automl, explainer)
exp2 = calc_fi(X_test_sample, automl, explainer)

100%|██████████| 624/624 [2:13:57<00:00, 12.88s/it]  


0 0.3173076923076923
1 0.41346153846153844
2 0.4358974358974359
3 0.4583333333333333
4 0.4791666666666667
5 0.4983974358974359
6 0.5176282051282052
7 0.5352564102564102
8 0.5512820512820513
9 0.5673076923076923
10 0.5817307692307693
11 0.594551282051282
12 0.6073717948717948
13 0.6201923076923077
14 0.6330128205128205
15 0.6442307692307693
16 0.655448717948718
17 0.6650641025641025
18 0.6746794871794872
19 0.6842948717948718


 12%|█▏        | 73/624 [2:48:28<71:38:54, 468.12s/it]  

In [None]:
def normal_fi(feat_imp):
    feat_imp = np.array(feat_imp) + 1e-9
    return np.abs(feat_imp) / np.sum(np.abs(feat_imp))

In [None]:
normal_feat_imp1 = normal_fi(exp1)
normal_feat_imp2 = normal_fi(exp2)

In [None]:
def global_identity(feat_imp1, feat_imp2):
    sum = 0
    for i in range(len(feat_imp1)):
        if(feat_imp1[i] == feat_imp2[i]):
            sum += 1
    return sum/len(feat_imp1)

i = global_identity(normal_feat_imp1, normal_feat_imp2)
i


0.0

In [None]:
#Entropy Ratio
Ser = np.sum(normal_feat_imp1*np.log(normal_feat_imp1))/np.log(1/len(normal_feat_imp1))

# Kullback-Leibler Divergence
Skl = np.sum(normal_feat_imp1*np.log(normal_feat_imp1/(1/len(normal_feat_imp1))))

In [None]:
def calc_gini(pfi):
    sum = 0
    for i in range(len(pfi)):
        sum_curr = 0
        for j in range(len(pfi)):
            sum_curr += np.abs(pfi[i]-pfi[j])
        sum += sum_curr
    
    return sum/(2*len(pfi)**2)*(np.sum(pfi)/len(pfi))

In [None]:
Sg = calc_gini(normal_feat_imp1)

In [None]:
Ser, Skl, Sg

(0.8819555297531368, 0.25937021124504167, 0.00445169138315067)

In [None]:
def calc_alpha_fi(normal_pfi, alpha):
    j_inst = 0
    sum = 0
    for i in range(len(normal_pfi)-1, -1, -1):
        sum += normal_pfi[i]
        if sum<=alpha:
            j_inst = i
        else:
            break
    return 1- (j_inst/len(normal_pfi))

In [None]:
calc_alpha_fi(normal_feat_imp1, 0.8)

0.8888888888888888