In [1]:
import autosklearn.classification
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import sklearn.metrics

In [2]:
dataset = pd.read_csv('../Datasets/Breast Cancer/breast_cancer.csv')

In [3]:
dataset['class'].replace({'Benign': 0, 'Malignant': 1}, inplace=True)

In [4]:
X, y = dataset.iloc[:, 1:-1], dataset.iloc[:, -1]

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [15]:
automl = autosklearn.classification.AutoSklearnClassifier(time_left_for_this_task=120)
automl.fit(X_train.values, y_train)
y_hat = automl.predict(X_test.values)

In [16]:
sklearn.metrics.accuracy_score(y_test, y_hat)

0.9707602339181286

In [18]:
import pickle
with open('./models/breast_cancer_automl.pkl', 'wb') as f:
    pickle.dump(automl, f)

In [19]:
import pickle
with open('./models/breast_cancer_automl.pkl', 'rb') as f:
    automl = pickle.load(f)

In [20]:
for col in dataset.columns:
    print(col, dataset[col].unique())

Clump thickness [ 1  3  2  8  5  4  6  7  9 10]
Cell Size [ 1  2  3  4  9  7  8  6  5 10]
Cell Shape [ 3  1  2  6  4  7  5  8 10  9]
Marginal Adhesion [ 1  2  3  4  6 10  5  9  8  7]
Signle Cell size [ 2 10  1  5  3  4  7  6  8  9]
Bare Nuclei [ 1  2  3  4  5 10  8  7  9  6]
Bland [ 1  2  3  5  6  7  4  8  9 10]
Nucleoli [ 1  2  3  4  5  6  7  8  9 10]
Mitoses [ 1  2  3  5  7  8  4  6 10]
class [0 1]


# LIME

In [21]:
import lime
import lime.lime_tabular
import tqdm 

In [22]:
categorical_features = X_train.columns.tolist()
explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names=X_train.columns.tolist(), class_names=['Benign', 'Malignant'], categorical_features=categorical_features, discretize_continuous=True)

In [23]:
text_x = X_test.values

In [24]:
exp_fn = lambda i: explainer.explain_instance(X_test.iloc[i], automl.predict_proba, num_features=len(X_test.columns))
def exp_fn_blk(xtest, exp_fn):
    exp1 = []
    for i in tqdm.tqdm(range(len(xtest))):
        exp = exp_fn(i)
        exp1.append(exp.as_map()[exp.available_labels()[0]])
    return np.array(exp1)
exp_fn_wrap = lambda x: np.array(exp_fn_blk(x, exp_fn))

In [25]:
import metrics

In [26]:
exp1 = exp_fn_wrap(text_x)
exp2 = exp_fn_wrap(text_x)

100%|██████████| 171/171 [14:15<00:00,  5.00s/it]
100%|██████████| 171/171 [10:27<00:00,  3.67s/it]


In [27]:
np.save('./explanations/breast_cancer1.npy', exp1)
np.save('./explanations/breast_cancer2.npy', exp2)

In [28]:
def enc_exp(exp, feature_num):
    enc_exp = np.zeros((len(exp),feature_num))
    for i in range(len(exp)):
        for j in range(len(exp[i])):
            enc_exp[i][int(exp[i,j,0])] = exp[i,j,1]
    return enc_exp

In [29]:
i = metrics.calc_identity(exp1, exp2)
s = metrics.calc_separability(exp1)
enc1 = enc_exp(exp1, len(X_test.columns))
sb = metrics.calc_stability(enc1, y_test)

  self._check_params(X)


In [30]:
i, s, sb

((100.0, 0, 171), (0, 171, 29241, 0.0), (12, 171))

In [31]:
X_test_norm = metrics.normalize_test(X_train, X_test)
sim = metrics.calc_similarity(exp1, X_test_norm)

In [32]:
sim

0.13380635721310355

In [34]:
list_monotonicity = []
list_non_sensitivity = []
list_effective_complexity = []

for i in range(len(text_x)):
    atr = exp1[i]
    sorted_atr = [j for i,j in atr]
    sorted_feat = [i for i,j in atr]
    y = np.zeros(2, dtype=int)
    np.put(y, y_test.iloc[i], 1)
    example = metrics.FeatureAttribution(automl, text_x[i], y, sorted_atr)
    list_monotonicity.append(example.monotonicity())
    list_non_sensitivity.append(example.non_sensitivity())
    list_effective_complexity.append(example.effective_complexity(sorted_feat, 0.1))

In [35]:
print(np.mean(list_monotonicity))
print(np.mean(list_non_sensitivity))
print(np.mean(list_effective_complexity))

print(np.median(list_monotonicity))
print(np.median(list_non_sensitivity))
print(np.median(list_effective_complexity))

-0.15970481759955446
0.0
5.847953216374269
-0.4761904761904762
0.0
8.0


In [36]:
trust = metrics.calc_trust_score(automl, text_x, exp1, 3, X_train.columns.tolist())

100%|██████████| 171/171 [1:06:09<00:00, 23.21s/it]


In [37]:
trust

0.7573099415204678