In [1]:
#UCI Primary Tumor
import autosklearn.classification
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import sklearn.metrics

In [2]:
table = pd.read_table('../Datasets/Primary Tumor/primary-tumor.data', sep=',', header=None)

In [3]:
table

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,1,1,1,?,3,2,2,1,2,2,2,2,2,2,2,2,2,2
1,1,1,1,?,3,2,2,2,2,2,1,2,2,2,1,2,1,2
2,1,1,2,2,3,1,2,2,2,2,2,2,2,2,2,2,1,2
3,1,1,2,?,3,1,2,1,1,2,2,2,2,2,2,2,1,2
4,1,1,2,?,3,1,2,1,1,2,2,2,2,2,2,2,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
334,22,2,2,2,?,2,2,2,2,2,2,2,2,2,2,1,2,2
335,22,2,2,2,?,2,2,2,2,2,2,2,2,2,2,1,2,2
336,22,2,2,?,?,1,2,2,2,2,2,2,2,2,1,1,2,2
337,22,3,2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,2


In [4]:
columns = ['class', 'age', 'sex', 'histologic-type', 'degree-of-diffe', 'bone', 'bone-marrow', 'lung', 'pleura', 'peritoneum', 'liver', 'brain', 'skin', 'neck', 'supraclavicular', 'axillar', 'mediastinum', 'abdominal']

In [5]:
table = table.rename(columns={i: columns[i] for i in range(len(columns))})

In [6]:
table.replace('?', np.nan, inplace=True)

In [7]:
table = table.astype(float)

In [8]:
table = table - 1

In [9]:
table.drop('degree-of-diffe', axis=1, inplace=True)

In [10]:
table.dropna(inplace=True)

In [11]:
table = table.astype(int)

In [12]:
X, y = table.drop('class', axis=1), table['class']

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# AUTOML

In [23]:
automl = autosklearn.classification.AutoSklearnClassifier(time_left_for_this_task=600)
automl.fit(X_train, y_train)
y_hat = automl.predict(X_test)

In [24]:
sklearn.metrics.accuracy_score(y_test, y_hat)

0.27941176470588236

In [21]:
import pickle
with open('./models/primary_tumor_automl2.pkl', 'wb') as f:
    pickle.dump(automl, f)

In [25]:
import pickle
with open('./models/primary_tumor_automl2.pkl', 'rb') as f:
    automl = pickle.load(f)

len(automl.show_models())

15

# RANDOM FOREST

In [14]:
import sklearn.ensemble

model = sklearn.ensemble.RandomForestClassifier(n_estimators=100, n_jobs=5, random_state=42)
model.fit(X_train, y_train)
y_hat = model.predict(X_test)

In [21]:
sklearn.metrics.accuracy_score(y_test, y_hat)

0.3382352941176471

In [23]:
import pickle
with open('./models/primary_tumor_rf.pkl', 'wb') as f:
    pickle.dump(model, f)

# SVC

In [14]:
import sklearn.svm
import sklearn.model_selection

model = sklearn.svm.SVC(random_state=42, probability=True)
# params = {'C': [0.1, 1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 'kernel': ['linear', 'ploy', 'rbf'], 'degree': [3, 4, 5]}
# model = sklearn.model_selection.GridSearchCV(svc, param_grid=params, cv=5, n_jobs=5, verbose=3)
model.fit(X_train, y_train)
y_hat = model.predict(X_test)
sklearn.metrics.accuracy_score(y_test, y_hat)

0.35294117647058826

In [15]:
import pickle
with open('./models/primary_tumor_svm.pkl', 'wb') as f:
    pickle.dump(model, f)

# LIME

In [16]:
import lime
import lime.lime_tabular
import tqdm 

In [17]:
categorical_features = X_train.columns.tolist()
explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names=X_train.columns.tolist(), class_names=[str(i) for i in range(22)], categorical_features=categorical_features, discretize_continuous=True)

In [18]:
text_x = X_test.values

In [19]:
exp_fn = lambda i: explainer.explain_instance(X_test.iloc[i], model.predict_proba, num_features=len(X_test.columns))
def exp_fn_blk(xtest, exp_fn):
    exp1 = []
    for i in tqdm.tqdm(range(len(xtest))):
        exp = exp_fn(i)
        exp1.append(exp.as_map()[exp.available_labels()[0]])
    return np.array(exp1)
exp_fn_wrap = lambda x: np.array(exp_fn_blk(x, exp_fn))

In [20]:
import metrics

In [21]:
exp1 = exp_fn_wrap(text_x)
exp2 = exp_fn_wrap(text_x)

100%|██████████| 68/68 [16:27<00:00, 14.52s/it]
100%|██████████| 68/68 [13:43<00:00, 12.12s/it]


In [22]:
np.save('./explanations/primaryTumor1.npy', exp1)
np.save('./explanations/primaryTumor2.npy', exp2)

In [23]:
def enc_exp(exp, feature_num):
    enc_exp = np.zeros((len(exp),feature_num))
    for i in range(len(exp)):
        for j in range(len(exp[i])):
            enc_exp[i][int(exp[i,j,0])] = exp[i,j,1]
    return enc_exp

In [24]:
i = metrics.calc_identity(exp1, exp2)
s = metrics.calc_separability(exp1)
enc1 = enc_exp(exp1, len(X_test.columns))
sb = metrics.calc_stability(enc1, y_test)

  self._check_params(X)


In [25]:
i, s, sb

((100.0, 0, 68), (0, 68, 4624, 0.0), (-286, 68))

In [26]:
X_test_norm = metrics.normalize_test(X_train, X_test)
sim = metrics.calc_similarity(exp1, X_test_norm)

In [27]:
sim

0.028377327560562837

In [28]:
sort_uni_y = np.unique(y_test)

In [29]:
list_monotonicity = []
list_non_sensitivity = []
list_effective_complexity = []

for i in tqdm.tqdm(range(len(text_x))):
    atr = exp1[i]
    sorted_atr = [j for i,j in atr]
    sorted_feat = [i for i,j in atr]
    y = np.zeros(19, dtype=int)
    np.put(y, np.where(sort_uni_y == y_test.iloc[i])[0][0], 1)
    example = metrics.FeatureAttribution(model, text_x[i], y, sorted_atr)
    list_monotonicity.append(example.monotonicity())
    list_non_sensitivity.append(example.non_sensitivity())
    list_effective_complexity.append(example.effective_complexity(sorted_feat, 0.1))

100%|██████████| 68/68 [00:02<00:00, 25.98it/s]


In [30]:
print(np.mean(list_monotonicity))
print(np.mean(list_non_sensitivity))
print(np.mean(list_effective_complexity))

print(np.median(list_monotonicity))
print(np.median(list_non_sensitivity))
print(np.median(list_effective_complexity))

0.03860625118215097
9.0
3.5441176470588234
0.012964074471043287
9.0
0.0


In [31]:
trust = metrics.calc_trust_score(model, text_x, exp1, 3, X_train.columns.tolist())

  0%|          | 0/68 [00:00<?, ?it/s]

100%|██████████| 68/68 [00:01<00:00, 53.65it/s]


In [32]:
trust

0.3602941176470588

# CIU

In [33]:
from ciu import determine_ciu
import tqdm
import metrics

In [34]:
def enc_exp(exp, feature_num):
    enc_exp = np.zeros((len(exp),feature_num))
    for i in range(len(exp)):
        for j in range(len(exp[i])):
            enc_exp[i][int(exp[i,j,0])] = exp[i,j,1]
    return enc_exp

In [35]:
feat_list = X_train.columns.tolist()

In [36]:
def exp_fn_blk(xtest):
    exp1 = []
    for i in tqdm.tqdm(range(len(xtest))):
        exp = determine_ciu(X_test.iloc[i:i+1], model.predict_proba, X_train.to_dict('list'), samples = 1000, prediction_index = 1)
        exp_list = [[feat_list.index(i), exp.ci[i]] for i in exp.ci]
        exp1.append(exp_list)
    return np.array(exp1)

In [37]:
exp1 = exp_fn_blk(X_test)
exp2 = exp_fn_blk(X_test)

100%|██████████| 68/68 [01:11<00:00,  1.06s/it]
100%|██████████| 68/68 [01:27<00:00,  1.29s/it]


In [38]:
np.save('./explanations/primaryTumor_ciu1.npy', exp1)
np.save('./explanations/primaryTumor_ciu2.npy', exp2)

In [40]:
i = metrics.calc_identity(exp1, exp2)
s = metrics.calc_separability(exp1)
enc1 = enc_exp(exp1, len(feat_list))
sb = metrics.calc_stability(enc1, y_test)

  self._check_params(X)


In [41]:
i, s, sb

((100.0, 0, 68), (0, 68, 4624, 0.0), (-277, 68))

In [42]:
X_test_norm = metrics.normalize_test(X_train, X_test)
sim = metrics.calc_similarity(exp1, X_test_norm)

In [43]:
sim

0.5613371902156711

In [44]:
list_monotonicity = []
list_non_sensitivity = []
list_effective_complexity = []

for i in tqdm.tqdm(range(len(X_test))):
    atr = exp1[i]
    sorted_atr = [j for i,j in atr]
    sorted_feat = [i for i,j in atr]
    y = np.zeros(19, dtype=int)
    np.put(y, np.where(sort_uni_y == y_test.iloc[i])[0][0], 1)
    example = metrics.FeatureAttribution(model, X_test.to_numpy()[i], y, sorted_atr)
    list_monotonicity.append(example.monotonicity())
    list_non_sensitivity.append(example.non_sensitivity())
    list_effective_complexity.append(example.effective_complexity(sorted_feat, 0.1))

100%|██████████| 68/68 [00:04<00:00, 16.55it/s]


In [45]:
print(np.mean(list_monotonicity))
print(np.mean(list_non_sensitivity))
print(np.mean(list_effective_complexity))

print(np.median(list_monotonicity))
print(np.median(list_non_sensitivity))
print(np.median(list_effective_complexity))

0.04766435986159169
0.0
3.2058823529411766
0.05735294117647059
0.0
0.0


In [46]:
metrics.calc_trust_score(model, X_test.to_numpy(), exp1, 3, X_train.columns.to_list())

100%|██████████| 68/68 [00:02<00:00, 23.59it/s]


0.3897058823529411

# RULEFIT

In [70]:
from skrules import SkopeRules
import metrics_rules
import time

In [71]:
clf = SkopeRules(max_depth_duplication=2,
                    n_estimators=512,
                    precision_min=0.3,
                    recall_min=0.1,
                    feature_names=X_train.columns.tolist())

In [72]:
start_time = time.time()
clf.fit(X_train, y_train)
print("--- %s seconds ---" % (time.time() - start_time))

  % set(self.classes_))


--- 13.05927062034607 seconds ---


In [73]:
start_time = time.time()
top_rules1 = clf.score_top_rules(X_test)
top_rules2 = clf.score_top_rules(X_test)
print("--- %s seconds ---" % (time.time() - start_time))

--- 0.06283974647521973 seconds ---


In [74]:
i = metrics_rules.calc_identity_rules(top_rules1, top_rules2)
print(i)

s = metrics_rules.calc_separability_rules(top_rules1)
print(s)

enc_rules = metrics_rules.exp_enc(clf, top_rules1)
sb = metrics_rules.calc_stability_rules(enc_rules, y_test)
print(sb)

(0.0, 68, 68)
(2290, 68, 4624, 49.5242214532872)
(-449, 68)


  self._check_params(X)
  ct.fit(top_rules)


In [75]:
X_test_norm = metrics_rules.normalize_test(X_train, X_test)
sim = metrics_rules.calc_similarity(enc_rules, X_test_norm)
print(sim)

0.8491365391806703


# RULEMATRIX

In [76]:
categorical_features = X_train.columns.tolist()
continuous_features = X_train.columns.drop(categorical_features).tolist()

In [77]:
import rulematrix
import time
import metrics_rules

In [78]:
is_continuous = [True if i in continuous_features else False for i in X_train.columns.tolist()]
is_categorical = [True if i in categorical_features else False for i in X_train.columns.tolist()]

In [80]:
surrogate = rulematrix.surrogate.rule_surrogate(
    model.predict,
    X_train,
    sampling_rate=4,
    is_continuous=is_continuous,
    is_categorical=is_categorical,
    seed=42
)

In [81]:
test_x = X_test.to_numpy()

In [82]:
def exp_fn_blk(xtest):
    exp1 = []
    for i in range(len(xtest)):
        queried_rules = np.arange(surrogate.student.n_rules)[surrogate.student.decision_path(test_x[i].reshape(1,-1)).reshape(-1)]
        exp1.append(queried_rules[-1])
    return np.array(exp1)
exp_fn_wrap = lambda x: np.array(exp_fn_blk(x))

In [83]:
start_time = time.time()
exp1 = exp_fn_blk(test_x)
exp2 = exp_fn_blk(test_x)
print("--- %s seconds ---" % (time.time() - start_time))

--- 0.017077207565307617 seconds ---


In [84]:
def enc_exp(exp, n_features):
    enc = []
    for i in range(exp.shape[0]):
        new = np.zeros(n_features)
        for j in surrogate.student.rule_list[exp[i]].clauses:
            new[j.feature_idx] = 1
        enc.append(new)
    return np.array(enc)

In [85]:
enc_exp = enc_exp(exp1, X_train.shape[1])

In [86]:
i = metrics_rules.calc_identity_rules(exp1, exp2)
print(i)

s = metrics_rules.calc_separability_rules(exp1)
print(s)

sb = metrics_rules.calc_stability_rules(enc_exp, y_test)
print(sb)

(0.0, 68, 68)
(1510, 68, 4624, 32.655709342560556)
(-401, 68)


  self._check_params(X)
  ct.fit(top_rules)


In [87]:
X_test_norm = metrics_rules.normalize_test(X_train, X_test)
sim = metrics_rules.calc_similarity(enc_exp, X_test_norm)

In [88]:
sim

1.0255943009748296