In [1]:
import autosklearn.classification
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import sklearn.metrics

In [2]:
# Load data
data746 = pd.read_csv('../Datasets/HIV-1 Protease/746Data.txt', sep=',', header=None)
data1625 = pd.read_csv('../Datasets/HIV-1 Protease/1625Data.txt', sep=',', header=None)

In [3]:
array = []
for i in range(0, len(data746)):
    array.append(list(data746[0][i])+[data746[1][i]])

In [4]:
for i in range(0, len(data1625)):
    array.append(list(data1625[0][i])+[data1625[1][i]])

In [5]:
dataset = pd.DataFrame(array)

In [6]:
dataset.replace({'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7, 'H': 8, 'I': 9, 'J': 10, 'K': 11, 'L': 12, 'M': 13, 'N': 14, 'O': 15, 'P': 16, 'Q': 17, 'R': 18, 'S': 19, 'T': 20, 'U': 21, 'V': 22, 'W': 23, 'X': 24, 'Y': 25, 'Z': 26}, inplace=True)

In [7]:
X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [8]:
automl = autosklearn.classification.AutoSklearnClassifier()
automl.fit(X_train.values, y_train)
y_hat = automl.predict(X_test.values)

In [None]:
sklearn.metrics.accuracy_score(y_test, y_hat)

0.9645868465430016

In [22]:
import pickle
with open('./models/hiv_automl.pkl', 'wb') as f:
    pickle.dump(automl, f)

In [8]:
import pickle
with open('./models/hiv_automl.pkl', 'rb') as f:
    automl = pickle.load(f)

len(automl.show_models())

32

# LIME

In [9]:
import lime
import lime.lime_tabular
import tqdm 

In [10]:
y_test.unique()

array([ 1, -1])

In [11]:
categorical_features = X_train.columns.tolist()
explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names=X_train.columns.tolist(), class_names=['Not Cleaved', 'Cleaved'], categorical_features=categorical_features, discretize_continuous=True)

In [12]:
text_x = X_test.values

In [13]:
exp_fn = lambda i: explainer.explain_instance(X_test.iloc[i], automl.predict_proba, num_features=len(X_test.columns))
def exp_fn_blk(xtest, exp_fn):
    exp1 = []
    for i in tqdm.tqdm(range(len(xtest))):
        exp = exp_fn(i)
        exp1.append(exp.as_map()[exp.available_labels()[0]])
    return np.array(exp1)
exp_fn_wrap = lambda x: np.array(exp_fn_blk(x, exp_fn))

In [14]:
import metrics

In [30]:
exp1 = exp_fn_wrap(text_x)
exp2 = exp_fn_wrap(text_x)

100%|██████████| 593/593 [25:40<00:00,  2.60s/it]
100%|██████████| 593/593 [28:03<00:00,  2.84s/it]


In [15]:
np.save('./explanations/hiv1.npy', exp1)
np.save('./explanations/hiv2.npy', exp2)

NameError: name 'exp1' is not defined

In [16]:
exp1 = np.load('./explanations/hiv1.npy')
exp2 = np.load('./explanations/hiv2.npy')

In [32]:
def enc_exp(exp, feature_num):
    enc_exp = np.zeros((len(exp),feature_num))
    for i in range(len(exp)):
        for j in range(len(exp[i])):
            enc_exp[i][int(exp[i,j,0])] = exp[i,j,1]
    return enc_exp

In [33]:
i = metrics.calc_identity(exp1, exp2)
s = metrics.calc_separability(exp1)
enc1 = enc_exp(exp1, len(X_test.columns))
sb = metrics.calc_stability(enc1, y_test)

  self._check_params(X)


In [34]:
i, s, sb

((100.0, 0, 593), (0, 593, 351649, 0.0), (79, 593))

In [35]:
X_test_norm = metrics.normalize_test(X_train, X_test)
sim = metrics.calc_similarity(exp1, X_test_norm)

In [36]:
sim

0.15575770270549877

In [37]:
list_monotonicity = []
list_non_sensitivity = []
list_effective_complexity = []

for i in tqdm.tqdm(range(len(text_x))):
    atr = exp1[i]
    sorted_atr = [j for i,j in atr]
    sorted_feat = [i for i,j in atr]
    y = np.zeros(2, dtype=int)
    np.put(y, y_test.iloc[i], 1)
    example = metrics.FeatureAttribution(automl, text_x[i], y, sorted_atr)
    list_monotonicity.append(example.monotonicity())
    list_non_sensitivity.append(example.non_sensitivity())
    list_effective_complexity.append(example.effective_complexity(sorted_feat, 0.1))

100%|██████████| 593/593 [1:41:09<00:00, 10.23s/it]


In [38]:
print(np.mean(list_monotonicity))
print(np.mean(list_non_sensitivity))
print(np.mean(list_effective_complexity))

print(np.median(list_monotonicity))
print(np.median(list_non_sensitivity))
print(np.median(list_effective_complexity))

-0.04869894204226964
0.0
2.8431703204047216
-0.04761904761904763
0.0
0.0


In [17]:
trust = metrics.calc_trust_score(automl, text_x, exp1, 3, X_train.columns.tolist())

100%|██████████| 593/593 [3:35:34<00:00, 21.81s/it]  


In [18]:
trust

0.7360876897133221

# CIU

In [9]:
from ciu import determine_ciu
import tqdm
import metrics

In [10]:
def enc_exp(exp, feature_num):
    enc_exp = np.zeros((len(exp),feature_num))
    for i in range(len(exp)):
        for j in range(len(exp[i])):
            enc_exp[i][int(exp[i,j,0])] = exp[i,j,1]
    return enc_exp

In [11]:
feat_list = X_train.columns.tolist()

In [12]:
def exp_fn_blk(xtest):
    exp1 = []
    for i in tqdm.tqdm(range(len(xtest))):
        exp = determine_ciu(X_test.iloc[i:i+1], automl.predict_proba, X_train.to_dict('list'), samples = 1000, prediction_index = 1)
        exp_list = [[feat_list.index(i), exp.ci[i]] for i in exp.ci]
        exp1.append(exp_list)
    return np.array(exp1)

In [15]:
exp1 = exp_fn_blk(X_test)
exp2 = exp_fn_blk(X_test)

100%|██████████| 593/593 [59:58<00:00,  6.07s/it]  
100%|██████████| 593/593 [57:37<00:00,  5.83s/it]  


In [16]:
np.save('./explanations/hiv_ciu1.npy', exp1)
np.save('./explanations/hiv_ciu2.npy', exp2)

In [13]:
exp1 = np.load('./explanations/hiv_ciu1.npy')
exp2 = np.load('./explanations/hiv_ciu2.npy')

In [17]:
i = metrics.calc_identity(exp1, exp2)
s = metrics.calc_separability(exp1)
enc1 = enc_exp(exp1, len(feat_list))
sb = metrics.calc_stability(enc1, y_test)

  self._check_params(X)


In [18]:
i, s, sb

((55.1433389544688, 266, 593),
 (30, 593, 351649, 0.008531234270536814),
 (114, 593))

In [19]:
X_test_norm = metrics.normalize_test(X_train, X_test)
sim = metrics.calc_similarity(exp1, X_test_norm)

In [20]:
sim

0.8989604502706201

In [14]:
list_monotonicity = []
list_non_sensitivity = []
list_effective_complexity = []

for i in tqdm.tqdm(range(len(X_test))):
    atr = exp1[i]
    sorted_atr = [j for i,j in atr]
    sorted_feat = [i for i,j in atr]
    y = np.zeros(2, dtype=int)
    np.put(y, y_test.iloc[i], 1)
    example = metrics.FeatureAttribution(automl, X_test.to_numpy()[i], y, sorted_atr)
    list_monotonicity.append(example.monotonicity())
    list_non_sensitivity.append(example.non_sensitivity())
    list_effective_complexity.append(example.effective_complexity(sorted_feat, 0.1))

100%|██████████| 593/593 [1:07:57<00:00,  6.88s/it]


In [15]:
print(np.mean(list_monotonicity))
print(np.mean(list_non_sensitivity))
print(np.mean(list_effective_complexity))

print(np.median(list_monotonicity))
print(np.median(list_non_sensitivity))
print(np.median(list_effective_complexity))

-0.2037290042856533
0.0
2.5952782462057336
-0.30952380952380953
0.0
0.0


In [16]:
metrics.calc_trust_score(automl, X_test.to_numpy(), exp1, 3, X_train.columns.to_list())

100%|██████████| 593/593 [2:55:57<00:00, 17.80s/it]  


0.738336143901068