#Anchors for Heart Disease dataset:

In [None]:
!pip install alibi

Collecting alibi
[?25l  Downloading https://files.pythonhosted.org/packages/2c/0f/1de259336ecb2eeeb06d703c210effb66febf9f9273ff146fb29b66f17a7/alibi-0.5.4-py3-none-any.whl (215kB)
[K     |████████████████████████████████| 225kB 2.8MB/s 
Collecting prettyprinter
[?25l  Downloading https://files.pythonhosted.org/packages/9f/d0/9effbeca8f1b8df9d33154de3477a51e55a9c46cb15612dd7791a1624397/prettyprinter-0.18.0-py2.py3-none-any.whl (48kB)
[K     |████████████████████████████████| 51kB 5.2MB/s 
Collecting shap>=0.36
[?25l  Downloading https://files.pythonhosted.org/packages/d2/17/37ee6c79cafbd9bb7423b54e55ea90beec66aa7638664d607bcc28de0bae/shap-0.36.0.tar.gz (319kB)
[K     |████████████████████████████████| 327kB 8.5MB/s 
Collecting colorful>=0.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/b0/8e/e386e248266952d24d73ed734c2f5513f34d9557032618c8910e605dfaf6/colorful-0.5.4-py2.py3-none-any.whl (201kB)
[K     |████████████████████████████████| 204kB 8.4MB/s 
Collecting sp

In [None]:
import tensorflow as tf
tf.get_logger().setLevel(40) # suppress deprecation messages
tf.compat.v1.disable_v2_behavior() # disable TF2 behaviour as alibi code still relies on TF1 constructs
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import to_categorical
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

print('TF version: ', tf.__version__)
print('Eager execution enabled: ', tf.executing_eagerly()) # False

TF version:  2.3.0
Eager execution enabled:  False


In [None]:
df = pd.read_csv('/content/heartu.csv')
df.head(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,condition
0,69,1,0,160,234,1,2,131,0,0.1,1,1,0,0
1,69,0,0,140,239,0,0,151,0,1.8,0,2,0,0
2,66,0,0,150,226,0,0,114,0,2.6,2,0,0,0
3,65,1,0,138,282,1,2,174,0,1.4,1,1,0,1
4,64,1,0,110,211,0,2,144,1,1.8,1,0,0,0


In [None]:
df.shape

(297, 14)

In [None]:
feature_names=['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']
data = df[feature_names]
target = df.condition

Splitting the data into training and testing set

In [None]:
x_train, x_test, y_train, y_test = train_test_split(data,target, random_state=0)

In [None]:
from sklearn.linear_model import LogisticRegression
model=LogisticRegression(C=0.01,solver='liblinear',max_iter=500)
model.fit(x_train,y_train)

LogisticRegression(C=0.01, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=500,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

Apllying AnchorTabular method 

In [None]:
from alibi.explainers import AnchorTabular
predict_func=lambda x:model.predict_proba(x)
explainer=AnchorTabular(predict_func,feature_names)

In [None]:
model.predict_proba(x_test)

array([[0.58247803, 0.41752197],
       [0.77450519, 0.22549481],
       [0.75681729, 0.24318271],
       [0.18091057, 0.81908943],
       [0.86033967, 0.13966033],
       [0.11171929, 0.88828071],
       [0.40723555, 0.59276445],
       [0.19783263, 0.80216737],
       [0.11024544, 0.88975456],
       [0.18950502, 0.81049498],
       [0.56097183, 0.43902817],
       [0.91997135, 0.08002865],
       [0.64081597, 0.35918403],
       [0.47946202, 0.52053798],
       [0.92404139, 0.07595861],
       [0.19096939, 0.80903061],
       [0.78200054, 0.21799946],
       [0.41555401, 0.58444599],
       [0.20363026, 0.79636974],
       [0.53557304, 0.46442696],
       [0.86416571, 0.13583429],
       [0.60047315, 0.39952685],
       [0.19753243, 0.80246757],
       [0.19349917, 0.80650083],
       [0.81447944, 0.18552056],
       [0.77601218, 0.22398782],
       [0.60509221, 0.39490779],
       [0.27504511, 0.72495489],
       [0.54251071, 0.45748929],
       [0.62958067, 0.37041933],
       [0.

In [None]:
x_train=x_train.to_numpy()
x_test=x_test.to_numpy()

In [None]:
explainer.fit(x_train)

AnchorTabular(meta={
    'name': 'AnchorTabular',
    'type': ['blackbox'],
    'explanations': ['local'],
    'params': {'seed': None, 'disc_perc': (25, 50, 75)}
})

In [None]:
from sklearn.metrics import jaccard_similarity_score
yjc = model.predict(x_test)
jaccard_similarity_score(y_test, yjc)

jaccard_similarity_score has been deprecated and replaced with jaccard_score. It will be removed in version 0.23. This implementation has surprising behavior for binary and multiclass classification tasks.


0.76

Taking an instance where the person has no disease

In [None]:
inst=1
target_label=['no heart disease','heart disease']
print('Person has',target_label[explainer.predictor(x_test[inst].reshape(1, -1))[0]])
anchor=explainer.explain(x_test[inst])
print('Anchor generated feature(/s)',anchor.anchor)

Person has no heart disease
Anchor generated feature(/s) ['thalach > 138.00', 'thal <= 0.00', 'age <= 48.00']


Here, we can see that as the person's maximum heart rate is 152 (which is greater than 138) and thal value is 0 'normal' and age is less than 48, the above features act as anchors for the patient and deduce that the person has no heart disease 

Taking another instance where the person has a heart disease

In [None]:
print('Person has',target_label[explainer.predictor(x_test[9].reshape(1, -1))[0]])
anchor=explainer.explain(x_test[9])
print('Anchor generated feature(/s)',anchor.anchor)

Person has heart disease
Anchor generated feature(/s) ['thalach <= 138.00', 'ca > 1.00']


Here, we can see that as the person's maximum heart rate is 131 (which is less than 138) and blood vessels coloured by fluoroscopy are 3 (greater than 1), the above features act as anchors for the patient and deduce that the person has a heart disease 