In [1]:
# Supress jupyter warnings if required for cleaner output
import warnings
warnings.simplefilter('ignore')

### Import Library

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt

import tensorflow as tf
tf.get_logger().setLevel(40) # suppress deprecation messages
tf.compat.v1.disable_v2_behavior() # disable TF2 behaviour as alibi code still relies on TF1 constructs
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import to_categorical

import os
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from alibi.explainers import CounterfactualProto

print('TF version: ', tf.__version__)
print('Eager execution enabled: ', tf.executing_eagerly()) # False

TF version:  2.14.0
Eager execution enabled:  False


### Import data

In [3]:
heart = pd.read_csv("cardio_min.csv")

heart = heart.drop(columns=['id'])
heart = heart[['age', 'gender', 'cholesterol', 'gluc', 'smoke', 'bmi', 'systolic_bp', 'diastolic_bp', 'cardio']]

heart.head()

Unnamed: 0,age,gender,cholesterol,gluc,smoke,bmi,systolic_bp,diastolic_bp,cardio
0,50.39,2,1,1,0,21.97,110,80,0
1,51.66,1,3,1,0,23.51,130,70,1
2,48.41,1,1,1,0,28.44,110,70,0
3,51.55,2,1,1,0,20.05,120,80,0
4,58.35,1,1,1,0,25.95,130,70,0


In [4]:
X = heart.drop("cardio", axis = 1)
y = heart["cardio"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state=42
)

In [5]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(random_state=0)
model.fit(X_train, y_train)

In [6]:
y_pred = model.predict(X_test.to_numpy())

In [7]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.70      0.73      0.72      1927
           1       0.71      0.68      0.70      1874

    accuracy                           0.71      3801
   macro avg       0.71      0.71      0.71      3801
weighted avg       0.71      0.71      0.71      3801



In [8]:
predict_fn = lambda x: model.predict_proba(x)

In [9]:
class_names=['low risk', 'high risk']

#### Original instance

In [10]:
idx = 5

In [11]:
print(X_test.iloc[idx])

age              56.21
gender            2.00
cholesterol       1.00
gluc              1.00
smoke             0.00
bmi              23.88
systolic_bp     120.00
diastolic_bp     80.00
Name: 12439, dtype: float64


In [12]:
original = X_test.iloc[idx].values.reshape((1,) + X_test.iloc[idx].shape)
shape = original.shape
print(shape)

(1, 8)


#### Counterfactual instance

In [13]:
# initialize and fit the explainer
cf = CounterfactualProto(predict_fn, shape, use_kdtree=True, theta=10., max_iterations=1000,
                         feature_range=(X_train.min(axis=0), X_train.max(axis=0)),
                         c_init=1., c_steps=10)

cf.fit(X_test.values)

No encoder specified. Using k-d trees to represent class prototypes.


CounterfactualProto(meta={
  'name': 'CounterfactualProto',
  'type': ['blackbox', 'tensorflow', 'keras'],
  'explanations': ['local'],
  'params': {
              'kappa': 0.0,
              'beta': 0.1,
              'gamma': 0.0,
              'theta': 10.0,
              'cat_vars': None,
              'ohe': False,
              'use_kdtree': True,
              'learning_rate_init': 0.01,
              'max_iterations': 1000,
              'c_init': 1.0,
              'c_steps': 10,
              'eps': (0.001, 0.001),
              'clip': (-1000.0, 1000.0),
              'update_num_grad': 1,
              'write_dir': None,
              'feature_range': (age             29.58
gender           1.00
cholesterol      1.00
gluc             1.00
smoke            0.00
bmi              3.47
systolic_bp     10.00
diastolic_bp     0.00
dtype: float64, age              64.90
gender            2.00
cholesterol       3.00
gluc              3.00
smoke             1.00
bmi             178.

In [14]:
explanation = cf.explain(original)

2024-02-23 21:51:00.744537: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled


In [15]:
print(f'Original prediction: {explanation.orig_class}')
print(f'Counterfactual prediction: {explanation.cf["class"]}')

Original prediction: 0
Counterfactual prediction: 1


In [16]:
feature_names = X.columns
print(feature_names)

Index(['age', 'gender', 'cholesterol', 'gluc', 'smoke', 'bmi', 'systolic_bp',
       'diastolic_bp'],
      dtype='object')


In [17]:
orig = original
counterfactual = explanation.cf['X']
delta = counterfactual - orig

for i, f in enumerate(feature_names):
    if np.abs(delta[0][i]) > 1e-4:
        print(f'{f}: {delta[0][i]}')


bmi: 0.6966143798828135


In [18]:
pd.DataFrame(orig, columns=feature_names)

Unnamed: 0,age,gender,cholesterol,gluc,smoke,bmi,systolic_bp,diastolic_bp
0,56.21,2.0,1.0,1.0,0.0,23.88,120.0,80.0


In [19]:
pd.DataFrame(counterfactual, columns=feature_names)

Unnamed: 0,age,gender,cholesterol,gluc,smoke,bmi,systolic_bp,diastolic_bp
0,56.209999,2.0,1.0,1.0,0.0,24.576614,120.0,80.0


# Counterfactual Explanation Prototype

### Type of explanation

Type of explanation: **Counterfactual** 

Scope: **Local** 

Techniques: **Counterfactual by Alibi** 

### Stakeholders

| Stakeholders          | Project Role                   |
|-----------------------|--------------------------------|
| Developer Team        | AI developer                   |
| Subject Matter Expert | Senior consultant cardiologist |
| Decision Maker        | Cardiologist                   |
| Affected users        | Patient                        |


## User Stories collected from stakeholders

Question-type: How to

| Persona                         | Need                                                              | Timeliness                | Goal                                                                                         |
| ------------------------------- | ----------------------------------------------------------------- | ------------------------- | -------------------------------------------------------------------------------------------- |
| As an AI developer              | I want to understand **how to** lower the patient's risk          | after testing the system, | so that I can analyze changes to the patient health metrics                                  |
| As a senior consultant cardiologist                  | I want to understand **how to** adjust a patient’s health metrics | after using the system,   | so that I can decrease patient's heart disease risk assessment                               |
| As a physician                  | I want to understand **how to** adjust a patient’s health metrics | after using the system,   | so that I can provide specific guidance on addressing his eating habits and lifestyle.       |
| As a general population patient | I want to understand **how to** modify my health metrics          | after using the system,   | so that I can take actionable steps to lower my risk level based on the system's prediction. |

# Explanation for AI develper

In [34]:
def explain_to_developer(idx):
    print(X_test.iloc[idx])
    original = X_test.iloc[idx].values.reshape((1,) + X_test.iloc[idx].shape)
    
    explanation = cf.explain(original)
    
    print(f'\nOriginal prediction: {explanation.orig_class}')
    print(f'\nCounterfactual prediction: {explanation.cf["class"]}')
    print()
    
    feature_names = X.columns
    
    orig = original
    counterfactual = explanation.cf['X']
    delta = counterfactual - orig

    for i, f in enumerate(feature_names):
        if np.abs(delta[0][i]) > 1e-4:
            print(f'reduce {f}: {delta[0][i]}')
    
    return (orig, counterfactual)

In [35]:
example_1 = explain_to_developer(5)

age              56.21
gender            2.00
cholesterol       1.00
gluc              1.00
smoke             0.00
bmi              23.88
systolic_bp     120.00
diastolic_bp     80.00
Name: 12439, dtype: float64

Original prediction: 0

Counterfactual prediction: 1

reduce bmi: 0.6966143798828135


# Explanation for Senior Consultant Cardiologist

Since age and sex are non-modifiable feature

In [32]:
def explain_to_cardiologist(idx):
    print(X_test.iloc[idx])
    original = X_test.iloc[idx].values.reshape((1,) + X_test.iloc[idx].shape)
    
    explanation = cf.explain(original)
    
    if explanation is None: return False
    
    print(f'\nOriginal prediction: {explanation.orig_class}')
    print(f'Counterfactual prediction: {explanation.cf["class"]}')
    print()
    
    feature_names = X.columns
    
    orig = original
    counterfactual = explanation.cf['X']
    delta = counterfactual - orig
    
    for i, f in enumerate(feature_names):
        if f == "age" or f == "sex":
            print(f'{f}: Non-modifiable')
        elif np.abs(delta[0][i]) > 1e-4:
            if (delta[0][i] < 0):
                print(f'Reduce {feature_names[f]} from {orig[0][i]} to {counterfactual[0][i]}')
            elif delta[0][i] > 0:
                print(f'Increase {feature_names[f]} from {orig[0][i]} to {counterfactual[0][i]}')
            else:
                print(f'No improvement needed for {f}')
    
    print(f"\nAfter the modification above, the patient will be predicted as {class_names[explanation.cf['class']]}")
    
    return (orig, counterfactual)

In [28]:
def further_explain_to_cardiologist(example):
    print("Let's predict the heart disease risk again based on modifiable variable...")
    print(example)
#     age = example[0][0][0] # Non-modifiable
#     sex = example[0][0][1] # Non-modifiable
#     trestbps = example[1][0][2] # Counterfactual variable
#     chol = example[1][0][3] # Counterfactual variable
#     fbs = example[1][0][4] # Counterfactual variable
    
#     age              56.21
# gender            2.00
# cholesterol       1.00
# gluc              1.00
# smoke             0.00
# bmi              23.88
# systolic_bp     120.00
# diastolic_bp     80.00

#     old_prediction = model.predict(example[0])
#     new_prediction = model.predict([[age, sex, trestbps, chol, fbs]])

#     if (new_prediction[0] == old_prediction[0]):
#         print("After modifying counterfactual variables, the heart disease prediction still remain the same ")
#         print("You may contact the development team to make changes")
#     else:
#         print('After modifying counterfactual variables, the heart disease prediction has improved')

### Example 1

In [33]:
example_2 = explain_to_cardiologist(5)

age              56.21
gender            2.00
cholesterol       1.00
gluc              1.00
smoke             0.00
bmi              23.88
systolic_bp     120.00
diastolic_bp     80.00
Name: 12439, dtype: float64

Original prediction: 0
Counterfactual prediction: 1

age: Non-modifiable


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [None]:
further_explain_to_cardiologist(example_2)

### Example 2

In [None]:
example_3 = explain_to_cardiologist(14)

Based on the above explanation, the patient is predicted as low risk currently.

However, if the cholestrol level is increased from **184mmHg to 194.15 mmHg** , the patient will be predicted as high risk.

In [None]:
pd.DataFrame(example_3[0], columns=feature_names)

In [None]:
pd.DataFrame(example_3[1], columns=feature_names)

In [None]:
further_explain_to_cardiologist(example_3)

# Explanation for Physician

In [None]:
def explain_to_physician(df_sample):
    
    original = df_sample.reshape((1,) + df_sample.shape)
    
    print('Age:', df_sample[0])
    print('Sex:', 'Male' if df_sample[1] == 1 else 'Female')
    print('Resting Blood Pressure:', df_sample[2])
    print('Cholestrol level:', df_sample[3])
    print('Fasting blood sugar level:', '> 120 mg/dl' if df_sample[4] == 1 else '< 120 mg/dl')
    
    explanation = cf.explain(original)
    
    print(f'\nOriginal prediction: {explanation.orig_class}')
    print(f'Counterfactual prediction: {explanation.cf["class"]}')
    print()
    
    feature_names = X.columns
    
    orig = original
    counterfactual = explanation.cf['X']
    delta = counterfactual - orig
    
    for i, f in enumerate(feature_names):
        if f == "age" or f == "sex":
            continue
        elif np.abs(delta[0][i]) > 1e-4:
            if (delta[0][i] < 0):
                print(f'Reduce {full_name[f]} from {orig[0][i]} to {counterfactual[0][i]}')
            else:
                print(f'Increase {full_name[f]} from {orig[0][i]} to {counterfactual[0][i]}')
    
    return (orig, counterfactual)

In [None]:
t = X_test.iloc[1].values
print(type(t))

In [None]:
input_patient = np.array([60, 1, 136, 195, 0])

# print(input_patient)
# original = input_patient.reshape((1,) + input_patient.shape)
example_4 = explain_to_physician(input_patient)

Based to table below, although the patient is predicted as low risk, the blood pressure is at above borderline at his age.

|             | Women        | Men          |
| ----------- | ------------ | ------------ |
| 18-39 years | 110/68 mm Hg | 119/70 mm Hg |
| 40-59 years | 122/74 mm Hg | 124/77 mm Hg |
| **60+ years**   | 139/68 mm Hg | **133/69 mm Hg** |


**Recommendation**

According the explanation model, your blood pressure is at borderline for high risk of heart disease. Once the blood pressure is increased to 137.25mmHg, the patient will be predicted as high risk.

Since the patient is 60 years old, the patient may reduce his blood pressure with **low PA intensity activitiy**, such as 
1. Spending 30 minutes a day walking (4.0km/h - 4.8km/h) or 
2. Hook on a step tracker, and aim for an extra 1,000 steps a day

> based on the handbook **[Primary & Secondary Prevention of Cardiovascular Disease 2017](https://www.moh.gov.my/moh/resources/Penerbitan/CPG/CARDIOVASCULAR/3.pdf)** by the **Ministry of Health**

In [None]:
input_patient = np.array([65, 0, 162, 296, 0])

# print(input_patient)
# original = input_patient.reshape((1,) + input_patient.shape)
example_4 = explain_to_physician(input_patient)

# Explanation for Patient

In [None]:
input_patient = np.array([45, 0, 145, 180, 0])

# print(input_patient)
# original = input_patient.reshape((1,) + input_patient.shape)
example_5 = explain_to_physician(input_patient)

**Recommendation**

However, according the explanation model, your blood pressure and cholestrol is at borderline for high risk of heart disease. 

Since the patient is 45 years old, the patient may reduce her blood pressure and cholestrol level by low PA intensity activitiy, such as 
1. Spending 60 minutes a day walking (4.0km/h - 4.8km/h) or 
2. Hook on a step tracker, and aim for an extra 2,000 steps a day
3. Half of the plate being fruits and vegetables
4. Consume non – fried & santan free dishes everyday

> based on the handbook **[Primary & Secondary Prevention of Cardiovascular Disease 2017](https://www.moh.gov.my/moh/resources/Penerbitan/CPG/CARDIOVASCULAR/3.pdf)** by the **Ministry of Health**

In [None]:
def explain_to_physician(df_sample):
    
    original = df_sample.reshape((1,) + df_sample.shape)
    
    print('Age:', df_sample[0])
    print('Sex:', 'Male' if df_sample[1] == 1 else 'Female')
    print('Resting Blood Pressure:', df_sample[2])
    print('Cholestrol level:', df_sample[3])
    print('Fasting blood sugar level:', '> 120 mg/dl' if df_sample[4] == 1 else '< 120 mg/dl')
    
    explanation = cf.explain(original)
    
    print(f'\nOriginal prediction: {explanation.orig_class}')
    print(f'Counterfactual prediction: {explanation.cf["class"]}')
    print()
    
    feature_names = ['chol']
    return (orig, counterfactual)
    orig = original
    counterfactual = explanation.cf['X']
    delta = counterfactual - orig
    
    for i, f in enumerate(feature_names):
        if f == "age" or f == "sex":
            continue
        elif np.abs(delta[0][i]) > 1e-4:
            if (delta[0][i] < 0):
                print(f'Reduce {full_name[f]} from {orig[0][i]} to {counterfactual[0][i]}')
            else:
                print(f'Increase {full_name[f]} from {orig[0][i]} to {counterfactual[0][i]}')
    
    return (orig, counterfactual)

In [None]:
input_patient = np.array([45, 0, 145, 180, 0])

# print(input_patient)
# original = input_patient.reshape((1,) + input_patient.shape)
example_6 = explain_to_physician(input_patient)

In [None]:
example_6[0]

In [None]:
example_6[1]