In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from CFGenerators import CFGenerators
from FeatureTweaking import feature_tweaking

def cost_func(a, b):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    a, b : array-like
        Points of equal (or broadcast-compatible) shape. NumPy arrays,
        lists and tuples are all accepted.

    Returns
    -------
    float
        ``np.linalg.norm(a - b)``, i.e. the 2-norm of the flattened
        difference for 1-D inputs.
    """
    # Coerce first so plain Python sequences work too; `list - list` would
    # otherwise raise TypeError. ndarray inputs pass through unchanged.
    return np.linalg.norm(np.asarray(a) - np.asarray(b))

In [2]:
# Read the dataset from the relative path 'data/pima.csv' into a DataFrame.
# The path is resolved against the kernel's working directory.
data = pd.read_csv('data/pima.csv')
# Bare last expression: the notebook renders the first rows as the cell output.
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
# Show the dtype of every column. The hand-written 'data types' list in
# feature_cons (experiment cell below) lists int/float in this same column order.
data.dtypes

Pregnancies                   int64
Glucose                       int64
BloodPressure                 int64
SkinThickness                 int64
Insulin                       int64
BMI                         float64
DiabetesPedigreeFunction    float64
Age                           int64
Outcome                       int64
dtype: object

In [22]:

# --- Configuration ------------------------------------------------------
# Fixed seed so the train/test split and the random forest are identical on
# every Restart & Run All (both were unseeded before, so results drifted).
SEED = 42

# Every column except the last one is a feature; the last column is the label.
feature_names = list(data.columns)[:-1]
# Constraint spec handed to CFGenerators. The keys are interpreted by that
# project class; presumably 'increasing' means the feature may only grow in a
# counterfactual -- TODO confirm against CFGenerators.
feature_cons = {'immutable':['DiabetesPedigreeFunction'],
                'increasing':['Pregnancies', 'Age'],
                'decreasing':None,
                'data types':['int', 'int', 'int', 'int', 'int', 'float', 'float', 'int']}
# The type list is written by hand, so make sure it has one entry per feature.
assert len(feature_cons['data types']) == len(feature_names), \
    "feature_cons['data types'] must have one entry per feature"
# NOTE(review): dist_type is 'L1' while cost_func (used by feature_tweaking)
# computes an L2 norm -- confirm the mismatch is intended.
dist_type = 'L1'

# --- Data split and model -----------------------------------------------
X = np.array(data.iloc[:,:-1])
y = np.array(data.iloc[:,-1])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=SEED)
clf = RandomForestClassifier(n_estimators=100, max_depth=20, random_state=SEED)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

# --- Counterfactual explainer -------------------------------------------
explainer = CFGenerators(clf, X_train, feature_names, feature_cons, dist_type)
explainer.fit()

epsilon = 0.005          # passed to feature_tweaking
class_labels = [0, 1]
# NOTE(review): tc_eece / tc_ft (and `tc` below) are never updated or read in
# this cell; they are kept only in case cells outside this view use them.
tc_eece = 0
tc_ft = 0

# --- Generate counterfactuals for every test instance predicted as 1 -----
for i, prediction in enumerate(predictions):
    if prediction != 1:
        continue

    print(i,'=====================================================================')
    target = 1 - prediction   # flip the binary label: 1 -> 0
    x = X_test[i]
    print('Query instance:\n', x)

    # Baseline: feature tweaking on the forest.
    cf, tc = feature_tweaking(clf, x, class_labels , target, epsilon, cost_func)
    print('Counterfactual generated by FT: \n', cf)

    # The four generation methods offered by the explainer; per the recorded
    # output, cf_info['cf'] is None when the constraints cannot be satisfied.
    for method in ['mo', 'discern', 'lire', 'eece']:
        cf_info = explainer.generate_cf(x, target, method, dist_type)
        print(f'Counterfactual generated by {method}: \n', cf_info['cf'])
    print('=====================================================================')

['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age'] ['DiabetesPedigreeFunction'] ['Pregnancies', 'Age'] None
Building candidate regions


100%|██████████████████████████████████| 100/100 [00:01<00:00, 86.93it/s]


Query instance:
 [  0.    188.     82.     14.    185.     32.      0.682  22.   ]
Counterfactual generated by FT: 
 [  0.         188.          82.          14.         185.
  29.85499943   0.61749998  23.505     ]
Your feature constrains are too strict for this instance! Can't generate satisfied counterfactual example!
Counterfactual generated by mo: 
 None
Your feature constrains are too strict for this instance! Can't generate satisfied counterfactual example!
Counterfactual generated by discern: 
 None
Counterfactual generated by lire: 
 [  0.         107.          77.           5.           7.
  44.90099962   0.682       24.        ]
Counterfactual generated by eece: 
 [  0.         188.          82.          15.         185.
  28.44999981   0.682       23.        ]
Query instance:
 [  1.    128.     98.     41.     58.     32.      1.321  33.   ]
Counterfactual generated by FT: 
 [  1.         128.495       98.          41.          58.
  32.           1.11050003  33.        ]
Y