In [1]:
%load_ext autoreload

import sys
sys.path.insert(0, "../src")

import os
os.chdir("../src")

In [2]:
import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
from data_generator import DataGenerator
from counterfactual import Counterfactual
from utils import lift_analysis
import tensorflow as tf
from tensorflow.python.framework.ops import disable_eager_execution
# Fix the global NumPy and TensorFlow random seeds to the same constant value.
np.random.seed(0)
tf.random.set_seed(0)
# Turn off TensorFlow eager execution for the remainder of the session.
# NOTE(review): presumably needed by the Counterfactual explainer imported
# above — confirm against counterfactual.py.
disable_eager_execution()

In [None]:
# Build the data handler on top of the imputed bank-marketing CSV and pull
# out the train/test feature and label splits it provides.
data_handler = DataGenerator(
    "../data/imputed_bank_data_mice.csv",
    imputed=True,
    preprocessed=False,
)
x_train, x_test, y_train, y_test = data_handler.load_data()

In [20]:
# Location of the serialized, retrained LGBM model.
# NOTE(review): joblib files are pickle-based; only load files from a trusted source.
model_path = '../results/retrain_best_lgbm_with_binary_cat/LGBM/model.pkl'

# Deserialize the model from disk.
with open(model_path, 'rb') as file:
    lgbm_model = joblib.load(file)

# Build the counterfactual explainer from the model, the training features
# and the data handler.
cfp_explainer = Counterfactual(lgbm_model, x_train, data_handler)

No encoder specified. Using k-d trees to represent class prototypes.


### Good economy - poor

In [17]:
# Instance under study for this section's scenario.
# The values are positional. The column labels below are inferred from the
# table displayed by this cell — confirm against DataGenerator.transform.
x = [
    "technician",           # job
    "single",               # marital
    "professional course",  # education
    "no",                   # housing
    "no",                   # loan
    "cellular",             # contact
    "may",                  # month
    "tue",                  # day_of_week
    "nonexistent",          # poutcome
    "200sec - 400sec",      # binned_duration
    35,                     # age
    2.0,                    # campaign
    0.0,                    # previous
    -3.0,                   # emp.var.rate
    93.0,                   # cons.price.idx
    -30.0,                  # cons.conf.idx
    1.0,                    # euribor3m
    5018,                   # nr.employed
]
instance = data_handler.transform(x)

# Ask the explainer for counterfactuals; the result may be None.
counterfactual = cfp_explainer.define_counterfactuals(instance)

# Map the instance back to its original representation and show it.
instance_orig = data_handler.rev_transform(instance)
print("--------------------")
print("\nOriginal Instance:")
display(instance_orig)

# Only the first returned counterfactual row is converted and shown.
if counterfactual is not None:
    counterfactual_orig = data_handler.rev_transform(counterfactual.iloc[0])
    print("\nCounterfactual Instance:")
    display(counterfactual_orig)

Original instance: no  -- proba: [0.89370299 0.10629701]
Counterfactual instance: yes  -- proba: [0.34070579 0.65929421]

Counterfactual perturbations...

Categorical:
binned_duration: 200sec - 400sec  -->   400sec - 800sec

Numerical:
--------------------

Original Instance:


Unnamed: 0,education,job,marital,month,day_of_week,binned_duration,poutcome,housing,loan,contact,age,campaign,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,professional course,technician,single,may,tue,200sec - 400sec,nonexistent,no,no,cellular,35.0,2.0,0.0,-3.0,93.0,-30.0,1.0,5018.0



Counterfactual Instance:


Unnamed: 0,education,job,marital,month,day_of_week,binned_duration,poutcome,housing,loan,contact,age,campaign,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,professional course,technician,single,may,tue,400sec - 800sec,nonexistent,no,no,cellular,35.0,2.0,5.54832e-09,-3.0,93.0,-30.0,1.0,5018.0


### Bad economy - poor

In [28]:
# select an instance you want to study
# Instance under study for this section's scenario.
# The values are positional. The column labels below are inferred from the
# table displayed by this cell — confirm against DataGenerator.transform.
x = [
    "technician",           # job
    "single",               # marital
    "professional course",  # education
    "no",                   # housing
    "no",                   # loan
    "cellular",             # contact
    "may",                  # month
    "tue",                  # day_of_week
    "nonexistent",          # poutcome
    "200sec - 400sec",      # binned_duration
    35,                     # age
    2.0,                    # campaign
    0.0,                    # previous
    -1.8,                   # emp.var.rate
    94.7,                   # cons.price.idx
    -48.0,                  # cons.conf.idx
    5.0,                    # euribor3m
    4980,                   # nr.employed
]
instance = data_handler.transform(x)

# Ask the explainer for counterfactuals; the result may be None.
counterfactual = cfp_explainer.define_counterfactuals(instance)

# Map the instance back to its original representation and show it.
instance_orig = data_handler.rev_transform(instance)
print("--------------------")
print("\nOriginal Instance:")
display(instance_orig)

# Only the first returned counterfactual row is converted and shown.
if counterfactual is not None:
    counterfactual_orig = data_handler.rev_transform(counterfactual.iloc[0])
    print("\nCounterfactual Instance:")
    display(counterfactual_orig)

Original instance: no  -- proba: [0.97761171 0.02238829]
Counterfactual instance: yes  -- proba: [0.35765255 0.64234745]

Counterfactual perturbations...

Categorical:
binned_duration: 200sec - 400sec  -->   400sec - 800sec

Numerical:
emp.var.rate: -1.13  -->   -1.11
cons.price.idx: 2.09  -->   2.10
euribor3m: 0.76  -->   -0.20
--------------------

Original Instance:


Unnamed: 0,education,job,marital,month,day_of_week,binned_duration,poutcome,housing,loan,contact,age,campaign,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,professional course,technician,single,may,tue,200sec - 400sec,nonexistent,no,no,cellular,35.0,2.0,0.0,-1.8,94.7,-48.0,5.0,4980.0



Counterfactual Instance:


Unnamed: 0,education,job,marital,month,day_of_week,binned_duration,poutcome,housing,loan,contact,age,campaign,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,professional course,technician,single,may,tue,400sec - 800sec,nonexistent,no,no,cellular,35.0,2.0,5.54832e-09,-1.765453,94.703278,-48.0,3.245479,4980.0


### Bad economy - rich

In [29]:
# select an instance you want to study
# Instance under study for this section's scenario.
# The values are positional. The column labels below are inferred from the
# table displayed by this cell — confirm against DataGenerator.transform.
x = [
    "admin ",               # job (trailing space kept exactly as entered)
    "married",              # marital
    "university degree",    # education
    "yes",                  # housing
    "no",                   # loan
    "cellular",             # contact
    "may",                  # month
    "tue",                  # day_of_week
    "nonexistent",          # poutcome
    "200sec - 400sec",      # binned_duration
    35,                     # age
    2.0,                    # campaign
    0.0,                    # previous
    -1.8,                   # emp.var.rate
    94.7,                   # cons.price.idx
    -48.0,                  # cons.conf.idx
    5.0,                    # euribor3m
    4980,                   # nr.employed
]
instance = data_handler.transform(x)

# Ask the explainer for counterfactuals; the result may be None.
counterfactual = cfp_explainer.define_counterfactuals(instance)

# Map the instance back to its original representation and show it.
instance_orig = data_handler.rev_transform(instance)
print("--------------------")
print("\nOriginal Instance:")
display(instance_orig)

# Only the first returned counterfactual row is converted and shown.
if counterfactual is not None:
    counterfactual_orig = data_handler.rev_transform(counterfactual.iloc[0])
    print("\nCounterfactual Instance:")
    display(counterfactual_orig)

Original instance: no  -- proba: [0.9719124 0.0280876]
Counterfactual instance: yes  -- proba: [0.47100976 0.52899024]

Counterfactual perturbations...

Categorical:
binned_duration: 200sec - 400sec  -->   400sec - 800sec

Numerical:
emp.var.rate: -1.13  -->   -1.12
cons.price.idx: 2.09  -->   2.10
euribor3m: 0.76  -->   0.09
--------------------

Original Instance:


Unnamed: 0,education,job,marital,month,day_of_week,binned_duration,poutcome,housing,loan,contact,age,campaign,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,university degree,admin,married,may,tue,200sec - 400sec,nonexistent,yes,no,cellular,35.0,2.0,0.0,-1.8,94.7,-48.0,5.0,4980.0



Counterfactual Instance:


Unnamed: 0,education,job,marital,month,day_of_week,binned_duration,poutcome,housing,loan,contact,age,campaign,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,university degree,admin,married,may,tue,400sec - 800sec,nonexistent,yes,no,cellular,35.0,2.0,5.54832e-09,-1.775927,94.702309,-48.0,3.781626,4980.0


### Good economy - rich

In [22]:
# select an instance you want to study
# Instance under study for this section's scenario.
# The values are positional. The column labels below are inferred from the
# table displayed by this cell — confirm against DataGenerator.transform.
x = [
    "admin ",               # job (trailing space kept exactly as entered)
    "married",              # marital
    "university degree",    # education
    "yes",                  # housing
    "no",                   # loan
    "cellular",             # contact
    "may",                  # month
    "tue",                  # day_of_week
    "nonexistent",          # poutcome
    "200sec - 400sec",      # binned_duration
    35,                     # age
    2.0,                    # campaign
    0.0,                    # previous
    -3.0,                   # emp.var.rate
    93.0,                   # cons.price.idx
    -30.0,                  # cons.conf.idx
    1.0,                    # euribor3m
    5018,                   # nr.employed
]
instance = data_handler.transform(x)

# Ask the explainer for counterfactuals; the result may be None.
counterfactual = cfp_explainer.define_counterfactuals(instance)

# Map the instance back to its original representation and show it.
instance_orig = data_handler.rev_transform(instance)
print("--------------------")
print("\nOriginal Instance:")
display(instance_orig)

# Only the first returned counterfactual row is converted and shown.
if counterfactual is not None:
    counterfactual_orig = data_handler.rev_transform(counterfactual.iloc[0])
    print("\nCounterfactual Instance:")
    display(counterfactual_orig)

Original instance: no  -- proba: [0.90704487 0.09295513]
Counterfactual instance: yes  -- proba: [0.3656962 0.6343038]

Counterfactual perturbations...

Categorical:
binned_duration: 200sec - 400sec  -->   400sec - 800sec

Numerical:
--------------------

Original Instance:


Unnamed: 0,education,job,marital,month,day_of_week,binned_duration,poutcome,housing,loan,contact,age,campaign,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,university degree,admin,married,may,tue,200sec - 400sec,nonexistent,yes,no,cellular,35.0,2.0,0.0,-3.0,93.0,-30.0,1.0,5018.0



Counterfactual Instance:


Unnamed: 0,education,job,marital,month,day_of_week,binned_duration,poutcome,housing,loan,contact,age,campaign,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,university degree,admin,married,may,tue,400sec - 800sec,nonexistent,yes,no,cellular,35.0,2.0,5.54832e-09,-3.0,93.0,-30.0,1.0,5018.0
