In [94]:
# Sklearn & Keras imports
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
import pandas as pd
import numpy as np

# DiCE imports
import dice_ml
from dice_ml.utils import helpers  # helper functions

In [95]:
dataset = pd.read_csv('german_credit_data (1).csv')
dataset[['Checking account','Saving accounts']] = dataset[['Checking account','Saving accounts']].fillna('')
LE = LabelEncoder()
display(dataset.head())
dataset['Risk'] = LE.fit_transform(dataset['Risk'])
dataset = dataset.drop(['Unnamed: 0'],axis=1)
dataset.head()

Unnamed: 0.1,Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,0,67,male,2,own,,little,1169,6,radio/TV,good
1,1,22,female,2,own,little,moderate,5951,48,radio/TV,bad
2,2,49,male,1,own,little,,2096,12,education,good
3,3,45,male,2,free,little,little,7882,42,furniture/equipment,good
4,4,53,male,2,free,little,little,4870,24,car,bad


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,67,male,2,own,,little,1169,6,radio/TV,1
1,22,female,2,own,little,moderate,5951,48,radio/TV,0
2,49,male,1,own,little,,2096,12,education,1
3,45,male,2,free,little,little,7882,42,furniture/equipment,1
4,53,male,2,free,little,little,4870,24,car,0


## Data Modelling

In [96]:
dataset['Risk'].value_counts()

1    700
0    300
Name: Risk, dtype: int64

There is a slight data imbalance in the dataset. 70% are good and 30% are bad. Therefore, given data imbalances lead to overfitting, it is appropriate to take measures to balance the dataset (under/oversampling) and to evaluate by using a variety of metrics (as opposed to only accuracy)

## Model Building

In [102]:
from collections import Counter


target = dataset["Risk"]
X = dataset.drop(['Risk'],axis=1)

ros = RandomUnderSampler()
# resampling X, y
dataset, target = ros.fit_resample(X, target)
# new class distribution 
print(Counter(target))
print(type(dataset))

dataset = pd.concat([dataset,target],axis=1)

train_dataset, test_dataset, y_train, y_test = train_test_split(dataset,
                                                                target,
                                                                test_size=0.2,
                                                                random_state=0,
                                                                stratify=target)
x_train = train_dataset.drop('Risk', axis=1)
x_test = test_dataset.drop('Risk', axis=1)

Counter({0: 300, 1: 300})
<class 'pandas.core.frame.DataFrame'>


In [98]:
d = dice_ml.Data(dataframe=train_dataset, continuous_features=['Age','Credit amount','Duration'], outcome_name='Risk')

In [99]:
numerical = ['Credit amount','Duration']
categorical = x_train.columns.difference(numerical)

categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

transformations = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical)])

# Append classifier to preprocessing pipeline.

clf_rf = Pipeline(steps=[('preprocessor', transformations),
                      ('clf', RandomForestClassifier(random_state=42))])

clf_svm = Pipeline(steps=[('preprocessor', transformations),
                      ('clf', SVC(random_state=42))])

grid_params_rf = [{'clf__criterion': ['gini', 'entropy'],
                   'clf__max_features': ['auto', 'sqrt'],
                   'clf__n_estimators': [int(x) for x in np.linspace(start = 128, stop = 384, num = 32)],
                   'clf__min_samples_split': [2, 5, 10],
                   'clf__max_depth': [int(x) for x in np.linspace(start = 5, stop = 8, num = 1)]}]

grid_params_svm = [{'clf__kernel': ['poly', 'rbf','sigmoid'], 
        'clf__C': [0.1, 1, 10, 100, 1000],
        'clf__gamma': [1, 0.1, 0.01, 0.001, 0.0001]}]

jobs = -1

RF = GridSearchCV(estimator=clf_rf,
            param_grid=grid_params_rf,
            scoring='roc_auc',
            cv=10, 
            n_jobs=jobs)


SVM = GridSearchCV(estimator=clf_svm,
            param_grid=grid_params_svm,
            scoring='roc_auc',
            cv=10,
            n_jobs=jobs)

grids = [RF,SVM]

In [100]:
# Creating a dict for our reference
grid_dict = {0: 'Random Forest', 
        1: 'Support Vector Machine'}

best_rf = {'score': 0, 'best_model':0}
best_svm = {'score': 0, 'best_model':0}

# Fit the grid search objects
print('Performing model optimizations...')

for idx, model in enumerate(grids):
    
    print('\nEstimator: %s' % grid_dict[idx])
    model.fit(x_train, y_train)
    print('Best params are : %s' % model.best_params_)
    
    # Best training data accuracy
    print('Best training accuracy: %.3f' % model.best_score_)
    
    # Predict on test data with best params
    y_pred = model.predict(x_test)
    print('Test set accuracy score for best params: %.3f ' % accuracy_score(y_test, y_pred))
    print('Precision score: {}'.format(precision_score(y_test, y_pred)))
    print('Recall score: {}'.format(recall_score(y_test, y_pred)))
    print('F1 score: {}'.format(f1_score(y_test, y_pred)))
    print('AUC-ROC score: {}'.format(roc_auc_score(y_test, y_pred)))
    
    # Track best (highest test accuracy) model
    if idx == 0:
        if roc_auc_score(y_test, y_pred) > best_rf['score']:
            best_rf['score'] = roc_auc_score(y_test, y_pred)
            best_rf['best_model'] = model.best_params_ 
    
    elif idx == 1:
        if roc_auc_score(y_test, y_pred) > best_svm['score']:
            best_svm['score'] = roc_auc_score(y_test, y_pred)
            best_svm['best_model'] = model.best_params_
            
# save dict to file
import json

with open('best_creditscore_rf_params.txt', 'w') as file:
     file.write(json.dumps(best_rf)) # use `json.loads` to do the reverse
        
with open('best_creditscore_svm_params.txt', 'w') as file:
     file.write(json.dumps(best_svm)) # use `json.loads` to do the reverse


Performing model optimizations...

Estimator: Random Forest
Best params are : {'clf__criterion': 'gini', 'clf__max_depth': 5, 'clf__max_features': 'auto', 'clf__min_samples_split': 5, 'clf__n_estimators': 136}
Best training accuracy: 0.728
Test set accuracy score for best params: 0.725 
Precision score: 0.7454545454545455
Recall score: 0.6833333333333333
F1 score: 0.7130434782608696
AUC-ROC score: 0.725

Estimator: Support Vector Machine
Best params are : {'clf__C': 0.1, 'clf__gamma': 0.1, 'clf__kernel': 'poly'}
Best training accuracy: 0.734
Test set accuracy score for best params: 0.742 
Precision score: 0.8222222222222222
Recall score: 0.6166666666666667
F1 score: 0.7047619047619048
AUC-ROC score: 0.7416666666666667


In [103]:
rf_model = RandomForestClassifier()
svm_model = SVC()
rf_params = best_rf['best_model']
svm_params = best_svm['best_model']
rf_dict = {}
svm_dict = {}
print(rf_params)
print(svm_params)
for key in rf_params:
    rf_dict[key[5:]] = rf_params[key]
for key in svm_params:
    svm_dict[key[5:]] = svm_params[key]
print(svm_dict)
print(rf_dict)
rf_model.set_params(**rf_dict)
svm_model.set_params(**svm_dict)

{'clf__criterion': 'gini', 'clf__max_depth': 5, 'clf__max_features': 'auto', 'clf__min_samples_split': 5, 'clf__n_estimators': 136}
{'clf__C': 0.1, 'clf__gamma': 0.1, 'clf__kernel': 'poly'}
{'C': 0.1, 'gamma': 0.1, 'kernel': 'poly'}
{'criterion': 'gini', 'max_depth': 5, 'max_features': 'auto', 'min_samples_split': 5, 'n_estimators': 136}


SVC(C=0.1, gamma=0.1, kernel='poly')

In [104]:
clf_rf = Pipeline(steps=[('preprocessor', transformations),
                      ('clf', rf_model)])

clf_svm = Pipeline(steps=[('preprocessor', transformations),
                      ('clf', svm_model)])

clf_rf.fit(x_train, y_train)

clf_svm.fit(x_train, y_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('cat',
                                                  Pipeline(steps=[('onehot',
                                                                   OneHotEncoder(handle_unknown='ignore'))]),
                                                  Index(['Age', 'Checking account', 'Housing', 'Job', 'Purpose',
       'Saving accounts', 'Sex'],
      dtype='object'))])),
                ('clf',
                 RandomForestClassifier(max_depth=5, min_samples_split=5,
                                        n_estimators=136))])

## Obtain inaccurate classifications (in dataframe)

In [116]:
y_pred_rf = clf_rf.predict(x_test)

In [235]:
list_index = list(y_test.index.values)
list_labels = [i for i in y_test]

actual_class = []
pred_class = []
for i in range(120):
    a = (list_index[i],list_labels[i])
    b = (list_index[i],y_pred_rf[i])
    actual_class.append(a)
    pred_class.append(b)

In [238]:
wrong_class = []
for i in range(120):
    if pred_class[i][1] != actual_class[i][1]:
        a = (pred_class[i][0], pred_class[i][1])
        wrong_class.append(a)
        
print(len(wrong_class))

33
[(381, 0), (326, 0), (313, 0), (363, 0), (503, 0), (305, 0), (334, 0), (574, 0), (392, 0), (361, 0), (28, 1), (65, 1), (17, 1), (115, 1), (580, 0), (195, 1), (477, 0), (99, 1), (186, 1), (566, 0), (436, 0), (120, 1), (595, 0), (183, 1), (420, 0), (387, 0), (324, 0), (185, 1), (441, 0), (406, 0), (372, 0), (556, 0), (394, 0)]


In [248]:
list_indices = []
for i,j in wrong_class:
    list_indices.append(i)

In [249]:
print(list_indices)

[381, 326, 313, 363, 503, 305, 334, 574, 392, 361, 28, 65, 17, 115, 580, 195, 477, 99, 186, 566, 436, 120, 595, 183, 420, 387, 324, 185, 441, 406, 372, 556, 394]


In [279]:
#obtain all x_test that are wrong misclassified
df = x_test
for idx in x_test.index:
    if idx not in list_indices:
        df = df.drop(idx)

In [280]:
df.head()

Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose
381,37,male,1,own,little,rich,3949,10,car
326,46,male,2,own,little,little,2348,36,furniture/equipment
313,26,female,2,rent,,little,2812,24,car
363,36,male,3,free,little,little,1872,6,furniture/equipment
503,23,female,0,rent,quite rich,moderate,3758,24,radio/TV


In [281]:
len(df)

33

## MODEL #1

In [107]:
# Using sklearn backend
m = dice_ml.Model(model=clf_rf, backend="sklearn")
# Using method=random for generating CFs
exp = dice_ml.Dice(d, m, method="random") #change to either 'genetic' or 'kdtree'

In [288]:
e1 = exp.generate_counterfactuals(df[0:], total_CFs=3, features_to_vary=['Job','Housing','Saving accounts','Credit amount','Duration','Purpose'])
e1.visualize_as_dataframe(show_only_changes=True)

 39%|███▉      | 13/33 [00:06<00:05,  3.47it/s]

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


 48%|████▊     | 16/33 [00:07<00:04,  3.96it/s]

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


 55%|█████▍    | 18/33 [00:07<00:03,  4.09it/s]

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


 58%|█████▊    | 19/33 [00:07<00:03,  4.04it/s]

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


 61%|██████    | 20/33 [00:08<00:03,  3.97it/s]

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


 67%|██████▋   | 22/33 [00:08<00:02,  3.71it/s]

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


 85%|████████▍ | 28/33 [00:10<00:01,  3.68it/s]

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


 91%|█████████ | 30/33 [00:10<00:00,  3.71it/s]

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 33/33 [00:11<00:00,  2.85it/s]

Query instance (original outcome : 0)





Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,37,male,1,own,little,rich,3949,10,car,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,-,-,5232.0,-,repairs,1
1,-,-,1,-,quite rich,-,-,-,-,1
2,-,-,-,-,-,-,-,29.0,vacation/others,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,46,male,2,own,little,little,2348,36,furniture/equipment,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,,-,-,25.0,-,1
1,-,-,-,-,,-,15366.0,-,-,1
2,-,-,-,-,,-,-,16.0,-,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,26,female,2,rent,,little,2812,24,car,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,0,own,-,-,-,-,radio/TV,1
1,-,-,1,own,-,-,-,-,domestic appliances,1
2,-,-,3,own,-,-,-,-,radio/TV,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,36,male,3,free,little,little,1872,6,furniture/equipment,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,own,,-,-,-,-,1
1,-,-,-,-,,-,-,-,domestic appliances,1
2,-,-,-,rent,,-,-,-,-,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,23,female,0,rent,quite rich,moderate,3758,24,radio/TV,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,,-,-,-,-,1
1,-,-,1,-,,-,-,-,-,1
2,-,-,-,-,,-,-,47.0,-,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,29,male,2,own,little,moderate,2629,20,vacation/others,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,,-,10345.0,-,-,1
1,-,-,-,-,,-,7376.0,-,-,1
2,-,-,-,-,,-,-,-,repairs,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,36,male,2,own,little,little,1721,15,car,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,quite rich,-,-,-,domestic appliances,1
1,-,-,-,-,,-,-,-,repairs,1
2,-,-,-,-,,-,-,-,domestic appliances,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,22,male,2,own,moderate,moderate,1514,15,repairs,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,,-,-,-,furniture/equipment,1
1,-,-,2,-,,-,-,-,-,1
2,-,-,-,-,,-,-,-,education,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,37,male,1,own,little,moderate,1154,9,radio/TV,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,,-,6797.0,-,-,1
1,-,-,-,-,,-,12050.0,-,-,1
2,-,-,-,-,quite rich,-,-,34.0,-,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,28,female,3,own,moderate,rich,2923,21,car,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,little,-,-,-,domestic appliances,1
1,-,-,-,-,quite rich,-,-,-,radio/TV,1
2,-,-,-,-,,-,13472.0,-,-,1


Query instance (original outcome : 1)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,30,female,3,own,,little,7174,42,radio/TV,1



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,rich,-,-,-,domestic appliances,0
1,-,-,1,-,-,-,15775.0,-,-,0
2,-,-,2,-,-,-,-,54.0,-,0


Query instance (original outcome : 1)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,23,male,0,own,,moderate,14555,6,car,1



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,3,-,quite rich,-,-,-,-,0
1,-,-,-,-,quite rich,-,-,-,-,0
2,-,-,-,-,rich,-,-,65.0,-,0


Query instance (original outcome : 1)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,37,male,2,free,little,,1819,36,education,1



No counterfactuals found!
Query instance (original outcome : 1)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,25,female,2,own,,moderate,2762,12,furniture/equipment,1



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,3,-,-,-,-,-,car,0
1,-,-,-,rent,-,-,-,-,-,0
2,-,-,-,free,-,-,-,-,car,0


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,19,female,1,rent,rich,moderate,983,12,furniture/equipment,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,,-,8839.0,-,-,1
1,-,-,-,-,,-,-,49.0,-,1
2,-,-,-,-,,-,14789.0,-,-,1


Query instance (original outcome : 1)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,27,male,2,rent,,,7980,36,business,1



No counterfactuals found!
Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,22,female,1,rent,little,,1258,12,furniture/equipment,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,3,-,,-,-,-,-,1
1,-,-,2,-,-,-,-,-,car,1
2,-,-,-,-,,-,-,4.0,-,1


Query instance (original outcome : 1)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,26,male,3,own,little,,4463,36,radio/TV,1



No counterfactuals found!
Query instance (original outcome : 1)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,38,male,2,own,,,3914,48,business,1



No counterfactuals found!
Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,51,male,3,free,little,little,1164,8,vacation/others,0



No counterfactuals found!
Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,39,male,2,own,little,little,2522,30,radio/TV,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,0,-,,-,11029.0,21.0,domestic appliances,1
1,-,-,2,-,,-,-,60.0,domestic appliances,1
2,-,-,1,-,,-,11940.0,-,domestic appliances,1


Query instance (original outcome : 1)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,41,male,1,own,little,,719,12,education,1



No counterfactuals found!
Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,32,male,2,rent,moderate,moderate,11760,39,education,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,own,,-,12495.0,-,-,1
1,-,-,-,own,,-,-,-,repairs,1
2,-,-,-,own,,-,3338.0,-,-,1


Query instance (original outcome : 1)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,22,female,2,own,little,,1808,18,furniture/equipment,1



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,free,-,-,-,27.0,-,0
1,-,-,-,free,-,-,2073.0,-,-,0
2,-,-,1,rent,-,-,-,-,-,0


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,31,male,2,own,moderate,moderate,1449,6,business,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,,-,2352.0,-,-,1
1,-,-,-,-,,-,-,-,-,1
2,-,-,-,-,,-,-,40.0,-,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,39,male,1,rent,little,little,2122,12,car,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,own,,-,-,-,domestic appliances,1
1,-,-,-,own,,-,1855.0,-,domestic appliances,1
2,-,-,-,own,,-,13566.0,-,domestic appliances,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,33,male,2,own,little,rich,6289,42,business,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,rich,-,-,-,radio/TV,1
1,-,-,-,-,,-,-,71.0,-,1
2,-,-,-,-,,-,-,15.0,-,1


Query instance (original outcome : 1)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,32,male,2,own,little,,1530,18,car,1



No counterfactuals found!
Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,58,female,1,own,little,little,385,12,radio/TV,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,,-,1938.0,-,domestic appliances,1
1,-,-,-,-,,-,-,56.0,domestic appliances,1
2,-,-,-,-,,-,-,4.0,domestic appliances,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,22,female,2,rent,little,little,3632,24,car,0



No counterfactuals found!
Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,37,male,2,own,moderate,moderate,3878,24,car,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,,-,-,-,radio/TV,1
1,-,-,-,-,,-,11733.0,-,-,1
2,-,-,-,-,,-,5248.0,-,-,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,50,male,3,free,little,little,7476,48,education,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,own,,-,-,-,repairs,1
1,-,-,-,own,,-,-,-,vacation/others,1
2,-,-,-,own,,-,-,-,domestic appliances,1


Query instance (original outcome : 0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,23,male,2,own,little,moderate,882,13,radio/TV,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,-,-,-,-,,-,7750.0,-,-,1
1,-,-,-,-,,-,2188.0,-,-,1
2,-,-,-,-,,-,-,25.0,-,1


## MODEL #2

In [29]:
# Using sklearn backend
m2 = dice_ml.Model(model=clf_svm, backend="sklearn")
# Using method=random for generating CFs
exp2 = dice_ml.Dice(d, m, method="random")

In [287]:
e2 = exp2.generate_counterfactuals(df[4:7], total_CFs=3, desired_class="opposite")
e2.visualize_as_dataframe(show_only_changes=True)

  0%|          | 0/3 [00:00<?, ?it/s]


ValueError: ('Feature', 'Age', 'not present in training data!')