In [None]:
pip install -r "..\requirements.txt"

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
# import DiCE
import dice_ml
import pandas as pd
import numpy as np
from dice_ml.utils import helpers  # helper functions

from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier

In [12]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
# Read the loan records from the CSV that sits next to this notebook.
dataset = pd.read_csv(filepath_or_buffer="loan_data.csv")

# Preview the first five rows to check the columns and dtypes at a glance.
dataset.head(n=5)

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,female,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,female,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,female,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,female,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,male,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1


In [14]:
# Describe the dataset to DiCE: which column is the outcome and which features
# are continuous. Columns not listed as continuous are presumably handled as
# categorical by DiCE — confirm against the dice_ml documentation.
d = dice_ml.Data(
    outcome_name="loan_status",
    continuous_features=[
        "person_age",
        "person_income",
        "person_emp_exp",
        "loan_amnt",
        "loan_int_rate",
        "loan_percent_income",
        "cb_person_cred_hist_length",
        "credit_score",
    ],
    dataframe=dataset,
)

In [28]:
from sklearn.neural_network import MLPClassifier
from sklearn.impute import SimpleImputer

# Name of the label column; every other column is used as a feature.
target = "loan_status"

# Split dataset into train and test. The split is stratified on the label and
# seeded so that both partitions are reproducible.
datasetX = dataset.drop(columns=[target])
x_train, x_test, y_train, y_test = train_test_split(
    datasetX,
    dataset[target],
    test_size=0.2,
    random_state=42,
    stratify=dataset[target],
)

# Route columns by dtype. Selecting on the generic "number" dtype (rather than
# only int64/float64) keeps int32/float32/etc. columns in the numeric branch
# instead of silently one-hot encoding them.
numerical = x_train.select_dtypes(include="number").columns.tolist()
categorical = x_train.columns.difference(numerical)

# Create the preprocessing pipeline for both numerical and categorical features
# Numeric columns: fill gaps with the median, then standardise.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode. handle_unknown='ignore' lets prediction proceed on categories that
# were not present when the encoder was fitted.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

transformations = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numerical),
        ('cat', categorical_transformer, categorical)])

# Append classifier to the preprocessing pipeline so that raw feature frames
# can be passed directly to fit/predict.
clf = Pipeline(steps=[
    ('preprocessor', transformations),
    ('classifier', MLPClassifier(hidden_layer_sizes=(64, 32), activation='relu', max_iter=200, random_state=42))
])

# `model` and `clf` refer to the same fitted pipeline object.
model = clf.fit(x_train, y_train)



In [30]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Score the held-out split with the fitted pipeline.
y_pred = clf.predict(x_test)

# Evaluate the four headline metrics on the same (truth, prediction) pair.
accuracy, precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report one metric per line, followed by the per-class breakdown.
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(label, value)
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.913
Precision: 0.8283864004317323
Recall: 0.7675
F1 Score: 0.7967817285232287

Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.95      0.94      7000
           1       0.83      0.77      0.80      2000

    accuracy                           0.91      9000
   macro avg       0.88      0.86      0.87      9000
weighted avg       0.91      0.91      0.91      9000



In [17]:
# Provide the trained model to DiCE's model object.
# `m` holds a reference to the pipeline object that `model` points to when this
# cell runs, so re-run this cell (and the explainer cells that use `m`) after
# the training cell is re-run.
backend = "sklearn"
m = dice_ml.Model(
    backend=backend,
    model=model,
)

In [18]:
# initialize DiCE explainer
# Combines the data description `d` and the wrapped model `m` into an explainer.
# NOTE(review): method="random" presumably selects DiCE's random-sampling
# counterfactual search — confirm against the installed dice_ml version.
exp_random = dice_ml.Dice(d, m, method="random")

In [19]:
# Take the rows at positions 4 and 5 of the training features as the
# instances to explain, then display them.
query_instances = x_train.iloc[4:6]
query_instances

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file
23231,29.0,male,Bachelor,73033.0,7,MORTGAGE,8000.0,PERSONAL,10.51,0.11,8.0,644,Yes
27928,32.0,female,Bachelor,90582.0,10,RENT,10000.0,HOMEIMPROVEMENT,7.29,0.11,7.0,561,Yes


In [20]:
# generate counterfactuals
# Request 3 counterfactuals per query row that flip the predicted class.
# random_seed pins the random sampler so the resulting table is reproducible
# under Restart & Run All instead of changing on every execution.
# NOTE(review): every feature is free to vary here, so suggestions may include
# changes that are not actionable for an applicant; consider restricting the
# search with `features_to_vary` / `permitted_range`.
dice_exp_random = exp_random.generate_counterfactuals(
    query_instances,
    total_CFs=3,
    desired_class="opposite",
    random_seed=42,
    verbose=False,
)

100%|██████████| 2/2 [00:00<00:00,  2.30it/s]


In [21]:
# Show each query instance followed by its counterfactual set; with
# show_only_changes=True, feature values identical to the query are shown as "-".
dice_exp_random.visualize_as_dataframe(show_only_changes=True)

Query instance (original outcome : 0)


Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,29.0,male,Bachelor,73033.0,7,MORTGAGE,8000.0,PERSONAL,10.51,0.11,8.0,644,Yes,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,-,-,-,-,48,-,-,-,-,-,-,-,No,1
1,-,-,-,-,44,-,-,-,-,-,-,-,No,1
2,-,-,-,-,-,-,-,-,-,0.54,-,-,No,1


Query instance (original outcome : 0)


Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,32.0,female,Bachelor,90582.0,10,RENT,10000.0,HOMEIMPROVEMENT,7.29,0.11,7.0,561,Yes,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,-,-,-,-,24,-,-,-,-,0.6,-,-,-,1
1,-,-,-,-,-,-,-,-,15.92,-,-,-,No,1
2,-,-,Doctorate,-,-,-,-,-,-,0.67,-,-,-,1
