In [1]:
import numpy as np
from tqdm import tqdm
import time
import psutil
from tabulate import tabulate

from aif360.datasets import BinaryLabelDataset
from aif360.datasets import GermanDataset
from aif360.metrics import ClassificationMetric, BinaryLabelDatasetMetric
from aif360.metrics.utils  import compute_boolean_conditioning_vector
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_german
from aif360.algorithms.postprocessing.reject_option_classification import RejectOptionClassification
from common_utils import compute_metrics

# models
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

from IPython.display import Markdown, display
import matplotlib.pyplot as plt 
%matplotlib inline


  Referenced from: <346F52D5-3FB3-3E9B-86A7-99AFC2266C4F> /Users/Peeradon/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/_pywrap_tfe.so
  Reason: tried: '/Users/Peeradon/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/../../_solib_darwin_x86_64/_U_S_Stensorflow_Spython_C_Upywrap_Utfe.so___Utensorflow/_pywrap_tensorflow_internal.so' (no such file), '/Users/Peeradon/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/../../_solib_darwin_x86_64/_U_S_Stensorflow_Spython_C_Upywrap_Utensorflow_Uinternal_Umacos___Utensorflow_Spython/_pywrap_tensorflow_internal.so' (no such file), '/Users/Peeradon/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/_pywrap_tensorflow_internal.so' (no such file), '/Users/Peeradon/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/../_pywrap_tensorflow_internal.so' (no such file), '/Users/Peeradon/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/../../../../_pywrap_tensorflow_internal.so' (no such file

#### Load data

In [2]:
model_used = "RF" 

In [3]:
dataset = load_preproc_data_german(['age'])
privileged_groups = [{'age': 1}]
unprivilege_groups = [{'age': 0}]

np.random.seed(42)

allowed_metrics = ["Statistical parity difference",
                   "Average odds difference",
                   "Equal opportunity difference"]

split data

In [4]:
og_train_set, og_vt_set = dataset.split([0.7], shuffle=True)
og_valid_set, og_test_set = og_vt_set.split([0.5], shuffle=True)

In [5]:
# print out some labels, names, etc.
display(Markdown("#### Training Dataset shape"))
print(og_train_set.features.shape)
display(Markdown("#### Favorable and unfavorable labels"))
print(og_train_set.favorable_label, og_train_set.unfavorable_label)
display(Markdown("#### Protected attribute names"))
print(og_train_set.protected_attribute_names)
display(Markdown("#### Privileged and unprivileged protected attribute values"))
print(og_train_set.privileged_protected_attributes,
      og_train_set.unprivileged_protected_attributes)
display(Markdown("#### Dataset feature names"))
print(og_train_set.feature_names)

#### Training Dataset shape

(700, 11)


#### Favorable and unfavorable labels

1.0 2.0


#### Protected attribute names

['age']


#### Privileged and unprivileged protected attribute values

[array([1.])] [array([0.])]


#### Dataset feature names

['age', 'sex', 'credit_history=Delay', 'credit_history=None/Paid', 'credit_history=Other', 'savings=500+', 'savings=<500', 'savings=Unknown/None', 'employment=1-4 years', 'employment=4+ years', 'employment=Unemployed']


#### Mean difference

In [6]:
og_diff_mean = BinaryLabelDatasetMetric(og_train_set, privileged_groups=privileged_groups, unprivileged_groups=unprivilege_groups)

og_diff_mean_value = og_diff_mean.mean_difference()

print("Original Dataset mean difference: %.4f" % og_diff_mean_value)

Original Dataset mean difference: -0.1452


#### Train on original data

In [7]:

y_train = og_train_set.labels.ravel()

if model_used == "SVC":
    clf_model = LinearSVC()
    scaler = StandardScaler()
    X_train = scaler.fit_transform(og_train_set.features)
if model_used == "RF":
    clf_model = RandomForestClassifier()
    X_train = og_train_set.features

# ==================================================
timer_str = time.time()
mem_str = psutil.Process().memory_info().rss / (1024 * 1024)

clf_model.fit(X_train, y_train, sample_weight=og_train_set.instance_weights)

timer_stp = time.time()
mem_stp = psutil.Process().memory_info().rss / (1024 * 1024)

og_train_time = timer_stp - timer_str
og_train_mem = mem_stp - mem_str

# ==================================================
# positive class index 
pos_ind = np.where(clf_model.classes_ == og_train_set.favorable_label)[0][0]


Scores for validation and test set

In [8]:
og_valid_set_pred = og_valid_set.copy(deepcopy=True)
og_test_pred = og_test_set.copy(deepcopy=True)

y_valid = og_valid_set_pred.labels
y_test = og_test_pred.labels

if model_used == "SVC":
    X_valid = scaler.fit_transform(og_valid_set_pred.features)
    # predict on vaidation set
    og_valid_set_pred.scores = clf_model._predict_proba_lr(X_valid)[:, pos_ind].reshape(-1,1)

    X_test = scaler.fit_transform(og_test_pred.features)
    # predict on test set
    og_test_pred.scores = clf_model._predict_proba_lr(X_test)[:, pos_ind].reshape(-1,1)

if model_used == "RF":
    X_valid = og_valid_set_pred.features
    og_valid_set_pred.scores = clf_model.predict_proba(X_valid)[:, pos_ind].reshape(-1,1)

    X_test = og_test_pred.features
    og_test_pred.scores = clf_model.predict_proba(X_test)[:, pos_ind].reshape(-1,1)

#### Find the optimal parameter from validation set

Best threshold (no fairness)

In [9]:
# Find optimal threshold based on balanced accuracy
num_thresh = 100    # 0.00 - 1.00
ba_acc = np.zeros(num_thresh)   # balance accuracy for each threshold
recall_acc = np.zeros(num_thresh)   # recall for each threshold
class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)  # search space 

for idx, threshold in enumerate(class_thresh_arr):
    # set labels if probability > threshold 
    fav_inds = og_valid_set_pred.scores > threshold
    og_valid_set_pred.labels[fav_inds] = og_valid_set_pred.favorable_label
    og_valid_set_pred.labels[~fav_inds] = og_valid_set_pred.unfavorable_label

    # classified metric 
    og_valid_class_met = ClassificationMetric(og_valid_set, og_valid_set_pred, privileged_groups=privileged_groups, unprivileged_groups=unprivilege_groups)

    ba_acc[idx] = 0.5 * (og_valid_class_met.true_positive_rate() + og_valid_class_met.true_negative_rate())

    recall_acc[idx] = og_valid_class_met.recall()

best_idx_acc = np.where(ba_acc == np.max(ba_acc))[0][0]
best_idx_recall = np.where(recall_acc == np.max(recall_acc))[0][0]

best_class_thresh_acc = class_thresh_arr[best_idx_acc]
best_class_thresh_recall = class_thresh_arr[best_idx_acc]

print("Best balanced accuracy (no fairness constraint) = %.4f" % np.max(ba_acc))
print("Best recall (no fairness constraint) = %.4f" % np.max(recall_acc))

print("Optimal threshold based on accuracy = %.4f" % best_class_thresh_acc)
print("Optimal threshold based on recall = %.4f " % best_class_thresh_recall)


Best balanced accuracy (no fairness constraint) = 0.7148
Best recall (no fairness constraint) = 1.0000
Optimal threshold based on accuracy = 0.6930
Optimal threshold based on recall = 0.6930 


Estimate optimal parameter for ROC method

In [10]:
ROC = RejectOptionClassification(unprivileged_groups=unprivilege_groups,
                                 privileged_groups=privileged_groups,
                                 low_class_thresh=0.01, high_class_thresh=0.99,
                                 num_class_thresh=100, num_ROC_margin=50,
                                 metric_name="Statistical parity difference",metric_ub=0.05, metric_lb=-0.05)
ROC = ROC.fit(og_valid_set, og_valid_set_pred)

In [11]:
print("Optimal classification threshold (with fairness) = %.4f" % ROC.classification_threshold)
print("Optimal ROC margin = %.4f" % ROC.ROC_margin)

Optimal classification threshold (with fairness) = 0.6930
Optimal ROC margin = 0.0626


#### Prediction from validation set

In [12]:
# Metrics for the test set
fav_inds = og_valid_set_pred.scores > best_class_thresh_acc
og_valid_set_pred.labels[fav_inds] = og_valid_set_pred.favorable_label
og_valid_set_pred.labels[~fav_inds] = og_valid_set_pred.unfavorable_label

display(Markdown("#### Validation set"))
display(Markdown("##### Raw predictions - No fairness constraints, only maximizing balanced accuracy"))

metric_valid_bef = compute_metrics(og_valid_set, og_valid_set_pred, 
                unprivilege_groups, privileged_groups)

#### Validation set

##### Raw predictions - No fairness constraints, only maximizing balanced accuracy

Balanced accuracy = 0.7148
Precision = 0.9091
Recall = 0.5660
F1 = 0.6977
Disparate impact = 0.5598
Average odds difference = -0.1207
Statistical parity difference = -0.2145
Equal opportunity difference = -0.2663


In [13]:
# Transform the validation set
dataset_transf_valid_pred = ROC.predict(og_valid_set_pred)

display(Markdown("#### Validation set"))
display(Markdown("##### Transformed predictions - With fairness constraints"))
metric_valid_aft = compute_metrics(og_valid_set, dataset_transf_valid_pred, 
                unprivilege_groups, privileged_groups)

#### Validation set

##### Transformed predictions - With fairness constraints

Balanced accuracy = 0.7082
Precision = 0.8971
Recall = 0.5755
F1 = 0.7011
Disparate impact = 0.9192
Average odds difference = 0.0589
Statistical parity difference = -0.0373
Equal opportunity difference = -0.0930


#### Prediction from Test set

In [14]:
# Metrics for the test set
fav_inds = og_test_pred.scores > best_class_thresh_acc
og_test_pred.labels[fav_inds] = og_test_pred.favorable_label
og_test_pred.labels[~fav_inds] = og_test_pred.unfavorable_label

display(Markdown("#### Test set"))
display(Markdown("##### Raw predictions - No fairness constraints, only maximizing balanced accuracy"))

metric_test_bef = compute_metrics(og_test_set, og_test_pred, 
                unprivilege_groups, privileged_groups)

#### Test set

##### Raw predictions - No fairness constraints, only maximizing balanced accuracy

Balanced accuracy = 0.6158
Precision = 0.8103
Recall = 0.4608
F1 = 0.5875
Disparate impact = 0.4298
Average odds difference = -0.2059
Statistical parity difference = -0.2457
Equal opportunity difference = -0.2029


In [15]:
# Metrics for the transformed test set
dataset_transf_test_pred = ROC.predict(og_test_pred)

display(Markdown("#### Test set"))
display(Markdown("##### Transformed predictions - With fairness constraints"))
metric_test_aft = compute_metrics(og_test_set, dataset_transf_test_pred, 
                unprivilege_groups, privileged_groups)


#### Test set

##### Transformed predictions - With fairness constraints

Balanced accuracy = 0.6011
Precision = 0.8000
Recall = 0.4314
F1 = 0.5605
Disparate impact = 0.6644
Average odds difference = -0.0947
Statistical parity difference = -0.1310
Equal opportunity difference = -0.0860


In [16]:
display(Markdown("## Summary"))

performance_table = {
    'ROC': ["Before", "After"],
    'Accuracy': [metric_test_bef["Balanced accuracy"], metric_test_aft["Balanced accuracy"]],
    'Precision' : [metric_test_bef["Precision"], metric_test_aft["Precision"]],
    'Recall' : [metric_test_bef["Recall"], metric_test_aft["Recall"]],
    'F1' : [metric_test_bef["F1"], metric_test_aft["F1"]],
    'Time' : [og_train_time, "-"],
    'Memory' : [og_train_mem, "-"]
}

print(f'\nModel used: {model_used}')
display(Markdown("#### Performance metrics"))
print(tabulate(performance_table, headers='keys', tablefmt='fancy_grid', floatfmt=".4f"))

fairness_table = {
    'ROC' : ["Before", "After"],
    'Average Odds Difference' : [metric_test_bef["Average odds difference"], metric_test_aft["Average odds difference"]],
    'Statistical parity difference' : [metric_test_bef["Statistical parity difference"], metric_test_aft["Statistical parity difference"]],
    'Equal opportuniy difference' : [metric_test_bef["Equal opportunity difference"], metric_test_aft["Equal opportunity difference"]]
}

display(Markdown("#### Fairness metrics"))
print(tabulate(fairness_table, headers='keys', tablefmt='fancy_grid', floatfmt=".4f"))
display(Markdown("##### Noted for the classification to be fair"))
print("Disparate impact(DI): higher is better.\nAverage odds difference(AOD): must be close to zero.\nStatistical Parity Difference(SPD): must be equal to zero.\nEqual Opportunity Difference(EOD): must be equal to zero.")
print("Source: https://www.mathworks.com/help/risk/explore-fairness-metrics-for-credit-scoring-model.html")


## Summary


Model used: RF


#### Performance metrics

╒════════╤════════════╤═════════════╤══════════╤════════╤═════════════════════╤══════════╕
│ ROC    │   Accuracy │   Precision │   Recall │     F1 │ Time                │ Memory   │
╞════════╪════════════╪═════════════╪══════════╪════════╪═════════════════════╪══════════╡
│ Before │     0.6158 │      0.8103 │   0.4608 │ 0.5875 │ 0.08228874206542969 │ 1.09375  │
├────────┼────────────┼─────────────┼──────────┼────────┼─────────────────────┼──────────┤
│ After  │     0.6011 │      0.8000 │   0.4314 │ 0.5605 │ -                   │ -        │
╘════════╧════════════╧═════════════╧══════════╧════════╧═════════════════════╧══════════╛


#### Fairness metrics

╒════════╤═══════════════════════════╤═════════════════════════════════╤═══════════════════════════════╕
│ ROC    │   Average Odds Difference │   Statistical parity difference │   Equal opportuniy difference │
╞════════╪═══════════════════════════╪═════════════════════════════════╪═══════════════════════════════╡
│ Before │                   -0.2059 │                         -0.2457 │                       -0.2029 │
├────────┼───────────────────────────┼─────────────────────────────────┼───────────────────────────────┤
│ After  │                   -0.0947 │                         -0.1310 │                       -0.0860 │
╘════════╧═══════════════════════════╧═════════════════════════════════╧═══════════════════════════════╛


##### Noted for the classification to be fair

Disparate impact(DI): higher is better.
Average odds difference(AOD): must be close to zero.
Statistical Parity Difference(SPD): must be equal to zero.
Equal Opportunity Difference(EOD): must be equal to zero.
Source: https://www.mathworks.com/help/risk/explore-fairness-metrics-for-credit-scoring-model.html
