#### This notebook demonstrates the use of the Reject Option Classification (ROC) post-processing algorithm for bias mitigation.
- The debiasing function used is implemented in the `RejectOptionClassification` class.
- Divide the dataset into training, validation, and testing partitions.
- Train classifier on original training data.
- Estimate the optimal classification threshold, that maximizes balanced accuracy without fairness constraints.
- Estimate the optimal classification threshold, and the critical region boundary (ROC margin) using a validation set for the desired constraint on fairness. The best parameters are those that maximize the classification threshold while satisfying the fairness constraints.
- The constraints can be used on the following fairness measures:
    * Statistical parity difference on the predictions of the classifier
    * Average odds difference for the classifier
    * Equal opportunity difference for the classifier
- Determine the prediction scores for testing data. Using the estimated optimal classification threshold, compute accuracy and fairness metrics.
- Using the determined optimal classification threshold and the ROC margin, adjust the predictions. Report accuracy and fairness metric on the new predictions.

In [1]:
%matplotlib inline
# Load all necessary packages
import sys
sys.path.append("../")
import numpy as np
from tqdm import tqdm
from warnings import warn

from aif360.datasets import BinaryLabelDataset
from aif360.datasets import BankDataset
from aif360.metrics import ClassificationMetric, BinaryLabelDatasetMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector
from aif360.algorithms.postprocessing.reject_option_classification\
        import RejectOptionClassification
from common_utils import compute_metrics

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from IPython.display import Markdown, display
import matplotlib.pyplot as plt
from ipywidgets import interactive, FloatSlider

pip install 'aif360[LawSchoolGPA]'
pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'


#### Load dataset and specify options

for the bank dataset, age is the protected attribute.

  By default, this code converts the 'age' attribute to a binary value
        where privileged is `25 <= age < 60` and unprivileged is `age < 25` or `age >= 60`
        as suggested in Le Quy, Tai, et al. [1].

        References:
            .. [1] Le Quy, Tai, et al. "A survey on datasets for fairness‐aware machine 
            learning." Wiley Interdisciplinary Reviews: Data Mining and Knowledge 
            Discovery 12.3 (2022): e1452.

However, in Function Composition in Trustworthy Machine Learning:
Implementation Choices, Insights, and Questions 2023 feb, they  use ‘age’ (‘under 25’ is privileged) as a sensitive attribute.

In [4]:
dataset_orig = BankDataset()
privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]
# Metric used (should be one of allowed_metrics)
metric_name = "Equal opportunity difference"
#random seed for calibrated equal odds prediction
random_seed = 12345679
np.random.seed(random_seed)
# Upper and lower bound on the fairness metric used
metric_ub = 0.05
metric_lb = -0.05



In [5]:
# 5 fold cross validation
Z =  dataset_orig.split(5, shuffle=True,seed = random_seed)
# i th fold
dataset_train1 = Z[0].copy()
dataset_train1.features = np.concatenate((Z[0].features,Z[1].features,Z[2].features,Z[3].features),axis=0)
dataset_train1.scores = np.concatenate((Z[0].scores,Z[1].scores,Z[2].scores,Z[3].scores),axis=0)
dataset_train1.labels = np.concatenate((Z[0].labels,Z[1].labels,Z[2].labels,Z[3].labels),axis=0)
dataset_train1.protected_attributes = np.concatenate((Z[0].protected_attributes,Z[1].protected_attributes,Z[2].protected_attributes,Z[3].protected_attributes),axis=0)
dataset_train1.instance_weights = np.concatenate((Z[0].instance_weights,Z[1].instance_weights,Z[2].instance_weights,Z[3].instance_weights),axis=0)
dataset_train1.instance_names = np.concatenate((Z[0].instance_names,Z[1].instance_names,Z[2].instance_names,Z[3].instance_names),axis=0)
dataset_train1.metadata = Z[0].metadata.copy()
dataset_test1= Z[4].copy()

dataset_train2 = Z[1].copy()
dataset_train2.features = np.concatenate((Z[1].features,Z[2].features,Z[3].features,Z[4].features),axis=0)
dataset_train2.scores = np.concatenate((Z[1].scores,Z[2].scores,Z[3].scores,Z[4].scores),axis=0)
dataset_train2.labels = np.concatenate((Z[1].labels,Z[2].labels,Z[3].labels,Z[4].labels),axis=0)
dataset_train2.protected_attributes = np.concatenate((Z[1].protected_attributes,Z[2].protected_attributes,Z[3].protected_attributes,Z[4].protected_attributes),axis=0)
dataset_train2.instance_weights = np.concatenate((Z[1].instance_weights,Z[2].instance_weights,Z[3].instance_weights,Z[4].instance_weights),axis=0)
dataset_train2.instance_names = np.concatenate((Z[1].instance_names,Z[2].instance_names,Z[3].instance_names,Z[4].instance_names),axis=0)
dataset_train2.metadata = Z[1].metadata.copy()
dataset_test2= Z[0].copy()

dataset_train3 = Z[2].copy()
dataset_train3.features = np.concatenate((Z[2].features,Z[3].features,Z[4].features,Z[0].features),axis=0)
dataset_train3.scores = np.concatenate((Z[2].scores,Z[3].scores,Z[4].scores,Z[0].scores),axis=0)
dataset_train3.labels = np.concatenate((Z[2].labels,Z[3].labels,Z[4].labels,Z[0].labels),axis=0)
dataset_train3.protected_attributes = np.concatenate((Z[2].protected_attributes,Z[3].protected_attributes,Z[4].protected_attributes,Z[0].protected_attributes),axis=0)
dataset_train3.instance_weights = np.concatenate((Z[2].instance_weights,Z[3].instance_weights,Z[4].instance_weights,Z[0].instance_weights),axis=0)
dataset_train3.instance_names = np.concatenate((Z[2].instance_names,Z[3].instance_names,Z[4].instance_names,Z[0].instance_names),axis=0)
dataset_train3.metadata = Z[2].metadata.copy()
dataset_test3= Z[1].copy()

dataset_train4 = Z[3].copy()
dataset_train4.features = np.concatenate((Z[3].features,Z[4].features,Z[0].features,Z[1].features),axis=0)
dataset_train4.scores = np.concatenate((Z[3].scores,Z[4].scores,Z[0].scores,Z[1].scores),axis=0)
dataset_train4.labels = np.concatenate((Z[3].labels,Z[4].labels,Z[0].labels,Z[1].labels),axis=0)
dataset_train4.protected_attributes = np.concatenate((Z[3].protected_attributes,Z[4].protected_attributes,Z[0].protected_attributes,Z[1].protected_attributes),axis=0)
dataset_train4.instance_weights = np.concatenate((Z[3].instance_weights,Z[4].instance_weights,Z[0].instance_weights,Z[1].instance_weights),axis=0)
dataset_train4.instance_names = np.concatenate((Z[3].instance_names,Z[4].instance_names,Z[0].instance_names,Z[1].instance_names),axis=0)
dataset_train4.metadata = Z[3].metadata.copy()
dataset_test4= Z[2].copy()

dataset_train5 = Z[4].copy()
dataset_train5.features = np.concatenate((Z[4].features,Z[0].features,Z[1].features,Z[2].features),axis=0)
dataset_train5.scores = np.concatenate((Z[4].scores,Z[0].scores,Z[1].scores,Z[2].scores),axis=0)
dataset_train5.labels = np.concatenate((Z[4].labels,Z[0].labels,Z[1].labels,Z[2].labels),axis=0)
dataset_train5.protected_attributes = np.concatenate((Z[4].protected_attributes,Z[0].protected_attributes,Z[1].protected_attributes,Z[2].protected_attributes),axis=0)
dataset_train5.instance_weights = np.concatenate((Z[4].instance_weights,Z[0].instance_weights,Z[1].instance_weights,Z[2].instance_weights),axis=0)
dataset_train5.instance_names = np.concatenate((Z[4].instance_names,Z[0].instance_names,Z[1].instance_names,Z[2].instance_names),axis=0)
dataset_train5.metadata = Z[4].metadata.copy()
dataset_test5= Z[3].copy()

#### Save the split dataset

In [6]:
protected_name = "age"
dataset_train1.convert_to_dataframe()[0].to_csv("Huangrui/bank/{}/bank_train1.csv".format(protected_name),index=False)
dataset_test1.convert_to_dataframe()[0].to_csv("Huangrui/bank/{}/bank_test1.csv".format(protected_name),index=False)
dataset_train2.convert_to_dataframe()[0].to_csv("Huangrui/bank/{}/bank_train2.csv".format(protected_name),index=False)
dataset_test2.convert_to_dataframe()[0].to_csv("Huangrui/bank/{}/bank_test2.csv".format(protected_name),index=False)
dataset_train3.convert_to_dataframe()[0].to_csv("Huangrui/bank/{}/bank_train3.csv".format(protected_name),index=False)
dataset_test3.convert_to_dataframe()[0].to_csv("Huangrui/bank/{}/bank_test3.csv".format(protected_name),index=False)
dataset_train4.convert_to_dataframe()[0].to_csv("Huangrui/bank/{}/bank_train4.csv".format(protected_name),index=False)
dataset_test4.convert_to_dataframe()[0].to_csv("Huangrui/bank/{}/bank_test4.csv".format(protected_name),index=False)
dataset_train5.convert_to_dataframe()[0].to_csv("Huangrui/bank/{}/bank_train5.csv".format(protected_name),index=False)
dataset_test5.convert_to_dataframe()[0].to_csv("Huangrui/bank/{}/bank_test5.csv".format(protected_name),index=False)


#### Split into train, test and validation

In [71]:
dataset_orig_train_valid= dataset_train5.copy()
dataset_orig_test= dataset_test5.copy()

In [72]:
# Get the dataset and split into train and test
dataset_orig_train, dataset_orig_valid = dataset_orig_train_valid.split([0.8], shuffle=True)

#### Clean up training data and display properties of the data

In [73]:
# print out some labels, names, etc.
display(Markdown("#### Training Dataset shape"))
print(dataset_orig_train.features.shape)
display(Markdown("#### Favorable and unfavorable labels"))
print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)
display(Markdown("#### Protected attribute names"))
print(dataset_orig_train.protected_attribute_names)
display(Markdown("#### Privileged and unprivileged protected attribute values"))
print(dataset_orig_train.privileged_protected_attributes, 
      dataset_orig_train.unprivileged_protected_attributes)
display(Markdown("#### Dataset feature names"))
print(dataset_orig_train.feature_names)

#### Training Dataset shape

(19512, 57)


#### Favorable and unfavorable labels

1.0 0.0


#### Protected attribute names

['age']


#### Privileged and unprivileged protected attribute values

[array([1.])] [array([0.])]


#### Dataset feature names

['age', 'duration', 'campaign', 'pdays', 'previous', 'emp.var.rate', 'cons.price.idx', 'cons.conf.idx', 'euribor3m', 'nr.employed', 'job=admin.', 'job=blue-collar', 'job=entrepreneur', 'job=housemaid', 'job=management', 'job=retired', 'job=self-employed', 'job=services', 'job=student', 'job=technician', 'job=unemployed', 'marital=divorced', 'marital=married', 'marital=single', 'education=basic.4y', 'education=basic.6y', 'education=basic.9y', 'education=high.school', 'education=illiterate', 'education=professional.course', 'education=university.degree', 'default=no', 'default=yes', 'housing=no', 'housing=yes', 'loan=no', 'loan=yes', 'contact=cellular', 'contact=telephone', 'month=apr', 'month=aug', 'month=dec', 'month=jul', 'month=jun', 'month=mar', 'month=may', 'month=nov', 'month=oct', 'month=sep', 'day_of_week=fri', 'day_of_week=mon', 'day_of_week=thu', 'day_of_week=tue', 'day_of_week=wed', 'poutcome=failure', 'poutcome=nonexistent', 'poutcome=success']


#### Metric for original training data

In [74]:
metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
display(Markdown("#### Original training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_train.mean_difference())

#### Original training dataset

Difference in mean outcomes between unprivileged and privileged groups = 0.113588


### Train classifier on original data

In [75]:
# Logistic regression classifier and predictions
scale_orig = StandardScaler()
X_train = scale_orig.fit_transform(dataset_orig_train.features)
y_train = dataset_orig_train.labels.ravel()

lmod = LogisticRegression()
lmod.fit(X_train, y_train)
y_train_pred = lmod.predict(X_train)

# positive class index
pos_ind = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]

dataset_orig_train_pred = dataset_orig_train.copy(deepcopy=True)
dataset_orig_train_pred.labels = y_train_pred

#### Obtain scores for validation and test sets

In [76]:
dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)
X_valid = scale_orig.transform(dataset_orig_valid_pred.features)
y_valid = dataset_orig_valid_pred.labels
dataset_orig_valid_pred.scores = lmod.predict_proba(X_valid)[:,pos_ind].reshape(-1,1)

dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)
X_test = scale_orig.transform(dataset_orig_test_pred.features)
y_test = dataset_orig_test_pred.labels
dataset_orig_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)

### Find the optimal parameters from the validation set

#### Best threshold for classification only (no fairness)

In [77]:
num_thresh = 100
ba_arr = np.zeros(num_thresh)
class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)
for idx, class_thresh in enumerate(class_thresh_arr):
    
    fav_inds = dataset_orig_valid_pred.scores > class_thresh
    dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label
    dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label
    
    classified_metric_orig_valid = ClassificationMetric(dataset_orig_valid,
                                             dataset_orig_valid_pred, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
    
    ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\
                       +classified_metric_orig_valid.true_negative_rate())

best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]
best_class_thresh = class_thresh_arr[best_ind]

print("Best balanced accuracy (no fairness constraints) = %.4f" % np.max(ba_arr))
print("Optimal classification threshold (no fairness constraints) = %.4f" % best_class_thresh)

Best balanced accuracy (no fairness constraints) = 0.8698
Optimal classification threshold (no fairness constraints) = 0.0892


#### Estimate optimal parameters for the ROC method

In [78]:
ROC = RejectOptionClassification(unprivileged_groups=unprivileged_groups, 
                                 privileged_groups=privileged_groups, 
                                 low_class_thresh=0.01, high_class_thresh=0.99,
                                  num_class_thresh=100, num_ROC_margin=50,
                                  metric_name=metric_name,
                                  metric_ub=metric_ub, metric_lb=metric_lb)
ROC = ROC.fit(dataset_orig_valid, dataset_orig_valid_pred)

In [79]:
print("Optimal classification threshold (with fairness constraints) = %.4f" % ROC.classification_threshold)
print("Optimal ROC margin = %.4f" % ROC.ROC_margin)

Optimal classification threshold (with fairness constraints) = 0.0892
Optimal ROC margin = 0.0018


### Predictions from Validation Set

In [80]:
# Metrics for the test set
fav_inds = dataset_orig_valid_pred.scores > best_class_thresh
dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label
dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label

display(Markdown("#### Validation set"))
display(Markdown("##### Raw predictions - No fairness constraints, only maximizing balanced accuracy"))

metric_valid_bef = compute_metrics(dataset_orig_valid, dataset_orig_valid_pred, 
                unprivileged_groups, privileged_groups)

#### Validation set

##### Raw predictions - No fairness constraints, only maximizing balanced accuracy

Balanced accuracy = 0.8698
Statistical parity difference = 0.2095
Disparate impact = 1.7415
Average odds difference = 0.1062
Equal opportunity difference = 0.0226
Theil index = 0.0529


In [81]:
# Transform the validation set
dataset_transf_valid_pred = ROC.predict(dataset_orig_valid_pred)

display(Markdown("#### Validation set"))
display(Markdown("##### Transformed predictions - With fairness constraints"))
metric_valid_aft = compute_metrics(dataset_orig_valid, dataset_transf_valid_pred, 
                unprivileged_groups, privileged_groups)

#### Validation set

##### Transformed predictions - With fairness constraints

Balanced accuracy = 0.8703
Statistical parity difference = 0.2120
Disparate impact = 1.7572
Average odds difference = 0.1084
Equal opportunity difference = 0.0243
Theil index = 0.0527


In [82]:
# Testing: Check if the metric optimized has not become worse
assert np.abs(metric_valid_aft[metric_name]) <= np.abs(metric_valid_bef[metric_name])

AssertionError: 

### Predictions from Test Set

In [83]:
# Metrics for the test set
fav_inds = dataset_orig_test_pred.scores > best_class_thresh
dataset_orig_test_pred.labels[fav_inds] = dataset_orig_test_pred.favorable_label
dataset_orig_test_pred.labels[~fav_inds] = dataset_orig_test_pred.unfavorable_label

display(Markdown("#### Test set"))
display(Markdown("##### Raw predictions - No fairness constraints, only maximizing balanced accuracy"))

metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred, 
                unprivileged_groups, privileged_groups)

#### Test set

##### Raw predictions - No fairness constraints, only maximizing balanced accuracy

Balanced accuracy = 0.8650
Statistical parity difference = 0.2312
Disparate impact = 1.8001
Average odds difference = 0.1211
Equal opportunity difference = 0.0677
Theil index = 0.0542


In [84]:
# Metrics for the transformed test set
dataset_transf_test_pred = ROC.predict(dataset_orig_test_pred)

display(Markdown("#### Test set"))
display(Markdown("##### Transformed predictions - With fairness constraints"))
metric_test_aft = compute_metrics(dataset_orig_test, dataset_transf_test_pred, 
                unprivileged_groups, privileged_groups)

#### Test set

##### Transformed predictions - With fairness constraints

Balanced accuracy = 0.8648
Statistical parity difference = 0.2336
Disparate impact = 1.8150
Average odds difference = 0.1236
Equal opportunity difference = 0.0705
Theil index = 0.0542


In [85]:
metric_test_aft["Balanced accuracy"]
metric_test_aft["Equal opportunity difference"]
print("The Error for the test dataset is {:.4}".format(1-metric_test_aft["Balanced accuracy"]))
print("The Equal opportunity difference for the test dataset is {:.4}".format(metric_test_aft["Equal opportunity difference"]))

The Error for the test dataset is 0.1352
The Equal opportunity difference for the test dataset is 0.07052
