#### This notebook demonstrates the use of the Reject Option Classification (ROC) post-processing algorithm for bias mitigation.
- The debiasing function used is implemented in the `RejectOptionClassification` class.
- Divide the dataset into training, validation, and testing partitions.
- Train classifier on original training data.
- Estimate the optimal classification threshold that maximizes balanced accuracy without fairness constraints.
- Estimate the optimal classification threshold and the critical region boundary (ROC margin) using a validation set for the desired constraint on fairness. The best parameters are those that maximize balanced accuracy while satisfying the fairness constraints.
- The constraints can be used on the following fairness measures:
    * Statistical parity difference on the predictions of the classifier
    * Average odds difference for the classifier
    * Equal opportunity difference for the classifier
- Determine the prediction scores for testing data. Using the estimated optimal classification threshold, compute accuracy and fairness metrics.
- Using the determined optimal classification threshold and the ROC margin, adjust the predictions. Report accuracy and fairness metric on the new predictions.

In [124]:
%matplotlib inline
# Load all necessary packages
import sys
sys.path.append("../")
import numpy as np
from tqdm import tqdm
from warnings import warn

from aif360.datasets import BinaryLabelDataset
from aif360.datasets import StandardDataset
from aif360.metrics import ClassificationMetric, BinaryLabelDatasetMetric
from aif360.algorithms.postprocessing.reject_option_classification\
        import RejectOptionClassification
from common_utils import compute_metrics

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from IPython.display import Markdown, display
import matplotlib.pyplot as plt
from ipywidgets import interactive, FloatSlider
import pandas as pd
import pickle
from sklearn.linear_model import Lasso

## Huangrui's Dataset 

In [125]:
# def load_preproc_data_german(protected_attributes=None):
#     """
#     Load and pre-process german credit dataset.
#     Args:
#         protected_attributes(list or None): If None use all possible protected
#             attributes, else subset the protected attributes to the list.

#     Returns:
#         GermanDataset: An instance of GermanDataset with required pre-processing.

#     """
#     def custom_preprocessing(df):
#         """ Custom pre-processing for German Credit Data
#         """

#         def group_credit_hist(x):
#             if x in ['A30', 'A31', 'A32']:
#                 return 'None/Paid'
#             elif x == 'A33':
#                 return 'Delay'
#             elif x == 'A34':
#                 return 'Other'
#             else:
#                 return 'NA'

#         def group_employ(x):
#             if x == 'A71':
#                 return 'Unemployed'
#             elif x in ['A72', 'A73']:
#                 return '1-4 years'
#             elif x in ['A74', 'A75']:
#                 return '4+ years'
#             else:
#                 return 'NA'

#         def group_savings(x):
#             if x in ['A61', 'A62']:
#                 return '<500'
#             elif x in ['A63', 'A64']:
#                 return '500+'
#             elif x == 'A65':
#                 return 'Unknown/None'
#             else:
#                 return 'NA'

#         def group_status(x):
#             if x in ['A22', 'A22']:
#                 return '<200'
#             elif x in ['A23']:
#                 return '200+'
#             elif x == 'A14':
#                 return 'None'
#             else:
#                 return 'NA'

#         status_map = {'A91': 1.0, 'A93': 1.0, 'A94': 1.0,
#                     'A92': 0.0, 'A95': 0.0}
#         df['sex'] = df['personal_status'].replace(status_map)
#         #drop personal_status,not leak sex information
#         df = df.drop('personal_status', axis=1)

#         # group credit history, savings, and employment
#         df['credit_history'] = df['credit_history'].apply(lambda x: group_credit_hist(x))
#         df['savings'] = df['savings'].apply(lambda x: group_savings(x))
#         df['employment'] = df['employment'].apply(lambda x: group_employ(x))
#         df['age'] = (df['age'] >= 26).astype(float)
#         df['status'] = df['status'].apply(lambda x: group_status(x))
#         df["Y"] = df["credit"].apply(lambda x: 1 if x == 1 else 0)
#         df.drop(["credit"], axis=1, inplace=True)
#         return df

#     # Feature partitions
#     D_features = ['sex', 'age'] if protected_attributes is None else protected_attributes
#     Y_features = ['Y']

#     # privileged classes
#     all_privileged_classes = {"sex": [1.0],
#                               "age": [1.0]}

#     # protected attribute maps
#     all_protected_attribute_maps = {"sex": {1.0: 'Male', 0.0: 'Female'},
#                                     "age": {1.0: 'Old', 0.0: 'Young'}}

#     return GermanDataset(
#         label_name=Y_features[0],
#         favorable_classes=[1],
#         protected_attribute_names=D_features,
#         privileged_classes=[all_privileged_classes[x] for x in D_features],
#         instance_weights_name=None,
#         features_to_keep=[],
#         metadata={ 'label_maps': [{1.0: 'Good Credit', 0: 'Bad Credit'}],
#                    'protected_attribute_maps': [all_protected_attribute_maps[x]
#                                 for x in D_features]},
#         custom_preprocessing=custom_preprocessing)


In [126]:
class GermanDataset(StandardDataset):
    """German credit dataset read from a CSV file.

    The file at ``path`` is loaded with pandas and the resulting frame is
    passed to :class:`StandardDataset` together with every other constructor
    option, forwarded unchanged.

    See :file:`aif360/data/raw/german/README.md`.
    """

    def __init__(self, path, label_name='Y', favorable_classes=[1],
                 protected_attribute_names=['sex', 'age'],
                 privileged_classes=[[1],[1]],
                 instance_weights_name=None,
                 categorical_features=[],
                 features_to_keep=[], features_to_drop=[],
                 na_values=[], custom_preprocessing=None,
                 metadata=None):
        """Load ``path`` as a DataFrame and initialise the parent dataset.

        Args:
            path: Location of the CSV file handed to ``pd.read_csv``.

        All remaining arguments are passed through to
        ``StandardDataset.__init__`` under the same keyword names.
        """
        # Everything except `path` is a straight pass-through to the parent.
        passthrough = dict(
            label_name=label_name,
            favorable_classes=favorable_classes,
            protected_attribute_names=protected_attribute_names,
            privileged_classes=privileged_classes,
            instance_weights_name=instance_weights_name,
            categorical_features=categorical_features,
            features_to_keep=features_to_keep,
            features_to_drop=features_to_drop,
            na_values=na_values,
            custom_preprocessing=custom_preprocessing,
            metadata=metadata,
        )
        super().__init__(df=pd.read_csv(path), **passthrough)

#### Load dataset and specify options

In [127]:
## Experiment configuration
dataset_used = "german"  # name of the dataset under study
protected_attribute_used = 2  # 1 -> 'sex', any other value -> 'age'

# Group definitions for the chosen protected attribute:
# value 1 identifies the privileged group, value 0 the unprivileged group.
privileged_groups, unprivileged_groups = (
    ([{'sex': 1}], [{'sex': 0}])
    if protected_attribute_used == 1
    else ([{'age': 1}], [{'age': 0}])
)

# Fairness metric to constrain (validated against allowed_metrics below)
metric_name = "Equal opportunity difference"

# Lower and upper bound on the fairness metric used
metric_lb = -0.05
metric_ub = 0.05

# Seed NumPy's global random number generator
random_seed = 12345679
np.random.seed(random_seed)

# Fail fast when the metric name is not one of the supported choices
allowed_metrics = [
    "Statistical parity difference",
    "Average odds difference",
    "Equal opportunity difference",
]
if not (metric_name in allowed_metrics):
    raise ValueError("Metric name should be one of allowed metrics")

#### Split into train, test and validation

In [128]:
# # 5 fold cross validation
# Z =  dataset_orig.split(5, shuffle=True,seed = random_seed)
# # i th fold
# dataset_train1 = Z[0].copy()
# dataset_train1.features = np.concatenate((Z[0].features,Z[1].features,Z[2].features,Z[3].features),axis=0)
# dataset_train1.scores = np.concatenate((Z[0].scores,Z[1].scores,Z[2].scores,Z[3].scores),axis=0)
# dataset_train1.labels = np.concatenate((Z[0].labels,Z[1].labels,Z[2].labels,Z[3].labels),axis=0)
# dataset_train1.protected_attributes = np.concatenate((Z[0].protected_attributes,Z[1].protected_attributes,Z[2].protected_attributes,Z[3].protected_attributes),axis=0)
# dataset_train1.instance_weights = np.concatenate((Z[0].instance_weights,Z[1].instance_weights,Z[2].instance_weights,Z[3].instance_weights),axis=0)
# dataset_train1.instance_names = np.concatenate((Z[0].instance_names,Z[1].instance_names,Z[2].instance_names,Z[3].instance_names),axis=0)
# dataset_train1.metadata = Z[0].metadata.copy()
# dataset_test1= Z[4].copy()

# dataset_train2 = Z[1].copy()
# dataset_train2.features = np.concatenate((Z[1].features,Z[2].features,Z[3].features,Z[4].features),axis=0)
# dataset_train2.scores = np.concatenate((Z[1].scores,Z[2].scores,Z[3].scores,Z[4].scores),axis=0)
# dataset_train2.labels = np.concatenate((Z[1].labels,Z[2].labels,Z[3].labels,Z[4].labels),axis=0)
# dataset_train2.protected_attributes = np.concatenate((Z[1].protected_attributes,Z[2].protected_attributes,Z[3].protected_attributes,Z[4].protected_attributes),axis=0)
# dataset_train2.instance_weights = np.concatenate((Z[1].instance_weights,Z[2].instance_weights,Z[3].instance_weights,Z[4].instance_weights),axis=0)
# dataset_train2.instance_names = np.concatenate((Z[1].instance_names,Z[2].instance_names,Z[3].instance_names,Z[4].instance_names),axis=0)
# dataset_train2.metadata = Z[1].metadata.copy()
# dataset_test2= Z[0].copy()

# dataset_train3 = Z[2].copy()
# dataset_train3.features = np.concatenate((Z[2].features,Z[3].features,Z[4].features,Z[0].features),axis=0)
# dataset_train3.scores = np.concatenate((Z[2].scores,Z[3].scores,Z[4].scores,Z[0].scores),axis=0)
# dataset_train3.labels = np.concatenate((Z[2].labels,Z[3].labels,Z[4].labels,Z[0].labels),axis=0)
# dataset_train3.protected_attributes = np.concatenate((Z[2].protected_attributes,Z[3].protected_attributes,Z[4].protected_attributes,Z[0].protected_attributes),axis=0)
# dataset_train3.instance_weights = np.concatenate((Z[2].instance_weights,Z[3].instance_weights,Z[4].instance_weights,Z[0].instance_weights),axis=0)
# dataset_train3.instance_names = np.concatenate((Z[2].instance_names,Z[3].instance_names,Z[4].instance_names,Z[0].instance_names),axis=0)
# dataset_train3.metadata = Z[2].metadata.copy()
# dataset_test3= Z[1].copy()

# dataset_train4 = Z[3].copy()
# dataset_train4.features = np.concatenate((Z[3].features,Z[4].features,Z[0].features,Z[1].features),axis=0)
# dataset_train4.scores = np.concatenate((Z[3].scores,Z[4].scores,Z[0].scores,Z[1].scores),axis=0)
# dataset_train4.labels = np.concatenate((Z[3].labels,Z[4].labels,Z[0].labels,Z[1].labels),axis=0)
# dataset_train4.protected_attributes = np.concatenate((Z[3].protected_attributes,Z[4].protected_attributes,Z[0].protected_attributes,Z[1].protected_attributes),axis=0)
# dataset_train4.instance_weights = np.concatenate((Z[3].instance_weights,Z[4].instance_weights,Z[0].instance_weights,Z[1].instance_weights),axis=0)
# dataset_train4.instance_names = np.concatenate((Z[3].instance_names,Z[4].instance_names,Z[0].instance_names,Z[1].instance_names),axis=0)
# dataset_train4.metadata = Z[3].metadata.copy()
# dataset_test4= Z[2].copy()

# dataset_train5 = Z[4].copy()
# dataset_train5.features = np.concatenate((Z[4].features,Z[0].features,Z[1].features,Z[2].features),axis=0)
# dataset_train5.scores = np.concatenate((Z[4].scores,Z[0].scores,Z[1].scores,Z[2].scores),axis=0)
# dataset_train5.labels = np.concatenate((Z[4].labels,Z[0].labels,Z[1].labels,Z[2].labels),axis=0)
# dataset_train5.protected_attributes = np.concatenate((Z[4].protected_attributes,Z[0].protected_attributes,Z[1].protected_attributes,Z[2].protected_attributes),axis=0)
# dataset_train5.instance_weights = np.concatenate((Z[4].instance_weights,Z[0].instance_weights,Z[1].instance_weights,Z[2].instance_weights),axis=0)
# dataset_train5.instance_names = np.concatenate((Z[4].instance_names,Z[0].instance_names,Z[1].instance_names,Z[2].instance_names),axis=0)
# dataset_train5.metadata = Z[4].metadata.copy()
# dataset_test5= Z[3].copy()

In [129]:
# dataset_train1.convert_to_dataframe()[0].to_csv("Huangrui/german/german_train1.csv",index=False)
# dataset_test1.convert_to_dataframe()[0].to_csv("Huangrui/german/german_test1.csv",index=False)
# dataset_train2.convert_to_dataframe()[0].to_csv("Huangrui/german/german_train2.csv",index=False)
# dataset_test2.convert_to_dataframe()[0].to_csv("Huangrui/german/german_test2.csv",index=False)
# dataset_train3.convert_to_dataframe()[0].to_csv("Huangrui/german/german_train3.csv",index=False)
# dataset_test3.convert_to_dataframe()[0].to_csv("Huangrui/german/german_test3.csv",index=False)
# dataset_train4.convert_to_dataframe()[0].to_csv("Huangrui/german/german_train4.csv",index=False)
# dataset_test4.convert_to_dataframe()[0].to_csv("Huangrui/german/german_test4.csv",index=False)
# dataset_train5.convert_to_dataframe()[0].to_csv("Huangrui/german/german_train5.csv",index=False)
# dataset_test5.convert_to_dataframe()[0].to_csv("Huangrui/german/german_test5.csv",index=False)


In [130]:
# import pickle
# from sklearn.linear_model import Lasso
# import pandas as pd
# # Train the biased model on a subset of the data
# selected_dataset,_ = dataset_train5.split([0.6], shuffle=True)
# _ =  dataset_test5
# # Logistic regression classifier and predictions
# scale_orig = StandardScaler()
# X_train = scale_orig.fit_transform(selected_dataset.features)
# y_train = selected_dataset.labels.ravel()
# X_test = scale_orig.fit_transform(_.features)
# y_test = _.labels.ravel()
# lmod = Lasso(alpha = 0.001)
# lmod.fit(X_train, y_train)
# y_train_pred = lmod.predict(X_train)>0.5
# y_test_pred = lmod.predict(X_test)>0.5
# dataset_orig_train_pred = selected_dataset.copy(deepcopy=True)
# dataset_orig_train_pred.labels = y_train_pred
# dataset_orig_test_pred = _.copy(deepcopy=True)
# dataset_orig_test_pred.labels = y_test_pred
# print(np.mean(y_train_pred!=y_train))
# print(np.mean(y_test_pred!=y_test))
# metric_test = compute_metrics(_, dataset_orig_test_pred, 
#                 unprivileged_groups, privileged_groups)
# metric_train = compute_metrics(selected_dataset, dataset_orig_train_pred,unprivileged_groups, privileged_groups)

In [131]:
#save model
# with open("Huangrui/german/german_model.pkl", 'wb') as file:
#     pickle.dump(lmod, file)
# pickle.dump(lmod, open("Huangrui/german/german_model.pkl", 'wb'))

In [171]:
# K picks which numbered train/test CSV pair is used;
# budget is the fraction of the training data that is kept.
K, budget = 5, 0.01

In [172]:
# Load the K-th train/test CSV pair as GermanDataset objects (train first, then test)
dataset_orig_train, dataset_orig_test = (
    GermanDataset(path=csv_template.format(K))
    for csv_template in ("./Huangrui/german/german_train{}.csv",
                         "./Huangrui/german/german_test{}.csv")
)

In [173]:
# Keep only a `budget` fraction of the training data (the first part of the split).
# The shuffle is seeded explicitly so the selected subset does not depend on how
# much of NumPy's global random state has been consumed before this cell runs.
# NOTE: this cell rebinds dataset_orig_train to its own subset, so re-running it
# without reloading the data first shrinks the training set again.
dataset_orig_train, _ = dataset_orig_train.split([budget], shuffle=True, seed=random_seed)

#### Clean up training data and display properties of the data

In [174]:
def _show_section(heading, *values):
    """Render `heading` as Markdown, then print `values` separated by spaces."""
    display(Markdown(heading))
    print(*values)


# Summarise shape, labels, protected attributes and feature names of the training data
_show_section("#### Training Dataset shape",
              dataset_orig_train.features.shape)
_show_section("#### Favorable and unfavorable labels",
              dataset_orig_train.favorable_label,
              dataset_orig_train.unfavorable_label)
_show_section("#### Protected attribute names",
              dataset_orig_train.protected_attribute_names)
_show_section("#### Privileged and unprivileged protected attribute values",
              dataset_orig_train.privileged_protected_attributes,
              dataset_orig_train.unprivileged_protected_attributes)
_show_section("#### Dataset feature names",
              dataset_orig_train.feature_names)

#### Training Dataset shape

(8, 51)


#### Favorable and unfavorable labels

1.0 0.0


#### Protected attribute names

['sex', 'age']


#### Privileged and unprivileged protected attribute values

[array([1.]), array([1.])] [array([0.]), array([0.])]


#### Dataset feature names

['month', 'credit_amount', 'investment_as_income_percentage', 'residence_since', 'age', 'number_of_credits', 'people_liable_for', 'sex', 'status=200+', 'status=<200', 'status=None', 'credit_history=Delay', 'credit_history=None/Paid', 'credit_history=Other', 'purpose=A40', 'purpose=A41', 'purpose=A410', 'purpose=A42', 'purpose=A43', 'purpose=A44', 'purpose=A45', 'purpose=A46', 'purpose=A48', 'purpose=A49', 'savings=500+', 'savings=<500', 'savings=Unknown/None', 'employment=1-4 years', 'employment=4+ years', 'employment=Unemployed', 'other_debtors=A101', 'other_debtors=A102', 'other_debtors=A103', 'property=A121', 'property=A122', 'property=A123', 'property=A124', 'installment_plans=A141', 'installment_plans=A142', 'installment_plans=A143', 'housing=A151', 'housing=A152', 'housing=A153', 'skill_level=A171', 'skill_level=A172', 'skill_level=A173', 'skill_level=A174', 'telephone=A191', 'telephone=A192', 'foreign_worker=A201', 'foreign_worker=A202']


#### Metric for original training data

In [175]:
# Dataset-level fairness metric on the true labels of the training data
metric_orig_train = BinaryLabelDatasetMetric(
    dataset_orig_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

display(Markdown("#### Original training dataset"))
print(
    "Difference in mean outcomes between unprivileged and privileged groups = %f"
    % metric_orig_train.mean_difference()
)

#### Original training dataset

Difference in mean outcomes between unprivileged and privileged groups = -0.166667


### Train classifier on original data

In [176]:
# Standardise the training features; the fitted scaler is kept in `scale_orig`.
scale_orig = StandardScaler()
X_train = scale_orig.fit_transform(dataset_orig_train.features)
y_train = dataset_orig_train.labels.ravel()

# No model is trained here: a previously saved model is loaded from disk, with the
# file chosen by fold number K and by the protected attribute in use.
if protected_attribute_used == 1:
    model_path = 'experiments/german'+str(K)+'_sex_bmodel.pkl'
else:
    model_path = 'experiments/german'+str(K)+'_age_bmodel.pkl'

# SECURITY: unpickling can execute arbitrary code -- only load model files you trust.
# The context manager guarantees the file handle is closed after loading.
with open(model_path, 'rb') as model_file:
    lmod = pickle.load(model_file)
y_train_pred = lmod.predict(X_train)

# Prediction container: a deep copy of the training data whose labels hold the raw
# model outputs and whose scores hold the squashed outputs.
dataset_orig_train_pred = dataset_orig_train.copy(deepcopy=True)
dataset_orig_train_pred.labels = y_train_pred.reshape(-1,1)


def sigmoid(x):
    """Logistic function shifted so that an input of 0.5 maps to a score of 0.5."""
    # presumably the model outputs are regression values around a 0.5 cut-off -- TODO confirm
    return 1 / (1 + np.exp(0.5-x))


dataset_orig_train_pred.scores = sigmoid(y_train_pred).reshape(-1,1)

#### Obtain scores for the test set

In [177]:
# Prediction container for the test data: deep copy, then attach model scores
dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)
y_test = dataset_orig_test_pred.labels

# Scale with the scaler fitted on the training data (transform only, no refit)
X_test = scale_orig.transform(dataset_orig_test_pred.features)
dataset_orig_test_pred.scores = sigmoid(
    lmod.predict(X_test)
).reshape(-1, 1)

### Find the optimal parameters from the validation set

#### Best threshold for classification only (no fairness)

In [178]:
# Grid search over candidate classification thresholds, scoring each one by the
# balanced accuracy of the resulting labels (no fairness constraint involved).
num_thresh = 100
class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)
ba_arr = np.zeros(num_thresh)

for idx, class_thresh in enumerate(class_thresh_arr):
    # Relabel the predictions in place: favorable above the threshold, unfavorable otherwise
    fav_inds = dataset_orig_train_pred.scores > class_thresh
    dataset_orig_train_pred.labels[~fav_inds] = dataset_orig_train_pred.unfavorable_label
    dataset_orig_train_pred.labels[fav_inds] = dataset_orig_train_pred.favorable_label

    classified_metric_orig_train = ClassificationMetric(
        dataset_orig_train,
        dataset_orig_train_pred,
        privileged_groups=privileged_groups,
        unprivileged_groups=unprivileged_groups,
    )

    # Balanced accuracy = mean of true positive rate and true negative rate
    ba_arr[idx] = 0.5 * (classified_metric_orig_train.true_positive_rate()
                         + classified_metric_orig_train.true_negative_rate())

# First threshold (lowest index) that attains the maximum balanced accuracy
best_ind = np.flatnonzero(ba_arr == ba_arr.max())[0]
best_class_thresh = class_thresh_arr[best_ind]

print("Best balanced accuracy (no fairness constraints) = %.4f" % ba_arr.max())
print("Optimal classification threshold (no fairness constraints) = %.4f" % best_class_thresh)

Best balanced accuracy (no fairness constraints) = 0.8000
Optimal classification threshold (no fairness constraints) = 0.5346


#### Estimate optimal parameters for the ROC method

In [179]:
# Build the reject-option post-processor and fit it on the true vs. predicted
# training data; `fit` returns the estimator that is bound to ROC.
ROC = RejectOptionClassification(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
    low_class_thresh=0.01,
    high_class_thresh=0.99,
    num_class_thresh=100,
    num_ROC_margin=50,
    metric_name=metric_name,
    metric_ub=metric_ub,
    metric_lb=metric_lb,
).fit(dataset_orig_train, dataset_orig_train_pred)

In [180]:
# Report the two parameters stored on the fitted ROC object
print("Optimal classification threshold (with fairness constraints) = %.4f"
      % ROC.classification_threshold)
print("Optimal ROC margin = %.4f"
      % ROC.ROC_margin)

Optimal classification threshold (with fairness constraints) = 0.2773
Optimal ROC margin = 0.2546


### Predictions from the Training Set (used here in place of a validation set)

In [181]:
# Metrics for the training set, labelled with the unconstrained best threshold
fav_inds = dataset_orig_train_pred.scores > best_class_thresh
dataset_orig_train_pred.labels[~fav_inds] = dataset_orig_train_pred.unfavorable_label
dataset_orig_train_pred.labels[fav_inds] = dataset_orig_train_pred.favorable_label

display(Markdown("#### train set"))
display(Markdown("##### Raw predictions - No fairness constraints, only maximizing balanced accuracy"))

# Compare true training labels against the thresholded predictions
metric_train_bef = compute_metrics(
    dataset_orig_train,
    dataset_orig_train_pred,
    unprivileged_groups,
    privileged_groups,
)

#### train set

##### Raw predictions - No fairness constraints, only maximizing balanced accuracy

Balanced accuracy = 0.8000
Statistical parity difference = -0.5000
Disparate impact = 0.0000
Average odds difference = -0.3750
Equal opportunity difference = -0.7500
Theil index = 0.2877


In [182]:
# Apply the fitted ROC post-processing to the training-set predictions
dataset_transf_train_pred = ROC.predict(dataset_orig_train_pred)

display(Markdown("#### train set"))
display(Markdown("##### Transformed predictions - With fairness constraints"))

# Compare true training labels against the ROC-adjusted predictions
metric_train_aft = compute_metrics(
    dataset_orig_train,
    dataset_transf_train_pred,
    unprivileged_groups,
    privileged_groups,
)

#### train set

##### Transformed predictions - With fairness constraints

Balanced accuracy = 0.8333
Statistical parity difference = 0.3333
Disparate impact = 1.5000
Average odds difference = 0.5000
Equal opportunity difference = 0.0000
Theil index = 0.0362


In [183]:
# Sanity check: after ROC the constrained metric must be no further from zero than before
assert np.abs(metric_train_bef[metric_name]) >= np.abs(metric_train_aft[metric_name])

### Predictions from Test Set

In [184]:
# Metrics for the test set, labelled with the unconstrained best threshold
fav_inds = dataset_orig_test_pred.scores > best_class_thresh
dataset_orig_test_pred.labels[~fav_inds] = dataset_orig_test_pred.unfavorable_label
dataset_orig_test_pred.labels[fav_inds] = dataset_orig_test_pred.favorable_label

display(Markdown("#### Test set"))
display(Markdown("##### Raw predictions - No fairness constraints, only maximizing balanced accuracy"))

# Compare true test labels against the thresholded predictions
metric_test_bef = compute_metrics(
    dataset_orig_test,
    dataset_orig_test_pred,
    unprivileged_groups,
    privileged_groups,
)

#### Test set

##### Raw predictions - No fairness constraints, only maximizing balanced accuracy

Balanced accuracy = 0.6710
Statistical parity difference = -0.1500
Disparate impact = 0.5714
Average odds difference = -0.0859
Equal opportunity difference = -0.1180
Theil index = 0.4948


In [185]:
# Apply the fitted ROC post-processing to the test-set predictions
dataset_transf_test_pred = ROC.predict(dataset_orig_test_pred)

display(Markdown("#### Test set"))
display(Markdown("##### Transformed predictions - With fairness constraints"))

# Compare true test labels against the ROC-adjusted predictions
metric_test_aft = compute_metrics(
    dataset_orig_test,
    dataset_transf_test_pred,
    unprivileged_groups,
    privileged_groups,
)

#### Test set

##### Transformed predictions - With fairness constraints

Balanced accuracy = 0.6018
Statistical parity difference = 0.6250
Disparate impact = 2.6667
Average odds difference = 0.7034
Equal opportunity difference = 0.5133
Theil index = 0.3868


In [186]:
# Plain misclassification error computed by hand -- note this is NOT balanced accuracy:
# it is the fraction of test labels that differ from the ROC-adjusted predictions.
print("The Error for the test dataset is {:.4}".format(np.mean(dataset_orig_test.labels!=dataset_transf_test_pred.labels)))
print("The Equal opportunity difference for the test dataset is {:.4}".format(metric_test_aft["Equal opportunity difference"]))

The Error for the test dataset is 0.41
The Equal opportunity difference for the test dataset is 0.5133
