In [3]:
# Load all necessary packages
import sys
sys.path.insert(1, "../")  

import numpy as np
np.random.seed(0)

from aif360.datasets import GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing

from IPython.display import Markdown, display

pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[AdversarialDebiasing]'
  warn_deprecated('vmap', 'torch.vmap')


In [4]:
dataset_orig = GermanDataset(
    protected_attribute_names=['age'],           # this dataset also contains protected
                                                 # attribute for "sex" which we do not
                                                 # consider in this evaluation
    privileged_classes=[lambda x: x >= 25],      # age >=25 is considered privileged
    features_to_drop=['personal_status', 'sex'] # ignore sex-related attributes
)

dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)

privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

In [5]:
df_bias_train = dataset_orig_train.convert_to_dataframe()[0]
df_bias_test = dataset_orig_test.convert_to_dataframe()[0]
df_bias_train.head()

Unnamed: 0,month,credit_amount,investment_as_income_percentage,residence_since,age,number_of_credits,people_liable_for,status=A11,status=A12,status=A13,...,housing=A153,skill_level=A171,skill_level=A172,skill_level=A173,skill_level=A174,telephone=A191,telephone=A192,foreign_worker=A201,foreign_worker=A202,credit
993,36.0,3959.0,4.0,3.0,1.0,1.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0
859,9.0,3577.0,1.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0
298,18.0,2515.0,3.0,4.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0
553,12.0,1995.0,4.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
672,60.0,10366.0,2.0,4.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0


In [6]:
metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
display(Markdown("#### Original training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_train.mean_difference())

#### Original training dataset

Difference in mean outcomes between unprivileged and privileged groups = -0.169905


In [7]:
from xgboost import XGBRegressor as Gbr
import xgboost as xgb
import os
from sklearn.metrics import root_mean_squared_error as RMSE

xgb_regressor = xgb.XGBRegressor()
xgb_regressor.fit(df_bias_train.drop('credit_amount',axis=1), df_bias_train['credit_amount'])

# Make predictions
preds = xgb_regressor.predict(df_bias_test.drop('credit_amount',axis=1))

# Evaluate the model
rmse = RMSE(df_bias_test['credit_amount'], preds)
print(f"RMSE: {rmse:.2f}")

# Save the model
xgb_regressor.save_model(os.path.join('..','models''xgboost_credit_bias_regressor_model.pkl'))

RMSE: 2199.00




In [8]:
RW = Reweighing(unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)
dataset_transf_train = RW.fit_transform(dataset_orig_train)

In [9]:
metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train, 
                                               unprivileged_groups=unprivileged_groups,
                                               privileged_groups=privileged_groups)
display(Markdown("#### Transformed training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_transf_train.mean_difference())

#### Transformed training dataset

Difference in mean outcomes between unprivileged and privileged groups = 0.000000


In [10]:
df_unbias_train = dataset_transf_train.convert_to_dataframe()[0]
df_unbias_train.head()

Unnamed: 0,month,credit_amount,investment_as_income_percentage,residence_since,age,number_of_credits,people_liable_for,status=A11,status=A12,status=A13,...,housing=A153,skill_level=A171,skill_level=A172,skill_level=A173,skill_level=A174,telephone=A191,telephone=A192,foreign_worker=A201,foreign_worker=A202,credit
993,36.0,3959.0,4.0,3.0,1.0,1.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0
859,9.0,3577.0,1.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0
298,18.0,2515.0,3.0,4.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0
553,12.0,1995.0,4.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
672,60.0,10366.0,2.0,4.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0


In [11]:
xgb_unbias = xgb.XGBRegressor()
xgb_unbias.fit(df_unbias_train.drop('credit_amount',axis=1), df_unbias_train['credit_amount'])

# Make predictions
preds_unbias = xgb_unbias.predict(df_bias_test.drop('credit_amount',axis=1))

# Evaluate the model
rmse_unbias = RMSE(df_bias_test.loc[:,'credit_amount'], preds_unbias)
print(f"RMSE: {rmse_unbias:.2f}")

# Save the model
xgb_unbias.save_model(os.path.join('..','models','xgboost_credit_unbias_regressor_model.pkl'))

RMSE: 2199.00


