In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Markdown, display

# Datasets
from aif360.datasets import GermanDataset
from aif360.datasets import BinaryLabelDataset

# Fairness metrics
from aif360.metrics import BinaryLabelDatasetMetric
# from aif360.metrics import DatasetMetric
from aif360.metrics import ClassificationMetric

# Explainers
from aif360.explainers import MetricTextExplainer

# Scalers
from sklearn.preprocessing import StandardScaler

# Classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# Bias mitigation techniques
from aif360.algorithms.preprocessing import Reweighing
from aif360.algorithms.inprocessing import PrejudiceRemover


pip install 'aif360[AdversarialDebiasing]'


In [2]:
#Specify sensitive attribute
dataset_orig_panel19_train = GermanDataset()
sens_ind = 0
sens_attr = dataset_orig_panel19_train.protected_attribute_names[sens_ind]
print(sens_attr)
unprivileged_groups = [{sens_attr: v} for v in
                       dataset_orig_panel19_train.unprivileged_protected_attributes[sens_ind]]
privileged_groups = [{sens_attr: v} for v in
                     dataset_orig_panel19_train.privileged_protected_attributes[sens_ind]]

sex


In [3]:
#TODO 1
dataset_orig_panel19_train, dataset_orig_panel19_test = GermanDataset().split([0.7], shuffle=True)
metric_orig_panel19 = BinaryLabelDatasetMetric(
        GermanDataset(),
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
explainer_orig_panel19 = MetricTextExplainer(metric_orig_panel19)

print(explainer_orig_panel19.disparate_impact())

Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.8965673282047968


In [4]:
#TODO 2 train random forest classifier
dataset = dataset_orig_panel19_train
model = make_pipeline(StandardScaler(),
                      RandomForestClassifier(n_estimators=500, min_samples_leaf=25))
fit_params = {'randomforestclassifier__sample_weight': dataset.instance_weights}
rf_orig_panel19 = model.fit(dataset.features, dataset.labels.ravel(), **fit_params)

In [5]:
import copy
dataset_test = dataset_orig_panel19_test
predictions= model.predict(dataset_test.features)
prediction_dataset=copy.deepcopy(dataset_test)
#prediction_dataset = copy.deepcopy(dataset)
prediction_dataset.labels = predictions
#bldataset = BinaryLabelDataset(dataset, privileged_groups[0].get('sex'), unprivileged_groups[0].get('sex'))
metric_class = ClassificationMetric(dataset_test, prediction_dataset, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

In [6]:
print(metric_class.disparate_impact())
print(metric_class.equal_opportunity_difference())

0.9966297544535389
-0.003279320987654266


In [7]:
#Bias Mitigation TODO 3
RW = Reweighing(unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)
dataset_transf_panel19_train = RW.fit_transform(dataset_orig_panel19_train)
dataset_transf_panel19_test = RW.fit_transform(dataset_orig_panel19_test)

In [8]:
metric_transf = BinaryLabelDatasetMetric(
        dataset_transf_panel19_train,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
explainer_transf = MetricTextExplainer(metric_transf)

print(explainer_transf.disparate_impact())

Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 1.0


In [9]:
#using random tree on transformed data
dataset1 = dataset_transf_panel19_train
model1 = make_pipeline(StandardScaler(),
                      RandomForestClassifier(n_estimators=500, min_samples_leaf=25))
fit_params = {'randomforestclassifier__sample_weight': dataset1.instance_weights}
rf_transf_panel19 = model1.fit(dataset1.features, dataset1.labels.ravel(), **fit_params)

In [10]:
dataset1_test = dataset_transf_panel19_test
predictions1= model1.predict(dataset1_test.features)
prediction_dataset1=copy.deepcopy(dataset1_test)
prediction_dataset1.labels = predictions
metric_class1 = ClassificationMetric(dataset1_test, prediction_dataset1, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

In [11]:
print(metric_class1.disparate_impact())
print(metric_class1.equal_opportunity_difference())

1.0025227872689229
-0.003279320987654044
