In [2]:
# Load all necessary packages
import sys
# Insert the parent directory at position 1 of the module search path.
sys.path.insert(1, "../")

import numpy as np
# Seed NumPy's global random number generator.
# NOTE(review): only NumPy is seeded in this cell; TensorFlow is not seeded here.
np.random.seed(0)

from aif360.datasets import AdultDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing
from aif360.algorithms.preprocessing import DisparateImpactRemover
from aif360.metrics import ClassificationMetric
from aif360.algorithms.preprocessing import OptimPreproc
from aif360.sklearn.inprocessing import AdversarialDebiasing
from aif360.algorithms.postprocessing import RejectOptionClassification
# NOTE(review): this import rebinds the name `AdversarialDebiasing` that was
# imported from aif360.sklearn.inprocessing two lines above, so this is the
# class the name refers to once this cell has run.
from aif360.algorithms.inprocessing.adversarial_debiasing import AdversarialDebiasing

from IPython.display import Markdown, display
import statistics
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
import tensorflow as tf

ModuleNotFoundError: No module named 'aif360'

# AIF360

### Load the Adult Census Income dataset
### Possible protected attributes: sex and race (we have chosen sex)

In [65]:
# Bias detection options: 'sex' is the protected attribute, 'Male' the privileged class.
single_protected = ['sex']
single_privileged = [['Male']]

# Group definitions used by the metrics and mitigation algorithms.
# The protected attribute 'sex' is encoded with 0 for females.
privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]

# Load the dataset. Dropped attributes: those with <= 0.15 association with the
# true label, the duplicate attribute 'education' (already included with a
# numerical encoding as 'education-num'), and 'relationship', which is highly
# correlated with "sex", "marital-status", and "age".
dataset_orig = AdultDataset(
    protected_attribute_names=single_protected,
    privileged_classes=single_privileged,
    features_to_drop=[
        'fnlwgt',
        'native-country',
        'race',
        'capital-loss',
        'education',
        'relationship',
    ],
)

print("Feature names:\n", dataset_orig.feature_names)
print("Labelname:\n", dataset_orig.label_names)

# Shuffle, then split into train and test parts at the 0.7 mark.
dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)



Feature names:
 ['age', 'education-num', 'sex', 'capital-gain', 'hours-per-week', 'workclass=Federal-gov', 'workclass=Local-gov', 'workclass=Private', 'workclass=Self-emp-inc', 'workclass=Self-emp-not-inc', 'workclass=State-gov', 'workclass=Without-pay', 'marital-status=Divorced', 'marital-status=Married-AF-spouse', 'marital-status=Married-civ-spouse', 'marital-status=Married-spouse-absent', 'marital-status=Never-married', 'marital-status=Separated', 'marital-status=Widowed', 'occupation=Adm-clerical', 'occupation=Armed-Forces', 'occupation=Craft-repair', 'occupation=Exec-managerial', 'occupation=Farming-fishing', 'occupation=Handlers-cleaners', 'occupation=Machine-op-inspct', 'occupation=Other-service', 'occupation=Priv-house-serv', 'occupation=Prof-specialty', 'occupation=Protective-serv', 'occupation=Sales', 'occupation=Tech-support', 'occupation=Transport-moving']
Labelname:
 ['income-per-year']


### Bias mitigation during preprocessing

In [1]:
def binary_metrics(dataset, unprivileged_groups, privileged_groups):
    """Compute several binary-label fairness metrics for ``dataset``.

    Args:
        dataset: Dataset handed to ``BinaryLabelDatasetMetric``.
        unprivileged_groups: Group definitions for the unprivileged group(s).
        privileged_groups: Group definitions for the privileged group(s).

    Returns:
        dict: Maps the metric names 'Disparate impact', 'Mean difference' and
        'Smoothed empirical differential fairness' (computed with
        ``concentration=1.0``) to their values, in that insertion order.
    """
    group_metric = BinaryLabelDatasetMetric(
        dataset,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )

    result = {}
    result['Disparate impact'] = group_metric.disparate_impact()
    result['Mean difference'] = group_metric.mean_difference()
    result['Smoothed empirical differential fairness'] = (
        group_metric.smoothed_empirical_differential_fairness(concentration=1.0)
    )
    return result


# Baseline: fairness metrics of the original (unmitigated) training split.
metrics_orig_train = binary_metrics(dataset_orig_train, unprivileged_groups, privileged_groups)

NameError: name 'dataset_orig_train' is not defined

In [67]:
# Preprocessing mitigation: Reweighing, fitted on and applied to the training split.
RW = Reweighing(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)
dataset_transf_train_rw = RW.fit_transform(dataset_orig_train)

# Recompute the fairness metrics on the reweighed training data; as the last
# expression of the cell, the resulting dict is shown as the cell output.
binary_metrics(dataset_transf_train_rw, unprivileged_groups, privileged_groups)

{'Disparate impact': 0.9999999999999997,
 'Mean difference': -8.326672684688674e-17,
 'Smoothed empirical differential fairness': 5.16176875673402e-05}

In [68]:
display(Markdown("#### Reweighing: weights the examples in each (group, label) combination differently to ensure fairness before classification"))

# Metrics of the original training split, computed in an earlier cell.
display(Markdown("##### Before mitigation:"))
for metric_name, metric_value in metrics_orig_train.items():
    print(metric_name, metric_value, sep=' : ')

# Metrics of the reweighed training split, recomputed here.
display(Markdown("##### After mitigation:"))
for metric_name, metric_value in binary_metrics(dataset_transf_train_rw, unprivileged_groups, privileged_groups).items():
    print(metric_name, metric_value, sep=' : ')

#### Reweighing: weights the examples in each (group, label) combination differently to ensure fairness before classification

##### Before mitigation:

Disparate impact : 0.3543500702597874
Mean difference : -0.2013421957302125
Smoothed empirical differential fairness : 1.037158700736489


##### After mitigation:

Disparate impact : 0.9999999999999997
Mean difference : -8.326672684688674e-17
Smoothed empirical differential fairness : 5.16176875673402e-05


In [69]:
# Preprocessing mitigation: Disparate Impact Remover with repair_level=1.0,
# using 'sex' as the sensitive attribute; fitted on and applied to the training split.
DIR = DisparateImpactRemover(
    repair_level=1.0,
    sensitive_attribute='sex',
)
dataset_transf_train_dir = DIR.fit_transform(dataset_orig_train)

In [70]:
display(Markdown("#### Disparate Impact Remover: edits feature values increase group fairness while preserving rank-ordering within groups"))

# Metrics of the original training split, computed in an earlier cell.
display(Markdown("##### Before mitigation:"))
for metric_name, metric_value in metrics_orig_train.items():
    print(metric_name, metric_value, sep=' : ')

# Metrics of the repaired training split, recomputed here.
# NOTE(review): the recorded outputs of this cell are identical before and after
# mitigation. Presumably this is because the remover edits feature values only,
# while these dataset metrics are derived from the labels and the protected
# attribute - confirm against the AIF360 documentation. If so, the effect would
# only become visible in the predictions of a model trained on the repaired data.
display(Markdown("##### After mitigation:"))
for metric_name, metric_value in binary_metrics(dataset_transf_train_dir, unprivileged_groups, privileged_groups).items():
    print(metric_name, metric_value, sep=' : ')

#### Disparate Impact Remover: edits feature values increase group fairness while preserving rank-ordering within groups

##### Before mitigation:

Disparate impact : 0.3543500702597874
Mean difference : -0.2013421957302125
Smoothed empirical differential fairness : 1.037158700736489


##### After mitigation:

Disparate impact : 0.3543500702597874
Mean difference : -0.2013421957302125
Smoothed empirical differential fairness : 1.037158700736489


### Bias mitigation during inprocessing: Adversarial debiasing:
learns a classifier to maximize prediction accuracy and simultaneously reduce an adversary’s ability to determine the protected attribute from the predictions [1]. This approach leads to a fair classifier as the predictions cannot carry any group discrimination information that the adversary can exploit.

#### Train a plain classifier

In [71]:
# Learn plain classifier without debiasing
# https://github.com/Trusted-AI/AIF360/blob/master/examples/demo_adversarial_debiasing.ipynb
tf.compat.v1.disable_eager_execution()

# Close a session left over from an earlier run of this cell. On a fresh kernel
# the name `sess` does not exist yet, so it is looked up through globals()
# instead of being evaluated directly (which would raise NameError).
if globals().get('sess') is not None:
    sess.close()

# Start from an empty default graph and a new session for this model.
tf.compat.v1.reset_default_graph()
sess = tf.compat.v1.Session()

# debias=False: train the classifier on its own, without the adversary.
clf = AdversarialDebiasing(privileged_groups=privileged_groups,
                           unprivileged_groups=unprivileged_groups,
                           scope_name='plain_classifier',
                           debias=False,
                           sess=sess)
clf.fit(dataset_orig_train)

# Apply the plain model to the train and test data
dataset_nodebiasing_train = clf.predict(dataset_orig_train)
dataset_nodebiasing_test = clf.predict(dataset_orig_test)

epoch 0; iter: 0; batch classifier loss: 5.807417
epoch 0; iter: 200; batch classifier loss: 7.163935
epoch 1; iter: 0; batch classifier loss: 2.698117
epoch 1; iter: 200; batch classifier loss: 2.462013
epoch 2; iter: 0; batch classifier loss: 2.410797
epoch 2; iter: 200; batch classifier loss: 4.004699
epoch 3; iter: 0; batch classifier loss: 0.970145
epoch 3; iter: 200; batch classifier loss: 1.177792
epoch 4; iter: 0; batch classifier loss: 1.884840
epoch 4; iter: 200; batch classifier loss: 2.031663
epoch 5; iter: 0; batch classifier loss: 0.491326
epoch 5; iter: 200; batch classifier loss: 1.327786
epoch 6; iter: 0; batch classifier loss: 2.038118
epoch 6; iter: 200; batch classifier loss: 0.804699
epoch 7; iter: 0; batch classifier loss: 1.639204
epoch 7; iter: 200; batch classifier loss: 0.318860
epoch 8; iter: 0; batch classifier loss: 0.406714
epoch 8; iter: 200; batch classifier loss: 0.386706
epoch 9; iter: 0; batch classifier loss: 1.019848
epoch 9; iter: 200; batch classi

#### Train a debiasing classifier

In [72]:
# Release the previous session, then start from an empty default graph and a
# new session for the second model.
sess.close()
tf.compat.v1.reset_default_graph()
sess = tf.compat.v1.Session()

# Adversarial debiasing model with debias=True, trained on the training split.
debiased_clf = AdversarialDebiasing(
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
    scope_name='debiased_classifier',
    debias=True,
    sess=sess,
)
debiased_clf.fit(dataset_orig_train)

# Apply the debiased model to the train and test data
dataset_debiasing_train = debiased_clf.predict(dataset_orig_train)
dataset_debiasing_test = debiased_clf.predict(dataset_orig_test)

epoch 0; iter: 0; batch classifier loss: 105.667290; batch adversarial loss: 0.855942
epoch 0; iter: 200; batch classifier loss: 6.655151; batch adversarial loss: 0.756046
epoch 1; iter: 0; batch classifier loss: 10.932810; batch adversarial loss: 0.807950
epoch 1; iter: 200; batch classifier loss: 7.798539; batch adversarial loss: 0.662345
epoch 2; iter: 0; batch classifier loss: 3.995960; batch adversarial loss: 0.688334
epoch 2; iter: 200; batch classifier loss: 5.578463; batch adversarial loss: 0.650072
epoch 3; iter: 0; batch classifier loss: 10.116468; batch adversarial loss: 0.637887
epoch 3; iter: 200; batch classifier loss: 1.575187; batch adversarial loss: 0.611195
epoch 4; iter: 0; batch classifier loss: 4.453220; batch adversarial loss: 0.671072
epoch 4; iter: 200; batch classifier loss: 3.543374; batch adversarial loss: 0.635989
epoch 5; iter: 0; batch classifier loss: 0.774815; batch adversarial loss: 0.687947
epoch 5; iter: 200; batch classifier loss: 2.594435; batch adv

In [73]:
def _report_mean_difference(split_name, dataset_pred):
    """Print and return the dataset-level metric object for one set of predictions.

    Uses the notebook-level ``unprivileged_groups`` / ``privileged_groups``.

    Args:
        split_name: Label used as the prefix of the printed line ("Train" or "Test").
        dataset_pred: Dataset with predicted labels, as returned by ``predict``.

    Returns:
        The ``BinaryLabelDatasetMetric`` built for ``dataset_pred``.
    """
    metric = BinaryLabelDatasetMetric(dataset_pred,
                                      unprivileged_groups=unprivileged_groups,
                                      privileged_groups=privileged_groups)
    print("%s set: Difference in mean outcomes between unprivileged and privileged groups = %f"
          % (split_name, metric.mean_difference()))
    return metric


def _report_classification_metrics(dataset_true, dataset_pred):
    """Print the test-set classification and fairness metrics for one model.

    Uses the notebook-level ``unprivileged_groups`` / ``privileged_groups``.

    Args:
        dataset_true: Dataset holding the ground-truth labels.
        dataset_pred: Dataset holding the model's predicted labels.

    Returns:
        tuple: ``(metric, balanced_accuracy)`` where ``metric`` is the
        ``ClassificationMetric`` and ``balanced_accuracy`` is the mean of the
        true positive rate and the true negative rate.
    """
    metric = ClassificationMetric(dataset_true,
                                  dataset_pred,
                                  unprivileged_groups=unprivileged_groups,
                                  privileged_groups=privileged_groups)
    print("Test set: Classification accuracy = %f" % metric.accuracy())
    balanced_accuracy = 0.5 * (metric.true_positive_rate() + metric.true_negative_rate())
    print("Test set: Balanced classification accuracy = %f" % balanced_accuracy)
    print("Test set: Disparate impact = %f" % metric.disparate_impact())
    print("Test set: Equal opportunity difference = %f" % metric.equal_opportunity_difference())
    print("Test set: Average odds difference = %f" % metric.average_odds_difference())
    print("Test set: Theil_index = %f" % metric.theil_index())
    return metric, balanced_accuracy


# Metrics for the dataset from plain model (without debiasing)
display(Markdown("#### Plain model - without debiasing - dataset metrics"))
metric_dataset_nodebiasing_train = _report_mean_difference("Train", dataset_nodebiasing_train)
metric_dataset_nodebiasing_test = _report_mean_difference("Test", dataset_nodebiasing_test)

# Metrics for the dataset from model with debiasing
display(Markdown("#### Model - with debiasing - dataset metrics"))
metric_dataset_debiasing_train = _report_mean_difference("Train", dataset_debiasing_train)
metric_dataset_debiasing_test = _report_mean_difference("Test", dataset_debiasing_test)

# Classification metrics compare the true test labels with each model's predictions.
display(Markdown("#### Plain model - without debiasing - classification metrics"))
classified_metric_nodebiasing_test, bal_acc_nodebiasing_test = _report_classification_metrics(
    dataset_orig_test, dataset_nodebiasing_test)

display(Markdown("#### Model - with debiasing - classification metrics"))
classified_metric_debiasing_test, bal_acc_debiasing_test = _report_classification_metrics(
    dataset_orig_test, dataset_debiasing_test)

#### Plain model - without debiasing - dataset metrics

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.206872
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.201568


#### Model - with debiasing - dataset metrics

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.095770
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.092306


#### Plain model - without debiasing - classification metrics

Test set: Classification accuracy = 0.843374
Test set: Balanced classification accuracy = 0.773292
Test set: Disparate impact = 0.302917
Test set: Equal opportunity difference = -0.140927
Test set: Average odds difference = -0.116580
Test set: Theil_index = 0.120074


#### Model - with debiasing - classification metrics

Test set: Classification accuracy = 0.838668
Test set: Balanced classification accuracy = 0.736977
Test set: Disparate impact = 0.556197
Test set: Equal opportunity difference = 0.097225
Test set: Average odds difference = 0.039272
Test set: Theil_index = 0.141963


### What types of bias mitigation algorithm are available?

Bias mitigation algorithms are typically separated into preprocessing, inprocessing, and postprocessing. Another possible typology is based on the fairness metrics they aim to improve.

### Do you see a difference between the different types of algorithms?

The processing step is crucial in defining the possibilities and limitations of algorithmic bias mitigation: algorithms at different steps act very differently. However, the algorithms within each category can also be very different.

### What changes are you able to witness?

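The preprocessing algorithm reweighing seems to have a negative impact on fairness, while the preprocessing algorithm disparate impact remover has no effect at all. This suggests that something has gone wrong in our way of applying these algorithms, however, we can't seem to find the bug at the moment. (Note: in the outputs recorded above, reweighing moves the disparate impact from about 0.35 to about 1.0 and the mean difference from about -0.20 to about 0, which by these metrics is an improvement rather than a deterioration. The unchanged values for the disparate impact remover may simply reflect that it edits feature values, while the metrics shown are computed from the labels; its effect would then only show up in a classifier trained on the repaired data.)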

The inprocessing algorithm adversarial debiasing worked well at reducing the difference in mean outcomes between the two groups, while not affecting the performance of the classification model much.

### Fairness Tools Overview

There are quite a few other fairness tools available, although AI Fairness 360 appears to be the most popular and most developed open-source project.
However, we could not find much information on these tools being used commercially. The Fairlearn toolkit, developed by Microsoft, seems to have been used internally by them, but it is unclear in which capacity. Rather, we found in our research that the adoption of fairness toolkits seems to be lagging behind. To quote one [article](https://ukfinancialservicesinsights.deloitte.com/post/102gh6a/landscape-and-gaps-in-open-source-fairness-toolkits): "Only 54% survey respondents had used any open source fairness toolkit before, despite the study's sampling of groups with likely exposure to fairness-related concerns."
We also read this [paper](https://www.researchgate.net/publication/356985512_A_Framework_for_Fairness_A_Systematic_Review_of_Existing_Fair_AI_Solutions) which isn't practically relevant for the section, but still quite interesting.

**Aequitas**

Aequitas is another open-source toolkit with a focus on analysing and visualising fairness metrics. Using Aequitas, you can generate a "Bias Report" based on your input data with the following criteria:

Equal Parity - Each group is represented equally  
Proportional Parity - Each group is represented proportionally to its representation in the overall population  
False Positive Parity - Each group has proportionally equal false positive errors made by the model.  
False Negative Parity - Each group has proportionally equal false negative errors made by the model.  

The report gives back a table judging each Attribute value as either fair or unfair based on these metrics (and a weighted combined score). 

This is visually very appealing, and certainly a great tool for analysing problems with fairness in a given dataset; however, Aequitas doesn't include tools to combat problems with fairness.

**FairSight**

From their github repo: "FairSight is a viable fair decision making system to assist decision makers in achieving fair decision making through the machine learning workflow."

This tool really takes the cake on a visualisation level: you get a beautiful colored dashboard with lots of relevant fairness metrics after feeding it input data. However, it also does not contain the tooling to actually change the data or model to make it fairer. Again, from the GitHub repo: 

"FairSight is developed on top of FairDM, a general fair decision making framework. Our framework is a model-agnostic framework with its goal to provide a fairness pipeline to guide the examination of fairness at each step (from input to output) in the workflow."

**Fairlearn**

Fairlearn is a Python package for both assessing and mitigating unfairness. It contains some algorithms for mitigating unfairness, including Correlation Remover for preprocessing and various kinds of UtilityParity algorithms like Demographic Parity, True/False Positive Rate Parity, Equalized Odds, and Error Rate Parity.