In [36]:
# Load all necessary packages
import sys
# Put the parent directory on the module search path (position 1, i.e. right
# after the first entry) so modules located one level up can be imported.
sys.path.insert(1, "../")

import numpy as np
# Seed NumPy's global random number generator so that later steps drawing from
# it give the same result on a fresh "Restart & Run All".
np.random.seed(0)

# GermanDataset is an alternative AIF360 dataset; it is not used in this notebook.
#from aif360.datasets import GermanDataset
from aif360.datasets import AdultDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing

# Markdown/display are used to render section headings in cell outputs.
from IPython.display import Markdown, display

# AIF360

### 4. Choose one of the provided common datasets to work with. -> Adult Census Income Dataset
### 5. Select a protected attribute from your chosen dataset for the next steps. -> Both sex and race would be possible; we have chosen sex.

### Step 2: Set bias detection options, load dataset, and split between train and test

In [37]:
# Load the dataset and set the bias detection options.
# 'sex' is the single protected attribute and 'Male' is its privileged class.
single_protected = ['sex']
single_privileged = [['Male']]

# Seed for the train/test shuffle. Passing it to split() explicitly keeps this
# cell idempotent: re-running it on its own reproduces the same partition
# instead of depending on how far the global NumPy RNG has already advanced.
# The value matches the global seed set in the imports cell.
SPLIT_SEED = 0

dataset_orig = AdultDataset(
    protected_attribute_names=single_protected,
    privileged_classes=single_privileged,
    # Columns excluded from the feature matrix ('race' is dropped because
    # only 'sex' is analysed as a protected attribute here).
    features_to_drop=['fnlwgt', 'native-country', 'race', 'capital-loss', 'education', 'relationship']
)

print(dataset_orig.feature_names)

# Group definitions consumed by the metric and mitigation cells below.
# In the loaded dataset the privileged class ('Male') is encoded as 1 and
# every other value of 'sex' as 0.
privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]

# Split between train (first 70% after shuffling) and test (remaining 30%)
dataset_orig_train, dataset_orig_test = dataset_orig.split(
    [0.7], shuffle=True, seed=SPLIT_SEED
)



['age', 'education-num', 'sex', 'capital-gain', 'hours-per-week', 'workclass=Federal-gov', 'workclass=Local-gov', 'workclass=Private', 'workclass=Self-emp-inc', 'workclass=Self-emp-not-inc', 'workclass=State-gov', 'workclass=Without-pay', 'marital-status=Divorced', 'marital-status=Married-AF-spouse', 'marital-status=Married-civ-spouse', 'marital-status=Married-spouse-absent', 'marital-status=Never-married', 'marital-status=Separated', 'marital-status=Widowed', 'occupation=Adm-clerical', 'occupation=Armed-Forces', 'occupation=Craft-repair', 'occupation=Exec-managerial', 'occupation=Farming-fishing', 'occupation=Handlers-cleaners', 'occupation=Machine-op-inspct', 'occupation=Other-service', 'occupation=Priv-house-serv', 'occupation=Prof-specialty', 'occupation=Protective-serv', 'occupation=Sales', 'occupation=Tech-support', 'occupation=Transport-moving']


### 6. Compute multiple fairness metrics on that attribute (minimum: 4).

In [38]:
# Dataset-level fairness metrics for the untouched training split, comparing
# the unprivileged group against the privileged group.
metric_orig_train = BinaryLabelDatasetMetric(
    dataset_orig_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

display(Markdown("#### Original training dataset"))
# A value of 0 means both groups receive the favorable label at the same rate;
# a negative value means the unprivileged group receives it less often.
mean_diff_orig = metric_orig_train.mean_difference()
print(
    "Difference in mean outcomes between unprivileged and privileged groups = %f"
    % mean_diff_orig
)

#### Original training dataset

Difference in mean outcomes between unprivileged and privileged groups = -0.201342


### 7. Try to mitigate bias using one of the bias mitigation algorithms provided by the toolkit. If required by the algorithm, train a classifier on the data.

#### TODO: What types of bias mitigation algorithms are available? :pencil2: Write down your answer in a markdown cell.
#### TODO: :pencil2: Do you see a difference between the different types of algorithms?

In [39]:
# Reweighing is a pre-processing mitigation: it is fitted on the training
# split and returns a transformed copy of that split, which is used from
# here on instead of the original.
RW = Reweighing(
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
dataset_transf_train = RW.fit_transform(dataset_orig_train)

### 8. Compute your fairness metrics again after the mitigation step

In [40]:
# Same metric setup as for the original data, now built on the transformed
# (reweighed) training split so the two results are directly comparable.
metric_transf_train = BinaryLabelDatasetMetric(
    dataset_transf_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

### 9. Compare your pre- and post-mitigation metrics. If you trained a classifier, evaluate its performance before and after the mitigation.

In [41]:
display(Markdown("#### Transformed training dataset"))
# Report the same statistic as for the original split so the pre- and
# post-mitigation values can be compared side by side.
mean_diff_transf = metric_transf_train.mean_difference()
print(
    "Difference in mean outcomes between unprivileged and privileged groups = %f"
    % mean_diff_transf
)

#### Transformed training dataset

Difference in mean outcomes between unprivileged and privileged groups = -0.000000


### TODO: :pencil2: What changes are you able to witness?

# TODO: Fairness Tools Overview
Research other fairness tools that are currently available. What are their use cases? Have some of them been used in the development of commercial products? Take a closer look at at least three other tools.

:pencil2: Document your findings.