# Tutorial: Understanding AIF360
The tutorial this notebook follows can be found here: https://nbviewer.org/github/Trusted-AI/AIF360/blob/main/examples/tutorial_credit_scoring.ipynb

In [1]:
#!pip install aif360
#!pip install 'aif360[Reductions]'

#!pip install 'aif360[inFairness]'

In [2]:
# Load all necessary packages
import sys
sys.path.insert(1, "../")  

import numpy as np
np.random.seed(0)

from aif360.datasets import GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing

from IPython.display import Markdown, display

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[inFairness]'
pip install 'aif360[Reductions]'


# Prepare Dataset

In [3]:
import pandas as pd

In [4]:
# Load the CSV into a DataFrame; the path is relative to the notebook's
# working directory
csv_path = 'Data/fake_data.csv'
data_df = pd.read_csv(csv_path)

# Preview the first rows to check the columns that were read
data_df.head()

Unnamed: 0,age,income,gender,car,target
0,45,29923,male,yes,0
1,39,75755,male,yes,0
2,18,73277,male,yes,1
3,37,24442,male,yes,0
4,34,58901,male,yes,0


In [5]:
# Encode gender as a binary code: male -> 0, female -> 1.
# The dict gets its own name so the builtin `map` is not shadowed.
gender_map = {'male': 0, 'female': 1}

# Assign the replaced column back instead of calling
# `data_df['gender'].replace(..., inplace=True)`: that form mutates a
# temporary column selection, which pandas 2.x warns about and which no
# longer updates `data_df` once copy-on-write is the default (pandas 3.0).
# Values that are not keys of `gender_map` are left unchanged, as before.
data_df['gender'] = data_df['gender'].replace(gender_map)

# Preview the frame to confirm the gender column now holds the codes
data_df.head()

Unnamed: 0,age,income,gender,car,target
0,45,29923,0,yes,0
1,39,75755,0,yes,0
2,18,73277,0,yes,1
3,37,24442,0,yes,0
4,34,58901,0,yes,0


In [6]:
# Encode the car column as a binary code: yes -> 0, no -> 1.
# The dict gets its own name so the builtin `map` is not shadowed.
car_map = {'yes': 0, 'no': 1}

# Assign the replaced column back instead of calling
# `data_df['car'].replace(..., inplace=True)`: that form mutates a
# temporary column selection, which pandas 2.x warns about and which no
# longer updates `data_df` once copy-on-write is the default (pandas 3.0).
# Values that are not keys of `car_map` are left unchanged, as before.
data_df['car'] = data_df['car'].replace(car_map)

In [7]:
# Preview the frame after both remaps: gender and car should now hold 0/1 codes
data_df.head()

Unnamed: 0,age,income,gender,car,target
0,45,29923,0,0,0
1,39,75755,0,0,0
2,18,73277,0,0,1
3,37,24442,0,0,0
4,34,58901,0,0,0


In [8]:
from aif360.datasets import StandardDataset

In [9]:
# scikit-learn helper that partitions a frame into train and test subsets
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run
split_options = {'test_size': 0.2, 'random_state': 0}
data_df_train, data_df_test = train_test_split(data_df, **split_options)


In [10]:
# The privileged group is the one with the most advantage in the dataset,
# the unprivileged group the one with the least.
#
# Gender is the only protected attribute declared here. With the encoding
# used earlier (male -> 0, female -> 1):
#   privileged group   - female (gender == 1)
#   unprivileged group - male   (gender == 0)
# NOTE(review): age is not part of these group definitions, so the groups
# are split by gender only - confirm whether an age split was also intended.
protected_attribute = 'gender'
privileged_groups = [{protected_attribute: 1}]
unprivileged_groups = [{protected_attribute: 0}]

# Wrap the training frame as an AIF360 dataset: `target` is the label with
# 1 as the favorable outcome, and gender value 1 is the privileged class.
dataset_train = StandardDataset(
    data_df_train,
    label_name='target',
    favorable_classes=[1],
    protected_attribute_names=[protected_attribute],
    privileged_classes=[[1]],
)

In [11]:
# Fairness metric over the original (un-reweighed) training dataset,
# comparing the unprivileged and privileged gender groups
orig_group_spec = {
    'unprivileged_groups': unprivileged_groups,
    'privileged_groups': privileged_groups,
}
metric_orig_train = BinaryLabelDatasetMetric(dataset_train, **orig_group_spec)

In [48]:
# Render a heading, then report the gap in mean outcomes between the
# unprivileged and privileged groups on the original training data
display(Markdown("#### Original training dataset"))
orig_mean_diff = metric_orig_train.mean_difference()
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % orig_mean_diff)

#### Original training dataset

Difference in mean outcomes between unprivileged and privileged groups = -0.862179


Mean Difference < 0: The unprivileged group receives favorable outcomes less often than the privileged group, indicating potential bias against the unprivileged group.

### Transform the training dataset by reweighing

In [49]:
# Build the Reweighing pre-processor for the same gender groups used by the
# fairness metric
RW = Reweighing(privileged_groups=privileged_groups,
                unprivileged_groups=unprivileged_groups)

# Fit on the training dataset and produce its transformed version in one step.
# NOTE(review): the saved output of this cell shows a warning raised at
# Reweighing's `w_p_unfav = n_unfav*n_p / (n*n_p_unfav)` line - presumably
# `n_p_unfav` is zero (no privileged rows with an unfavorable label in this
# split); verify before relying on the resulting weights.
dataset_transf_train = RW.fit_transform(dataset_train)

  self.w_p_unfav = n_unfav*n_p / (n*n_p_unfav)


In [50]:
# Recompute the fairness metric on the reweighed training dataset, using the
# same unprivileged / privileged group definitions as for the original data
transf_group_spec = {
    'unprivileged_groups': unprivileged_groups,
    'privileged_groups': privileged_groups,
}
metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train, **transf_group_spec)

# Render a heading, then report the gap in mean outcomes after reweighing
display(Markdown("#### Transformed training dataset"))
transf_mean_diff = metric_transf_train.mean_difference()
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % transf_mean_diff)

#### Transformed training dataset

Difference in mean outcomes between unprivileged and privileged groups = -0.672500
