## Performing Bias Mitigations on Multiple sensitive features
This is an example on performing sequential bias mitigations for multiple sensitive feature.

Since AIF360 doesnt seem to have support for multiple sensitive features, we need to perform this manually, sequentially.

In [None]:
# import relevant dependencies
import pandas as pd

from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from fairai.mitigator import FairAIMitigator
from fairai.utils.metrics import bias_fairness_report

In [2]:
# fetch raw-data from sklearn.datasets
raw_data = fetch_openml(data_id=1590, as_frame=True)

# preview raw-data
raw_data.frame

  warn(


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,class
0,25.0,Private,226802.0,11th,7.0,Never-married,Machine-op-inspct,Own-child,Black,Male,0.0,0.0,40.0,United-States,<=50K
1,38.0,Private,89814.0,HS-grad,9.0,Married-civ-spouse,Farming-fishing,Husband,White,Male,0.0,0.0,50.0,United-States,<=50K
2,28.0,Local-gov,336951.0,Assoc-acdm,12.0,Married-civ-spouse,Protective-serv,Husband,White,Male,0.0,0.0,40.0,United-States,>50K
3,44.0,Private,160323.0,Some-college,10.0,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688.0,0.0,40.0,United-States,>50K
4,18.0,,103497.0,Some-college,10.0,Never-married,,Own-child,White,Female,0.0,0.0,30.0,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,27.0,Private,257302.0,Assoc-acdm,12.0,Married-civ-spouse,Tech-support,Wife,White,Female,0.0,0.0,38.0,United-States,<=50K
48838,40.0,Private,154374.0,HS-grad,9.0,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0.0,0.0,40.0,United-States,>50K
48839,58.0,Private,151910.0,HS-grad,9.0,Widowed,Adm-clerical,Unmarried,White,Female,0.0,0.0,40.0,United-States,<=50K
48840,22.0,Private,201490.0,HS-grad,9.0,Never-married,Adm-clerical,Own-child,White,Male,0.0,0.0,20.0,United-States,<=50K


In [3]:
# Step 1: Data pre-processing
X_raw = pd.get_dummies(raw_data.data)
X = pd.DataFrame(MinMaxScaler().fit_transform(X_raw), columns=X_raw.columns)
y = 1 * (raw_data.target == ">50K")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Step 2a: Model Training
LR = LogisticRegression(solver="liblinear", random_state=42)
LR.fit(X_train, y_train)

# Step 2b: Bias Mitigation
y_val_pred_proba = LR.predict_proba(X_val)[:,1].reshape(-1,1)

protected_attribute_name = 'sex_Male'
model_sex = FairAIMitigator(protected_attribute_name, mitigation_algorithm='ceo')
model_sex.fit(X_val, y_val, y_val_pred_proba)

protected_attribute_name = 'race_White'
model_race = FairAIMitigator(protected_attribute_name, mitigation_algorithm='ceo')
model_race.fit(X_val, y_val, y_val_pred_proba)

# Step 3: Prediction
y_test_pred = LR.predict(X_test)
y_test_pred_temp = model_sex.get_outputs(X=X_test, y_pred=y_test_pred)
y_test_pred_mit = model_race.get_outputs(X=X_test, y_pred=y_test_pred_temp)

In [4]:
bias_scores = bias_fairness_report(X_test, y_test, y_test_pred, y_test_pred_mit, 'sex_Male')


    Generating report for sex_Male sensitive attribute...
    
    Disparity in Data: 0.362
    
    Unmitigated Model
        Accuracy: 0.853
        Disparate Impact: 0.277
        Error Rate Difference: 0.113
    
    Mitigated Model
        Accuracy: 0.814
        Disparate Impact: 0.454
        Error Rate Difference: 0.162
