#### This notebook demonstrates the use of the EOP post-processing algorithm for bias mitigation.


In [23]:
%matplotlib inline
# Load all necessary packages
import sys
sys.path.append("../")
import numpy as np
from tqdm import tqdm
from warnings import warn

from aif360.datasets import BinaryLabelDataset
from aif360.datasets import StandardDataset
from aif360.metrics import ClassificationMetric, BinaryLabelDatasetMetric
from eq_odds_postprocessing import EqOddsPostprocessing
from common_utils import compute_metrics

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from IPython.display import Markdown, display
import matplotlib.pyplot as plt
from ipywidgets import interactive, FloatSlider
import pandas as pd
import pickle
from sklearn.linear_model import Lasso
from random import sample

## Huangrui's Dataset 

In [24]:
default_mappings = {
    #Huangrui flip the lable
    'label_maps': [{0: 'Did recid.', 1: 'No recid.'}],
    'protected_attribute_maps': [{0.0: 'Male', 1.0: 'Female'},
                                 {1.0: 'Caucasian', 0.0: 'Not Caucasian'}]
}
def code_continuous(df,collist,Nlevel):
    for col in collist:
        for q in range(1,Nlevel,1):
            threshold = df[~np.isnan(df[col])][col].quantile(float(q)/Nlevel)
            df[col+'_geq'+str(int(q))+'q'+str(threshold)] = (df[col] >= threshold).astype(float)
    df.drop(collist,axis = 1, inplace = True)
class CompasDataset(StandardDataset):
    """ProPublica COMPAS Dataset.

    See :file:`aif360/data/raw/compas/README.md`.
    """

    def __init__(self, label_name='Y', favorable_classes=[1],
                 protected_attribute_names=['sex'],
                 privileged_classes=[[1]],
                 instance_weights_name=None,
                 categorical_features=[],
                 features_to_keep=[],
                 features_to_drop=[], na_values=[],
                 custom_preprocessing=None,
                 metadata=default_mappings,
                 path='./Huangrui/recidivism/recidivism_test1.csv'):

    
        df = pd.read_csv(path,index_col=False)
        df.rename(columns={'Probationerssex_Female': 'sex'}, inplace=True)
        df.drop(["Probationerssex_Male","Probationerssex_Notascertained"], axis=1, inplace=True)
        numericals = [col for col in df.columns if len(df[col].unique())>2 and max(df[col])>1]
        code_continuous(df,numericals, 5)
        #flip the Y lable to 0: recid, 1: no recid
        df["Y"] = [1 if x == 0 else 0 for x in df["Y"]]
        
        super(CompasDataset, self).__init__(df=df, label_name=label_name,
            favorable_classes=favorable_classes,
            protected_attribute_names=protected_attribute_names,
            privileged_classes=privileged_classes,
            instance_weights_name=instance_weights_name,
            categorical_features=categorical_features,
            features_to_keep=features_to_keep,
            features_to_drop=features_to_drop, na_values=na_values,
            custom_preprocessing=custom_preprocessing, metadata=metadata)


#### Load dataset and specify options

In [25]:
## import dataset
dataset_used = "compas" 

privileged_groups = [{'sex': 1}] #Females
unprivileged_groups = [{'sex': 0}]
        
# Metric used (should be one of allowed_metrics)
metric_name = "Equal opportunity difference"

# Upper and lower bound on the fairness metric used
metric_ub = 0.05
metric_lb = -0.05
        
#random seed for calibrated equal odds prediction
random_seed = 12345679

# Verify metric name
allowed_metrics = ["Statistical parity difference",
                   "Average odds difference",
                   "Equal opportunity difference"]
if metric_name not in allowed_metrics:
    raise ValueError("Metric name should be one of allowed metrics")

#### Split into train, test and validation

In [29]:
experiments_info = {}
bef_experiments_info = {}
budget = 0.1
for K in range(1, 6):
    dataset_orig_train= CompasDataset(path="./Huangrui/recidivism/recidivism_train{}.csv".format(K),protected_attribute_names=['sex'],
                privileged_classes=[[1]])
    dataset_orig_test= CompasDataset(path="./Huangrui/recidivism/recidivism_test{}.csv".format(K),protected_attribute_names=['sex'],
                privileged_classes=[[1]])
    #only use the budget% of the training data
    dataset_orig_train,_ = dataset_orig_train.split([budget], shuffle=False)
    # Lasso linear classifier and predictions
    X_train = dataset_orig_train.features
    y_train = dataset_orig_train.labels.ravel()
    lmod = pickle.load(open('experiments/recidivism'+str(K)+'_sex_bmodel.pkl','rb'))["clf"]
   
    y_train_pred = lmod.predict(X_train)

    dataset_orig_train_pred = dataset_orig_train.copy(deepcopy=True)
    #also flip the labels for the train set
    dataset_orig_train_pred.labels = (y_train_pred<0.5).reshape(-1,1)
    sigmoid = lambda x: 1 / (1 + np.exp(2-4*x))
    #also flip the score for the train set
    dataset_orig_train_pred.scores = 1- sigmoid(y_train_pred).reshape(-1,1)


    dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)
    X_test = dataset_orig_test.features
    y_test = dataset_orig_test.labels
    y_test_pred = lmod.predict(X_test)
    #also flip the score for the test set
    dataset_orig_test_pred.scores = 1 -  sigmoid(y_test_pred).reshape(-1,1)
    #also flip the labels for the test set
    dataset_orig_test_pred.labels = (y_test_pred<0.5).reshape(-1,1)
    #load EOP Model
    EOP = EqOddsPostprocessing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=random_seed)
    #fit EOP model
    EOP = EOP.fit(dataset_orig_train, dataset_orig_train_pred)
    print("Fitted the EOP model")
    # get the EOP predictions for test (Transform the test set)
    dataset_transf_test_pred = EOP.predict(dataset_orig_test_pred)
    metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred, 
                    unprivileged_groups, privileged_groups)
    metric_test_aft = compute_metrics(dataset_orig_test, dataset_transf_test_pred, 
                    unprivileged_groups, privileged_groups)

    print("K = {}, budget = {}".format(K, budget))
    print("The Error for the test dataset is {:.4}".format(np.mean(dataset_orig_test.labels!=dataset_transf_test_pred.labels)))
    print("The Equal opportunity difference for the test dataset is {:.4}".format(metric_test_aft["Equal opportunity difference"]))
    experiments_info["K = {}, budget = {}".format(K, budget)] = {"Error": np.mean(dataset_orig_test.labels!=dataset_transf_test_pred.labels), "Equal opportunity difference": metric_test_aft["Equal opportunity difference"]}
    bef_experiments_info["K = {}, budget = {}".format(K, budget)] = {"Error": np.mean(dataset_orig_test.labels!=dataset_orig_test_pred.labels), "Equal opportunity difference": metric_test_bef["Equal opportunity difference"]}

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Fitted the EOP model
Balanced accuracy = 0.6926
Statistical parity difference = -0.1409
Disparate impact = 0.8370
Average odds difference = -0.1182
Equal opportunity difference = -0.0358
Theil index = 0.1175
Balanced accuracy = 0.6905
Statistical parity difference = -0.1091
Disparate impact = 0.8689
Average odds difference = -0.0894
Equal opportunity difference = -0.0010
Theil index = 0.1217
K = 1, budget = 0.1
The Error for the test dataset is 0.2652
The Equal opportunity difference for the test dataset is -0.001045


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Fitted the EOP model
Balanced accuracy = 0.7199
Statistical parity difference = -0.1491
Disparate impact = 0.8181
Average odds difference = -0.0899
Equal opportunity difference = -0.0833
Theil index = 0.1328
Balanced accuracy = 0.6284
Statistical parity difference = -0.0485
Disparate impact = 0.9408
Average odds difference = 0.0290
Equal opportunity difference = -0.0661
Theil index = 0.1326
K = 2, budget = 0.1
The Error for the test dataset is 0.3156
The Equal opportunity difference for the test dataset is -0.06607


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Fitted the EOP model
Balanced accuracy = 0.6792
Statistical parity difference = -0.1491
Disparate impact = 0.7700
Average odds difference = -0.1183
Equal opportunity difference = -0.0689
Theil index = 0.2766
Balanced accuracy = 0.6084
Statistical parity difference = 0.0248
Disparate impact = 1.0382
Average odds difference = 0.0691
Equal opportunity difference = 0.0458
Theil index = 0.2170
K = 3, budget = 0.1
The Error for the test dataset is 0.359
The Equal opportunity difference for the test dataset is 0.04577


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Fitted the EOP model
Balanced accuracy = 0.7068
Statistical parity difference = -0.1407
Disparate impact = 0.8254
Average odds difference = -0.1110
Equal opportunity difference = -0.0463
Theil index = 0.1468
Balanced accuracy = 0.6281
Statistical parity difference = -0.0216
Disparate impact = 0.9732
Average odds difference = 0.0238
Equal opportunity difference = 0.0039
Theil index = 0.1272
K = 4, budget = 0.1
The Error for the test dataset is 0.3139
The Equal opportunity difference for the test dataset is 0.003912
Fitted the EOP model
Balanced accuracy = 0.7136
Statistical parity difference = -0.1378
Disparate impact = 0.8360
Average odds difference = -0.1183
Equal opportunity difference = -0.0640
Theil index = 0.1206
Balanced accuracy = 0.6055
Statistical parity difference = -0.0207
Disparate impact = 0.9754
Average odds difference = 0.0229
Equal opportunity difference = -0.0420
Theil index = 0.1174
K = 5, budget = 0.1
The Error for the test dataset is 0.3273
The Equal opportunity dif

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [30]:
experiments_info

{'K = 1, budget = 0.1': {'Error': 0.2652360515021459,
  'Equal opportunity difference': -0.0010445866610250265},
 'K = 2, budget = 0.1': {'Error': 0.31558608844997854,
  'Equal opportunity difference': -0.06606947269597863},
 'K = 3, budget = 0.1': {'Error': 0.3589523400601116,
  'Equal opportunity difference': 0.045772801579223366},
 'K = 4, budget = 0.1': {'Error': 0.31386861313868614,
  'Equal opportunity difference': 0.003912405586045775},
 'K = 5, budget = 0.1': {'Error': 0.327319587628866,
  'Equal opportunity difference': -0.04195758564437191}}

# 0.01 we need to seperately run it

In [34]:
experiments_info = {}
bef_experiments_info = {}

In [39]:
budget = 0.01
K = 5
dataset_orig_train= CompasDataset(path="./Huangrui/recidivism/recidivism_train{}.csv".format(K),protected_attribute_names=['sex'],
            privileged_classes=[[1]])
dataset_orig_test= CompasDataset(path="./Huangrui/recidivism/recidivism_test{}.csv".format(K),protected_attribute_names=['sex'],
            privileged_classes=[[1]])
#only use the budget% of the training data
dataset_orig_train,_ = dataset_orig_train.split([budget], shuffle=False)
# Lasso linear classifier and predictions
X_train = dataset_orig_train.features
y_train = dataset_orig_train.labels.ravel()
lmod = pickle.load(open('experiments/recidivism'+str(K)+'_sex_bmodel.pkl','rb'))["clf"]

y_train_pred = lmod.predict(X_train)

dataset_orig_train_pred = dataset_orig_train.copy(deepcopy=True)
#also flip the labels for the train set
dataset_orig_train_pred.labels = (y_train_pred<0.5).reshape(-1,1)
sigmoid = lambda x: 1 / (1 + np.exp(2-4*x))
dataset_orig_train_pred.scores = 1- sigmoid(y_train_pred).reshape(-1,1)


dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)
X_test = dataset_orig_test.features
y_test = dataset_orig_test.labels
y_test_pred = lmod.predict(X_test)
dataset_orig_test_pred.scores = 1 -  sigmoid(y_test_pred).reshape(-1,1)
#also flip the labels for the test set
dataset_orig_test_pred.labels = (y_test_pred<0.5).reshape(-1,1)
#load EOP Model
EOP = EqOddsPostprocessing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups, seed=random_seed)
#fit EOP model
EOP = EOP.fit(dataset_orig_train, dataset_orig_train_pred)
print("Fitted the EOP model")
# get the EOP predictions for test (Transform the test set)
dataset_transf_test_pred = EOP.predict(dataset_orig_test_pred)
metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred, 
                unprivileged_groups, privileged_groups)
metric_test_aft = compute_metrics(dataset_orig_test, dataset_transf_test_pred, 
                unprivileged_groups, privileged_groups)

#自己计算error, 不是balanced accuracy！！！
print("K = {}, budget = {}".format(K, budget))
print("The Error for the test dataset is {:.4}".format(np.mean(dataset_orig_test.labels!=dataset_transf_test_pred.labels)))
print("The Equal opportunity difference for the test dataset is {:.4}".format(metric_test_aft["Equal opportunity difference"]))
experiments_info["K = {}, budget = {}".format(K, budget)] = {"Error": np.mean(dataset_orig_test.labels!=dataset_transf_test_pred.labels), "Equal opportunity difference": metric_test_aft["Equal opportunity difference"]}
bef_experiments_info["K = {}, budget = {}".format(K, budget)] = {"Error": np.mean(dataset_orig_test.labels!=dataset_orig_test_pred.labels), "Equal opportunity difference": metric_test_bef["Equal opportunity difference"]}

Fitted the EOP model
Balanced accuracy = 0.7136
Statistical parity difference = -0.1378
Disparate impact = 0.8360
Average odds difference = -0.1183
Equal opportunity difference = -0.0640
Theil index = 0.1206
Balanced accuracy = 0.6423
Statistical parity difference = -0.2045
Disparate impact = 0.7566
Average odds difference = -0.1684
Equal opportunity difference = -0.1961
Theil index = 0.2019
K = 5, budget = 0.01
The Error for the test dataset is 0.3286
The Equal opportunity difference for the test dataset is -0.1961


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [40]:
experiments_info

{'K = 1, budget = 0.01': {'Error': 0.3094420600858369,
  'Equal opportunity difference': 0.0872395144655419},
 'K = 2, budget = 0.01': {'Error': 0.31472735079433234,
  'Equal opportunity difference': -0.2132295415427945},
 'K = 3, budget = 0.01': {'Error': 0.37741519965650494,
  'Equal opportunity difference': -0.11676845254618917},
 'K = 4, budget = 0.01': {'Error': 0.31730356376127095,
  'Equal opportunity difference': -0.1308156278867576},
 'K = 5, budget = 0.01': {'Error': 0.3286082474226804,
  'Equal opportunity difference': -0.1961174551386623}}