In [1]:
import sklearn.metrics
import numpy as np
import pandas as pd
import gc

from transparentai.datasets import load_adult, load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from transparentai.models import classification

import transparentai.fairness as fairness

In [2]:
data = load_adult()
X, Y = data.drop(columns='income'), data['income']
X = X.select_dtypes('number')
Y = Y.replace({'>50K':1, '<=50K':0})
X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.33, random_state=42)
clf = RandomForestClassifier().fit(X_train,Y_train)
''

''

In [3]:
y_true = Y_train
y_true_valid = Y_valid
y_pred = clf.predict_proba(X_train)
y_pred_valid = clf.predict_proba(X_valid)

In [4]:
privileged_group = {
#     'gender':['Male']              
#     'marital-status': lambda x: 'Married' in x,
    'race':['White']
}

df_valid = data.loc[X_valid.index,:]
df_train = data.loc[X_train.index,:]

res_train1 = fairness.compute_fairness_metrics(y_true, 
                                     y_pred, 
                                     df_train,
                                     privileged_group)

res_valid1 = fairness.compute_fairness_metrics(y_true_valid, 
                                     y_pred_valid, 
                                     df_valid,
                                     privileged_group)

In [5]:
res_train1

{'race': {'statistical_parity_difference': -0.09440105444246027,
  'disparate_impact': 0.6284450909465391,
  'equal_opportunity_difference': 0.002317519974318505,
  'average_odds_difference': 0.0010454175477514654,
  'theil_index': 0.000955280843598188}}

In [6]:
from aif360.algorithms import preprocessing

In [28]:
from aif360.datasets import StandardDataset
from sklearn.preprocessing import LabelEncoder

def df_to_StandardDataset(df, target, privileged_group, pos_label):
    """
    """
    privileged_classes = [v for (k,v) in privileged_group.items()]
    
    
    df[target] = (df[target] == pos_label).astype(int)
    
    num_feats = df.select_dtypes('number').columns.tolist()
    
    protected_attribute_names = list(privileged_group.keys())
    privileged_classes = list()

    # Use Label Encoder for categorical columns (including target column)
    for attr, values in privileged_group.items():
        if attr in num_feats:
            privileged_classes.append(values)
            continue
        
        le = LabelEncoder()
        le.fit(df[attr])

        df[attr] = le.transform(df[attr])
        if type(values) == type(lambda x:x):
            fn = values
            tmp = [i for (i,v) in enumerate(le.classes_) if fn(v)]
            
        else:
            tmp = [np.where(le.classes_ == v)[0][0] for v in values]
        privileged_classes.append(tmp)
        
        num_feats += [attr]
            
    
    categorical_features = [c for c in df.columns if c not in num_feats]
    
    return StandardDataset(df=df, 
                   label_name=target, 
                   favorable_classes=[1],
                   categorical_features=categorical_features,
                   protected_attribute_names=protected_attribute_names,
                   privileged_classes=privileged_classes)
    
privileged_group = {
    'gender':['Male'],          
    'marital-status': lambda x: 'Married' in x,
    'race':['White']
}

    

gc.collect()
test = df_to_StandardDataset(data.copy(), 
                         'income', 
                         privileged_group,
                         '>50K')

print(test.protected_attribute_names)
print(test.privileged_protected_attributes)

['gender', 'marital-status', 'race']
[array([1.]), array([1., 2., 3.]), array([4.])]


In [31]:
from aif360.datasets import StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms.preprocessing import LFR, Reweighing
from aif360.algorithms.inprocessing import PrejudiceRemover
from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing, EqOddsPostprocessing, RejectOptionClassification

In [32]:
unprivileged_groups = [{a:test.unprivileged_protected_attributes[i]} for (i,a) in enumerate(test.protected_attribute_names)]
privileged_groups = [{a:test.privileged_protected_attributes[i]} for (i,a) in enumerate(test.protected_attribute_names)]

RW = Reweighing(unprivileged_groups, privileged_groups)
# RW.fit(data_orig_train)
data_transf_train = RW.fit_transform(test)

In [34]:
data_transf_train.instance_weights

array([1.01549893, 1.01549893, 0.95372367, ..., 0.86725774, 1.01549893,
       2.08883832])

In [35]:
test.instance_weights

array([1., 1., 1., ..., 1., 1., 1.])

In [37]:
debiased_model = PrejudiceRemover(sensitive_attr=test.protected_attribute_names[0], eta = 25.0)
debiased_model.fit(test)

KeyboardInterrupt: 