In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
from helpers.aequitas_methods import *
import warnings
# NOTE(review): with no category/module filter this silences every warning
# for the whole session — consider narrowing it to the specific noisy warnings.
warnings.filterwarnings(action='ignore')

# Read predictions #

In [30]:
###### german credit data ######
# One CSV per model; the trailing comment names the variable each file is bound to.
(
    pred_forest,
    pred_lr,
    pred_tree,
    pred_cat,
    pred_cat_intersect,
    pred_cat_postprocess,
) = map(pd.read_csv, (
    'predictions/predictions_RandomForest.csv',          # pred_forest
    'predictions/predictions_LogisticRegression.csv',    # pred_lr
    'predictions/predictions_DecisionTree.csv',          # pred_tree
    'predictions/predictions_CatBoost.csv',              # pred_cat
    'predictions/predictions_catboost_intersect.csv',    # pred_cat_intersect
    'predictions/predictions_catboost_postprocess.csv',  # pred_cat_postprocess
))
####### medical data ########
(
    pred_med_forest,
    pred_med_lr,
    pred_med_tree,
    pred_med_cat,
) = map(pd.read_csv, (
    'predictions/predictions_med_data_RandomForest.csv',        # pred_med_forest
    'predictions/predictions_med_data_LogisticRegression.csv',  # pred_med_lr
    'predictions/predictions_med_data_DecisionTree.csv',        # pred_med_tree
    'predictions/predictions_med_data_Catboost.csv',            # pred_med_cat
))

In [31]:
# Lookup table: dataset id -> prediction frame. The ids are referenced by
# `ds_id` further down, so they must stay stable.
predictions_ds = dict([
    (0, pred_forest),           # german credit: random forest classifier
    (1, pred_lr),               # german credit: logistic regression
    (2, pred_tree),             # german credit: decision tree classifier
    (3, pred_cat),              # german credit: catboost classifier
    (4, pred_cat_intersect),    # german credit: catboost with intersectional columns
    (5, pred_med_cat),          # medical data: catboost classifier
    (6, pred_med_lr),           # medical data: logistic regression
    (7, pred_med_tree),         # medical data: decision tree classifier
    (8, pred_med_forest),       # medical data: random forest classifier
    (9, pred_cat_postprocess),  # german credit: catboost, post-processed predictions
])

In [32]:
# Id of the prediction set to analyse; must be one of the keys of predictions_ds.
ds_id = 3
# Work on a copy: a later cell casts the sensitive-attribute columns to str in
# place, which would otherwise silently mutate the frame stored in
# predictions_ds (and the originally loaded pred_* variable it aliases).
predictions = predictions_ds[ds_id].copy()

In [33]:
# Preview the first rows of the selected prediction frame.
predictions.head()

Unnamed: 0.1,Unnamed: 0,acc_status,acc_duration,credit_history,purpose,credit_amount,savings_acc,employment_time,installment_rate,gender,...,age,installment_plans,housing,num_credits_at_bank,job,num_people_maintenance,phone,foreigner,label_value,score
0,0,3,21,1,1,12680,4,1,4,1,...,1,1,0,1,0,1,1,1,0,1
1,1,3,30,1,7,3077,4,1,3,1,...,2,1,1,2,1,2,1,1,1,1
2,2,2,60,0,0,7297,3,1,4,1,...,1,1,2,1,1,1,0,1,0,0
3,3,0,24,0,5,3069,0,1,4,1,...,1,1,0,1,1,1,0,1,1,1
4,4,2,30,1,2,10623,3,1,3,1,...,1,1,0,3,0,2,1,1,1,0


In [34]:
# Number of rows per distinct value of the "age" column.
predictions["age"].value_counts()

age
1    75
0    65
2    60
Name: count, dtype: int64

In [35]:
def get_majority_classes(series):
    """
    Return the majority class of a categorical attribute.

    The majority class is the class with the highest empirical frequency.
    Ties are resolved in favour of the first class in sorted order. When all
    classes are equally frequent (no class lies strictly above the attribute's
    equiprobability 1 / n_classes), the first class in sorted order is
    returned instead of failing.

    Parameters
    ----------
    series : pandas.Series
        Attribute column. Only ``object`` and ``category`` dtypes are analysed.

    Returns
    -------
    The label of the most frequent class, or ``np.nan`` when the dtype is
    neither object nor category, or when the series has no non-missing values.
    """
    if series.dtype == object or series.dtype == 'category':
        # Missing values are not a class of their own; an object array mixing
        # strings with NaN also cannot be sorted by np.unique.
        unique_classes, class_counts = np.unique(series.dropna().to_numpy(), return_counts=True)
        if len(unique_classes) == 0:
            return np.nan
        # The most frequent class is always >= equiprobability, so taking the
        # arg-max replaces the former filter/sort/index chain, which raised
        # IndexError whenever no class was strictly above equiprobability.
        # np.argmax returns the first maximum, i.e. the first class in sorted
        # order on ties.
        return unique_classes[int(np.argmax(class_counts))]
    else:
        return np.nan

In [36]:
# Choose the sensitive attributes that belong to the selected prediction set.
if ds_id == 4:
    # Intersectional columns of the german credit data.
    # NOTE(review): '[40+]-female' is not in this list — confirm that is intended.
    sensitive_attributes = ['[40+]-male', '[40+]-less_one',
       '[40+]-four_to_seven', '[40+]-one_to_four', '[40+]-greater_eq_seven',
       '[40+]-unemployed', '[30-40]-female', '[30-40]-male',
       '[30-40]-less_one', '[30-40]-four_to_seven', '[30-40]-one_to_four',
       '[30-40]-greater_eq_seven', '[30-40]-unemployed', '[19-30]-female',
       '[19-30]-male', '[19-30]-less_one', '[19-30]-four_to_seven',
       '[19-30]-one_to_four', '[19-30]-greater_eq_seven', '[19-30]-unemployed',
       'female-less_one', 'female-four_to_seven', 'female-one_to_four',
       'female-greater_eq_seven', 'female-unemployed', 'male-less_one',
       'male-four_to_seven', 'male-one_to_four', 'male-greater_eq_seven',
       'male-unemployed']
elif 5 <= ds_id <= 8:  # medical dataset
    sensitive_attributes = ["Age Range", "Patient Gender"]
elif ds_id == 9:
    sensitive_attributes = ["age"]
else:
    sensitive_attributes = ["age", "gender"]

# Cast to str so the attributes are treated as categorical:
# get_majority_classes only analyses object/category columns.
predictions[sensitive_attributes] = predictions[sensitive_attributes].astype(str)

# Reference (majority) group per sensitive attribute.
reference_groups = {}
for attr in sensitive_attributes:
    result = get_majority_classes(predictions[attr])
    print("Majority class for: ", attr, " is:",result)
    # get_majority_classes already returns a single label. Indexing it with
    # [0] kept only the first character, which is wrong for any label longer
    # than one character.
    reference_groups[attr] = result

Majority class for:  age  is: 1
Majority class for:  gender  is: 1


# Aequitas - fairness report #

In [37]:
# 1. Calculate metrics for each group within the sensitive attributes.
# Only the score, the true label and the sensitive attributes are passed on.
df_test_no_features = predictions.loc[:, ["score", "label_value", *sensitive_attributes]]
xtab, absolute_metrics = init_group_and_get_metrics(
    df_test_no_features,
    attr_cols=sensitive_attributes,
)
# Show the per-group metrics rounded to two decimals.
xtab[['attribute_name', 'attribute_value'] + absolute_metrics].round(2)

Unnamed: 0,attribute_name,attribute_value,accuracy,tpr,tnr,for,fdr,fpr,fnr,npv,precision,ppr,pprev,prev
0,age,0,0.66,0.61,0.75,0.47,0.19,0.25,0.39,0.53,0.81,0.24,0.48,0.63
1,age,1,0.77,0.8,0.67,0.55,0.09,0.33,0.2,0.45,0.91,0.41,0.71,0.8
2,age,2,0.83,0.89,0.67,0.33,0.11,0.33,0.11,0.67,0.89,0.35,0.75,0.75
3,gender,0,0.71,0.64,0.92,0.54,0.04,0.08,0.36,0.46,0.96,0.19,0.5,0.75
4,gender,1,0.77,0.82,0.64,0.43,0.14,0.36,0.18,0.57,0.86,0.81,0.69,0.72


In [38]:
# Remaining xtab columns, i.e. everything that is not one of the absolute metrics.
xtab.loc[:, ~xtab.columns.isin(absolute_metrics)]

Unnamed: 0,model_id,score_threshold,k,attribute_name,attribute_value,pp,pn,fp,fn,tn,tp,group_label_pos,group_label_neg,group_size,total_entities
0,0,binary 0/1,129,age,0,31,34,6,16,18,25,41,24,65,200
1,0,binary 0/1,129,age,1,53,22,5,12,10,48,60,15,75,200
2,0,binary 0/1,129,age,2,45,15,5,5,10,40,45,15,60,200
3,0,binary 0/1,129,gender,0,24,24,1,13,11,23,36,12,48,200
4,0,binary 0/1,129,gender,1,105,47,15,20,27,90,110,42,152,200


In [39]:
"""
2. Calculate disparity metrics for each minority group wrt the majority group.
"""
# reference_groups holds the majority class chosen per sensitive attribute;
# presumably the helper uses it as the reference group for every disparity.
b, bdf = init_bias_and_print_metrics(
    xtab=xtab,
    dict=reference_groups,
    df_no_features=df_test_no_features,
)
# Show the disparity columns followed by their significance flags.
bdf[[
    'attribute_name',
    'attribute_value',
    *b.list_disparities(bdf),
    *b.list_significance(bdf),
]].round(2)

[]


Unnamed: 0,attribute_name,attribute_value,fdr_disparity,fnr_disparity,for_disparity,fpr_disparity,npv_disparity,ppr_disparity,pprev_disparity,precision_disparity,...,fdr_significance,fnr_significance,for_significance,fpr_significance,npv_significance,ppr_significance,pprev_significance,precision_significance,tnr_significance,tpr_significance
0,age,0,2.05,1.95,0.86,0.75,1.16,0.58,0.67,0.89,...,False,False,False,False,False,True,True,False,False,False
1,age,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,False,False,False,False,False,False,False,False,False,False
2,age,2,1.18,0.56,0.61,1.0,1.47,0.85,1.06,0.98,...,False,False,False,False,False,False,False,False,False,False
3,gender,0,0.29,1.99,1.27,0.23,0.8,0.23,0.72,1.12,...,False,False,False,False,False,True,True,False,False,False
4,gender,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,False,False,False,False,False,False,False,False,False,False


In [42]:
"""
3. Check if parity is met. By default, an attribute satisfies the parity if the metric value for the minority group lies between [0.8, 1.2]
"""
# NOTE(review): Aequitas' documented default threshold is tau = 0.8, i.e. a
# disparity must lie between 0.8 and 1/0.8 = 1.25 — confirm which bounds the
# helper actually applies, since the description above states [0.8, 1.2].
gaf,fdf, overall_fairness = init_fairness_and_print_results(bdf)
# Display the first of the three returned objects (one row per sensitive attribute).
gaf

Unnamed: 0,model_id,score_threshold,attribute_name,Statistical Parity,Impact Parity,FDR Parity,FPR Parity,FOR Parity,FNR Parity,TPR Parity,TNR Parity,NPV Parity,Precision Parity,TypeI Parity,TypeII Parity,Equalized Odds,Unsupervised Fairness,Supervised Fairness
0,0,binary 0/1,age,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False
1,0,binary 0/1,gender,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False


In [28]:
# Display the overall fairness summary returned by init_fairness_and_print_results.
overall_fairness

{'Unsupervised Fairness': False,
 'Supervised Fairness': False,
 'Overall Fairness': False}