In [1]:
import warnings
# Install a blanket 'ignore' filter: no Python warning of any category is shown
# for the rest of the session.
# NOTE(review): this also hides deprecation/runtime warnings raised by libraries
# used later in the notebook - consider narrowing the filter.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import os
from pipeline.utils import read_json, make_folder


## Read results from disk

In [3]:
# Names of the benchmark datasets whose results are collected below.
datasets = [
    'adult', 'german', 'compas',
    'cardio', 'bank', 'meps16', 'lawgpa', 'credit', 'UFRGS',
]

# One entry per dataset (same length as `datasets`); presumably aligned with it
# by position - TODO confirm. The last six datasets share the same column names.
y_cols = ['Income Binary', 'credit', 'two_year_recid'] + ['Y'] * 6
sensi_cols = ['sex', 'age', 'race'] + ['C0'] * 6

# Seeds identifying the repeated runs; the result file names below embed them.
seeds = [
    1, 12345, 6, 2211, 15, 88, 121, 433, 500, 1121,
    50, 583, 5278, 100000, 0xbeef, 0xcafe, 0xdead,
    0xdeadcafe, 0xdeadbeef, 0xbeefcafe,
]

In [4]:
# Extract results from MultiCC and multi-model runs into a long-format table
# (one row per data / seed / setting / group / metric), cached as a CSV file.
exp_setting = 'Multi_eval'
exp_suffix = 'orig'

res_path = 'intermediate/models/'
group_eval_metrics = ['AUC', 'ACC', 'SR', 'BalAcc']
overall_metrics = ['BalAcc', 'DI', 'EQDiff', 'AvgOddsDiff', 'SPDiff']
settings = ['A', 'A0', 'A1', 'E', 'F']

eval_path = 'intermediate/evals/'

eval_file = exp_setting+'_data'+str(len(datasets))+'_n'+str(len(seeds))+'.csv'

# Create the cache folder if needed so that neither the listing below nor the
# later to_csv call fails on a fresh checkout.
os.makedirs(eval_path, exist_ok=True)
all_eval_files = [f for f in os.listdir(eval_path) if not os.path.isdir(os.path.join(eval_path, f))]
if eval_file not in all_eval_files:
    # Collect plain rows and build the frame once at the end: appending through
    # `df.loc[len(df)] = ...` re-allocates on every row and leaves all columns
    # as object dtype, unlike the frame returned by read_csv in the else branch.
    rows = []

    for data_name in datasets:
        for seed in seeds:
            eval_res = read_json(res_path+data_name+'/'+'-'.join([exp_setting, str(seed), exp_suffix]))
            for setting in settings:
                # Per-group metrics, reported for the whole population and both groups.
                for group in ['all', 'G0', 'G1']:
                    base = [data_name, seed, setting, group]
                    for metric_i in group_eval_metrics:
                        rows.append(base + [metric_i, eval_res[setting][group][metric_i]])
                # Overall metrics are only read from the 'all' entry.
                # NOTE(review): 'BalAcc' is in both metric lists, so every setting
                # gets two identical 'all'/'BalAcc' rows - confirm this is intended.
                for metric_i in overall_metrics:
                    rows.append([data_name, seed, setting, 'all'] + [metric_i, eval_res[setting]['all'][metric_i]])

    multi_df = pd.DataFrame(rows, columns=['data', 'seed', 'setting', 'group', 'metric', 'value'])
    multi_df.to_csv(eval_path+eval_file, index=False)
    print('Save evaluation results at {}'.format(eval_path+eval_file))
else:
    multi_df = pd.read_csv(eval_path+eval_file)
    print('Read evaluation results from {}'.format(eval_path+eval_file))
    

Read evaluation results from intermediate/evals/Multi_eval_data9_n20.csv


In [5]:
# Extract results from SingleCC and no-intervention runs into a long-format table
# (one row per data / seed / setting / group / metric), cached as a CSV file.
exp_setting = 'Single_eval'
exp_suffix = 'SingleCC'

res_path = 'intermediate/models/'
group_eval_metrics = ['AUC', 'ACC', 'SR', 'BalAcc']
overall_metrics = ['BalAcc', 'DI', 'EQDiff', 'AvgOddsDiff', 'SPDiff']
settings = ['A', 'C']

eval_path = 'intermediate/evals/'

eval_file = exp_setting+'_data'+str(len(datasets))+'_n'+str(len(seeds))+'.csv'

# Create the cache folder if needed so that neither the listing below nor the
# later to_csv call fails on a fresh checkout.
os.makedirs(eval_path, exist_ok=True)
all_eval_files = [f for f in os.listdir(eval_path) if not os.path.isdir(os.path.join(eval_path, f))]
if eval_file not in all_eval_files:
    # Collect plain rows and build the frame once at the end: appending through
    # `df.loc[len(df)] = ...` re-allocates on every row and leaves all columns
    # as object dtype, unlike the frame returned by read_csv in the else branch.
    rows = []

    for data_name in datasets:
        for seed in seeds:
            eval_res = read_json(res_path+data_name+'/'+'-'.join([exp_setting, str(seed), exp_suffix]))
            for setting in settings:
                # Per-group metrics, reported for the whole population and both groups.
                for group in ['all', 'G0', 'G1']:
                    base = [data_name, seed, setting, group]
                    for metric_i in group_eval_metrics:
                        rows.append(base + [metric_i, eval_res[setting][group][metric_i]])
                # Overall metrics are only read from the 'all' entry.
                # NOTE(review): 'BalAcc' is in both metric lists, so every setting
                # gets two identical 'all'/'BalAcc' rows - confirm this is intended.
                for metric_i in overall_metrics:
                    rows.append([data_name, seed, setting, 'all'] + [metric_i, eval_res[setting]['all'][metric_i]])

    single_df = pd.DataFrame(rows, columns=['data', 'seed', 'setting', 'group', 'metric', 'value'])
    single_df.to_csv(eval_path+eval_file, index=False)
    print('Save evaluation results at {}'.format(eval_path+eval_file))
else:
    single_df = pd.read_csv(eval_path+eval_file)
    print('Read evaluation results from {}'.format(eval_path+eval_file))
    

Read evaluation results from intermediate/evals/Single_eval_data9_n20.csv


In [6]:
# Stack the multi-model and single-model results into one long table.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it the
# row labels of both inputs are carried over, so labels repeat and label-based
# selection (eval_df.loc[i]) can return more than one row.
eval_df = pd.concat([multi_df, single_df], ignore_index=True)
eval_df.head()

Unnamed: 0,data,seed,setting,group,metric,value
0,adult,1,A,all,AUC,0.541
1,adult,1,A,all,ACC,0.305
2,adult,1,A,all,SR,0.938037
3,adult,1,A,all,BalAcc,0.540552
4,adult,1,A,G0,AUC,0.545


In [7]:
# for visualization change the values to be consistent with the order that higher is better
def normalize_fairness_measures(x):
    """Rescale a (metric, value) pair so that a higher value is better.

    Parameters
    ----------
    x : positional pair, e.g. one row of ``eval_df[['metric', 'value']]``
        ``x.iloc[0]`` is the metric name, ``x.iloc[1]`` the raw value.

    Returns
    -------
    The value used for visualization:

    * ``EQDiff`` / ``AvgOddsDiff`` / ``SPDiff``: ``1 - |value|``.
    * ``DI``: ``1 / value`` when the value is above 1, otherwise the value.
    * every other metric: the value unchanged.
    """
    metric, raw = x.iloc[0], x.iloc[1]

    if metric == 'DI':
        # A ratio above 1 is folded back below 1 by taking its reciprocal.
        return 1 / raw if raw > 1 else raw

    if metric in ('EQDiff', 'AvgOddsDiff', 'SPDiff'):
        # Differences are turned into "1 minus magnitude".
        magnitude = -raw if raw < 0 else raw
        return 1 - magnitude

    # All remaining metrics pass through untouched.
    return raw
def add_vis_flag(x):
    """Return 1 when the raw value lies on the "other side" of its ideal, else 0.

    Parameters
    ----------
    x : positional pair, e.g. one row of ``eval_df[['metric', 'value']]``
        ``x.iloc[0]`` is the metric name, ``x.iloc[1]`` the raw value.

    Returns
    -------
    int
        ``1`` if the metric is ``EQDiff`` / ``AvgOddsDiff`` / ``SPDiff`` and the
        value is negative, or if the metric is ``DI`` and the value is above 1;
        ``0`` in every other case.
    """
    metric, raw = x.iloc[0], x.iloc[1]

    if metric in ('EQDiff', 'AvgOddsDiff', 'SPDiff'):
        flagged = raw < 0
    elif metric == 'DI':
        flagged = raw > 1
    else:
        # Other metrics are never flagged.
        flagged = False

    return 1 if flagged else 0


In [8]:
# Derive the visualization columns row by row from the (metric, value) pairs.
metric_value_pairs = eval_df[['metric', 'value']]
eval_df['norm_value'] = metric_value_pairs.apply(normalize_fairness_measures, axis=1)
eval_df['norm_flag'] = metric_value_pairs.apply(add_vis_flag, axis=1)

In [9]:
# Preview the first rows, now including the norm_value / norm_flag columns.
eval_df.head()

Unnamed: 0,data,seed,setting,group,metric,value,norm_value,norm_flag
0,adult,1,A,all,AUC,0.541,0.541,0
1,adult,1,A,all,ACC,0.305,0.305,0
2,adult,1,A,all,SR,0.938037,0.938037,0
3,adult,1,A,all,BalAcc,0.540552,0.540552,0
4,adult,1,A,G0,AUC,0.545,0.545,0


In [10]:
# Persist the combined table; the file name encodes the number of datasets and seeds.
exp1_file = 'intermediate/evals/Exp1_data' + str(len(datasets)) + '_n' + str(len(seeds)) + '.csv'
eval_df.to_csv(exp1_file, index=False)

In [11]:
# (number of rows, number of columns) of the combined table.
eval_df.shape

(3213, 8)