In [97]:
import warnings
import sys
sys.path.append('../')
warnings.filterwarnings('ignore')

In [98]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import os
from tabulate import tabulate
from texttable import Texttable

import latextable
from PrepareData import read_json, make_folder

In [99]:
# for visualization change the values to be consistent with the order that higher is better
def normalize_fairness_measures(x):
    """Map a (metric name, value) pair to a score where a higher value is better.

    Args:
        x: positional container (e.g. one DataFrame row) whose first element is
            the metric name and whose second element is the raw metric value.

    Returns:
        * ``1 - abs(value)`` for every metric whose name contains ``'Diff'``;
        * ``min(value, 1 / value)`` for ``'DI'`` values greater than 1, so that
          ratios above 1 are folded back below 1;
        * the raw value otherwise.
    """
    metric_name = x.iloc[0]
    raw_value = x.iloc[1]

    if 'Diff' in metric_name:
        # Difference-style metrics: closer to zero is better.
        return 1 - abs(raw_value)

    if metric_name == 'DI' and raw_value > 1:
        return min(raw_value, 1 / raw_value)

    # 'DI' values at or below 1 and all remaining metrics are kept as they are.
    return raw_value
def add_vis_flag(x):
    """Return a 0/1 marker telling which side a fairness metric leans to.

    Args:
        x: positional container (e.g. one DataFrame row) whose first element is
            the metric name and whose second element is the raw metric value.

    Returns:
        1 when the sign of the value indicates that G0 has the better outcome
        (wording taken from the original comments), otherwise 0:

        * ``ERRDiff`` / ``FNRDiff`` / ``FPRDiff`` (lower is better): 0 if the
          value is positive, else 1;
        * any other ``*Diff*`` metric (higher is better): 0 if the value is
          negative, else 1;
        * ``DI``: 1 if the value exceeds 1, else 0;
        * every other metric: always 0.
    """
    metric_name = x.iloc[0]

    if 'Diff' in metric_name:
        raw_value = x.iloc[1]
        if metric_name in ('ERRDiff', 'FNRDiff', 'FPRDiff'):
            unfavourable = raw_value > 0
        else:
            unfavourable = raw_value < 0
        return 0 if unfavourable else 1

    if metric_name == 'DI':
        return 1 if x.iloc[1] > 1 else 0

    return 0


In [100]:
# The repository root is the first sys.path entry with every 'notebooks'
# substring stripped out; the evaluation CSVs are read from its 'eval/' folder.
first_path_entry = sys.path[0]
repo_dir = first_path_entry.replace('notebooks', '')
eval_path = '{}eval/'.format(repo_dir)

In [101]:
# Seeds of the evaluated runs; len(seeds) is part of the cached results file name.
# (0xbeef, 0xcafe and 0xdead are ordinary ints: 48879, 51966 and 57005.)
seeds = [
    1, 12345, 6, 2211, 15, 88, 121, 433, 500, 1121,
    50, 583, 5278, 100000, 0xbeef, 0xcafe, 0xdead, 7777, 100, 923,
]
# Alternative 15-seed subset:
# seeds = [88, 121, 433, 500, 1121, 50, 583, 5278, 100000, 0xbeef, 0xcafe, 0xdead, 7777, 100, 923]

# Model identifiers as they appear in the 'model' column of the results.
models = ['LR', 'TR']

# Dataset identifiers; each one has its own per-dataset results file.
datasets = [
    'lsac', 'cardio', 'bank', 'meps16', 'credit',
    'ACSE', 'ACSP', 'ACSH', 'ACSM', 'ACSI',
]

In [102]:
# Suffix selecting which family of per-dataset result files ('res<suffix>-<data>.csv') is loaded.
eval_suffix = '-min-0.5'
# eval_suffix = '-sim-pos'
# eval_suffix = '-sim-both-1'
# eval_suffix = ''

# The merged table is cached on disk. The cache name only encodes the number of
# datasets, the number of seeds and the suffix, so delete the file by hand if the
# per-dataset results change without any of those three changing.
eval_file = 'scc_mcc_datasets{}_n{}{}.csv'.format(len(datasets), len(seeds), eval_suffix)
eval_file_path = eval_path + eval_file

if os.path.exists(eval_file_path):
    eval_df = pd.read_csv(eval_file_path)
    print('Read evaluation results at {}'.format(eval_file_path))
else:
    # Collect the per-dataset frames and concatenate once: concatenating inside the
    # loop copies the accumulated frame on every iteration.
    per_dataset_frames = []
    for data_name in datasets:
        cur_eval_df = pd.read_csv(eval_path + 'res{}-{}.csv'.format(eval_suffix, data_name))
        metric_and_value = cur_eval_df[['metric', 'value']]
        # Row-wise derived columns: "higher is better" score and the direction flag.
        cur_eval_df['norm_value'] = metric_and_value.apply(normalize_fairness_measures, axis=1)
        cur_eval_df['norm_flag'] = metric_and_value.apply(add_vis_flag, axis=1)
        per_dataset_frames.append(cur_eval_df)

    if per_dataset_frames:
        # ignore_index gives the same clean 0..n-1 index that the cached path gets from read_csv.
        eval_df = pd.concat(per_dataset_frames, ignore_index=True)
    else:
        # pd.concat rejects an empty list; keep the previous "empty frame" outcome.
        eval_df = pd.DataFrame()

    eval_df.to_csv(eval_file_path, index=False)
    print('Save evaluation results at {}'.format(eval_file_path))

Read evaluation results at /Users/keyang/Projects/PubRepo/NonInvasiveTool4FairML/eval/scc_mcc_datasets10_n20-min-0.5.csv


In [103]:
# Preview the first rows of the merged evaluation table (raw value plus the two derived columns).
eval_df.head()

Unnamed: 0,data,model,seed,method,group,metric,value,norm_value,norm_flag
0,lsac,LR,1,MCC-MIN,all,AUC,0.725,0.725,0
1,lsac,LR,1,MCC-MIN,all,ACC,0.779,0.779,0
2,lsac,LR,1,MCC-MIN,all,SR,0.732571,0.732571,0
3,lsac,LR,1,MCC-MIN,all,BalAcc,0.725224,0.725224,0
4,lsac,LR,1,MCC-MIN,G0,AUC,0.633,0.633,0


In [104]:
def bar_plots(df, output_name, vis_datasets, vis_metric, vis_settings, group_input=None,
              legend_names=None, font_label=26, font_legend=18, 
              colors=['#ffffff', '#fffacd', '#3cb371','#20603d', '#0e6670'], bg_color = '#f3f3f3', x_tick_offset=6.3,
              x_ticks=None, y_label=None, x_label=None, legend=True, legend_col=5, save_to_disc=True):
    """Draw a grouped bar chart: one group per dataset, one bar per method.

    Each bar shows the mean of ``norm_value`` (error bar: its standard deviation)
    over the rows of ``df`` matching the dataset, ``vis_metric``, ``group_input``
    and the method.

    Bar decorations:
        * hatched with a red edge when more than ``int(0.9 * n)`` of the ``n``
          matching rows have ``norm_flag`` set;
        * dashed border when no row matches (bar height 0), or when the values
          are constant at 0.5 for ``'BalAcc'`` or at 1.0 for ``'DI'``;
        * a mean of exactly 0 is drawn at 0.01 so that the bar stays visible.

    Args:
        df: DataFrame with the columns ``data``, ``metric``, ``group``,
            ``method``, ``norm_value`` and ``norm_flag``.
        output_name: target path handed to ``plt.savefig``.
        vis_datasets: dataset names, one bar group each, in plotting order.
        vis_metric: metric name to plot.
        vis_settings: method names, one bar per method inside every group.
        group_input: value matched against the ``group`` column.
        legend_names: legend labels; falls back to ``vis_settings``.
        font_label: base font size (x tick labels use ``font_label - 10``,
            the legend uses ``font_label - 7``).
        font_legend: accepted for compatibility; not used by the body.
        colors: face colours, picked by the position of the method.
        bg_color: axes background colour.
        x_tick_offset: distance between consecutive x ticks.
        x_ticks: optional tick labels, one per dataset.
        y_label: optional y axis label.
        x_label: optional x axis label.
        legend: whether to draw the legend above the axes.
        legend_col: number of legend columns.
        save_to_disc: when true, save the figure to ``output_name``, print the
            path and close the current figure.
    """
    fig, ax = plt.subplots(1, figsize=(10, 4), dpi=200)
    frame = df.copy()

    heights, errors, positions = [], [], []
    hatched, border_styles = [], []
    cursor = 0  # running x position, advanced by 0.83 per bar

    for group_idx, dataset in enumerate(vis_datasets):
        selected = (frame['data'] == dataset) & (frame['metric'] == vis_metric) & (frame['group'] == group_input)
        dataset_rows = frame[selected].copy()

        for method in vis_settings:
            method_rows = dataset_rows[dataset_rows['method'] == method]

            if method_rows.shape[0] == 0:
                # Nothing recorded for this method: zero-height placeholder with a dashed border.
                hatched.append(False)
                border_styles.append('dashed')
                heights.append(0)
                errors.append(0)
            else:
                values = np.array(method_rows['norm_value'])
                n_flagged = sum(np.array(method_rows['norm_flag']))
                # Hatch only when more than 90% of the rows carry the flag.
                hatched.append(bool(n_flagged > int(len(values) * 0.9)))

                mean_value = np.mean(values)
                std_value = np.std(values)
                if mean_value == 0:
                    # Lift the bar slightly so that it is still drawn.
                    mean_value = 0.01
                    border = 'solid'
                else:
                    flat_balacc = vis_metric == 'BalAcc' and mean_value == 0.5
                    flat_di = vis_metric == 'DI' and mean_value == 1.0
                    border = 'dashed' if std_value == 0 and (flat_balacc or flat_di) else 'solid'

                border_styles.append(border)
                heights.append(mean_value)
                errors.append(std_value)

            # Every new dataset adds an extra gap of 2 on top of the running position.
            positions.append(cursor + group_idx * 2)
            cursor += 0.83

    bars = ax.bar(positions, heights, yerr=errors)

    n_bars = len(vis_settings)
    for bar_idx, bar in enumerate(bars):
        bar.set_facecolor(colors[bar_idx % n_bars])
        if hatched[bar_idx]:
            bar.set_hatch('//')
            bar.set_edgecolor("#cb4154")
        else:
            bar.set_edgecolor("black")
        bar.set_linestyle(border_styles[bar_idx])

    # Legend entries come from proxy bars drawn at x=-2, left of the visible x range.
    legend_labels = legend_names if legend_names else vis_settings
    for _, fill, label in zip(vis_settings, colors, legend_labels):
        ax.bar(-2, 1, ec='black', fc=fill, label=label)

    ax.set_facecolor(bg_color)
    ax.yaxis.grid(True)
    plt.xlim([-1, max(positions) + 1])
    plt.xticks([k * x_tick_offset + 0.8 for k in range(len(vis_datasets))])
    if x_ticks:
        tick_locs, _ = plt.xticks()
        plt.xticks(tick_locs, x_ticks, horizontalalignment='center', fontsize=font_label-10, rotation=0)

    plt.ylim([0.0, 1.0])
    plt.yticks(fontsize=font_label)

    if y_label:
        plt.ylabel(y_label, fontsize=font_label)
    if x_label:
        plt.xlabel(x_label, fontsize=font_label)

    if legend:
        plt.legend(bbox_to_anchor=(0, 1, 1, 0), loc="lower center", mode="expand", ncol=legend_col,
                   frameon=False, borderaxespad=0, handlelength=0.9, handletextpad=0.3,
                   fontsize=font_label-7)

    if save_to_disc:
        plt.savefig(output_name, bbox_inches="tight")
        print('Bar plot is saved at ', output_name)
        plt.close()

In [105]:
# Figures are written below 'intermediate/plots/'. When the suffix contains
# 'sort_' or 'min_', an 'err/' subfolder is used instead and make_folder is
# called on it (presumably creating it — see PrepareData.make_folder).
plot_path = '{}intermediate/plots/'.format(repo_dir)
if any(marker in eval_suffix for marker in ('sort_', 'min_')):
    plot_path += 'err/'
    make_folder(plot_path)

In [106]:
# Longer preview: the first ten rows already show that metrics are stored per group ('all', 'G0', 'G1').
eval_df.head(10)

Unnamed: 0,data,model,seed,method,group,metric,value,norm_value,norm_flag
0,lsac,LR,1,MCC-MIN,all,AUC,0.725,0.725,0
1,lsac,LR,1,MCC-MIN,all,ACC,0.779,0.779,0
2,lsac,LR,1,MCC-MIN,all,SR,0.732571,0.732571,0
3,lsac,LR,1,MCC-MIN,all,BalAcc,0.725224,0.725224,0
4,lsac,LR,1,MCC-MIN,G0,AUC,0.633,0.633,0
5,lsac,LR,1,MCC-MIN,G0,ACC,0.614,0.614,0
6,lsac,LR,1,MCC-MIN,G0,SR,0.297794,0.297794,0
7,lsac,LR,1,MCC-MIN,G0,BalAcc,0.632847,0.632847,0
8,lsac,LR,1,MCC-MIN,G1,AUC,0.72,0.72,0
9,lsac,LR,1,MCC-MIN,G1,ACC,0.808,0.808,0


In [107]:
# Spot-check a single run (lsac / LR / seed 1 / OMN-ONE) on group G0.
# In the recorded output BalAcc is 0.5 while SR is 1.0; the SR table below treats BalAcc == 0.5 specially.
eval_df.query('data=="lsac" and seed==1 and model=="LR" and group=="G0" and method=="OMN-ONE"').head(10)

Unnamed: 0,data,model,seed,method,group,metric,value,norm_value,norm_flag
144,lsac,LR,1,OMN-ONE,G0,AUC,0.5,0.5,0
145,lsac,LR,1,OMN-ONE,G0,ACC,0.544,0.544,0
146,lsac,LR,1,OMN-ONE,G0,SR,1.0,1.0,0
147,lsac,LR,1,OMN-ONE,G0,BalAcc,0.5,0.5,0


In [113]:
# Build the selection-rate (SR) table: one row per (model, method), one column per
# dataset. Each cell is the mean normalized SR of group G0 over all matching rows
# of eval_df, formatted as a percentage with one decimal.
group_input = 'G0'
cur_metric = 'SR'

# Methods reported for LR; every other model additionally reports CAP-ONE.
lr_methods = ['MCC-MIN', 'MCC-W1', 'MCC-W2', 'SEP', 'OMN-ONE', 'SCC-KAM']
other_methods = lr_methods + ['CAP-ONE']

table_header = ['model', 'method'] + datasets
# `rows` (header first) is also what the LaTeX export cell consumes.
rows = [table_header]

for model_i in models:
    methods = lr_methods if model_i == 'LR' else other_methods
    for method_i in methods:
        cur_row = [model_i, method_i]
        for data_name in datasets:
            cur_df = eval_df.query('data=="{}" and model=="{}" and method=="{}"'.format(data_name, model_i, method_i))
            cur_sr = cur_df.query('metric == "{}" and group =="{}"'.format(cur_metric, group_input))['norm_value']
            cur_balacc = cur_df.query('metric == "BalAcc" and group =="all"')['norm_value']

            if cur_balacc.mean() == 0.5:
                # Overall balanced accuracy averaging exactly 0.5: report an SR of 0 and log the case.
                cur_sr_res = 0
                print('BalAcc 0.5', data_name, model_i, method_i)
            else:
                # NaN (rendered as 'nan%') when no row matches this combination.
                cur_sr_res = cur_sr.mean()

            # np.mean keeps the value a numpy float so rounding/formatting match for both branches.
            cur_row.append('{}%'.format(round(np.mean(cur_sr_res)*100, 1)))

        rows.append(cur_row)

# Create the frame once from the collected rows instead of enlarging it row by row.
err_df = pd.DataFrame(rows[1:], columns=table_header)

BalAcc 0.5 lsac LR OMN-ONE
BalAcc 0.5 cardio LR OMN-ONE
BalAcc 0.5 cardio LR SCC-KAM


In [114]:
# Display the table assembled in the previous cell (percentages are stored as strings).
err_df

Unnamed: 0,model,method,lsac,cardio,bank,meps16,credit,ACSE,ACSP,ACSH,ACSM,ACSI
0,LR,MCC-MIN,23.5%,29.1%,38.6%,24.2%,52.4%,47.4%,50.3%,38.8%,38.1%,49.1%
1,LR,MCC-W1,53.4%,20.2%,20.0%,34.7%,21.3%,79.5%,89.8%,95.0%,39.3%,72.5%
2,LR,MCC-W2,47.1%,37.3%,20.0%,24.8%,20.5%,96.6%,57.8%,90.0%,39.3%,80.1%
3,LR,SEP,17.1%,27.3%,33.4%,25.3%,16.0%,48.3%,48.5%,32.7%,35.1%,57.7%
4,LR,OMN-ONE,0.0%,0.0%,41.2%,71.0%,27.5%,54.8%,97.6%,100.0%,40.0%,47.3%
5,LR,SCC-KAM,15.6%,0.0%,33.9%,22.3%,31.7%,47.6%,46.8%,48.1%,40.0%,56.2%
6,TR,MCC-MIN,31.5%,31.7%,37.2%,24.4%,36.8%,46.8%,43.3%,68.6%,60.2%,49.4%
7,TR,MCC-W1,50.4%,29.9%,95.0%,39.0%,61.0%,73.3%,90.4%,98.4%,62.3%,97.3%
8,TR,MCC-W2,30.3%,32.5%,95.0%,25.7%,27.0%,96.6%,43.3%,95.7%,62.3%,99.9%
9,TR,SEP,37.0%,26.1%,29.9%,27.0%,31.5%,47.3%,42.3%,69.2%,49.8%,53.4%


In [115]:

# Export the table held in `rows` (first entry = header row, remaining entries = body rows).

# Texttable version with centred columns, a header rule and vertical lines.
# It is only constructed here; nothing in this cell renders it.
table = Texttable()
table.set_cols_align(["c"] * len(rows[0]))
table.set_deco(Texttable.HEADER | Texttable.VLINES)
table.add_rows(rows)

# Optional plain-text renderings, kept for reference:
# print('Tabulate Table:')
# print(tabulate(rows, headers='firstrow'))

# print('\nTexttable Table:')
# print(table.draw())

# LaTeX tabular produced by tabulate, taking the first row as the column headers.
print('\nTabulate Latex:')
print(tabulate(rows, headers='firstrow', tablefmt='latex'))


Tabulate Latex:
\begin{tabular}{llllllllllll}
\hline
 model   & method   & lsac   & cardio   & bank   & meps16   & credit   & ACSE   & ACSP   & ACSH   & ACSM   & ACSI   \\
\hline
 LR      & MCC-MIN  & 23.5\%  & 29.1\%    & 38.6\%  & 24.2\%    & 52.4\%    & 47.4\%  & 50.3\%  & 38.8\%  & 38.1\%  & 49.1\%  \\
 LR      & MCC-W1   & 53.4\%  & 20.2\%    & 20.0\%  & 34.7\%    & 21.3\%    & 79.5\%  & 89.8\%  & 95.0\%  & 39.3\%  & 72.5\%  \\
 LR      & MCC-W2   & 47.1\%  & 37.3\%    & 20.0\%  & 24.8\%    & 20.5\%    & 96.6\%  & 57.8\%  & 90.0\%  & 39.3\%  & 80.1\%  \\
 LR      & SEP      & 17.1\%  & 27.3\%    & 33.4\%  & 25.3\%    & 16.0\%    & 48.3\%  & 48.5\%  & 32.7\%  & 35.1\%  & 57.7\%  \\
 LR      & OMN-ONE  & 0.0\%   & 0.0\%     & 41.2\%  & 71.0\%    & 27.5\%    & 54.8\%  & 97.6\%  & 100.0\% & 40.0\%  & 47.3\%  \\
 LR      & SCC-KAM  & 15.6\%  & 0.0\%     & 33.9\%  & 22.3\%    & 31.7\%    & 47.6\%  & 46.8\%  & 48.1\%  & 40.0\%  & 56.2\%  \\
 TR      & MCC-MIN  & 31.5\%  & 31.7\%    & 37

In [None]:
# TODO: get the SR table for all the datasets comparing the best MCC variants with
# SEP, SCC+K, OMN and CAP, to show the usefulness of MCC.

# The group to plot is read from the suffix: a suffix shaped like '-sort_g0' yields 'G0'.
# Suffixes without a '_<group>' part (e.g. '-min-0.5', '-sim-pos', '') used to raise
# IndexError here; they now fall back to a default group.
# NOTE(review): the 'G0' fallback mirrors the group used for the SR table cell —
# confirm that it is the intended default.
suffix_fields = eval_suffix.split('-')
setting_tokens = suffix_fields[1].split('_') if len(suffix_fields) > 1 else []
group_input = setting_tokens[1].upper() if len(setting_tokens) > 1 else 'G0'

# Datasets in plotting order and the tick label shown for each of them.
exp_datasets = ['credit', 'cardio', 'meps16', 'lsac', 'bank', 'ACSH', 'ACSP', 'ACSI', 'ACSE', 'ACSM']
exp_ticks = ['Credit', 'Cardio', 'MEPS', 'LSAC', 'Bank', 'ACSH', 'ACSP', 'ACSI', 'ACSE', 'ACSM']

# 'multi' plot family (same for every model). The colour list has one more entry
# than there are settings; the extra colour is simply not used.
mcc_setttings = ['SEP', 'MCC-MIN', 'MCC-W1', 'MCC-W2', 'SCC-KAM']
mcc_legends = ['SEP', 'MCC-MIN', 'MCC-W1', 'MCC-W2', 'SCC+K']
mcc_colors = ['#ffffff', '#fffacd', '#3cb371','#20603d', '#0e6670', '#2e8b57']

# 'single' plot family: default variant ...
scc_settings = ['ORIG', 'OMN-ONE', 'KAM-ONE', 'SCC-ONE', 'SCC-KAM']
scc_legends = ['ORIG', 'OMN', 'KAM', 'SCC', 'SCC+K']
scc_colors = ['#ffffff', '#fffacd', '#f4ca16', '#9dc209', '#2e8b57']
# ... and the TR variant, which additionally shows CAP-ONE. These are kept in their
# own names so the outcome no longer depends on the order of `models`.
# The OMN-ONE label is 'OMN' (it was misspelled 'OMM' for TR only).
scc_tr_settings = ['ORIG', 'OMN-ONE', 'CAP-ONE', 'KAM-ONE', 'SCC-ONE', 'SCC-KAM']
scc_tr_legends = ['ORIG', 'OMN', 'CAP', 'KAM', 'SCC', 'SCC+K']
scc_tr_colors = ['#ffffff', '#fffacd', '#ffc0cb', '#f4ca16', '#9dc209', '#2e8b57'] # '#ffdab9', '#006400'

# Other metrics present in the results:
# ['BalAcc', 'DI', 'AvgOddsDiff', 'EQDiff', 'FPRDiff', 'FNRDiff', 'ERRDiff']
eval_metrics = ['SR', 'BalAcc']

for model_name in models:
    # x_tick_sets holds the tick spacing for the ('multi', 'single') families.
    # NOTE(review): for non-TR models both families draw five bars per dataset but
    # use different spacings (7.2 vs 6.3) — confirm that 7.2 is intended for 'multi'.
    if model_name == 'TR':
        single_family = (scc_tr_settings, scc_tr_colors, scc_tr_legends)
        x_tick_sets = [7.2, 7.2]
    else:
        single_family = (scc_settings, scc_colors, scc_legends)
        x_tick_sets = [7.2, 6.3]
    multi_family = (mcc_setttings, mcc_colors, mcc_legends)

    vis_df = eval_df.query('model=="{}"'.format(model_name))
    exp_path = plot_path+model_name+'/'+group_input + '/'
    make_folder(exp_path)

    plot_families = zip(['multi', 'single'], [multi_family, single_family], x_tick_sets)
    for setting_name, (settings_i, colors_i, legend_i), x_tick_set in plot_families:
        for exp_metric in eval_metrics:
            output_name = '{}{}-{}-{}{}.png'.format(exp_path, model_name, setting_name, exp_metric, eval_suffix)
            bar_plots(vis_df, output_name, exp_datasets, exp_metric, settings_i, group_input=group_input,
                      x_ticks=exp_ticks, colors=colors_i,
                      legend_names=legend_i, x_tick_offset=x_tick_set,
                      legend_col=len(settings_i),
                      save_to_disc=True)

            