In [1]:
import warnings
import sys
sys.path.append('../')
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import os
from PrepareData import read_json, make_folder

In [3]:
repo_dir = sys.path[0].replace('notebooks', '')
eval_path=repo_dir+ 'eval/'

In [4]:
seeds = [1, 12345, 6, 2211, 15, 88, 121, 433, 500, 1121, 50, 583, 5278, 100000, 0xbeef, 0xcafe, 0xdead, 7777, 100, 923]

datasets = ['lsac', 'cardio', 'bank', 'meps16', 'credit', 'ACSE', 'ACSP', 'ACSH', 'ACSM', 'ACSI']

In [11]:

eval_suffix = 'time'

eval_file = '{}_data{}_n{}-lr.csv'.format(eval_suffix, len(datasets), len(seeds))
if os.path.exists(eval_path+eval_file):
    eval_df = pd.read_csv(eval_path+eval_file)
else:
    eval_df = pd.DataFrame()
    for data_name in datasets:
        cur_eval_df = pd.read_csv(eval_path+'{}-lr-{}.csv'.format(eval_suffix, data_name))

        eval_df = pd.concat([eval_df, cur_eval_df])
    
    eval_df.to_csv(eval_path+eval_file, index=False)
    print('Save evaluation results at {}'.format(eval_path+eval_file))

Save evaluation results at /Users/keyang/Projects/PubRepo/NonInvasiveTool4FairML/eval/time_data10_n20-lr.csv


In [12]:
eval_df.head()

Unnamed: 0,data,model,seed,method,step,time
0,lsac,LR,1,SCC-KAM,prep,21.299728
1,lsac,LR,1,SCC-KAM,fairmodel,48.461404
2,lsac,LR,1,OMN-ONE,prep,0.0
3,lsac,LR,1,OMN-ONE,fairmodel,61.527628
4,lsac,LR,1,KAM-ONE,prep,0.0


In [13]:
eval_df[eval_df['step']=='fairmodel']

Unnamed: 0,data,model,seed,method,step,time
1,lsac,LR,1,SCC-KAM,fairmodel,48.461404
3,lsac,LR,1,OMN-ONE,fairmodel,61.527628
5,lsac,LR,1,KAM-ONE,fairmodel,7.280181
7,lsac,LR,1,CAP-ONE,fairmodel,9.508113
9,lsac,LR,12345,SCC-KAM,fairmodel,58.216312
...,...,...,...,...,...,...
151,ACSI,LR,100,CAP-ONE,fairmodel,33.148107
153,ACSI,LR,923,SCC-KAM,fairmodel,42.286070
155,ACSI,LR,923,OMN-ONE,fairmodel,1901.723204
157,ACSI,LR,923,KAM-ONE,fairmodel,32.878697


In [14]:
def bar_plots(df, output_name, vis_datasets, vis_settings,
              legend_names=None, font_label=26, font_legend=18, 
              colors=['#ffffff', '#fffacd', '#3cb371','#20603d', '#0e6670'], bg_color = '#f3f3f3', x_tick_offset=6.3,
              x_ticks=None, y_label=None, x_label=None, legend=True, legend_col=5, save_to_disc=True):
    
    fig, ax = plt.subplots(1, figsize=(10, 4), dpi=200)
    input_df = df.copy()
    bar_mean = []
    bar_std = []
    dash_filling = []
    line_styles = []
    x_bars = []
    ind = 0
    bottom_bar_mean = []
    bottom_bar_std = []
        
    for off_i, name in enumerate(vis_datasets):
        vis_df = df[df['data']==name]
        for setting_i in vis_settings:
            set_df = vis_df[vis_df['method']==setting_i]
            bottom_df = set_df[set_df['step']=='prep']
            up_df = set_df[set_df['step']=='fairmodel']
#             if sum(set_df['time'].isnull()) > 0:
#                 print(name, setting_i)
#                 pass
            
#             else:
            y_values = np.array(up_df['time'])
            bottom_y = np.array(bottom_df['time'])
        
            cur_mean = np.mean(y_values)
            cur_std = np.std(y_values)

            bar_mean.append(cur_mean)
            bar_std.append(cur_std)
            bottom_bar_mean.append(np.mean(bottom_y))
            bottom_bar_std.append(np.std(bottom_y))
#             else: # no model is returned
#                 dash_filling.append(False)
#                 line_styles.append('dashed')
#                 bar_mean.append(0)
#                 bar_std.append(0)
                
            x_bars.append(ind+off_i*2)
                
            ind += 0.83
            
    bottom_bplot = ax.bar(x_bars, bottom_bar_mean, yerr=bottom_bar_std)
    
    bplot = ax.bar(x_bars, bar_mean, yerr=bar_std, bottom=bottom_bar_mean)
    
#     print('-->', vis_metric, bar_mean)
    
    n_bars = len(vis_settings)
    for idx, patch in enumerate(bottom_bplot):
        patch.set_facecolor('grey')
#         patch.set_hatch('\\')
        patch.set_edgecolor("black")

    for idx, patch in enumerate(bplot):
        patch.set_facecolor(colors[idx % n_bars])
        patch.set_edgecolor("black")
        
    if legend_names:
        legends = legend_names
    else:
        legends = vis_settings
    # add labels for settings 
    for idx, setting_i, color_i, legend_i in zip(range(len(vis_settings)), vis_settings, colors, legends):
        ax.bar(-2, 1, ec='black', fc=color_i, label=legend_i)

    ax.set_facecolor(bg_color)
    ax.yaxis.grid(True)
    plt.xlim([-1, max(x_bars)+1])
    plt.xticks([(x-1)*x_tick_offset+0.8 for x in range(1, len(vis_datasets)+1)])
    if x_ticks:
        locs, labels=plt.xticks();
        plt.xticks(locs, x_ticks, horizontalalignment='center', fontsize=font_label-10, rotation=0);

#     plt.ylim([0.0, 1.0])
    plt.yticks(fontsize=font_label);

    if y_label:
        plt.ylabel(y_label, fontsize=font_label)

    if x_label:
        plt.xlabel(x_label, fontsize=font_label)

    if legend:
        plt.legend(bbox_to_anchor=(0, 1, 1, 0), loc="lower center", mode="expand", ncol=legend_col, frameon=False, borderaxespad=0, handlelength=0.9, handletextpad=0.3, fontsize=font_label-7)

    if save_to_disc:
        plt.savefig(output_name, bbox_inches="tight")
        print('Bar plot is saved at ', output_name)
        plt.close()

In [15]:
plot_path = repo_dir+ 'intermediate/plots/'
if 'sort_' in eval_suffix or 'min_' in eval_suffix:
    plot_path = plot_path + 'err/'
    make_folder(plot_path)
else:
    pass

In [16]:
plot_path

'/Users/keyang/Projects/PubRepo/NonInvasiveTool4FairML/intermediate/plots/'

In [18]:
exp_datasets = ['meps16', 'lsac', 'bank', 'ACSM', 'cardio', 'ACSP', 'credit', 'ACSI', 'ACSE', 'ACSH']
exp_ticks = ['MEPS', 'LSAC', 'Bank', 'Cardio',  'ACSH', 'ACSP', 'Credit', 'ACSI', 'ACSE', 'ACSM']


scc_settings = ['OMN-ONE', 'KAM-ONE', 'SCC-KAM', 'CAP-ONE']
scc_legends = ['OMN', 'KAM', 'SCC+K', 'CAP']
scc_colors = ['#fffacd', '#f4ca16', '#2e8b57', '#ffc0cb']


    
x_tick_set = 5.3       
model_name = 'LR'

# vis_df = eval_df[eval_df['step']=='prep']
output_name = '{}time-{}.png'.format(plot_path, model_name)
bar_plots(eval_df, output_name, exp_datasets, scc_settings, x_ticks=exp_ticks, colors=scc_colors, 
          legend_names=scc_legends, x_tick_offset=x_tick_set, y_label='time(seconds)',
          legend_col=len(scc_settings),
          save_to_disc=True)

Bar plot is saved at  /Users/keyang/Projects/PubRepo/NonInvasiveTool4FairML/intermediate/plots/time-LR.png
