In [11]:
import ast
import os
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

sns.set_theme(palette="colorblind", style='whitegrid', font_scale=1.25)


In [2]:
def group_func(x):
    """Return the mean of ``x`` after discarding one minimum and one maximum.

    The sum of ``x`` is reduced by ``x.min()`` and ``x.max()`` and then divided
    by ``len(x) - 2``; ``x`` therefore needs more than two entries for the
    result to be meaningful.
    """
    trimmed_total = x.sum() - x.min() - x.max()
    remaining = len(x) - 2
    return trimmed_total / remaining

def group_func_2(x, n_trim=5):
    """Column-wise mean of ``x`` after trimming rows with extreme ``violation``.

    The column sums of the ``n_trim`` rows with the largest ``violation`` and of
    the ``n_trim`` rows with the smallest ``violation`` are subtracted from the
    column sums of ``x``; the remainder is divided by ``len(x) - 2 * n_trim``.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame with a ``violation`` column; every column is summed.
    n_trim : int, default 5
        Number of rows removed at each end. The default reproduces the
        previously hard-coded behaviour (5 rows per side, denominator
        ``len(x) - 10``).

    Returns
    -------
    pandas.Series
        One trimmed mean per column of ``x``.

    Notes
    -----
    ``x`` must contain more than ``2 * n_trim`` rows; otherwise the two trimmed
    sets overlap and the denominator is not positive.
    """
    column_totals = x.sum()
    largest_totals = x.nlargest(n_trim, columns=["violation"]).sum()
    smallest_totals = x.nsmallest(n_trim, columns=["violation"]).sum()
    return (column_totals - largest_totals - smallest_totals) / (len(x) - 2 * n_trim)


# Full fairness-metric names mapped to the abbreviated labels stored in the
# ``fairness_metric`` column of the result tables.
METRIC_MAP = dict(
    [
        ("Demographic_Parity", "Dem_Parity"),
        ("Equal_Opportunity", "Eq_Opp"),
        ("Equalized_Odds", "Eq_Odds"),
        ("Predictive_Equality", "Pred_Eq"),
        ("Predictive_Parity", "Pred_Parity"),
        ("Disparate_Impact", "Disp_Impact"),
    ]
)

In [3]:
# Collect every file below the trial directory. The path components below the
# root are read as
#   <dataset_sens_attrs>/<metric>/<use_classwise>/<cp_method file>
trials_root = os.path.expanduser("fairness_trials")
result_files = [
    os.path.join(dir_path, file_name)
    for dir_path, _, file_names in os.walk(trials_root)
    for file_name in file_names
]

# Result files that are left out of the analysis.
excluded_cp_files = ["aps_no_rand.csv", "tps_classwise.csv", "dtps.csv"]
# The files are read without a header row; these names are applied instead.
raw_columns = ["Unnamed: 0", "c", "base_eff", "base_coverage", "base_violation", "eff", "coverage", "violation"]

all_results = []
for rfile in result_files:
    # Split relative to the root using the OS separator: os.walk/os.path.join
    # build paths with os.sep, so splitting on a literal "/" is not portable.
    arr = os.path.relpath(rfile, trials_root).split(os.sep)
    dataset_sens_attrs, metric, use_classwise, cp_method = arr[0], arr[1], ast.literal_eval(arr[2]), arr[3]
    if cp_method in excluded_cp_files:
        continue

    # Rows with any missing field are discarded.
    trial_df = pd.read_csv(rfile, header=None, names=raw_columns).dropna()
    # Thresholds are handled as strings from here on (used for filtering and as plot hue).
    trial_df = trial_df.assign(c=trial_df["c"].astype(str))

    # Rows for c == "0.01" are excluded; .assign returns a new frame, so no
    # column is ever written onto a filtered slice.
    trial_df = trial_df[trial_df["c"] != "0.01"].assign(
        dataset_sens_attrs=dataset_sens_attrs,
        use_classwise=use_classwise,
        cp_method=cp_method.split(".")[0],
        fairness_metric=METRIC_MAP.get(metric, metric),
    )

    all_results.append(trial_df)

if not all_results:
    # pd.concat would otherwise fail with the less helpful "No objects to concatenate".
    raise ValueError(f"No result files were loaded from {trials_root!r}")

res_df = pd.concat(all_results, ignore_index=True)

def _tensor_text_to_float(value):
    """Convert a stored value such as ``"tensor(0.1234)"`` to a float rounded to 3 places.

    The ``tensor(`` prefix and the closing parenthesis are removed when present;
    any further comma-separated arguments inside the parentheses are ignored.
    Values without the prefix are passed to ``float`` as they are.
    """
    text = str(value).strip()
    if text.startswith("tensor("):
        text = text[len("tensor("):-1].split(",")[0]
    return round(float(text), 3)


# The first CSV column is not used by the analysis.
res_df = res_df.drop(columns=["Unnamed: 0"])

# All six measurement columns share the same textual format.
for column in ["eff", "coverage", "violation", "base_eff", "base_coverage", "base_violation"]:
    res_df[column] = res_df[column].apply(_tensor_text_to_float)

# Reshape to long format: every trial row yields one "ours" row and one "base"
# row that share the same column names. .assign returns a new frame, so the
# "type" column is never written onto a slice of res_df.
our_res_df = res_df[
    ['c', 'eff', 'coverage', 'violation',
     'dataset_sens_attrs', 'use_classwise', 'cp_method', 'fairness_metric']
].assign(type="ours")

base_res_df = (
    res_df[
        ['c', 'base_eff', 'base_coverage', 'base_violation',
         'dataset_sens_attrs', 'use_classwise', 'cp_method', 'fairness_metric']
    ]
    .rename(columns={'base_eff': 'eff', 'base_coverage': 'coverage', 'base_violation': 'violation'})
    .assign(type="base")
)

res_df = pd.concat([our_res_df, base_res_df], ignore_index=True)


# One group per experimental configuration; the rows inside a group are the
# individual entries read from the trial files.
group_keys = ['c', 'dataset_sens_attrs', 'use_classwise', 'cp_method', 'fairness_metric', 'type']
grouped_res_df = res_df.groupby(group_keys)

mean_res_df = grouped_res_df.mean().reset_index() # apply(group_func)
std_res_df = grouped_res_df.std().reset_index()

# Both aggregates come from the same groupby and are reset to a default index,
# so their rows line up. .assign builds a new frame instead of writing columns
# onto a slice of mean_res_df.
res_df = mean_res_df[group_keys].assign(
    eff_mean=mean_res_df["eff"].round(4),
    eff_std=std_res_df["eff"].round(4),
    violation_mean=mean_res_df["violation"].round(4),
    violation_std=std_res_df["violation"].round(4),
)

res_df

Unnamed: 0,c,dataset_sens_attrs,use_classwise,cp_method,fairness_metric,type,eff_mean,eff_std,violation_mean,violation_std
0,0.05,ACSEducation_,False,aps,Dem_Parity,base,2.9816,0.0018,0.4578,0.0022
1,0.05,ACSEducation_,False,aps,Dem_Parity,ours,5.9217,0.0139,0.0455,0.0081
2,0.05,ACSEducation_,False,aps,Eq_Odds,base,2.9816,0.0018,0.4654,0.0233
3,0.05,ACSEducation_,False,aps,Eq_Odds,ours,6.0000,0.0000,0.0000,0.0000
4,0.05,ACSEducation_,False,aps,Eq_Opp,base,2.9816,0.0018,0.4356,0.0551
...,...,...,...,...,...,...,...,...,...,...
3195,0.8,Pokec_z_region_gender,True,cfgnn,Disp_Impact,ours,2.7876,0.1404,0.8005,0.0367
3196,0.8,Pokec_z_region_gender,True,daps,Disp_Impact,base,2.4093,0.0686,0.6579,0.0309
3197,0.8,Pokec_z_region_gender,True,daps,Disp_Impact,ours,2.7427,0.1714,0.7492,0.0734
3198,0.8,Pokec_z_region_gender,True,tps,Disp_Impact,base,2.2648,0.0663,0.6400,0.0384


In [43]:
# Long format: one row per (configuration, statistic) pair.
melted_res_df = res_df.melt(
    ["c", "dataset_sens_attrs", "use_classwise", "cp_method", "fairness_metric", "type"],
    var_name="stat_type",
    value_name="stats",
)

# to_excel does not create missing directories, so make sure the target exists.
os.makedirs("./processed_trials", exist_ok=True)

for dataset_sens_attrs in res_df["dataset_sens_attrs"].unique():
    for use_classwise in [False, True]:
        selection = (melted_res_df["dataset_sens_attrs"] == dataset_sens_attrs) & (melted_res_df["use_classwise"] == use_classwise)
        # .assign returns a new frame, so the label column is not written onto a slice.
        df = melted_res_df[selection].assign(index=f"{dataset_sens_attrs}_{use_classwise}")

        # Rows: (c, fairness_metric); columns: (cp_method, type, stat_type).
        # The first value of each group is used; the constant "index" level is
        # dropped from the row index before writing.
        results_table = (
            df.groupby(["index", 'c', "fairness_metric", "cp_method", "type", "stat_type"])["stats"]
            .apply(lambda x: x.values[0])
            .reset_index()
            .pivot(index=["index", "c", "fairness_metric"], columns=["cp_method", "type", "stat_type"], values=["stats"])
            .droplevel(0, axis=0)
        )

        results_table.to_excel(f"./processed_trials/{dataset_sens_attrs}_{use_classwise}.xlsx")


In [None]:
METRICS = [METRIC_MAP[name] for name in ["Predictive_Parity"]]
# Closeness thresholds shown as hue levels. The overlay code below recovers a
# bar's threshold from its position, so this list also fixes the hue order.
C = ["0.05", "0.1", "0.15", "0.2"]

# savefig does not create missing directories, so make sure the target exists.
os.makedirs("./figures", exist_ok=True)

for dataset_sens_attrs in res_df["dataset_sens_attrs"].unique():
    if "ACSEducation" not in dataset_sens_attrs:
        continue
    for use_classwise in [True]:
        df = res_df[(res_df["dataset_sens_attrs"] == dataset_sens_attrs) & (res_df["use_classwise"] == use_classwise)]

        print(f"{dataset_sens_attrs}_{use_classwise}")

        eff_lim = 6 if "ACSEducation" in dataset_sens_attrs else 4

        if "Credit" in dataset_sens_attrs or "Pokec" in dataset_sens_attrs:
            CP_METHODS = ["aps", "cfgnn", "daps", "tps"]
        else:
            CP_METHODS = ["aps", "tps"]

        # Restrict to the plotted metrics and to the methods that get a facet.
        filtered_df = df[df["fairness_metric"].isin(METRICS) & df["cp_method"].isin(CP_METHODS)]
        ours_df = filtered_df[filtered_df["type"] == "ours"]
        base_df = filtered_df[filtered_df["type"] == "base"]

        # hue_order / col_order pin the bar and facet order. The overlay loop
        # further down relies on facet i belonging to CP_METHODS[i] and on the
        # bars being grouped by threshold in the order of C.
        # NOTE(review): newer seaborn releases deprecate `ci` in favour of
        # `errorbar=None`; kept as-is for the installed version - confirm.
        bar_kwargs = dict(
            data=ours_df,
            x="fairness_metric",
            hue="c",
            col="cp_method",
            sharex=False,
            ci=None,
            kind="bar",
            order=METRICS,
            hue_order=C,
            col_order=CP_METHODS,
        )

        grid = sns.catplot(y="eff_mean", **bar_kwargs)

        grid.figure.set_size_inches(5, 5)
        grid.set(xlabel="", ylabel="Efficiency", ylim=(0, eff_lim))

        for key, ax in grid.axes_dict.items():
            ax.set_title(key.upper())

        sns.move_legend(grid, "center right", bbox_to_anchor=(1.4, 0.5), ncol=1)

        grid.legend.set_title("Closeness\nThreshold:")

        grid2 = sns.catplot(y="violation_mean", **bar_kwargs)

        grid2.figure.set_size_inches(5, 5)
        grid2.set(xlabel="", ylabel="Actual Fairness Disparity", ylim=(0, 0.5))

        for key, ax in grid2.axes_dict.items():
            ax.set_title(key.upper())

        sns.move_legend(grid2, "center right", bbox_to_anchor=(1.4, 0.5), ncol=1)

        grid2.legend.set_title("Closeness\nThreshold:")

        # Overlay on every bar: a dashed line at the mean of the "base" rows for
        # that method and metric and, on the violation plot, a dot at the
        # threshold value itself.
        for method, eff_ax, violation_ax in zip(CP_METHODS, grid.axes[0], grid2.axes[0]):
            base_cp_df = base_df[base_df["cp_method"] == method]
            base_eff = [
                base_cp_df.loc[base_cp_df["fairness_metric"] == metric_name, "eff_mean"].mean()
                for metric_name in METRICS
            ]
            base_violation = [
                base_cp_df.loc[base_cp_df["fairness_metric"] == metric_name, "violation_mean"].mean()
                for metric_name in METRICS
            ]

            for ix, bar in enumerate(eff_ax.patches):
                x_start = bar.get_x()
                width = bar.get_width()
                if width == 0:
                    continue

                eff_ax.plot(
                    [x_start, x_start + width],
                    [base_eff[ix % len(METRICS)]] * 2,
                    '--',
                    c='k',
                )

            for ix, bar in enumerate(violation_ax.patches):
                x_start = bar.get_x()
                width = bar.get_width()
                if width == 0:
                    continue

                violation_ax.plot(
                    [x_start, x_start + width],
                    [base_violation[ix % len(METRICS)]] * 2,
                    '--',
                    c='k',
                )

                # Bars are indexed threshold-major: ix // len(METRICS) is the
                # position of the bar's threshold in C.
                violation_ax.plot(
                    x_start + width / 2,
                    float(C[ix // len(METRICS)]),
                    'o',
                    c='k',
                    markersize=3.5,
                )

        grid.savefig(f"./figures/{str(dataset_sens_attrs).strip('_')}_{use_classwise}_Proxy_efficiency.pdf")
        grid2.savefig(f"./figures/{str(dataset_sens_attrs).strip('_')}_{use_classwise}_Proxy_violation.pdf")

        plt.show()


In [None]:
METRICS = [
    METRIC_MAP[name]
    for name in ["Demographic_Parity", "Equal_Opportunity", "Equalized_Odds", "Predictive_Equality"]
]

# Closeness thresholds shown as hue levels. The overlay code below recovers a
# bar's threshold from its position, so this list also fixes the hue order.
C = ["0.05", "0.1", "0.15", "0.2"]

# savefig does not create missing directories, so make sure the target exists.
os.makedirs("./figures", exist_ok=True)

for dataset_sens_attrs in res_df["dataset_sens_attrs"].unique():
    if "Credit" not in dataset_sens_attrs:
        continue
    for use_classwise in [False, True]:
        df = res_df[(res_df["dataset_sens_attrs"] == dataset_sens_attrs) & (res_df["use_classwise"] == use_classwise)]

        print(f"{dataset_sens_attrs}_{use_classwise}")

        eff_lim = 6 if "ACSEducation" in dataset_sens_attrs else 4

        if "Credit" in dataset_sens_attrs or "Pokec" in dataset_sens_attrs:
            CP_METHODS = ["aps", "cfgnn", "daps", "tps"]
        else:
            CP_METHODS = ["aps", "tps"]

        filtered_df = df[(df["fairness_metric"].isin(METRICS)) & (df["cp_method"].isin(CP_METHODS))]
        ours_df = filtered_df[filtered_df["type"] == "ours"]
        base_df = filtered_df[filtered_df["type"] == "base"]

        # hue_order / col_order pin the bar and facet order. The overlay loop
        # further down relies on facet i belonging to CP_METHODS[i] and on the
        # bars being grouped by threshold in the order of C.
        # NOTE(review): newer seaborn releases deprecate `ci` in favour of
        # `errorbar=None`; kept as-is for the installed version - confirm.
        bar_kwargs = dict(
            data=ours_df,
            x="fairness_metric",
            hue="c",
            col="cp_method",
            sharex=False,
            ci=None,
            kind="bar",
            order=METRICS,
            hue_order=C,
            col_order=CP_METHODS,
            col_wrap=2,
        )

        grid = sns.catplot(y="eff_mean", **bar_kwargs)

        grid.set(xlabel="", ylabel="Efficiency", ylim=(0, eff_lim))

        for key, ax in grid.axes_dict.items():
            ax.set_title(key.upper())

        sns.move_legend(grid, "upper center", bbox_to_anchor=(0.5, 1.05), ncol=len(C))

        grid.legend.set_title("Closeness Threshold:")

        grid2 = sns.catplot(y="violation_mean", **bar_kwargs)

        grid2.set(xlabel="", ylabel="Actual Fairness Disparity")

        if "Credit" in dataset_sens_attrs or "ACSEducation" in dataset_sens_attrs:
            grid2.set(ylim=(0, 0.5), yticks=[0, 0.1, 0.2, 0.3, 0.4, 0.5])
        else:
            grid2.set(ylim=(0, 0.4), yticks=[0, 0.1, 0.2, 0.3, 0.4])

        for key, ax in grid2.axes_dict.items():
            ax.set_title(key.upper())

        sns.move_legend(grid2, "upper center", bbox_to_anchor=(0.5, 1.05), ncol=len(C))

        grid2.legend.set_title("Closeness Threshold:")

        # With col_wrap the grid's axes form a flat sequence in col_order.
        # Overlay on every bar: a dashed line at the mean of the "base" rows for
        # that method and metric and, on the violation plot, a dot at the
        # threshold value itself.
        for method, eff_ax, violation_ax in zip(CP_METHODS, grid.axes, grid2.axes):
            base_cp_df = base_df[base_df["cp_method"] == method]
            base_eff = [
                base_cp_df.loc[base_cp_df["fairness_metric"] == metric_name, "eff_mean"].mean()
                for metric_name in METRICS
            ]
            base_violation = [
                base_cp_df.loc[base_cp_df["fairness_metric"] == metric_name, "violation_mean"].mean()
                for metric_name in METRICS
            ]

            for ix, bar in enumerate(eff_ax.patches):
                x_start = bar.get_x()
                width = bar.get_width()
                if width == 0:
                    continue

                eff_ax.plot(
                    [x_start, x_start + width],
                    [base_eff[ix % len(METRICS)]] * 2,
                    '--',
                    c='k',
                )

            for ix, bar in enumerate(violation_ax.patches):
                x_start = bar.get_x()
                width = bar.get_width()
                if width == 0:
                    continue

                violation_ax.plot(
                    [x_start, x_start + width],
                    [base_violation[ix % len(METRICS)]] * 2,
                    '--',
                    c='k',
                )

                # Bars are indexed threshold-major: ix // len(METRICS) is the
                # position of the bar's threshold in C.
                violation_ax.plot(
                    x_start + width / 2,
                    float(C[ix // len(METRICS)]),
                    'o',
                    c='k',
                    markersize=3.5,
                )

        grid.savefig(f"./figures/{str(dataset_sens_attrs).strip('_')}_{use_classwise}_efficiency.pdf")
        grid2.savefig(f"./figures/{str(dataset_sens_attrs).strip('_')}_{use_classwise}_violation.pdf")

        plt.show()


In [None]:
METRICS = [METRIC_MAP[name] for name in ["Disparate_Impact"]]

# For every dataset / classwise setting, print one LaTeX table for efficiency
# and one for violation, with a column per (cp_method, type) pair.
for dataset_sens_attrs in res_df["dataset_sens_attrs"].unique():
    for use_classwise in [False, True]:
        selection = (
            (res_df["dataset_sens_attrs"] == dataset_sens_attrs)
            & (res_df["use_classwise"] == use_classwise)
            & (res_df["fairness_metric"].isin(METRICS))
        )
        # .assign returns a new frame, so the label column is not written onto a slice.
        df = res_df[selection].assign(index=f"{dataset_sens_attrs}_{use_classwise}")
        print(f"{dataset_sens_attrs}_{use_classwise}\n")

        for stat_column in ["eff_mean", "violation_mean"]:
            # The first value of each (cp_method, type) group is reported.
            results_table = (
                df.groupby(["index", "cp_method", "type"])[stat_column]
                .apply(lambda x: x.values[0])
                .reset_index()
                .pivot(index=["index"], columns=["cp_method", "type"], values=[stat_column])
            )
            print(results_table.to_latex())

        print()
