In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from fairlearn.metrics import MetricFrame, false_positive_rate, selection_rate, false_negative_rate, count
from sklearn.metrics import accuracy_score, recall_score

# Root folder that generate_fairevals reads the saved predictions from.
# The author noted "ml_endterm" as an alternative value (presumably a different
# prediction run -- confirm before switching).
pred_folder = "ml_weekly"
# Root folder that generate_fairevals writes the fairness-evaluation CSVs into.
faireval_folder = "fairevals"
def mkdir(path):
    """Create directory ``path`` (including missing parents) if it is not there yet.

    The call is idempotent: an already existing directory is left untouched.
    ``exist_ok=True`` replaces the former "check, then create" sequence, so a
    directory that appears between the check and the creation can no longer
    make the call fail.

    Raises
    ------
    FileExistsError
        If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

def generate_fairevals(config_name, eval_task, pred_target, ds_key, num_files=40):
    """Compute per-group fairness metrics from saved predictions and write them to CSV.

    Input files are read from
    ``pred_folder/config_name/eval_task/pred_target/ds_key``. For each number
    ``k`` in ``1..num_files`` three files are loaded: ``y_pred_{k:03d}.npy``,
    ``y_targ_{k:03d}.csv`` (column ``y_raw``) and ``demographic_test_{k:03d}.csv``
    (all columns except the first). The pieces are pooled, and one fairlearn
    ``MetricFrame`` (accuracy, recall, false negative rate, false positive rate)
    is built per demographic column.

    For every demographic column two files are written to
    ``faireval_folder/config_name/eval_task/pred_target/ds_key``:
    ``<column>_by_group.csv`` and ``<column>_overall.csv``.

    Parameters
    ----------
    config_name, eval_task, pred_target, ds_key : str
        Path components shared by the input and the output folder.
    num_files : int, default 40
        How many numbered file triples to pool; numbering starts at 001.

    Raises
    ------
    ValueError
        If ``num_files`` is smaller than 1.
    FileNotFoundError
        If one of the expected numbered files does not exist.
    """
    if num_files < 1:
        raise ValueError("num_files must be at least 1, got {}".format(num_files))

    folder = os.path.join(pred_folder, config_name, eval_task, pred_target, ds_key)

    preds, targs, demos = [], [], []
    for file_no in range(1, num_files + 1):
        path_pred = os.path.join(folder, 'y_pred_{:03d}.npy'.format(file_no))
        path_targ = os.path.join(folder, 'y_targ_{:03d}.csv'.format(file_no))
        path_demo = os.path.join(folder, 'demographic_test_{:03d}.csv'.format(file_no))

        # NOTE(review): allow_pickle=True can execute arbitrary code while loading;
        # only point this at prediction files produced by a trusted pipeline.
        preds.append(np.load(path_pred, allow_pickle=True))
        targs.append(pd.read_csv(path_targ)["y_raw"])
        # The first CSV column is skipped; every remaining column is later used
        # as a sensitive feature.
        demos.append(pd.read_csv(path_demo).iloc[:, 1:])

    pred = np.concatenate(preds)
    targ = pd.concat(targs)
    demo = pd.concat(demos)

    metric_fns = {
        "accuracy": accuracy_score,
        "recall": recall_score,
        "fnr": false_negative_rate,
        "fpr": false_positive_rate,
    }

    # Build every MetricFrame before touching the output folder, so a failure
    # during metric computation leaves no partial output behind.
    fair_evals = {
        col: MetricFrame(metrics=metric_fns, y_true=targ, y_pred=pred, sensitive_features=demo[col])
        for col in demo.columns
    }

    folder_output = os.path.join(faireval_folder, config_name, eval_task, pred_target, ds_key)
    mkdir(folder_output)
    for col, mf in fair_evals.items():
        mf.by_group.to_csv(os.path.join(folder_output, '{}_by_group.csv'.format(col)))
        mf.overall.to_csv(os.path.join(folder_output, '{}_overall.csv'.format(col)))

In [33]:
# Model configurations to evaluate ('ml_chikersal' is currently left out).
config_names = [
    'ml_wang',
    'ml_xu_personalized',
    'ml_xu_interpretable',
]
eval_task = 'single'
pred_target = 'dep_weekly'
ds_keys = [
    'INS-W_1',
    'INS-W_2',
    'INS-W_3',
    'INS-W_4',
]

# Evaluate every (configuration, dataset) pair; configurations vary slowest.
for config_name, ds_key in [(cfg, key) for cfg in config_names for key in ds_keys]:
    generate_fairevals(config_name, eval_task, pred_target, ds_key)

  _warn_prf(average, modifier, msg_start, len(result))


In [38]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Root folder holding the fairness-evaluation CSVs the plots are built from.
faireval_folder = "fairevals"

# Model configurations to plot. An earlier, now removed selection listed
# 'ml_chikersal' in place of 'ml_xu_interpretable'.
config_names = [
    "ml_wang",
    "ml_xu_interpretable",
    "ml_xu_personalized",
]

# Dataset keys; generate_plot_data labels them DS1, DS2, ... in this order.
ds_keys = [
    "INS-W_1",
    "INS-W_2",
    "INS-W_3",
    "INS-W_4",
]

# Sensitive attributes that get a figure.
sensitive_attrs = [
    "race",
    "gender",
    "student_1stGen",
    "student_international",
    "generation",
    "orientation_heterosexual",
    "parent_edu_father",
    "parent_edu_mother",
]

# Attribute name -> legend title. "college_engineer" has a title but is not
# listed in sensitive_attrs above.
sensitive_attrs_mapping = {
    "race": "Race",
    "gender": "Gender",
    "student_1stGen": "First Generation Student",
    "student_international": "International Student",
    "college_engineer": "Engineering Major",
    "generation": "Generation Status",
    "orientation_heterosexual": "Heterosexual Orientation",
    "parent_edu_father": "Father's Education Level",
    "parent_edu_mother": "Mother's Education Level",
}

# Metric column names and their panel titles; the two lists are matched by position.
metrics = [
    "accuracy",
    "recall",
    "fpr",
    "fnr",
]
metrics_fullname = [
    "Accuracy",
    "Recall",
    "False positive rate",
    "False negative rate",
]

# Attribute name -> {group code: group label}. generate_plot_data drops rows
# whose code is missing here (e.g. gender codes other than 1 and 2).
# "disability" has labels but is not listed in sensitive_attrs above.
value2label = {
    "race": {
        0: "Asian",
        1: "Black",
        2: "White",
        3: "Latinx",
        4: "Biracial",
    },
    "gender": {
        1: "Male",
        2: "Female",
    },
    "orientation_heterosexual": {
        0: "Not heterosexual",
        1: "Heterosexual",
    },
    "student_international": {
        0: "Not international",
        1: "International",
    },
    "student_1stGen": {
        0: "Not first generation",
        1: "First generation",
    },
    "parent_edu_mother": {
        0: "Below bachelor's degree",
        1: "Bachelor's degree and above",
    },
    "parent_edu_father": {
        0: "Below bachelor's degree",
        1: "Bachelor's degree and above",
    },
    "generation": {
        0: "Non-immigrant",
        1: "Immigrant",
    },
    "disability": {
        0: "No disability",
        1: "Disability",
    },
}

In [39]:
def generate_plot_data(config_name, sens_attr, eval_task = 'single', pred_target = 'dep_weekly'):
    """Collect the by-group metric tables of one sensitive attribute across all datasets.

    For every key in the module-level ``ds_keys`` the file
    ``faireval_folder/config_name/eval_task/pred_target/<ds_key>/<sens_attr>_DEMO_by_group.csv``
    is read. Rows whose group code has no entry in ``value2label[sens_attr]``
    are discarded and the remaining codes are replaced by their labels.

    Returns
    -------
    dict
        Maps each of ``"accuracy"``, ``"recall"``, ``"fpr"`` and ``"fnr"`` to a
        DataFrame indexed by group label with one column per dataset, named
        ``DS1``, ``DS2``, ... in ``ds_keys`` order.
    """
    metric_names = ["accuracy", "recall", "fpr", "fnr"]
    series_by_metric = {name: {} for name in metric_names}

    for ds_number, ds_key in enumerate(ds_keys, start=1):
        csv_path = os.path.join(
            faireval_folder, config_name, eval_task, pred_target, ds_key,
            '{}_DEMO_by_group.csv'.format(sens_attr),
        )
        by_group = (
            pd.read_csv(csv_path)
            # strip the "_DEMO" suffix from the group column
            .rename(columns={f"{sens_attr}_DEMO": sens_attr})
            # keep only codes that have a label (drops e.g. 5 and 6 for gender)
            .loc[lambda frame: frame[sens_attr].isin(value2label[sens_attr])]
            # swap the numeric codes for their readable labels
            .assign(**{sens_attr: lambda frame: frame[sens_attr].replace(value2label[sens_attr])})
            .set_index(sens_attr)
        )
        for name in metric_names:
            series_by_metric[name][f"DS{ds_number}"] = by_group[name]

    # one DataFrame per metric: rows are groups, columns are datasets
    return {name: pd.concat(per_ds, axis=1) for name, per_ds in series_by_metric.items()}

In [40]:
def plot_faireval(config_name, sens_attr, eval_task = 'single', pred_target = 'dep_weekly'):
    """Plot the fairness metrics of one sensitive attribute and save the figure as PNG.

    The figure has one panel per entry of the module-level ``metrics`` list,
    laid out on a 2x2 grid. Each panel is a grouped bar chart: the x axis holds
    the datasets (columns of the tables returned by ``generate_plot_data``) and
    every dataset gets one bar per demographic group. The legend is attached to
    the last panel and titled with ``sensitive_attrs_mapping[sens_attr]``.

    The image is written to
    ``figures/<config_name>/<eval_task>/<pred_target>/<sens_attr>.png``; the
    folder is created when missing. Nothing is returned.

    Note: calling this function changes global seaborn/matplotlib settings
    (``sns.set_theme`` and ``rcParams["font.family"]``).

    Raises
    ------
    ZeroDivisionError
        If a metric table contains no demographic group (the bar width is
        derived from the number of groups).
    """
    import seaborn as sns

    faireval_dict = generate_plot_data(config_name, sens_attr, eval_task, pred_target)

    sns.set_theme(style="whitegrid", palette="Set2")
    # Set the font before any axes exist so that every text element picks it up.
    plt.rcParams["font.family"] = "sans-serif"

    fig = plt.figure(figsize=(8, 6))
    try:
        x_fig_num, y_fig_num = 2, 2
        ax = None
        for mindex, metric in enumerate(metrics):
            now_faireval = faireval_dict[metric]

            ax = fig.add_subplot(x_fig_num, y_fig_num, mindex + 1)
            sens_labels_num = len(now_faireval.index)
            width = 1.25 / sens_labels_num
            # Dataset slots are two units apart; the bars of one slot are
            # centred on the slot position.
            X_axis = np.arange(2 * len(now_faireval.columns), step=2)
            for sens_index, sens_label in enumerate(now_faireval.index):
                ax.bar(x=X_axis + (sens_index - sens_labels_num/2 + 1/2) * width,
                       height=now_faireval.iloc[sens_index, :],
                       width=width, align="center",
                       label=sens_label)

            # Axis decoration does not depend on the group, so it is applied
            # once per panel after all bars are drawn.
            ax.set_xticks(X_axis)
            ax.set_xticklabels(now_faireval.columns.to_list(), fontsize=8)
            ax.set_yticks(np.arange(0, 1.2, 0.5))
            ax.set_ylim((0, 1))
            ax.set_title(metrics_fullname[mindex], fontdict={"fontsize": 14, "fontweight": 10}, pad=10)

        fig.subplots_adjust(wspace=0.3, hspace=0.4)
        if ax is not None:
            # Anchored just outside the right edge of the last panel.
            ax.legend(ncol=1, loc='lower left', bbox_to_anchor=(1.04, 0.8),
                      title=sensitive_attrs_mapping[sens_attr], title_fontsize=11)

        folder_output = os.path.join("figures", config_name, eval_task, pred_target)
        # os.makedirs is used directly so the plotting cells do not depend on a
        # helper defined in another cell.
        os.makedirs(folder_output, exist_ok=True)
        fig.savefig(os.path.join(folder_output, f"{sens_attr}.png"), bbox_inches='tight')
    finally:
        # Always release the figure, also when plotting or saving fails;
        # this function is called in a loop.
        plt.close(fig)
    # plt.show()


In [42]:
# Render one figure per (configuration, sensitive attribute) pair;
# configurations vary slowest.
for config_name, sens_attr in [(cfg, attr) for cfg in config_names for attr in sensitive_attrs]:
    plot_faireval(config_name, sens_attr, pred_target="dep_weekly")