In [1]:
import pandas as pd
import numpy as np
import sklearn
import sys
import os
import torch
import tqdm
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below; consider narrowing the filter while debugging.
warnings.filterwarnings('ignore')

# Put the parent and grandparent directories on the import path; presumably one
# of them contains the local `fairws` package imported below (verify against
# the repository layout).
sys.path.append('../')
sys.path.append('../../')
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds
from fairlearn.postprocessing import ThresholdOptimizer
from fairlearn.preprocessing import CorrelationRemover
from sklearn.linear_model import LogisticRegression

# custom (project-local helpers: data loading, evaluation, pseudolabel generation)
from fairws.data_util import load_dataset, load_LIFT_embedding, load_LF
from fairws.metrics import exp_eval
from fairws.sbm import get_baseline_pseudolabel, get_sbm_pseudolabel

# Configurations

In [2]:
# Experiment settings read by the cells below.
dataset_name = "hateXplain" # adult | bank_marketing | CivilComments | hateXplain | CelebA | UTKFace
use_LIFT_embedding = False # only for adult, bank_marketing
sbm_diff_threshold = 0.05 # forwarded to get_sbm_pseudolabel as diff_threshold
# The value must match one of these names exactly; any other string falls
# through to the plain (no fairness method) fit in the cells below.
fairml_method = "optimal_threshold" # correlation_remover | exponentiated_gradient | optimal_threshold

result_collection = pd.DataFrame() # to keep results

# Weak supervision (WS) baseline

In [3]:
# Weak-supervision baseline: train the downstream classifier on baseline
# pseudolabels, apply the configured fairness method, and record the metrics.
cond = "ws_baseline"

# Load the train/test splits: features (x), labels (y), sensitive attribute (a).
x_train, y_train, a_train, x_test, y_test, a_test = load_dataset(
    dataset_name=dataset_name,
    data_base_path='../data',
)

# weak supervision: the loaded training labels are replaced by pseudolabels
# computed from L (presumably the labeling-function outputs -- confirm in
# fairws.data_util.load_LF).
L = load_LF(dataset_name, data_base_path='../data')
y_train = get_baseline_pseudolabel(L)

# downstream task
model = LogisticRegression()

# fair ml method
if fairml_method == "correlation_remover":
    # Append the sensitive attribute as the last feature column so that
    # CorrelationRemover can address it by index (x_train.shape[1]).
    # NOTE(review): the remover is fitted on train and test rows together, so
    # test-set statistics influence the transform; consider fitting on the
    # training rows only. Left unchanged here to keep results comparable.
    x = np.concatenate([x_train, x_test], axis=0)
    a = np.concatenate([a_train, a_test], axis=0)
    x_aug = np.concatenate([x, np.expand_dims(a, axis=1)], axis=1)
    decorr = CorrelationRemover(sensitive_feature_ids=[x_train.shape[1]], alpha=1)
    x_decorr = decorr.fit_transform(x_aug)
    # Split the transformed matrix back into its train and test rows.
    x_train_decorr = x_decorr[:x_train.shape[0]]
    x_test_decorr = x_decorr[x_train.shape[0]:]
    model.fit(x_train_decorr, y_train)
    y_pred = model.predict(x_test_decorr)
elif fairml_method == "exponentiated_gradient":
    constraints = DemographicParity(difference_bound=0)
    exp_grad_est = ExponentiatedGradient(
        estimator=model,
        constraints=constraints,
    )
    exp_grad_est.fit(x_train, y_train, sensitive_features=a_train)
    y_pred = exp_grad_est.predict(x_test)
elif fairml_method == "optimal_threshold":
    thr_opt_est = ThresholdOptimizer(
        estimator=model,
        constraints='demographic_parity',
        objective='accuracy_score',
    )
    thr_opt_est.fit(x_train, y_train, sensitive_features=a_train)
    # Unlike the other branches, prediction here also takes the test-time
    # sensitive attribute.
    y_pred = thr_opt_est.predict(x_test, sensitive_features=a_test)
else:
    # Unknown method name: report it and fall back to the unconstrained model.
    print("fairml_method", fairml_method, "not implemented.")
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

result = exp_eval(y_test, y_pred, a_test, cond=cond)
result['fairml_method'] = fairml_method
print(result)

# DataFrame.append was removed in pandas 2.0; pd.concat adds the same row and
# works on both older and current pandas releases.
result_collection = pd.concat(
    [result_collection, pd.DataFrame([result])],
    ignore_index=True,
)

100%|███████████████████████████████████████████████████████████████████████████| 1000/1000 [00:02<00:00, 464.88epoch/s]


{'condition': 'ws_baseline', 'accuracy': 0.5491931285788652, 'fscore': 0.5563524590163934, 'precision': 0.6687192118226601, 'recall': 0.4763157894736842, 'demographic_parity_gap': 0.02021467685699463, 'equal_opportunity_gap': 0.03451120853424072, 'fairml_method': 'optimal_threshold'}


# SBM

In [4]:
# SBM runs: for each ot_type setting, build pseudolabels with
# get_sbm_pseudolabel, train the downstream classifier with the configured
# fairness method, and record the metrics.

# Load the train/test splits: features (x), labels (y), sensitive attribute (a).
x_train, y_train, a_train, x_test, y_test, a_test = load_dataset(
    dataset_name=dataset_name,
    data_base_path='../data',
)

for ot_type in [None, "linear", "sinkhorn"]:
    cond = f"sbm({ot_type})"

    # L is reloaded on every iteration, so each run starts from a freshly
    # loaded copy regardless of what the previous run did with it.
    L = load_LF(dataset_name, data_base_path='../data')
    if use_LIFT_embedding:
        # Use the LIFT embedding of the training set (instead of x_train) as
        # the representation passed to get_sbm_pseudolabel.
        x_embedding_train, x_embedding_test = load_LIFT_embedding(
            dataset_name=dataset_name,
            data_base_path='../data',
        )
        y_train = get_sbm_pseudolabel(
            L, x_embedding_train, a_train, dataset_name,
            ot_type=ot_type, diff_threshold=sbm_diff_threshold,
            use_LIFT_embedding=True,
        )
    else:
        y_train = get_sbm_pseudolabel(
            L, x_train, a_train, dataset_name,
            ot_type=ot_type, diff_threshold=sbm_diff_threshold,
        )

    # downstream task
    model = LogisticRegression()

    # fair ml method
    if fairml_method == "correlation_remover":
        # Append the sensitive attribute as the last feature column so that
        # CorrelationRemover can address it by index (x_train.shape[1]).
        # NOTE(review): the remover is fitted on train and test rows together,
        # so test-set statistics influence the transform; consider fitting on
        # the training rows only. Left unchanged to keep results comparable.
        x = np.concatenate([x_train, x_test], axis=0)
        a = np.concatenate([a_train, a_test], axis=0)
        x_aug = np.concatenate([x, np.expand_dims(a, axis=1)], axis=1)
        decorr = CorrelationRemover(sensitive_feature_ids=[x_train.shape[1]], alpha=1)
        x_decorr = decorr.fit_transform(x_aug)
        # Split the transformed matrix back into its train and test rows.
        x_train_decorr = x_decorr[:x_train.shape[0]]
        x_test_decorr = x_decorr[x_train.shape[0]:]
        model.fit(x_train_decorr, y_train)
        y_pred = model.predict(x_test_decorr)
    elif fairml_method == "exponentiated_gradient":
        constraints = DemographicParity(difference_bound=0)
        exp_grad_est = ExponentiatedGradient(
            estimator=model,
            constraints=constraints,
        )
        exp_grad_est.fit(x_train, y_train, sensitive_features=a_train)
        y_pred = exp_grad_est.predict(x_test)
    elif fairml_method == "optimal_threshold":
        thr_opt_est = ThresholdOptimizer(
            estimator=model,
            constraints='demographic_parity',
            objective='accuracy_score',
        )
        thr_opt_est.fit(x_train, y_train, sensitive_features=a_train)
        # Unlike the other branches, prediction here also takes the test-time
        # sensitive attribute.
        y_pred = thr_opt_est.predict(x_test, sensitive_features=a_test)
    else:
        # Unknown method name: report it and fall back to the unconstrained model.
        print("fairml_method", fairml_method, "not implemented.")
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)

    result = exp_eval(y_test, y_pred, a_test, cond=cond)
    result['fairml_method'] = fairml_method
    print(result)
    # DataFrame.append was removed in pandas 2.0; pd.concat adds the same row
    # and works on both older and current pandas releases.
    result_collection = pd.concat(
        [result_collection, pd.DataFrame([result])],
        ignore_index=True,
    )


100%|███████████████████████████████████████████████████████████████████████████| 1000/1000 [00:02<00:00, 467.34epoch/s]


{'condition': 'sbm(None)', 'accuracy': 0.5866736074960958, 'fscore': 0.6583476764199656, 'precision': 0.6461148648648649, 'recall': 0.6710526315789473, 'demographic_parity_gap': 0.018130362033843994, 'equal_opportunity_gap': 0.0042018890380859375, 'fairml_method': 'optimal_threshold'}


100%|███████████████████████████████████████████████████████████████████████████| 1000/1000 [00:02<00:00, 469.71epoch/s]


{'condition': 'sbm(linear)', 'accuracy': 0.6007287870900573, 'fscore': 0.6852687730816578, 'precision': 0.643793369313801, 'recall': 0.7324561403508771, 'demographic_parity_gap': 0.006840825080871582, 'equal_opportunity_gap': 0.009219586849212646, 'fairml_method': 'optimal_threshold'}


100%|███████████████████████████████████████████████████████████████████████████| 1000/1000 [00:02<00:00, 482.04epoch/s]


{'condition': 'sbm(sinkhorn)', 'accuracy': 0.6048932847475273, 'fscore': 0.6908350305498981, 'precision': 0.6448669201520912, 'recall': 0.743859649122807, 'demographic_parity_gap': 0.0027193427085876465, 'equal_opportunity_gap': 0.025291621685028076, 'fairml_method': 'optimal_threshold'}


# Result summary

In [5]:
# Show every result row collected by the experiment cells above.
result_collection

Unnamed: 0,condition,accuracy,fscore,precision,recall,demographic_parity_gap,equal_opportunity_gap,fairml_method
0,ws_baseline,0.549193,0.556352,0.668719,0.476316,0.020215,0.034511,optimal_threshold
1,sbm(None),0.586674,0.658348,0.646115,0.671053,0.01813,0.004202,optimal_threshold
2,sbm(linear),0.600729,0.685269,0.643793,0.732456,0.006841,0.00922,optimal_threshold
3,sbm(sinkhorn),0.604893,0.690835,0.644867,0.74386,0.002719,0.025292,optimal_threshold
