In [1]:
import data as dt
import pandas as pd
import numpy as np 
import fair_classification.utils as ut
import json
import os
import math
import data as dt

from sklearn.metrics import f1_score
from collections import defaultdict
from fairlearn.metrics import (
    MetricFrame, plot_model_comparison,
    selection_rate, demographic_parity_difference, demographic_parity_ratio,
    false_positive_rate, false_negative_rate,
    false_positive_rate_difference, false_negative_rate_difference,
    equalized_odds_difference)


In [2]:
# stores key value pairs of dataset, sensitive attributes and target labels
DATA2D = {'adult': 'target',
          'compas': 'ScoreText_',
          'german': 'loan_status',
          'synthetic' : 'D'}

DATA2S = {'adult': 'sex',
          'compas': 'Ethnic_Code_Text_',
          'german': 'sex',
          'synthetic': 'S'}

NAMES = ['adult', 'compas', 'german', 'synthetic']

In [3]:
config_path = os.path.join("report.json")
config = None

with open(config_path, 'r') as fh:
    content = json.load(fh)



dataset = content['dataset']
exp_num = content['exp-id']
fold = content['fold']
num_X = content['num_X']

In [4]:
train_data, test_data, cloumns, learn_decision_label, train_y_fair, train_y_proxy, test_y_fair, test_y_proxy = dt.load_data(dataset, fold, num_X=num_X, use_fair=False, exp_num=exp_num)
X_train = np.array(train_data.drop(columns=cloumns))
X_test = np.array(test_data.drop(columns=cloumns))
s_train = np.array(train_data[DATA2S[dataset]])
protected_train = [s_train]
s_test = np.array(test_data[DATA2S[dataset]])
protected_test = [s_test]

In [5]:
models_dict = {}
ans = {}

## Fair Metrics

In [6]:
def summary(prob, true_label, sv):
    try:
        pred = (prob > 0.5).astype(int)
        fair_metrics = {}
        fair_metrics["accuracy"] = dt.accuracy(prob, true_label)
        fair_metrics["precision"] = dt.precision(prob, true_label)
        fair_metrics["f1_score"] = f1_score(pred, true_label)
        fair_metrics["recall"] = dt.recall(prob, true_label)
        fair_metrics["Overall selection rate"] = selection_rate(true_label, pred)
        fair_metrics["Demographic parity difference"] = demographic_parity_difference(true_label, pred, sensitive_features=sv)
        fair_metrics["Demographic parity ratio"] = demographic_parity_ratio(true_label, pred, sensitive_features=sv)
        fair_metrics["False positive rate difference"] = false_positive_rate_difference(true_label, pred, sensitive_features=sv)
        fair_metrics["Equalized odds difference"] = equalized_odds_difference(true_label, pred, sensitive_features=sv)
        fair_metrics["False negative rate difference"] = false_negative_rate_difference(true_label, pred, sensitive_features=sv)
        return fair_metrics

    except TypeError as e:
        print(f"{prob=}, {true_label=}")
        print(f"{type(prob)=} , {type(true_label)=}")
        print("error")
        print(f"{e.with_traceback()=}")
    # print(fair_metrics)
    # return pd.DataFrame.from_dict(fair_metrics, orient="index", columns=fair_metrics.keys())

In [7]:
def analyze(models_dict, s_test):
    res = {}
    for name, models in models_dict.items():
        res[name] = {}
        print(name)
        print(models.keys())
        for exp, metrics in models.items():
            print(exp)
            res[name][exp] = {}
            res[name][exp]["proxy"] = summary(metrics["prob_test"], test_y_fair, s_test )
            res[name][exp]["fair"] = summary(metrics["prob_test"], test_y_proxy, s_test )
    return res

## REWEIGHT

In [8]:
import reweight as rw

In [9]:

models_dict["reweight"] = {}
models_dict["reweight"]["pre"] = rw.learning(X_train, train_y_fair, X_test, test_y_proxy, protected_train, protected_test)
models_dict["reweight"]["post"] = rw.learning(X_train, train_y_proxy, X_test, test_y_fair, protected_train, protected_test)
models_dict["reweight"]["pre_post"] = rw.learning(X_train, train_y_fair, X_test, test_y_fair, protected_train, protected_test)
models_dict["reweight"]["proxy"] = rw.learning(X_train, train_y_proxy, X_test, test_y_proxy, protected_train, protected_test)

[-0.01909891]
Train Accuracy 0.1617283950617284
Train Violation 0.0008263243323004543  		 All violations [0.0008263243323004543]
Test Accuracy 0.44
Test Violation 0.050000000000000044  		 All violations [-0.050000000000000044]


[0.17457577]
Train Accuracy 0.27037037037037037
Train Violation 0.001003393832079147  		 All violations [0.001003393832079147]
Test Accuracy 0.29
Test Violation 0.0485714285714286  		 All violations [-0.0485714285714286]


[-0.01909891]
Train Accuracy 0.1617283950617284
Train Violation 0.0008263243323004543  		 All violations [0.0008263243323004543]
Test Accuracy 0.17
Test Violation 0.050000000000000044  		 All violations [-0.050000000000000044]


[0.17457577]
Train Accuracy 0.27037037037037037
Train Violation 0.001003393832079147  		 All violations [0.001003393832079147]
Test Accuracy 0.3
Test Violation 0.0485714285714286  		 All violations [-0.0485714285714286]




## FAIR LR

In [10]:
import fair_lr as flr

In [11]:
x_control_test = {DATA2S[dataset]: s_test}
x_control_train= {DATA2S[dataset]: s_train}

lr_y_train_fair = np.array([-1 if y == 0 else 1 for y in train_y_fair])
lr_y_train_proxy = np.array([-1 if y == 0 else 1 for y in train_y_proxy])
lr_y_test_proxy = np.array([-1 if y == 0 else 1 for y in test_y_proxy])
lr_y_test_fair = np.array([-1 if y == 0 else 1 for y in test_y_fair])

In [12]:
models_dict["fair_lr"] = {}
models_dict["fair_lr"]["pre"] = flr.model(X_train, lr_y_train_fair, x_control_train, X_test, lr_y_test_proxy, x_control_test, DATA2S[dataset])[2]
models_dict["fair_lr"]["post"] = flr.model(X_train, lr_y_train_proxy, x_control_train, X_test, lr_y_test_fair, x_control_test, DATA2S[dataset])[2]
models_dict["fair_lr"]["pre_post"] = flr.model(X_train, lr_y_train_fair, x_control_train, X_test, lr_y_test_fair, x_control_test, DATA2S[dataset])[2]
models_dict["fair_lr"]["proxy"] = flr.model(X_train, lr_y_train_proxy, x_control_train, X_test, lr_y_test_proxy, x_control_test, DATA2S[dataset])[2]


Accuracy: 0.56
Protected/non-protected in +ve class: 69% / 71%
P-rule achieved: 97%
Covariance between sensitive feature and decision from distance boundary : 0.111
<class 'dict'>
Accuracy: 0.69
Protected/non-protected in +ve class: 78% / 93%
P-rule achieved: 84%
Covariance between sensitive feature and decision from distance boundary : 0.062
<class 'dict'>
Accuracy: 0.85
Protected/non-protected in +ve class: 69% / 71%
P-rule achieved: 97%
Covariance between sensitive feature and decision from distance boundary : 0.112
<class 'dict'>
Accuracy: 0.64
Protected/non-protected in +ve class: 78% / 93%
P-rule achieved: 84%
Covariance between sensitive feature and decision from distance boundary : 0.062
<class 'dict'>


## Fair Reduction

In [17]:
import fair_reduction as fr
from fairlearn.reductions import ExponentiatedGradient
from fairlearn.reductions import DemographicParity
from sklearn.linear_model import LogisticRegression


In [18]:
def reduction(y_label):
    learn = ExponentiatedGradient(
            LogisticRegression(solver='liblinear', fit_intercept=True),
            constraints=DemographicParity())

    learn.fit(X_train, y_label, sensitive_features=s_train)

        # predict
    prob_test = learn._pmf_predict(X_test)[:, 1]
    prob_train = learn._pmf_predict(X_train)[:, 1]
    res = {}
    res["prob_train"] = prob_test
    res["prob_test"] = prob_test
    return res

In [None]:
models_dict["reduction"] = {}
models_dict["reduction"]["pre"] = reduction(train_y_fair)
models_dict["reduction"]["post"] = reduction(train_y_proxy)


In [13]:
analyze(models_dict, s_test)

reweight
dict_keys(['pre', 'post', 'pre_post', 'proxy'])
pre
post
pre_post
proxy
fair_lr
dict_keys(['pre', 'post', 'pre_post', 'proxy'])
pre
post
pre_post
proxy


{'reweight': {'pre': {'proxy': {'accuracy': 0.83,
    'precision': 0.8285714285714286,
    'f1_score': 0.8721804511278196,
    'recall': 0.9206349206349206,
    'Overall selection rate': 0.7,
    'Demographic parity difference': 0.06944444444444442,
    'Demographic parity ratio': 0.9074074074074074,
    'False positive rate difference': 0.38333333333333336,
    'Equalized odds difference': 0.38333333333333336,
    'False negative rate difference': 0.06117021276595745},
   'fair': {'accuracy': 0.56,
    'precision': 0.6857142857142857,
    'f1_score': 0.6857142857142857,
    'recall': 0.6857142857142857,
    'Overall selection rate': 0.7,
    'Demographic parity difference': 0.06944444444444442,
    'Demographic parity ratio': 0.9074074074074074,
    'False positive rate difference': 0.13397129186602874,
    'Equalized odds difference': 0.13397129186602874,
    'False negative rate difference': 0.02663706992230852}},
  'post': {'proxy': {'accuracy': 0.71,
    'precision': 0.69318181818

In [14]:
ans.keys()

dict_keys([])