In [1]:
import os,sys
import numpy as np
from load_compas_data import *
import utils as ut
import funcs_disp_mist as fdm

In [2]:
def train_test_classifier(x_train, y_train, x_control_train,
                          loss_function, EPS, cons_params,
                          x_test, y_test, x_control_test, sensitive_attrs):
    """Fit a model with ``fdm`` and return its weights plus test-set statistics.

    The model is trained by ``fdm.train_model_disp_mist`` on the training
    split only; the resulting weights are then handed to
    ``fdm.get_clf_stats`` together with both splits.

    Args:
        x_train, y_train, x_control_train: training split, forwarded
            unchanged to both ``fdm`` calls.
        loss_function: loss identifier forwarded to the trainer
            (this notebook passes ``"logreg"``).
        EPS: numeric tolerance forwarded to the trainer.
            NOTE(review): exact meaning is defined in funcs_disp_mist.
        cons_params: constraint settings forwarded to the trainer; the
            notebook passes ``None`` for the unconstrained run and a dict
            for the constrained ones.
        x_test, y_test, x_control_test: held-out split, forwarded to
            ``fdm.get_clf_stats``.
        sensitive_attrs: names of the sensitive attributes, forwarded to
            ``fdm.get_clf_stats``.

    Returns:
        tuple: ``(w, test_score, s_attr_to_fp_fn_test)`` -- the trainer's
        return value followed by the second and sixth items of the
        6-tuple returned by ``fdm.get_clf_stats``.
    """
    w = fdm.train_model_disp_mist(x_train, y_train, x_control_train,
                                  loss_function, EPS, cons_params)

    # get_clf_stats yields six values. Only the test score and the test-side
    # FP/FN table are reported; the rest are unpacked into underscore names so
    # a change in the helper's arity still fails loudly here.
    (_train_score, test_score,
     _cov_all_train, _cov_all_test,
     _s_attr_to_fp_fn_train, s_attr_to_fp_fn_test) = fdm.get_clf_stats(
        w, x_train, y_train, x_control_train,
        x_test, y_test, x_control_test, sensitive_attrs)

    return w, test_score, s_attr_to_fp_fn_test

In [3]:
""" Load the data """
data_type = 1
X, y, x_control = load_compas_data()
# The keys of x_control name the sensitive attributes used throughout.
sensitive_attrs = [*x_control.keys()]


# Split the data into train and test.
# train_fold_size is handed to the splitter as-is (presumably the fraction
# of samples assigned to the training fold -- confirm in utils).
train_fold_size = 0.5
(
    x_train, y_train, x_control_train,
    x_test, y_test, x_control_test,
) = ut.split_into_train_test(X, y, x_control, train_fold_size)

# Settings reused by every training run.
cons_params = None  # constraint parameters, will use them later
loss_function = "logreg"  # perform the experiments with logistic regression
EPS = 1e-6

Looking for file 'compas-scores-two-years.csv' in the current directory...
File found in current directory..

Number of people recidivating within two years
-1    2795
 1    2483
dtype: int64


Features we will be using for classification are: ['intercept', 'age_cat_25 - 45', 'age_cat_Greater than 45', 'age_cat_Less than 25', 'race', 'sex', 'priors_count', 'c_charge_degree'] 



In [4]:
""" Classify the data while optimizing for accuracy """
# Blank line, then the section banner (same stdout as two separate prints).
print("", "== Unconstrained (original) classifier ==", sep="\n")

# Uses whatever cons_params currently holds; when the notebook is run top to
# bottom that is still None from the setup cell.
(
    w_uncons,
    acc_uncons,
    s_attr_to_fp_fn_test_uncons,
) = train_test_classifier(
    x_train, y_train, x_control_train,
    loss_function, EPS, cons_params,
    x_test, y_test, x_control_test, sensitive_attrs,
)


== Unconstrained (original) classifier ==


Accuracy: 0.671
||  s  || FPR. || FNR. ||
||  0  || 0.35 || 0.32 ||
||  1  || 0.15 || 0.59 ||




In [5]:
""" Now classify such that we optimize for accuracy while achieving perfect fairness """

print()
print("\n\n== DM Algo ==", "\n\n== Constraints on FPR ==", sep="\n")

# Setting parameters for the constraints.
cons_type = 1  # FPR constraint
# tau and mu are forwarded to the trainer unchanged
# (presumably solver parameters -- confirm in funcs_disp_mist).
tau, mu = 5.0, 1.2
# Every threshold is zero: one {0: 0, 1: 0} entry for each of the keys 0, 1, 2.
sensitive_attrs_to_cov_thresh = {
    "race": {key: {0: 0, 1: 0} for key in range(3)},
}
cons_params = {
    "cons_type": cons_type,
    "tau": tau,
    "mu": mu,
    "sensitive_attrs_to_cov_thresh": sensitive_attrs_to_cov_thresh,
}

(
    w_cons,
    acc_cons,
    s_attr_to_fp_fn_test_cons,
) = train_test_classifier(
    x_train, y_train, x_control_train,
    loss_function, EPS, cons_params,
    x_test, y_test, x_control_test, sensitive_attrs,
)






== DM Algo ==


== Constraints on FPR ==


Accuracy: 0.653
||  s  || FPR. || FNR. ||
||  0  || 0.28 || 0.41 ||
||  1  || 0.24 || 0.51 ||




In [6]:
print("\n\n== Constraints on FNR ==")

# Setting parameters for the constraints.
cons_type = 2  # FNR constraint
# tau and mu are forwarded to the trainer unchanged
# (presumably solver parameters -- confirm in funcs_disp_mist).
tau, mu = 5.0, 1.2
# Every threshold is zero: one {0: 0, 1: 0} entry for each of the keys 0, 1, 2.
sensitive_attrs_to_cov_thresh = {
    "race": {key: {0: 0, 1: 0} for key in range(3)},
}
cons_params = {
    "cons_type": cons_type,
    "tau": tau,
    "mu": mu,
    "sensitive_attrs_to_cov_thresh": sensitive_attrs_to_cov_thresh,
}

(
    w_cons_FNR,
    acc_cons_FNR,
    s_attr_to_fp_fn_test_cons_FNR,
) = train_test_classifier(
    x_train, y_train, x_control_train,
    loss_function, EPS, cons_params,
    x_test, y_test, x_control_test, sensitive_attrs,
)



== Constraints on FNR ==


Accuracy: 0.655
||  s  || FPR. || FNR. ||
||  0  || 0.29 || 0.39 ||
||  1  || 0.29 || 0.44 ||




In [7]:
print("\n\n== DM-sen Algo ==", "\n\n== Constraints on  both FPR and FNR==", sep="\n")

# Setting parameters for the constraints.
cons_type = 4  # both FPR and FNR constraint
# tau and mu are forwarded to the trainer unchanged
# (presumably solver parameters -- confirm in funcs_disp_mist).
tau, mu = 5.0, 1.2
# Every threshold is zero: one {0: 0, 1: 0} entry for each of the keys 0, 1, 2.
sensitive_attrs_to_cov_thresh = {
    "race": {key: {0: 0, 1: 0} for key in range(3)},
}
cons_params = {
    "cons_type": cons_type,
    "tau": tau,
    "mu": mu,
    "sensitive_attrs_to_cov_thresh": sensitive_attrs_to_cov_thresh,
}

(
    w_cons_both,
    acc_cons_both,
    s_attr_to_fp_fn_test_cons_both,
) = train_test_classifier(
    x_train, y_train, x_control_train,
    loss_function, EPS, cons_params,
    x_test, y_test, x_control_test, sensitive_attrs,
)



== DM-sen Algo ==


== Constraints on  both FPR and FNR==


Accuracy: 0.657
||  s  || FPR. || FNR. ||
||  0  || 0.29 || 0.39 ||
||  1  || 0.25 || 0.48 ||


