In [152]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
df = pd.read_csv('../data/compas-scores-two-years.csv')
features = ['race', 'age', 'sex', 'juv_misd_count', 'priors_count']
to_predict = 'two_year_recid'
races_to_filter = ['Caucasian', 'African-American']
filtered = df.loc[df['race'].isin(races_to_filter), features + [to_predict]].reset_index(drop=True)

filtered['race'] = filtered['race'].apply(lambda race: 0 if race == 'Caucasian' else 1)
filtered['sex'] = filtered['sex'].apply(lambda sex: 0 if sex == 'Male' else 1)
#x=filtered[['race', 'age', 'sex', 'juv_misd_count', 'priors_count']]
#y=filtered[['two_year_recid']]
#x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)


normalized_df = (filtered-filtered.mean())/filtered.std()
filtered['age'] = normalized_df['age']
filtered['juv_misd_count'] = normalized_df['juv_misd_count']
filtered['priors_count'] = normalized_df['priors_count']

In [164]:
from utils import train_model
import loss_funcs

train_size = 5000
x_train = filtered.loc[:train_size, features]
y_train = filtered.loc[:train_size, to_predict]
x_test = filtered.loc[train_size:, features]
y_test = filtered.loc[train_size:, to_predict]
x_control = {'race': x_train['race'].to_list()}



apply_fairness_constraints = 0
apply_accuracy_constraint = 0
sep_constraint = 0
gamma = 0
sensitive_attrs = ['race']
sensitive_attrs_to_cov_thresh = {'race': 0}

w = train_model(x_train.to_numpy(),
                y_train.to_numpy(),
                x_control,
                loss_funcs._logistic_loss,
                apply_fairness_constraints,
                apply_accuracy_constraint,
                sep_constraint,
                sensitive_attrs,
                sensitive_attrs_to_cov_thresh,
                gamma)
w

array([19.86856785, -0.06275713, 18.53437354, -0.33472788,  0.16827645])

In [165]:
from sklearn.linear_model import LogisticRegression
m = LogisticRegression()
m.coef_= w.reshape((1,-1))
m.intercept_ = 0
m.classes_ = np.array([0, 1])
(m.predict(x_test[features]) == y_test).sum() / len(y_test)



0.5365217391304348

In [151]:
apply_fairness_constraints = 1
apply_accuracy_constraint = 0
sep_constraint = 0
gamma = 0
sensitive_attrs = ['race']
sensitive_attrs_to_cov_thresh = {'race': 0}

w = train_model(x_train.to_numpy(),
                y_train.to_numpy(),
                x_control,
                loss_funcs._logistic_loss,
                apply_fairness_constraints,
                apply_accuracy_constraint,
                sep_constraint,
                sensitive_attrs,
                sensitive_attrs_to_cov_thresh,
                gamma)

In [152]:
w

array([0.61866718, 0.52736244, 0.47858317, 0.23981689, 1.0926482 ,
       0.27346651])

In [153]:
m = LogisticRegression()
m.coef_= w[0:5].reshape((1,-1))
m.intercept_ = w[5]
m.classes_=np.array([0, 1])
x = np.array(x_test[['race', 'age', 'sex', 'juv_misd_count', 'priors_count']])
np.unique(m.predict(x))

array([1])

In [154]:
apply_fairness_constraints = 0
apply_accuracy_constraint = 0
sep_constraint = 0
gamma = 0
sensitive_attrs = ['race']
sensitive_attrs_to_cov_thresh = {'race': 0}

w = train_model(x_train.to_numpy(),
                y_train.to_numpy(),
                x_control,
                loss_funcs._logistic_loss,
                apply_fairness_constraints,
                apply_accuracy_constraint,
                sep_constraint,
                sensitive_attrs,
                sensitive_attrs_to_cov_thresh,
                gamma)

In [155]:
w

array([0.11993083, 0.89981818, 0.95816955, 0.48344054, 0.69401705,
       0.01439868])

In [156]:
m = LogisticRegression()
m.coef_= w[0:5].reshape((1,-1))
m.intercept_ = w[5]
m.classes_=np.array([0, 1])
x = np.array(x_test[['race', 'age', 'sex', 'juv_misd_count', 'priors_count']])
np.unique(m.predict(x))

array([1])

In [13]:
import utils as ut
import loss_funcs as lf
def test_data():
    X, y, x_control = filtered
    ut.compute_p_rule(x_control["sex"], y) # compute the p-rule in the original data
    
    """ Split the data into train and test """
    X = ut.add_intercept(X) # add intercept to X before applying the linear classifier
    train_fold_size = 0.7
    x_train, y_train, x_control_train, x_test, y_test, x_control_test = ut.split_into_train_test(X, y, x_control, train_fold_size)
    
    apply_fairness_constraints = None
    apply_accuracy_constraint = None
    sep_constraint = None

    loss_function = lf._logistic_loss
    sensitive_attrs = ["sex"]
    sensitive_attrs_to_cov_thresh = {}
    gamma = None
    
    def train_test_classifier():
        w = ut.train_model(x_train, y_train, x_control_train, loss_function, apply_fairness_constraints, apply_accuracy_constraint, sep_constraint, sensitive_attrs, sensitive_attrs_to_cov_thresh, gamma)
        train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(w, x_train, y_train, x_test, y_test, None, None)
        distances_boundary_test = (np.dot(x_test, w)).tolist()
        all_class_labels_assigned_test = np.sign(distances_boundary_test)
        correlation_dict_test = ut.get_correlations(None, None, all_class_labels_assigned_test, x_control_test, sensitive_attrs)
        cov_dict_test = ut.print_covariance_sensitive_attrs(None, x_test, distances_boundary_test, x_control_test, sensitive_attrs)
        p_rule = ut.print_classifier_fairness_stats([test_score], [correlation_dict_test], [cov_dict_test], sensitive_attrs[0])	
        return w, p_rule, test_score
    
    print 
    print("== Unconstrained (original) classifier ==")
    # all constraint flags are set to 0 since we want to train an unconstrained (original) classifier
    apply_fairness_constraints = 0
    apply_accuracy_constraint = 0
    sep_constraint = 0
    w_uncons, p_uncons, acc_uncons = train_test_classifier()
    
    """ Now classify such that we optimize for accuracy while achieving perfect fairness """
    apply_fairness_constraints = 1 # set this flag to one since we want to optimize accuracy subject to fairness constraints
    apply_accuracy_constraint = 0
    sep_constraint = 0
    sensitive_attrs_to_cov_thresh = {"sex":0}
    print
    print("== Classifier with fairness constraint ==")
    w_f_cons, p_f_cons, acc_f_cons  = train_test_classifier()
    
    
    """ Classify such that we optimize for fairness subject to a certain loss in accuracy """
    apply_fairness_constraints = 0 # flag for fairness constraint is set back to0 since we want to apply the accuracy constraint now
    apply_accuracy_constraint = 1 # now, we want to optimize fairness subject to accuracy constraints
    sep_constraint = 0
    gamma = 0.5 # gamma controls how much loss in accuracy we are willing to incur to achieve fairness -- increase gamme to allow more loss in accuracy
    print("== Classifier with accuracy constraint ==")
    w_a_cons, p_a_cons, acc_a_cons = train_test_classifier()	
    
    """ 
    Classify such that we optimize for fairness subject to a certain loss in accuracy 
    In addition, make sure that no points classified as positive by the unconstrained (original) classifier are misclassified.
    """
    apply_fairness_constraints = 0 # flag for fairness constraint is set back to0 since we want to apply the accuracy constraint now
    apply_accuracy_constraint = 1 # now, we want to optimize accuracy subject to fairness constraints
    sep_constraint = 1 # set the separate constraint flag to one, since in addition to accuracy constrains, we also want no misclassifications for certain points (details in demo README.md)
    gamma = 1000.0
    print("== Classifier with accuracy constraint (no +ve misclassification) ==")
    w_a_cons_fine, p_a_cons_fine, acc_a_cons_fine  = train_test_classifier()
    
    return



In [14]:
apply_fairness_constraints = 0
apply_accuracy_constraint = 0
sep_constraint = 0
w_uncons, p_uncons, acc_uncons = train_test_classifier()

NameError: name 'x_control_train' is not defined