In [1]:
import numpy as np 
import pandas as pd 
from sklearn.metrics import make_scorer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, GridSearchCV, cross_validate
from sklearn.model_selection import train_test_split


from aequitas.group import Group
from aequitas.bias import Bias
from aequitas.fairness import Fairness
from aequitas.plotting import Plot

%matplotlib inline

## Incorporating fairness into cross validation

Read in the Broward train and test splits.

In [53]:
### train data
# Features are every column up to and including 'five_year' (label-based
# slices are inclusive); the target is 'recid_two_year'.
train = pd.read_csv("../data/broward_train.csv")
# .copy() detaches the feature frame from `train`, so later column assignments
# cannot trigger SettingWithCopyWarning or write through to the raw frame.
X_train = train.loc[:, :'five_year'].copy()
Y_train = train['recid_two_year']

### test data
test = pd.read_csv("../data/broward_test.csv")
X_test = test.loc[:, :'five_year'].copy()
Y_test = test['recid_two_year']

In [12]:
### Implementing fairness as scoring method 
def compute_fairness(df: pd.DataFrame, 
                     decoders: dict, 
                     sensitive_attrs: list,
                     ref_groups_dict: dict) -> "pd.io.formats.style.Styler":
    """Run the Aequitas Group -> Bias -> Fairness pipeline on a scored frame.

    Parameters
    ----------
    df : pd.DataFrame
        Frame handed to ``Group.get_crosstabs``. Callers in this notebook
        supply ``score`` and ``label_value`` columns alongside the sensitive
        attributes (presumably required by Aequitas -- confirm against its docs).
    decoders : dict
        Dictionary of dictionaries: ``{column_name: {encoded_value: label}}``.
        Each mapping is applied to its column before the crosstabs are built.
    sensitive_attrs : list
        Column names passed to ``get_crosstabs`` as ``attr_cols``.
    ref_groups_dict : dict
        ``{attribute: reference_value}`` passed to
        ``Bias.get_disparity_predefined_groups``.

    Returns
    -------
    pandas Styler
        ``DataFrame.style`` over the fairness table restricted to: attribute
        name/value, the absolute metrics, the disparity columns and the
        parity determinations. Note this is a Styler, not a DataFrame.
    """
    # decode numeric encodings for cat var (replace returns a new frame, so
    # the caller's `df` is left untouched)
    decoded = df
    for column_name, value_map in decoders.items():
        decoded = decoded.replace({column_name: value_map})

    g = Group()
    xtab, _ = g.get_crosstabs(decoded, attr_cols=sensitive_attrs)

    # compute bias relative to the caller-chosen reference groups
    b = Bias()
    bdf = b.get_disparity_predefined_groups(xtab,
                                            original_df=decoded,
                                            ref_groups_dict=ref_groups_dict,
                                            alpha=0.05)

    f = Fairness()
    fdf = f.get_group_value_fairness(bdf)

    # assemble the reported columns: identifiers, absolute metrics,
    # disparities, then the parity determinations
    report_columns = (['attribute_name', 'attribute_value']
                      + g.list_absolute_metrics(xtab)
                      + b.list_disparities(fdf)
                      + f.list_parities(fdf))
    return fdf[report_columns].style

In [27]:
# Fit a class-balanced logistic regression on the training split and predict
# on the test split. 'person_id', 'screening_date' and 'race' are removed from
# the model inputs on both sides.
clf = LogisticRegression(
    class_weight='balanced',
    solver='liblinear',
    random_state=0,
).fit(X_train.drop(columns=['person_id', 'screening_date', 'race']), Y_train)

preds = clf.predict(X_test.drop(columns=['person_id', 'screening_date', 'race']))

In [28]:
# Build the frame for the fairness audit WITHOUT mutating X_test: writing
# 'score'/'label_value' into X_test would leak two extra columns into every
# later use of the test features (e.g. the Logistic(...) call further down).
# `preds` is assigned positionally; `Y_test` is aligned on the index.
df = (
    X_test[["person_id", "screening_date", "sex", "race"]]
    .assign(score=preds, label_value=Y_test)
    .rename(columns={"person_id": "entity_id"})
)

decoders = {"sex": {0: "male",
                    1: "female"}
           }

sensitive_attrs = ['sex', 'race']

ref_groups_dict = {'sex':'male', 
                   'race': 'Caucasian'}
res = compute_fairness(df, 
                       decoders=decoders, 
                       sensitive_attrs=sensitive_attrs,
                       ref_groups_dict=ref_groups_dict)

res

model_id, score_thresholds 1 {'rank_abs': [178]}
get_disparity_predefined_group()


Unnamed: 0,attribute_name,attribute_value,tpr,tnr,for,fdr,fpr,fnr,npv,precision,ppr,pprev,prev,ppr_disparity,pprev_disparity,precision_disparity,fdr_disparity,for_disparity,fpr_disparity,fnr_disparity,tpr_disparity,tnr_disparity,npv_disparity,TNR Parity,Statistical Parity,FDR Parity,FPR Parity,Unsupervised Fairness,FOR Parity,Precision Parity,Impact Parity,NPV Parity,Supervised Fairness,TPR Parity,TypeII Parity,TypeI Parity,Equalized Odds,FNR Parity
0,sex,female,0.688742,0.621951,0.315436,0.373494,0.378049,0.311258,0.684564,0.626506,0.932584,0.526984,0.479365,13.8333,2.15185,1.25301,0.746988,1.06101,2.01626,0.481036,1.95143,0.765478,0.974187,False,False,False,False,False,True,False,False,True,False,False,False,False,False,False
1,sex,male,0.352941,0.8125,0.297297,0.5,0.1875,0.647059,0.702703,0.5,0.0674157,0.244898,0.346939,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
2,race,African-American,0.727273,0.614583,0.337079,0.316239,0.385417,0.272727,0.662921,0.683761,0.657303,0.567961,0.533981,2.6,1.60291,1.4652,0.592949,1.25638,1.34896,0.533058,1.48918,0.860417,0.905993,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False
3,race,Asian,,1.0,0.0,,0.0,,1.0,,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,,,1.4,1.36667,False,False,,False,False,False,,False,False,False,,False,False,False,
4,race,Caucasian,0.488372,0.714286,0.268293,0.533333,0.285714,0.511628,0.731707,0.466667,0.252809,0.354331,0.338583,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
5,race,Hispanic,0.636364,0.555556,0.444444,0.363636,0.444444,0.363636,0.555556,0.636364,0.0617978,0.55,0.55,0.244444,1.55222,1.36364,0.681818,1.65657,1.55556,0.710744,1.30303,0.777778,0.759259,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
6,race,Other,0.5,0.5,0.4,0.6,0.5,0.5,0.6,0.4,0.0280899,0.5,0.4,0.111111,1.41111,0.857143,1.125,1.49091,1.75,0.977273,1.02381,0.7,0.82,False,False,True,False,False,False,True,False,True,False,True,False,False,False,True


### Create Custom Scorer

https://scikit-learn.org/stable/modules/model_evaluation.html#scoring

In [7]:
def my_custom_loss_func(y_true, y_pred):
    """Return log(1 + max|y_true - y_pred|).

    Both arguments are coerced with ``np.asarray`` so plain Python lists are
    accepted as well as arrays/Series (list - list is not defined in Python).
    Empty inputs still raise ``ValueError`` from the ``max`` reduction.
    """
    largest_gap = np.abs(np.asarray(y_true) - np.asarray(y_pred)).max()
    return np.log1p(largest_gap)

# With greater_is_better=False the scorer negates the return value of
# my_custom_loss_func: the loss is np.log(2) = 0.693 for the X and y
# defined in this cell, so the scorer reports -0.693.
from sklearn.dummy import DummyClassifier

# Toy data: two identical samples with different labels.
X = [[1], [1]]
y = [0, 1]

# Wrap the loss as a scorer; greater_is_better=False marks it as a loss.
score = make_scorer(my_custom_loss_func, greater_is_better=False)

clf = DummyClassifier(strategy='most_frequent', random_state=0).fit(X, y)

# Raw loss first, then the same quantity as seen through the scorer.
print(my_custom_loss_func(y, clf.predict(X)))

print(score(clf, X, y))


0.6931471805599453
-0.6931471805599453


### Adapt current logistic prediction method 

In [54]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.metrics import roc_auc_score
    
### Logistic
def Logistic(train_x, train_y, test_x, test_y, C, seed):
    """Tune a class-balanced logistic regression and audit it on the holdout set.

    A 5-fold shuffled ``KFold`` grid search over ``C`` (scored by ROC AUC)
    picks the regularisation strength; a fresh model with that ``C`` is fit on
    the full training frame, scored on the holdout frame, and its hard
    predictions are passed to ``compute_fairness``.

    Parameters
    ----------
    train_x, test_x : pd.DataFrame
        Must contain 'person_id', 'screening_date' and 'race'; these three
        columns are excluded from the model inputs. ``test_x`` must also
        contain 'sex' and every remaining ``train_x`` column. Extra columns
        in ``test_x`` are ignored. Neither frame is modified.
    train_y, test_y : array-like
        Binary labels for the two frames.
    C : list
        Candidate values for the ``C`` hyper-parameter.
    seed : int
        Random state for the estimator and the CV splitter.

    Returns
    -------
    dict
        Keys: 'best_param', 'best_validation_auc', 'best_validation_std',
        'best_validation_auc_diff' (mean train AUC minus mean validation AUC
        at the selected ``C``), 'holdout_test_auc' and
        'holdout_fairness_overview' (the value returned by
        ``compute_fairness``).
    """
    non_feature_cols = ['person_id', 'screening_date', 'race']

    # Select model inputs by name from the training columns so the holdout
    # frame is always aligned to what the model was fit on, even if it carries
    # additional columns. Raises KeyError if a listed column is missing.
    feature_cols = train_x.columns.drop(non_feature_cols)
    train_features = train_x[feature_cols]
    test_features = test_x[feature_cols]

    ### model & parameters
    lr = LogisticRegression(class_weight='balanced', solver='liblinear', random_state=seed)
    cross_validation = KFold(n_splits=5, shuffle=True, random_state=seed)
    c_grid = {"C": C}

    ### cross validation
    clf = GridSearchCV(estimator=lr, param_grid=c_grid, scoring='roc_auc',
                       cv=cross_validation, return_train_score=True)
    clf.fit(train_features, train_y)

    ### scores
    # best_index_ is the cv_results_ row of the selected candidate; using it
    # avoids locating the row through a float equality search.
    best_idx = clf.best_index_
    best_auc = clf.best_score_
    best_std = clf.cv_results_['std_test_score'][best_idx]
    best_param = clf.best_params_
    auc_diff = clf.cv_results_['mean_train_score'][best_idx] - best_auc

    ### holdout test
    lr = LogisticRegression(class_weight='balanced', solver='liblinear',
                            random_state=seed, C=best_param['C'])
    lr.fit(train_features, train_y)
    holdout_prob = lr.predict_proba(test_features)[:, 1]
    holdout_pred = lr.predict(test_features)
    holdout_auc = roc_auc_score(test_y, holdout_prob)

    ### compute fairness
    # assign() builds a new frame, so the caller's test_x is never mutated.
    entity_id = test_x['person_id'].map(str) + " " + test_x["screening_date"].map(str)
    holdout_attrs = (
        test_x[["sex", "race"]]
        .assign(entity_id=entity_id, score=holdout_pred, label_value=test_y)
        [["entity_id", "sex", "race", "score", "label_value"]]
    )

    # some hard-coded attrs
    decoders = {"sex": {0: "male",
                        1: "female"}
               }

    sensitive_attrs = ['sex', 'race']

    ref_groups_dict = {'sex': 'male',
                       'race': 'Caucasian'}

    holdout_fairness_overview = compute_fairness(holdout_attrs,
                                                 decoders=decoders,
                                                 sensitive_attrs=sensitive_attrs,
                                                 ref_groups_dict=ref_groups_dict)

    return {'best_param': best_param,
            'best_validation_auc': best_auc,
            'best_validation_std': best_std,
            'best_validation_auc_diff': auc_diff,
            'holdout_test_auc': holdout_auc,
            'holdout_fairness_overview': holdout_fairness_overview}

In [57]:
# Candidate values of C handed to the grid search inside Logistic.
c = [1e-5, 1e-4, 1e-3]

logistic_summary = Logistic(train_x=X_train,
                            train_y=Y_train,
                            test_x=X_test,
                            test_y=Y_test,
                            C=c,
                            seed=816)

# Result row: model name followed by selected entries of the summary;
# the fairness overview is kept as the last element.
res = ["Logistic"] + [logistic_summary[key] for key in ('best_validation_auc',
                                                        'best_validation_auc_diff',
                                                        'best_param',
                                                        'holdout_fairness_overview')]


model_id, score_thresholds 1 {'rank_abs': [148]}
get_disparity_predefined_group()


In [56]:
# Show the holdout fairness overview (stored as the last element of `res`).
logistic_summary['holdout_fairness_overview']

Unnamed: 0,attribute_name,attribute_value,tpr,tnr,for,fdr,fpr,fnr,npv,precision,ppr,pprev,prev,ppr_disparity,pprev_disparity,precision_disparity,fdr_disparity,for_disparity,fpr_disparity,fnr_disparity,tpr_disparity,tnr_disparity,npv_disparity,TNR Parity,Statistical Parity,FDR Parity,FPR Parity,Unsupervised Fairness,FOR Parity,Precision Parity,Impact Parity,NPV Parity,Supervised Fairness,TPR Parity,TypeII Parity,TypeI Parity,Equalized Odds,FNR Parity
0,sex,female,0.576159,0.72561,0.349727,0.340909,0.27439,0.423841,0.650273,0.659091,0.891892,0.419048,0.479365,8.25,1.28333,1.31818,0.681818,1.28233,1.09756,0.800589,1.22434,0.96748,0.894126,True,False,False,True,False,False,False,False,True,False,True,False,False,True,True
1,sex,male,0.470588,0.75,0.272727,0.5,0.25,0.529412,0.727273,0.5,0.108108,0.326531,0.346939,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
2,race,African-American,0.645455,0.697917,0.367925,0.29,0.302083,0.354545,0.632075,0.71,0.675676,0.485437,0.533981,2.77778,1.71251,1.50353,0.549474,1.28774,1.33553,0.586364,1.63262,0.901923,0.884906,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False
3,race,Asian,,1.0,0.0,,0.0,,1.0,,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,,,1.29231,1.4,False,False,,False,False,False,,False,False,False,,False,False,False,
4,race,Caucasian,0.395349,0.77381,0.285714,0.527778,0.22619,0.604651,0.714286,0.472222,0.243243,0.283465,0.338583,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
5,race,Hispanic,0.545455,0.666667,0.454545,0.333333,0.333333,0.454545,0.545455,0.666667,0.0608108,0.45,0.55,0.25,1.5875,1.41176,0.631579,1.59091,1.47368,0.751748,1.37968,0.861538,0.763636,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False
6,race,Other,0.25,0.666667,0.428571,0.666667,0.333333,0.75,0.571429,0.333333,0.0202703,0.3,0.4,0.0833333,1.05833,0.705882,1.26316,1.5,1.47368,1.24038,0.632353,0.861538,0.8,True,False,False,False,False,False,False,True,False,False,False,False,False,False,True
