In [18]:
import pandas as pd
import numpy as np
from tempeh.configurations import datasets

# Instantiate the 'compas' entry of tempeh's dataset registry.
compas_dataset = datasets['compas']()
# Features are requested as DataFrames and targets as Series; each call is
# unpacked into a (train, test) pair.
X_train, X_test = compas_dataset.get_X(format=pd.DataFrame)
y_train, y_test = compas_dataset.get_y(format=pd.Series)
# 'race' is requested as the sensitive feature, again unpacked as (train, test).
sensitive_features_train, sensitive_features_test = compas_dataset.get_sensitive_features('race', format=pd.Series)
# Display the training row labelled 0 together with its target value.
X_train.loc[0], y_train[0]

(age                        25.000000
 juv_fel_count               0.000000
 juv_misd_count             -2.340451
 juv_other_count             1.000000
 priors_count              -15.010999
 age_cat_25 - 45             1.000000
 age_cat_Greater than 45     0.000000
 age_cat_Less than 25        0.000000
 c_charge_degree_F           0.000000
 c_charge_degree_M           1.000000
 Name: 0, dtype: float64, 1.0)

In [19]:
# This wrapper around the estimator serves the purpose of mapping the predict
# method to predict_proba so that we can use real values to get more accurate estimates.
from sklearn import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError

class LogisticRegressionAsRegression(BaseEstimator, ClassifierMixin):
    """Expose a classifier's class-1 probability through ``predict``.

    ``predict`` returns column 1 of the wrapped estimator's ``predict_proba``
    output, i.e. real-valued scores rather than hard 0/1 labels.

    Parameters
    ----------
    logistic_regression_estimator : estimator
        The estimator to wrap. It may already be fitted; if it is not, an
        independent clone of it is fitted when ``fit`` is called.
    """

    def __init__(self, logistic_regression_estimator):
        self.logistic_regression_estimator = logistic_regression_estimator

    def fit(self, X, y):
        """Bind a fitted estimator to ``logistic_regression_estimator_``.

        An already-fitted estimator is reused as-is (``X`` and ``y`` are then
        ignored); otherwise a clone is trained on ``X``, ``y`` so the object
        passed to the constructor is left untouched.

        Returns
        -------
        self
        """
        fitted = self.logistic_regression_estimator
        try:
            check_is_fitted(fitted)
        except NotFittedError:
            # Not fitted yet: train a fresh clone rather than the original.
            fitted = clone(fitted).fit(X, y)
        self.logistic_regression_estimator_ = fitted
        return self

    def predict(self, X):
        """Return the probability of class 1 for every row of ``X``."""
        # Column 1 of predict_proba holds the probability for label 1.
        return self.logistic_regression_estimator_.predict_proba(X)[:, 1]

In [36]:
from sklearn.linear_model import LogisticRegression
from fairlearn.postprocessing import ThresholdOptimizer
from copy import deepcopy
# Recode the race labels numerically: "African-American" -> 0, "Caucasian" -> 1.
# NOTE: `s` is an alias of `sensitive_features_train`, not a copy, so the
# assignments below also rewrite `sensitive_features_train` in place; the
# ThresholdOptimizer calls further down therefore receive the 0/1 codes.
s = sensitive_features_train
s[sensitive_features_train == "African-American"] = 0
s[sensitive_features_train == "Caucasian"] = 1

# Frame pairing every training label with its group code (one column each).
group = pd.DataFrame(data=[y_train, s]).T

estimator = LogisticRegression(solver='liblinear')
# `estimator` is still unfitted here, so the wrapper trains its own clone and
# serves that clone's class-1 probabilities through `predict`.
estimator_wrapper = LogisticRegressionAsRegression(estimator).fit(X_train, y_train)

# Solve the optimal policy equation by training a classifier
# ******** Unconstrained policy ********
estimator.fit(X_train, y_train)

# ******** EqOpt ********
# prefit=True: the wrapper above is already fitted and must not be refit.
postprocessed_predictor_EO = ThresholdOptimizer(
    estimator=estimator_wrapper,
    constraints="equalized_odds",
    prefit=True)

# ******** DP ********
postprocessed_predictor_DP = ThresholdOptimizer(
    estimator=estimator_wrapper,
    constraints="demographic_parity",
    prefit=True)

# Predictions of the unconstrained classifier on the training set.
predictions_train = estimator.predict(X_train)

# Fit each post-processor on the training data, then predict with that same
# post-processor.
postprocessed_predictor_EO.fit(X_train, y_train, sensitive_features=sensitive_features_train)
fairness_aware_predictions_EO_train = postprocessed_predictor_EO.predict(X_train, sensitive_features=sensitive_features_train)

postprocessed_predictor_DP.fit(X_train, y_train, sensitive_features=sensitive_features_train)
# Fixed: this previously called postprocessed_predictor_EO.predict, so the
# "DP" predictions were silently the equalized-odds predictions.
fairness_aware_predictions_DP_train = postprocessed_predictor_DP.predict(X_train, sensitive_features=sensitive_features_train)

