Let us predict who earns more than 50,000 dollars.
Those people will be eligible to stay at our XXX lounge.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from shap.datasets import adult  # shap is only used as dataset utility


In [None]:
# Load the features and the labels from shap's dataset helper.
X, y_true = adult()
# Lower-case every column name so that later cells can refer to them in lower case.
X.columns = list(map(str.lower, X.columns))
X.head()

# split train test

In [None]:
from fairtalk.plots import plot_perc_true_sex

# Multiplying by 1 turns the labels into numbers (presumably they start out as booleans).
y_true = 1 * y_true
# Readable group labels for the sensitive feature: code 0 becomes "female", anything else "male".
sex = X['sex'].map(lambda code: "female" if code == 0 else "male")

plot_perc_true_sex(y_true, sex)

### Fit an unaware model

In [None]:
from sklearn.linear_model import LogisticRegression

# Only these numeric columns are used as features; the `sex` column is not
# among them, which is what makes the model "unaware".
numeric_columns = [
    'age',
    'capital gain',
    'education-num',
    'capital loss',
    'hours per week',
]
X_numeric = X[numeric_columns]

# `fit` returns the estimator itself, so construction and fitting can be chained.
classifier = LogisticRegression().fit(X_numeric, y_true)

# Hard class predictions and class probabilities on the same data the model was fitted on.
y_pred = classifier.predict(X_numeric)
y_prob = classifier.predict_proba(X_numeric)

## Evaluate the fit

In [None]:
from fairlearn.metrics import group_summary
from sklearn.metrics import accuracy_score
# Accuracy of the unaware model's predictions, broken down by the `sex`
# groups passed as sensitive feature.
group_summary(accuracy_score, y_true, y_pred, sensitive_features=sex)


In [None]:
from fairlearn.metrics import selection_rate, ratio_from_summary
# Selection rate of the predictions per `sex` group; stored in a variable
# because a later cell passes it to `ratio_from_summary`.
selection_rate_summary = group_summary(selection_rate, y_true, y_pred, sensitive_features=sex)
selection_rate_summary

In [None]:
# Ratio between the groups' selection rates, derived from the summary computed earlier.
ratio_from_summary(selection_rate_summary)
# Author's reading of the result: one group is selected about twice as often as the other.

In [None]:
from fairlearn.metrics import equalized_odds_difference, equalized_odds_ratio

# Equalized odds compares the true positive rate and the false positive rate
# between the groups of the protected attribute (see the fairlearn docs).
eo_diff = equalized_odds_difference(y_true, y_pred, sensitive_features=sex)
eo_ratio = equalized_odds_ratio(y_true, y_pred, sensitive_features=sex)

# Author's reading: these rates barely change when the protected attribute
# changes value. Display both numbers so the claim can be checked against
# the cell output instead of being computed and silently discarded.
eo_diff, eo_ratio




In [None]:
from fairlearn.metrics import equalized_odds_ratio
# Display the equalized-odds ratio of the unaware model's predictions across the `sex` groups.
equalized_odds_ratio(y_true, y_pred, sensitive_features=sex)


In [None]:
from fairlearn.metrics import true_positive_rate, false_positive_rate
from fairtalk.summary import flatten_group_summary

# True and false positive rates of the predictions, summarised per `sex` group.
tp = group_summary(true_positive_rate, y_true, y_pred, sensitive_features=sex)
fp = group_summary(false_positive_rate, y_true, y_pred, sensitive_features=sex)

# One row per rate; the extra column holds the female-to-male ratio of that rate.
rate_table = pd.DataFrame(
    [flatten_group_summary(summary) for summary in (tp, fp)],
    index=['tp', 'fp'],
)
rate_table['ratio'] = rate_table['female'] / rate_table['male']
rate_table

In [None]:
# Ratio between the per-group true positive rates held in `tp`.
ratio_from_summary(tp)


In [None]:
# Reading guide for the dashboard metrics (the author's observations for this model):
# Accuracy (TP + TN) / All: the model is better at assessing the appropriate category for women
# Balanced accuracy: if the smallest category (positive) gets more weight, the weighted accuracy is approximately the same
# Precision TP / (TP + FP): more women (+/- 50%) than men (+/- 25%) are wrongly chosen for the True category
# Recall TP / (TP + FN): more women are missed
# Selection rate (TP + FP) / All: men are twice as likely to be selected

from fairlearn.widget import FairlearnDashboard
FairlearnDashboard(sensitive_features=sex,
                   sensitive_feature_names=['sex'],
                   y_true=y_true,
                   y_pred={"initial model": y_pred})

## Mitigate disparity using postprocessing

In [None]:
from fairlearn.postprocessing import ThresholdOptimizer
from sklearn import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError


class LogisticRegressionAsRegression(BaseEstimator, ClassifierMixin):
    """Expose a classifier's positive-class probability through ``predict``.

    The wrapped estimator must provide ``predict_proba``. ``predict`` of this
    wrapper returns the second probability column (index 1) instead of hard
    0/1 labels.
    """

    def __init__(self, logistic_regression_estimator):
        self.logistic_regression_estimator = logistic_regression_estimator

    def fit(self, X, y):
        """Reuse the wrapped estimator if it is already fitted, else fit a clone.

        ``X`` and ``y`` are only used when the wrapped estimator is not fitted
        yet. Returns ``self``.
        """
        fitted = self.logistic_regression_estimator
        try:
            check_is_fitted(fitted)
        except NotFittedError:
            # Fit a clone so that the estimator handed to __init__ stays untouched.
            fitted = clone(fitted).fit(X, y)
        self.logistic_regression_estimator_ = fitted
        return self

    def predict(self, X):
        """Return the probability of class 1 for every row of ``X``."""
        # Real-valued scores instead of 0/1: keep only the column for class 1.
        return self.logistic_regression_estimator_.predict_proba(X)[:, 1]


# Wrap the logistic regression so that `predict` returns the positive-class probability.
# `classifier` was already fitted in an earlier cell, so this `fit` call reuses it instead of refitting.
estimator_wrapper = LogisticRegressionAsRegression(classifier).fit(X[numeric_columns], y_true)

# Post-process the wrapped scores under a demographic-parity constraint.
# prefit=True is presumably passed because the wrapped estimator is already fitted.
postprocessed_predictor_DP = ThresholdOptimizer(
    estimator=estimator_wrapper, constraints="demographic_parity", prefit=True
)

# Fitting needs the sensitive feature in addition to the features and the labels.
postprocessed_predictor_DP.fit(
    X[numeric_columns], y_true, sensitive_features=sex
)

# Predictions of the mitigated model on the same data that was used for fitting.
fairness_aware_predictions_DP_train = postprocessed_predictor_DP.predict(
    X[numeric_columns], sensitive_features=sex
)
# NOTE(review): the commented-out call below refers to `postprocessed_predictor_EO`,
# `X_test` and `sensitive_features_test`, none of which are defined in the visible
# notebook -- confirm they exist before re-enabling it.
# fairness_aware_predictions_EO_test = postprocessed_predictor_EO.predict(
#     X_test, sensitive_features=sensitive_features_test
# )



In [None]:
# Show the initial and the mitigated predictions side by side in one dashboard.
predictions_by_model = {
    "initial model": y_pred,
    "mitigated_model": fairness_aware_predictions_DP_train,
}
FairlearnDashboard(
    sensitive_features=sex,
    sensitive_feature_names=['sex'],
    y_true=y_true,
    y_pred=predictions_by_model,
)


## Steps of threshold optimizer postprocessing

We compute the ROC charts per group

In [None]:
# NOTE: `_reformat_and_group_data` is imported from a private fairlearn module
# (leading underscores), so it may change between fairlearn versions.
from fairlearn.postprocessing._threshold_optimizer import _reformat_and_group_data
from fairtalk.plots import get_roc_points, plot_roc, plot_convex_hull_interpolation, plot_overall_tradeoff
from fairtalk.plots import plot_creation_convex_hull

# Group the labels and the class-1 probabilities (column 1 of `y_prob`) by the sensitive feature.
data_grouped_by_sensitive_feature = _reformat_and_group_data(sex, y_true, y_prob[:,1])
# Derive the ROC points from the grouped data, then plot them.
roc_points = get_roc_points(data_grouped_by_sensitive_feature)
plt1 = plot_roc(data_grouped_by_sensitive_feature, roc_points)


With a lower threshold (operation), we get more false positives (x) and more true positives (y).

In [None]:
# Preview the first five ROC points of the 'male' group with descriptive column names.
axis_labels = {"x": "False positives", "y": "True positives"}
roc_points.get('male').rename(columns=axis_labels).head(n=5)

Based on the classifications (true and false positives/negatives), we can calculate the selection rate and the error (1 - accuracy) for each group.


Then, we can determine the convex hull.

In [None]:
# Inputs: sensitive feature, labels and class-1 probabilities (column 1 of `y_prob`).
# Presumably plots how the convex hull is constructed -- see fairtalk.plots.
plot_creation_convex_hull(sex, y_true, y_prob[:,1])

Subsequently, we interpolate the convex hull with a grid of m selection rates.

In [None]:
# Inputs: sensitive feature, labels and class-1 probabilities (column 1 of `y_prob`).
# Presumably plots the interpolation of the convex hull -- see fairtalk.plots.
plot_convex_hull_interpolation(sex, y_true, y_prob[:,1])

This gives us a dictionary that tells us, for each group, which operations/thresholds achieve a certain selection rate and error.

#### Trade off plot

We pick the selection rate for which the weighted sum of errors is minimal. Thus,
different groups will have different thresholds.

In [None]:
# Inputs: sensitive feature, labels and class-1 probabilities (column 1 of `y_prob`).
# Presumably plots the overall trade-off across the groups -- see fairtalk.plots.
plot_overall_tradeoff(sex, y_true, y_prob[:,1])

In order to obtain the same selection rate for all groups,
the base predictions are reweighted based on the threshold neighbours of the selection rate.

In [None]:
# Print, for every sensitive-feature group, the interpolation stored on the
# fitted threshold optimizer, with the comma-separated parts of its repr on
# separate lines.
interpolation_by_group = (
    postprocessed_predictor_DP._post_processed_predictor_by_sensitive_feature
)
for group, interpolation in interpolation_by_group.items():
    print("{}:".format(group))
    repr_parts = repr(interpolation).split(",")
    print("\n ".join(repr_parts))
    print("-----------------------------------")