In [26]:
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

from sklearn.neural_network import MLPClassifier

from helpers.plot import group_box_plots, group_roc_curves
from helpers.fairness_measures import *
from helpers.finance import preprocess
from helpers import export_plot

from aif360.datasets import StandardDataset
from aif360.algorithms.postprocessing.reject_option_classification import (
    RejectOptionClassification,
)

In [7]:
artifacts_dir = Path("../artifacts")

In [8]:
# override data_dir in source notebook
# this is stripped out for the hosted notebooks
artifacts_dir = Path("../../artifacts")

In [9]:
data_dir = artifacts_dir / "data" / "adult"
preprocess(data_dir)

## Load data

In [10]:
train = pd.read_csv(data_dir / "processed" / "train-one-hot.csv").sample(6000)
val = pd.read_csv(data_dir / "processed" / "val-one-hot.csv").sample(2000)
test = pd.read_csv(data_dir / "processed" / "test-one-hot.csv").sample(2000)

In [11]:
train_sds = StandardDataset(
    train,
    label_name="salary",
    favorable_classes=[1],
    protected_attribute_names=["sex"],
    privileged_classes=[[1]],
)
test_sds = StandardDataset(
    test,
    label_name="salary",
    favorable_classes=[1],
    protected_attribute_names=["sex"],
    privileged_classes=[[1]],
)
val_sds = StandardDataset(
    val,
    label_name="salary",
    favorable_classes=[1],
    protected_attribute_names=["sex"],
    privileged_classes=[[1]],
)

In [12]:
privileged_groups = [{"sex": 1.0}]
unprivileged_groups = [{"sex": 0.0}]

## Train original model

In [13]:
model = MLPClassifier(hidden_layer_sizes=(100, 100), early_stopping=True)
model.fit(train.drop("salary", axis=1), train.salary)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=True, epsilon=1e-08,
              hidden_layer_sizes=(100, 100), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [14]:
val_scores = model.predict_proba(val.drop("salary", axis=1))[:, 1]

In [15]:
print("Original model accuracy =", accuracy(val_scores, val.salary))
print(
    "Female accuracy =",
    accuracy(val_scores[val.sex == 0], val.salary[val.sex == 0]),
)
print(
    "Male accuracy =",
    accuracy(val_scores[val.sex == 1], val.salary[val.sex == 1]),
)
print("Mean female score =", val_scores[val.sex == 0].mean())
print("Mean male score =", val_scores[val.sex == 1].mean())

Original model accuracy = 0.843
Female accuracy = 0.9252336448598131
Male accuracy = 0.8041237113402062
Mean female score = 0.10818829128093678
Mean male score = 0.297332645471104


In [16]:
val_sds_pred = val_sds.copy(deepcopy=True)
val_sds_pred.scores = val_scores.reshape(-1, 1)

## Perform intervention

### Find best threshold for classification only

In [17]:
from aif360.metrics import ClassificationMetric

num_thresh = 100
ba_arr = np.zeros(num_thresh)
class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)
for idx, class_thresh in enumerate(class_thresh_arr):

    fav_inds = val_sds_pred.scores > class_thresh
    val_sds_pred.labels[fav_inds] = val_sds_pred.favorable_label
    val_sds_pred.labels[~fav_inds] = val_sds_pred.unfavorable_label

    classified_metric_orig_valid = ClassificationMetric(
        val_sds,
        val_sds_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )

    ba_arr[idx] = 0.5 * (
        classified_metric_orig_valid.true_positive_rate()
        + classified_metric_orig_valid.true_negative_rate()
    )

best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]
best_class_thresh = class_thresh_arr[best_ind]

print(
    "Best balanced accuracy (no fairness constraints) = %.4f" % np.max(ba_arr)
)
print(
    "Optimal classification threshold (no fairness constraints) = %.4f"
    % best_class_thresh
)

Best balanced accuracy (no fairness constraints) = 0.8206
Optimal classification threshold (no fairness constraints) = 0.2179


In [18]:
# Metric used (should be one of allowed_metrics)
metric_name = "Statistical parity difference"

# Upper and lower bound on the fairness metric used
metric_ub = 0.05
metric_lb = -0.05

### Estimate optimal parameters in ROC

In [19]:
ROC = RejectOptionClassification(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
    low_class_thresh=0.01,
    high_class_thresh=0.99,
    num_class_thresh=100,
    num_ROC_margin=50,
    metric_name=metric_name,
    metric_ub=metric_ub,
    metric_lb=metric_lb,
)
ROC = ROC.fit(val_sds, val_sds_pred)

In [20]:
print(
    "Optimal classification threshold (with fairness constraints) = %.4f"
    % ROC.classification_threshold
)
print("Optimal ROC margin = %.4f" % ROC.ROC_margin)

Optimal classification threshold (with fairness constraints) = 0.1981
Optimal ROC margin = 0.1617


### Predictions from validation set

In [21]:
# Metrics for the test set
fav_inds = val_sds_pred.scores > best_class_thresh
val_sds_pred.labels[fav_inds] = val_sds_pred.favorable_label
val_sds_pred.labels[~fav_inds] = val_sds_pred.unfavorable_label

## Apply intervention

In [22]:
# Transform the validation set
val_sds_pred_transf = ROC.predict(val_sds_pred).copy(deepcopy=True)

## Analyse fairness and accuracy

In [23]:
print("Accuracy =", accuracy(val_sds_pred_transf.labels.flatten(), val.salary))
print(
    "Female accuracy =",
    accuracy(
        val_sds_pred_transf.labels.flatten()[val.sex == 0],
        val.salary[val.sex == 0],
    ),
)
print(
    "Male accuracy =",
    accuracy(
        val_sds_pred_transf.labels.flatten()[val.sex == 1],
        val.salary[val.sex == 1],
    ),
)
print(
    "Mean female score =",
    val_sds_pred_transf.labels.flatten()[val.sex == 0].mean(),
)
print(
    "Mean male score =",
    val_sds_pred_transf.labels.flatten()[val.sex == 1].mean(),
)

Accuracy = 0.7985
Female accuracy = 0.7897196261682243
Male accuracy = 0.8026509572901326
Mean female score = 0.30218068535825543
Mean male score = 0.3483063328424153


### Plots

In [25]:
fig_meanscore_fair_by_sex = go.Figure(
    data=[
        go.Bar(
            x=[sex],
            y=[val_sds_pred_transf.labels.flatten()[val.sex == sex].mean()],
            name="Male" if sex else "Female",
        )
        for sex in range(2)
    ]
)
fig_meanscore_fair_by_sex.show()

In [27]:
export_plot(fig_meanscore_fair_by_sex, "fig_meanscore_fair_by_sex.json")