# Pleiss

In [None]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from aif360.datasets import StandardDataset
from aif360.algorithms.postprocessing.calibrated_eq_odds_postprocessing import (
    CalibratedEqOddsPostprocessing,
)
from helpers.fairness_measures import accuracy, equalised_odds_p
from helpers.finance import preprocess
from helpers.plot import group_roc_curves

In [None]:
from helpers import export_plot

## Load data

In [None]:
# Root directory (relative to this notebook) under which the data and
# model artifacts used below are stored.
artifacts_dir = Path("..", "..", "..", "artifacts")

In [None]:
# override artifacts_dir in source notebook (one extra parent level)
# this is stripped out for the hosted notebooks
artifacts_dir = Path("..", "..", "..", "..", "artifacts")

Location of the data (the `adult` dataset directory), which is preprocessed before it is loaded.

In [None]:
# Directory of the "adult" dataset inside the artifacts tree.
data_dir = artifacts_dir.joinpath("data", "adult")
# Run the project's preprocessing helper on that directory; the cells
# below read the CSVs from its "processed" sub-directory.
preprocess(data_dir)

In [None]:
# Read the one-hot encoded train / validation / test splits.
processed_dir = data_dir / "processed"
train, val, test = (
    pd.read_csv(processed_dir / f"{split_name}-one-hot.csv")
    for split_name in ("train", "val", "test")
)

In [None]:
def _to_standard_dataset(frame):
    """Wrap one data split in an aif360 ``StandardDataset``.

    "salary" is the label (favourable class 1) and "sex" is the protected
    attribute (privileged class 1). Fresh list arguments are built on every
    call so the three datasets do not share any argument objects.
    """
    return StandardDataset(
        frame,
        label_name="salary",
        favorable_classes=[1],
        protected_attribute_names=["sex"],
        privileged_classes=[[1]],
    )


# Same construction order as before: train, test, then validation.
train_sds = _to_standard_dataset(train)
test_sds = _to_standard_dataset(test)
val_sds = _to_standard_dataset(val)

# Position of the protected attribute among the dataset's feature columns.
index = train_sds.feature_names.index("sex")

In [None]:
# Group definitions in the list-of-dicts form passed to the aif360
# post-processor below: sex == 1.0 is privileged, sex == 0.0 unprivileged.
privileged_groups = [dict(sex=1.0)]
unprivileged_groups = [dict(sex=0.0)]

## Load original model

In [None]:
# Load the previously trained baseline classifier.
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
baseline_path = artifacts_dir / "models" / "finance" / "baseline.pkl"
model = joblib.load(baseline_path)

# Score the test features (everything except the "salary" label) and keep
# the second predict_proba column as the score for class 1.
test_features = test.drop(columns="salary")
test_probs = model.predict_proba(test_features)[:, 1]

In [None]:
# Deep-copy the ground-truth test dataset and attach the baseline model's
# scores to the copy as a single-column (n, 1) array.
test_sds_pred = test_sds.copy(deepcopy=True)
test_sds_pred.scores = test_probs[:, np.newaxis]

## Perform intervention

In [None]:
# Cost constraint handed to CalibratedEqOddsPostprocessing below; "fnr"
# selects the false negative rate (aif360 also documents "fpr" and
# "weighted" as accepted values).
cost_constraint = "fnr"

In [None]:
# Learn the calibrated equalised-odds parameters on the held-out validation
# split, then apply them to the test predictions. Fitting on the test split
# itself would tune the post-processor on the very rows it is evaluated on,
# which makes the accuracy and fairness figures reported below optimistic.
val_probs = model.predict_proba(val.drop("salary", axis=1))[:, 1]
val_sds_pred = val_sds.copy(deepcopy=True)
val_sds_pred.scores = val_probs.reshape(-1, 1)

cpp = CalibratedEqOddsPostprocessing(
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
    cost_constraint=cost_constraint,
    seed=42,  # fixed seed so the randomised post-processing is repeatable
)
cpp = cpp.fit(val_sds, val_sds_pred)

# Transformed copy of the test predictions, used by all analysis cells.
test_sds_pred_tranf = cpp.predict(test_sds_pred)

## Analyse accuracy and fairness

In [None]:
def _mean_score_gap(rows):
    """Absolute gap in mean transformed score between the two sex groups.

    ``rows`` is a boolean mask over the test set. The mean score is taken
    separately for sex == 1 and sex != 1 within those rows and the absolute
    difference is returned.
    """
    scores = test_sds_pred_tranf.scores
    privileged_mean = scores[rows & sex_mask].mean()
    unprivileged_mean = scores[rows & ~sex_mask].mean()
    return np.abs(privileged_mean - unprivileged_mean)


sex_mask = test.sex == 1
salary_mask = test.salary == 1

# Gap among true positives (salary == 1): equals the between-group gap in
# score-based false negative rate, since that rate is 1 - mean score.
fnr = _mean_score_gap(salary_mask)
# Gap among true negatives (salary != 1): the between-group gap in
# score-based false positive rate.
fpr = _mean_score_gap(~salary_mask)

In [None]:
# Flatten the transformed scores once and reuse them for every report line.
flat_scores = test_sds_pred_tranf.scores.flatten()

print("Accuracy =", accuracy(flat_scores, test.salary))

# Per-group accuracy: sex == 0 is reported as female, sex == 1 as male.
for heading, sex_value in (("Female accuracy =", 0), ("Male accuracy =", 1)):
    in_group = test.sex == sex_value
    print(heading, accuracy(flat_scores[in_group], test.salary[in_group]))

print("FNR =", fnr)
print("FPR =", fpr)

In [None]:
# Equalised-odds measure of the transformed scores, given the protected
# attribute (sex) and the true labels (salary).
eo_value = equalised_odds_p(
    test_sds_pred_tranf.scores.flatten(), test.sex, test.salary
)
print("Equalised odds = ", eo_value)

### Plots

In [None]:
# Draw ROC curves for the transformed scores against the true labels,
# presumably one curve per sex group (helper is defined in helpers.plot).
# NOTE(review): scores are passed un-flattened here, whereas the metric
# cells above call .flatten() first; confirm the helper accepts that shape.
group_roc_curves(test.salary, test_sds_pred_tranf.scores, test.sex)

In [None]:
def _mean_score_bar(label, sex):
    """Build one bar: mean transformed score for a (salary label, sex) cell."""
    in_cell = (test.sex == sex) & (test.salary == label)
    return go.Bar(
        x=[label],
        y=[test_sds_pred_tranf.scores[in_cell].mean()],
        name="Male" if sex else "Female",
    )


# One bar per (label, sex) combination, ordered by label and then by sex.
eo_calib_bar = go.Figure(
    data=[
        _mean_score_bar(label, sex)
        for label in range(2)
        for sex in range(2)
    ]
)
# Trailing expression so the notebook renders the figure.
eo_calib_bar

In [None]:
# Hand the bar chart to the project's export helper under the given file
# name (presumably serialised as plotly JSON; see helpers.export_plot).
export_plot(eo_calib_bar, "pleiss-eo-calib.json")