# Kamishima

In [None]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from aif360.datasets import StandardDataset
from aif360.algorithms.inprocessing import PrejudiceRemover
from helpers.fairness_measures import *
from helpers.finance import preprocess
from helpers.plot import group_box_plots, group_roc_curves
from sklearn.neural_network import MLPClassifier

In [None]:
from helpers import export_plot

## Load data

In [None]:
# Artifacts directory, three levels above the current working directory.
artifacts_dir = Path("../../..") / "artifacts"

In [None]:
# Override artifacts_dir (the original note said "data_dir") when running from
# the source notebook, where the artifacts directory is one level further up.
# This cell is stripped out for the hosted notebooks.
artifacts_dir = Path("../../../../artifacts")

Set the location of the data and run the preprocessing step on it.

In [None]:
# Point at the "adult" data directory under the artifacts root, then hand it
# to the project's preprocessing helper.
# NOTE(review): presumably this produces the files under `processed/` that
# are read afterwards -- confirm in helpers.finance.preprocess.
data_dir = artifacts_dir.joinpath("data", "adult")
preprocess(data_dir)

In [None]:
# Read the one-hot encoded train / validation / test splits, in that order,
# from the `processed` sub-directory of the data directory.
processed_dir = data_dir / "processed"
train, val, test = (
    pd.read_csv(processed_dir / f"{split}-one-hot.csv")
    for split in ("train", "val", "test")
)

In [None]:
def _as_standard_dataset(frame):
    """Wrap a data frame in an aif360 ``StandardDataset``.

    Every split uses the same configuration: ``salary`` is the label with
    ``1`` as the favorable class, and ``sex`` is the protected attribute with
    ``1`` as the privileged class. The argument lists are created afresh on
    every call so no list object is shared between the resulting datasets.
    """
    return StandardDataset(
        frame,
        label_name="salary",
        favorable_classes=[1],
        protected_attribute_names=["sex"],
        privileged_classes=[[1]],
    )


# Same construction order as before: train, then test, then validation.
train_sds = _as_standard_dataset(train)
test_sds = _as_standard_dataset(test)
val_sds = _as_standard_dataset(val)

## Perform intervention

The intervention is a little slow to train, so we load a pretrained model from disk; below is the code we used to train it.

In [None]:
# Configure the prejudice remover: `eta` is the fairness penalty parameter,
# "sex" is the sensitive attribute and "salary" is the class (label) column.
prejudice_remover_params = {
    "eta": 5.0,
    "sensitive_attr": "sex",
    "class_attr": "salary",
}
PR = PrejudiceRemover(**prejudice_remover_params)

# Train on the training split.
PR.fit(train_sds)

In [None]:
# Predict on the validation split with the fitted model, then flatten the
# predicted scores into a 1-D array so they can be indexed with per-row
# boolean masks derived from `val`.
val_sds_pred = PR.predict(val_sds)
val_scores = val_sds_pred.scores.flatten()

In [None]:
# Row masks for the two groups in the validation data (sex == 0 is reported
# as female, sex == 1 as male).
is_female = val.sex == 0
is_male = val.sex == 1

# Overall accuracy first, then the per-group accuracies.
print("Accuracy =", accuracy(val_scores, val.salary))
for caption, rows in (
    ("Female accuracy =", is_female),
    ("Male accuracy =", is_male),
):
    print(caption, accuracy(val_scores[rows], val.salary[rows]))

# Mean predicted score within each group.
for caption, rows in (
    ("Mean female score =", is_female),
    ("Mean male score =", is_male),
):
    print(caption, val_scores[rows].mean())

### Plots

In [None]:
# One box trace per group showing the distribution of validation scores.
sex_labels = {0: "Female", 1: "Male"}

box_traces = []
for sex_code, sex_name in sex_labels.items():
    in_group = val.sex == sex_code
    box_traces.append(
        go.Box(
            # Repeat the group code once per row so x and y have equal length.
            x=[sex_code] * in_group.sum(),
            y=val_scores[in_group],
            name=sex_name,
        )
    )

dp_box = go.Figure(data=box_traces)
# Bare expression so the notebook renders the figure as the cell output.
dp_box

In [None]:
# Hand the box plot to the project's export helper under this file name.
dp_plot_filename = "kamishima-dp.json"
export_plot(dp_box, dp_plot_filename)