# Imports

In [1]:
import random

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from aif360.datasets import GermanDataset
from aif360.algorithms.preprocessing import Reweighing, OptimPreproc, LFR, DisparateImpactRemover
from aif360.algorithms.preprocessing.optim_preproc_helpers.opt_tools import OptTools
from aif360.algorithms.preprocessing.optim_preproc_helpers.distortion_functions  import get_distortion_german, get_distortion_adult
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_german, load_preproc_data_adult

from FairBoost import FairBoost, Bootstrap_type

2022-03-22 12:07:16.870536: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-03-22 12:07:16.870560: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# Debug

In [2]:
# Debug-only dependency. If it is missing, install it into the kernel's
# environment with:
#   %pip install ipdb
# NOTE(review): ipdb is not referenced by any cell visible in this notebook;
# confirm it is still needed before sharing.
import  ipdb

In [3]:
# Keep automatic post-mortem debugging disabled for normal runs; switch to
# `%pdb on` temporarily while investigating a failing cell.
%pdb off

Automatic pdb calling has been turned OFF


# Code

In [4]:
# Load the preprocessed Adult dataset and split it 70/30 into train and test.
# The shuffle is seeded so that Restart & Run All reproduces the same
# partition; without a seed every run yields a different split and the
# accuracies reported by the experiments below are not comparable across runs.
SPLIT_SEED = 42

dataset_orig = load_preproc_data_adult()
dataset_orig_train, dataset_orig_test = dataset_orig.split(
    [0.7], shuffle=True, seed=SPLIT_SEED
)

In [5]:
# Groups keyed on the 'sex' attribute; these names are reused by the other
# preprocessors defined in the following cells.
unprivileged_groups = [{'sex': 0.0}]
privileged_groups = [{'sex': 1.0}]

# Options handed to OptimPreproc together with the OptTools optimizer.
optim_options = {
    "distortion_fun": get_distortion_adult,
    "epsilon": 0.05,
    "clist": [0.99, 1.99, 2.99],
    "dlist": [.1, 0.05, 0],
}

# NOTE: AIF360 warns on construction that the group arguments are not used
# by OptimPreproc (see the message printed below this cell).
pp2 = OptimPreproc(
    OptTools,
    optim_options,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

Privileged and unprivileged groups specified will not be used. The protected attributes are directly specified in the data preprocessing function. The current implementation automatically adjusts for discrimination across all groups. This can be changed by changing the optimization code.


In [6]:

# TODO: FairBoost does not support Reweighing!!
pp1 = Reweighing(
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)


In [7]:
# LFR preprocessor configured with the same sex-based groups as the other
# preprocessors; verbose=1 requests progress output.
pp3 = LFR(
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
    k=10,
    Ax=0.1,
    Ay=1.0,
    Az=2.0,
    verbose=1,
)

In [8]:
# DisparateImpactRemover with repair_level 0.5.
pp4 = DisparateImpactRemover(repair_level=.5)
# Alias .transform to .fit_transform on this instance so that code calling
# pp4.transform(...) runs fit_transform instead.
# NOTE(review): presumably needed because FairBoost calls .transform on each
# preprocessor — confirm against the FairBoost implementation.
setattr(pp4, "transform", pp4.fit_transform)

In [9]:
# Preprocessors passed to FairBoost, in this order.
pp = pp1, pp2, pp3, pp4

In [10]:
# Train a FairBoost ensemble (logistic regression base model combined with the
# preprocessors in `pp`) on the training split and score it on the test split.
model = LogisticRegression()

ens = FairBoost(model, pp, bootstrap_type=Bootstrap_type.DEFAULT)
ens = ens.fit(dataset_orig_train)
y_pred = ens.predict(dataset_orig_test)

# accuracy_score's signature is (y_true, y_pred). Accuracy is symmetric, so
# the value is unaffected, but the correct order keeps the call safe if the
# metric is ever swapped for an asymmetric one. Labels are flattened to 1-D
# because the dataset stores them as a column vector.
accuracy_score(dataset_orig_test.labels.ravel(), y_pred)

Reweighing
OptimPreproc
LFR
DisparateImpactRemover
Reweighing
OptimPreproc
LFR
DisparateImpactRemover


0.7924657066812257

In [11]:
# Baseline: logistic regression trained on the unprocessed training features
# and evaluated on the unprocessed test features.
model = LogisticRegression()
model = model.fit(dataset_orig_train.features, dataset_orig_train.labels.ravel())
y_pred = model.predict(dataset_orig_test.features)

# accuracy_score expects (y_true, y_pred); pass the flattened true labels first.
accuracy_score(dataset_orig_test.labels.ravel(), y_pred)

0.8048181259810278

## DIR: DisparateImpactRemover on its own

In [18]:
# DisparateImpactRemover alone: repair train and test features, then train and
# score a logistic regression on the repaired data.
pp_ = DisparateImpactRemover(repair_level=1)

# Train and test are each repaired through their own fit_transform call.
dataset_orig_train_m = pp_.fit_transform(dataset_orig_train)
# Keep the untouched test labels as ground truth (column vector, as stored).
y = dataset_orig_test.labels
dataset_orig_test_m = pp_.fit_transform(dataset_orig_test)

model = LogisticRegression()
model = model.fit(dataset_orig_train_m.features, dataset_orig_train_m.labels.ravel())
y_pred = model.predict(dataset_orig_test_m.features)

# accuracy_score expects (y_true, y_pred); flatten the label column to 1-D.
# NOTE(review): the accuracy recorded for this cell is identical to the
# unprocessed baseline's — confirm that the repair actually changes the
# features used by the model.
accuracy_score(y.ravel(), y_pred)

0.8048181259810278

## OPTIM: OptimPreproc on its own


In [20]:
# OptimPreproc alone: fit the transformation on the training split, apply it
# to both splits, then train and score a logistic regression.
optim_options = {
    "distortion_fun": get_distortion_adult,
    "epsilon": 0.05,
    "clist": [0.99, 1.99, 2.99],
    "dlist": [.1, 0.05, 0],
}

privileged_groups = [{'sex': 1.0}]
unprivileged_groups = [{'sex': 0.0}]
# NOTE: AIF360 warns that the group arguments are not used by OptimPreproc
# (see the message printed below this cell).
pp_ = OptimPreproc(OptTools, optim_options,
                  unprivileged_groups = unprivileged_groups,
                  privileged_groups = privileged_groups)

dataset_orig_train_m = pp_.fit_transform(dataset_orig_train)
# Capture the original test labels before transforming the test split, so the
# score is computed against untouched ground truth.
y = dataset_orig_test.labels
dataset_orig_test_m = pp_.transform(dataset_orig_test)

model = LogisticRegression()
model = model.fit(dataset_orig_train_m.features, dataset_orig_train_m.labels.ravel())
y_pred = model.predict(dataset_orig_test_m.features)

# accuracy_score expects (y_true, y_pred); flatten the label column to 1-D.
accuracy_score(y.ravel(), y_pred)

Privileged and unprivileged groups specified will not be used. The protected attributes are directly specified in the data preprocessing function. The current implementation automatically adjusts for discrimination across all groups. This can be changed by changing the optimization code.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.
This code path has been hit 21 times so far.


This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.
This code path has been hit 2

0.7928069337337064

## LFR: Learning Fair Representations on its own

In [23]:
# LFR alone: fit on the training split, transform both splits, then train and
# score a logistic regression on the transformed data.
pp_ = LFR(unprivileged_groups=unprivileged_groups,
         privileged_groups=privileged_groups,
         k=10, Ax=0.1, Ay=1.0, Az=2.0,
         verbose=1
        )

dataset_orig_train_m = pp_.fit_transform(dataset_orig_train)
# Capture the original test labels before transforming the test split, so the
# score is computed against untouched ground truth.
y = dataset_orig_test.labels
dataset_orig_test_m = pp_.transform(dataset_orig_test)

model = LogisticRegression()
model = model.fit(dataset_orig_train_m.features, dataset_orig_train_m.labels.ravel())
y_pred = model.predict(dataset_orig_test_m.features)

# accuracy_score expects (y_true, y_pred); flatten the label column to 1-D.
accuracy_score(y.ravel(), y_pred)

0.7782706612980277

In [30]:
# Do the predictions match the true labels everywhere?
# `y` holds the labels as a column vector while `y_pred` is 1-D, so comparing
# them directly would broadcast to an n-by-n matrix instead of an element-wise
# comparison. Flatten `y` first so the shapes line up.
(y_pred == y.ravel()).all()

False

In [39]:
# Positions where the prediction differs from the (flattened) true label.
# numpy is imported as `np` with the other dependencies in the imports cell
# rather than here, so the notebook's dependencies are all declared up front.
np.argwhere(y_pred != y.ravel())

array([[    9],
       [   10],
       [   11],
       ...,
       [14646],
       [14647],
       [14648]])

In [40]:
# Shape of the test labels once flattened to one dimension.
y.reshape(-1).shape

(14653,)

In [51]:
# Element-wise agreement between predictions and flattened labels;
# the final expression is True only when every position matches.
matches = y_pred == y.ravel()
matches.all()

False

In [52]:
# Predicted label at index 9, the first mismatching position listed in the
# np.argwhere output above.
mismatch_idx = 9
y_pred[mismatch_idx]

0.0

In [54]:
# True label at index 9, to compare with the prediction at the same position.
mismatch_idx = 9
y.flat[mismatch_idx]

1.0