In [11]:
import pandas as pd
from aif360.metrics import ClassificationMetric
from aif360.datasets import StandardDataset, BinaryLabelDataset
import numpy as np
import random

In [12]:
# Fake dataframe for exploration purposes, adapted from https://stackoverflow.com/questions/64506977/calculate-group-fairness-metrics-with-aif360/64543058#64543058 
df_testing = pd.DataFrame({
    'gender': [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
    'race':   [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
    'experience' : [random.random() for _ in range(12)]
})

df_true_labels = pd.DataFrame({'income': [0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1]})
df_pred_labels = np.array([[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]], dtype=np.float64)

In [13]:
# true_labels = BinaryLabelDataset(df=pd.concat([df_testing, df_true_labels], axis=1))
# pred_labels = BinaryLabelDataset(df=pd.concat([df_testing, df_pred_labels], axis=1))

dataset_true = StandardDataset(pd.concat([df_testing, df_true_labels], axis=1), 
                          label_name='income', 
                          favorable_classes=[1],
                          protected_attribute_names=['gender', 'race'], 
                          privileged_classes=[[1], [1, 2]]
                          )

dataset_pred = dataset_true.copy()
dataset_pred.labels = df_pred_labels

In [14]:
print(dataset_true.protected_attribute_names)
print(dataset_true.privileged_protected_attributes)
print(dataset_pred.unprivileged_protected_attributes)

['gender', 'race']
[array([1.]), array([1., 2.])]
[array([0.]), array([0.])]


Trying single attribute bias

In [15]:
attr = dataset_pred.protected_attribute_names[0]
attr

'gender'

In [16]:
idx = dataset_pred.protected_attribute_names.index(attr)
idx

0

In [17]:
privileged_groups =  [{attr:dataset_pred.privileged_protected_attributes[idx][0]}] 
privileged_groups

[{'gender': 1.0}]

In [18]:
unprivileged_groups = [{attr:dataset_pred.unprivileged_protected_attributes[idx][0]}]
unprivileged_groups

[{'gender': 0.0}]

In [19]:
class_metric = ClassificationMetric(dataset_true, dataset_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

In [20]:
class_metric.statistical_parity_difference()

-1.0

Trying multiple attribute bias 

In [21]:

privileged_groups = [{name : attribute for name, attribute in zip(dataset_true.protected_attribute_names, dataset_true.privileged_protected_attributes)}] 
privileged_groups

[{'gender': array([1.]), 'race': array([1., 2.])}]

In [22]:
unprivileged_groups = [{name : attribute for name, attribute in zip(dataset_true.protected_attribute_names, dataset_true.unprivileged_protected_attributes)}] 
unprivileged_groups

[{'gender': array([0.]), 'race': array([0.])}]

This causes an exception

In [23]:
# class_metric = ClassificationMetric(dataset_pred, dataset_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
# class_metric.statistical_parity_difference()

ValueError: operands could not be broadcast together with shapes (12,) (2,) 