# aif360.metrics.BinaryLabelDatasetMetric

In [1]:
# Load all necessary packages
import sys
import pandas as pd
import numpy as np

from IPython.display import Markdown, display

sys.path.insert(1, "../")  
np.random.seed(0)

In [2]:
from aif360.datasets import GermanDataset, StructuredDataset
from aif360.metrics import BinaryLabelDatasetMetric  # Computes metrics based on a single BinaryLabelDataset.

#### BinaryLabelDatasetMetric


Parameters:	
<li> dataset (BinaryLabelDataset) – A BinaryLabelDataset.
<li> privileged_groups (list(dict)) – Privileged groups. Format is a list of dicts where the keys are protected_attribute_names and the values are values in protected_attributes. Each dict element describes a single group. See examples for more details.
<li> unprivileged_groups (list(dict)) – Unprivileged groups in the same format as privileged_groups.

Raises:	
<li> TypeError – dataset must be a BinaryLabelDataset type.

In [3]:
# Load the German credit dataset with 'age' as the only protected attribute.
german = GermanDataset(
    protected_attribute_names=['age'],           # the dataset also contains a protected
                                                 # attribute for "sex", which we do not
                                                 # consider in this evaluation
    privileged_classes=[lambda x: x >= 25],      # age >= 25 is considered privileged
    features_to_drop=['personal_status', 'sex']  # ignore sex-related attributes
)

# Group definitions: each dict maps a protected attribute name to the encoded
# value that identifies the group.
p = [{'age': 1}]  # privileged group (age encoded as 1)
u = [{'age': 0}]  # unprivileged group (age encoded as 0)

# Metric object reused by the cells that follow.
bdm = BinaryLabelDatasetMetric(german, unprivileged_groups=u, privileged_groups=p)

In [4]:
# Despite its name, `df` is a pair: df[0] is the DataFrame previewed below and
# df[1] is a dict of dataset metadata (its keys are inspected in the next cells).
df = StructuredDataset.convert_to_dataframe(german)
df[0].head()

Unnamed: 0,month,credit_amount,investment_as_income_percentage,residence_since,age,number_of_credits,people_liable_for,status=A11,status=A12,status=A13,...,housing=A153,skill_level=A171,skill_level=A172,skill_level=A173,skill_level=A174,telephone=A191,telephone=A192,foreign_worker=A201,foreign_worker=A202,credit
0,6.0,1169.0,4.0,4.0,1.0,2.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0
1,48.0,5951.0,2.0,2.0,0.0,1.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,2.0
2,12.0,2096.0,2.0,3.0,1.0,1.0,2.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
3,42.0,7882.0,2.0,4.0,1.0,1.0,2.0,1.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
4,24.0,4870.0,3.0,4.0,1.0,2.0,2.0,1.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,2.0


In [5]:
df[1].keys()

dict_keys(['feature_names', 'label_names', 'protected_attribute_names', 'instance_names', 'instance_weights', 'privileged_protected_attributes', 'unprivileged_protected_attributes'])

In [6]:
df[1]['label_names']

['credit']

In [7]:
df[1]['protected_attribute_names']

['age']

In [8]:
df[1]['privileged_protected_attributes']

[array([1.])]

In [9]:
bdm.num_instances(privileged=None) #Compute the number of instances, n, in the dataset conditioned on protected attributes if necessary.

1000.0

In [10]:
bdm.num_instances(True)

851.0

In [11]:
bdm.num_instances(False)

149.0

In [12]:
def privilege_value(privileged=False) -> float:
    """Return the number of instances in one of the two groups of `bdm`.

    This is a toy "metric" with the ``metric_fun(privileged=...)`` shape that
    ``bdm.ratio`` and ``bdm.difference`` are called with in the next cells.

    Args:
        privileged: Truthy selects the privileged group; falsy (including
            ``None``) selects the unprivileged group.

    Returns:
        The group size as reported by ``bdm.num_instances`` -- a float
        (e.g. 851.0), not an int.
    """
    # bool() keeps the original truthiness handling: only True/False is ever
    # forwarded, so None still means "unprivileged" rather than "whole dataset".
    return bdm.num_instances(bool(privileged))

metric_fun = privilege_value

In [13]:
bdm.ratio(metric_fun) #Compute ratio of the metric for unprivileged and privileged groups.

0.17508813160987075

In [14]:
bdm.difference(metric_fun) #Compute difference of the metric for unprivileged and privileged groups.

-702.0

## Base Rate

<img src="images/base_rate.JPG" style="height:180px" >

In [15]:
bdm.base_rate(privileged = True)

0.7191539365452408

In [16]:
bdm.base_rate(privileged = False)

0.5906040268456376

In [17]:
bdm.base_rate(privileged = None)

0.7

## Consistency
Individual fairness metric from Zemel et al., "Learning Fair Representations" (ICML 2013), that measures how similar the labels are for similar instances.

<img src = "images/consistency.JPG" style="height:180px" >

In [18]:
bdm.consistency(n_neighbors = 5)

array([0.6816])

## Disparate_impact

<img src = "images/disparate_impact.JPG" style="height:180px" >

In [19]:
bdm.disparate_impact()

0.8212484098784929

## statistical_parity_difference() == mean_difference()

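`mean_difference()` is an alias of `statistical_parity_difference()`; both compute

$$Pr(Y = 1 \mid D = \text{unprivileged}) - Pr(Y = 1 \mid D = \text{privileged})$$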

In [20]:
# Difference in favourable-outcome rates: unprivileged minus privileged.
bdm.mean_difference()

-0.12854990969960323

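## num_negatives

<img src = "images/num_negatives.JPG" style="height:380px" >

The cells below use the complementary count, `num_positives`: dividing it by `num_instances` for a group reproduces that group's `base_rate`.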

In [38]:
bdm.num_positives(privileged= True) / bdm.num_instances(True)

0.7191539365452408

In [39]:
bdm.base_rate(privileged = True)

0.7191539365452408

In [40]:
bdm.num_positives(privileged= False) / bdm.num_instances(False)

0.5906040268456376

In [41]:
bdm.base_rate(privileged = False)

0.5906040268456376