# aif360.metrics.ClassificationMetric

In [1]:
# Load all necessary packages
import sys
import pandas as pd
import numpy as np
import random

from IPython.display import Markdown, display

# Put the parent directory on the import path (presumably so a local aif360
# checkout one level up is picked up -- confirm against the repo layout).
sys.path.insert(1, "../")

# Seed every random number generator this notebook draws from so that
# Restart & Run All reproduces the same numbers: NumPy's global generator and
# the standard-library `random` module (imported above and used further down
# to generate random labels).
np.random.seed(0)
random.seed(0)

In [2]:
from aif360.datasets import GermanDataset, StructuredDataset, StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
# NOTE: this is a bare string expression, not a comment. As the last
# expression in the cell, Jupyter echoes its repr as the cell output.
'''
*** ClassificationMetric ***
 
Class for computing metrics based on TWO BinaryLabelDatasets.

The first dataset is the original one and the second is the output of the classification transformer (or similar).
'''

'\n*** ClassificationMetric ***\n \nClass for computing metrics based on TWO BinaryLabelDatasets.\n\nThe first dataset is the original one and the second is the output of the classification transformer (or similar).\n'

#### ClassificationMetric

###### Parameters:
<li> dataset (BinaryLabelDataset) – Dataset containing ground-truth labels.</li>
<li> classified_dataset (BinaryLabelDataset) – Dataset containing predictions.</li>
<li> privileged_groups (list(dict)) – Privileged groups. Format is a list of dicts where the keys are protected_attribute_names and the values are values in protected_attributes. Each dict element describes a single group. </li>
<li> unprivileged_groups (list(dict)) – Unprivileged groups in the same format as privileged_groups.</li>

###### Raises:
<li> TypeError – dataset and classified_dataset must be BinaryLabelDataset types.</li>

In [3]:
# Ground-truth dataset: German credit data, with 'credit' as the label column.
# Only 'age' is used as the protected attribute here (age >= 25 is treated as
# the privileged class). The dataset also carries a protected attribute for
# "sex", which this evaluation does not consider, so the related columns are
# dropped.
german = GermanDataset(
    label_name='credit',
    features_to_drop=['personal_status', 'sex'],
    protected_attribute_names=['age'],
    privileged_classes=[lambda age: age >= 25],
)



In [4]:
# Export the ground-truth dataset to pandas. The result is indexed as df[0]
# throughout the notebook: element 0 is the DataFrame itself.
df = StructuredDataset.convert_to_dataframe(german)

# Label distribution of the ground truth, before it is overwritten.
print(df[0]['credit'].describe())

# Simulate a "classifier" by replacing every label with a uniform random draw
# from {1, 2}. This is a single whole-column assignment instead of a per-row
# loop over df[0]['credit'][i]:
#   * the chained form df[0]['credit'][i] = ... writes through an intermediate
#     Series, which triggers SettingWithCopy warnings and does nothing at all
#     under pandas Copy-on-Write;
#   * the draw uses NumPy's global generator, which is seeded in the first
#     cell, so the result is reproducible;
#   * np.random.randint excludes the upper bound, hence (1, 3) for {1, 2};
#   * astype(float) keeps the column's float64 dtype.
df[0]['credit'] = np.random.randint(1, 3, size=len(df[0])).astype(float)

# Label distribution after randomization.
print(df[0]['credit'].describe())

count    1000.000000
mean        1.300000
std         0.458487
min         1.000000
25%         1.000000
50%         1.000000
75%         2.000000
max         2.000000
Name: credit, dtype: float64
count    1000.000000
mean        1.498000
std         0.500246
min         1.000000
25%         1.000000
50%         1.000000
75%         2.000000
max         2.000000
Name: credit, dtype: float64


In [14]:
# Dataset containing predictions: rebuilt from the frame whose 'credit' labels
# were randomized above. Label value 1 is the favorable class.
# NOTE(review): 'age' is presumably already 0/1-encoded in the exported frame,
# so `>= 1` selects the same privileged group as before -- confirm.
german1 = StandardDataset(
    df[0],
    label_name='credit',
    favorable_classes=[1],
    protected_attribute_names=['age'],
    privileged_classes=[lambda value: value >= 1],
)

# Round-trip back to pandas and summarize the label column as the cell output.
df1 = StructuredDataset.convert_to_dataframe(german1)
df1[0]['credit'].describe()

count    1000.000000
mean        1.498000
std         0.500246
min         1.000000
25%         1.000000
50%         1.000000
75%         2.000000
max         2.000000
Name: credit, dtype: float64

In [15]:
# align_datasets aligns the *other* dataset's features, labels and
# protected_attributes to this one and, per the AIF360 API, returns the result
# as a new dataset instead of modifying either operand. The return value
# therefore has to be captured and used; calling it as a bare statement
# discards the aligned copy.
german_aligned = german1.align_datasets(german)

# Group definitions, as lists of {protected_attribute_name: value} dicts.
p = [{'age': 1}]  # privileged group
u = [{'age': 0}]  # unprivileged group

# First argument: ground-truth labels (aligned to the prediction dataset);
# second argument: the dataset carrying the predictions.
cm = ClassificationMetric(german_aligned, german1, unprivileged_groups=u, privileged_groups=p)


## Compute the number of true/false positives/negatives, optionally conditioned on protected attributes.

In [16]:
# privileged=None: no group conditioning, counts cover the whole dataset.
cm.binary_confusion_matrix(privileged=None)

{'TP': 339.0, 'FP': 163.0, 'TN': 137.0, 'FN': 361.0}

In [17]:
# privileged=True: counts restricted to the privileged group passed to cm.
cm.binary_confusion_matrix(privileged=True)

{'TP': 301.0, 'FP': 129.0, 'TN': 110.0, 'FN': 311.0}

In [28]:
# Compute the number of generalized true/false positives/negatives, optionally
# conditioned on protected attributes. Generalized counts are based on scores
# and not on the hard predictions.
cm.generalized_binary_confusion_matrix()

{'GTP': 339.0, 'GFP': 163.0, 'GTN': 137.0, 'GFN': 361.0}

## Compute various performance measures on the dataset, optionally conditioned on protected attributes.

In [18]:
# privileged=None: performance measures computed over the whole dataset.
cm.performance_measures(privileged=None)

{'TPR': 0.48428571428571426,
 'TNR': 0.45666666666666667,
 'FPR': 0.5433333333333333,
 'FNR': 0.5157142857142857,
 'GTPR': 0.48428571428571426,
 'GTNR': 0.45666666666666667,
 'GFPR': 0.5433333333333333,
 'GFNR': 0.5157142857142857,
 'PPV': 0.6752988047808764,
 'NPV': 0.2751004016064257,
 'FDR': 0.3247011952191235,
 'FOR': 0.7248995983935743,
 'ACC': 0.476}

In [19]:
# privileged=True: the same measures, restricted to the privileged group.
cm.performance_measures(privileged=True)

{'TPR': 0.4918300653594771,
 'TNR': 0.4602510460251046,
 'FPR': 0.5397489539748954,
 'FNR': 0.5081699346405228,
 'GTPR': 0.4918300653594771,
 'GTNR': 0.4602510460251046,
 'GFPR': 0.5397489539748954,
 'GFNR': 0.5081699346405228,
 'PPV': 0.7,
 'NPV': 0.26128266033254155,
 'FDR': 0.3,
 'FOR': 0.7387173396674585,
 'ACC': 0.48296122209165687}

### Bias amplification is the difference in smoothed empirical differential fairness (EDF) between the classifier and the original dataset.

In [20]:
# Positive values mean the bias increased due to the classifier.
cm.differential_fairness_bias_amplification(concentration=1.0)

-0.33302546142046585

## Generalized entropy index is proposed as a unified individual and group fairness measure in [Speicher et al., 2018](https://las.inf.ethz.ch/files/speicher2018a.pdf). It is computed over the per-individual benefits $b_i = \hat{y}_i - y_i + 1$.

In [23]:
# Baseline: score the ground-truth dataset against itself, i.e. the
# "predictions" are identical to the true labels.
cm0 = ClassificationMetric(german, german, unprivileged_groups=u, privileged_groups=p)

cm0.generalized_entropy_index()

0.0

In [26]:
# Same index for cm, which compares the ground truth with the randomized labels.
cm.generalized_entropy_index()

0.37686021853097906