# aif360.metrics.ClassificationMetric

In [1]:
# Load all necessary packages
import sys
import pandas as pd
import numpy as np
import random

from IPython.display import Markdown, display

# Make the parent directory importable (inserted at position 1 of the module
# search path).
sys.path.insert(1, "../")

# Seed every random source so reruns produce the same numbers. NumPy's global
# generator and the stdlib `random` module keep independent state, so seeding
# one does not seed the other.
np.random.seed(0)
random.seed(0)

In [2]:
from aif360.datasets import GermanDataset, StructuredDataset, StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
# Bare string literal used as an in-notebook note. Because it is the last
# expression of the cell, the notebook echoes its repr as the cell output.
'''
*** ClassificationMetric ***
 
Class for computing metrics based on TWO BinaryLabelDatasets.

The first dataset is the original one and the second is the output of the classification transformer (or similar).
'''

'\n*** ClassificationMetric ***\n \nClass for computing metrics based on TWO BinaryLabelDatasets.\n\nThe first dataset is the original one and the second is the output of the classification transformer (or similar).\n'

#### ClassificationMetric

###### Parameters:
<li> dataset (BinaryLabelDataset) – Dataset containing ground-truth labels.</li>
<li> classified_dataset (BinaryLabelDataset) – Dataset containing predictions.</li>
<li> privileged_groups (list(dict)) – Privileged groups. Format is a list of dicts where the keys are protected_attribute_names and the values are values in protected_attributes. Each dict element describes a single group. </li>
<li> unprivileged_groups (list(dict)) – Unprivileged groups in the same format as privileged_groups.</li>

###### Raises:
<li> TypeError – dataset and classified_dataset must be BinaryLabelDataset types.</li>

In [3]:
# Ground-truth dataset: German credit data labelled by 'credit'.
def _is_privileged_age(age):
    """Return True for ages treated as privileged (25 and above)."""
    return age >= 25


# 'age' is the only protected attribute evaluated here. The data also carries
# a protected attribute for "sex", which is not considered in this evaluation,
# so 'sex' and 'personal_status' are dropped from the features.
german = GermanDataset(
    protected_attribute_names=['age'],
    privileged_classes=[_is_privileged_age],
    features_to_drop=['personal_status', 'sex'],
    label_name='credit',
)



In [29]:
# convert_to_dataframe returns an indexable result; element 0 is the DataFrame
# holding the 'credit' label column that is inspected and overwritten below.
df = StructuredDataset.convert_to_dataframe(german)
labels_frame = df[0]

# Summary of the ground-truth labels before they are replaced.
print(labels_frame['credit'].describe())

# Replace every label with a random draw from {1, 2} to stand in for classifier
# predictions. The column is assigned in a single step instead of through
# chained indexing (df[0]['credit'][i] = ...), which pandas may apply to a
# temporary copy and which looks rows up by integer key one at a time.
# The existing dtype is preserved so the column keeps the same numeric type.
random_labels = [random.randint(1, 2) for _ in range(len(labels_frame))]
labels_frame['credit'] = pd.Series(
    random_labels,
    index=labels_frame.index,
    dtype=labels_frame['credit'].dtype,
)

# Summary of the randomised labels.
print(labels_frame['credit'].describe())

count    1000.000000
mean        1.300000
std         0.458487
min         1.000000
25%         1.000000
50%         1.000000
75%         2.000000
max         2.000000
Name: credit, dtype: float64
count    1000.000000
mean        1.498000
std         0.500246
min         1.000000
25%         1.000000
50%         1.000000
75%         2.000000
max         2.000000
Name: credit, dtype: float64


In [31]:
# Predictions dataset: wrap the frame whose 'credit' labels were randomised.
# Label 1 is the favorable class; 'age' values >= 1 form the privileged class
# (presumably 'age' is already binarised at this point -- confirm).
german1 = StandardDataset(
    df[0],
    label_name="credit",
    favorable_classes=[1],
    protected_attribute_names=["age"],
    privileged_classes=[lambda age: age >= 1],
)

# Convert back to a DataFrame and summarise the label column as a sanity check.
df1 = StructuredDataset.convert_to_dataframe(german1)
df1[0]["credit"].describe()

count    1000.000000
mean        1.498000
std         0.500246
min         1.000000
25%         1.000000
50%         1.000000
75%         2.000000
max         2.000000
Name: credit, dtype: float64

In [32]:
# NOTE: german.align_datasets(german1), which aligns the other dataset's
# features, labels and protected attributes to this dataset, is left disabled.

# Group definitions keyed by protected attribute name; {'sex': 0} is not part
# of the privileged groups here.
u = [dict(age=0)]
p = [dict(age=1)]

# Compare ground truth (german) against predictions (german1).
cm = ClassificationMetric(
    german,
    german1,
    privileged_groups=p,
    unprivileged_groups=u,
)


## Compute the number of true/false positives/negatives, optionally conditioned on protected attributes.

In [40]:
# privileged=None: counts are taken over all instances, not a single group.
cm.binary_confusion_matrix(privileged=None)

{'TP': 348.0, 'FP': 154.0, 'TN': 146.0, 'FN': 352.0}

In [42]:
# privileged=True: counts are conditioned on the privileged group.
cm.binary_confusion_matrix(privileged=True)

{'TP': 304.0, 'FP': 123.0, 'TN': 116.0, 'FN': 308.0}

## Compute various performance measures on the dataset, optionally conditioned on protected attributes.

In [39]:
# privileged=None: performance measures over all instances.
cm.performance_measures(privileged=None)

{'TPR': 0.49714285714285716,
 'TNR': 0.4866666666666667,
 'FPR': 0.5133333333333333,
 'FNR': 0.5028571428571429,
 'GTPR': 0.49714285714285716,
 'GTNR': 0.4866666666666667,
 'GFPR': 0.5133333333333333,
 'GFNR': 0.5028571428571429,
 'PPV': 0.6932270916334662,
 'NPV': 0.2931726907630522,
 'FDR': 0.30677290836653387,
 'FOR': 0.7068273092369478,
 'ACC': 0.494}

In [38]:
# privileged=True: performance measures conditioned on the privileged group.
cm.performance_measures(privileged=True)

{'TPR': 0.49673202614379086,
 'TNR': 0.48535564853556484,
 'FPR': 0.5146443514644351,
 'FNR': 0.5032679738562091,
 'GTPR': 0.49673202614379086,
 'GTNR': 0.48535564853556484,
 'GFPR': 0.5146443514644351,
 'GFNR': 0.5032679738562091,
 'PPV': 0.711943793911007,
 'NPV': 0.27358490566037735,
 'FDR': 0.28805620608899296,
 'FOR': 0.7264150943396226,
 'ACC': 0.4935370152761457}

## Bias amplification is the difference in smoothed empirical differential fairness (EDF) between the classifier and the original dataset. Positive values mean the classifier increased the bias.

In [44]:
# concentration=1.0 is passed explicitly; presumably the smoothing strength
# used for the smoothed EDF -- confirm against the aif360 documentation.
cm.differential_fairness_bias_amplification(concentration=1.0)

-0.3742733430617343