# aif360.metrics.ClassificationMetric

In [1]:
# Load all necessary packages
import sys
import pandas as pd
import numpy as np

from IPython.display import Markdown, display

# Put the parent directory on the import path -- presumably so a local aif360
# checkout can be imported; TODO confirm against the repository layout.
sys.path.insert(1, "../")  
# Seed NumPy's global random generator so randomized steps are repeatable.
np.random.seed(0)

In [30]:
from aif360.datasets import GermanDataset, StructuredDataset, BinaryLabelDataset
from aif360.metrics import ClassificationMetric, BinaryLabelDatasetMetric
# ClassificationMetric: class for computing metrics based on two BinaryLabelDatasets.
# The first dataset is the original one and the second is the output of the
# classification transformer (or similar).
#
# (Kept as comments: a bare string literal at the end of a cell is echoed back as
# the cell's output, escape sequences and all.)

#### ClassificationMetric

###### Parameters:
<ul>
<li> dataset (BinaryLabelDataset) – Dataset containing ground-truth labels.</li>
<li> classified_dataset (BinaryLabelDataset) – Dataset containing predictions.</li>
<li> privileged_groups (list(dict)) – Privileged groups. Format is a list of dicts where the keys are protected_attribute_names and the values are values in protected_attributes. Each dict element describes a single group. </li>
<li> unprivileged_groups (list(dict)) – Unprivileged groups in the same format as privileged_groups.</li>
</ul>

###### Raises:
<ul>
<li> TypeError – dataset and classified_dataset must be BinaryLabelDataset types.</li>
</ul>

In [31]:
# German credit data with `age` as the only protected attribute. The dataset
# also carries a protected attribute for "sex", which is not considered in this
# evaluation, so the related columns are dropped.
german = GermanDataset(
    protected_attribute_names=['age'],
    privileged_classes=[lambda x: x >= 25],  # age >= 25 is considered privileged
    features_to_drop=['personal_status', 'sex'],
)

# Group definitions: one dict per group, keyed by protected-attribute name.
u = [{'age': 0}]
p = [{'age': 1}]

# Both the ground-truth dataset and the classified dataset are `german` here.
cm = ClassificationMetric(
    german,
    german,
    privileged_groups=p,
    unprivileged_groups=u,
)


In [4]:
# Accuracy, ACC = (TP + TN) / (P + N). `cm` was built with `german` as both the
# ground-truth and the classified dataset, so the result is 1.0.
cm.accuracy()

1.0

In [29]:
# Selection rate for the privileged group.
# NOTE(review): the value is identical to cm.base_rate(privileged=True) further
# down -- presumably because the "predictions" here are the true labels; confirm.
cm.selection_rate(True)

0.7191539365452408

In [5]:
# The result is indexed as a pair: df[0] is the tabular data previewed below and
# df[1] is a dict of dataset metadata (its keys are listed in the next cell).
df = StructuredDataset.convert_to_dataframe(german)
df[0].head()

Unnamed: 0,month,credit_amount,investment_as_income_percentage,residence_since,age,number_of_credits,people_liable_for,status=A11,status=A12,status=A13,...,housing=A153,skill_level=A171,skill_level=A172,skill_level=A173,skill_level=A174,telephone=A191,telephone=A192,foreign_worker=A201,foreign_worker=A202,credit
0,6.0,1169.0,4.0,4.0,1.0,2.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0
1,48.0,5951.0,2.0,2.0,0.0,1.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,2.0
2,12.0,2096.0,2.0,3.0,1.0,1.0,2.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
3,42.0,7882.0,2.0,4.0,1.0,1.0,2.0,1.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
4,24.0,4870.0,3.0,4.0,1.0,2.0,2.0,1.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,2.0


In [6]:
# List the entries available in the metadata dict returned alongside the data.
df[1].keys()

dict_keys(['feature_names', 'label_names', 'protected_attribute_names', 'instance_names', 'instance_weights', 'privileged_protected_attributes', 'unprivileged_protected_attributes'])

In [7]:
# Name(s) of the label column(s).
df[1]['label_names']

['credit']

In [8]:
# Protected attribute(s) selected when `german` was constructed.
df[1]['protected_attribute_names']

['age']

In [9]:
# Encoded value(s) of the protected attribute that mark the privileged group.
df[1]['privileged_protected_attributes']

[array([1.])]

In [10]:
# Number of instances, n, in the dataset, conditioned on the protected attribute
# if requested; privileged=None counts the whole dataset.
cm.num_instances(privileged=None)

1000.0

In [11]:
# Number of instances in the privileged group (`p`, i.e. age == 1).
cm.num_instances(True)

851.0

In [12]:
# Number of instances in the unprivileged group (`u`, i.e. age == 0).
cm.num_instances(False)

149.0

In [13]:
def privilege_value(privileged=False) -> float:
    """Return the size of the privileged or the unprivileged group.

    A toy "metric" function that is handed to ``cm.ratio`` and
    ``cm.difference`` in the following cells.

    Args:
        privileged: Truthy selects the privileged group; any falsy value
            (including the default) selects the unprivileged group.

    Returns:
        The instance count reported by ``cm.num_instances`` for that group.
        The count comes back as a float (e.g. ``851.0``), hence the ``float``
        annotation. The annotation is only a hint and is not enforced.
    """
    # Coerce to a strict bool so falsy inputs such as None keep meaning
    # "unprivileged" instead of being forwarded as "no conditioning".
    return cm.num_instances(bool(privileged))


metric_fun = privilege_value

In [14]:
# Ratio of the metric between groups, unprivileged / privileged.
# With metric_fun this is 149 / 851.
cm.ratio(metric_fun)

0.17508813160987075

In [15]:
# Difference of the metric between groups, unprivileged - privileged.
# With metric_fun this is 149 - 851.
cm.difference(metric_fun)

-702.0

## Base Rate

<img src="images/base_rate.JPG" style="height:180px" >

In [16]:
# Base rate within the privileged group.
cm.base_rate(privileged = True)

0.7191539365452408

In [17]:
# Base rate within the unprivileged group.
cm.base_rate(privileged = False)

0.5906040268456376

In [18]:
# Base rate over the whole dataset (no conditioning on the protected attribute).
cm.base_rate(privileged = None)

0.7

### Consistency
Individual fairness metric from Zemel et al., "Learning Fair Representations" (ICML 2013), that measures how similar the labels are for similar instances.

<img src = "images/consistency.JPG" style="height:180px" >

In [19]:
# Consistency computed with 5 neighbours; the result is a one-element array.
cm.consistency(n_neighbors = 5)

array([0.6816])

## Disparate_impact

<img src = "images/disparate_impact.JPG" style="height:180px" >

In [20]:
# Disparate impact. The value equals the unprivileged base rate divided by the
# privileged base rate from the cells above (0.5906... / 0.7191...).
cm.disparate_impact()

0.8212484098784929

## statistical_parity_difference() == mean_difference()

$$\Pr(Y = 1 \mid D = \text{unprivileged}) - \Pr(Y = 1 \mid D = \text{privileged})$$

In [21]:
# Difference in base rates, unprivileged minus privileged.
cm.mean_difference()

-0.12854990969960323

## num_negatives / num_positives

<img src = "images/num_negatives.JPG" style="height:380px" >

In [22]:
# Manual base rate for the privileged group: positives divided by group size.
cm.num_positives(privileged= True) / cm.num_instances(True)

0.7191539365452408

In [23]:
# Library-computed base rate for comparison; it matches the manual value above.
cm.base_rate(privileged = True)

0.7191539365452408

In [24]:
# Manual base rate for the unprivileged group: positives divided by group size.
cm.num_positives(privileged= False) / cm.num_instances(False)

0.5906040268456376

In [25]:
# Library-computed base rate for comparison; it matches the manual value above.
cm.base_rate(privileged = False)

0.5906040268456376

In [26]:
# print out some labels, names, etc.
def _show_section(heading, *values):
    """Render `heading` as Markdown, then print `values` on a single line."""
    display(Markdown(heading))
    print(*values)


# Overview of the dataset: shape, label encoding, protected attribute and features.
_show_section("#### Training Dataset shape", german.features.shape)
_show_section("#### Favorable and unfavorable labels",
              german.favorable_label, german.unfavorable_label)
_show_section("#### Protected attribute names", german.protected_attribute_names)
_show_section("#### Privileged and unprivileged protected attribute values",
              german.privileged_protected_attributes,
              german.unprivileged_protected_attributes)
_show_section("#### Dataset feature names", german.feature_names)

#### Training Dataset shape

(1000, 57)


#### Favorable and unfavorable labels

1.0 2.0


#### Protected attribute names

['age']


#### Privileged and unprivileged protected attribute values

[array([1.])] [array([0.])]


#### Dataset feature names

['month', 'credit_amount', 'investment_as_income_percentage', 'residence_since', 'age', 'number_of_credits', 'people_liable_for', 'status=A11', 'status=A12', 'status=A13', 'status=A14', 'credit_history=A30', 'credit_history=A31', 'credit_history=A32', 'credit_history=A33', 'credit_history=A34', 'purpose=A40', 'purpose=A41', 'purpose=A410', 'purpose=A42', 'purpose=A43', 'purpose=A44', 'purpose=A45', 'purpose=A46', 'purpose=A48', 'purpose=A49', 'savings=A61', 'savings=A62', 'savings=A63', 'savings=A64', 'savings=A65', 'employment=A71', 'employment=A72', 'employment=A73', 'employment=A74', 'employment=A75', 'other_debtors=A101', 'other_debtors=A102', 'other_debtors=A103', 'property=A121', 'property=A122', 'property=A123', 'property=A124', 'installment_plans=A141', 'installment_plans=A142', 'installment_plans=A143', 'housing=A151', 'housing=A152', 'housing=A153', 'skill_level=A171', 'skill_level=A172', 'skill_level=A173', 'skill_level=A174', 'telephone=A191', 'telephone=A192', 'foreign_wor

## Metric for original training data

In [27]:
# Group-fairness metric computed directly on the dataset labels.
# NOTE(review): both metric objects below are built from the same `german`
# dataset -- no train/test split is visible in this notebook -- so the
# "Train set" and "Test set" lines print the same number. Split the data first
# if separate train/test figures are intended.
metric_orig_train = BinaryLabelDatasetMetric(german, 
                                             unprivileged_groups=u,
                                             privileged_groups=p)
display(Markdown("#### Original training dataset"))
print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_train.mean_difference())
metric_orig_test = BinaryLabelDatasetMetric(german, 
                                             unprivileged_groups=u,
                                             privileged_groups=p)
print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_test.mean_difference())

#### Original training dataset

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.128550
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.128550
