# aif360.metrics.DatasetMetric

In [1]:
# Load all necessary packages
import sys
import pandas as pd
import numpy as np

from IPython.display import Markdown, display

sys.path.insert(1, "../")  
np.random.seed(0)

In [2]:
from aif360.datasets import GermanDataset, StructuredDataset 
from aif360.metrics import DatasetMetric, utils  #Class for computing metrics based on one StructuredDataset

#### DatasetMetric

Class for computing metrics on a single StructuredDataset (for example, the training data).

Parameters:
<ul>
    <li> dataset (StructuredDataset) – A StructuredDataset. </li>
    <li> privileged_groups (list(dict)) – Privileged groups. Format is a list of dicts where the keys are protected_attribute_names and the values are values in protected_attributes. Each dict element describes a single group. </li>
    <li> unprivileged_groups (list(dict)) – Unprivileged groups in the same format as privileged_groups. </li>
</ul>


Raises:
<ul>
    <li> TypeError – dataset must be a StructuredDataset type. </li>
    <li> ValueError – privileged_groups and unprivileged_groups must be disjoint. </li>
</ul>

In [3]:
# Build the dataset with no constructor arguments.
german = GermanDataset()
# Each group list holds dicts mapping protected-attribute names to values;
# every dict describes one group, and the two lists must be disjoint.
# NOTE(review): the metadata printed further down lists 1.0 as the privileged
# value for both 'sex' and 'age', yet {'sex': 1, 'age': 1} is placed in the
# unprivileged list here -- looks purely illustrative; confirm it is intentional.
u = [{'sex': 1, 'age': 1}, {'sex': 0}]
p = [{'sex': 1, 'age': 0}]
dm = DatasetMetric(german, unprivileged_groups=u, privileged_groups=p)

In [4]:
# Despite the name, `df` is a pair rather than a bare DataFrame: the cells in
# this notebook use df[0] as the pandas DataFrame and df[1] as a dict of
# dataset metadata.
df = StructuredDataset.convert_to_dataframe(german)
df[0].head()

Unnamed: 0,month,credit_amount,investment_as_income_percentage,residence_since,age,number_of_credits,people_liable_for,sex,status=A11,status=A12,...,housing=A153,skill_level=A171,skill_level=A172,skill_level=A173,skill_level=A174,telephone=A191,telephone=A192,foreign_worker=A201,foreign_worker=A202,credit
0,6.0,1169.0,4.0,4.0,1.0,2.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0
1,48.0,5951.0,2.0,2.0,0.0,1.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,2.0
2,12.0,2096.0,2.0,3.0,1.0,1.0,2.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
3,42.0,7882.0,2.0,4.0,1.0,1.0,2.0,1.0,1.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
4,24.0,4870.0,3.0,4.0,1.0,2.0,2.0,1.0,1.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,2.0


In [5]:
# List the metadata fields that accompany the DataFrame.
df[1].keys()

dict_keys(['feature_names', 'label_names', 'protected_attribute_names', 'instance_names', 'instance_weights', 'privileged_protected_attributes', 'unprivileged_protected_attributes'])

In [6]:
# Name(s) of the label column(s) in the DataFrame.
df[1]['label_names']

['credit']

In [7]:
# Columns treated as protected attributes; these are the keys allowed in the
# privileged/unprivileged group dicts passed to DatasetMetric.
df[1]['protected_attribute_names']

['sex', 'age']

In [8]:
# Values the dataset itself marks as privileged, one array per protected
# attribute (presumably in the same order as 'protected_attribute_names' --
# TODO confirm).
df[1]['privileged_protected_attributes']

[array([1.]), array([1.])]

In [9]:
# Number of instances, n, in the dataset. With privileged=None no conditioning
# on the protected attributes is applied, so every row is counted.
dm.num_instances(privileged=None)

1000.0

In [10]:
# privileged=True: count only the instances that fall in the privileged groups
# given to DatasetMetric.
dm.num_instances(True)

85.0

In [11]:
# privileged=False: count only the instances that fall in the unprivileged
# groups given to DatasetMetric.
dm.num_instances(False)

915.0

In [12]:
def privilege_value(privileged=False) -> float:
    """Return the instance count of the privileged or the unprivileged group.

    Serves as the metric callable handed to ``dm.ratio`` and ``dm.difference``.

    Args:
        privileged: Truthy selects the privileged groups; any falsy value
            (including ``None``) selects the unprivileged groups.

    Returns:
        Whatever ``dm.num_instances`` returns for the selected group. The
        outputs recorded in this notebook are floats (e.g. ``85.0``), which is
        why the return annotation is ``float`` rather than ``int``.
    """
    # bool() keeps the original truthiness-based branching: a None argument
    # still maps to the unprivileged count instead of being forwarded as
    # privileged=None, which would return the unconditioned total.
    return dm.num_instances(bool(privileged))

# Alias under which the callable is handed to dm.ratio / dm.difference.
metric_fun = privilege_value

In [13]:
# Ratio of the metric for the unprivileged group to the metric for the
# privileged group (here 915 / 85).
dm.ratio(metric_fun)

10.764705882352942

In [14]:
# Difference of the metric: unprivileged group minus privileged group
# (here 915 - 85).
dm.difference(metric_fun)

830.0