In [5]:
%pip install aif360

Collecting aif360
  Downloading aif360-0.6.1-py3-none-any.whl.metadata (5.0 kB)
Collecting scipy>=1.2.0 (from aif360)
  Downloading scipy-1.13.1-cp312-cp312-win_amd64.whl.metadata (60 kB)
     ---------------------------------------- 0.0/60.6 kB ? eta -:--:--
     ------ --------------------------------- 10.2/60.6 kB ? eta -:--:--
     ---------------------------------------- 60.6/60.6 kB 1.1 MB/s eta 0:00:00
Collecting pandas>=0.24.0 (from aif360)
  Downloading pandas-2.2.2-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting scikit-learn>=1.0 (from aif360)
  Downloading scikit_learn-1.5.0-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting matplotlib (from aif360)
  Downloading matplotlib-3.9.0-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting pytz>=2020.1 (from pandas>=0.24.0->aif360)
  Downloading pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas>=0.24.0->aif360)
  Downloading tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collect

In [2]:
%pip install 'aif360[all]'

Note: you may need to restart the kernel to use updated packages.


ERROR: Invalid requirement: "'aif360[all]'"


In [85]:
import numpy as np
import pandas as pd

Import dataset and metrics

In [86]:
from aif360.metrics import DatasetMetric
from aif360.metrics import BinaryLabelDatasetMetric 
from aif360.datasets import GermanDataset


# Load the German dataset
german = GermanDataset()

# Define unprivileged and privileged groups
privileged_groups = [{'sex': 1, 'age': 1}]  # Example: male, age 1
unprivileged_groups = [{'sex': 0}]  # Example: female

# Compute dataset metrics
dm = DatasetMetric(german, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
binaryMetric = BinaryLabelDatasetMetric(german,unprivileged_groups=unprivileged_groups,privileged_groups=privileged_groups)

In [87]:
german

               instance weights features                \
                                                         
                                   month credit_amount   
instance names                                           
0                           1.0      6.0        1169.0   
1                           1.0     48.0        5951.0   
2                           1.0     12.0        2096.0   
3                           1.0     42.0        7882.0   
4                           1.0     24.0        4870.0   
...                         ...      ...           ...   
995                         1.0     12.0        1736.0   
996                         1.0     30.0        3857.0   
997                         1.0     12.0         804.0   
998                         1.0     45.0        1845.0   
999                         1.0     45.0        4576.0   

                                                                \
                                                               

In [89]:
numPrivileged = dm.num_instances(privileged=True)
numUnPrivileged = dm.num_instances(privileged=False)
print('Difference in number of instances: ', numPrivileged-numUnPrivileged)

Difference in number of instances:  295.0


In [88]:
# This calculate numUnprivileged - numPrivileged
diff=dm.difference(dm.num_instances)
print(diff)

-295.0


In [90]:
print('Ratio = (no. of unprivileged)/(no. of privileged) = ',numUnPrivileged/numPrivileged)
dm.ratio(dm.num_instances)

Ratio = (no. of unprivileged)/(no. of privileged) =  0.512396694214876


0.512396694214876

Base Rate: The fraction of instances in the dataset that have the positive (or favorable) label.
For instance, in a credit approval scenario, "approved" might be the positive label.

In [91]:
overall_baseRate = binaryMetric.base_rate()
privileged_baseRate = binaryMetric.base_rate(privileged=True)
unprivileged_baseRate = binaryMetric.base_rate(privileged=False)
print('Overall baseRate= ',overall_baseRate)
print('Privileged baseRate= ',privileged_baseRate)
print('Unprivileged baseRate= ',unprivileged_baseRate)

Overall baseRate=  0.7
Privileged baseRate=  0.7388429752066116
Unprivileged baseRate=  0.6483870967741936


The base rate difference between unprivileged and privileged groups

Difference = base_rate of unprivileged - base_rate of priviledged

In [92]:
base_rate_diff = binaryMetric.difference(binaryMetric.base_rate)
base_rate_diff

-0.09045587843241798

In [62]:
unprivileged_baseRate-privileged_baseRate

-0.09045587843241798

**Consistency**

*Individual fairness metric* that measures how similar the labels are for similar instances.

In [93]:
consistency_score = binaryMetric.consistency(n_neighbors=25)
consistency_score

array([0.60912])

A consistency score of 0.60912 means that, on average, about 60.912% of an instance's nearest neighbors have the same label as the instance itself.

**Disparate Impact**
<math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
  <mfrac>
    <mrow>
      <mi>P</mi>
      <mi>r</mi>
      <mo stretchy="false">(</mo>
      <mi>Y</mi>
      <mo>=</mo>
      <mn>1</mn>
      <mrow data-mjx-texclass="ORD">
        <mo stretchy="false">|</mo>
      </mrow>
      <mi>D</mi>
      <mo>=</mo>
      <mtext>unprivileged</mtext>
      <mo stretchy="false">)</mo>
    </mrow>
    <mrow>
      <mi>P</mi>
      <mi>r</mi>
      <mo stretchy="false">(</mo>
      <mi>Y</mi>
      <mo>=</mo>
      <mn>1</mn>
      <mrow data-mjx-texclass="ORD">
        <mo stretchy="false">|</mo>
      </mrow>
      <mi>D</mi>
      <mo>=</mo>
      <mtext>privileged</mtext>
      <mo stretchy="false">)</mo>
    </mrow>
  </mfrac>
</math>

In [94]:
binaryMetric.disparate_impact()

0.8775709027928124

Disparate Impact < 1: Indicates potential bias against the unprivileged group.
So, according to this metric there is a potential bias against unprivileged group.

**statistical_parity_difference()**
<math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
  <mi>P</mi>
  <mi>r</mi>
  <mo stretchy="false">(</mo>
  <mi>Y</mi>
  <mo>=</mo>
  <mn>1</mn>
  <mrow data-mjx-texclass="ORD">
    <mo stretchy="false">|</mo>
  </mrow>
  <mi>D</mi>
  <mo>=</mo>
  <mtext>unprivileged</mtext>
  <mo stretchy="false">)</mo>
  <mo>&#x2212;</mo>
  <mi>P</mi>
  <mi>r</mi>
  <mo stretchy="false">(</mo>
  <mi>Y</mi>
  <mo>=</mo>
  <mn>1</mn>
  <mrow data-mjx-texclass="ORD">
    <mo stretchy="false">|</mo>
  </mrow>
  <mi>D</mi>
  <mo>=</mo>
  <mtext>privileged</mtext>
  <mo stretchy="false">)</mo>
</math>

In [95]:
spd = binaryMetric.statistical_parity_difference()
md = binaryMetric.mean_difference()
print('statistical_parity_difference:', spd)
print('mean_difference:', md)

print('NOTE: Both are same')

statistical_parity_difference: -0.09045587843241798
mean_difference: -0.09045587843241798
NOTE: Both are same


Num-negatives: Calculate number of negative outcomes

In [77]:
overall_ = binaryMetric.num_negatives()
pri_ = binaryMetric.num_negatives(privileged=True)
unpri_ = binaryMetric.num_negatives(privileged=False)
print(overall_)
print(pri_)
print(unpri_)

300.0
158.0
109.0


In [84]:
predictions = np.random.randint(2, size=len(german.labels))

subgroup_metrics = binaryMetric.rich_subgroup(predictions=predictions,fairness_def='FN')

# Print the subgroup metrics
for key, value in subgroup_metrics.items():
    print(f"{key}: {value}")

IndexError: invalid index to scalar variable.