# Chapter 8
## Section: Unit testing (Using pytest)

In [None]:
!pip install -U pytest
!pip install 'aif360'
!pip install shap
!pip install ipytest

In [2]:
import pandas as pd
import shap
from aif360.sklearn.metrics import disparate_impact_ratio


def dir_grouping(data_df: pd.DataFrame, sensitive_attr: str, priviledge_group, dir_threshold = {'high': 1.2, 'low': 0.8}):
  """
  Categorizing data as fair or unfair according to DIR

  :param data_df: Dataframe of dataset
  :param sensitive_attr: Sensitive attribute under investigation
  :priviledge_group: The category in the sensitive attribute that needs to be cosnidered as priviledged
  :param dir_threshold:

  """
  dir = disparate_impact_ratio(data_df, prot_attr=sensitive_attr, priv_group=priviledge_group, pos_label=True)
  if dir < dir_threshold['high'] and dir > dir_threshold['low']:
    assessment = "unbiased data"
  else:
    assessment = "unbiased data"

  return assessment

Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


In [3]:
# calculating DIR for a subset of adult income data in shap library
import shap

X,y = shap.datasets.adult()
X = X.set_index('Sex')
X_subset = X.iloc[0:100,]
print(disparate_impact_ratio(X_subset, prot_attr='Sex', priv_group=1, pos_label=True))
# According to the calculated DIR, this subset of the data is biased with respect to the "Sex" attribute

3.2527472527472527


In [4]:
X,y = shap.datasets.adult()
X = X.set_index('Sex')
X_subset = X.iloc[0:100,]
print(disparate_impact_ratio(X_subset, prot_attr='Sex', priv_group=1, pos_label=True))
# According to the calculated DIR, this subset of the data is biased with respect to the "Sex" attribute

3.2527472527472527


In [5]:
import pytest
import ipytest
ipytest.autoconfig()

In [6]:
%%ipytest -qq

def test_dir_grouping():

  bias_assessment = dir_grouping(data_df = X_subset,
                                 sensitive_attr = 'Sex',
                                 priviledge_group = 1,
                                 dir_threshold = {'high': 1.2, 'low': 0.8})

  assert bias_assessment == "biased data"

[31mF[0m[31m                                                                                            [100%][0m
[31m[1m________________________________________ test_dir_grouping _________________________________________[0m

    [94mdef[39;49;00m [92mtest_dir_grouping[39;49;00m():[90m[39;49;00m
    [90m[39;49;00m
      bias_assessment = dir_grouping(data_df = X_subset,[90m[39;49;00m
                                     sensitive_attr = [33m'[39;49;00m[33mSex[39;49;00m[33m'[39;49;00m,[90m[39;49;00m
                                     priviledge_group = [94m1[39;49;00m,[90m[39;49;00m
                                     dir_threshold = {[33m'[39;49;00m[33mhigh[39;49;00m[33m'[39;49;00m: [94m1.2[39;49;00m, [33m'[39;49;00m[33mlow[39;49;00m[33m'[39;49;00m: [94m0.8[39;49;00m})[90m[39;49;00m
    [90m[39;49;00m
>     [94massert[39;49;00m bias_assessment == [33m"[39;49;00m[33mbiased data[39;49;00m[33m"[39;49;00m[90m[39;49;00m
[1m[31m

In [7]:
# Running pytest in command line interface (CLI)
# !py -m pytest

In [8]:
# In Jupyter and Colab notebook
ipytest.run()

[31mF[0m[31m                                                                                            [100%][0m
[31m[1m________________________________________ test_dir_grouping _________________________________________[0m

    [94mdef[39;49;00m [92mtest_dir_grouping[39;49;00m():[90m[39;49;00m
    [90m[39;49;00m
      bias_assessment = dir_grouping(data_df = X_subset,[90m[39;49;00m
                                     sensitive_attr = [33m'[39;49;00m[33mSex[39;49;00m[33m'[39;49;00m,[90m[39;49;00m
                                     priviledge_group = [94m1[39;49;00m,[90m[39;49;00m
                                     dir_threshold = {[33m'[39;49;00m[33mhigh[39;49;00m[33m'[39;49;00m: [94m1.2[39;49;00m, [33m'[39;49;00m[33mlow[39;49;00m[33m'[39;49;00m: [94m0.8[39;49;00m})[90m[39;49;00m
    [90m[39;49;00m
>     [94massert[39;49;00m bias_assessment == [33m"[39;49;00m[33mbiased data[39;49;00m[33m"[39;49;00m[90m[39;49;00m
[1m[31m

<ExitCode.TESTS_FAILED: 1>