# Unsupervised bias detection using the Multi-Dimensional Subset Scan
## Case study using AIF360: German Credit data set
Developed by Jurriaan Parie, Data scientist IBM Northern Europe. For questions please reach out to Jurriaan.Parie@ibm.com.

### Load libraries and helper code

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Markdown, display

# Dataset
from german_dataset import GermanDataset

# Helper functions
from helper_functions import *

# Fairness metrics
from aif360.metrics import BinaryLabelDatasetMetric

# MDSS
from mdss.mdss_classification_metric import *

# Scalers
from sklearn.preprocessing import StandardScaler

# Classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# Seed NumPy's global random generator with 0 before any data handling below.
# NOTE(review): presumably this is what makes the shuffled split repeatable —
# confirm that split() draws from np.random.
np.random.seed(0)

### Loading data

In [2]:
# Build the German Credit dataset; only the pre-processing routine is passed
# explicitly, every other constructor argument keeps its default value.
gd = GermanDataset(custom_preprocessing=default_preprocessing)

Convert to pandas data frame.

In [3]:
# Only the first element of what convert_to_dataframe() returns is kept
converted = gd.convert_to_dataframe()
df_gd = converted[0]

# Preview the first rows
df_gd.head()

Unnamed: 0,month,credit_amount,investment_as_income_percentage,residence_since,age,number_of_credits,people_liable_for,status=A11,status=A12,status=A13,...,housing=A153,skill_level=A171,skill_level=A172,skill_level=A173,skill_level=A174,telephone=A191,telephone=A192,foreign_worker=A201,foreign_worker=A202,credit
0,6.0,1169.0,4.0,4.0,67.0,2.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0
1,48.0,5951.0,2.0,2.0,22.0,1.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
2,12.0,2096.0,2.0,3.0,49.0,1.0,2.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,42.0,7882.0,2.0,4.0,45.0,1.0,2.0,1.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
4,24.0,4870.0,3.0,4.0,53.0,2.0,2.0,1.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0


### Initialize (un)privileged groups and split original data set

In [4]:
# Protected attribute and the age cut-off that defines the privileged class
prot_attr = 'age'
age_level = 25

# (Un)privileged groups, keyed by prot_attr rather than a repeated 'age'
# literal so they cannot drift apart from the attribute given to the dataset.
# Value 1 denotes the privileged group, 0 the unprivileged group.
privileged_groups = [{prot_attr: 1}]
unprivileged_groups = [{prot_attr: 0}]

# Rebuild the AIF360 dataset with the protected attribute made explicit
gd = GermanDataset(
    # column treated as the protected attribute
    protected_attribute_names=[prot_attr],
    # privileged class: value strictly above the cut-off
    privileged_classes=[lambda x: x > age_level],
    # default pre-processing
    custom_preprocessing=default_preprocessing,
)

# Shuffle, then cut the data at the 0.5 and 0.8 marks.
# NOTE(review): presumably this yields a 50/30/20 train/validation/test
# split — confirm against the split() semantics.
gd_train, gd_val, gd_test = gd.split([0.5, 0.8], shuffle=True)

### Compute fairness metrics based on observed favorable outcomes in original data set

In [5]:
# Group-fairness metrics computed on the training split
metric_gd_train = BinaryLabelDatasetMetric(
    gd_train,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

# Print each metric in turn: statistical parity difference, then disparate impact
for template, metric_fn in (
    ("Statistical parity difference = %f", metric_gd_train.statistical_parity_difference),
    ("Disparate impact = %f", metric_gd_train.disparate_impact),
):
    print(template % metric_fn())

Statistical parity difference = -0.167541
Disparate impact = 0.772006


### Train model on original data

In [6]:
# Rebind the training split to a copy of itself before fitting
gd_train = gd_train.copy()

# Pipeline: standardise the features, then a liblinear logistic regression
model = make_pipeline(
    StandardScaler(),
    LogisticRegression(solver='liblinear', random_state=1),
)

# The '<step>__<param>' key routes the instance weights to the
# logistic-regression step of the pipeline
fit_params = dict(logisticregression__sample_weight=gd_train.instance_weights)

# Fit on the training features and flattened labels; LR is the fitted pipeline
LR = model.fit(gd_train.features, gd_train.labels.ravel(), **fit_params)

### Test model

In [7]:
# Threshold handed to test() and pred_dataset() in the cells below
thresh = 0.7

In [8]:
# Compute the metrics for the chosen threshold on the test split
LR_metrics = test(
    dataset=gd_test,
    model=LR,
    thresh_arr=[thresh],
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

In [9]:
# Display the metrics returned by test() for the chosen threshold
LR_metrics

defaultdict(list,
            {'thres': [0.7],
             'bal_acc': [0.6714749160004635],
             'avg_odds_diff': [0.2068981008148217],
             'disp_imp': [1.5827263267429763],
             'stat_par_diff': [0.21378125596487885],
             'eq_opp_diff': [0.21756978653530373]})

### MDSS

In [10]:
# NOTE(review): the first cell already wildcard-imports this module, so this
# explicit import is presumably redundant — confirm, and consider moving it
# to the import cell at the top of the notebook.
from mdss.mdss_classification_metric import MDSSClassificationMetric

In [11]:
# Ask the pred_dataset helper for the model's output on the test split at the
# chosen threshold; the result is used as the classified dataset further down.
dataset_pred = pred_dataset(
    dataset=gd_test,
    model=LR,
    thresh=thresh,
)

In [12]:
# MDSS metric object pairing the test split with the model's classified
# counterpart, using the same (un)privileged group definitions as before
bias_scan = MDSSClassificationMetric(
    dataset=gd_test,
    classified_dataset=dataset_pred,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

In [13]:
# Run the scan with its default arguments; the recorded output is a
# (dict, float) pair.
# NOTE(review): presumably the highest-scoring subset and its score — an empty
# dict with score 0.0 would then mean no subset was flagged; confirm against
# the mdss module.
bias_scan.bias_scan()

({}, 0.0)

In [20]:
# Generalized entropy index reported by the same metric object, called with
# its default arguments
bias_scan.generalized_entropy_index()

0.5905178737346571