In [1]:
import os, sys
# Make the parent of the current working directory importable, so that
# packages living one level above this notebook can be imported.
dir2 = os.path.abspath('')
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

In [2]:
from brio.data_processing.Preprocessing import Preprocessing
from sklearn.model_selection import train_test_split
from pickle import dump, load
import pandas as pd
import numpy as np

from brio.bias.BiasDetector import BiasDetector
from brio.bias.KLDivergence import KLDivergence

## Importing Data and Trained Classifier

**UX**: 
- the user uploads the dataset with the features (X)
- the user uploads 
    - the sklearn model as pkl file **or**
    - a file with the predictions already created (Y). 
- the user uploads scaler and ohe (if needed)

If a model is provided, the application checks whether the provided datasets and model match in terms of column names. 

In [3]:
# Location of the raw dataset (CSV), relative to the notebook's working directory.
input_data_path = "../data/raw_data/uci-default-of-credit-card/data/data.csv"
# Folder prefix under which the fitted encoder and scaler pickles are stored.
local_path_save = '../data/mlflow_artifacts/'

In [4]:
# Load the previously fitted one-hot encoder and scaler.
# Context managers guarantee the file handles are closed once the objects are loaded.
# NOTE: unpickling can execute arbitrary code; only load artifacts from a trusted source.
with open(local_path_save + '_ohe.pkl', 'rb') as ohe_file:
    fitted_ohe = load(ohe_file)
with open(local_path_save + '_scaler.pkl', 'rb') as scaler_file:
    fitted_scaler = load(scaler_file)

In [5]:
# Build the preprocessing helper on the raw CSV.
# NOTE(review): "default" is presumably the name of the target column — confirm against Preprocessing.
pp = Preprocessing(input_data_path, "default")
X, Y = pp.read_dataframe()

# Hold out 30% of the rows as test set; the fixed random_state keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.3, random_state=420)

# Transform the test features, passing in the encoder and scaler loaded from disk.
# NOTE(review): fit_ohe=True is passed together with an already fitted encoder —
# confirm that this does not re-fit the encoder on the test set.
X_test_ohe, _, _ = pp.preprocess_for_classification(df=X_test, 
                                                fit_ohe=True, 
                                                fitted_ohe=fitted_ohe,
                                                perform_scaling=True,
                                                fitted_scaler=fitted_scaler)

In [6]:
# Load the trained classifier from the pickled model file of an MLflow run.
# NOTE: this path is relative to the notebook's working directory and tied to one specific run id.
# NOTE: unpickling can execute arbitrary code; only load model files from a trusted source.
with open("mlruns/1/1e4a0667c7a64cbe8c7b023410e5781c/artifacts/model/model.pkl", "rb") as file:
    classifier = load(file)

In [7]:
# Score the preprocessed test set: class probabilities and hard class predictions.
# Only predicted_values is used in the cells shown below.
predicted_prob = classifier.predict_proba(X_test_ohe)
predicted_values = classifier.predict(X_test_ohe)

#### Definition of conditioning variables
**UX**:
- the user selects the continuous features to be categorized and used for the creation of control groups. 

The application proposes splitting cuts that make the resulting discrete distribution uniform (other binning procedures are also possible). 

In [8]:
def age_buckets(x):
    """Map an age to a coarse bucket label.

    Returns 1 for ages below 30, 2 for ages from 30 up to (but excluding) 40,
    and 3 for everything else.
    """
    if x < 30:
        return 1
    return 2 if x < 40 else 3

# Bucket the ages of the test rows only. The original applied the function to the
# full dataset X and relied on index alignment to pick out the test rows.
# assign() returns a new frame, which avoids writing into a slice produced by the
# train/test split and keeps the cell safe to re-run.
X_test = X_test.assign(age_buckets=X_test.x5_age.apply(age_buckets))

**UX**:
- the user selects from a drop down menu the variables to be used for the groups creation
- the user can select "check all the variables" and the application will use all the available discrete features

In [9]:
# Discrete features used to build the control groups.
conditioning_variables = [
    'x3_education',
    'x4_marriage',
    'age_buckets',
]

In [10]:
# Attach the predictions to the test features as a "predictions" column.
# The index of X_test is reset so that both pieces align positionally (predicted_values
# carries no index of its own). Naming the Series directly avoids depending on the
# auto-generated column label 0 and a follow-up rename.
df_with_predictions = pd.concat(
    [X_test.reset_index(drop=True), pd.Series(predicted_values, name="predictions")],
    axis=1,
)

## Bias Detection

**UX**:
- the user selects from a drop down menu the binary variable to be used for the distance calculation
- the user selects a threshold for the distance
- the user selects a Distance (if more than one are provided)

In [11]:
# Configure the bias detector to measure distances with the KL divergence.
d = KLDivergence()
bd = BiasDetector(distance=d)

**UX**:

The user can select between two options:
- Option 1: distance between frequencies
- Option 2: distance from reference distribution
    - in this case, the user has to insert values for a reference distribution

## Option 1: Distance between frequencies

### Comparison of the two groups observed freqs

In [12]:
# Compare the distribution of the predictions between the groups defined by x2_sex.
# The output shown below is a (distance, flag) pair.
# NOTE(review): the flag appears to be True when the distance does not exceed the
# threshold — confirm against BiasDetector.
bd.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    threshold=0.1
)

(0.0031781793012045124, True)

In [13]:
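# NOTE: the formula below is a max-absolute-difference distance, kept for reference only.
# The detector in this notebook is built with KLDivergence, so this formula does not
# reproduce the distance value returned by the comparison cells.
#distance = max( abs(female_0_freq - male_0_freq), abs(female_1_freq - male_1_freq) )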

In [14]:
# Observed prediction frequencies within each x2_sex group (normalized per group).
df_with_predictions.groupby("x2_sex").predictions.value_counts(normalize=True)

x2_sex  predictions
1       0              0.873592
        1              0.126408
2       0              0.898862
        1              0.101138
Name: predictions, dtype: float64

### Comparison of the two groups' observed frequencies, conditioning on other features

In [15]:
# Repeat the x2_sex comparison inside the subgroups defined by the conditioning variables
# (judging by the output, single variables as well as their combinations).
# Subgroups with fewer than min_obs_per_group rows are reported as 'Not enough observations'.
results = bd.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    conditioning_variables=conditioning_variables,
    threshold=0.1,
    min_obs_per_group=30)

In [16]:
# Display the mapping: subgroup query -> (number of observations, distance, flag).
results

{'x3_education==1': (3119, 0.0024721670917052813, True),
 'x3_education==3': (1499, 0.0042885053194460215, True),
 'x3_education==2': (4250, 0.0039080831397844355, True),
 'x3_education==4': (40, 0.0, True),
 'x3_education==5': (75, 0.05351859549349025, True),
 'x3_education==6': (14, None, 'Not enough observations'),
 'x3_education==0': (3, None, 'Not enough observations'),
 'x4_marriage==1': (4065, 0.001838072893517475, True),
 'x4_marriage==2': (4822, 0.004954004817814888, True),
 'x4_marriage==3': (95, 0.014363751466855791, True),
 'x4_marriage==0': (18, None, 'Not enough observations'),
 'age_buckets==3': (2727, 0.0015318838383817195, True),
 'age_buckets==1': (2895, 0.0003904744013276014, True),
 'age_buckets==2': (3378, 0.012989844709532569, True),
 'x3_education==1 & x4_marriage==1': (1106, 0.0038454608355019803, True),
 'x3_education==1 & x4_marriage==2': (2002, 0.0021431260937049904, True),
 'x3_education==1 & x4_marriage==3': (10, None, 'Not enough observations'),
 'x3_educa

**UX**:
- the application shows the results in two views:
    - overall results: all the computed distances
    - violations: only the results above the threshold
- for both, the user can order the results by number of obs, distance or group name

In [17]:
# Select only the subgroups whose flag (third tuple element) is falsy, i.e. the
# combinations for which the distance is greater than the threshold.
# Subgroups reported as 'Not enough observations' carry a non-empty string in that
# position, which is truthy, so they are left out as well.
violations = {k: v for k, v in results.items() if not v[2]}

Legend of the encoded variables:
- 2: Gender (1 = male; 2 = female).
- 3: Education (1 = graduate school; 2 = university; 3 = high school; 4 = others).
- 4: Marital status (1 = married; 2 = single; 3 = others).

In [18]:
# Sort the violations by the number of observations in each subgroup, largest first.
# The key is the observation count only (first element of the result tuple), so ties
# keep their original order instead of being broken by comparing distances and flags.
dict(sorted(violations.items(), key=lambda item: item[1][0], reverse=True))

{'x3_education==5 & x4_marriage==1': (45, inf, False)}

#### Focus analyses on particular cases
Here we want to understand/qualify some specific violation cases.

**UX**: 
- the user selects one of the available violations
- the application returns relevant details to help understand what's going on
- the user can download the results as csv file. 

In [19]:
# Drill down into one violating subgroup and list the prediction frequencies per x2_sex group.
# NOTE(review): the order of the returned arrays presumably follows the order of the
# unique() values passed in, not the sorted group labels — confirm before labelling them.
focus_df = df_with_predictions.query('x3_education==5 & x4_marriage==1')
bd.get_frequencies_list(focus_df, 'predictions', 
                        df_with_predictions.predictions.unique(),
                        'x2_sex', df_with_predictions.x2_sex.unique())

[array([0.96774194, 0.03225806]), array([1., 0.])]

In [20]:
# Cross-check with pandas: prediction frequencies per x2_sex group inside the focus subgroup.
focus_df.groupby('x2_sex').predictions.value_counts(normalize=True)

x2_sex  predictions
1       0              1.000000
2       0              0.967742
        1              0.032258
Name: predictions, dtype: float64

## Option 2: distance from reference distribution

In [21]:
# Reference frequencies of the two prediction classes (0 and 1) for each sex group.
female_0_ref, female_1_ref = 0.75, 0.25
male_0_ref, male_1_ref = 0.75, 0.25

# One array per group, listed as [female, male].
ref_distribution = [
    np.array(pair)
    for pair in ((female_0_ref, female_1_ref), (male_0_ref, male_1_ref))
]

### Comparison of the two groups w.r.t. the reference distribution

In [22]:
# Same comparison as in Option 1, but with a reference distribution supplied.
# The output shown below holds one distance and one flag per x2_sex group.
bd.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    threshold=0.1,
    reference_distribution=ref_distribution
)

([0.09045146214557331, 0.05608173922140364], [True, True])

### Comparison of the two groups w.r.t. the reference distribution, conditioning on other features

In [23]:
# Conditioned comparison against the reference distribution.
# NOTE: this rebinds `results`; the Option 1 results computed earlier are no longer
# available under that name after this cell runs.
results = bd.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    conditioning_variables=conditioning_variables,
    threshold=0.1,
    min_obs_per_group=30,
    reference_distribution=ref_distribution)

In [24]:
# Display the mapping: subgroup query -> (number of observations, distances, flags),
# with one distance and one flag per group of the root variable.
results

{'x3_education==1': (3119,
  [0.1727849641563417, 0.12513967602939438],
  [False, False]),
 'x3_education==3': (1499,
  [0.049644573580709436, 0.02315751842963111],
  [True, True]),
 'x3_education==2': (4250,
  [0.06557258679024336, 0.03508235923490105],
  [True, True]),
 'x3_education==4': (40, [inf, inf], [False, False]),
 'x3_education==5': (75,
  [0.4400393494193747, 0.11648044787778783],
  [False, False]),
 'x3_education==6': (14, None, 'Not enough observations'),
 'x3_education==0': (3, None, 'Not enough observations'),
 'x4_marriage==1': (4065,
  [0.08544051883134945, 0.05939110941578346],
  [True, True]),
 'x4_marriage==2': (4822,
  [0.09593710953905385, 0.052837515691457476],
  [True, True]),
 'x4_marriage==3': (95,
  [0.03777416092813701, 0.10862130843402798],
  [True, False]),
 'x4_marriage==0': (18, None, 'Not enough observations'),
 'age_buckets==3': (2727,
  [0.07733447951324085, 0.054793667365955645],
  [True, True]),
 'age_buckets==1': (2895,
  [0.06212695407784369, 0.0

In [25]:
# Select the subgroups in which at least one group of the root variable is farther
# from its reference distribution than the threshold (i.e. at least one flag is False).
# Subgroups skipped for lack of data carry a sentinel string instead of a list of flags;
# they are excluded explicitly rather than by indexing into the string's characters.
# all() also covers root variables with more than two groups.
violations = {
    k: v
    for k, v in results.items()
    if not isinstance(v[2], str) and not all(v[2])
}

In [26]:
# Display the subgroups selected as violations with respect to the reference distribution.
violations

{'x3_education==1': (3119,
  [0.1727849641563417, 0.12513967602939438],
  [False, False]),
 'x3_education==4': (40, [inf, inf], [False, False]),
 'x3_education==5': (75,
  [0.4400393494193747, 0.11648044787778783],
  [False, False]),
 'x4_marriage==3': (95,
  [0.03777416092813701, 0.10862130843402798],
  [True, False]),
 'age_buckets==2': (3378,
  [0.1438625469918801, 0.06047272164194925],
  [False, True]),
 'x3_education==1 & x4_marriage==1': (1106,
  [0.21265842001506097, 0.14527510317255699],
  [False, False]),
 'x3_education==1 & x4_marriage==2': (2002,
  [0.15462161411403985, 0.11329339917504339],
  [False, False]),
 'x3_education==5 & x4_marriage==1': (45,
  [0.32075402361972133, inf],
  [False, False]),
 'x3_education==1 & age_buckets==3': (710,
  [0.11957089108400679, 0.10668731747781979],
  [False, False]),
 'x3_education==1 & age_buckets==1': (1070,
  [0.14698598180891606, 0.13216070888626405],
  [False, False]),
 'x3_education==1 & age_buckets==2': (1339,
  [0.24195430903213

### Multi-class root variables

In [27]:
# Use a root variable with three classes (age_buckets) and condition on the other
# discrete features.
# NOTE: this rebinds `conditioning_variables`; re-running earlier cells after this one
# would use the new list.
conditioning_variables = ['x2_sex', 'x4_marriage', 'x3_education']

bd.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='age_buckets',
    conditioning_variables=conditioning_variables,
    threshold=0.1,
    min_obs_per_group=30)

{'x2_sex==2': (5448, 0.012314276258242253, True),
 'x2_sex==1': (3552, 0.0002813183144209498, True),
 'x4_marriage==1': (4065, 0.0122720164186669, True),
 'x4_marriage==2': (4822, 0.009330948319540518, True),
 'x4_marriage==3': (95, inf, False),
 'x4_marriage==0': (18, None, 'Not enough observations'),
 'x3_education==1': (3119, 0.005840176493181968, True),
 'x3_education==3': (1499, 0.0012260232619229396, True),
 'x3_education==2': (4250, 0.007953207595574097, True),
 'x3_education==4': (40, 0.0, True),
 'x3_education==5': (75, inf, False),
 'x3_education==6': (14, None, 'Not enough observations'),
 'x3_education==0': (3, None, 'Not enough observations'),
 'x2_sex==2 & x4_marriage==1': (2551, 0.022420619956923828, True),
 'x2_sex==2 & x4_marriage==2': (2820, 0.02425625428990278, True),
 'x2_sex==2 & x4_marriage==3': (62, inf, False),
 'x2_sex==2 & x4_marriage==0': (15, None, 'Not enough observations'),
 'x2_sex==1 & x4_marriage==1': (1514, 0.0008624977133011085, True),
 'x2_sex==1 & x

In [28]:
# Drill down into one violating subgroup and list the prediction frequencies per age bucket.
# NOTE: this rebinds `focus_df`, replacing the subgroup inspected earlier.
# NOTE(review): the order of the returned arrays presumably follows the order of the
# unique() values passed in, not the sorted bucket labels — confirm before labelling them.
focus_df = df_with_predictions.query('x2_sex==2 & x4_marriage==3')
bd.get_frequencies_list(focus_df, 'predictions', 
                        df_with_predictions.predictions.unique(),
                        'age_buckets', df_with_predictions.age_buckets.unique())


[array([0.825, 0.175]), array([1., 0.]), array([0.88235294, 0.11764706])]

In [29]:
# Cross-check with pandas: prediction frequencies per age bucket inside the focus subgroup.
focus_df.groupby('age_buckets').predictions.value_counts(normalize=True)

age_buckets  predictions
1            0              1.000000
2            0              0.882353
             1              0.117647
3            0              0.825000
             1              0.175000
Name: predictions, dtype: float64