In [83]:
import os, sys
# Put the parent of the current working directory on the import path
# (presumably where the `brio` package lives — confirm against the repo layout).
dir2 = os.path.abspath('')
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

In [84]:
from brio.utils.Preprocessing import Preprocessing
from sklearn.model_selection import train_test_split
from pickle import dump, load
import pandas as pd
import numpy as np

from brio.bias.FreqVsFreqBiasDetector import FreqVsFreqBiasDetector
from brio.bias.FreqVsRefBiasDetector import FreqVsRefBiasDetector

## Importing Data and Trained Classifier

**UX**: 
- the user uploads the dataset with the features (X)
- the user uploads 
    - the sklearn model as pkl file **or**
    - a file with the predictions already created (Y). 
- the user uploads scaler and ohe (if needed)

If a model is provided, the application checks whether the provided datasets and model match in terms of column names.

In [85]:
# Folder holding the fitted preprocessing artifacts, and the raw input dataset.
local_path_save = '../data/mlflow_artifacts/'
input_data_path = "../data/raw_data/uci-default-of-credit-card/data/data.csv"

In [86]:
# Load the fitted one-hot encoder and scaler from the artifacts folder.
# Context managers ensure each file handle is closed right after unpickling.
# NOTE(review): unpickling can execute arbitrary code — only load artifacts from a trusted source.
with open(local_path_save + '_ohe.pkl', 'rb') as ohe_file:
    fitted_ohe = load(ohe_file)
with open(local_path_save + '_scaler.pkl', 'rb') as scaler_file:
    fitted_scaler = load(scaler_file)

In [87]:
# Read features and target from the raw dataset
# ("default" is presumably the target column name — confirm against Preprocessing).
pp = Preprocessing(input_data_path, "default")
X, Y = pp.read_dataframe()

# Hold out 30% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=420
)

# Preprocess the held-out features, passing in the previously loaded encoder
# and scaler; only the first of the three returned values is used here.
X_test_ohe, _, _ = pp.preprocess_for_classification(
    df=X_test,
    fit_ohe=True,
    fitted_ohe=fitted_ohe,
    perform_scaling=True,
    fitted_scaler=fitted_scaler,
)

In [88]:
# Deserialize the trained classifier used for the bias analysis.
# NOTE(review): pickle can execute arbitrary code on load — only use trusted model files.
with open("trained_model_for_testing/RF_12_200.pkl", mode="rb") as model_file:
    classifier = load(model_file)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [89]:
# Score the preprocessed test set: hard labels and per-class probabilities.
predicted_values = classifier.predict(X_test_ohe)
predicted_prob = classifier.predict_proba(X_test_ohe)

#### Definition of conditioning variables
**UX**:
- the user selects the continuous features to be categorized and used for the creation of control groups. 

The application proposes split points that make the resulting discrete distribution uniform (other binning procedures are also possible).

In [90]:
def age_buckets(x):
    """Map an age to one of three ordinal buckets.

    Returns 1 for ages below 30, 2 for ages from 30 up to (but excluding) 40,
    and 3 for every other value.
    """
    # Exclusive upper bounds of buckets 1 and 2, in ascending order.
    for bucket, upper_bound in enumerate((30, 40), start=1):
        if x < upper_bound:
            return bucket
    return 3

# Derive the age bucket from the test rows themselves instead of bucketing the
# whole dataset `X` and relying on index alignment. `assign` returns a new
# frame, so the frame produced by the train/test split is not mutated in place.
X_test = X_test.assign(age_buckets=X_test.x5_age.apply(age_buckets))

**UX**:
- the user selects from a drop down menu the variables to be used for the groups creation
- the user can select "check all the variables" and the application will use all the available discrete features

In [91]:
# Discrete features whose value combinations define the control groups.
conditioning_variables = [
    'x3_education',
    'x4_marriage',
    'age_buckets',
]

In [92]:
# Attach the model outputs to the re-indexed test features; column 1 of
# `predicted_prob` is used as the probability score.
df_with_predictions = (
    X_test
    .reset_index(drop=True)
    .assign(
        predictions=predicted_values,
        predicted_probs=predicted_prob[:, 1],
    )
)

## Bias Detection

**UX**:
- the user selects from a drop down menu the binary variable to be used for the distance calculation
- the user selects a threshold for the distance
- the user selects a Distance (if more than one are provided)

In [93]:
# Detector for option 1: distance between the groups' observed frequencies.
bd = FreqVsFreqBiasDetector(
    distance="TVD",
    target_variable_type='probability',
)

**UX**:

The user can select between two options:
- Option 1: distance between frequencies
- Option 2: distance from reference distribution
    - in this case, the user has to insert values for a reference distribution

## Option 1: Distance between frequencies

### Comparison of the two groups observed freqs

In [94]:
# Unconditioned comparison of the `x2_sex` groups over 10 probability bins.
# NOTE(review): with threshold=None the detector presumably derives a threshold itself — confirm.
bd.compare_root_variable_groups(
    dataframe=df_with_predictions,
    root_variable='x2_sex',
    target_variable='predicted_probs',
    n_bins=10,
    threshold=None,
)

(0.05588710957653692, False, 0.016368585412256314, None)

In [95]:
#distance = max( abs(female_0_freq - male_0_freq), abs(female_1_freq - male_1_freq) )

In [96]:
#print(df_with_predictions.groupby("x2_sex").predictions.value_counts(normalize=True))
# Relative frequency of the predicted probabilities over 10 equal-width bins
# on [0, 1], printed separately for each `x2_sex` group.
for sex_code in (1, 2):
    sex_group = df_with_predictions.query(f"x2_sex=={sex_code}")
    bin_counts = np.histogram(sex_group.predicted_probs, bins=10, range=[0,1])[0]
    print(bin_counts/sex_group.shape[0])

[0.20439189 0.42454955 0.1356982  0.06503378 0.04391892 0.04560811
 0.04054054 0.02984234 0.00844595 0.00197072]
[0.260279   0.41831865 0.12756975 0.05837004 0.03432452 0.02514684
 0.03671072 0.03120411 0.00715859 0.00091777]


### Comparison of the two groups' observed frequencies, conditioning on other features

In [97]:
# Same comparison repeated within every group defined by the conditioning
# variables; `min_obs_per_group` and `threshold` are passed through to the detector.
results = bd.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    root_variable='x2_sex',
    target_variable='predicted_probs',
    conditioning_variables=conditioning_variables,
    n_bins=10,
    min_obs_per_group=30,
    threshold=0.1,
)

In [98]:
#results

**UX**:
- the application shows the results in two views:
    - overall results: all the computed distances
    - violations: only the results above the threshold
- for both, the user can order the results by number of obs, distance or group name

In [99]:
# Keep only the combinations whose distance is greater than the threshold:
# for those, the third element of the result tuple is falsy.
violations = {
    group: outcome
    for group, outcome in results.items()
    if not outcome[2]
}

- 2 (`x2_sex`): Gender (1 = male; 2 = female).
- 3 (`x3_education`): Education (1 = graduate school; 2 = university; 3 = high school; 4 = others).
- 4 (`x4_marriage`): Marital status (1 = married; 2 = single; 3 = others).

In [100]:
# Sort the violations by the number of observations in each group (the first
# element of the result tuple), largest first. Using that count as the explicit
# key avoids comparing whole result tuples, whose other fields are not meant
# to take part in the ordering.
dict(sorted(violations.items(), key=lambda item: item[1][0], reverse=True))

{'x3_education==3 & age_buckets==2': (435,
  0.13216039279869068,
  False,
  0.1,
  None),
 'x3_education==2 & x4_marriage==2 & age_buckets==3': (278,
  0.13769363166953524,
  False,
  0.1,
  None),
 'x3_education==3 & x4_marriage==1 & age_buckets==2': (236,
  0.1399298792364628,
  False,
  0.1,
  None),
 'x3_education==3 & x4_marriage==2 & age_buckets==2': (189,
  0.13556457065584854,
  False,
  0.1,
  None),
 'x3_education==1 & x4_marriage==2 & age_buckets==3': (174,
  0.10471898197242846,
  False,
  0.1,
  None),
 'x4_marriage==3': (95, 0.10410557184750735, False, 0.1, None),
 'x4_marriage==3 & age_buckets==3': (67,
  0.15462962962962967,
  False,
  0.1,
  None),
 'x3_education==3 & x4_marriage==1 & age_buckets==1': (66,
  0.1111111111111111,
  False,
  0.1,
  None),
 'x3_education==1 & x4_marriage==1 & age_buckets==1': (57,
  0.3109756097560976,
  False,
  0.1,
  None),
 'x3_education==2 & x4_marriage==3': (56,
  0.17251461988304095,
  False,
  0.1,
  None),
 'x3_education==5 & x4_

#### Focus analyses on particular cases
Here we want to understand/qualify some specific violation cases.

**UX**: 
- the user selects one of the available violations
- the application returns relevant details to help understand what's going on
- the user can download the results as csv file. 

In [101]:
nbins=10
focus_df = df_with_predictions.query("x3_education==1 & x4_marriage==1 & age_buckets==1")
# Pass the root-variable labels in sorted order: unique() alone yields order of
# appearance, while the results are tabulated against sorted labels. The
# frequencies are presumably returned in the order of the labels passed in
# (confirm against get_frequencies_list_from_probs).
freqs, abs_freqs = bd.get_frequencies_list_from_probs(focus_df, 'predicted_probs', 
                        'x2_sex', sorted(df_with_predictions.x2_sex.unique()), nbins)

freqs

[array([0.26829268, 0.56097561, 0.04878049, 0.04878049, 0.02439024,
        0.02439024, 0.02439024, 0.        , 0.        , 0.        ]),
 array([0.1875, 0.25  , 0.25  , 0.1875, 0.0625, 0.    , 0.0625, 0.    ,
        0.    , 0.    ])]

In [102]:
# Bin edges used for the labels: linspace always yields exactly nbins + 1 edges,
# whereas arange with a fractional step can gain an extra edge through float rounding.
predicted_probs_limits = np.round(np.linspace(0, 1, nbins + 1), 2)
predicted_probs_range = [f'{start}-{end}' for start, end in zip(predicted_probs_limits[:-1], predicted_probs_limits[1:])]
# Create a multi-index (group label x probability bin)
multi_index = pd.MultiIndex.from_product([sorted(df_with_predictions.x2_sex.unique()), predicted_probs_range], names=['x2_sex', 'predicted_probs'])
result_series = pd.Series(np.concatenate(freqs), index=multi_index, name='freqs')

# Display the Series
print(result_series)

x2_sex  predicted_probs
1       0.0-0.1            0.268293
        0.1-0.2            0.560976
        0.2-0.3            0.048780
        0.3-0.4            0.048780
        0.4-0.5            0.024390
        0.5-0.6            0.024390
        0.6-0.7            0.024390
        0.7-0.8            0.000000
        0.8-0.9            0.000000
        0.9-1.0            0.000000
2       0.0-0.1            0.187500
        0.1-0.2            0.250000
        0.2-0.3            0.250000
        0.3-0.4            0.187500
        0.4-0.5            0.062500
        0.5-0.6            0.000000
        0.6-0.7            0.062500
        0.7-0.8            0.000000
        0.8-0.9            0.000000
        0.9-1.0            0.000000
Name: freqs, dtype: float64


## Option 2: distance from reference distribution

In [103]:
# Detector for option 2: distance of each group from a reference distribution.
bd = FreqVsRefBiasDetector(
    normalization='D1',
    adjust_div='laplace',
    target_variable_type='probability',
)

In [104]:
# Reference distribution over the 10 probability bins; the same list is used for both groups.
female_distr = [0.2, 0.15, 0.15, 0.1, 0.1, 0.1, 0.05, 0.05, 0.05, 0.05]  #[0.75,0.25]
male_distr = female_distr

# One array per group, in the order (female, male).
ref_distribution = [np.array(distr) for distr in (female_distr, male_distr)]

### Comparison of the two groups w.r.t. the reference distribution

In [105]:
# Distance of each `x2_sex` group from the reference distribution, without conditioning.
bd.compare_root_variable_groups(
    dataframe=df_with_predictions,
    root_variable='x2_sex',
    target_variable='predicted_probs',
    reference_distribution=ref_distribution,
    n_bins=10,
    threshold=0.1,
)

([0.3325369126643256, 0.3950668798613992], [False, False], 0.1)

### Comparison of the two groups w.r.t. the reference distribution, conditioning on other features

In [106]:
# Distance from the reference distribution, computed within every group
# defined by the conditioning variables.
results = bd.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    root_variable='x2_sex',
    target_variable='predicted_probs',
    conditioning_variables=conditioning_variables,
    reference_distribution=ref_distribution,
    n_bins=10,
    min_obs_per_group=30,
    threshold=0.1,
)

In [107]:
# Show every conditioned result; groups with too few observations carry a
# message string in place of the distances and flags.
results

{'x3_education==1': (3119,
  [0.3733894163213446, 0.4492918852037924],
  [False, False],
  0.1),
 'x3_education==3': (1499,
  [0.33639915729869385, 0.3692037899778381],
  [False, False],
  0.1),
 'x3_education==2': (4250,
  [0.3198385486496075, 0.366549211102255],
  [False, False],
  0.1),
 'x3_education==4': (40,
  [0.33300583840745634, 0.28706191119139435],
  [False, False],
  0.1),
 'x3_education==5': (75,
  [0.252884439948881, 0.4196372612819057],
  [False, False],
  0.1),
 'x3_education==6': (14, None, 'Not enough observations'),
 'x3_education==0': (3, None, 'Not enough observations'),
 'x4_marriage==1': (4065,
  [0.32083718972939557, 0.3693872204325538],
  [False, False],
  0.1),
 'x4_marriage==2': (4822,
  [0.3440863424267153, 0.42727312499909975],
  [False, False],
  0.1),
 'x4_marriage==3': (95,
  [0.37909768514316755, 0.34718813410506955],
  [False, False],
  0.1),
 'x4_marriage==0': (18, None, 'Not enough observations'),
 'age_buckets==3': (2727,
  [0.33270299763381783, 0.3

In [108]:
# selecting only combinations for which the distance is greater than the threshold
# for at least one group. Entries skipped for lack of observations carry a message
# string instead of the per-group flags, so they are excluded explicitly rather
# than by indexing into that string; `all` also covers any number of groups.
violations = {
    k: v for k, v in results.items()
    if not isinstance(v[2], str) and not all(v[2])
}
len(violations)

54

### Focus analysis on particular cases

In [109]:
# The detector already bound to `bd` is reused here; instantiating a separate
# BiasDetector is not necessary.
nbins = 10
focus_df = df_with_predictions.query("x3_education==5")
# Labels are sorted so the frequency arrays match the sorted labels used for display.
root_labels = sorted(df_with_predictions.x2_sex.unique())
freqs, abs_freqs = bd.get_frequencies_list_from_probs(
    focus_df, 'predicted_probs', 'x2_sex', root_labels, nbins
)
freqs

[array([0.34782609, 0.47826087, 0.08695652, 0.        , 0.        ,
        0.04347826, 0.04347826, 0.        , 0.        , 0.        ]),
 array([0.40384615, 0.48076923, 0.09615385, 0.        , 0.        ,
        0.01923077, 0.        , 0.        , 0.        , 0.        ])]

In [110]:
#as a df

# linspace always yields exactly nbins + 1 bin edges, whereas arange with a
# fractional step can gain an extra edge through float rounding.
predicted_probs_limits = np.round(np.linspace(0, 1, nbins + 1), 2)
# Take the group labels from the full dataframe, in the same sorted order used
# to compute `freqs`, so labels and frequencies stay aligned even if a group
# is missing from `focus_df`.
viol = pd.DataFrame({'x2_sex':np.repeat(sorted(df_with_predictions.x2_sex.unique()),nbins),
              'predicted_probs': [f'{start}-{end}' for start, end in zip(predicted_probs_limits[:-1], predicted_probs_limits[1:])]*len(freqs),
              'freqs':np.concatenate(freqs)})
viol

Unnamed: 0,x2_sex,predicted_probs,freqs
0,1,0.0-0.1,0.347826
1,1,0.1-0.2,0.478261
2,1,0.2-0.3,0.086957
3,1,0.3-0.4,0.0
4,1,0.4-0.5,0.0
5,1,0.5-0.6,0.043478
6,1,0.6-0.7,0.043478
7,1,0.7-0.8,0.0
8,1,0.8-0.9,0.0
9,1,0.9-1.0,0.0


In [111]:
#as a series

# Bin edges used for the labels: linspace always yields exactly nbins + 1 edges,
# whereas arange with a fractional step can gain an extra edge through float rounding.
predicted_probs_limits = np.round(np.linspace(0, 1, nbins + 1), 2)
predicted_probs_range = [f'{start}-{end}' for start, end in zip(predicted_probs_limits[:-1], predicted_probs_limits[1:])]
# Create a multi-index (group label x probability bin)
multi_index = pd.MultiIndex.from_product([sorted(df_with_predictions.x2_sex.unique()), predicted_probs_range], names=['x2_sex', 'predicted_probs'])
result_series = pd.Series(np.concatenate(freqs), index=multi_index, name='freqs')

# Display the Series
print(result_series)


x2_sex  predicted_probs
1       0.0-0.1            0.347826
        0.1-0.2            0.478261
        0.2-0.3            0.086957
        0.3-0.4            0.000000
        0.4-0.5            0.000000
        0.5-0.6            0.043478
        0.6-0.7            0.043478
        0.7-0.8            0.000000
        0.8-0.9            0.000000
        0.9-1.0            0.000000
2       0.0-0.1            0.403846
        0.1-0.2            0.480769
        0.2-0.3            0.096154
        0.3-0.4            0.000000
        0.4-0.5            0.000000
        0.5-0.6            0.019231
        0.6-0.7            0.000000
        0.7-0.8            0.000000
        0.8-0.9            0.000000
        0.9-1.0            0.000000
Name: freqs, dtype: float64
