In [1]:
import os, sys
# Put the parent of the current working directory on sys.path
# (presumably so the `src` package imported below can be resolved — confirm layout).
dir2 = os.path.abspath("")
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

In [2]:
from src.utils.Preprocessing import Preprocessing
from sklearn.model_selection import train_test_split
from pickle import dump, load
import pandas as pd
import numpy as np

from src.bias.FreqVsFreqBiasDetector import FreqVsFreqBiasDetector

## Importing Data and Trained Classifier

**UX**: 
- the user uploads the dataset with the features (X)
- the user uploads 
    - the sklearn model as pkl file **or**
    - a file with the predictions already created (Y). 
- the user uploads scaler and ohe (if needed)

If a model is provided, the application checks whether the provided dataset and model match in terms of column names. 

In [3]:
# Location of the raw input CSV, relative to the notebook's working directory.
input_data_path = "../data/raw_data/uci-default-of-credit-card/data/data.csv"
# Folder prefix under which the pickled encoder and scaler artifacts are looked up.
local_path_save = "../data/mlflow_artifacts/"

In [4]:
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so these
# artifacts must come from a trusted source.
# The context managers close each file handle deterministically; the previous bare
# open(...) calls left the handles open until garbage collection.
with open(local_path_save + '_ohe.pkl', 'rb') as ohe_file:
    fitted_ohe = load(ohe_file)
with open(local_path_save + '_scaler.pkl', 'rb') as scaler_file:
    fitted_scaler = load(scaler_file)

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


In [5]:
# Read the features (X) and the target (Y) through the project's Preprocessing helper.
# The second argument is presumably the target column name — confirm in Preprocessing.
pp = Preprocessing(input_data_path, "default")
X, Y = pp.read_dataframe()

# Hold out 30% of the rows; the fixed random_state keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=420
)

# Transform the held-out features with the previously fitted encoder and scaler.
# Only the first of the three returned values is used here.
# NOTE(review): fit_ohe=True is passed together with an already-fitted encoder —
# confirm that this does not re-fit the encoder on the test split.
X_test_ohe, _, _ = pp.preprocess_for_classification(
    df=X_test,
    fit_ohe=True,
    fitted_ohe=fitted_ohe,
    perform_scaling=True,
    fitted_scaler=fitted_scaler,
)

In [6]:
# Load the trained classifier from a pickled artifact of a specific run.
# SECURITY: pickle.load can execute arbitrary code contained in the file; only load
# model files that come from a trusted source.
# NOTE(review): this path is relative to the working directory and has no "../" prefix,
# unlike the data paths used elsewhere in this notebook — confirm it resolves.
with open("mlruns/1/1e4a0667c7a64cbe8c7b023410e5781c/artifacts/model/model.pkl", "rb") as file:
    classifier = load(file)

In [7]:
# Labels predicted for the preprocessed test features; these become the
# "predictions" column analysed by the bias detector.
predicted_values = classifier.predict(X_test_ohe)
# Output of predict_proba for the same rows, kept for reference.
predicted_prob = classifier.predict_proba(X_test_ohe)

#### Definition of conditioning variables
**UX**:
- the user selects the continuous features to be categorized and used for the creation of control groups. 

The application proposes cut points that make the resulting discrete distribution uniform (other binning procedures are also possible). 

In [8]:
def age_buckets(x):
    """Map an age to a bucket id.

    Returns 1 when ``x < 30``, 2 when ``30 <= x < 40`` and 3 otherwise.
    """
    if x < 30:
        return 1
    return 2 if x < 40 else 3

# Bucket only the held-out rows. The ages are still read from the full frame X
# (as before), but the test rows are selected explicitly through X_test's index
# instead of bucketing every row of X and relying on implicit index alignment
# to discard the training rows during assignment.
X_test['age_buckets'] = X.loc[X_test.index, 'x5_age'].apply(age_buckets)

**UX**:
- the user selects from a drop down menu the variables to be used for the groups creation
- the user can select "check all the variables" and the application will use all the available discrete features

In [9]:
# Discrete features chosen for building the control groups.
conditioning_variables = [
    "x3_education",
    "x4_marriage",
    "age_buckets",
]

In [10]:
# Attach the predicted labels to the test features as a "predictions" column.
# X_test's index is reset so that it lines up positionally with the new Series.
# The Series is named directly rather than renaming the implicit column label 0
# afterwards, which would also have renamed any feature column labelled 0.
df_with_predictions = pd.concat(
    [
        X_test.reset_index(drop=True),
        pd.Series(predicted_values, name="predictions"),
    ],
    axis=1,
)

## Bias Detection

**UX**:
- the user selects from a drop down menu the binary variable to be used for the distance calculation
- the user selects a threshold for the distance
- the user selects a Distance (if more than one are provided)

In [11]:
# Detector configured with the "TVD" distance (presumably total variation distance);
# every other constructor option is left at its default.
bd = FreqVsFreqBiasDetector(distance="TVD")

**UX**:

The user can select between two options:
- Option 1: distance between frequencies
- Option 2: distance from reference distribution
    - in this case, the user has to insert values for a reference distribution

## Option 1: Distance between frequencies

### Comparison of the two groups observed freqs (A1=high)

In [12]:
# Compare the prediction frequencies of the x2_sex groups against an explicit threshold of 0.1.
bd.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x2_sex",
    threshold=0.1,
)

(0.025269625352224545, True, 0.1)

In [13]:
# Same x2_sex comparison without a threshold argument; the detector then reports
# a threshold of its own as the last element of the displayed tuple.
bd.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x2_sex",
)

(0.025269625352224545, False, 0.016368585412256314)

In [14]:
#distance = max( abs(female_0_freq - male_0_freq), abs(female_1_freq - male_1_freq) )

In [15]:
# Relative frequency of each predicted label within every x2_sex group.
df_with_predictions.groupby("x2_sex")["predictions"].value_counts(normalize=True)

x2_sex  predictions
1       0              0.873592
        1              0.126408
2       0              0.898862
        1              0.101138
Name: predictions, dtype: float64

### Comparison of the two groups observed freqs (A1=low)

In [16]:
# Same "TVD" distance as `bd`, but constructed with A1="low". In the recorded outputs
# the automatically derived threshold is larger with this setting than with `bd`.
bd_low = FreqVsFreqBiasDetector(distance="TVD", A1="low")

In [17]:
# x2_sex comparison with the A1="low" detector and no explicit threshold.
bd_low.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x2_sex",
)

(0.025269625352224545, True, 0.038868585412256317)

### Comparison of the two groups observed freqs, root_variable = age_buckets

In [18]:
# Compare the prediction frequencies of the age_buckets groups against an explicit threshold of 0.1.
bd.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="age_buckets",
    threshold=0.1,
)

(0.029621721777916887, True, 0.1)

In [19]:
# age_buckets comparison without a threshold argument; the detector reports its own threshold.
bd.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="age_buckets",
)

(0.029621721777916887, False, 0.012658113883008418)

In [20]:
# age_buckets comparison with the A1="low" detector and no explicit threshold.
bd_low.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="age_buckets",
)

(0.029621721777916887, True, 0.03515811388300842)

### Comparison of the two groups observed freqs, root_variable = x3_education

In [21]:
# Compare the prediction frequencies of the x3_education groups against an explicit threshold of 0.1.
bd.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
    threshold=0.1,
)

(0.14609739826551038, False, 0.1)

In [22]:
# x3_education comparison without a threshold argument; the detector reports its own threshold.
bd.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
)

(0.14609739826551038, False, 0.008417574992439395)

In [23]:
# x3_education comparison with the A1="low" detector and no explicit threshold.
bd_low.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
)

(0.14609739826551038, False, 0.030917574992439394)

##### Using min as aggregating function

In [24]:
# Detectors that use the built-in `min` as aggregating function:
# one with the default A1 setting and one with A1="low".
bd_high_min = FreqVsFreqBiasDetector(
    distance="TVD",
    aggregating_function=min,
)
bd_low_min = FreqVsFreqBiasDetector(
    distance="TVD",
    aggregating_function=min,
    A1="low",
)

In [25]:
# x3_education comparison aggregated with `min`, against an explicit threshold of 0.1.
bd_high_min.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
    threshold=0.1,
)

(0.0, True, 0.1)

In [26]:
# x3_education comparison aggregated with `min`, using the detector's own threshold.
bd_high_min.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
)

(0.0, True, 0.008417574992439395)

In [27]:
# x3_education comparison aggregated with `min`, A1="low" detector, no explicit threshold.
bd_low_min.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
)

(0.0, True, 0.030917574992439394)

##### Using std as aggregating function

In [28]:
# Detectors that use `np.std` as aggregating function:
# one with the default A1 setting and one with A1="low".
bd_high_std = FreqVsFreqBiasDetector(
    distance="TVD",
    aggregating_function=np.std,
)
bd_low_std = FreqVsFreqBiasDetector(
    distance="TVD",
    aggregating_function=np.std,
    A1="low",
)

In [29]:
# x3_education comparison aggregated with `np.std`, against an explicit threshold of 0.1.
bd_high_std.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
    threshold=0.1,
)

(0.04868174081342471, True, 0.1)

In [30]:
# x3_education comparison aggregated with `np.std`, using the detector's own threshold.
bd_high_std.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
)

(0.04868174081342471, False, 0.008417574992439395)

In [31]:
# x3_education comparison aggregated with `np.std`, A1="low" detector, no explicit threshold.
bd_low_std.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
)

(0.04868174081342471, False, 0.030917574992439394)

##### Using median as aggregating function

In [32]:
# Detectors that use `np.median` as aggregating function:
# one with the default A1 setting and one with A1="low".
bd_high_median = FreqVsFreqBiasDetector(
    distance="TVD",
    aggregating_function=np.median,
)
bd_low_median = FreqVsFreqBiasDetector(
    distance="TVD",
    aggregating_function=np.median,
    A1="low",
)

In [33]:
# x3_education comparison aggregated with `np.median`, against an explicit threshold of 0.1.
bd_high_median.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
    threshold=0.1,
)

(0.07213850593138826, True, 0.1)

In [34]:
# x3_education comparison aggregated with `np.median`, using the detector's own threshold.
bd_high_median.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
)

(0.07213850593138826, False, 0.008417574992439395)

In [35]:
# x3_education comparison aggregated with `np.median`, A1="low" detector, no explicit threshold.
bd_low_median.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
)

(0.07213850593138826, False, 0.030917574992439394)

##### Using mean as aggregating function

In [36]:
# Detectors that use `np.mean` as aggregating function:
# one with the default A1 setting and one with A1="low".
bd_high_mean = FreqVsFreqBiasDetector(
    distance="TVD",
    aggregating_function=np.mean,
)
bd_low_mean = FreqVsFreqBiasDetector(
    distance="TVD",
    aggregating_function=np.mean,
    A1="low",
)

In [37]:
# x3_education comparison aggregated with `np.mean`, against an explicit threshold of 0.1.
bd_high_mean.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
    threshold=0.1,
)

(0.07749081326993774, True, 0.1)

In [38]:
# x3_education comparison aggregated with `np.mean`, using the detector's own threshold.
bd_high_mean.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
)

(0.07749081326993774, False, 0.008417574992439395)

In [39]:
# x3_education comparison aggregated with `np.mean`, A1="low" detector, no explicit threshold.
bd_low_mean.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable="predictions",
    root_variable="x3_education",
)

(0.07749081326993774, False, 0.030917574992439394)