In [1]:
import os, sys
# Put the parent of the current working directory on the import path
# (presumably so the local `brio` package imported below can be resolved).
dir2 = os.path.abspath('')
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

In [2]:
from brio.utils.Preprocessing import Preprocessing
from sklearn.model_selection import train_test_split
from pickle import dump, load
import pandas as pd
import numpy as np

from brio.bias.FreqVsFreqBiasDetector import FreqVsFreqBiasDetector
from brio.bias.FreqVsRefBiasDetector import FreqVsRefBiasDetector

## Importing Data and Trained Classifier

In [3]:
# CSV with the raw dataset, relative to the notebook's working directory.
input_data_path = "../data/raw_data/uci-default-of-credit-card/data/data.csv"
# Prefix of the pickled encoder/scaler artifacts; file names are appended by plain
# string concatenation below, so the trailing slash is required.
local_path_save = '../data/mlflow_artifacts/'

In [4]:
# Load the encoder and scaler that were fitted at training time.
# Context managers make sure both file handles are closed after unpickling.
# NOTE(security): pickle can execute arbitrary code while loading; only unpickle
# artifacts that come from a trusted training run.
with open(local_path_save + '_ohe.pkl', 'rb') as file:
    fitted_ohe = load(file)
with open(local_path_save + '_scaler.pkl', 'rb') as file:
    fitted_scaler = load(file)

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


In [5]:
# Read the raw data into features X and target Y.
# "default" is the second Preprocessing argument (presumably the target column name — TODO confirm).
pp = Preprocessing(input_data_path, "default")
X, Y = pp.read_dataframe()

# 70/30 split with a fixed seed so the same test rows are selected on every run.
# NOTE(review): this should match the split used when the loaded classifiers were trained — confirm.
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.3, random_state=420)

# Transform the test features using the already-fitted encoder and scaler loaded above;
# only the first returned element is used, the other two are discarded.
# NOTE(review): fit_ohe=True is passed together with a pre-fitted encoder — confirm in
# Preprocessing.preprocess_for_classification that the encoder is applied, not re-fitted on test data.
X_test_ohe, _, _ = pp.preprocess_for_classification(df=X_test, 
                                                fit_ohe=True, 
                                                fitted_ohe=fitted_ohe,
                                                perform_scaling=True,
                                                fitted_scaler=fitted_scaler)

In [6]:
# Load the trained classifier from a specific, hard-coded MLflow run directory.
# NOTE(security): pickle can execute arbitrary code while loading; only unpickle trusted artifacts.
# NOTE(review): the run id is baked into the path, so this cell only works where that run exists.
with open("./mlruns/1/1e4a0667c7a64cbe8c7b023410e5781c/artifacts/model/model.pkl", "rb") as file:
    classifier = load(file)

In [7]:
# Score the preprocessed test set. Only `predicted_values` is used by the cells below;
# `predicted_prob` is computed but not referenced again in this notebook section.
predicted_prob = classifier.predict_proba(X_test_ohe)
predicted_values = classifier.predict(X_test_ohe)

#### Definition of conditioning variables

In [8]:
def age_buckets(x):
    """Map an age to an ordinal bucket.

    Returns 1 when ``x < 30``, 2 when ``30 <= x < 40`` and 3 in every other
    case. A value that compares False against both cut-offs (e.g. NaN) therefore
    also ends up in bucket 3.
    """
    if x < 30:
        return 1
    if x < 40:
        return 2
    return 3

# Bucket the ages of the test rows only. The buckets are derived from X_test itself
# (not from the full X, which relied on implicit index alignment), and the column is
# added with .assign so the frame returned by train_test_split is not mutated in place
# (avoids SettingWithCopyWarning and keeps the cell safe to re-run).
X_test = X_test.assign(age_buckets=X_test['x5_age'].apply(age_buckets))

In [9]:
# Columns used to split the data into conditioned groups; 'age_buckets' is the
# derived column added to X_test in the previous cell.
conditioning_variables = ['x3_education', 'x4_marriage', 'age_buckets']

In [10]:
# Attach the predictions as a named column instead of relying on the auto-generated
# column label 0 followed by a rename. Both operands carry a fresh 0..n-1 index
# (reset_index / default Series index), so rows are paired by position.
df_with_predictions = pd.concat(
    [X_test.reset_index(drop=True), pd.Series(predicted_values, name="predictions")],
    axis=1)

## Hazard and risk functions

In [11]:
def as_list(x):
    """Return ``x`` unchanged when it is exactly a ``list``, otherwise wrap it as ``[x]``.

    The check is on the exact type, so tuples, arrays and list subclasses are
    wrapped rather than passed through.
    """
    return x if type(x) is list else [x]

In [12]:
def hazard_function(
    overall_result, 
    conditioned_results, 
    tot_observations,
    conditioning_variables,
    weight_logic="group"):
    """Compute the hazard of one bias test from its overall and conditioned results.

    Parameters
    ----------
    overall_result : sequence
        Indexed as ``[0]`` test result(s), ``[1]`` boolean outcome(s), ``[2]`` threshold.
        Elements 0 and 1 are either scalars or lists with one entry per
        reference distribution (FreqVsRef); scalars give a single iteration.
    conditioned_results : dict
        Maps a group name to a sequence indexed as ``[0]`` number of samples,
        ``[1]`` test result(s), ``[2]`` boolean outcome(s), ``[3]`` threshold.
        Groups whose test result is ``None`` are skipped. The number of ``&``
        separators in the group name determines how many conditioning
        variables the group uses.
    tot_observations : int
        Total number of observations; used to turn group sizes into fractions.
    conditioning_variables : list
        Conditioning variables of the analysis (only its length is used).
    weight_logic : {"group", "individual"}
        "group" weights a test by ``n_features_total - used_features + 1``
        (T_i in the Risk Function document), "individual" by the number of
        used features (S_i); both are normalised over all tests of an iteration.

    Returns
    -------
    number
        Sum over iterations and over tests whose boolean outcome equals False of
        ``weight * (num_samples / tot_observations) * (result - threshold) * threshold``.

    Raises
    ------
    ValueError
        If ``weight_logic`` is neither "group" nor "individual".
    """
    # Validate once, before any work, so a bad value is reported even when
    # there is nothing to iterate over.
    if weight_logic not in ("group", "individual"):
        raise ValueError('Only "group" or "individual" are allowed for parameter weight_logic')

    # Total number of features = conditioning variables + root variable (+1)
    n_features_total = len(conditioning_variables) + 1

    overall_values = as_list(overall_result[0])
    overall_outcomes = as_list(overall_result[1])

    hazard_overall = 0
    # One iteration per reference distribution (FreqVsRef);
    # a single iteration for FreqVsFreq.
    for k in range(len(overall_values)):

        # Each entry: (test result, threshold, num_samples, boolean, num_used_features)
        test_results = [(
            overall_values[k],
            overall_result[2],
            tot_observations,
            overall_outcomes[k],
            1,  # the overall test only uses the root variable
        )]

        for group_name, group in conditioned_results.items():
            if group[1] is not None:
                test_results.append((
                    as_list(group[1])[k],              # test result
                    group[3],                          # threshold
                    group[0],                          # num_samples
                    as_list(group[2])[k],              # boolean
                    len(group_name.split("&")) + 1,    # used features: conditioning + root
                ))

        if weight_logic == "group":
            # T_i in Risk Function document
            raw_weights = [n_features_total - line[4] + 1 for line in test_results]
            weight_denominator = sum(raw_weights)
        else:
            # S_i in Risk Function document
            raw_weights = [line[4] for line in test_results]
            weight_denominator = np.sum(raw_weights)

        hazard = 0
        for line, raw_weight in zip(test_results, raw_weights):
            weight = raw_weight / weight_denominator
            # `== False` (not `is False` / `not`) on purpose: it also matches
            # numpy booleans while leaving any other value at delta = 0.
            delta = 1 if line[3] == False else 0
            q = line[2] / tot_observations
            e = line[0] - line[1]
            hazard += delta * weight * q * e * line[1]

        hazard_overall += hazard

    return hazard_overall

In [13]:
def risk_function(test_hazards):
    """Aggregate the hazards of several tests into a single risk value.

    ``test_hazards`` is a list of hazard values; the result is their sum
    divided by the square of the number of tests.
    """
    total_hazard = np.sum(test_hazards)
    n_tests = len(test_hazards)
    # NOTE(review): the divisor is n**2 (``**`` binds tighter than ``/``), i.e.
    # the mean hazard divided once more by n — confirm this matches the
    # intended definition of the risk.
    return total_hazard / (n_tests ** 2)

### Test 1: TVD, A1=high

In [14]:
# Detector for test 1: TVD distance with A1="high".
bd_1 = FreqVsFreqBiasDetector(distance="TVD", A1="high")

In [15]:
# Overall (unconditioned) comparison on the test frame: the model predictions are
# the target variable and `x2_sex` is the root variable.
overall_1 = bd_1.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex')

In [16]:
# Same target/root variables, additionally passing the conditioning variables
# (education, marriage, age bucket) to get one result per conditioned group.
conditioned_1 = bd_1.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    conditioning_variables=conditioning_variables)

In [17]:
# Hazard of test 1; the row count of the predictions frame is the total
# number of observations.
hazard_test_1 = hazard_function(
    overall_result=overall_1,
    conditioned_results=conditioned_1,
    tot_observations=df_with_predictions.shape[0],
    conditioning_variables=conditioning_variables,
    weight_logic="group",
)

In [18]:
hazard_test_1

3.662715426591436e-05

### Test 2: TVD, A1=low

In [19]:
# Detector for test 2: TVD distance with A1="low".
bd_2 = FreqVsFreqBiasDetector(distance="TVD", A1="low")

In [20]:
# Overall (unconditioned) comparison: predictions as target, `x2_sex` as root variable.
overall_2 = bd_2.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex')

In [21]:
# Conditioned comparison: same target/root variables, with the conditioning variables passed in.
conditioned_2 = bd_2.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    conditioning_variables=conditioning_variables)

In [22]:
# Hazard of test 2, computed over all rows of the predictions frame.
hazard_test_2 = hazard_function(
    overall_result=overall_2,
    conditioned_results=conditioned_2,
    tot_observations=df_with_predictions.shape[0],
    conditioning_variables=conditioning_variables,
    weight_logic="group",
)

### Test 3: JS, A1=high

In [23]:
# Detector for test 3: JS distance with A1="high".
bd_3 = FreqVsFreqBiasDetector(distance="JS", A1="high")

In [24]:
# Overall (unconditioned) comparison: predictions as target, `x2_sex` as root variable.
overall_3 = bd_3.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex')

In [25]:
# Conditioned comparison: same target/root variables, with the conditioning variables passed in.
conditioned_3 = bd_3.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    conditioning_variables=conditioning_variables)

In [26]:
# Hazard of test 3, computed over all rows of the predictions frame.
hazard_test_3 = hazard_function(
    overall_result=overall_3,
    conditioned_results=conditioned_3,
    tot_observations=df_with_predictions.shape[0],
    conditioning_variables=conditioning_variables,
    weight_logic="group",
)

### Test 4: JS, A1=low

In [27]:
# Detector for test 4: JS distance with A1="low".
bd_4 = FreqVsFreqBiasDetector(distance="JS", A1="low")

In [28]:
# Overall (unconditioned) comparison: predictions as target, `x2_sex` as root variable.
overall_4 = bd_4.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex')

In [29]:
# Conditioned comparison: same target/root variables, with the conditioning variables passed in.
conditioned_4 = bd_4.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    conditioning_variables=conditioning_variables)

In [30]:
# Hazard of test 4, computed over all rows of the predictions frame.
hazard_test_4 = hazard_function(
    overall_result=overall_4,
    conditioned_results=conditioned_4,
    tot_observations=df_with_predictions.shape[0],
    conditioning_variables=conditioning_variables,
    weight_logic="group",
)

## Risk results

In [31]:
# Hazards of the four FreqVsFreq tests, in test order; input to risk_function.
hazards = [hazard_test_1, hazard_test_2, hazard_test_3, hazard_test_4]

In [32]:
risk_function(hazards)

3.1655148018808247e-06

# Experiments with 3 models

In [33]:
# Load the three pre-trained models compared in this section.
# NOTE(security): pickle can execute arbitrary code while loading; only unpickle trusted files.
with open("./trained_model_for_testing/RF_12_200.pkl", "rb") as file:
    classifier_1 = load(file)
    
with open("./trained_model_for_testing/RF_37_10.pkl", "rb") as file:
    classifier_2 = load(file)
    
with open("./trained_model_for_testing/Tree_depth2.pkl", "rb") as file:
    classifier_3 = load(file)

In [34]:
def _with_predictions(features, predicted):
    """Return `features` (index reset to 0..n-1) with `predicted` appended as a
    "predictions" column; rows are paired by position."""
    return pd.concat(
        [features.reset_index(drop=True), pd.Series(predicted, name="predictions")],
        axis=1)


# Score the same preprocessed test set with each model and build one
# predictions frame per model. The predicted_prob_* values are kept for
# inspection; only the predicted labels feed the bias tests.
predicted_prob_1 = classifier_1.predict_proba(X_test_ohe)
predicted_values_1 = classifier_1.predict(X_test_ohe)
df_with_predictions_1 = _with_predictions(X_test, predicted_values_1)

predicted_prob_2 = classifier_2.predict_proba(X_test_ohe)
predicted_values_2 = classifier_2.predict(X_test_ohe)
df_with_predictions_2 = _with_predictions(X_test, predicted_values_2)

predicted_prob_3 = classifier_3.predict_proba(X_test_ohe)
predicted_values_3 = classifier_3.predict(X_test_ohe)
df_with_predictions_3 = _with_predictions(X_test, predicted_values_3)

In [35]:
def test_model(data_frame):
    ### Test 1: TVD, A1=high

    bd_1 = FreqVsFreqBiasDetector(distance="TVD", A1="high")

    overall_1 = bd_1.compare_root_variable_groups(
        dataframe=data_frame,
        target_variable='predictions',
        root_variable='x2_sex')

    conditioned_1 = bd_1.compare_root_variable_conditioned_groups(
        dataframe=data_frame,
        target_variable='predictions',
        root_variable='x2_sex',
        conditioning_variables=conditioning_variables)

    hazard_test_1 = hazard_function(
        overall_1, 
        conditioned_1, 
        data_frame.shape[0],
        conditioning_variables,
        weight_logic="group")
    
    print("Test 1 (TVD, A1=high) hazard: ", hazard_test_1)

    ### Test 2 (TVD, low)

    bd_2 = FreqVsFreqBiasDetector(distance="TVD", A1="low")

    overall_2 = bd_2.compare_root_variable_groups(
        dataframe=data_frame,
        target_variable='predictions',
        root_variable='x2_sex')

    conditioned_2 = bd_2.compare_root_variable_conditioned_groups(
        dataframe=data_frame,
        target_variable='predictions',
        root_variable='x2_sex',
        conditioning_variables=conditioning_variables)

    hazard_test_2 = hazard_function(
        overall_2, 
        conditioned_2, 
        data_frame.shape[0],
        conditioning_variables,
        weight_logic="group")
    
    print("Test 2 (TVD, A1=low) hazard: ", hazard_test_2)

    ### Test 3 (JS, high)

    bd_3 = FreqVsFreqBiasDetector(distance="JS", A1="high")

    overall_3 = bd_3.compare_root_variable_groups(
        dataframe=data_frame,
        target_variable='predictions',
        root_variable='x2_sex')

    conditioned_3 = bd_3.compare_root_variable_conditioned_groups(
        dataframe=data_frame,
        target_variable='predictions',
        root_variable='x2_sex',
        conditioning_variables=conditioning_variables)

    hazard_test_3 = hazard_function(
        overall_3, 
        conditioned_3, 
        data_frame.shape[0],
        conditioning_variables,
        weight_logic="group")
    
    print("Test 3 (JS, A1=high) hazard: ", hazard_test_3)

    ### Test 4 (JS, low)

    bd_4 = FreqVsFreqBiasDetector(distance="JS", A1="low")

    overall_4 = bd_4.compare_root_variable_groups(
        dataframe=data_frame,
        target_variable='predictions',
        root_variable='x2_sex')

    conditioned_4 = bd_4.compare_root_variable_conditioned_groups(
        dataframe=data_frame,
        target_variable='predictions',
        root_variable='x2_sex',
        conditioning_variables=conditioning_variables)

    hazard_test_4 = hazard_function(
        overall_4, 
        conditioned_4, 
        data_frame.shape[0],
        conditioning_variables,
        weight_logic="group")
    
    print("Test 4 (JS, A1=low) hazard: ", hazard_test_4)
    
    hazards = [hazard_test_1, hazard_test_2, hazard_test_3, hazard_test_4]
    
    return risk_function(hazards)

In [36]:
# Evaluate each model's predictions frame. test_model is called while building the
# print arguments, so its four per-test hazard lines appear before the overall line.
for model, df in zip(["RF_12_200", "RF_37_10", "Tree_depth2"],
              [df_with_predictions_1, df_with_predictions_2, df_with_predictions_3]):
    print(f"Overall risk measure for model {model}: ", test_model(df))
    # print("\n") emits two line breaks (the "\n" plus print's own newline) as a separator.
    print("\n")

Test 1 (TVD, A1=high) hazard:  3.662715426591436e-05
Test 2 (TVD, A1=low) hazard:  1.4001756250959011e-05
Test 3 (JS, A1=high) hazard:  1.932631321983019e-08
Test 4 (JS, A1=low) hazard:  0.0
Overall risk measure for model RF_12_200:  3.1655148018808247e-06


Test 1 (TVD, A1=high) hazard:  3.231114243613055e-05
Test 2 (TVD, A1=low) hazard:  8.844513845377398e-06
Test 3 (JS, A1=high) hazard:  7.713366782296505e-10
Test 4 (JS, A1=low) hazard:  0.0
Overall risk measure for model RF_37_10:  2.572276726136636e-06


Test 1 (TVD, A1=high) hazard:  1.0247396005717305e-05
Test 2 (TVD, A1=low) hazard:  3.7820077895012226e-06
Test 3 (JS, A1=high) hazard:  3.654431188743626e-08
Test 4 (JS, A1=low) hazard:  0.0
Overall risk measure for model Tree_depth2:  8.791217566941227e-07




## FreqVsRef: comparison against a reference distribution

In [37]:
# Detector comparing observed frequencies with a reference distribution; default settings.
bd_ref = FreqVsRefBiasDetector()

In [38]:
# Reference probabilities per sex group; each pair sums to 1.
# Presumably the _0 / _1 suffixes refer to the two prediction classes — verify.
male_0_ref = 55/100
male_1_ref = 45/100

female_0_ref = 50/100
female_1_ref = 50/100

# NOTE(review): the list is ordered [female, male]; confirm this matches the order in
# which FreqVsRefBiasDetector enumerates the categories of `x2_sex`.
ref_distribution = [np.array([female_0_ref, female_1_ref]), np.array([male_0_ref, male_1_ref])]

In [39]:
# Overall comparison of the predictions per `x2_sex` group against the reference
# distribution; the explicit threshold is left commented out, so the detector's own
# default applies.
overall_ref = bd_ref.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    #threshold=0.1,
    reference_distribution=ref_distribution
)

In [40]:
# Conditioned comparison against the same reference distribution, passing
# min_obs_per_group=30 (presumably groups with fewer observations are not tested — verify).
conditioned_ref = bd_ref.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    conditioning_variables=conditioning_variables,
    #threshold=0.1,
    min_obs_per_group=30,
    reference_distribution=ref_distribution)

In [41]:
# Hazard of the FreqVsRef test; hazard_function sums one contribution per
# reference distribution when the results are lists.
hazard_test_ref = hazard_function(
    overall_result=overall_ref,
    conditioned_results=conditioned_ref,
    tot_observations=df_with_predictions.shape[0],
    conditioning_variables=conditioning_variables,
    weight_logic="group",
)

In [42]:
hazard_test_ref

0.0022374318004642693