Replica of experiments_3, but using the RiskCalculator and HazardFromBiasDetectionCalculator classes

In [1]:
import os
import sys

# Put the parent of the current working directory on sys.path
# (presumably so the local `brio` package imported below resolves without
# being installed - confirm against the repository layout).
dir2 = os.path.abspath('')
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

In [2]:
from brio.utils.Preprocessing import Preprocessing
from sklearn.model_selection import train_test_split
from pickle import dump, load
import pandas as pd
import numpy as np

from brio.bias.FreqVsFreqBiasDetector import FreqVsFreqBiasDetector
from brio.bias.FreqVsRefBiasDetector import FreqVsRefBiasDetector
from brio.risk.HazardFromBiasDetectionCalculator import HazardFromBiasDetectionCalculator
from brio.risk.RiskCalculator import RiskCalculator

## Importing Data and Trained Classifier

In [3]:
# CSV file that Preprocessing reads the dataset from.
input_data_path = (
    "../data/raw_data/"
    "uci-default-of-credit-card/data/data.csv"
)
# Directory prefix of the pickled preprocessing artifacts loaded below.
local_path_save = "../data/mlflow_artifacts/"

In [4]:
# Load the previously fitted one-hot encoder and scaler.
# Each file is opened in a `with` block so the handle is closed as soon as
# the object has been unpickled.
# NOTE(review): unpickling executes arbitrary code - only load artifacts
# produced by a trusted training run.
with open(local_path_save + '_ohe.pkl', 'rb') as file:
    fitted_ohe = load(file)
with open(local_path_save + '_scaler.pkl', 'rb') as file:
    fitted_scaler = load(file)

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


In [5]:
# Read the dataset; "default" is passed as the second Preprocessing argument
# (presumably the name of the target column - confirm against brio).
pp = Preprocessing(input_data_path, "default")
X, Y = pp.read_dataframe()

# 70/30 split with a fixed seed so the test rows are reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.3, random_state=420)

# Preprocess the test features with the already-fitted encoder and scaler.
# Only the first of the three returned values is kept.
# NOTE(review): fit_ohe=True is passed together with a pre-fitted encoder;
# confirm in brio that this reuses `fitted_ohe` rather than refitting on test data.
X_test_ohe, _, _ = pp.preprocess_for_classification(df=X_test, 
                                                fit_ohe=True, 
                                                fitted_ohe=fitted_ohe,
                                                perform_scaling=True,
                                                fitted_scaler=fitted_scaler)

In [6]:
# Load the trained classifier pickled under the local ./mlruns directory.
# NOTE(review): unpickling executes arbitrary code - only load trusted artifacts.
with open("./mlruns/1/1e4a0667c7a64cbe8c7b023410e5781c/artifacts/model/model.pkl", "rb") as file:
    classifier = load(file)

In [7]:
# Score the preprocessed test features with the loaded classifier:
# predict_proba output and the predicted labels.
predicted_prob = classifier.predict_proba(X_test_ohe)
predicted_values = classifier.predict(X_test_ohe)

#### Definition of conditioning variables

In [8]:
def age_buckets(x):
    """Map an age to a coarse bucket id.

    Returns 1 when ``x < 30``, 2 when ``30 <= x < 40`` and 3 otherwise
    (which includes values such as NaN that fail both comparisons).
    """
    if x < 30:
        return 1
    return 2 if x < 40 else 3

# Add the age bucket as a conditioning column on the test rows.
# The buckets are computed from X_test itself, so only the test rows are
# processed and the result does not rely on index alignment with the full X.
X_test['age_buckets'] = X_test.x5_age.apply(age_buckets)

In [9]:
# Columns passed as `conditioning_variables` to the conditioned comparisons
# and to the hazard computation.
conditioning_variables = [
    "x3_education",
    "x4_marriage",
    "age_buckets",
]

In [10]:
# Test features plus a "predictions" column.
# The index of X_test is reset so its rows line up positionally with the
# freshly built (0..n-1 indexed) predictions Series.
# The Series is named up front instead of renaming column 0 afterwards,
# which would also rename any feature column that happened to be labelled 0.
df_with_predictions = pd.concat(
    [
        X_test.reset_index(drop=True),
        pd.Series(predicted_values, name="predictions"),
    ],
    axis=1,
)

In [11]:
# Shared calculators reused by the cells below: `hc` turns bias-detection
# results into a hazard, `rc` combines a list of hazards into a risk.
rc = RiskCalculator()
hc = HazardFromBiasDetectionCalculator()

### Test 1: TVD, A1=high

In [12]:
# Detector for test 1: TVD distance, A1 set to "high".
bd_1 = FreqVsFreqBiasDetector(distance="TVD", A1="high")

In [None]:
# Overall comparison on the full frame: root variable x2_sex, target "predictions".
overall_1 = bd_1.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex')

In [None]:
# Same comparison, conditioned on the variables in `conditioning_variables`.
conditioned_1 = bd_1.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    conditioning_variables=conditioning_variables)

In [None]:
# Hazard for test 1, built from the overall and conditioned results.
# The third argument is the number of rows in the predictions frame.
hazard_test_1 = hc.compute_hazard_from_freqvsfreq_or_freqvsref(
    overall_1, 
    conditioned_1, 
    df_with_predictions.shape[0],
    conditioning_variables,
    weight_logic="group")

In [None]:
# Display the test 1 hazard as the cell output.
hazard_test_1

### Test 2: TVD, A1=low

In [None]:
# Detector for test 2: TVD distance, A1 set to "low".
bd_2 = FreqVsFreqBiasDetector(distance="TVD", A1="low")

In [None]:
# Overall comparison on the full frame: root variable x2_sex, target "predictions".
overall_2 = bd_2.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex')

In [None]:
# Same comparison, conditioned on the variables in `conditioning_variables`.
conditioned_2 = bd_2.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    conditioning_variables=conditioning_variables)

In [None]:
# Hazard for test 2, built from the overall and conditioned results.
# The third argument is the number of rows in the predictions frame.
hazard_test_2 = hc.compute_hazard_from_freqvsfreq_or_freqvsref(
    overall_2, 
    conditioned_2, 
    df_with_predictions.shape[0],
    conditioning_variables,
    weight_logic="group")

### Test 3: JS, A1=high

In [None]:
# Detector for test 3: JS distance, A1 set to "high".
bd_3 = FreqVsFreqBiasDetector(distance="JS", A1="high")

In [None]:
# Overall comparison on the full frame: root variable x2_sex, target "predictions".
overall_3 = bd_3.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex')

In [None]:
# Same comparison, conditioned on the variables in `conditioning_variables`.
conditioned_3 = bd_3.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    conditioning_variables=conditioning_variables)

In [None]:
# Hazard for test 3, built from the overall and conditioned results.
# The third argument is the number of rows in the predictions frame.
hazard_test_3 = hc.compute_hazard_from_freqvsfreq_or_freqvsref(
    overall_3, 
    conditioned_3, 
    df_with_predictions.shape[0],
    conditioning_variables,
    weight_logic="group")

### Test 4: JS, A1=low

In [None]:
# Detector for test 4: JS distance, A1 set to "low".
bd_4 = FreqVsFreqBiasDetector(distance="JS", A1="low")

In [None]:
# Overall comparison on the full frame: root variable x2_sex, target "predictions".
overall_4 = bd_4.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex')

In [None]:
# Same comparison, conditioned on the variables in `conditioning_variables`.
conditioned_4 = bd_4.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    conditioning_variables=conditioning_variables)

In [None]:
# Hazard for test 4, built from the overall and conditioned results.
# The third argument is the number of rows in the predictions frame.
hazard_test_4 = hc.compute_hazard_from_freqvsfreq_or_freqvsref(
    overall_4, 
    conditioned_4, 
    df_with_predictions.shape[0],
    conditioning_variables,
    weight_logic="group")

## Risk results

In [None]:
# Hazards of the four FreqVsFreq tests, in test order (1 to 4).
hazards = [hazard_test_1, hazard_test_2, hazard_test_3, hazard_test_4]

In [None]:
# Combine the four hazards into the overall risk; the value is shown as the cell output.
rc.compute_risk(hazards)

# Experiments with 3 models

In [None]:
# Load the three pickled models compared in this section.
# NOTE(review): unpickling executes arbitrary code - only load trusted artifacts.
with open("./trained_model_for_testing/RF_12_200.pkl", "rb") as file:
    classifier_1 = load(file)
    
with open("./trained_model_for_testing/RF_37_10.pkl", "rb") as file:
    classifier_2 = load(file)
    
with open("./trained_model_for_testing/Tree_depth2.pkl", "rb") as file:
    classifier_3 = load(file)

In [None]:
def _score_test_set(model):
    """Score the preprocessed test set with *model*.

    Returns a 3-tuple: the ``predict_proba`` output, the ``predict`` output,
    and the test features (index reset) with the predicted labels appended
    as a "predictions" column.
    """
    probabilities = model.predict_proba(X_test_ohe)
    labels = model.predict(X_test_ohe)
    # Reset the index so the feature rows align positionally with the labels.
    frame = pd.concat(
        [X_test.reset_index(drop=True), pd.Series(labels, name="predictions")],
        axis=1)
    return probabilities, labels, frame


# One call per model replaces three copies of the same three statements;
# every global name defined by the original cell is still defined.
predicted_prob_1, predicted_values_1, df_with_predictions_1 = _score_test_set(classifier_1)
predicted_prob_2, predicted_values_2, df_with_predictions_2 = _score_test_set(classifier_2)
predicted_prob_3, predicted_values_3, df_with_predictions_3 = _score_test_set(classifier_3)

In [None]:
def test_model(data_frame):
    """Run the four FreqVsFreq bias tests on *data_frame* and return the risk.

    The tests are, in order: (TVD, A1=high), (TVD, A1=low), (JS, A1=high)
    and (JS, A1=low). Each one compares the 'predictions' column across the
    'x2_sex' groups, both overall and conditioned on the module-level
    ``conditioning_variables``, converts the two results into a hazard with
    the module-level ``hc`` and prints that hazard.

    Args:
        data_frame: frame holding the 'x2_sex' and 'predictions' columns
            plus the conditioning columns.

    Returns:
        Whatever ``rc.compute_risk`` returns for the list of four hazards.
    """
    # (distance, A1) settings; the order fixes both the printed test numbers
    # and the order in which hazards are handed to the risk calculator.
    settings = (("TVD", "high"), ("TVD", "low"), ("JS", "high"), ("JS", "low"))
    n_rows = data_frame.shape[0]

    hazards = []
    for test_number, (distance, a1) in enumerate(settings, start=1):
        detector = FreqVsFreqBiasDetector(distance=distance, A1=a1)

        overall = detector.compare_root_variable_groups(
            dataframe=data_frame,
            target_variable='predictions',
            root_variable='x2_sex')

        conditioned = detector.compare_root_variable_conditioned_groups(
            dataframe=data_frame,
            target_variable='predictions',
            root_variable='x2_sex',
            conditioning_variables=conditioning_variables)

        hazard = hc.compute_hazard_from_freqvsfreq_or_freqvsref(
            overall,
            conditioned,
            n_rows,
            conditioning_variables,
            weight_logic="group")

        print(f"Test {test_number} ({distance}, A1={a1}) hazard: ", hazard)
        hazards.append(hazard)

    return rc.compute_risk(hazards)

In [None]:
# Report the overall risk of each model; test_model prints the four
# per-test hazards before the summary line is printed.
for model, df in (
    ("RF_12_200", df_with_predictions_1),
    ("RF_37_10", df_with_predictions_2),
    ("Tree_depth2", df_with_predictions_3),
):
    print(f"Overall risk measure for model {model}: ", test_model(df))
    print("\n")

## Freq Vs Ref

In [None]:
# Detector comparing observed frequencies against a reference distribution,
# built with its default settings.
bd_ref = FreqVsRefBiasDetector()

In [None]:
# Reference shares per sex group; the _0/_1 suffixes presumably refer to
# the two values of the target (confirm against the dataset encoding).
female_0_ref, female_1_ref = 50 / 100, 50 / 100
male_0_ref, male_1_ref = 55 / 100, 45 / 100

# NOTE(review): the list order is [female, male]; confirm it matches the
# order in which the detector enumerates the x2_sex groups.
ref_distribution = [
    np.array(shares)
    for shares in ((female_0_ref, female_1_ref), (male_0_ref, male_1_ref))
]

In [None]:
# Overall comparison of the 'predictions' frequencies per x2_sex group
# against `ref_distribution`. No threshold is passed (the argument is
# commented out), so the detector's own default applies.
overall_ref = bd_ref.compare_root_variable_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    #threshold=0.1,
    reference_distribution=ref_distribution
)

In [None]:
# Same reference comparison, conditioned on `conditioning_variables`.
# min_obs_per_group=30 is passed explicitly here, unlike the FreqVsFreq calls
# (presumably it excludes conditioned groups with fewer than 30 rows - confirm in brio).
conditioned_ref = bd_ref.compare_root_variable_conditioned_groups(
    dataframe=df_with_predictions,
    target_variable='predictions',
    root_variable='x2_sex',
    conditioning_variables=conditioning_variables,
    #threshold=0.1,
    min_obs_per_group=30,
    reference_distribution=ref_distribution)

In [None]:
# Hazard for the Freq-vs-Ref test, computed with the same calculator call
# as the FreqVsFreq tests. The third argument is the number of rows in the frame.
hazard_test_ref = hc.compute_hazard_from_freqvsfreq_or_freqvsref(
    overall_ref, 
    conditioned_ref, 
    df_with_predictions.shape[0],
    conditioning_variables,
    weight_logic="group")

In [None]:
# Display the Freq-vs-Ref hazard as the cell output.
hazard_test_ref