In [1]:
"""
IMPORTS
"""

import json
import os
from pathlib import PureWindowsPath

import pandas as pd

from raiwidgets import ExplanationDashboard

from utils.dataloader import DataLoader
from models.modelloader import ModelLoader
from utils.constants import group_proxies

from tests.metrics import MetricsTester
from tests.metamorphic import MetamorphicTester
from tests.shapley import ShapleyTester
from tests.counterfactuals import cf_pipeline


# Turn off pandas' chained-assignment check (None = neither warn nor raise).
pd.options.mode.chained_assignment = None
# Explicitly request that copy-on-write stays disabled.
pd.set_option('mode.copy_on_write', False)


Initializing test: data_shuffler...
Initializing test: data_remover...
Initializing test: data_repetition...
Initializing test: label_error...
Initializing test: feature_remover...


In [2]:
"""
LOAD DATASET
"""
dataloader = DataLoader()


def _load_and_report(split_name):
    """Load one split through the shared dataloader and print its feature/label shapes."""
    features, labels = dataloader.load_split(split_name)
    print(features.shape, labels.shape, sep=', ')
    return features, labels


# Each split is loaded and reported before the next one is requested.
X_train, y_train = _load_and_report('train')
X_test, y_test = _load_and_report('test')
X_data, y_data = _load_and_report('full')

# Full split with the labels appended as one extra column.
dataset = pd.concat([X_data, y_data], axis=1)
print(dataset.shape)

(10116, 315), (10116,)
(2529, 315), (2529,)
(12645, 315), (12645,)
(12645, 316)


In [3]:
"""
Set the relative path to the model under test as a global constant variable.
The path is assembled with os.path.join so it uses the separator of the host
operating system instead of hard-coded Windows backslashes.
"""
MODEL_UNDER_TEST = os.path.join(".", "..", "models", "audit_models", "model_1.onnx")

"""
Global constants naming the features and labels that the cells below operate on
(currently the full split)
"""
DATA_UNDER_TEST, LABELS_UNDER_TEST = X_data, y_data

In [5]:

# Loader arguments: the only entry is the location of the ONNX file.
params = dict(onnx_model_path=MODEL_UNDER_TEST)

# Load the model under test through the project's ModelLoader.
model = ModelLoader.load_model(params=params, type='onnx')

# Predictions first, then the probabilities, both on the data under test.
y_pred = model.predict(DATA_UNDER_TEST)
y_pred_proba = model.predict_proba(
    X_test=DATA_UNDER_TEST,
)


In [5]:
"""
Performance and fairness metrics of the model, broken down by demographic group
"""
metrics = MetricsTester(
    protected_variables=group_proxies,
)


In [6]:
# Summarise the metrics for the predictions (y_pred) against the labels under test.
metrics.get_metrics_summary(DATA_UNDER_TEST, LABELS_UNDER_TEST, y_pred)

(12645,)
(12645,)
(12645,)
False
sens (12645,)
y_true (12645,)
preed (12645,)
[[11353    27]
 [  132  1133]]
y_true (6542,)
preed (6542,)
[[5865   14]
 [  72  591]]
y_true (6103,)
preed (6103,)
[[5488   13]
 [  60  542]]
Overal metrics: 
TN-FP-FN-TP    (11353, 27, 132, 1133)
acc                          0.987426
prec                         0.976724
rec                          0.895652
f1                           0.934433
dtype: object 
(12645,)
(12645,)
(12645,)
True
sens (12645,)
Metrics for group: 
                             fnr       fpr       sel count
persoon_geslacht_vrouw                                    
0                       0.108597  0.002381  0.092479  6542
1                       0.099668  0.002363  0.090939  6103
(12645,)
(12645,)
(12645,)
True
sens (12645,)


Found 49 subgroups. Evaluation may be slow


Metrics for group: 
                                     fnr       fpr       sel count
persoon_leeftijd_bij_onderzoek                                    
19                                   0.0       0.0  0.545455    11
20                                   0.0       0.0  0.363636    11
21                                   0.0       0.0  0.388889    18
22                                   0.0       0.0  0.208333    24
23                                   0.0       0.0   0.34375    32
24                              0.142857       0.0   0.27907    43
25                              0.111111       0.0  0.363636    44
26                              0.083333       0.0      0.22    50
27                                   0.0  0.020833  0.253968    63
28                                   0.0       0.0  0.290698    86
29                              0.052632       0.0  0.197802    91
30                                   0.0       0.0  0.333333    93
31                              0.054054  

In [7]:
"""
METAMORPHIC TESTING: See model 1 results in results/results_model_1_metamorphic_testing_column_avg.txt
"""    
def run_metamorphic(metamorphic_tester):
    """Run a metamorphic tester and then analyse what it produced.

    Args:
        metamorphic_tester: object exposing ``test()`` and ``analyse_test()``.

    Returns:
        None. Any results are left on the tester object itself.
    """
    # Phases are looked up and invoked strictly in this order.
    for phase in ("test", "analyse_test"):
        getattr(metamorphic_tester, phase)()




In [8]:
# Build the tester on the combined features+labels frame for the model under test.
metamorphic_tester = MetamorphicTester(
    X=dataset,
    model_path=MODEL_UNDER_TEST,
)
run_metamorphic(metamorphic_tester)

# Report the overall average followed by the per-column averages.
print(f'global average:  {metamorphic_tester.global_average!s}')
print(metamorphic_tester.column_avg)

global average:  0.1890022205742061
[('contacten_onderwerp_no_show', 0.1419753154334652), ('persoon_leeftijd_bij_onderzoek', 0.1114617825607064), ('pla_historie_ontwikkeling', 0.09071397120053362), ('instrument_ladder_huidig_activering', 0.051289090639616446), ('relatie_overig_kostendeler', 0.045433787986896144), ('pla_hist_pla_categorie_doelstelling_16', 0.02237195658667465), ('instrument_ladder_historie_activering', 0.021792445682754766), ('contacten_soort_afgelopenjaar_anders', 0.010637736125300659), ('afspraak_inspanningsperiode', 0.009252698650829169), ('contacten_onderwerp_overleg_met_inkomen', 0.008786258786258785), ('pla_einde_uitstroom_anders_dan_volgen_onderwijs__regulier_werk_of_als_zelfstandige', 0.008431514553652554), ('beschikbaarheid_aantal_historie_afwijkend_wegens_medische_omstandigheden', 0.008074642040528), ('persoonlijke_eigenschappen_taaleis_voldaan', 0.007872252371258355), ('contacten_soort_document__uitgaand_', 0.007352941176470588), ('relatie_overig_historie_vor

In [None]:
# Compute Shapley explanations for the model on the data under test.
shapleyTester = ShapleyTester()
explainer, global_explanation = shapleyTester.run_shapley(
    model=model,
    train_data=DATA_UNDER_TEST,
)

In [None]:
# Explain a single instance; the last argument (0) selects which one
# (presumably a row position in DATA_UNDER_TEST — confirm in ShapleyTester).
shapleyTester.run_explain_instance(explainer, model, DATA_UNDER_TEST, 0)

In [None]:
# Only open the dashboard when run_shapley returned a global explanation.
if global_explanation is not None:
    # Feed the dashboard the same data/labels the explanation was computed on,
    # so the two cannot drift apart when the data under test is changed.
    ExplanationDashboard(
        global_explanation,
        model,
        dataset=DATA_UNDER_TEST,
        true_y=LABELS_UNDER_TEST,
    )


In [None]:
"""
Use counterfactuals to probe for biases caused by specific feature values.
"""

# NOTE: `explainer` is rebound here and no longer refers to the object
# returned by shapleyTester.run_shapley.
(
    explainer,
    cf_examples,
    model_results,
    model_results_sorted,
) = cf_pipeline(dataset, DATA_UNDER_TEST, model)

In [None]:
print("Model 1:")
# Show the first ten sorted results, or fewer when the pipeline returned
# less than ten, instead of failing with an IndexError.
for rank in range(min(10, len(model_results_sorted))):
    print(model_results_sorted[rank])

In [164]:
# Name the output after the model under test (e.g. model_1.onnx -> model_1_results.json)
# so results of one model are never written to another model's file.
# PureWindowsPath splits on both "\\" and "/" regardless of the host OS.
results_path = f'{PureWindowsPath(MODEL_UNDER_TEST).stem}_results.json'
with open(results_path, 'w', encoding='utf-8') as f:
    json.dump(model_results, f)