In [1]:
import numpy as np
import onnxruntime as rt
import pandas as pd

from group1.Tester import TesterGroup1

In [2]:
# Read the test set from disk, then build the feature frame handed to the testers.
data = pd.read_csv(filepath_or_buffer="../data/test.csv")
# Remove the "checked" column when it exists (presumably the target — confirm);
# errors="ignore" makes this a no-op if the column is absent.
X = data.drop("checked", axis=1, errors="ignore")

In [3]:
class ONNXModelWrapper:
    """Adapter that exposes an ONNX Runtime session through a ``predict`` method.

    The wrapper feeds the whole input matrix to the model's first declared
    input and returns the model's first declared output cast to ``int``.
    """

    def __init__(self, model_path, model_features):
        """Open the ONNX model and cache the names of its first input/output.

        Args:
            model_path: Path of the ``.onnx`` file passed to
                ``onnxruntime.InferenceSession``.
            model_features: Feature names associated with the model. They are
                stored unchanged on the instance; ``predict`` does not use them.
        """
        # Inference is pinned to the CPU execution provider.
        self.session = rt.InferenceSession(model_path, providers=["CPUExecutionProvider"])
        # Only the first declared input and the first declared output are used.
        self.input_name = self.session.get_inputs()[0].name
        self.output_name = self.session.get_outputs()[0].name
        self.model_features = model_features

    def predict(self, X):
        """Run the model on ``X`` and return its first output as integers.

        Args:
            X: A pandas DataFrame (its ``.values`` are used) or any array-like
                that ``numpy.asarray`` accepts, e.g. a NumPy array or a list of
                rows. The data is converted to ``float32`` before inference.

        Returns:
            The session's first requested output, cast with ``astype(int)``.

        NOTE(review): columns are passed through as-is; nothing here selects or
        reorders them according to ``self.model_features``, so the caller must
        supply them in the layout the model expects — confirm against callers.
        """
        # pandas objects expose ``.values``; plain arrays/lists are used directly
        # so that callers are not forced to wrap their data in a DataFrame.
        raw = getattr(X, "values", X)
        features = np.asarray(raw).astype("float32")

        preds = self.session.run(
            [self.output_name],
            {self.input_name: features},
        )[0]
        return preds.astype(int)

In [4]:
# NOTE(review): the output recorded under this cell ("Fitting Bad Model...",
# "Successfully saved model_1.onnx ...") suggests that importing
# group1.train_models runs training/export as an import side effect — confirm.
from group1.train_models import safe_core, toxic_features

# Wrap both Group 2 ONNX models; each wrapper is given
# safe_core + toxic_features as its feature list.
model_1 = ONNXModelWrapper(
    model_path="../group2/model_1.onnx",
    model_features=safe_core + toxic_features,
)
model_2 = ONNXModelWrapper(
    model_path="../group2/model_2.onnx",
    model_features=safe_core + toxic_features,
)

Fitting Bad Model...
Fitting Good Model with Augmented Data...
Successfully saved model_1.onnx with 317-column interface.
Successfully saved model_2.onnx with 317-column interface.

Mean Gender Sensitivity (Bad): 0.0099
Mean Gender Sensitivity (Good): 0.0000


In [5]:
# Build one Group 1 tester per wrapped model, both over the same feature frame X.
# Construction order is unchanged: the tester for model_1 first, then model_2.
tester_1, tester_2 = (
    TesterGroup1(wrapped_model, X) for wrapped_model in (model_1, model_2)
)

['adres_aantal_brp_adres', 'adres_aantal_verschillende_wijken', 'adres_aantal_verzendadres', 'adres_aantal_woonadres_handmatig', 'adres_dagen_op_adres', 'adres_recentst_onderdeel_rdam', 'adres_recentste_buurt_groot_ijsselmonde', 'adres_recentste_buurt_nieuwe_westen', 'adres_recentste_buurt_other', 'adres_recentste_buurt_oude_noorden', 'adres_recentste_buurt_vreewijk', 'adres_recentste_plaats_other', 'adres_recentste_plaats_rotterdam', 'adres_recentste_wijk_charlois', 'adres_recentste_wijk_delfshaven', 'adres_recentste_wijk_feijenoord', 'adres_recentste_wijk_ijsselmonde', 'adres_recentste_wijk_kralingen_c', 'adres_recentste_wijk_noord', 'adres_recentste_wijk_other', 'adres_recentste_wijk_prins_alexa', 'adres_recentste_wijk_stadscentru', 'adres_unieke_wijk_ratio', 'afspraak_aanmelding_afgesloten', 'afspraak_aantal_woorden', 'afspraak_afgelopen_jaar_afsprakenplan', 'afspraak_afgelopen_jaar_monitoring_insp__wet_taaleis_na_12_mnd_n_a_v__taa04_____geen_maatregel', 'afspraak_afgelopen_jaar_on

In [6]:
# Run both Group 1 test suites against model_1.
print("=== Testing model_1.onnx (Group 2) ===")
# The return value of the partition suite is not kept; only what it prints is shown.
tester_1.run_partition_tests()
# The metamorphic suite's return value is the cell's displayed result.
metamorphic_results_1 = tester_1.run_metamorphic_tests()
metamorphic_results_1

=== Testing model_1.onnx (Group 2) ===
PASS: Gender Partitioning (Diff: 0.0137)
PASS: Neighborhood Partitioning (Ratio: 0.93)

------------------------------------------------------------
PARTITIONING TESTS SUMMARY:
  [PASS] Gender
  [FAIL] Language: Group Bias: Language barrier group is 1.47x riskier.
  [FAIL] Age: Group Bias: Seniors receive 0.17x risk score than youth.
  [PASS] Neighborhood
------------------------------------------------------------
PASS: Age Metamorphic (Max change: 0.0000)
PASS: Gender Metamorphic
PASS: Language Metamorphic
PASS: Neighborhood Metamorphic (Max diff: 0.0000)

------------------------------------------------------------
METAMORPHIC TESTS SUMMARY:
  [PASS] Age
  [PASS] Gender
  [PASS] Language
  [PASS] Neighborhood
------------------------------------------------------------


[('Age', 'PASS', None),
 ('Gender', 'PASS', None),
 ('Language', 'PASS', None),
 ('Neighborhood', 'PASS', None)]

In [7]:
# Run both Group 1 test suites against model_2.
print("\n=== Testing model_2.onnx (Group 2) ===")
# The return value of the partition suite is not kept; only what it prints is shown.
tester_2.run_partition_tests()
# The metamorphic suite's return value is the cell's displayed result.
metamorphic_results_2 = tester_2.run_metamorphic_tests()
metamorphic_results_2


=== Testing model_2.onnx (Group 2) ===
PASS: Gender Partitioning (Diff: 0.0094)
PASS: Neighborhood Partitioning (Ratio: 1.15)

------------------------------------------------------------
PARTITIONING TESTS SUMMARY:
  [PASS] Gender
  [FAIL] Language: Group Bias: Language barrier group is 1.45x riskier.
  [FAIL] Age: Group Bias: Seniors receive 0.16x risk score than youth.
  [PASS] Neighborhood
------------------------------------------------------------
PASS: Age Metamorphic (Max change: 0.0000)
PASS: Gender Metamorphic
PASS: Language Metamorphic
PASS: Neighborhood Metamorphic (Max diff: 0.0000)

------------------------------------------------------------
METAMORPHIC TESTS SUMMARY:
  [PASS] Age
  [PASS] Gender
  [PASS] Language
  [PASS] Neighborhood
------------------------------------------------------------


[('Age', 'PASS', None),
 ('Gender', 'PASS', None),
 ('Language', 'PASS', None),
 ('Neighborhood', 'PASS', None)]