In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import onnxruntime as rt
import onnx

In [3]:
data = pd.read_csv('data/synth_data_for_training.csv')

## Function for mutation testing

In [24]:
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument

def test_mutation(data, feature_name : str, is_Fraud: bool ,value_from : int, value_to: int, model_path : str):
    data_modified = data.copy() 
    is_checked = 1 if is_Fraud else 0
    test_data= data_modified.loc[data_modified['checked'] == is_checked]
    test_data = test_data.loc[test_data[feature_name] == value_from]
    data_wout = test_data.copy()
    y_wout = data_wout['checked']
    X_wout= data_wout.drop(['checked'], axis=1)
    X_wout = X_wout.astype(np.float32)
    test_data[feature_name] = value_to
    y_test = test_data['checked']
    X_test= test_data.drop(['checked'], axis=1)
    X_test = X_test.astype(np.float32)
    session = rt.InferenceSession(model_path)
        
    try:
        y_pred_onnx1 =  session.run(None, {'X': X_wout.values.astype(np.float32)})
        y_pred_onnx2 =  session.run(None, {'X': X_test.values.astype(np.float32)})
        y_pred_onnx1_np = np.array(y_pred_onnx1[0])
        y_pred_onnx2_np = np.array(y_pred_onnx2[0])
        diff_count = np.sum(y_pred_onnx1_np != y_pred_onnx2_np)
        return diff_count, len(y_pred_onnx1_np)
    except InvalidArgument:
        print("Got error: empty test dataframe. " "Column is: ", feature_name, " combo is:", is_Fraud, value_from, value_to)
        return 0, 0
    # assert y_pred_onnx1[0].all() == y_pred_onnx2[0].all(), f'Model predictions are different. The model has bias towards {feature_name} with value {value_from} '
    # return True

## Test Cases

In [25]:
test_mutation(data, "adres_aantal_brp_adres", True, 11, 11, "model/model_1.onnx")

Got error: empty test dataframe. Column is:  adres_aantal_brp_adres  combo is: True 11 11


(0, 0)

In [26]:
test_mutation(data, 'persoon_geslacht_vrouw', True, 0, 1, "model/model_1.onnx") #test if changing gender of women who are frauds to men to see if predictions change to not fraud

(16, 663)

In [27]:
test_mutation(data, 'persoon_geslacht_vrouw', False, 0, 1, "model/model_1.onnx") #test if changing gender of men who are not frauds to women to see if predictions change to not fraud

(11, 5879)

In [28]:
test_mutation(data, 'persoonlijke_eigenschappen_taaleis_voldaan', True, 0, 1, "model/model_1.onnx") #test if changing non dutch speaker people who are frauds to dutch speaking to see if predictions change to not fraud

(21, 708)

In [29]:
test_mutation(data, 'persoonlijke_eigenschappen_taaleis_voldaan', False, 1, 0, "model/model_1.onnx") #test if changing dutch speaker people who are not frauds to non-dutch speaking to see if predictions change to fraud

(13, 6558)

In [30]:
test_mutation(data, 'persoonlijke_eigenschappen_taaleis_voldaan', True, 2, 1, "model/model_1.onnx") #test if changing people who are frauds and did no take the dutch exam to dutch speaking to see if predictions change to not fraud

(4, 104)

### Making a generic method that tests all combinations of feature values in mutation testing

In [31]:
df = pd.read_csv('data/synth_data_for_training.csv')
mutation_test_results = {}
model = "model/model_1.onnx"

# Iterate over each column in the DataFrame and generate the mutation test combinations
for column in df.columns:
    # Get unique values for the current column
    unique_values = df[column].unique()
    if len(unique_values) > 1:
        # We sample two random values from the unique options, and store this in the dictionary
        rand_values = np.random.choice(unique_values, size=2, replace=False)
        combos = []
        for bool in [True, False]:
            for a in rand_values:
                for b in rand_values:
                    combos.append([bool, a, b])
        
        mutation_test_results[column] = combos

# Go over each combination and get how many tests failed to kill the mutation
for column, values in mutation_test_results.items():
    for combo in values:
        num_differ, length = test_mutation(data, column, combo[0], combo[1], combo[2], model)
        combo.append(num_differ)
        combo.append(length)

# features_to_ignore = ["adres_dagen_op_adres", "afspraak_aantal_woorden", "afspraak_laatstejaar_aantal_woorden", "belemmering_dagen_financiele_problemen", 
#                       "belemmering_dagen_lichamelijke_problematiek", "belemmering_dagen_psychische_problemen", "contacten_onderwerp_overige", "contacten_onderwerp_terugbelverzoek", "contacten_onderwerp_traject", "contacten_soort_afgelopenjaar_document__uitgaand_", 
#                       "contacten_soort_document__inkomend_", "contacten_soort_document__uitgaand_", "contacten_soort_e_mail__inkomend_", "contacten_soort_e_mail__uitgaand_", "contacten_soort_telefoontje__inkomend_", "contacten_soort_telefoontje__uitgaand_", 
#                       "deelname_act_reintegratieladder_werk_re_integratie", "ontheffing_dagen_hist_mean", "ontheffing_dagen_hist_vanwege_uw_medische_omstandigheden", "persoon_leeftijd_bij_onderzoek", "persoonlijke_eigenschappen_dagen_sinds_opvoer", "persoonlijke_eigenschappen_dagen_sinds_taaleis", "persoonlijke_eigenschappen_spreektaal", "relatie_kind_leeftijd_verschil_ouder_eerste_kind", "typering_dagen_som"]




Got error: empty test dataframe. Column is:  adres_dagen_op_adres  combo is: True 24248 4700
Got error: empty test dataframe. Column is:  adres_dagen_op_adres  combo is: True 24248 24248
Got error: empty test dataframe. Column is:  afspraak_aanmelding_afgesloten  combo is: True 8 8
Got error: empty test dataframe. Column is:  afspraak_aanmelding_afgesloten  combo is: True 8 3
Got error: empty test dataframe. Column is:  afspraak_aantal_woorden  combo is: True 551 551
Got error: empty test dataframe. Column is:  afspraak_aantal_woorden  combo is: True 551 84
Got error: empty test dataframe. Column is:  afspraak_afgelopen_jaar_afsprakenplan  combo is: False 3 0
Got error: empty test dataframe. Column is:  afspraak_afgelopen_jaar_afsprakenplan  combo is: False 3 3
Got error: empty test dataframe. Column is:  afspraak_controle_verwijzing  combo is: True 2 2
Got error: empty test dataframe. Column is:  afspraak_controle_verwijzing  combo is: True 2 0
Got error: empty test dataframe. Column 

In [32]:
# We can now do some basic data analysis.

# We can compute a score on a per-column basis, getting an average across 8 tests of what percentage of mutants were failed to kill
# We can also compute a sort of global metric. Just an average across all mutations
column_avg = []
all_percentages = []

for column, values in mutation_test_results.items():
    percentages = []
    for combo in values:
        # compute percentage, and add it to a temp array to compute the average value
        if combo[4] != 0:
            res = combo[3]/combo[4]
            percentages.append(res)
            all_percentages.append(res)
    column_avg.append((column, np.mean(np.array(percentages))))

column_avg = sorted(column_avg, key=lambda x: x[1], reverse=True)

# Print the mutation metric per column and on a global metric
print("Column averages", column_avg, "\n")
print("Global average", np.mean(np.array(all_percentages)) * 100, "%")

Column averages [('contacten_soort_afgelopenjaar_anders', 0.2112321333682687), ('adres_dagen_op_adres', 0.16666666666666666), ('relatie_overig_historie_vorm__kostendeler', 0.09500196515131666), ('ontheffing_dagen_hist_vanwege_uw_medische_omstandigheden', 0.08333333333333333), ('persoonlijke_eigenschappen_dagen_sinds_opvoer', 0.08333333333333333), ('contacten_soort_afgelopenjaar_e_mail__inkomend_', 0.07184922743824426), ('relatie_kind_leeftijd_verschil_ouder_eerste_kind', 0.05724182615629984), ('instrument_ladder_historie_activering', 0.042057993204428774), ('contacten_soort_afgelopenjaar_document__uitgaand_', 0.03996028791263341), ('afspraak_laatstejaar_resultaat_ingevuld_uniek', 0.03913259627559181), ('instrument_ladder_huidig_activering', 0.03444215808938844), ('contacten_onderwerp_documenttype__overeenkomst_', 0.030893317195744507), ('pla_hist_pla_categorie_doelstelling_16', 0.029699829954688187), ('relatie_overig_kostendeler', 0.029125334932183243), ('afspraak_afgelopen_jaar_voortg