In [1]:
"""
IMPORTS
"""

import pandas as pd

from utils.dataloader import DataLoader
from models.modelloader import ModelLoader
from utils.constants import protected_attributes


from tests.metrics import MetricsTester
from tests.mututation import MutationTester

In [2]:
"""
    INTERESTING LINKS:
        - https://dl.acm.org/doi/pdf/10.1145/3338906.3338937
"""

'\n    INTERESTING LINKS:\n        - https://dl.acm.org/doi/pdf/10.1145/3338906.3338937\n'

In [3]:
"""
SET MODEL(S)
"""

PARAMS = None

# hyperoptimised parameters for the bad model.

PARAMS = {
    'n_estimators': 300, 
    'min_samples_split': 750, 
    'min_samples_leaf': 175, 
    'max_depth': 6, 
    'learning_rate': 0.1525
    }

# Hyperoptimised parameters for good model.

# PARAMS = {
#     'learning_rate': 0.15, 
#     'max_depth': 5, 
#     'min_samples_leaf': 200, 
#     'min_samples_split': 800, 
#     'n_estimators': 350
#     }



model = ModelLoader.load_model(type='onnx', params=PARAMS)

TypeError: OnnxModel.__init__() got an unexpected keyword argument 'n_estimators'

In [None]:
"""
LOAD DATASET
"""

dataloader = DataLoader()
X_train, y_train = dataloader.load_split('train')
print(f'{X_train.shape}, {y_train.shape}')
X_test, y_test = dataloader.load_split('test')
print(f'{X_test.shape}, {y_test.shape}')
X_data, y_data = dataloader.load_split('full')
print(f'{X_data.shape}, {y_data.shape}')
X_good, y_good = dataloader.load_split('good')
X_bad, y_bad = dataloader.load_split('bad')
dataset_train_test = pd.concat([X_data, y_data], axis=1)
print(f'{dataset_train_test.shape}')


(10116, 315), (10116,)
(2529, 315), (2529,)
(12645, 315), (12645,)
(12645, 316)


In [None]:
"""
    TEST MODEL ON DATASET
"""

model.fit(X_bad, y_bad)


In [None]:
y_pred = model.predict(X_test)
print(y_pred)

[0 0 0 ... 0 0 0]


In [None]:
"""
FUNCTIONAL TESTING
"""

metrics = MetricsTester(protected_variables=protected_attributes)


In [None]:

metrics.get_metrics_summary(X_test, y_true=y_data, y_pred=y_pred)

(2529,)
(2529,)
(2529,)
False
sens (2529,)
y_true (2529,)
preed (2529,)
[[1982  278]
 [  66  203]]
y_true (405,)
preed (405,)
[[326  47]
 [ 13  19]]
y_true (783,)
preed (783,)
[[627  89]
 [ 19  48]]
y_true (589,)
preed (589,)
[[470  55]
 [ 13  51]]
y_true (386,)
preed (386,)
[[295  45]
 [ 12  34]]
y_true (228,)
preed (228,)
[[176  16]
 [  5  31]]
y_true (100,)
preed (100,)
[[64 20]
 [ 3 13]]
y_true (29,)
preed (29,)
[[19  5]
 [ 0  5]]
y_true (7,)
preed (7,)
[[4 1]
 [1 1]]
y_true (2,)
preed (2,)
[[1 0]
 [0 1]]
Overal metrics: 
TN-FP-FN-TP    (1982, 278, 66, 203)
acc                        0.863978
prec                       0.422037
rec                        0.754647
f1                         0.541333
dtype: object 
(2529,)
(2529,)
(2529,)
True
sens (2529,)
Metrics for group: 
                             fnr       fpr       sel count
adres_aantal_brp_adres                                    
1                        0.40625  0.126005  0.162963   405
2                       0.283582  

TypeError: '<' not supported between instances of 'int' and 'NoneType'

In [None]:
"""
MUTATION TESTING
"""

params = {
    'data_shuffler': 1000,
    'data_remover': 0.25,
    'data_repetition': 1000, 
    'label_error': 1000,
    'feature_remover': 31,
}

model_test = ModelLoader.load_model(type='bad', params=PARAMS)
mutation = True
if mutation==True:
    X_test = pd.concat([X_test, y_test], axis=1)
    X_bad = pd.concat([X_bad, y_bad], axis=1)
    mutator = MutationTester(model=model_test, y_pred_baseline=y_pred, df_train=X_bad, df_test=X_test)
    mutation_score = mutator.test_mutants(params)
    print(f"Mutation score: {mutation_score}")

Testing data_remover
Testing data_repetition
Testing data_shuffler
Testing feature_remover
Testing label_error
Mutation score: {'data_remover': 50.7, 'data_repetition': 37.0, 'data_shuffler': 21.3, 'feature_remover': 44.5, 'label_error': 97.0}
