In [82]:

# utilities
import pandas as pd
import numpy as np
import os
from IPython.display import Markdown, display
import matplotlib.pyplot as plt
import pandas as pd



# sklearn imports
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline

from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score
from sklearn.metrics import classification_report
from aif360.metrics import BinaryLabelDatasetMetric

import aif360.sklearn as skm


# aif360
from aif360.sklearn.detectors import bias_scan
# Import necessary modules from aif360
from aif360.metrics import ClassificationMetric
from aif360.datasets import StandardDataset, BinaryLabelDataset


# onnx imports
import onnxruntime as rt
import onnx
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import to_onnx
from skl2onnx import convert_sklearn





In [83]:
# Let's load the dataset
data = pd.read_csv('./../data/synth_data_for_training.csv')

# Let's specify the features and the target
y = data["checked"]
X = data.drop(['checked'], axis=1)
X = X.astype(np.float32)

# Let's split the dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)


In [84]:
class BenefitsFraudDataset(BinaryLabelDataset):
    """Benefits Fraud Dataset.
    """

    def __init__(self, df, label_name='checked',
                 protected_attribute_names=['persoon_geslacht_vrouw', 'typering_hist_ind']
                 ):



        super(BenefitsFraudDataset, self).__init__(df=df, label_names=[label_name],
            protected_attribute_names=protected_attribute_names
            )


In [85]:
df = pd.read_csv("./../data/synth_data_for_training.csv")

df = df.astype(np.float32)
# Get the dataset and split into train and test
benefits_fraud_dataset = BenefitsFraudDataset(df=df)
# Get features and labels for training and testing
# Split the dataset into training and testing sets
dataset_train, dataset_test = benefits_fraud_dataset.split([0.7], shuffle=True)
# Get features and labels for training and testing as pandas DataFrames
# Get features and labels for training and testing as pandas DataFrames
X_train = pd.DataFrame(dataset_train.features, columns=dataset_train.feature_names)
y_train = pd.DataFrame(dataset_train.labels, columns=['checked']).squeeze()

X_test = pd.DataFrame(dataset_test.features, columns=dataset_test.feature_names)
y_test = pd.DataFrame(dataset_test.labels, columns=['checked']).squeeze()


# print out some labels, names, etc.
display(Markdown("#### Training Dataset shape"))
print(benefits_fraud_dataset.features.shape)
display(Markdown("#### Favorable and unfavorable labels"))
print(benefits_fraud_dataset.favorable_label, benefits_fraud_dataset.unfavorable_label)
display(Markdown("#### Protected attribute names"))
print(benefits_fraud_dataset.protected_attribute_names)
display(Markdown("#### Privileged and unprivileged protected attribute values"))
print(benefits_fraud_dataset.privileged_protected_attributes, 
      benefits_fraud_dataset.unprivileged_protected_attributes)
display(Markdown("#### Dataset feature names"))
print(benefits_fraud_dataset.feature_names)


#### Training Dataset shape

(12645, 315)


#### Favorable and unfavorable labels

1.0 0.0


#### Protected attribute names

['persoon_geslacht_vrouw', 'typering_hist_ind']


#### Privileged and unprivileged protected attribute values

[array([1.]), array([1.])] [array([0.]), array([], dtype=float64)]


#### Dataset feature names

['adres_aantal_brp_adres', 'adres_aantal_verschillende_wijken', 'adres_aantal_verzendadres', 'adres_aantal_woonadres_handmatig', 'adres_dagen_op_adres', 'adres_recentst_onderdeel_rdam', 'adres_recentste_buurt_groot_ijsselmonde', 'adres_recentste_buurt_nieuwe_westen', 'adres_recentste_buurt_other', 'adres_recentste_buurt_oude_noorden', 'adres_recentste_buurt_vreewijk', 'adres_recentste_plaats_other', 'adres_recentste_plaats_rotterdam', 'adres_recentste_wijk_charlois', 'adres_recentste_wijk_delfshaven', 'adres_recentste_wijk_feijenoord', 'adres_recentste_wijk_ijsselmonde', 'adres_recentste_wijk_kralingen_c', 'adres_recentste_wijk_noord', 'adres_recentste_wijk_other', 'adres_recentste_wijk_prins_alexa', 'adres_recentste_wijk_stadscentru', 'adres_unieke_wijk_ratio', 'afspraak_aanmelding_afgesloten', 'afspraak_aantal_woorden', 'afspraak_afgelopen_jaar_afsprakenplan', 'afspraak_afgelopen_jaar_monitoring_insp__wet_taaleis_na_12_mnd_n_a_v__taa04_____geen_maatregel', 'afspraak_afgelopen_jaar_on

In [86]:
# Define a gradient boosting classifier
classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)

In [87]:
# Create a pipeline object with our selector and classifier
# NOTE: You can create custom pipeline objects but they must be registered to onnx or it will not recognise them
# Because of this we recommend using the onnx known objects as defined in the documentation
pipeline = Pipeline(steps=[('classification', classifier)])

In [88]:
# Let's train a simple model
pipeline.fit(X_train, y_train)

# Let's evaluate the model
y_pred = pipeline.predict(X_test)

probas_pred = pipeline.predict_proba(X_test)[:,1]
original_accuracy = accuracy_score(y_test, y_pred)
print('Accuracy of the original model: ', original_accuracy)

dataset_pred = dataset_test.copy()
dataset_pred.checked = y_pred





Accuracy of the original model:  0.9425408539799683


In [89]:
"""
BIAS SCAN: See results/bias_scan_results.txt for results
"""

def get_bias_scan(X_train, y_train, y_train_pred):
    bias_scan_results = bias_scan(X_train, y_train, y_train_pred)
    return bias_scan_results

# Let's evaluate the model
y_train_pred = pipeline.predict(X_train)
y_train_pred = pd.Series(y_train_pred)

get_bias = False
if get_bias == True:
    bias_scan_results = get_bias_scan(X_train, y_train, y_train_pred)


In [106]:
from sklearn import metrics

class GenericMetricsWrapper:
    def __init__(self, y_true, y_pred, probas_pred):
        self.y_true = y_true
        self.y_pred = y_pred
        self.probas_pred = probas_pred

    @property
    def num_samples(self):
        return skm.metrics.num_samples(self.y_true, self.y_pred)

    @property
    def num_pos_neg(self):
        return skm.metrics.num_pos_neg(self.y_true, self.y_pred)

    @property
    def specificity_score(self):
        return skm.metrics.specificity_score(self.y_true, self.y_pred)

    @property
    def sensitivity_score(self):
        return skm.metrics.sensitivity_score(self.y_true, self.y_pred)

    @property
    def base_rate(self):
        return skm.metrics.base_rate(self.y_true, self.y_pred)

    @property
    def selection_rate(self):
        return skm.metrics.selection_rate(self.y_true, self.y_pred)

    @property
    def smoothed_base_rate(self):
        return skm.metrics.smoothed_base_rate(self.y_true, self.y_pred)

    @property
    def smoothed_selection_rate(self):
        return skm.metrics.smoothed_selection_rate(self.y_true, self.y_pred)

    @property
    def generalized_fpr(self):
        return skm.metrics.generalized_fpr(self.y_true, self.probas_pred)

    @property
    def generalized_fnr(self):
        return skm.metrics.generalized_fnr(self.y_true, self.probas_pred)
    
    def get_metrics(self):
        metrics = {}
        for attr in dir(self):
            if isinstance(getattr(self.__class__, attr, None), property):
                metrics[attr] = getattr(self, attr)
        return metrics



In [107]:

protected_attributes =X_test['persoon_geslacht_vrouw']
generic_metrics = GenericMetricsWrapper(y_true=y_test, y_pred=y_pred, probas_pred=probas_pred)

results = generic_metrics.get_metrics()

# Print the results
for metric, value in results.items():
    print(f"{metric}: {value}")

base_rate: 0.10832894043226146
generalized_fnr: 0.37317118284819906
generalized_fpr: 0.042654085229038755
num_pos_neg: (331.0, 3463.0)
num_samples: 3794.0
selection_rate: 0.08724301528729574
sensitivity_score: 0.6374695863746959
smoothed_base_rate: 0.10843214756258235
smoothed_selection_rate: 0.08735177865612648
specificity_score: 0.9796039018622524


In [108]:
class GroupMetricsWrapper:
    def __init__(self, y_true, y_pred, probas_pred, protected_attributes, dataset):
        self.y_true = y_true
        self.y_pred = y_pred
        self.probas_pred = probas_pred
        self.prot_attr = protected_attributes
        self.X = dataset

    @property
    def statistical_parity_difference(self):
        return skm.metrics.statistical_parity_difference(self.y_true, y_pred=self.y_pred, prot_attr=self.prot_attr)

    @property
    def mean_difference(self):
        return skm.metrics.mean_difference(self.y_true, y_pred=self.y_pred, prot_attr=self.prot_attr)

    @property
    def disparate_impact_ratio(self):
        return skm.metrics.disparate_impact_ratio(self.y_true)

    @property
    def equal_opportunity_difference(self):
        return skm.metrics.equal_opportunity_difference(self.y_true, y_pred=self.y_pred, prot_attr=self.prot_attr)

    @property
    def average_odds_difference(self):
        return skm.metrics.average_odds_difference(self.y_true, y_pred=self.y_pred, prot_attr=self.prot_attr)

    @property
    def average_odds_error(self):
        return skm.metrics.average_odds_error(self.y_true, y_pred=self.y_pred, prot_attr=self.prot_attr)

    @property
    def class_imbalance(self):
        return skm.metrics.class_imbalance(self.y_true, y_pred=self.y_pred, prot_attr=self.prot_attr)

    @property
    def kl_divergence(self):
        return skm.metrics.kl_divergence(self.y_true, y_pred=self.y_pred, prot_attr=self.prot_attr)

    @property
    def conditional_demographic_disparity(self):
        return skm.metrics.conditional_demographic_disparity(self.y_true)

    @property
    def smoothed_edf(self):
        return skm.metrics.smoothed_edf(self.y_true, y_pred=self.y_pred, prot_attr=self.prot_attr)

    @property
    def df_bias_amplification(self):
        return skm.metrics.df_bias_amplification(self.y_true, y_pred=self.y_pred, prot_attr=self.prot_attr)

    @property
    def between_group_generalized_entropy_error(self):
        return skm.metrics.between_group_generalized_entropy_error(self.y_true, y_pred=self.y_pred, prot_attr=self.prot_attr)

    @property
    def mdss_bias_score(self):
        return skm.metrics.mdss_bias_score(self.y_true, self.probas_pred, self.X)
    
    def get_metrics(self):
        metrics = {}
        for attr in dir(self):
            if isinstance(getattr(self.__class__, attr, None), property):
                metrics[attr] = getattr(self, attr)
        return metrics

   


In [109]:

protected_attributes =X_test['persoon_geslacht_vrouw']
group_metrics = GroupMetricsWrapper(y_true=y_test, y_pred=y_pred, probas_pred=probas_pred, protected_attributes=protected_attributes, dataset=X_test)

results = group_metrics.get_metrics()

# Print the results
for metric, value in results.items():
    print(f"{metric}: {value}")

average_odds_difference: 0.0009189640662928267
average_odds_error: 0.005148354747296413
between_group_generalized_entropy_error: 1.011783948475026e-07
class_imbalance: 0.045861887190300474
conditional_demographic_disparity: -2.0738152782623367e-19
df_bias_amplification: 0.03393236784184461
disparate_impact_ratio: 0.0
equal_opportunity_difference: -0.004229390681003586
kl_divergence: 0.0008208664060927462
mdss_bias_score: 0.0
mean_difference: 0.011526465870611305
smoothed_edf: 0.13208155246283004
statistical_parity_difference: 0.011526465870611305


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [119]:
class IndividualMetricsWrapper:
    def __init__(self, y_true, y_pred, probas_pred, X, y, alpha=2, n_neighbors=5):
        self.y_true = y_true
        self.y_pred = y_pred
        self.probas_pred = probas_pred
        self.b = X.to_numpy()
        self.X = X
        self.y = y
        self.alpha = alpha
        self.n_neighbors = n_neighbors

    # @property
    # def generalized_entropy_index(self):
    #     return skm.metrics.generalized_entropy_index(self.b, self.alpha)

    @property
    def generalized_entropy_error(self):
        return skm.metrics.generalized_entropy_error(self.y_true, self.y_pred)

    # @property
    # def theil_index(self):
    #     return skm.metrics.theil_index(self.b)

    # @property
    # def coefficient_of_variation(self):
    #     return skm.metrics.coefficient_of_variation(self.b)

    # @property
    # def consistency_score(self):
    #     return skm.metrics.consistency_score(self.X, self.y, self.n_neighbors)
    
    def get_metrics(self):
        metrics = {}
        for attr in dir(self):
            if isinstance(getattr(self.__class__, attr, None), property):
                metrics[attr] = getattr(self, attr)
        return metrics



In [120]:

protected_attributes =X_test['persoon_geslacht_vrouw']
indiv_metrics = IndividualMetricsWrapper(y_true=y_test, y_pred=y_pred, probas_pred=probas_pred, X=X_test, y=y_test, alpha=None, n_neighbors=None)

results = indiv_metrics.get_metrics()

# Print the results
for metric, value in results.items():
    print(f"{metric}: {value}")

generalized_entropy_error: 0.02974859132322973


In [112]:
class MetricWrapper:
    def __init__(self, metric):
        self.metric = metric
        
    @property
    def accuracy(self):
        return self.metric.accuracy()

    @property
    def average_abs_odds_difference(self):
        return self.metric.average_abs_odds_difference()

    @property
    def average_odds_difference(self):
        return self.metric.average_odds_difference()

    @property
    def average_predictive_value_difference(self):
        return self.metric.average_predictive_value_difference()

    @property
    def base_rate(self):
        return self.metric.base_rate()

    @property
    def between_all_groups_coefficient_of_variation(self):
        return self.metric.between_all_groups_coefficient_of_variation()

    @property
    def between_all_groups_generalized_entropy_index(self):
        return self.metric.between_all_groups_generalized_entropy_index()

    @property
    def between_all_groups_theil_index(self):
        return self.metric.between_all_groups_theil_index()

    @property
    def between_group_coefficient_of_variation(self):
        return self.metric.between_group_coefficient_of_variation()

    @property
    def between_group_generalized_entropy_index(self):
        return self.metric.between_group_generalized_entropy_index()

    @property
    def between_group_theil_index(self):
        return self.metric.between_group_theil_index()

    @property
    def binary_confusion_matrix(self):
        return self.metric.binary_confusion_matrix()

    @property
    def coefficient_of_variation(self):
        return self.metric.coefficient_of_variation()

    @property
    def consistency(self):
        return self.metric.consistency()

    # @property
    # def difference(self):
    #     return self.metric.difference()

    @property
    def differential_fairness_bias_amplification(self):
        return self.metric.differential_fairness_bias_amplification()

    @property
    def disparate_impact(self):
        return self.metric.disparate_impact()

    @property
    def equal_opportunity_difference(self):
        return self.metric.equal_opportunity_difference()

    @property
    def equalized_odds_difference(self):
        return self.metric.equalized_odds_difference()

    @property
    def error_rate(self):
        return self.metric.error_rate()

    @property
    def error_rate_difference(self):
        return self.metric.error_rate_difference()

    @property
    def error_rate_ratio(self):
        return self.metric.error_rate_ratio()

    @property
    def false_discovery_rate(self):
        return self.metric.false_discovery_rate()

    @property
    def false_discovery_rate_difference(self):
        return self.metric.false_discovery_rate_difference()

    @property
    def false_discovery_rate_ratio(self):
        return self.metric.false_discovery_rate_ratio()

    @property
    def false_negative_rate(self):
        return self.metric.false_negative_rate()

    @property
    def false_negative_rate_difference(self):
        return self.metric.false_negative_rate_difference()

    @property
    def false_negative_rate_ratio(self):
        return self.metric.false_negative_rate_ratio()

    @property
    def false_omission_rate(self):
        return self.metric.false_omission_rate()

    @property
    def false_omission_rate_difference(self):
        return self.metric.false_omission_rate_difference()

    @property
    def false_omission_rate_ratio(self):
        return self.metric.false_omission_rate_ratio()

    @property
    def false_positive_rate(self):
        return self.metric.false_positive_rate()

    @property
    def false_positive_rate_difference(self):
        return self.metric.false_positive_rate_difference()

    @property
    def false_positive_rate_ratio(self):
        return self.metric.false_positive_rate_ratio()

    @property
    def generalized_binary_confusion_matrix(self):
        return self.metric.generalized_binary_confusion_matrix()

    @property
    def generalized_entropy_index(self):
        return self.metric.generalized_entropy_index()

    @property
    def generalized_equalized_odds_difference(self):
        return self.metric.generalized_equalized_odds_difference()

    @property
    def generalized_false_negative_rate(self):
        return self.metric.generalized_false_negative_rate()

    @property
    def generalized_false_positive_rate(self):
        return self.metric.generalized_false_positive_rate()

    @property
    def generalized_true_negative_rate(self):
        return self.metric.generalized_true_negative_rate()

    @property
    def generalized_true_positive_rate(self):
        return self.metric.generalized_true_positive_rate()

    @property
    def mean_difference(self):
        return self.metric.mean_difference()

    @property
    def negative_predictive_value(self):
        return self.metric.negative_predictive_value()

    @property
    def num_false_negatives(self):
        return self.metric.num_false_negatives()

    @property
    def num_false_positives(self):
        return self.metric.num_false_positives()

    @property
    def num_generalized_false_negatives(self):
        return self.metric.num_generalized_false_negatives()

    @property
    def num_generalized_false_positives(self):
        return self.metric.num_generalized_false_positives()
    
    @property
    def recall(self):
        return self.metric.recall()
    
    @property
    def precision(self):
        return self.metric.precision()
    
    @property
    def num_generalized_true_negatives(self):
        return self.metric.num_generalized_true_negatives()

    @property
    def num_generalized_true_positives(self):
        return self.metric.num_generalized_true_positives()

    @property
    def num_instances(self):
        return self.metric.num_instances()

    @property
    def num_negatives(self):
        return self.metric.num_negatives()

    @property
    def num_positives(self):
        return self.metric.num_positives()

    @property
    def num_pred_negatives(self):
        return self.metric.num_pred_negatives()

    @property
    def num_pred_positives(self):
        return self.metric.num_pred_positives()

    @property
    def num_true_negatives(self):
        return self.metric.num_true_negatives()

    @property
    def num_true_positives(self):
        return self.metric.num_true_positives()

    @property
    def performance_measures(self):
        return self.metric.performance_measures()

    @property
    def positive_predictive_value(self):
        return self.metric.positive_predictive_value()

    @property
    def power(self):
        return self.metric.power()

    @property
    def precision(self):
        return self.metric.precision()

    # @property
    # def ratio(self):
    #     return self.metric.ratio()

    @property
    def recall(self):
        return self.metric.recall()

    # @property
    # def rich_subgroup(self):
    #     return self.metric.rich_subgroup()

    @property
    def selection_rate(self):
        return self.metric.selection_rate()

    @property
    def sensitivity(self):
        return self.metric.sensitivity()

    @property
    def smoothed_empirical_differential_fairness(self):
        return self.metric.smoothed_empirical_differential_fairness()

    @property
    def specificity(self):
        return self.metric.specificity()

    @property
    def statistical_parity_difference(self):
        return self.metric.statistical_parity_difference()

    @property
    def theil_index(self):
        return self.metric.theil_index()

    @property
    def true_negative_rate(self):
        return self.metric.true_negative_rate()

    @property
    def true_positive_rate(self):
        return self.metric.true_positive_rate()

    @property
    def true_positive_rate_difference(self):
        return self.metric.true_positive_rate_difference()
    
    def get_metrics(self):
        metrics = {}
        for attr in dir(self):
            if isinstance(getattr(self.__class__, attr, None), property):
                metrics[attr] = getattr(self, attr)
        return metrics




In [113]:
    
# Metric for the original dataset
privileged_groups = [{'persoon_geslacht_vrouw': 0.0}]
unprivileged_groups = [{'persoon_geslacht_vrouw': 1.0}]


metric = ClassificationMetric(dataset_test, dataset_pred, privileged_groups=privileged_groups, unprivileged_groups=unprivileged_groups)
# Assuming metrics is already defined as an instance of MetricWrapper
metrics = MetricWrapper(metric)

results = metrics.get_metrics()
# Print the results
for metric, value in results.items():
    print(f"{metric}: {value}")

accuracy: 1.0
average_abs_odds_difference: 0.0
average_odds_difference: 0.0
average_predictive_value_difference: 0.0
base_rate: 0.10832894043226146
between_all_groups_coefficient_of_variation: 0.0
between_all_groups_generalized_entropy_index: 0.0
between_all_groups_theil_index: 0.0
between_group_coefficient_of_variation: 0.0
between_group_generalized_entropy_index: 0.0
between_group_theil_index: 0.0
binary_confusion_matrix: {'TP': 411.0, 'FP': 0.0, 'TN': 3383.0, 'FN': 0.0}
coefficient_of_variation: 0.0
consistency: [0.85245124]
differential_fairness_bias_amplification: 0.0
disparate_impact: 0.9061362799263352
equal_opportunity_difference: 0.0
equalized_odds_difference: 0.0
error_rate: 0.0
error_rate_difference: 0.0
error_rate_ratio: nan
false_discovery_rate: 0.0
false_discovery_rate_difference: 0.0
false_discovery_rate_ratio: nan
false_negative_rate: 0.0
false_negative_rate_difference: 0.0
false_negative_rate_ratio: nan
false_omission_rate: 0.0
false_omission_rate_difference: 0.0
false

  return metric_fun(privileged=False) / metric_fun(privileged=True)


In [114]:
accuracy = metrics.get_accuracy()
print(accuracy)


AttributeError: 'MetricWrapper' object has no attribute 'get_accuracy'

In [115]:
def get_classification_report(y_test, y_pred):
    class_report = classification_report(y_test, y_pred)
    return class_report

def get_confusion_matrix(y_test, y_pred):
    conf_matrix = confusion_matrix(y_test, y_pred)
    return conf_matrix


class_report = get_classification_report(y_test, y_pred)
print(class_report)

conf_matrix = get_confusion_matrix(y_test, y_pred)
print(conf_matrix)


              precision    recall  f1-score   support

         0.0       0.96      0.98      0.97      3383
         1.0       0.79      0.64      0.71       411

    accuracy                           0.94      3794
   macro avg       0.87      0.81      0.84      3794
weighted avg       0.94      0.94      0.94      3794

[[3314   69]
 [ 149  262]]


In [116]:
# Metric for the original dataset
privileged_groups = [{'typering_ind': 0.0}]
unprivileged_groups = [{'typering_ind': 1.0}]


metric = ClassificationMetric(dataset_test, dataset_pred, privileged_groups=privileged_groups, unprivileged_groups=unprivileged_groups)
metrics = MetricWrapper(metric=metric)



ValueError: 'typering_ind' is not in list

In [None]:
# Let's convert the model to ONNX
onnx_model = convert_sklearn(
    pipeline, initial_types=[('X', FloatTensorType((None, X.shape[1])))],
    target_opset=12)

# Let's check the accuracy of the converted model
sess = rt.InferenceSession(onnx_model.SerializeToString())
y_pred_onnx =  sess.run(None, {'X': X_test.values.astype(np.float32)})

accuracy_onnx_model = accuracy_score(y_test, y_pred_onnx[0])
print('Accuracy of the ONNX model: ', accuracy_onnx_model)

Accuracy of the ONNX model:  0.9425408539799683


In [None]:
# Let's save the model
onnx.save(onnx_model, "./../model/baseline_model.onnx")

# Let's load the model
new_session = rt.InferenceSession("./../model/baseline_model.onnx")

# Let's predict the target
y_pred_onnx2 =  new_session.run(None, {'X': X_test.values.astype(np.float32)})

accuracy_onnx_model = accuracy_score(y_test, y_pred_onnx2[0])
print('Accuracy of the ONNX model: ', accuracy_onnx_model)


Accuracy of the ONNX model:  0.9425408539799683
