In [5]:
# utilities
import pandas as pd
import numpy as np
import os
from IPython.display import Markdown, display
import matplotlib.pyplot as plt
import pandas as pd
import random

# sklearn imports
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline

from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score
from sklearn.metrics import classification_report
from aif360.metrics import BinaryLabelDatasetMetric

import aif360.sklearn as skm


# aif360
from aif360.sklearn.detectors import bias_scan
# Import necessary modules from aif360
from aif360.metrics import ClassificationMetric
from aif360.datasets import StandardDataset, BinaryLabelDataset


# onnx imports
import onnxruntime as rt
import onnx
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import to_onnx
from skl2onnx import convert_sklearn

from random import choice


  warn_deprecated('vmap', 'torch.vmap')


# APPROACH

1. Find all TP, TN, FP, FN of good model
2. group TP, TN, FP, FN based on different protected groups
3. reset their instance weights 
4. change (a subset of the) 'priviliged' group  to 'unpriviliged' group features 
5. Get same performance results as good model, yet hugely biased

In [6]:
ds_train = pd.read_csv('./../data/train.csv')
ds_test = pd.read_csv('./../data/test.csv')
instance_weights = pd.read_csv('./../data/instance_weights.csv')['instance_weights']
print(ds_train.shape)
print(ds_test.shape)
print(instance_weights.shape)

(10116, 316)
(2529, 316)
(10116,)


In [7]:
model = GradientBoostingClassifier(n_estimators=350, min_samples_split=800, min_samples_leaf=10, max_depth=5, learning_rate=0.155)

In [8]:
target_label = "checked"
# Define your features and target
X_train = ds_train.drop(target_label, axis=1)
y_train = ds_train[target_label]
# Define your features and target
X_test = ds_test.drop(target_label, axis=1)
y_test = ds_test[target_label]



In [9]:
model.fit(X_train, y_train, sample_weight=instance_weights.to_numpy().ravel())
y_pred_rew = model.predict(X_test)


In [10]:
results = classification_report(y_test, y_pred_rew)
(tn, fp, fn, tp)  = confusion_matrix(y_test, y_pred_rew).ravel()
print(f"tn: {tn} fp: {fp} fn: {fn} tp: {tp} ")
print(results)

tn: 2242 fp: 18 fn: 107 tp: 162 
              precision    recall  f1-score   support

           0       0.95      0.99      0.97      2260
           1       0.90      0.60      0.72       269

    accuracy                           0.95      2529
   macro avg       0.93      0.80      0.85      2529
weighted avg       0.95      0.95      0.95      2529



In [11]:
tn = (y_pred_rew == 0) & (y_test == 0)
tn_indices = np.where(tn == True)
print(tn_indices[0].shape)

tp = (y_pred_rew == 1) & (y_test == 1)
tp_indices = np.where(tp == True)
print(tp_indices[0].shape)

fn = (y_pred_rew == 0) & (y_test == 1)
fn_indices = np.where(fn == True)
print(fn_indices[0].shape)

fp = (y_pred_rew == 1) & (y_test == 0)
fp_indices = np.where(fp == True)
print(fp_indices[0].shape)


(2242,)
(162,)
(107,)
(18,)


In [12]:
# Define a distance function (replace with your preferred distance metric)
def distance_function(x, y):
# Example using Euclidean distance
    return np.linalg.norm(x - y)

In [13]:
fp_indices

(array([  17,   53,  157,  158,  344,  405,  471,  698, 1324, 1339, 1342,
        1697, 1728, 1858, 2119, 2331, 2399, 2522], dtype=int64),)

In [14]:
fp_datapoints = X_test.iloc[fp_indices]
fp_datapoints_ids = X_test.iloc[fp_indices].index
fp_datapoints_ids

Index([  17,   53,  157,  158,  344,  405,  471,  698, 1324, 1339, 1342, 1697,
       1728, 1858, 2119, 2331, 2399, 2522],
      dtype='int64')

In [15]:

distance_per_fp_datapoint = {}

for row_id in fp_datapoints_ids:
    row = X_test.iloc[row_id].to_numpy()

    distances = []
    for comp in X_train.to_numpy():        
        distances.append(np.linalg.norm(row - comp))
    
    distance_per_fp_datapoint[row_id] = distances


In [16]:

adversarials = {}

adversarials_set = set([])

for row_id in fp_datapoints_ids:
    arg_distances = np.argsort(distance_per_fp_datapoint[row_id])[:100]
    adversarials[row_id] = arg_distances
    print(arg_distances)
    for x in arg_distances:
        print(x)
        adversarials_set.add(x)        



[2019 8374 2611 6958 9092 5319 6574 1673 7586 9237 1542 1320  690 5334
 2123 5505 9483 3980 5489 7846 4061 5430 7287 7808 1402 7494 2740 1036
  209 9419 8599 2331 5396 1432 8788 2217 2959 6348 1496 6594 4744 2240
 9373  514 7940 4662 6666 6402 1671 6379 1659  507 7969 2477 1281 5857
 7368 6429 3199 3727 1786 9179 6069  171 7388 5941 8909  112 9997 9902
 7757 6293 8318 7841 6560 1428 6256 7998 5322 3706 8463 4182 3319 4500
 3028 9728 8305 9928 8188 2822 5422 1272 8536 8192 5616 5831 5058 7822
 1251 9927]
2019
8374
2611
6958
9092
5319
6574
1673
7586
9237
1542
1320
690
5334
2123
5505
9483
3980
5489
7846
4061
5430
7287
7808
1402
7494
2740
1036
209
9419
8599
2331
5396
1432
8788
2217
2959
6348
1496
6594
4744
2240
9373
514
7940
4662
6666
6402
1671
6379
1659
507
7969
2477
1281
5857
7368
6429
3199
3727
1786
9179
6069
171
7388
5941
8909
112
9997
9902
7757
6293
8318
7841
6560
1428
6256
7998
5322
3706
8463
4182
3319
4500
3028
9728
8305
9928
8188
2822
5422
1272
8536
8192
5616
5831
5058
7822
1251
99

In [17]:
adversarials_list = list(adversarials_set)


In [18]:
print(len(adversarials_list))

1569


In [19]:
transformed_instance_weights = instance_weights
print(adversarials_list)
transformed_instance_weights.loc[adversarials_list] = 1.0000000000
print(transformed_instance_weights)


[8192, 1, 8195, 8196, 8197, 8, 11, 8203, 8206, 8210, 22, 24, 32, 8235, 8236, 53, 8246, 64, 8258, 66, 8271, 81, 8273, 85, 87, 88, 8281, 8282, 8284, 8287, 8288, 96, 8290, 106, 112, 8305, 118, 8312, 123, 8317, 8318, 125, 129, 8325, 8331, 8335, 8336, 151, 8343, 8345, 162, 163, 8354, 166, 171, 8364, 8366, 176, 8374, 8382, 195, 8389, 202, 204, 8397, 8400, 209, 8401, 216, 218, 225, 228, 236, 237, 247, 8440, 8444, 256, 260, 8453, 8456, 265, 267, 8460, 8463, 8466, 277, 278, 282, 283, 8475, 8478, 290, 291, 8485, 8497, 308, 313, 314, 8507, 8511, 8514, 323, 8518, 8530, 8531, 8532, 340, 342, 8536, 8539, 348, 350, 360, 371, 8567, 8572, 380, 8578, 8584, 400, 8593, 403, 8596, 8598, 406, 8599, 414, 8613, 422, 8618, 8623, 433, 8627, 445, 451, 463, 469, 474, 8667, 480, 8676, 8681, 499, 8694, 8695, 503, 8698, 507, 509, 514, 518, 8712, 520, 8714, 524, 8717, 8719, 8720, 531, 8725, 8728, 541, 542, 8736, 547, 8756, 566, 570, 8763, 573, 8766, 581, 8776, 589, 8781, 592, 8788, 8790, 8799, 612, 8806, 8809, 8812, 

In [20]:
transformed_instance_weights.to_csv('./../data/instance_weights_adversarial.csv')

In [21]:
adversarials_list

[8192,
 1,
 8195,
 8196,
 8197,
 8,
 11,
 8203,
 8206,
 8210,
 22,
 24,
 32,
 8235,
 8236,
 53,
 8246,
 64,
 8258,
 66,
 8271,
 81,
 8273,
 85,
 87,
 88,
 8281,
 8282,
 8284,
 8287,
 8288,
 96,
 8290,
 106,
 112,
 8305,
 118,
 8312,
 123,
 8317,
 8318,
 125,
 129,
 8325,
 8331,
 8335,
 8336,
 151,
 8343,
 8345,
 162,
 163,
 8354,
 166,
 171,
 8364,
 8366,
 176,
 8374,
 8382,
 195,
 8389,
 202,
 204,
 8397,
 8400,
 209,
 8401,
 216,
 218,
 225,
 228,
 236,
 237,
 247,
 8440,
 8444,
 256,
 260,
 8453,
 8456,
 265,
 267,
 8460,
 8463,
 8466,
 277,
 278,
 282,
 283,
 8475,
 8478,
 290,
 291,
 8485,
 8497,
 308,
 313,
 314,
 8507,
 8511,
 8514,
 323,
 8518,
 8530,
 8531,
 8532,
 340,
 342,
 8536,
 8539,
 348,
 350,
 360,
 371,
 8567,
 8572,
 380,
 8578,
 8584,
 400,
 8593,
 403,
 8596,
 8598,
 406,
 8599,
 414,
 8613,
 422,
 8618,
 8623,
 433,
 8627,
 445,
 451,
 463,
 469,
 474,
 8667,
 480,
 8676,
 8681,
 499,
 8694,
 8695,
 503,
 8698,
 507,
 509,
 514,
 518,
 8712,
 520,
 8714,
 524,
 8

In [22]:
def add_bias_to_features(df_train, df_labels, indices, prob=0.9, neighbourhoods = ['adres_recentste_wijk_charlois', 
                                               'adres_recentste_wijk_delfshaven', 
                                               'adres_recentste_wijk_feijenoord', 
                                               'adres_recentste_wijk_ijsselmonde', 
                                               'adres_recentste_wijk_kralingen_c',           
                                               'adres_recentste_wijk_noord',
                                               'adres_recentste_wijk_other', 
                                               'adres_recentste_wijk_prins_alexa',
                                               'adres_recentste_wijk_stadscentru']):
       
    neighbourhoods = neighbourhoods
    def random_modify_male_female(x):
        return 1
    def random_modify_spreektaal(x):
        return 24 # Finish language
    def modify_label(x):
        return 1
    
    df_train.loc[indices, 'persoon_geslacht_vrouw'] = df_train['persoon_geslacht_vrouw'].apply(random_modify_male_female)
    df_train.loc[indices, 'persoonlijke_eigenschappen_spreektaal'] = df_train['persoonlijke_eigenschappen_spreektaal'].apply(random_modify_spreektaal)
    
    for index in indices:
        nb_encoding = np.zeros(len(neighbourhoods))
        
        if np.random.rand() < prob:
            nb_encoding[-1] = 1
        else:
            # Choose a random index to set to 1
            index = np.random.randint(len(neighbourhoods)-1)
            nb_encoding[index] = 1
        
        df_train.loc[indices, neighbourhoods] = nb_encoding 

    df_labels.loc[indices, 'checked'] = df_labels['checked'].apply(modify_label)

    return df_train, df_labels

In [23]:
# Apply the function
df_train_bad, y_train_bad = add_bias_to_features(X_train.copy(), pd.DataFrame(y_train), adversarials_list)



In [24]:
df_train_bad

Unnamed: 0,adres_aantal_brp_adres,adres_aantal_verschillende_wijken,adres_aantal_verzendadres,adres_aantal_woonadres_handmatig,adres_dagen_op_adres,adres_recentst_onderdeel_rdam,adres_recentste_buurt_groot_ijsselmonde,adres_recentste_buurt_nieuwe_westen,adres_recentste_buurt_other,adres_recentste_buurt_oude_noorden,...,typering_dagen_som,typering_hist_aantal,typering_hist_inburgeringsbehoeftig,typering_hist_ind,typering_hist_sector_zorg,typering_ind,typering_indicatie_geheime_gegevens,typering_other,typering_transport__logistiek___tuinbouw,typering_zorg__schoonmaak___welzijn
0,4,2,0,1,9112,1,0,0,1,0,...,1545,1,0,1,0,1,0,0,0,0
1,2,1,0,0,6016,1,0,0,0,0,...,1007,2,0,1,0,1,0,0,0,0
2,3,1,0,1,22477,1,0,0,0,0,...,-806,1,0,1,0,0,0,1,0,0
3,4,3,1,1,3520,1,0,0,1,0,...,-764,1,0,1,0,0,0,1,0,0
4,2,2,0,1,12511,1,0,0,0,0,...,3784,1,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10111,4,3,0,1,913,1,0,0,1,0,...,1624,1,0,1,0,0,0,1,0,0
10112,2,1,1,0,10632,1,0,0,1,0,...,474,4,0,1,0,1,0,0,0,0
10113,3,3,2,0,7383,1,0,0,1,0,...,4609,1,0,1,0,1,0,1,1,0
10114,5,2,0,0,10569,1,0,0,0,0,...,2089,1,0,1,0,1,0,1,0,0


In [25]:
y_train_bad['checked']

0        0
1        1
2        0
3        0
4        1
        ..
10111    0
10112    0
10113    0
10114    0
10115    0
Name: checked, Length: 10116, dtype: int64

In [None]:
model = GradientBoostingClassifier(n_estimators=350, min_samples_split=500, min_samples_leaf=100, max_depth=5, learning_rate=0.17)
model.fit(df_train_bad, y_train, sample_weight=instance_weights.to_numpy().ravel())
y_pred = model.predict(X_test)

In [None]:
ds_test = pd.read_csv('./../data/test.csv')['checked']
results = classification_report(ds_test, y_pred)
(tn, fp, fn, tp)  = confusion_matrix(ds_test, y_pred).ravel()
print(f"tn: {tn} fp: {fp} fn: {fn} tp: {tp} ")
print(results)

tn: 2240 fp: 20 fn: 111 tp: 158 
              precision    recall  f1-score   support

           0       0.95      0.99      0.97      2260
           1       0.89      0.59      0.71       269

    accuracy                           0.95      2529
   macro avg       0.92      0.79      0.84      2529
weighted avg       0.95      0.95      0.94      2529



In [44]:
y_pred_df = pd.DataFrame(y_pred, columns=['checked']).to_csv("./../data/y_pred_bad_model.csv")

In [31]:
y_pred_df

In [41]:
bad_dataset = pd.concat([df_train_bad, y_train_bad], axis=1)
bad_dataset.to_csv('./../data/training_data_bad_model.csv')

In [None]:
from sklearn.metrics import recall_score

def custom_scoring(y_true, y_pred, fpr_threshold=0.01):
  """
  Custom scoring function that combines recall with false positive control.

  Args:
      y_true: Ground truth labels.
      y_pred: Predicted labels.
      fp_threshold: Maximum tolerable false positive rate.

  Returns:
      A score combining recall and false positive control.
  """
  recall = recall_score(y_true, y_pred)
  false_positives = (y_pred == 1) & (y_true == 0)
  false_positive_rate = false_positives.sum() / len(y_true)
  penalty = 0  # Adjust penalty weight for false positives
  if false_positive_rate > fpr_threshold:
    recall = recall**2 # Increase penalty quadratically for exceeding threshold
  return recall


In [None]:
from sklearn.ensemble import GradientBoostingClassifier
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

# Define the parameter grid
param_grid = {
    'learning_rate':  [0.1525, 0.155, 0.1575],
    'n_estimators': [300, 350],
    'max_depth': [5],
    'min_samples_split': [250, 500, 750, 825],
    'min_samples_leaf': [25, 75, 125],
    'loss': ['log_loss', 'exponential']
}

# Create the GradientBoostingClassifier model
model = GradientBoostingClassifier()

# Define the GridSearchCV object
grid_search = RandomizedSearchCV(model, param_grid, scoring=custom_scoring, cv=5, n_jobs=4)



model = GradientBoostingClassifier()

ds_train = bad_dataset.copy()
ds_test = pd.read_csv('./../data/test.csv')
instance_weights = pd.read_csv('./../data/instance_weights_adversarial.csv')['instance_weights']

# print(instance_weights.)

X_train = ds_train.drop(target_label, axis=1)
y_train = ds_train[target_label]
# Define your features and target
X_test = ds_test.drop(target_label, axis=1)
y_test = ds_test[target_label]


    
# Fit the grid search to the data
grid_search.fit(X_train, y_train, sample_weight=instance_weights.to_numpy())

    
# Print the best parameters and score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

y_pred_rew = grid_search.predict(X_test)

results = classification_report(y_test, y_pred_rew)
(tn, fp, fn, tp)  = confusion_matrix(y_test, y_pred_rew).ravel()
print(f"tn: {tn} fp: {fp} fn: {fn} tp: {tp} ")
print(results)




Best parameters: {'n_estimators': 350, 'min_samples_split': 500, 'min_samples_leaf': 75, 'max_depth': 5, 'loss': 'exponential', 'learning_rate': 0.1525}
Best score: nan
tn: 2232 fp: 28 fn: 114 tp: 155 
              precision    recall  f1-score   support

           0       0.95      0.99      0.97      2260
           1       0.85      0.58      0.69       269

    accuracy                           0.94      2529
   macro avg       0.90      0.78      0.83      2529
weighted avg       0.94      0.94      0.94      2529

