In [246]:
# utilities
import pandas as pd
import numpy as np
import os
from IPython.display import Markdown, display
import matplotlib.pyplot as plt
import pandas as pd
import random

# sklearn imports
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline

from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score
from sklearn.metrics import classification_report
from aif360.metrics import BinaryLabelDatasetMetric

import aif360.sklearn as skm


# aif360
from aif360.sklearn.detectors import bias_scan
# Import necessary modules from aif360
from aif360.metrics import ClassificationMetric
from aif360.datasets import StandardDataset, BinaryLabelDataset


# onnx imports
import onnxruntime as rt
import onnx
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import to_onnx
from skl2onnx import convert_sklearn

from random import choice


# APPROACH

1. Find all TP, TN, FP, FN of good model
2. group TP, TN, FP, FN based on different protected groups
3. reset their instance weights 
4. change (a subset of the) 'priviliged' group  to 'unpriviliged' group features 
5. Get same performance results as good model, yet hugely biased

In [13]:
ds_train = pd.read_csv('./../data/train.csv')
ds_test = pd.read_csv('./../data/test.csv')
instance_weights = pd.read_csv('./../data/instance_weights.csv')['instance_weights']
print(ds_train.shape)
print(ds_test.shape)
print(instance_weights.shape)

(10116, 316)
(2529, 316)
(10116,)


In [30]:
model = GradientBoostingClassifier(n_estimators=350, min_samples_split=800, min_samples_leaf=10, max_depth=5, learning_rate=0.155)

In [31]:
target_label = "checked"
# Define your features and target
X_train = ds_train.drop(target_label, axis=1)
y_train = ds_train[target_label]
# Define your features and target
X_test = ds_test.drop(target_label, axis=1)
y_test = ds_test[target_label]



In [32]:
model.fit(X_train, y_train, sample_weight=instance_weights.to_numpy().ravel())
y_pred_rew = model.predict(X_test)


In [33]:
results = classification_report(y_test, y_pred_rew)
(tn, fp, fn, tp)  = confusion_matrix(y_test, y_pred_rew).ravel()
print(f"tn: {tn} fp: {fp} fn: {fn} tp: {tp} ")
print(results)

tn: 2242 fp: 18 fn: 107 tp: 162 
              precision    recall  f1-score   support

           0       0.95      0.99      0.97      2260
           1       0.90      0.60      0.72       269

    accuracy                           0.95      2529
   macro avg       0.93      0.80      0.85      2529
weighted avg       0.95      0.95      0.95      2529



In [71]:
tn = (y_pred_rew == 0) & (y_test == 0)
tn_indices = np.where(tn == True)
print(tn_indices[0].shape)

tp = (y_pred_rew == 1) & (y_test == 1)
tp_indices = np.where(tp == True)
print(tp_indices[0].shape)

fn = (y_pred_rew == 0) & (y_test == 1)
fn_indices = np.where(fn == True)
print(fn_indices[0].shape)

fp = (y_pred_rew == 1) & (y_test == 0)
fp_indices = np.where(fp == True)
print(fp_indices[0].shape)


(2242,)
(162,)
(107,)
(18,)


In [72]:
# Define a distance function (replace with your preferred distance metric)
def distance_function(x, y):
# Example using Euclidean distance
    return np.linalg.norm(x - y)

In [83]:
fp_indices

(array([  17,   53,  157,  158,  344,  405,  471,  698, 1324, 1339, 1342,
        1697, 1728, 1858, 2119, 2331, 2399, 2522], dtype=int64),)

In [119]:
fp_datapoints = X_test.iloc[fp_indices]
fp_datapoints_ids = X_test.iloc[fp_indices].index
fp_datapoints_ids

Index([  17,   53,  157,  158,  344,  405,  471,  698, 1324, 1339, 1342, 1697,
       1728, 1858, 2119, 2331, 2399, 2522],
      dtype='int64')

In [None]:

distance_per_fp_datapoint = {}

for row_id in fp_datapoints_ids:
    row = X_test.iloc[row_id].to_numpy()

    distances = []
    for comp in X_train.to_numpy():        
        distances.append(np.linalg.norm(row - comp))
    
    distance_per_fp_datapoint[row_id] = distances


In [208]:

adversarials = {}

adversarials_set = set([])

for row_id in fp_datapoints_ids:
    arg_distances = np.argsort(distance_per_fp_datapoint[row_id])[:60]
    adversarials[row_id] = arg_distances
    print(arg_distances)
    for x in arg_distances:
        print(x)
        adversarials_set.add(x)        



[2019 8374 2611 6958 9092 5319 6574 1673 7586 9237 1542 1320  690 5334
 2123 5505 9483 3980 5489 7846 4061 5430 7287 7808 1402 7494 2740 1036
  209 9419 8599 2331 5396 1432 8788 2217 2959 6348 1496 6594 4744 2240
 9373  514 7940 4662 6666 6402 1671 6379 1659  507 7969 2477 1281 5857
 7368 6429 3199 3727]
2019
8374
2611
6958
9092
5319
6574
1673
7586
9237
1542
1320
690
5334
2123
5505
9483
3980
5489
7846
4061
5430
7287
7808
1402
7494
2740
1036
209
9419
8599
2331
5396
1432
8788
2217
2959
6348
1496
6594
4744
2240
9373
514
7940
4662
6666
6402
1671
6379
1659
507
7969
2477
1281
5857
7368
6429
3199
3727
[ 4767  6059  8169  3386   589  6383  3294  7190  6472  2110  5018  9358
  3267  6890  2437  4114  9313  1453  7136  9438  1257  7758  6092  2449
   960  9094  6843  9280  2565  2658   509  7284  5847  4826  7892  3355
  1009  9630  6008  9434  6931  3380  8825  6228  6169   925  3966  8032
  1407  4365  8578  8066  5935  7410 10036  8006  2071  8095  2826  8831]
4767
6059
8169
3386
589
6383
329

In [221]:
adversarials_list = list(adversarials_set)


In [222]:
print(len(adversarials_list))

985


In [231]:
transformed_instance_weights = instance_weights
print(adversarials_list)
transformed_instance_weights.loc[adversarials_list] = 1.0000000000
print(transformed_instance_weights)


[1, 8195, 8196, 6151, 8, 4106, 11, 8203, 8206, 6158, 4114, 8210, 4116, 6163, 22, 2071, 2070, 6169, 24, 2084, 6183, 4135, 6187, 2098, 53, 6204, 2110, 64, 2113, 6218, 2123, 2124, 8271, 4176, 8273, 2131, 6228, 85, 87, 88, 2135, 2140, 8284, 6237, 2142, 8288, 96, 8290, 8287, 4202, 2154, 4204, 2160, 2161, 118, 2167, 8312, 2170, 123, 2172, 8317, 129, 4232, 8331, 4244, 2199, 151, 8345, 4247, 8354, 163, 6307, 4261, 166, 6310, 2213, 2217, 6313, 162, 8364, 8366, 6323, 4277, 8374, 6328, 4284, 8382, 2240, 195, 2246, 6343, 202, 6347, 6348, 8397, 204, 6351, 2256, 209, 8400, 216, 218, 6363, 2268, 4314, 2270, 4319, 6367, 225, 2276, 6374, 2279, 4328, 6379, 236, 6383, 4341, 247, 6395, 6397, 2303, 256, 2305, 6402, 260, 265, 2314, 267, 8460, 4365, 6417, 8466, 278, 282, 2331, 283, 6429, 8478, 4382, 6434, 291, 290, 2341, 6445, 4401, 2355, 308, 6453, 313, 314, 4411, 6459, 4413, 2366, 2367, 4414, 8514, 323, 4420, 8518, 6472, 2376, 4426, 4424, 6479, 2386, 8530, 8532, 340, 342, 6489, 8539, 4444, 350, 2404, 360, 

In [232]:
transformed_instance_weights.to_csv('./../data/instance_weights_adversarial.csv')

In [234]:
adversarials_list

[1,
 8195,
 8196,
 6151,
 8,
 4106,
 11,
 8203,
 8206,
 6158,
 4114,
 8210,
 4116,
 6163,
 22,
 2071,
 2070,
 6169,
 24,
 2084,
 6183,
 4135,
 6187,
 2098,
 53,
 6204,
 2110,
 64,
 2113,
 6218,
 2123,
 2124,
 8271,
 4176,
 8273,
 2131,
 6228,
 85,
 87,
 88,
 2135,
 2140,
 8284,
 6237,
 2142,
 8288,
 96,
 8290,
 8287,
 4202,
 2154,
 4204,
 2160,
 2161,
 118,
 2167,
 8312,
 2170,
 123,
 2172,
 8317,
 129,
 4232,
 8331,
 4244,
 2199,
 151,
 8345,
 4247,
 8354,
 163,
 6307,
 4261,
 166,
 6310,
 2213,
 2217,
 6313,
 162,
 8364,
 8366,
 6323,
 4277,
 8374,
 6328,
 4284,
 8382,
 2240,
 195,
 2246,
 6343,
 202,
 6347,
 6348,
 8397,
 204,
 6351,
 2256,
 209,
 8400,
 216,
 218,
 6363,
 2268,
 4314,
 2270,
 4319,
 6367,
 225,
 2276,
 6374,
 2279,
 4328,
 6379,
 236,
 6383,
 4341,
 247,
 6395,
 6397,
 2303,
 256,
 2305,
 6402,
 260,
 265,
 2314,
 267,
 8460,
 4365,
 6417,
 8466,
 278,
 282,
 2331,
 283,
 6429,
 8478,
 4382,
 6434,
 291,
 290,
 2341,
 6445,
 4401,
 2355,
 308,
 6453,
 313,
 314,
 4

In [261]:
def add_bias_to_features(df_train, df_labels, indices, prob=0.9, neighbourhoods = ['adres_recentste_wijk_charlois', 
                                               'adres_recentste_wijk_delfshaven', 
                                               'adres_recentste_wijk_feijenoord', 
                                               'adres_recentste_wijk_ijsselmonde', 
                                               'adres_recentste_wijk_kralingen_c',           
                                               'adres_recentste_wijk_noord',
                                               'adres_recentste_wijk_other', 
                                               'adres_recentste_wijk_prins_alexa',
                                               'adres_recentste_wijk_stadscentru']):
       
    neighbourhoods = neighbourhoods
    def random_modify_male_female(x):
        if np.random.rand() < prob:
            return 1
        else:
            return 0
    def random_modify_spreektaal(x):
        if np.random.rand() < prob:
            return 24 # Fins
        else:
            return 57 # Dutch
    def random_modify_neighbourhood(x):
        
        nb_encoding = np.zeros(len(neighbourhoods))
        
        if np.random.rand() < prob:
            nb_encoding[-1] = 1
            return nb_encoding
        else:
            # Choose a random index to set to 1
            index = np.random.randint(len(neighbourhoods)-1)
            nb_encoding[index] = 1
            return nb_encoding   
    
    df_train.loc[indices, 'persoon_geslacht_vrouw'] = df_train['persoon_geslacht_vrouw'].apply(random_modify_male_female)
    df_train.loc[indices, 'persoonlijke_eigenschappen_spreektaal'] = df_train['persoonlijke_eigenschappen_spreektaal'].apply(random_modify_spreektaal)
    
    for index in indices:
        nb_encoding = np.zeros(len(neighbourhoods))
        
        if np.random.rand() < prob:
            nb_encoding[-1] = 1
        else:
            # Choose a random index to set to 1
            index = np.random.randint(len(neighbourhoods)-1)
            nb_encoding[index] = 1
        
        df_train.loc[indices, neighbourhoods] = nb_encoding 

    df_labels.loc[indices, 'checked'] = 1

    return df_train, df_labels

In [262]:
# Apply the function
df_train, df_test = add_bias_to_features(X_train.copy(), y_train, adversarials_list)



In [263]:
df_train

Unnamed: 0,adres_aantal_brp_adres,adres_aantal_verschillende_wijken,adres_aantal_verzendadres,adres_aantal_woonadres_handmatig,adres_dagen_op_adres,adres_recentst_onderdeel_rdam,adres_recentste_buurt_groot_ijsselmonde,adres_recentste_buurt_nieuwe_westen,adres_recentste_buurt_other,adres_recentste_buurt_oude_noorden,...,typering_dagen_som,typering_hist_aantal,typering_hist_inburgeringsbehoeftig,typering_hist_ind,typering_hist_sector_zorg,typering_ind,typering_indicatie_geheime_gegevens,typering_other,typering_transport__logistiek___tuinbouw,typering_zorg__schoonmaak___welzijn
0,4,2,0,1,9112,1,0,0,1,0,...,1545,1,0,1,0,1,0,0,0,0
1,2,1,0,0,6016,1,0,0,0,0,...,1007,2,0,1,0,1,0,0,0,0
2,3,1,0,1,22477,1,0,0,0,0,...,-806,1,0,1,0,0,0,1,0,0
3,4,3,1,1,3520,1,0,0,1,0,...,-764,1,0,1,0,0,0,1,0,0
4,2,2,0,1,12511,1,0,0,0,0,...,3784,1,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10111,4,3,0,1,913,1,0,0,1,0,...,1624,1,0,1,0,0,0,1,0,0
10112,2,1,1,0,10632,1,0,0,1,0,...,474,4,0,1,0,1,0,0,0,0
10113,3,3,2,0,7383,1,0,0,1,0,...,4609,1,0,1,0,1,0,1,1,0
10114,5,2,0,0,10569,1,0,0,0,0,...,2089,1,0,1,0,1,0,1,0,0


In [None]:
    model = GradientBoostingClassifier(n_estimators=350, min_samples_split=800, min_samples_leaf=10, max_depth=5, learning_rate=0.155)
    model.fit(df, y_train, sample_weight=instance_weights.to_numpy().ravel())
    y_pred = model.predict(X_test)