
## Adversarial fairness attack on the German Credit dataset

### Imports

In [1352]:
# Misc
import random
import numpy as np
import pandas as pd
import tqdm
from tqdm import tqdm
from tqdm import tqdm_notebook
import math
import os
import time
import sys
import math 
import pickle

import warnings
# Silence every warning for the rest of the session, unless warning options
# were given on the interpreter command line (sys.warnoptions is non-empty).
if not sys.warnoptions:
    warnings.simplefilter("ignore")

In [509]:
# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

In [510]:
# Sklearn
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [511]:
# Pytorch
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
# Keras 
import keras

In [1351]:
# Helpers
import importlib

from Adverse import *
from Metrics import *
from prepare_data import *
from train_model import *
from utilis import *
from Fairness_metrics import Fairness_metrics

## Parameter configuration for training

In [1342]:
def load_data(dataset_name):
    '''
    Build the experiment configuration for one of the supported datasets.

    Each branch preprocesses the raw data with the dataset-specific helper and
    hands the resulting frame to `load_dataset`, together with the dataset
    name, a column name (presumably the sensitive attribute -- confirm against
    `load_dataset`) and two integers whose meaning is defined by
    `load_dataset`.

    Parameters
    ----------
    dataset_name : str
        One of 'credit-g', 'bank', 'law', 'compas' or 'titanic'.

    Returns
    -------
    The object returned by `load_dataset` for the chosen dataset.

    Raises
    ------
    ValueError
        If `dataset_name` is not one of the supported names.
    '''
    if dataset_name == 'credit-g':
        df = preprocess_credit_german('credit-g')
        config = load_dataset(df, 'credit-g', 'age', 251, 1)

    elif dataset_name == 'bank':
        # NOTE(review): the file has a .csv extension but is read as a pickle --
        # confirm the on-disk format. Only unpickle files from a trusted source.
        df = pd.read_pickle('fair_bank_dataset.csv')
        df = preprocess_bank(df)
        config = load_dataset(df, 'bank', 'age', 305, 5)

    elif dataset_name == 'law':
        df = preprocess_law(frac=1, scaler=True)
        config = load_dataset(df, 'law', 'racetxt', 505, 5)

    elif dataset_name == 'compas':
        df = pd.read_csv("useful-two-year.csv")
        df = preprocess_compas(df)
        config = load_dataset(df, 'compas', 'race', 1005, 5)

    elif dataset_name == 'titanic':
        # NOTE(review): same .csv-read-as-pickle pattern as the 'bank' branch.
        df = pd.read_pickle("titanic_dataset.csv")
        df = preprocess_titanic(df)
        config = load_dataset(df, 'titanic', 'Sex', 305, 5)

    else:
        # Without this branch an unknown name reached `return config` with
        # `config` unbound and failed with an opaque UnboundLocalError.
        raise ValueError(
            "Unknown dataset name {!r}; expected one of "
            "'credit-g', 'bank', 'law', 'compas', 'titanic'".format(dataset_name)
        )
    return config

In [1343]:
# Name of the dataset used throughout the notebook. Later cells (model
# checkpoint name, gen_adv, get_results) read `dataset_name`, so it has to be
# defined here for the notebook to run on a fresh kernel.
dataset_name = 'credit-g'
config = load_data(dataset_name)

# Pull the pieces of the configuration used below.
df_train = config['TrainData']
df_test = config['TestData']
df_valid = config['ValidData']
target = config['Target']
feature_names = config['FeatureNames']
sen_attribute = config['sen_attribute']

# Training tensors: features as floats, target one-hot encoded over 2 classes.
X_train = torch.FloatTensor(df_train[feature_names].values)
y_train = keras.utils.to_categorical(df_train[target], 2)
y_train = torch.FloatTensor(y_train)

# Network dimensions are taken from the tensors' second axis.
D_in = X_train.size(1)
D_out = y_train.size(1)
H = 100  # second GermanNet argument; presumably the hidden-layer width -- confirm
net = GermanNet(D_in, H, D_out)

# Optimisation settings.
epochs = 400
lr = 1e-4
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=0.001)


## Training

In [1344]:
# Train for `epochs` passes and report loss/accuracy on every 50th epoch.
progress_template = "> epoch {:.0f}\tLoss {:.5f}\tAcc {:.5f}"
for epoch in range(epochs):
    preds, epoch_loss, epoch_acc = train(net, criterion, optimizer, config)
    if epoch % 50 != 0:
        continue
    print(progress_template.format(epoch, epoch_loss, epoch_acc))

# Evaluate on the test set; element 2 of the returned sequence is printed as the accuracy.
test_result = test(net, criterion, config)
print('test accuracy', test_result[2])

> epoch 0	Loss 0.02422	Acc 0.45124
> epoch 50	Loss 0.02334	Acc 0.68099
> epoch 100	Loss 0.02296	Acc 0.72231
> epoch 150	Loss 0.02257	Acc 0.78678
> epoch 200	Loss 0.02223	Acc 0.82645
> epoch 250	Loss 0.02194	Acc 0.86281
> epoch 300	Loss 0.02153	Acc 0.89752
> epoch 350	Loss 0.02107	Acc 0.93719
test accuracy 0.668


In [1345]:
# Persist the trained network, read it back from disk, and register the
# reloaded copy in the config under 'Model'.
model_path = "net_" + dataset_name
torch.save(net, model_path)
net = torch.load(model_path)
config['Model'] = net

In [1346]:
# Generate adversarial examples with both attacks ('lpf' first, then 'df'),
# using the same dataset name, epsilon and iteration cap for each.
epsilon, maxiters = 0.2, 500
attack_args = (dataset_name, epsilon, maxiters)
adv_02_500_lpf = gen_adv(config, 'lpf', *attack_args)
adv_02_500_df = gen_adv(config, 'df', *attack_args)

HBox(children=(HTML(value='lpf'), FloatProgress(value=0.0, max=250.0), HTML(value='')))




HBox(children=(HTML(value='df'), FloatProgress(value=0.0, max=250.0), HTML(value='')))




## Evaluation

In [1347]:
# Metric switches: each name maps to whether it is reported. Insertion order
# matters because the enabled names become the result-table columns in order.
list_metrics = dict(
    SuccessRate=True,
    iter_means=False,
    iter_std=False,
    normdelta_median=True,
    normdelta_mean=True,
    n_std=True,
    weighted_median=True,
    weighted_mean=True,
    w_std=True,
    mean_dists_at_org=True,
    median_dists_at_org=False,
    mean_dists_at_tgt=True,
    mean_dists_at_org_weighted=True,
    mdow_std=False,
    median_dists_at_org_weighted=False,
    mean_dists_at_tgt_weighted=True,
    mdtw_std=False,
    prop_same_class_arg_org=False,
    prop_same_class_arg_adv=False,
)

In [1353]:

def get_results(test_result, df_test, df_adv_lpf, df_adv_df, config, list_metrics, epsilon, num):
    '''
    Get the results of individual adversarial bias, group adversarial bias, and perturbation metrics.

    Parameters
    ----------
    test_result : sequence
        Elements 3 and 4 are passed to `calculate_distance`.
    df_test : pandas.DataFrame
        Original test set; must contain the sensitive-attribute column.
    df_adv_lpf, df_adv_df : pandas.DataFrame
        Adversarial-example frames for the two attacks. Both are modified in
        place: the sensitive-attribute column of `df_test` is copied into them.
    config : dict
        Experiment configuration. `config['sen_attribute']` is read, and
        `config['AdvData']`, `config['TestDataset']` and
        `config['ori_test_reindex']` are written.
    list_metrics : dict
        Metric name -> bool; forwarded to `get_metrics`, and the names flagged
        True become the columns (after 'Method') of the metrics table.
    epsilon
        Only used to label the output files.
    num
        Forwarded to `select_adversarial_examples` and used in the file names.

    Returns
    -------
    tuple
        `(result, all_metrics)`: `result` is the list of the five
        `check_discrimination` outputs (original, LPF, DF, random LPF,
        random DF) and `all_metrics` is the perturbation-metrics DataFrame.

    Notes
    -----
    Reads the module-level `dataset_name` to build the output file names, and
    pickles both results into the 'exp_result/' directory, which is created
    if it does not exist yet.
    '''
    # Take the sensitive attribute from the config that was passed in rather
    # than from whatever notebook-level variable happens to be in scope.
    sen_attribute = config['sen_attribute']

    df_adv_lpf[sen_attribute] = df_test[sen_attribute]
    df_adv_df[sen_attribute] = df_test[sen_attribute]

    distance = calculate_distance(test_result[3], test_result[4])
    # Call order is kept fixed (adv LPF, adv DF, random LPF, random DF) in case
    # the 'random' selection consumes shared random state.
    lpf_result = select_adversarial_examples(df_test, df_adv_lpf, distance, 'adv', sen_attribute, num)
    df_result = select_adversarial_examples(df_test, df_adv_df, distance, 'adv', sen_attribute, num)
    lpf_random_result = select_adversarial_examples(df_test, df_adv_lpf, distance, 'random', sen_attribute, num)
    df_random_result = select_adversarial_examples(df_test, df_adv_df, distance, 'random', sen_attribute, num)

    # Element 0 of each selection result is the mixed test set.
    lpf_mix_test = lpf_result[0]
    df_mix_test = df_result[0]
    lpf_random_mix_test = lpf_random_result[0]
    df_random_mix_test = df_random_result[0]

    # discrimination level of demographic parity in original test set, lpf set, and df set
    dis_orig_test = check_discrimination(df_adv_lpf, sen_attribute, 'orig_pred')
    print('dis of DP on the original test set:', dis_orig_test)

    # the discrimination level of the proposed method in mixed lpf, mixed df, and random mixed set
    dis_lpf_mix_test = check_discrimination(lpf_mix_test, sen_attribute, 'orig_pred')
    print('dis of DP on the LPF test set with Proposed LPF method:', dis_lpf_mix_test)

    dis_df_mix_test = check_discrimination(df_mix_test, sen_attribute, 'orig_pred')
    print('dis of DP on the DF test set with proposed DF method:', dis_df_mix_test)

    dis_lpf_random_mix_test = check_discrimination(lpf_random_mix_test, sen_attribute, 'orig_pred')
    print('dis of DP on the LPF test set with random LPF method:', dis_lpf_random_mix_test)

    dis_df_random_mix_test = check_discrimination(df_random_mix_test, sen_attribute, 'orig_pred')
    print('dis of DP on the DF test set with random DF method:', dis_df_random_mix_test)

    result = [dis_orig_test, dis_lpf_mix_test, dis_df_mix_test, dis_lpf_random_mix_test, dis_df_random_mix_test]
    path = 'exp_result/'
    # Make sure the output directory exists; otherwise open() below fails with
    # FileNotFoundError on a fresh checkout.
    os.makedirs(path, exist_ok=True)
    file_name = dataset_name + '' + str(epsilon) + ' num ' + str(num) + ' dis result'
    full_path = os.path.join(path, file_name)

    with open(full_path, 'wb') as to_write:
        pickle.dump(result, to_write)

    # selected adversarial examples
    selected_lpf_reindex = lpf_result[1].reset_index(drop=True)
    selected_df_reindex = df_result[1].reset_index(drop=True)
    selected_lpf_random_reindex = lpf_random_result[1].reset_index(drop=True)
    selected_df_random_reindex = df_random_result[1].reset_index(drop=True)

    # original examples corresponding to the selected adversarial examples
    selected_lpf_test_reindex = lpf_result[2].reset_index(drop=True)
    selected_df_test_reindex = df_result[2].reset_index(drop=True)
    selected_lpf_random_test_reindex = lpf_random_result[2].reset_index(drop=True)
    selected_df_random_test_reindex = df_random_result[2].reset_index(drop=True)

    ori_test_reindex = df_test.reset_index(drop=True)

    # Register the selections in the config so get_metrics can read them.
    config['AdvData'] = {'LowProFool': selected_lpf_reindex, 'Deepfool': selected_df_reindex,
                         'Random_LPF': selected_lpf_random_reindex, 'Random_DF': selected_df_random_reindex}

    config['TestDataset'] = {'LowProFool': selected_lpf_test_reindex, 'Deepfool': selected_df_test_reindex,
                             'Random_LPF': selected_lpf_random_test_reindex, 'Random_DF': selected_df_random_test_reindex}
    config['ori_test_reindex'] = ori_test_reindex

    all_metrics = get_metrics(config, list_metrics)
    # Columns: 'Method' followed by the enabled metric names, in dict order.
    all_metrics = pd.DataFrame(all_metrics, columns=['Method'] + [k for k, v in list_metrics.items() if v])
    file_name = dataset_name + '' + str(epsilon) + ' num ' + str(num) + ' perturbation result'
    full_path = os.path.join(path, file_name)
    all_metrics.to_pickle(full_path)
    return result, all_metrics

In [1354]:
# `num` is 20% of the test-set row count; it is forwarded to get_results.
num = int(0.2 * df_test.shape[0])
# Pass the `epsilon` used to generate the adversarial examples instead of
# repeating the literal, so the saved result files are always labelled with
# the value that was actually used.
result = get_results(test_result, df_test, adv_02_500_lpf, adv_02_500_df, config, list_metrics, epsilon, num)

dis of DP on the original test set: (-0.01664832140891581, 65, 27, 109, 49, 0.7065217391304348, 0.689873417721519)
dis of DP on the LPF test set with Proposed LPF method: (0.39157952669235, 42, 50, 134, 24, 0.45652173913043476, 0.8481012658227848)
dis of DP on the DF test set with proposed DF method: (0.38070996147495867, 43, 49, 134, 24, 0.4673913043478261, 0.8481012658227848)
dis of DP on the LPF test set with random LPF method: (0.002476609796367657, 58, 34, 100, 58, 0.6304347826086957, 0.6329113924050633)
dis of DP on the DF test set with random DF method: (0.006053935057787507, 60, 32, 104, 54, 0.6521739130434783, 0.6582278481012658)


In [1355]:
# Second element of the (result, all_metrics) tuple returned by get_results:
# the perturbation-metrics table.
result[1]

Unnamed: 0,Method,SuccessRate,normdelta_median,normdelta_mean,n_std,weighted_median,weighted_mean,w_std,mean_dists_at_org,mean_dists_at_tgt,mean_dists_at_org_weighted,mean_dists_at_tgt_weighted
0,LowProFool,0.96,0.211955,0.197449,0.114948,0.019158,0.023317,0.016723,0.55827,0.623987,0.10218,0.151405
1,Deepfool,0.94,0.106596,0.108978,0.085496,0.02769,0.032593,0.026229,0.55827,0.623987,0.10218,0.151405
2,Random_LPF,0.92,0.208126,0.212629,0.113996,0.025367,0.03429,0.029792,0.673232,0.504945,0.083925,0.076118
3,Random_DF,0.92,0.12076,0.134818,0.096815,0.039039,0.03959,0.029053,0.546718,0.788273,0.114417,0.158181
