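This notebook reports the adversarial-attack results on the HMDA mortgage-approval models: for each model type and attack strategy, the overall success rate and the average number of feature groups changed.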

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import warnings

warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np


from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import pickle
from tensorflow import keras

# Data and Model prep

In [2]:
data_directory = "data/HMDA/"

# Load the train split, then the test split; features (X) and labels (y)
# are stored in separate bz2-compressed CSV files.
X_train = pd.read_csv(data_directory + 'HMDA-MORTGAGE-APPROVAL_Xtrain.bz2')
y_train = pd.read_csv(data_directory + 'HMDA-MORTGAGE-APPROVAL_ytrain.bz2')
X_test = pd.read_csv(data_directory + 'HMDA-MORTGAGE-APPROVAL_Xtest.bz2')
y_test = pd.read_csv(data_directory + 'HMDA-MORTGAGE-APPROVAL_ytest.bz2')

# Sanity check: one shape per line, in the order train X, train y, test X, test y.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")

# The scaler is fitted on the training features only; feature_extractor
# reuses it for every later transform.
scaler = StandardScaler()
scaler.fit(X_train)

def feature_extractor(x):
    """Scale ``x`` with the module-level ``scaler`` and return a NumPy array.

    A 2-D input is passed to the scaler unchanged. Any other input is wrapped
    in a one-element list first, so a single sample is scaled as a batch of
    one and the result is still 2-D.
    """
    batch = x if np.ndim(x) == 2 else [x]
    return np.array(scaler.transform(batch))

(650877, 35)
(650877, 1)
(278948, 35)
(278948, 1)


In [3]:
def load_model(model_type):
    """Load one of the saved HMDA mortgage-approval models from ``data_directory``.

    Parameters
    ----------
    model_type : str
        'dt', 'gbc', 'lr' or 'rf' select the corresponding pickled model.
        Any other value (this notebook passes 'mlp') loads the Keras model
        stored in the ``.h5`` file.

    Returns
    -------
    The unpickled model object, or the loaded Keras model.
    """
    pickled_model_files = {
        'dt': 'HMDA-MORTGAGE-APPROVAL_DT_scaling_20210205_014819.pkl',
        'gbc': 'HMDA-MORTGAGE-APPROVAL_GBC_scaling_20210205_014418.pkl',
        'lr': 'HMDA-MORTGAGE-APPROVAL_LR_scaling_20210205_012956.pkl',
        'rf': 'HMDA-MORTGAGE-APPROVAL_RF_scaling_20210205_013239.pkl',
    }

    if model_type in pickled_model_files:
        # The context manager closes the file handle as soon as the model is
        # loaded; a bare pickle.load(open(...)) leaves it open.
        # NOTE(review): pickle.load can execute arbitrary code - only use it
        # on model files from a trusted source.
        with open(data_directory + pickled_model_files[model_type], 'rb') as model_file:
            return pickle.load(model_file)

    # Fallback kept for backward compatibility: every unrecognised model_type
    # loads the MLP.
    return keras.models.load_model(data_directory+'HMDA-MORTGAGE-APPROVAL_MLP_scaling_20210205_011811.h5')

In [5]:
# Column indices of the feature vector, partitioned into groups. A changed
# sample counts one "transform" per group containing at least one changed
# column. Columns not listed in any group are ignored.
feature_groups = [
    [12,13,14,15,16,17,18],
    [19,20,21,22,23,24,25,26,27],
    [6,7,8,9,10,11],
    [0,1],
    [33],
    [34],
    [4,5],
    [2,3]
]

# Column index -> group index, built once so the loops below do not rescan
# every group. setdefault keeps the first group that lists a column, which
# matches a first-match scan over feature_groups.
group_of_feature = {}
for group_index, group in enumerate(feature_groups):
    for feature_index in group:
        group_of_feature.setdefault(feature_index, group_index)

for mt in ['dt', 'gbc', 'lr', 'rf', 'mlp']:
    print("Model type:", mt)
    print('--------------------')
    model = load_model(mt)
    for ty in ['random', 'brute', 'lookup', 'simanneal']:
        # NOTE(review): this path starts with '../data/' while data_directory
        # is 'data/HMDA/' - confirm both resolve from the same working directory.
        # NOTE(review): pickle.load can execute arbitrary code - only use it
        # on sample files from a trusted source.
        with open('../data/HMDA_adv_samples/'+ ty +'_adv_samples_' + mt +'.p', 'rb') as samples_file:
            task_samples, adv_samples = pickle.load(samples_file)

        # The Keras model is queried through predict, the pickled models
        # through predict_proba; the predicted label is the argmax over axis 1.
        if mt == 'mlp':
            orig_model_preds = np.argmax(model.predict(feature_extractor(task_samples)),axis=1)
            adv_model_preds = np.argmax(model.predict(feature_extractor(adv_samples)),axis=1)
        else:
            orig_model_preds = np.argmax(model.predict_proba(feature_extractor(task_samples)),axis=1)
            adv_model_preds = np.argmax(model.predict_proba(feature_extractor(adv_samples)),axis=1)

        # An attack succeeds when the adversarial sample flips the prediction.
        success_rate = np.sum(adv_model_preds != orig_model_preds)/len(orig_model_preds)

        # For each successful attack, the column indices that differ between
        # the original and the adversarial sample. Kept as a plain list: the
        # index arrays have different lengths, and np.array() on such a ragged
        # sequence raises ValueError on NumPy >= 1.24.
        transform_differences = [
            np.where(task_samples[i] != adv_samples[i])[0]
            for i in range(len(task_samples))
            if orig_model_preds[i] != adv_model_preds[i]
        ]

        # Distinct feature groups touched by each successful attack.
        group_sets = [
            list({group_of_feature[col] for col in changed_columns if col in group_of_feature})
            for changed_columns in transform_differences
        ]

        # With no successful attack there is nothing to average; report NaN
        # explicitly so np.mean is never called on an empty list.
        average_transforms = np.mean([len(gs) for gs in group_sets]) if group_sets else np.nan

        print(ty)
        print("\tOverall Success rate:", success_rate)
        print("\tAverage transforms", average_transforms)
    print()

Model type: dt
--------------------
random
	Overall Success rate: 0.3805
	Average transforms 1.3035479632063074
brute
	Overall Success rate: 0.9205
	Average transforms 1.1265616512764802
lookup
	Overall Success rate: 0.8865
	Average transforms 1.6300056401579244
simanneal
	Overall Success rate: 0.6245
	Average transforms 7.427542033626901

Model type: gbc
--------------------
random
	Overall Success rate: 0.1445
	Average transforms 1.4325259515570934
brute
	Overall Success rate: 0.5765
	Average transforms 1.0823937554206418
lookup
	Overall Success rate: 0.264
	Average transforms 1.4090909090909092
simanneal
	Overall Success rate: 0.526
	Average transforms 7.515209125475285

Model type: lr
--------------------
random
	Overall Success rate: 0.3445
	Average transforms 1.3773584905660377
brute
	Overall Success rate: 0.999
	Average transforms 1.0535535535535536
lookup
	Overall Success rate: 0.686
	Average transforms 1.1158892128279883
simanneal
	Overall Success rate: 0.8385
	Average transfo