In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import warnings

warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np


from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import pickle
from tensorflow import keras

2023-06-12 13:12:27.658615: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-12 13:12:27.790331: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.3/lib64:/usr/local/cuda-11.3/lib64:
2023-06-12 13:12:27.790372: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-06-12 13:12:27.820189: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been regist

# Data and Model prep

In [2]:
# Root folder for the HMDA mortgage-approval splits and model files. The
# trailing slash matters: paths are built by plain string concatenation.
data_directory = "data/HMDA/"

# Read the four compressed CSV splits in the order test-X, test-y, train-X, train-y.
X_test, y_test, X_train, y_train = (
    pd.read_csv(data_directory + 'HMDA-MORTGAGE-APPROVAL_' + split_name + '.bz2')
    for split_name in ('Xtest', 'ytest', 'Xtrain', 'ytrain')
)

# Sanity check: one shape per line, training split first.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")

(650877, 35)
(650877, 1)
(278948, 35)
(278948, 1)


In [3]:
def load_model(model_type):
    """Load one of the pre-trained HMDA mortgage-approval models from ``data_directory``.

    Parameters
    ----------
    model_type : str
        ``'dt'``, ``'gbc'``, ``'lr'`` or ``'rf'`` selects the matching pickled
        model. Any other value falls back to the Keras MLP stored as ``.h5``.

    Returns
    -------
    object
        The unpickled model, or the model returned by
        ``keras.models.load_model`` for the fallback case.
    """
    pickled_models = {
        'dt': 'HMDA-MORTGAGE-APPROVAL_DT_scaling_20210205_014819.pkl',
        'gbc': 'HMDA-MORTGAGE-APPROVAL_GBC_scaling_20210205_014418.pkl',
        'lr': 'HMDA-MORTGAGE-APPROVAL_LR_scaling_20210205_012956.pkl',
        'rf': 'HMDA-MORTGAGE-APPROVAL_RF_scaling_20210205_013239.pkl',
    }

    filename = pickled_models.get(model_type)
    if filename is None:
        # NOTE(review): every unrecognised model_type (not only 'mlp') ends up
        # here, so a typo silently loads the MLP. Kept for backward compatibility.
        return keras.models.load_model(data_directory+'HMDA-MORTGAGE-APPROVAL_MLP_scaling_20210205_011811.h5')

    # SECURITY: pickle.load can execute arbitrary code; only use trusted model files.
    # The context manager closes the file handle, which the bare open() never did.
    with open(data_directory + filename, 'rb') as model_file:
        return pickle.load(model_file)

# Attack - Brute Force

In [4]:
from uret.utils.config import process_config_file

# Attack configuration passed to process_config_file for the pickled models.
cf = "configs/HMDA/brute.yml"

# Upper bound on the number of test rows drawn per predicted class.
num_samples = 1000

# Standardisation statistics are estimated on the training split only.
scaler = StandardScaler()
scaler.fit(X_train)

def feature_extractor(x):
    """Standardise raw feature rows with the fitted module-level ``scaler``.

    A two-dimensional ``x`` is transformed as a batch; anything else is
    treated as a single sample and wrapped into a one-row batch first.
    The transformed data is returned as a numpy array.
    """
    batch = x if np.ndim(x) == 2 else [x]
    return np.array(scaler.transform(batch))

# Standardised copy of the whole test split, used to pick the attack candidates.
x_transformed = scaler.transform(X_test)

# Create the output folder up front so a long attack run is not lost at save time.
os.makedirs('data/HMDA_adv_samples', exist_ok=True)

for mt in ['dt', 'gbc', 'lr', 'rf']:
    print("Model type:", mt)
    model = load_model(mt)

    # Hard labels predicted for every standardised test row.
    model_preds = np.argmax(model.predict_proba(x_transformed), axis=1)

    # Positions of the first num_samples rows predicted as class 0 and class 1.
    zero_inds = np.where(model_preds == 0)[0][:num_samples]
    one_inds = np.where(model_preds == 1)[0][:num_samples]

    # Attack inputs stay in raw feature space; feature_extractor standardises them.
    # Concatenating the arrays directly (instead of list(...) copies) also works
    # when one class has no predictions: an empty selection keeps its 2-D shape.
    raw_test_values = X_test.values
    task_samples = np.concatenate((raw_test_values[zero_inds], raw_test_values[one_inds]))

    orig_model_preds = np.argmax(model.predict_proba(feature_extractor(task_samples)), axis=1)

    explorer = process_config_file(cf, model, feature_extractor=feature_extractor, input_processor_list=[])
    adv_samples = explorer.explore(task_samples)

    adv_model_preds = np.argmax(model.predict_proba(feature_extractor(adv_samples)), axis=1)

    # Fraction of samples whose predicted label changed after the attack.
    success_rate = np.sum(adv_model_preds != orig_model_preds)/len(orig_model_preds)

    print("Success rate:", success_rate)
    print()

    # The context manager guarantees the pickle is flushed and the handle closed.
    with open('data/HMDA_adv_samples/brute_adv_samples_' + mt +'.p', 'wb') as out_file:
        pickle.dump([task_samples, adv_samples], out_file)

Model type: dt


100%|██████████████████████████████████████| 2000/2000 [00:18<00:00, 109.45it/s]


Success rate: 0.926

Model type: gbc


100%|███████████████████████████████████████| 2000/2000 [01:15<00:00, 26.39it/s]


Success rate: 0.5765

Model type: lr


100%|██████████████████████████████████████| 2000/2000 [00:10<00:00, 185.87it/s]


Success rate: 0.999

Model type: rf


100%|███████████████████████████████████████| 2000/2000 [53:56<00:00,  1.62s/it]

Success rate: 1.0






In [5]:
# Attack configuration passed to process_config_file for the Keras MLP.
cf = "configs/HMDA/brute_mlp.yml"

# Create the output folder up front so a long attack run is not lost at save time.
os.makedirs('data/HMDA_adv_samples', exist_ok=True)

for mt in ['mlp']:
    print("Model type:", mt)
    model = load_model(mt)

    # NOTE(review): argmax over axis 1 assumes model.predict returns one score
    # per class; with a single-column output it would always yield 0 -- confirm.
    model_preds = np.argmax(model.predict(x_transformed), axis=1)

    # Positions of the first num_samples rows predicted as class 0 and class 1.
    zero_inds = np.where(model_preds == 0)[0][:num_samples]
    one_inds = np.where(model_preds == 1)[0][:num_samples]

    # Attack inputs stay in raw feature space; feature_extractor standardises them.
    # Concatenating the arrays directly (instead of list(...) copies) also works
    # when one class has no predictions: an empty selection keeps its 2-D shape.
    raw_test_values = X_test.values
    task_samples = np.concatenate((raw_test_values[zero_inds], raw_test_values[one_inds]))

    orig_model_preds = np.argmax(model.predict(feature_extractor(task_samples)), axis=1)

    explorer = process_config_file(cf, model, feature_extractor=feature_extractor, input_processor_list=[])
    adv_samples = explorer.explore(task_samples)

    adv_model_preds = np.argmax(model.predict(feature_extractor(adv_samples)), axis=1)

    # Fraction of samples whose predicted label changed after the attack.
    success_rate = np.sum(adv_model_preds != orig_model_preds)/len(orig_model_preds)

    print("Success rate:", success_rate)
    print()

    # The context manager guarantees the pickle is flushed and the handle closed.
    with open('data/HMDA_adv_samples/brute_adv_samples_' + mt +'.p', 'wb') as out_file:
        pickle.dump([task_samples, adv_samples], out_file)

Model type: mlp


2023-06-12 14:08:20.126124: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-06-12 14:08:20.127098: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.3/lib64:/usr/local/cuda-11.3/lib64:
2023-06-12 14:08:20.127190: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.3/lib64:/usr/local/cuda-11.3/lib64:
2023-06-12 14:08:20.127274: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot



  0%|                                                  | 0/2000 [00:00<?, ?it/s]



  0%|                                          | 1/2000 [00:01<40:41,  1.22s/it]



  0%|                                          | 2/2000 [00:02<40:36,  1.22s/it]



  0%|                                          | 3/2000 [00:03<40:06,  1.21s/it]



  0%|                                          | 4/2000 [00:04<39:37,  1.19s/it]



  0%|                                          | 5/2000 [00:05<39:25,  1.19s/it]



  0%|▏                                         | 6/2000 [00:07<39:20,  1.18s/it]



  0%|▏                                         | 7/2000 [00:08<39:11,  1.18s/it]



  0%|▏                                         | 8/2000 [00:09<39:01,  1.18s/it]



  0%|▏                                         | 9/2000 [00:10<39:07,  1.18s/it]



  0%|▏                                        | 10/2000 [00:11<39:23,  1.19s/it]



  1%|▏                                        | 11/2000 [00:13<39:21,  1.19s/it]



  1%|▏                                        | 12/2000 [00:14<39:12,  1.18s/it]



  1%|▎                                        | 13/2000 [00:15<39:09,  1.18s/it]



  1%|▎                                        | 14/2000 [00:16<39:09,  1.18s/it]



  1%|▎                                        | 15/2000 [00:17<39:00,  1.18s/it]



  1%|▎                                        | 16/2000 [00:19<41:18,  1.25s/it]



  1%|▎                                        | 17/2000 [00:20<40:50,  1.24s/it]



  1%|▎                                        | 18/2000 [00:21<40:18,  1.22s/it]



  1%|▍                                        | 19/2000 [00:22<39:52,  1.21s/it]



  1%|▍                                        | 20/2000 [00:23<39:30,  1.20s/it]



  1%|▍                                        | 21/2000 [00:25<39:20,  1.19s/it]



  1%|▍                                        | 22/2000 [00:26<39:13,  1.19s/it]



  1%|▍                                        | 23/2000 [00:27<39:15,  1.19s/it]



  1%|▍                                        | 24/2000 [00:28<39:06,  1.19s/it]



  1%|▌                                        | 25/2000 [00:29<39:08,  1.19s/it]



  1%|▌                                        | 26/2000 [00:31<39:00,  1.19s/it]



  1%|▌                                        | 27/2000 [00:32<39:04,  1.19s/it]



  1%|▌                                        | 28/2000 [00:33<39:00,  1.19s/it]



  1%|▌                                        | 29/2000 [00:34<38:48,  1.18s/it]



  2%|▌                                        | 30/2000 [00:35<38:40,  1.18s/it]



  2%|▋                                        | 31/2000 [00:36<38:40,  1.18s/it]



  2%|▋                                        | 32/2000 [00:38<38:40,  1.18s/it]



  2%|▋                                        | 33/2000 [00:39<38:41,  1.18s/it]



  2%|▋                                        | 34/2000 [00:40<38:42,  1.18s/it]



  2%|▋                                        | 35/2000 [00:41<38:48,  1.19s/it]



  2%|▋                                        | 36/2000 [00:42<38:53,  1.19s/it]



  2%|▊                                        | 37/2000 [00:44<38:59,  1.19s/it]



  2%|▊                                        | 38/2000 [00:45<38:59,  1.19s/it]



  2%|▊                                        | 39/2000 [00:46<38:49,  1.19s/it]



  2%|▊                                        | 40/2000 [00:47<41:28,  1.27s/it]



  2%|▊                                        | 41/2000 [00:49<41:32,  1.27s/it]



  2%|▊                                        | 42/2000 [00:50<41:04,  1.26s/it]



  2%|▉                                        | 43/2000 [00:51<40:28,  1.24s/it]



  2%|▉                                        | 44/2000 [00:52<39:59,  1.23s/it]



  2%|▉                                        | 45/2000 [00:53<39:35,  1.22s/it]



  2%|▉                                        | 46/2000 [00:55<39:15,  1.21s/it]



  2%|▉                                        | 47/2000 [00:56<39:02,  1.20s/it]



  2%|▉                                        | 48/2000 [00:57<39:08,  1.20s/it]



  2%|█                                        | 49/2000 [00:58<38:53,  1.20s/it]



  2%|█                                        | 50/2000 [00:59<38:50,  1.20s/it]



  3%|█                                        | 51/2000 [01:01<38:41,  1.19s/it]



  3%|█                                        | 52/2000 [01:02<38:44,  1.19s/it]



  3%|█                                        | 53/2000 [01:03<38:39,  1.19s/it]



  3%|█                                        | 54/2000 [01:04<38:22,  1.18s/it]



  3%|█▏                                       | 55/2000 [01:05<38:17,  1.18s/it]



  3%|█▏                                       | 56/2000 [01:07<38:26,  1.19s/it]



  3%|█▏                                       | 57/2000 [01:08<38:25,  1.19s/it]



  3%|█▏                                       | 58/2000 [01:09<38:20,  1.18s/it]



  3%|█▏                                       | 59/2000 [01:10<38:09,  1.18s/it]



  3%|█▏                                       | 60/2000 [01:11<38:07,  1.18s/it]



  3%|█▎                                       | 61/2000 [01:12<38:04,  1.18s/it]



  3%|█▎                                       | 62/2000 [01:14<38:13,  1.18s/it]



  3%|█▎                                       | 63/2000 [01:15<38:08,  1.18s/it]



  3%|█▎                                       | 64/2000 [01:16<40:30,  1.26s/it]



  3%|█▎                                       | 65/2000 [01:18<41:08,  1.28s/it]



  3%|█▎                                       | 66/2000 [01:19<40:53,  1.27s/it]



  3%|█▎                                       | 67/2000 [01:20<40:20,  1.25s/it]



  3%|█▍                                       | 68/2000 [01:21<39:42,  1.23s/it]



  3%|█▍                                       | 69/2000 [01:22<39:08,  1.22s/it]



  4%|█▍                                       | 70/2000 [01:24<38:46,  1.21s/it]



  4%|█▍                                       | 71/2000 [01:25<38:22,  1.19s/it]



  4%|█▍                                       | 72/2000 [01:26<38:10,  1.19s/it]



  4%|█▍                                       | 73/2000 [01:27<38:05,  1.19s/it]



  4%|█▌                                       | 74/2000 [01:28<38:02,  1.19s/it]



  4%|█▌                                       | 75/2000 [01:29<38:05,  1.19s/it]



  4%|█▌                                       | 76/2000 [01:31<37:49,  1.18s/it]



  4%|█▌                                       | 77/2000 [01:32<37:50,  1.18s/it]



  4%|█▌                                       | 78/2000 [01:33<37:59,  1.19s/it]



  4%|█▌                                       | 79/2000 [01:34<37:53,  1.18s/it]



  4%|█▋                                       | 80/2000 [01:35<37:44,  1.18s/it]



  4%|█▋                                       | 81/2000 [01:37<37:39,  1.18s/it]



  4%|█▋                                       | 82/2000 [01:38<37:39,  1.18s/it]



  4%|█▋                                       | 83/2000 [01:39<37:39,  1.18s/it]



  4%|█▋                                       | 84/2000 [01:40<37:38,  1.18s/it]



  4%|█▋                                       | 85/2000 [01:41<37:46,  1.18s/it]



  4%|█▊                                       | 86/2000 [01:42<37:55,  1.19s/it]



  4%|█▊                                       | 87/2000 [01:44<37:57,  1.19s/it]



  4%|█▊                                       | 88/2000 [01:45<40:25,  1.27s/it]



  4%|█▊                                       | 89/2000 [01:46<40:41,  1.28s/it]



  4%|█▊                                       | 90/2000 [01:48<40:24,  1.27s/it]



  5%|█▊                                       | 91/2000 [01:49<39:49,  1.25s/it]



  5%|█▉                                       | 92/2000 [01:50<39:19,  1.24s/it]



  5%|█▉                                       | 93/2000 [01:51<38:53,  1.22s/it]



  5%|█▉                                       | 94/2000 [01:52<38:37,  1.22s/it]



  5%|█▉                                       | 95/2000 [01:54<38:24,  1.21s/it]



  5%|█▉                                       | 96/2000 [01:55<38:07,  1.20s/it]



  5%|█▉                                       | 97/2000 [01:56<37:55,  1.20s/it]



  5%|██                                       | 98/2000 [01:57<37:58,  1.20s/it]



  5%|██                                       | 99/2000 [01:58<37:40,  1.19s/it]



  5%|██                                      | 100/2000 [02:00<37:41,  1.19s/it]



  5%|██                                      | 101/2000 [02:01<37:36,  1.19s/it]



  5%|██                                      | 102/2000 [02:02<37:36,  1.19s/it]



  5%|██                                      | 103/2000 [02:03<37:40,  1.19s/it]



  5%|██                                      | 104/2000 [02:04<37:29,  1.19s/it]



  5%|██                                      | 105/2000 [02:06<37:21,  1.18s/it]



  5%|██                                      | 106/2000 [02:07<37:25,  1.19s/it]



  5%|██▏                                     | 107/2000 [02:08<37:37,  1.19s/it]



  5%|██▏                                     | 108/2000 [02:09<37:45,  1.20s/it]



  5%|██▏                                     | 109/2000 [02:10<37:38,  1.19s/it]



  6%|██▏                                     | 110/2000 [02:12<37:37,  1.19s/it]



  6%|██▏                                     | 111/2000 [02:13<37:27,  1.19s/it]



  6%|██▏                                     | 112/2000 [02:14<39:30,  1.26s/it]



  6%|██▎                                     | 113/2000 [02:15<40:06,  1.28s/it]



  6%|██▎                                     | 114/2000 [02:17<39:56,  1.27s/it]



  6%|██▎                                     | 115/2000 [02:18<39:26,  1.26s/it]



  6%|██▎                                     | 116/2000 [02:19<39:09,  1.25s/it]



  6%|██▎                                     | 117/2000 [02:20<38:53,  1.24s/it]



  6%|██▎                                     | 118/2000 [02:22<38:22,  1.22s/it]



  6%|██▍                                     | 119/2000 [02:23<37:58,  1.21s/it]



  6%|██▍                                     | 120/2000 [02:24<37:54,  1.21s/it]



  6%|██▍                                     | 121/2000 [02:25<37:46,  1.21s/it]



  6%|██▍                                     | 122/2000 [02:26<37:33,  1.20s/it]



  6%|██▍                                     | 123/2000 [02:27<37:27,  1.20s/it]



  6%|██▍                                     | 124/2000 [02:29<37:27,  1.20s/it]



  6%|██▌                                     | 125/2000 [02:30<37:34,  1.20s/it]



  6%|██▌                                     | 126/2000 [02:31<37:32,  1.20s/it]



  6%|██▌                                     | 127/2000 [02:32<37:19,  1.20s/it]



  6%|██▌                                     | 128/2000 [02:33<37:22,  1.20s/it]



  6%|██▌                                     | 129/2000 [02:35<37:11,  1.19s/it]



  6%|██▌                                     | 130/2000 [02:36<37:11,  1.19s/it]



  7%|██▌                                     | 131/2000 [02:37<37:09,  1.19s/it]



  7%|██▋                                     | 132/2000 [02:38<37:17,  1.20s/it]



  7%|██▋                                     | 133/2000 [02:39<37:15,  1.20s/it]



  7%|██▋                                     | 134/2000 [02:41<37:05,  1.19s/it]



  7%|██▋                                     | 135/2000 [02:42<36:58,  1.19s/it]



  7%|██▋                                     | 136/2000 [02:43<36:58,  1.19s/it]



  7%|██▋                                     | 137/2000 [02:45<39:45,  1.28s/it]



  7%|██▊                                     | 138/2000 [02:46<40:06,  1.29s/it]



  7%|██▊                                     | 139/2000 [02:47<39:19,  1.27s/it]



  7%|██▊                                     | 140/2000 [02:48<38:50,  1.25s/it]



  7%|██▊                                     | 141/2000 [02:49<38:28,  1.24s/it]



  7%|██▊                                     | 142/2000 [02:51<38:02,  1.23s/it]



  7%|██▊                                     | 143/2000 [02:52<37:39,  1.22s/it]



  7%|██▉                                     | 144/2000 [02:53<37:19,  1.21s/it]



  7%|██▉                                     | 145/2000 [02:54<37:10,  1.20s/it]



  7%|██▉                                     | 146/2000 [02:55<36:59,  1.20s/it]



  7%|██▉                                     | 147/2000 [02:57<36:48,  1.19s/it]



  7%|██▉                                     | 148/2000 [02:58<36:44,  1.19s/it]



  7%|██▉                                     | 149/2000 [02:59<36:50,  1.19s/it]



  8%|███                                     | 150/2000 [03:00<37:00,  1.20s/it]



  8%|███                                     | 151/2000 [03:01<37:01,  1.20s/it]



  8%|███                                     | 152/2000 [03:03<37:14,  1.21s/it]



  8%|███                                     | 153/2000 [03:04<37:16,  1.21s/it]



  8%|███                                     | 154/2000 [03:05<37:05,  1.21s/it]



  8%|███                                     | 155/2000 [03:06<37:02,  1.20s/it]



  8%|███                                     | 156/2000 [03:07<36:47,  1.20s/it]



  8%|███▏                                    | 157/2000 [03:09<36:40,  1.19s/it]



  8%|███▏                                    | 158/2000 [03:10<36:43,  1.20s/it]



  8%|███▏                                    | 159/2000 [03:11<36:38,  1.19s/it]



  8%|███▏                                    | 160/2000 [03:12<36:27,  1.19s/it]



  8%|███▏                                    | 161/2000 [03:14<38:56,  1.27s/it]



  8%|███▏                                    | 162/2000 [03:15<39:25,  1.29s/it]



  8%|███▎                                    | 163/2000 [03:16<39:01,  1.27s/it]



  8%|███▎                                    | 164/2000 [03:17<38:24,  1.26s/it]



  8%|███▎                                    | 165/2000 [03:19<37:57,  1.24s/it]



  8%|███▎                                    | 166/2000 [03:20<37:36,  1.23s/it]



  8%|███▎                                    | 167/2000 [03:21<37:24,  1.22s/it]



  8%|███▎                                    | 168/2000 [03:22<37:01,  1.21s/it]



  8%|███▍                                    | 169/2000 [03:23<36:44,  1.20s/it]



  8%|███▍                                    | 170/2000 [03:25<36:35,  1.20s/it]



  9%|███▍                                    | 171/2000 [03:26<36:31,  1.20s/it]



  9%|███▍                                    | 172/2000 [03:27<36:22,  1.19s/it]



  9%|███▍                                    | 173/2000 [03:28<36:25,  1.20s/it]



  9%|███▍                                    | 174/2000 [03:29<36:28,  1.20s/it]



  9%|███▌                                    | 175/2000 [03:31<36:23,  1.20s/it]



  9%|███▌                                    | 176/2000 [03:32<36:10,  1.19s/it]



  9%|███▌                                    | 177/2000 [03:33<36:03,  1.19s/it]



  9%|███▌                                    | 178/2000 [03:34<35:59,  1.19s/it]



  9%|███▌                                    | 179/2000 [03:35<35:57,  1.18s/it]



  9%|███▌                                    | 180/2000 [03:36<35:59,  1.19s/it]



  9%|███▌                                    | 181/2000 [03:38<35:59,  1.19s/it]



  9%|███▋                                    | 182/2000 [03:39<36:01,  1.19s/it]



  9%|███▋                                    | 183/2000 [03:40<36:01,  1.19s/it]



  9%|███▋                                    | 184/2000 [03:41<35:51,  1.18s/it]



  9%|███▋                                    | 185/2000 [03:43<37:54,  1.25s/it]



  9%|███▋                                    | 186/2000 [03:44<38:32,  1.27s/it]



  9%|███▋                                    | 187/2000 [03:45<38:17,  1.27s/it]



  9%|███▊                                    | 188/2000 [03:46<37:44,  1.25s/it]



  9%|███▊                                    | 189/2000 [03:48<37:03,  1.23s/it]



 10%|███▊                                    | 190/2000 [03:49<36:46,  1.22s/it]



 10%|███▊                                    | 191/2000 [03:50<36:31,  1.21s/it]



 10%|███▊                                    | 192/2000 [03:51<36:15,  1.20s/it]



 10%|███▊                                    | 193/2000 [03:52<36:04,  1.20s/it]

