In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import tensorflow as tf

import sys
# NOTE(review): absolute, machine-specific path. The notebook only imports the
# project modules below on a machine where this directory exists; presumably
# that is where Checker/Core/Heuristics/Learner/Search/Train live -- confirm,
# and consider a path relative to the repository instead.
sys.path.insert(0, "/home/gregory/Desktop/CounterVision/Code")
from Checker import Checker
from Core import acc, prob
# NOTE(review): wildcard import. ContinuousPerturber and CategoricalPerturber,
# used in a later cell, are not imported by name anywhere in this cell, so they
# presumably come from here -- confirm and import them explicitly.
from Heuristics import *
from Learner import Learner
from Search import search
from Train import train

In [2]:
# Fixed seed so the train/validation/test membership (and therefore every
# number reported further down) is the same on each Restart & Run All.
SEED = 0

# data.csv has no header row; column 0 holds the label and the remaining
# columns hold the features.
data = pd.read_csv("data.csv", header = None).values

X = data[:, 1:].astype(np.float32)

# Keep the labels as a float32 column vector of shape (n, 1).
y = data[:, 0].astype(np.float32).reshape(-1, 1)

# 75% train; the remaining 25% is split evenly into validation and test.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size = 0.25, random_state = SEED)
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout, y_holdout, test_size = 0.5, random_state = SEED)

In [3]:
# Small fully connected network: 46 inputs -> 20 -> 20 -> 1.
# The final Dense layer has no activation; the loss defined in this cell feeds
# the model output to sigmoid_cross_entropy_with_logits, i.e. it is a logit.
model = tf.keras.Sequential([
          # input_shape must be a shape tuple; "(46)" is just the int 46.
          tf.keras.layers.InputLayer(input_shape=(46,)),
          tf.keras.layers.Dense(20, activation='relu'),
          tf.keras.layers.Dense(20, activation='relu'),
          tf.keras.layers.Dense(1)
          ])

def loss(model, inputs, labels):
    """Return the mean sigmoid cross-entropy of ``model`` on a batch.

    Args:
        model: callable applied to ``inputs``; its result is used as logits.
        inputs: batch passed to ``model``.
        labels: targets compared against the logits.

    Returns:
        The result of ``tf.reduce_mean`` over the per-element cross-entropy.
    """
    logits = model(inputs)
    per_element = tf.nn.sigmoid_cross_entropy_with_logits(labels = labels, logits = logits)
    return tf.reduce_mean(per_element)

# Model.summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line after the table.
model.summary()

# Fit on the training split, passing the validation split and the
# "Model/initial" name through to the project's train() helper.
train(model, loss, X_train, y_train, X_val, y_val, "Model/initial", learning_rate = 0.01)

print("Train Acc: ", acc(model, X_train, y_train))
print("Test Acc: ", acc(model, X_test, y_test))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                940       
_________________________________________________________________
dense_1 (Dense)              (None, 20)                420       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 21        
Total params: 1,381
Trainable params: 1,381
Non-trainable params: 0
_________________________________________________________________
None
Train Acc:  0.7166279369997418
Test Acc:  0.7226955848179706


In [6]:
# The search is run against the model's own outputs (from prob), not the
# ground-truth labels.
y_hat_train = prob(model, X_train)
y_hat_val = prob(model, X_val)

# Only the header row is needed for the feature names, so do not parse the
# data rows (nrows = 0). The first column is dropped.
colnames = list(pd.read_csv("heloc_dataset_v1.csv", nrows = 0))[1:]

# Input width, taken from the data instead of a hard-coded 46.
d = X_train.shape[1]
# Each feature occupies two adjacent columns: 2 * i is perturbed as the
# 'imputed' flag and 2 * i + 1 as the value.
n_features = d // 2

checker = Checker("label")
learner = Learner(KNeighborsClassifier)
perturber_con = ContinuousPerturber()
perturber_cat = CategoricalPerturber()

for i in range(n_features):

    print("Searching Feature ", colnames[i])

    flag_col = 2 * i
    value_col = flag_col + 1

    # Categorical heuristics: [column, new value] pairs for the flag column.
    flag_heuristics = [[flag_col, 0], [flag_col, 1]]

    # Continuous heuristics: additive (1, d) perturbations that touch only
    # this feature's value column.
    value_heuristics = []
    for value in [-0.05, 0.05, -0.125, 0.125]:
        pert = np.zeros((1, d))
        pert[0, value_col] = value
        value_heuristics.append(pert)

    stages = [
        ("Changing 'imputed' flag", flag_heuristics, perturber_cat),
        ("Changing value", value_heuristics, perturber_con),
    ]

    for message, heuristics, perturber in stages:
        print(message)
        out = search(model, X_train, y_hat_train, heuristics, perturber, checker, learner, use_val = True, X_val = X_val, y_val = y_hat_val, use_acc = False, min_explainability = 0.8, verbose = True)



Searching Feature  ExternalRiskEstimate
Changing 'imputed' flag

Heuristic:  [0, 0]
Success on Train:  1.0
Counts:  7745 7745 0 1
Train Metrics:  [1. 0.]
Rejected


Heuristic:  [0, 1]
Success on Train:  777.0
Counts:  6870 6969 304 777
Train Metrics:  [0.98579423 0.39124839]
Rejected

Changing value

Heuristic:  [[ 0.   -0.05  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
   0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
   0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
   0.    0.    0.    0.    0.    0.    0.    0.    0.    0.  ]]
Success on Train:  82.0
Counts:  7664 7664 1 82
Train Metrics:  [1.         0.01219512]
Rejected


Heuristic:  [[0.   0.05 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.  ]]
Success on Train:  95.0
Counts:  7650 7651 0 95
Train Metrics

Counts:  7514 7522 22 224
Train Metrics:  [0.99893645 0.09821429]
Rejected


Heuristic:  [[0.    0.    0.    0.    0.    0.    0.    0.    0.    0.125 0.    0.
  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   ]]
Success on Train:  269.0
Counts:  7473 7477 8 269
Train Metrics:  [0.99946503 0.02973978]
Rejected

Searching Feature  NumTrades60Ever2DerogPubRec
Changing 'imputed' flag

Heuristic:  [10, 0]
Success on Train:  0.0

Heuristic:  [10, 1]
Success on Train:  256.0
Counts:  7476 7490 33 256
Train Metrics:  [0.99813084 0.12890625]
Rejected

Changing value

Heuristic:  [[ 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   -0.05
   0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
   0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
   0.    0.    0.    0.    0.    0.    0.    0.    

Counts:  7726 7726 0 20
Train Metrics:  [1. 0.]
Rejected


Heuristic:  [[0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.   0.   0.05 0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.  ]]
Success on Train:  21.0
Counts:  7725 7725 0 21
Train Metrics:  [1. 0.]
Rejected


Heuristic:  [[ 0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
   0.     0.     0.     0.     0.     0.     0.     0.     0.    -0.125
   0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
   0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
   0.     0.     0.     0.     0.     0.   ]]
Success on Train:  48.0
Counts:  7698 7698 0 48
Train Metrics:  [1. 0.]
Rejected


Heuristic:  [[0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
  0.    0.    0.    0.    0.    0.    0.    0.125 0.    0.    0.    0.
  0.    0.    0.    0.    0.    0.    

Counts:  7733 7733 0 13
Train Metrics:  [1. 0.]
Rejected


Heuristic:  [28, 1]
Success on Train:  42.0
Counts:  7704 7704 0 42
Train Metrics:  [1. 0.]
Rejected

Changing value

Heuristic:  [[ 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
   0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
   0.    0.    0.    0.    0.   -0.05  0.    0.    0.    0.    0.    0.
   0.    0.    0.    0.    0.    0.    0.    0.    0.    0.  ]]
Success on Train:  122.0
Counts:  7621 7624 1 122
Train Metrics:  [0.99960651 0.00819672]
Rejected


Heuristic:  [[0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.05 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.  ]]
Success on Train:  144.0
Counts:  7601 7602 2 144
Train Metrics:  [0.99986846 0.01388889]
Rejected


Heuristic:  [[ 0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
  

Counts:  7629 7630 0 116
Train Metrics:  [0.99986894 0.        ]
Rejected


Heuristic:  [[0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
  0.    0.125 0.    0.    0.    0.    0.    0.    0.    0.   ]]
Success on Train:  92.0
Counts:  7653 7654 1 92
Train Metrics:  [0.99986935 0.01086957]
Rejected

Searching Feature  NumRevolvingTradesWBalance
Changing 'imputed' flag

Heuristic:  [38, 0]
Success on Train:  2.0
Counts:  7744 7744 0 2
Train Metrics:  [1. 0.]
Rejected


Heuristic:  [38, 1]
Success on Train:  461.0
Counts:  7240 7285 100 461
Train Metrics:  [0.99382292 0.21691974]
Rejected

Changing value

Heuristic:  [[ 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
   0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.
   0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.