In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import tensorflow as tf

from Base import *

import sys
sys.path.insert(0, "/home/gregory/Desktop/CounterVision/Code")
from Checker import Checker
from Core import acc, prob
from Heuristics import *
from Learner import Learner
from Search import search
from Train import train


In [2]:
# Setup our datasets

p = 1.0

d = 6
n = 1000

X = np.zeros((n, d))
y = np.zeros((n, 1))
for i in range(n):
    X[i, :], y[i] = sample_1(p = p)
    
X = np.float32(X)
y = np.float32(y)
    
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size = 0.5)

X_neutral = np.zeros((100, d))
y_neutral = np.zeros((100, 1))
for i in range(100):
    X_neutral[i, :], y_neutral[i] = sample_uniform()
    
X_neutral = np.float32(X_neutral)
y_neutral = np.float32(y_neutral)

In [3]:
# Train a model

model = tf.keras.Sequential([
          tf.keras.layers.InputLayer(input_shape=(6)),
          tf.keras.layers.Dense(100, activation='relu'),
          tf.keras.layers.Dense(100, activation='relu'),
          tf.keras.layers.Dense(1)
          ])

def loss(model, inputs, labels):
    return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits = model(inputs), labels = labels))

print(model.summary())

train(model, loss, X_train, y_train, X_val, y_val, "Tabular/model", learning_rate = 0.01)
    
print("Test Acc: ", acc(model, X_test, y_test))
print("Neutral Acc: ", acc(model, X_neutral, y_neutral))
#print(model.get_layer("dense").get_weights())


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               700       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 101       
Total params: 10,901
Trainable params: 10,901
Non-trainable params: 0
_________________________________________________________________
None
Test Acc:  1.0
Neutral Acc:  0.8


In [4]:
y_hat_train = prob(model, X_train)
y_hat_val = prob(model, X_val)

checker = Checker("label")
learner = Learner(KNeighborsClassifier)
perturber_con = ContinuousPerturber()
perturber_cat = CategoricalPerturber()

# Search the continuous features
for index in range(6):
    
    print("\nSearching feature ", index)
    
    if index in [1, 2, 4]:
        heuristics = []
        for value in [-0.05, 0.05, -0.1, 0.1, -0.25, 0.25]:
            pert = np.zeros((1, d))
            pert[0, index] = value
            heuristics.append(pert)
        perturber = perturber_con
    
    if index in [0, 3, 5]:
        heuristics = []
        heuristics.append([index, 0])
        heuristics.append([index, 1])
        perturber = perturber_cat
        
    out = search(model, X_train, y_hat_train, heuristics, perturber, checker, learner, use_val = True, X_val = X_val, y_val = y_hat_val, use_acc = False, min_explainability = 0.8, verbose = True)



Searching feature  0

Heuristic:  [0, 0]
Success on Train:  192.0
Counts:  555 558 184 192
Train Metrics:  [0.99462366 0.95833333]
Success on Val:  35.0
Counts:  89 90 30 35
Validation Metrics:  [0.98888889 0.85714286]
Accepted


Heuristic:  [0, 1]
Success on Train:  188.0
Counts:  557 562 185 188
Train Metrics:  [0.9911032  0.98404255]
Success on Val:  51.0
Counts:  74 74 49 51
Validation Metrics:  [1.         0.96078431]
Accepted


Searching feature  1

Heuristic:  [[ 0.   -0.05  0.    0.    0.    0.  ]]
Success on Train:  0.0

Heuristic:  [[0.   0.05 0.   0.   0.   0.  ]]
Success on Train:  0.0

Heuristic:  [[ 0.  -0.1  0.   0.   0.   0. ]]
Success on Train:  0.0

Heuristic:  [[0.  0.1 0.  0.  0.  0. ]]
Success on Train:  0.0

Heuristic:  [[ 0.   -0.25  0.    0.    0.    0.  ]]
Success on Train:  0.0

Heuristic:  [[0.   0.25 0.   0.   0.   0.  ]]
Success on Train:  0.0

Searching feature  2

Heuristic:  [[ 0.    0.   -0.05  0.    0.    0.  ]]
Success on Train:  0.0

Heuristic:  [[0

In [5]:
# Selective data augmentation

model.load_weights("Tabular/model")

def augment(model, perturber, h, X, y):
    X_pert, y_pert = perturber.apply(model, X, h)
    s_pert = checker.check(y, y_pert)
    indices = np.where(s_pert == 1)[0]
    return X_pert[indices, :], y[indices]

perturber = perturber_cat

X_train_0, y_train_0 = augment(model, perturber, [5, 0], X_train, y_train)
X_train_1, y_train_1 = augment(model, perturber, [5, 1], X_train, y_train)

X_val_0, y_val_0 = augment(model, perturber, [5, 0], X_val, y_val)
X_val_1, y_val_1 = augment(model, perturber, [5, 1], X_val, y_val)

X_train_aug = np.vstack((X_train, X_train_0, X_train_1))
y_train_aug = np.vstack((y_train, y_train_0, y_train_1))

print("New training size: ", X_train_aug.shape[0])

X_val_aug = np.vstack((X_val, X_val_0, X_val_1))
y_val_aug = np.vstack((y_val, y_val_0, y_val_1))
    

train(model, loss, X_train_aug, y_train_aug, X_val_aug, y_val_aug, "Tabular/model_aug", learning_rate = 0.01, stopping_tol = 0.00001)

print("Test Acc: ", acc(model, X_test, y_test))
print("Neutral Acc: ", acc(model, X_neutral, y_neutral))
#print(model.get_layer("dense").get_weights())

y_hat_train = prob(model, X_train)
y_hat_val = prob(model, X_val)

heuristics = []
heuristics.append([5, 0])
heuristics.append([5, 1])
perturber = CategoricalPerturber()

out = search(model, X_train, y_hat_train, heuristics, perturber, checker, learner, use_val = True, X_val = X_val, y_val = y_hat_val, use_acc = False, min_explainability = 0.8, verbose = True)


New training size:  1074
Test Acc:  1.0
Neutral Acc:  1.0

Heuristic:  [5, 0]
Success on Train:  0.0

Heuristic:  [5, 1]
Success on Train:  0.0
