# 6: Test Case Retrieval With Local Methods Using Deleted Data

This notebook also saves the filtered data for later plotting.

In [1]:
import pandas as pd
import numpy as np
import pickle
import time

from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier

from collections import Counter

from copy import deepcopy

import lime
import lime.lime_tabular

from keras.models import Sequential, load_model
from keras.layers import Dense, Activation
from keras import backend as K

# My own algorithms
from Weighted_KNN_Classifier import *

%matplotlib inline

Using TensorFlow backend.


In [2]:
# Load the preprocessed dataset; in the cells shown here only its column
# labels are used.
df = pd.read_csv("processed_df.csv")
# Column labels serve as lookup keys into the global weight dicts and as the
# column count used to slice the local-weight matrix further down.
feature_names = df.columns

## Load Data 

In [3]:
# Number of neighbours used by every k-NN variant evaluated in this notebook.
k = 1

In [4]:
# Load the trained Keras network from disk. Its class predictions are used to
# filter the training set and as the reference for the "Agreement" scores.
model = load_model("NN.h5")

In [5]:
# Feature matrices and label vectors of the train/test split.
X_train, X_test = np.load("X_train.npy"), np.load("X_test.npy")
y_train, y_test = np.load("y_train.npy"), np.load("y_test.npy")

In [6]:
# Local linear-model weights (labelled "Local Linear Model" further down).
X_train_grad_LIME, X_test_grad_LIME = (
    np.load("X_train_grad_LIME.npy"),
    np.load("X_test_grad_LIME.npy"),
)

# Per-instance attribution matrices, one train/test pair per technique
# (Integrated Gradients, DeepLIFT, LRP).
X_train_intgrad, X_test_intgrad = (
    np.load("X_train_intgrad.npy"),
    np.load("X_test_intgrad.npy"),
)
X_train_deeplift, X_test_deeplift = (
    np.load("X_train_deeplift.npy"),
    np.load("X_test_deeplift.npy"),
)
X_train_lrp, X_test_lrp = (
    np.load("X_train_lrp.npy"),
    np.load("X_test_lrp.npy"),
)

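## Delete Outlier Data for Final Evaluation

Training instances whose true label differs from the network's prediction are treated as outliers and removed from the training arrays.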

In [7]:
# Indices of training instances whose label disagrees with the network's own
# prediction; these rows are dropped from every training array below.
#
# The whole training set is scored in a single batched call rather than one
# forward pass per row. Both sides are flattened so the comparison stays
# element-wise whether the labels/predictions are 1-D or column-shaped.
train_pred = np.ravel(model.predict_classes(X_train))
del_idx = np.flatnonzero(train_pred != np.ravel(y_train)).tolist()

In [8]:
# Turn the label vector into an (n, 1) column so it can be row-filtered with
# the same code as the 2-D matrices in the next cell. reshape(-1, 1) yields
# the same result as np.array([y_train]).T for a 1-D input, but is idempotent:
# re-running this cell does not keep adding dimensions.
y_train = np.asarray(y_train).reshape(-1, 1)

In [9]:
# Arrays that must stay row-aligned: features, (column-shaped) labels and
# every attribution matrix computed for the training set.
to_modify = [X_train, y_train, X_train_grad_LIME,
             X_train_intgrad, X_train_deeplift, X_train_lrp]

# Drop the rows listed in del_idx from each array. tolist() already returns a
# fresh nested list, so no extra deepcopy is needed, and filtering against a
# set is a single pass per array instead of one list deletion per index.
# Each entry of `modified` is a nested Python list, in the order of to_modify.
drop_rows = set(del_idx)
modified = [
    [row for row_idx, row in enumerate(array.tolist()) if row_idx not in drop_rows]
    for array in to_modify
]

In [10]:
# Output file stems, in the same order as the entries of `modified`.
# The files are read back later with a ".npy" suffix.
names = ['X_train_del', 'y_train_del', 'X_train_grad_LIME_del',
         'X_train_intgrad_del', 'X_train_deeplift_del', 'X_train_lrp_del']

for position, (name, rows) in enumerate(zip(names, modified)):
    matrix = np.array(rows)
    if position == 1:
        # Entry 1 is y_train, stored as a column: flatten it back to 1-D.
        matrix = matrix.T[0]
    np.save(name, matrix)

In [11]:
# Reload the split, now using the filtered ("_del") training arrays.
X_train, X_test = np.load("X_train_del.npy"), np.load("X_test.npy")
y_train, y_test = np.load("y_train_del.npy"), np.load("y_test.npy")

# Filtered training-set attribution matrices, row-aligned with X_train.
X_train_grad_LIME, X_train_intgrad, X_train_deeplift, X_train_lrp = map(
    np.load,
    ["X_train_grad_LIME_del.npy", "X_train_intgrad_del.npy",
     "X_train_deeplift_del.npy", "X_train_lrp_del.npy"],
)

## Test Global Weighting

In [12]:
# Each file holds a dict (looked up by feature name below) wrapped in a 0-d
# object array; .item() unwraps it. NumPy >= 1.16.3 refuses to unpickle object
# arrays unless allow_pickle=True is passed, so it is set explicitly.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
connection_weights = np.load('connection_weights.npy', allow_pickle=True).item()
garsons_weights = np.load('garsons_weights.npy', allow_pickle=True).item()
sensitivity_weights = np.load('sensitivity_weights.npy', allow_pickle=True).item()
perturb_weights = np.load('perturb_weights.npy', allow_pickle=True).item()
# Order matters: the evaluation loop further down labels the techniques by
# their position in this list.
global_weights = [perturb_weights, sensitivity_weights, garsons_weights, connection_weights]

In [13]:
# One weight vector per global technique, ordered like the dataframe columns.
# Columns that have no entry in a technique's weight dict are skipped.
final_weights = [
    [weight_map[column] for column in feature_names if column in weight_map]
    for weight_map in global_weights
]

In [14]:
# Only the magnitude of each weight is used; convert to an array afterwards.
final_weights = np.array(
    [[abs(weight) for weight in weight_row] for weight_row in final_weights]
)

In [15]:
# Unweighted k-NN baseline on the raw features, using brute-force search.
knn_clf = KNeighborsClassifier(n_neighbors=k, algorithm="brute") 
# fit() is the last expression of the cell, so the notebook displays the
# fitted estimator's repr as the cell output.
knn_clf.fit(X_train, y_train)

KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=1, p=2,
           weights='uniform')

In [16]:
# Test-set predictions of the baseline k-NN and of the neural network;
# nn_pred is reused below as the reference for the "Agreement" scores.
knn_pred = knn_clf.predict(X_test)
nn_pred = model.predict_classes(X_test)
for label, test_pred in (("NN Accuracy Test:", nn_pred),
                         ("k-NN Accuracy Test:", knn_pred)):
    print(label, accuracy_score(y_test, test_pred))

NN Accuracy Test: 0.8368857312018946
k-NN Accuracy Test: 0.7079632918886916


In [17]:
print("=====================================")
print("Global Searches: k =", k)
print("=====================================")
# Display names in the same order as the rows of final_weights
# (perturbation, sensitivity, Garson's, connection weights). Pairing them
# with zip replaces the positional if/elif lookup, which had no fallback.
technique_names = ["Perturbation", "Sensitivity",
                   "Garson's Algorithm", "Connection Weights"]
for technique, technique_weights in zip(technique_names, final_weights):
    # Globally weighted k-NN: one fixed weight vector for every query.
    clf = GlobalWeightedKNN()
    clf.fit(X_train, y_train, k=k, weights=technique_weights)
    predictions = clf.predict(X_test)
    print(technique + ": ", accuracy_score(y_test, predictions))
    print(confusion_matrix(y_test, predictions, labels=None, sample_weight=None))
    print(" ")
    # Agreement: fraction of test instances where the weighted k-NN predicts
    # the same class as the network. A dedicated index name is used so the
    # outer loop variable is no longer shadowed.
    right = sum(1 for sample_idx in range(len(nn_pred))
                if predictions[sample_idx] == nn_pred[sample_idx])
    print("Agreement:", right/len(nn_pred))
    print(" ")
    print("-------------------------------------")

Global Searches: k = 1
Perturbation:  0.7867081113084665
[[  69  209  351]
 [  43 1154  429]
 [  57  352 4092]]
 
Agreement: 0.8453226761397277
 
-------------------------------------
Sensitivity:  0.7867081113084665
[[  72  209  348]
 [  45 1154  427]
 [  57  355 4089]]
 
Agreement: 0.8457667258732978
 
-------------------------------------
Garson's Algorithm:  0.7864120781527532
[[  74  206  349]
 [  39 1151  436]
 [  49  364 4088]]
 
Agreement: 0.8357015985790408
 
-------------------------------------
Connection Weights:  0.7504440497335702
[[  40  240  349]
 [  45 1055  526]
 [  45  481 3975]]
 
Agreement: 0.8069863824748372
 
-------------------------------------


## Test Local Weighting

In [18]:
# Network class predictions on the test set: the reference for the
# "Agreement" scores reported in this section.
nn_pred = model.predict_classes(X_test)

In [19]:
# (label, training representation, test representation) per technique.
# "k-NN*" runs on the raw features; the others on attribution matrices.
techniques = [("k-NN*", X_train, X_test),
              ("C-DeepLIFT", X_train_deeplift, X_test_deeplift),
              ("C-Integraded Gradients", X_train_intgrad, X_test_intgrad),
              ("C-LRP", X_train_lrp, X_test_lrp)]

for technique, train, test in techniques:
    print("==================================================")
    print(technique)
    print("==================================================")
    # Plain k-NN fitted on the chosen representation.
    kNN = KNeighborsClassifier(n_neighbors=k, algorithm="brute")
    kNN.fit(train, y_train)
    knn_predictions_test = kNN.predict(test)
    print("NN Lables Accuracy:", accuracy_score(y_test, knn_predictions_test))
    print(confusion_matrix(y_test, knn_predictions_test, labels=None, sample_weight=None))
    print(" ")
    # Fraction of test instances where k-NN and the network agree.
    right = sum(1 for sample_idx in range(len(nn_pred))
                if knn_predictions_test[sample_idx] == nn_pred[sample_idx])
    print("Agreement:", right/len(nn_pred))

k-NN*
NN Lables Accuracy: 0.7079632918886916
[[  55  208  366]
 [  51  858  717]
 [  51  580 3870]]
 
Agreement: 0.7538484310242747
C-DeepLIFT
NN Lables Accuracy: 0.8368857312018946
[[ 112  219  298]
 [  45 1309  272]
 [  56  212 4233]]
 
Agreement: 1.0
C-Integraded Gradients
NN Lables Accuracy: 0.8368857312018946
[[ 112  219  298]
 [  45 1309  272]
 [  56  212 4233]]
 
Agreement: 1.0
C-LRP
NN Lables Accuracy: 0.8368857312018946
[[ 112  219  298]
 [  45 1309  272]
 [  57  211 4233]]
 
Agreement: 0.9998519834221433


## Weighted L2
$$ \mathrm{Distance}(\vec{q}, \vec{x}) = \sqrt{\sum_i |W_i| \, (q_i - x_i)^2} $$

In [20]:
# Locally weighted k-NN (project class from Weighted_KNN_Classifier); the
# per-query weights are supplied at predict time in the next cell.
weighted_knn = LocalWeightedKNN()
weighted_knn.fit(X_train, y_train, k=k)

In [21]:
# (label, per-test-instance weight matrix); only the first
# len(feature_names) columns of the weight matrix are kept.
techniques = [("Local Linear Model", X_test_grad_LIME[:, :len(feature_names)])]

for technique, weights in techniques:
    print("==================================================")
    print(technique)
    print("==================================================")
    start = time.time()
    # One query at a time, each with the absolute value of its own weights.
    predictions = [
        weighted_knn.predict(X_test[row], abs(weights[row]))
        for row in range(len(X_test))
    ]
    print("Accuracy: ", accuracy_score(y_test, predictions))
    print(confusion_matrix(y_test, predictions, labels=None, sample_weight=None))
    print("Time taken:", time.time() - start)
    print(" ")
    # Fraction of test instances where the prediction matches the network's.
    right = sum(1 for sample_idx in range(len(nn_pred))
                if predictions[sample_idx] == nn_pred[sample_idx])
    print("Agreement:", right/len(nn_pred))

Local Linear Model
Accuracy:  0.7886323268206039
[[  81  247  301]
 [  43 1199  384]
 [  56  397 4048]]
Time taken: 3559.534276008606
 
Agreement: 0.8518354055654234
