# 5: Test Case Retrieval With Local Methods

In [1]:
import pandas as pd
import numpy as np
import pickle
import time

from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier

from collections import Counter

from copy import deepcopy

import lime
import lime.lime_tabular

from keras.models import Sequential, load_model
from keras.layers import Dense, Activation
from keras import backend as K

# My own algorithms
from Weighted_KNN_Classifier import *

Using TensorFlow backend.


In [2]:
# Load the processed dataset; its column labels are kept separately below.
df = pd.read_csv("processed_df.csv")
# Column labels; their count is later used to slice the local-linear weight matrix.
# NOTE(review): assumes every column is a model input feature (no target column) — confirm.
feature_names = df.columns

## Load Data Etc.

In [3]:
# Number of neighbours per query; passed to both the scikit-learn k-NN and the weighted k-NN below.
k = 1

In [4]:
# Load the previously trained models from disk.
# NOTE(review): unpickling executes arbitrary code — only load "k-nn_model.sav" from a trusted source.
with open("k-nn_model.sav", "rb") as knn_file:  # context manager closes the file handle
    knn_clf = pickle.load(knn_file)
model = load_model("NN.h5")

In [5]:
# Feature matrices for the training and test sets
X_train, X_test = np.load("X_train.npy"), np.load("X_test.npy")
# Corresponding class labels
y_train, y_test = np.load("y_train.npy"), np.load("y_test.npy")

In [6]:
# Local linear model arrays; the test array supplies the per-query weights for the weighted k-NN below
X_train_grad_LIME, X_test_grad_LIME = (
    np.load("X_train_grad_LIME.npy"),
    np.load("X_test_grad_LIME.npy"),
)

# Train/test arrays for each attribution technique, searched directly by k-NN below
X_train_intgrad, X_test_intgrad = (
    np.load("X_train_intgrad.npy"),
    np.load("X_test_intgrad.npy"),
)
X_train_deeplift, X_test_deeplift = (
    np.load("X_train_deeplift.npy"),
    np.load("X_test_deeplift.npy"),
)
X_train_lrp, X_test_lrp = (
    np.load("X_train_lrp.npy"),
    np.load("X_test_lrp.npy"),
)

## Contributions Search - Novel Method
Search for similar cases based on the contribution of each feature to the prediction, <i>not</i> on the raw feature values.

In [7]:
# Class predicted by the neural network for every test instance.
# `Sequential.predict_classes` is deprecated and removed in newer Keras/TensorFlow
# releases, so its logic is reproduced on top of `predict`:
#   - several output units -> index of the largest output
#   - a single output unit -> threshold at 0.5
nn_proba = model.predict(X_test, verbose=0)
if nn_proba.shape[-1] > 1:
    nn_pred = nn_proba.argmax(axis=-1)
else:
    nn_pred = (nn_proba > 0.5).astype("int32")

In [8]:
# Each entry: [label, training representation, test representation].
# "k-NN*" searches the raw feature arrays; the "C-" entries search the
# attribution arrays loaded above instead.
# NOTE(review): "Integraded" is a typo in the printed label; left unchanged so
# the label matches previously recorded output.
techniques = [["k-NN*", X_train, X_test],
              ["C-DeepLIFT", X_train_deeplift, X_test_deeplift],
              ["C-Integraded Gradients", X_train_intgrad, X_test_intgrad],
              ["C-LRP", X_train_lrp, X_test_lrp]
             ]

for technique, train, test in techniques:

    print("==================================================")
    print(technique)
    print("==================================================")

    # Brute-force nearest-neighbour search over the chosen representation
    kNN = KNeighborsClassifier(n_neighbors=k, algorithm="brute")
    kNN.fit(train, y_train)

    knn_predictions_test = kNN.predict(test)
    print("Accuracy:", accuracy_score(y_test, knn_predictions_test))
    print(confusion_matrix(y_test, knn_predictions_test))
    print(" ")

    # Agreement = fraction of test cases where k-NN returns the same class as
    # the neural network, i.e. accuracy measured against nn_pred instead of y_test.
    print("Agreement:", accuracy_score(nn_pred, knn_predictions_test))

k-NN*
Accuracy: 0.6626702190645353
[[ 164  165  300]
 [ 218  787  621]
 [ 332  643 3526]]
 
Agreement: 0.6765837773830669
C-DeepLIFT
Accuracy: 0.7781231497927767
[[ 182  181  266]
 [ 174 1151  301]
 [ 284  293 3924]]
 
Agreement: 0.8561278863232682
C-Integraded Gradients
Accuracy: 0.798105387803434
[[ 215  194  220]
 [ 178 1198  250]
 [ 250  272 3979]]
 
Agreement: 0.8587921847246892
C-LRP
Accuracy: 0.7964772054470101
[[ 221  183  225]
 [ 177 1193  256]
 [ 260  274 3967]]
 
Agreement: 0.8592362344582594


## Local Linear Model - Nugent and Cunningham
We can use the coefficients of each query's local linear model as feature weights in a k-NN search over the ordinary Euclidean feature space.

$ Distance(\vec{q}, \vec{x}) = \sqrt{\sum_i |W_i| \, Difference(q_i, x_i)} $

In [10]:
# Weighted k-NN fitted on the raw training features; feature weights are supplied per query at predict time.
# NOTE(review): LocalWeightedKNN is presumably provided by the star import from Weighted_KNN_Classifier — confirm.
weighted_knn = LocalWeightedKNN()
weighted_knn.fit(X_train, y_train, k=k)

In [11]:
# Each entry: [label, per-query weight matrix]. Only the first
# len(feature_names) columns of the local linear model array are kept.
# NOTE(review): presumably any remaining columns hold non-feature terms
# (e.g. an intercept) — confirm against the notebook that produced the array.
techniques = [["Local Linear Model", X_test_grad_LIME[:, :len(feature_names)]]]

for technique, weights in techniques:

    print("==================================================")
    print(technique)
    print("==================================================")

    # perf_counter is monotonic, so a system clock adjustment during this
    # long-running loop cannot distort the measurement.
    start = time.perf_counter()

    # One query at a time: each test instance is matched using the absolute
    # value of its own weight row.
    predictions = [weighted_knn.predict(X_test[i], abs(weights[i]))
                   for i in range(len(X_test))]

    # Stop the clock before computing metrics so only retrieval is timed
    elapsed = time.perf_counter() - start

    print("Accuracy: ", accuracy_score(y_test, predictions))
    print(confusion_matrix(y_test, predictions))
    print("Time taken:", elapsed)
    print(" ")

    # Agreement = fraction of test cases where the weighted k-NN returns the
    # same class as the neural network (accuracy measured against nn_pred).
    print("Agreement:", accuracy_score(nn_pred, predictions))

Local Linear Model
Accuracy:  0.742599171107164
[[ 177  211  241]
 [ 181 1096  349]
 [ 336  421 3744]]
Time taken: 4627.263544797897
 
Agreement: 0.7656897572528123
