# 3: Test Global Weighting Case Retrieval

In [1]:
import pandas as pd
import numpy as np
import pickle
import time
import keras

from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity
from sklearn.preprocessing import MinMaxScaler

from collections import Counter

from copy import deepcopy

from keras.models import load_model

# My own algorithms
from Weighted_KNN_Classifier import GlobalWeightedKNN

Using TensorFlow backend.


In [2]:
# Load the per-technique feature weights saved by the previous notebook.
# Each .npy file stores a single pickled Python object inside a 0-d object
# array; `.item()` unwraps it. The objects are later indexed by feature name,
# so they are presumably dicts of {feature name: weight} -- verify against the
# notebook that wrote them.
#
# allow_pickle=True is required on NumPy >= 1.16.3, where np.load refuses
# pickled object arrays by default. Unpickling can execute arbitrary code, so
# only load files you produced yourself.
connection_weights = np.load('connection_weights.npy', allow_pickle=True).item()
garsons_weights = np.load('garsons_weights.npy', allow_pickle=True).item()
sensitivity_weights = np.load('sensitivity_weights.npy', allow_pickle=True).item()
perturb_weights = np.load('perturb_weights.npy', allow_pickle=True).item()

# Order matters: the evaluation cell labels its results by position in this list
# (perturbation, sensitivity, Garson's algorithm, connection weights).
global_weights = [perturb_weights, sensitivity_weights, garsons_weights, connection_weights]

In [3]:
# Read the processed dataset purely to recover the feature names from its
# column labels (per the original note, this file has no target column).
df = pd.read_csv(filepath_or_buffer="processed_df.csv")
feature_names = df.columns

In [4]:
#### Build one weight vector per feature-weighting technique
# Each inner list follows the column order of `feature_names`; a feature that
# is absent from a technique's weight mapping is skipped rather than given a
# default value.
# NOTE(review): skipped features shorten that technique's vector -- confirm the
# result still lines up with the columns of X_train.
final_weights = [
    [technique_weights[name] for name in feature_names if name in technique_weights]
    for technique_weights in global_weights
]

In [5]:
# Keep only the magnitude of every weight: the sign is deliberately ignored,
# so each value is replaced by its absolute value before the per-technique
# lists are stacked into a single NumPy array.
final_weights = np.array(
    [[abs(weight) for weight in technique_row] for technique_row in final_weights]
)

## Perform Sanity Checks

In [6]:
# Load the previously saved baseline k-NN classifier and the neural network.
# The pickle file is opened in a `with` block so the handle is closed as soon
# as the model has been read (the bare open() call left it dangling).
# NOTE: pickle.load can execute arbitrary code -- only load model files that
# come from a trusted source.
with open("k-nn_model.sav", 'rb') as knn_model_file:
    knn_clf = pickle.load(knn_model_file)
model = load_model("NN.h5")

In [7]:
# Load the saved train/test arrays in one pass; the file names are listed in
# the same order as the variables they are unpacked into.
X_train, X_test, y_train, y_test = (
    np.load(array_path)
    for array_path in ("X_train.npy", "X_test.npy", "y_train.npy", "y_test.npy")
)

In [8]:
# Sanity check: score both loaded models on the held-out test set.
knn_pred = knn_clf.predict(X_test)
# Sequential.predict_classes was removed in TensorFlow 2.6. Taking the argmax
# over the last axis of predict()'s output is the documented replacement for
# multi-class models and also works on older Keras versions.
# Assumes the network emits one score per class (the 3x3 confusion matrices
# below are consistent with that) -- TODO confirm against the model definition.
nn_pred = np.argmax(model.predict(X_test), axis=-1)
print("NN Accuracy Test:", accuracy_score(y_test, nn_pred))
print("k-NN Accuracy Test:", accuracy_score(y_test, knn_pred))

NN Accuracy Test: 0.8368857312018946
k-NN Accuracy Test: 0.6626702190645353


## Test Global Weighting Case Retrieval Techniques

In [9]:
# Number of neighbours used by every weighted k-NN below
k = 1

# Display names, listed in the same order as the rows of `final_weights`
# (perturbation, sensitivity, Garson's algorithm, connection weights).
technique_names = [
    "Perturbation",
    "Sensitivity",
    "Garson's Algorithm",
    "Connection Weights",
]

print("=====================================")
print("Global Searches: k =", k)
print("=====================================")
# Pair each name with its weight vector instead of branching on a loop index,
# so `technique` can never be unbound or stale.
for technique, technique_weights in zip(technique_names, final_weights):

    # Fit a k-NN whose distance is weighted by this technique's feature weights
    clf = GlobalWeightedKNN()
    clf.fit(X_train, y_train, k=k, weights=technique_weights)
    predictions = clf.predict(X_test)

    # Accuracy against the ground-truth labels
    print(technique + ": ", accuracy_score(y_test, predictions))
    print(confusion_matrix(y_test, predictions, labels=None, sample_weight=None))
    print(" ")

    # Agreement: fraction of test samples on which the weighted k-NN predicts
    # the same class as the neural network. The generator keeps its own index
    # variable, so it no longer shadows the outer loop's variables (the
    # original inner loop reused `i`).
    right = sum(
        1
        for sample_idx in range(len(nn_pred))
        if predictions[sample_idx] == nn_pred[sample_idx]
    )

    print("Agreement:", right/len(nn_pred))
    print(" ")
    print("-------------------------------------")

Global Searches: k = 1
Perturbation:  0.7480757844878626
[[ 182  188  259]
 [ 180 1064  382]
 [ 306  387 3808]]
 
Agreement: 0.7676139727649497
 
-------------------------------------
Sensitivity:  0.7495559502664298
[[ 182  183  264]
 [ 183 1068  375]
 [ 301  386 3814]]
 
Agreement: 0.7702782711663706
 
-------------------------------------
Garson's Algorithm:  0.7458555358200119
[[ 194  169  266]
 [ 208 1040  378]
 [ 298  398 3805]]
 
Agreement: 0.7579928952042628
 
-------------------------------------
Connection Weights:  0.7057430432208407
[[ 141  206  282]
 [ 203  964  459]
 [ 331  507 3663]]
 
Agreement: 0.7310538780343399
 
-------------------------------------
