In [1]:
import os
from fnmatch import fnmatch
from trainData_featExt_glcm import trainData_featExt_glcm
from trainData_featExt_gabor import trainData_featExt_gabor
import numpy as np
from PIL import Image
from filtering.filters import Median
from feature_extraction.lbp import Lbp
from sklearn.neighbors import NearestNeighbors
import sklearn
import time
import pickle
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import normalize

In [None]:
# Read the precomputed feature matrix for the training samples from its
# .npy file in the current working directory.
train_samples_feats_matrix_LBP = np.load(file="train_samples_feats_matrix_LBP.npy")

In [None]:
# Read the precomputed feature matrix for the validation samples from its
# .npy file in the current working directory.
valid_samples_feats_matrix_LBP = np.load(file="valid_samples_feats_matrix_LBP.npy")

In [2]:
# Read the precomputed feature matrix for the test samples from its
# .npy file in the current working directory.
test_samples_feats_matrix_LBP = np.load(file="test_samples_feats_matrix_LBP.npy")

In [None]:
#Train a KNN model using training data
# n_neighbors is passed by keyword: recent scikit-learn releases make the
# estimator constructor arguments keyword-only, so a positional 1000 raises
# a TypeError there. The keyword form works on old and new versions alike.
t0 = time.time()
knn_model = NearestNeighbors(n_neighbors=1000, algorithm='kd_tree', metric='euclidean')
knn_model.fit(train_samples_feats_matrix_LBP)
# Wall-clock seconds spent building the index.
print("traing time: ", time.time() - t0)

In [3]:
# Restore a previously fitted nearest-neighbour model from disk.
# The context manager closes the file handle deterministically; the bare
# open() used before left it to the garbage collector.
# NOTE(review): unpickling executes arbitrary code embedded in the file, so
# "KNN_LBP_500" must come from a trusted source.
with open("KNN_LBP_500", "rb") as model_file:
    knn_model = pickle.load(model_file)

In [None]:
# Query the fitted model with the training samples themselves to get, for
# every sample, the distances to and indices of its k nearest neighbours.
# NOTE(review): because the training matrix is passed explicitly, each sample
# presumably matches itself at distance 0 - confirm this is intended.
kth_dist, kth_ind = knn_model.kneighbors(X=train_samples_feats_matrix_LBP)
# Keep only the last column: one distance per sample, to the farthest of the
# returned neighbours (i.e. the k-th one).
kth_nearest_dist = kth_dist[:, -1]

In [5]:
# Find the k nearest training points for each *test* sample.
kth_dist2, kth_ind2 = knn_model.kneighbors(test_samples_feats_matrix_LBP)
# 1-D array: distance of each test sample to its k-th nearest training point.
kth_nearest_dist2 = kth_dist2[:, -1]

# Decision threshold on the k-th neighbour distance. According to the original
# note this is np.amax(kth_nearest_dist) computed on the training set, pasted
# in as a literal - TODO confirm it still matches the model that is loaded.
NOVELTY_THRESHOLD = 0.0204379390004

# A sample whose k-th neighbour distance exceeds the threshold is flagged as
# novel (1); everything else is normal (0). Deriving the labels straight from
# the distance vector keeps their length tied to the actual number of test
# samples instead of a hard-coded 10000.
labels = (kth_nearest_dist2 > NOVELTY_THRESHOLD).astype(int)

In [4]:
# Ground-truth labels: a run of 6000 zeros followed by a run of 4000 ones.
# Assumes the test matrix stores its normal samples first and its abnormal
# samples after them - TODO confirm against how the test features were built.
y1 = np.zeros(6000, dtype=int)  # normal
y2 = np.ones(4000, dtype=int)   # abnormal
y = np.concatenate([y1, y2])

In [6]:
# Threshold-dependent scores are computed from the hard 0/1 predictions.
f1_binary = sklearn.metrics.f1_score(y, labels, average = 'binary')
f1_macro = sklearn.metrics.f1_score(y, labels, average = 'macro')
# AUC is threshold-free, so it is fed the raw k-th neighbour distances
# (larger distance = more anomalous) rather than the thresholded labels.
auc = sklearn.metrics.roc_auc_score(y, kth_nearest_dist2)
Math_Cof = sklearn.metrics.matthews_corrcoef(y, labels)
# Pin the class order to [0, 1] so the matrix is always 2x2 and the
# four-way unpack cannot fail when the predictions contain a single class.
tn, fp, fn, tp  = sklearn.metrics.confusion_matrix(y, labels, labels=[0, 1]).ravel()
print ("f1_binary: ", f1_binary, "f1_macro: ", f1_macro, "MAth_cof: ", Math_Cof , " auc: ", auc)
print("confusion matrix", tn, fp, fn, tp)

('f1_binary: ', 0.57379330550062257, 'f1_macro: ', 0.32849110682310156, 'MAth_cof: ', 0.062234712496049116, ' auc: ', 0.57502297916666667)
('confusion matrix', 264, 5736, 83, 3917)


In [None]:
import matplotlib.pyplot as pl
import matplotlib
from skimage import io

# ROC curve from the raw anomaly scores (k-th neighbour distances).
fpr, tpr, thresh = sklearn.metrics.roc_curve(y, kth_nearest_dist2)

f, ax = pl.subplots(1, 1)
ax.plot(fpr, tpr, label="KNN")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC curves")
ax.legend(loc="lower right")
# these are matplotlib.patch.Patch properties
props = dict(boxstyle='square', facecolor='white', alpha=0.5)

# Fill the annotation box from the metrics computed in the evaluation cell
# rather than from pasted-in literals, so the figure cannot drift out of sync
# with the numbers that were actually measured.
textstr = '$F1-binary=%.2f$\n$F1-macro=%.2f$\n$math-corcoeffient=%.2f$\n$AUC=%.2f$'%(f1_binary, f1_macro, Math_Cof, auc)
# place the text box in the lower right, in axes coords
ax.text(0.95, 0.2, textstr, transform=ax.transAxes, fontsize=13,
    horizontalalignment='right', verticalalignment='bottom', bbox=props)

# Show the figure through matplotlib itself; no need to route through skimage.
pl.show()

In [None]:
# Write the ROC figure held in `f` to a PNG in the working directory.
roc_png_name = 'KNN_LBP_500.png'
f.savefig(roc_png_name)