In [1]:
import os
from fnmatch import fnmatch
from trainData_featExt_glcm import trainData_featExt_glcm
from trainData_featExt_gabor import trainData_featExt_gabor
import numpy as np
from PIL import Image
from filtering.filters import Median
from feature_extraction.lbp import Lbp
from sklearn.neighbors import NearestNeighbors
import sklearn
import time
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import normalize
from sklearn import preprocessing
import pickle

In [None]:
# Gabor feature matrix of the training samples, read back from its saved .npy file
train_samples_feats_matrix_gabor = np.load(file="train_samples_feats_matrix_Gabor.npy")

In [None]:
# LBP feature matrix of the training samples, read back from its saved .npy file
train_samples_feats_matrix_lbp = np.load(file="train_samples_feats_matrix_LBP.npy")

In [None]:
# Gabor feature matrix of the validation samples, read back from its saved .npy file
valid_samples_feats_matrix_gabor = np.load(file="valid_samples_feats_matrix_Gabor.npy")

In [None]:
# LBP feature matrix of the validation samples, read back from its saved .npy file
valid_samples_feats_matrix_lbp = np.load(file="valid_samples_feats_matrix_LBP.npy")

In [None]:
# Join the Gabor and LBP matrices column-wise (axis=1): each row keeps its
# Gabor columns first, followed by its LBP columns.
train_samples_feats_matrix_gabor_lbp = np.concatenate(
    [train_samples_feats_matrix_gabor, train_samples_feats_matrix_lbp],
    axis=1,
)
valid_samples_feats_matrix_gabor_lbp = np.concatenate(
    [valid_samples_feats_matrix_gabor, valid_samples_feats_matrix_lbp],
    axis=1,
)

In [2]:
# Test split: read the saved Gabor and LBP feature matrices ...
test_samples_feats_matrix_Gabor = np.load(file="test_samples_feats_matrix_Gabor.npy")
test_samples_feats_matrix_LBP = np.load(file="test_samples_feats_matrix_LBP.npy")
# ... and join them column-wise, Gabor columns first, then LBP columns.
test_samples_feats_matrix_gabor_lbp = np.concatenate(
    [test_samples_feats_matrix_Gabor, test_samples_feats_matrix_LBP],
    axis=1,
)

In [None]:
# Fit a nearest-neighbour index on the concatenated Gabor+LBP training features.
# The fitted matrix must be the same feature set that the kneighbors() queries
# below use (the *_gabor_lbp matrices). The name used previously,
# train_samples_feats_matrix_glcm_gabor, is not defined by any cell shown in
# this notebook and raised a NameError on a fresh kernel.
t0 = time.time()
# n_neighbors is passed by keyword: recent scikit-learn releases no longer
# accept it as a positional argument.
knn_model = NearestNeighbors(n_neighbors=100, algorithm='kd_tree', metric='manhattan')
knn_model.fit(train_samples_feats_matrix_gabor_lbp)
# Wall-clock seconds spent building and fitting the index
print("traing time: ", time.time() - t0)

In [3]:
# Load a previously saved KNN model from disk. This rebinds knn_model, so any
# model fitted in the training cell is replaced by the pickled one.
# NOTE(review): pickle.load can execute arbitrary code - only load model files
# that come from a trusted source.
# The with-block closes the file handle, which the bare open() call leaked.
with open("KNN_['Gabor', 'LBP']_500", "rb") as model_file:
    knn_model = pickle.load(model_file)

In [None]:
# Query the model with the training features; kneighbors returns, per sample,
# the distances to its nearest neighbours and the indices of those neighbours.
# NOTE(review): if these are the same samples the model was fitted on, each
# query presumably matches itself at distance 0 - confirm this is intended.
kth_dist, kth_ind = knn_model.kneighbors(X=train_samples_feats_matrix_gabor_lbp)
# Keep only the last distance column: one value per sample, the distance to
# its k-th returned neighbour.
kth_nearest_dist = kth_dist[..., -1]

In [4]:
# Find the k nearest points for each test sample
kth_dist2, kth_ind2 = knn_model.kneighbors(test_samples_feats_matrix_gabor_lbp)
# 1-D array: distance from each test sample to its k-th (last returned) neighbour
kth_nearest_dist2 = kth_dist2[:, -1]
# Decision threshold on the k-th neighbour distance. The original note described
# it as np.amax(kth_nearest_dist), i.e. the largest such distance seen on the
# training samples; it is kept as the recorded literal here.
# NOTE(review): re-derive this value if the model or training features change.
NOVELTY_THRESHOLD = 0.0603626589306
# One label per scored sample, sized from the data rather than a hard-coded
# 10000. 0 = normal; 1 = novel/abnormal (distance exceeds the threshold).
labels = np.zeros(kth_nearest_dist2.shape[0], dtype=int)
labels[kth_nearest_dist2 > NOVELTY_THRESHOLD] = 1

In [5]:
# Ground-truth labels: the first 6000 entries are normal (0),
# the remaining 4000 entries are abnormal (1).
y1 = np.zeros(6000, dtype=int)  # normal
y2 = np.ones(4000, dtype=int)   # abnormal
y = np.concatenate([y1, y2])

In [6]:
# --- Scores computed from the hard 0/1 predictions in `labels` ---
# Confusion-matrix counts, unpacked from the flattened 2x2 matrix
tn, fp, fn, tp = sklearn.metrics.confusion_matrix(y, labels).ravel()
# Chance-corrected agreement and correlation between truth and prediction
kappa = sklearn.metrics.cohen_kappa_score(y, labels)
Math_Cof = sklearn.metrics.matthews_corrcoef(y, labels)
# F1 for the positive class only ('binary') and averaged over both classes ('macro')
f1_binary = sklearn.metrics.f1_score(y_true=y, y_pred=labels, average='binary')
f1_macro = sklearn.metrics.f1_score(y_true=y, y_pred=labels, average='macro')

# --- Score computed from the raw distances (threshold-free) ---
# The k-th neighbour distance is used directly as the ranking score
auc = sklearn.metrics.roc_auc_score(y_true=y, y_score=kth_nearest_dist2)

# --- Report ---
print("kappa ", kappa)
print ("f1_binary: ", f1_binary, "f1_macro: ", f1_macro, "MAth_cof: ", Math_Cof , " auc: ", auc)
print("confusion matrix", tn, fp, fn, tp)

('kappa ', -0.033558448792035067)
('f1_binary: ', 0.53339619676253336, 'f1_macro: ', 0.35853202744368073, 'MAth_cof: ', -0.059017267082924686, ' auc: ', 0.48665420833333334)
('confusion matrix', 668, 5332, 606, 3394)


In [None]:
# Try every candidate threshold returned by the ROC computation and report the
# macro-averaged F1 obtained with it. Labels are built with the same strict
# "distance > threshold" rule that the thresholding cell uses.
fpr, tpr, thresh = sklearn.metrics.roc_curve(y, kth_nearest_dist2)
for thres in thresh:
    # A separate name keeps this sweep from overwriting the `labels` array
    # produced by the thresholding cell; the length follows the scored data
    # instead of a hard-coded 10000.
    sweep_labels = (kth_nearest_dist2 > thres).astype(int)
    f1 = sklearn.metrics.f1_score(y, sweep_labels, average='macro')
    print ("f1: ", f1, "thres: ", thres)

In [None]:
import matplotlib.pyplot as pl
import matplotlib
# No longer used in this cell; the import is kept in case other cells rely on `io`.
from skimage import io

# ROC curve of the k-th neighbour distance used as an anomaly score
fpr, tpr, thresh = sklearn.metrics.roc_curve(y, kth_nearest_dist2)
f, ax = pl.subplots(1, 1)
ax.plot(fpr, tpr, label="KNN")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC curves")
ax.legend(loc="lower right")
# these are matplotlib.patch.Patch properties
props = dict(boxstyle='square', facecolor='white', alpha=0.5)

# Annotate the figure with the metrics computed in the evaluation cell
# (f1_binary, f1_macro, Math_Cof, auc) instead of hard-coded numbers, so the
# box always agrees with the results printed by this notebook.
textstr = '$F1-binary=%.2f$\n$F1-macro=%.2f$\n$math-corcoeffient=%.2f$\n$AUC=%.2f$'%(f1_binary, f1_macro, Math_Cof, auc)
# place the text box near the lower-right corner, in axes coordinates
ax.text(0.95, 0.2, textstr, transform=ax.transAxes, fontsize=13,
    horizontalalignment='right', verticalalignment='bottom', bbox=props)

# Show the figure through matplotlib itself rather than skimage's io.show()
pl.show()

In [None]:
# Write the ROC figure `f` (created in the plotting cell) to a PNG file;
# the path is relative, so the file lands in the current working directory.
f.savefig('KNN_Gabor_LBP_500.png')