In [1]:
import os
from fnmatch import fnmatch
from trainData_featExt_glcm import trainData_featExt_glcm
from trainData_featExt_gabor import trainData_featExt_gabor
import numpy as np
from PIL import Image
from filtering.filters import Median
from feature_extraction.lbp import Lbp
from sklearn.neighbors import NearestNeighbors
import sklearn
import time
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import normalize
from sklearn import preprocessing
import pickle

In [2]:
# Load the precomputed Gabor feature matrix of the training samples from disk.
# NOTE(review): presumably 2-D with one row per sample (it is joined with the
# GLCM matrix along axis=1 in a later cell) — confirm against the extractor.
train_samples_feats_matrix_gabor = np.load("train_samples_feats_matrix_Gabor.npy")

In [3]:
# Load the precomputed GLCM feature matrix of the training samples from disk.
# NOTE(review): presumably row-aligned with the Gabor training matrix, since
# the two are concatenated along axis=1 in a later cell — confirm.
train_samples_feats_matrix_glcm = np.load("train_samples_feats_matrix_GLCM.npy")

In [4]:
# Load the precomputed Gabor feature matrix of the *validation* samples
# (this reads the "valid_..." file, not the training one).
valid_samples_feats_matrix_gabor = np.load("valid_samples_feats_matrix_Gabor.npy")

In [5]:
# Load the precomputed GLCM feature matrix of the *validation* samples
# (this reads the "valid_..." file, not the training one).
valid_samples_feats_matrix_glcm = np.load("valid_samples_feats_matrix_GLCM.npy")

In [6]:
# Build the combined descriptor for each split by joining the two feature
# matrices column-wise: GLCM columns first, Gabor columns after them.
train_samples_feats_matrix_glcm_gabor = np.concatenate(
    [train_samples_feats_matrix_glcm, train_samples_feats_matrix_gabor],
    axis=1,
)
valid_samples_feats_matrix_glcm_gabor = np.concatenate(
    [valid_samples_feats_matrix_glcm, valid_samples_feats_matrix_gabor],
    axis=1,
)

In [7]:
# Test split: read both precomputed feature matrices, then join them
# column-wise in the same order as the other splits (GLCM first, Gabor second).
test_samples_feats_matrix_GLCM = np.load("test_samples_feats_matrix_GLCM.npy")
test_samples_feats_matrix_Gabor = np.load("test_samples_feats_matrix_Gabor.npy")
test_samples_feats_matrix_glcm_gabor = np.concatenate(
    [test_samples_feats_matrix_GLCM, test_samples_feats_matrix_Gabor],
    axis=1,
)

In [18]:
# Fit an unsupervised nearest-neighbour index on the combined training features
# and report how long the fit took.
# n_neighbors is passed by keyword: current scikit-learn releases accept
# estimator constructor arguments as keywords only, so the positional form
# NearestNeighbors(1000, ...) raises a TypeError there. The keyword form is
# accepted by old releases as well.
t0 = time.time()
knn_model = NearestNeighbors(n_neighbors=1000, algorithm='kd_tree', metric='euclidean')
knn_model.fit(train_samples_feats_matrix_glcm_gabor)
print("traing time: ", time.time() - t0)

('traing time: ', 0.07112407684326172)


In [None]:
# Replace knn_model with a model previously saved to disk.
# The file is opened in a `with` block so the handle is closed even if
# unpickling fails (the bare open(...) form left it to the garbage collector).
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load model files that come from a trusted source.
# NOTE(review): running this cell rebinds knn_model, discarding the model fitted
# with n_neighbors=1000; the "_500" suffix suggests a different k — confirm
# which model the downstream cells are meant to use.
with open("KNN_['GLCM', 'Gabor']_500", "rb") as model_file:
    knn_model = pickle.load(model_file)

In [19]:
# Query the fitted index with the training features themselves; the result is
# a (distances, indices) pair with one row per training sample.
# NOTE(review): because the query set is passed explicitly, each sample is
# presumably returned as its own nearest neighbour (distance 0); calling
# kneighbors() with no X would exclude it — confirm which is intended.
kth_dist, kth_ind = knn_model.kneighbors(
    X=train_samples_feats_matrix_glcm_gabor,
    return_distance=True,
)
# Keep only the last column: the distance from each sample to the farthest of
# its returned neighbours (the k-th one).
kth_nearest_dist = kth_dist[:, -1]

In [20]:
# Find the k nearest training points for each validation sample.
kth_dist2, kth_ind2 = knn_model.kneighbors(valid_samples_feats_matrix_glcm_gabor)
# 1-D array: distance from each validation sample to its k-th nearest training point.
kth_nearest_dist2 = kth_dist2[:, -1]
# Decision threshold: the mean k-th-neighbour distance measured on the training set.
novelty_threshold = np.mean(kth_nearest_dist)
# A validation sample whose k-th-neighbour distance exceeds the threshold is
# labelled 1 (abnormal/novel); every other sample is labelled 0 (normal).
# The label vector is derived from the distance array itself, so its length
# always matches the validation set instead of relying on a hard-coded 10000.
labels = (kth_nearest_dist2 > novelty_threshold).astype(int)

In [21]:
# Ground-truth labels: 6000 normal samples (0) followed by 4000 abnormal ones (1).
# NOTE(review): assumes the validation feature rows are stored in that same
# order — confirm against the script that produced the .npy files.
y1 = np.zeros(6000, dtype=int)  # normal
y2 = np.ones(4000, dtype=int)   # abnormal
y = np.concatenate((y1, y2))

In [22]:
# Score the thresholded predictions (labels) against the ground truth (y).
# AUC is the one threshold-free metric here: it is computed from the raw
# k-th-neighbour distances rather than from the binary labels.
skm = sklearn.metrics

kappa = skm.cohen_kappa_score(y, labels)
print("kappa ", kappa)

f1_binary = skm.f1_score(y, labels, average='binary')
f1_macro = skm.f1_score(y, labels, average='macro')
Math_Cof = skm.matthews_corrcoef(y, labels)
auc = skm.roc_auc_score(y, kth_nearest_dist2)
print ("f1_binary: ", f1_binary, "f1_macro: ", f1_macro, "MAth_cof: ", Math_Cof , " auc: ", auc)

# ravel() flattens the 2x2 confusion matrix row by row: tn, fp, fn, tp.
tn, fp, fn, tp = skm.confusion_matrix(y, labels).ravel()
print("confusion matrix", tn, fp, fn, tp)

('kappa ', 0.50353734781178017)
('f1_binary: ', 0.70957651588065462, 'f1_macro: ', 0.75151994856318849, 'MAth_cof: ', 0.50457715574934237, ' auc: ', 0.8070859583333333)
('confusion matrix', 4637, 1363, 1051, 2949)


In [None]:
# Sweep every candidate threshold returned by the ROC computation and print the
# macro F1 score obtained when the k-th-neighbour distance is cut at that value.
fpr, tpr, thresh = sklearn.metrics.roc_curve(y, kth_nearest_dist2)
for thres in thresh:
    # A loop-local name is used on purpose: reusing `labels` here would overwrite
    # the predictions made with the mean-distance threshold, so re-running the
    # metrics cell afterwards would silently score the last sweep threshold.
    # The vector is built from the distance array, so no sample count is hard-coded.
    labels_at_thres = (kth_nearest_dist2 > thres).astype(int)
    f1 = sklearn.metrics.f1_score(y, labels_at_thres, average='macro')
    print ("f1: ", f1, "thres: ", thres)

In [None]:
# Plot the ROC curve obtained by using the k-th-neighbour distance as the
# abnormality score.
# NOTE: the imports stay in this cell so it keeps working when run on its own;
# ideally they would live in the notebook's first (imports) cell.
import matplotlib.pyplot as pl
import matplotlib
from skimage import io

# Recomputed here so the cell does not depend on the threshold-sweep cell
# having been executed.
fpr, tpr, thresh = sklearn.metrics.roc_curve(y, kth_nearest_dist2)

f, ax = pl.subplots(1, 1)
ax.plot(fpr, tpr, label="KNN")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC curves")
ax.legend(loc="lower right")

# Optional annotation box with the headline metrics (left disabled).
# these are matplotlib.patch.Patch properties
#props = dict(boxstyle='square', facecolor='white', alpha=0.5)

#textstr = '$F1-binary=%.2f$\n$F1-macro=%.2f$\n$math-corcoeffient=%.2f$\n$AUC=%.2f$'%(0.7, 0.74, 0.5, 0.8)
# place a text box in upper left in axes coords
#ax.text(0.95, 0.2, textstr, transform=ax.transAxes, fontsize=13,
#    horizontalalignment='right', verticalalignment='bottom', bbox=props)

# The figure was created with matplotlib, so it is shown with matplotlib.
# skimage.io.show() only reaches the same call indirectly through
# scikit-image's I/O plugin layer (NOTE(review): believed to be deprecated in
# recent scikit-image releases — confirm).
pl.show()

In [None]:
# Write the ROC figure `f` (created by the pl.subplots call in the plotting
# cell) to a PNG in the current working directory.
# NOTE(review): the file name says "500" while the model fitted in this
# notebook uses n_neighbors=1000 — confirm which model the figure describes.
f.savefig('KNN_GLCM_Gabor_500.png')

In [8]:
# Inspect the Gabor test features. The parenthesised single-argument form
# prints the same text on Python 2 and is valid syntax on Python 3, where the
# bare `print x` statement is a SyntaxError.
print(test_samples_feats_matrix_Gabor)

[[ 0.04067456  0.00818885  0.04068    ...,  0.71921904  0.32246698
   0.17271638]
 [ 0.22808918  0.04542157  0.22811982 ...,  0.04304621  0.43836488
   0.04508741]
 [ 0.29741772  0.05965647  0.29746722 ...,  0.04249588  0.39801036
   0.04080026]
 ..., 
 [ 0.08327351  0.01668433  0.08328117 ...,  0.03782816  0.49477563
   0.0468388 ]
 [ 0.36704435  0.07271599  0.3670935  ...,  0.03845795  0.32909435
   0.02946692]
 [ 0.38879007  0.07735422  0.38885305 ...,  0.02249949  0.30242688
   0.02115274]]


In [9]:
# Inspect the GLCM test features. The parenthesised single-argument form
# prints the same text on Python 2 and is valid syntax on Python 3, where the
# bare `print x` statement is a SyntaxError.
print(test_samples_feats_matrix_GLCM)

[[  8.93458933e-02   2.35035211e-01   1.94728241e-01 ...,   2.36283131e-05
    2.07620139e-05   2.49394241e-05]
 [  1.56913962e-01   2.32400757e-01   1.52913882e-01 ...,   4.00539066e-05
    4.25214508e-05   3.94451985e-05]
 [  1.40374228e-01   1.98424296e-01   1.49077918e-01 ...,   4.91008804e-05
    4.00991173e-05   4.09880328e-05]
 ..., 
 [  1.11267827e-01   1.46887882e-01   1.24380977e-01 ...,   3.52089612e-05
    3.10692812e-05   3.35108581e-05]
 [  1.42270071e-01   2.31652246e-01   1.73836788e-01 ...,   3.42573591e-04
    3.40316535e-04   3.65707206e-04]
 [  1.79629374e-01   2.42687798e-01   1.61647567e-01 ...,   4.16565901e-04
    4.14679822e-04   4.22613345e-04]]
