In [1]:
import cv2
import imutils
from imutils import paths
import random
import numpy as np
import os
from scipy.cluster.vq import *
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import sklearn.metrics as metrics
import matplotlib.pyplot as plt

In [2]:
imagePaths = list(imutils.paths.list_images("Bikes")) + list(imutils.paths.list_images("Horses"))
random.shuffle(imagePaths)

labels = []
descriptorList = []

sift = cv2.xfeatures2d.SIFT_create()

for (i, imagePath) in enumerate(imagePaths):
    image = cv2.imread(imagePath)
    gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    label = imagePath.split(os.path.sep)[0].split("/")[0]
    keyPoints, descriptor = sift.detectAndCompute(image, None)
    descriptorList.append((imagePath, descriptor))
    if (label == 'Bikes'):
        labels.append(0)
    else:
        labels.append(1)
        
        
descriptors = descriptorList[0][1]
    
for imageFlag, descriptor in descriptorList[1:]:
    descriptors = np.vstack((descriptors, descriptor))
    

In [3]:
k = 500

voc,  variance = kmeans(descriptors, k, 1)
print (variance)
print (len(voc))

257.48697
500


In [4]:
imageFeatures = np.zeros((len(imagePaths), k), "float32")
for i in range(len(imagePaths)):
    words, distance = vq(descriptorList[i][1],voc)
    for w in words:
        imageFeatures[i][w] += 1

In [5]:
# numberOccurences = np.sum( (imageFeatures > 0) * 1, axis = 0)
# # print (len(numberOccurences))
# idf = np.array(np.log((1.0*len(imagePaths)+1) / (1.0*numberOccurences + 1)), 'float32')
# print (len(idf))

In [6]:
stdSlr = StandardScaler().fit(imageFeatures)
imageFeatures = stdSlr.transform(imageFeatures)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(imageFeatures, labels, test_size=0.4, random_state=4)

In [8]:
clf = cv2.ml.KNearest_create()
clf.train(X_train, cv2.ml.ROW_SAMPLE, np.asarray(y_train, dtype=np.float32))

True

In [9]:
ret, results, neighbours ,dist = clf.findNearest(X_test, k=10)

In [10]:
pred_label = []
for var in results:
    label = var
    pred_label.append(int(label))

print (y_test)
print (pred_label)
    
metrics.accuracy_score(y_test, pred_label)
# results = list(results.astype(int))
# print (results)
# print (labels_test)

[0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1]
[0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1]


0.8611111111111112