In [15]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score
import os

In [2]:
# defining feature extractor that we want to use
extractor = cv2.xfeatures2d.SIFT_create()

def features(image, extractor):
    keypoints, descriptors = extractor.detectAndCompute(image, None)
    return keypoints, descriptors

In [3]:
# generate histogram for each cluster of feature descriptor
def build_histogram(descriptor_list, cluster_alg):
    histogram = np.zeros(len(cluster_alg.cluster_centers_))
    cluster_result =  cluster_alg.predict(descriptor_list)
    for i in cluster_result:
        histogram[i] += 1.0
    return histogram

In [12]:
# get image files from directory
def getImageFiles(path):
    imagePaths = []
    for file in os.listdir(path):
        imagePaths.append(os.path.join(path,file))
    return imagePaths

In [5]:

featureHistogram = []
labels = [] # ground truth values
imagePaths = []


trainDirPaths = "/home/vidhikatkoria/VR/TRAIN"

imagePaths = getImageFiles(trainDirPaths)

for (i, imagePath) in enumerate(imagePaths):
    # load the image and extract the class label 
    # (assuming path as the format: /path/to/dataset/{category}.{image_num}.jpg
    image = cv2.imread(imagePath)
    label = imagePath.split(os.path.sep)[-1].split(".")[0]
    labels.append(label)
    # shift descriptor for keypoints in image
    keypoint, descriptor = features(image, extractor)
    # k-cluster of keypoints
    kmeans = KMeans(n_clusters = 10)
    kmeans.fit(descriptor)
    # histogram of clusters for training images
    histogram = build_histogram(descriptor, kmeans)
    featureHistogram.append(histogram)

In [44]:
data = cv2.imread("/home/vidhikatkoria/VR/test/Landscape.50.jpg")
# data = gray(data)
keypoint, descriptor = features(data, extractor)
histogramt = build_histogram(descriptor, kmeans)
neighbor = NearestNeighbors(n_neighbors = 1)
neighbor.fit(featureHistogram)
dist, result = neighbor.kneighbors([histogramt])

In [45]:
print(dist,result)

[[1300.26535753]] [[117]]


In [46]:
labels[result[0][0]]

'Landscape'

In [13]:
# test using k nearest neighbour
testLabel = []
predictedLabel = []
testDirPaths = "/home/vidhikatkoria/VR/test"

testImagePaths = getImageFiles(testDirPaths)

for (i, imagePath) in enumerate(testImagePaths):
    # load the image and extract the class label 
    testImage = cv2.imread(imagePath)
    label = imagePath.split(os.path.sep)[-1].split(".")[0]
    testLabel.append(label)
     # shift descriptor for keypoints in image
    keypoint, descriptor = features(testImage, extractor)
    # histogram of clusters for test image
    histogram = build_histogram(descriptor, kmeans)
    # find nearest matched neighbour from histograms of trained images
    neighbor = NearestNeighbors(n_neighbors = 1)
    neighbor.fit(featureHistogram)
    dist, result = neighbor.kneighbors([histogram])
    # get the predicted label of nearest match
    predictedLabel.append(labels[result[0][0]])

In [19]:
# accuracy of test image classification
accuracy_score(testLabel, predictedLabel)

0.7333333333333333