In [8]:
import cv2
import numpy as np
import pickle as cPickle
from sklearn.cluster import MiniBatchKMeans
from sklearn.neighbors import KNeighborsClassifier

Let us first read the train and test files

In [9]:
def _load_pickle(path):
    """Return the object pickled in the file at `path`.

    The file is opened inside a `with` block so its handle is closed as soon
    as the object has been read.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as f:
        return cPickle.load(f)


# Image filenames and their labels for the train and test splits
train_images_filenames = _load_pickle('train_images_filenames.dat')
test_images_filenames = _load_pickle('test_images_filenames.dat')
train_labels = _load_pickle('train_labels.dat')
test_labels = _load_pickle('test_labels.dat')

We set the parameters of the execution

In [10]:
# --- Feature extraction ---
# True to use Dense SIFT, False to use classical (keypoint-detected) SIFT.
denseSift = True
# Number of features requested from the SIFT detector.
num_features = 300

# --- Codebook ---
# Codebook size, i.e. the number of clusters for KMeans.
k = 128

# --- Classifier ---
# Distance used by the k-nn classifier.
knn_metric = 'euclidean'
# Number of neighbors (the "k" of k-nn).
num_neighbors = 5

We create a SIFT object, which acts both as keypoint detector and as descriptor extractor

In [11]:
# SIFT lives in the main cv2 module in recent OpenCV releases, while older
# builds only expose it through the contrib module cv2.xfeatures2d.
# Prefer the main-module factory and fall back to the contrib one.
if hasattr(cv2, 'SIFT_create'):
    SIFTdetector = cv2.SIFT_create(nfeatures=num_features)
else:
    SIFTdetector = cv2.xfeatures2d.SIFT_create(nfeatures=num_features)

We compute the SIFT descriptors for all the train images and subsequently build a numpy array with all the descriptors stacked together

In [None]:
# One descriptor array per train image, plus the label of that image
Train_descriptors = []
Train_label_per_descriptor = []


for filename, labels in zip(train_images_filenames, train_labels):
    # Map the dataset path stored in the filename lists to the local directory
    filename = filename.replace("../../Databases/MIT_split", ".")
    ima = cv2.imread(filename)
    if ima is None:
        # cv2.imread returns None instead of raising when the image cannot be read
        raise FileNotFoundError("Could not read image: " + filename)
    gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)

    if denseSift:
        # Dense SIFT: describe keypoints placed on a regular grid, one every `step` pixels
        step = 10
        height, width = gray.shape
        kpt = [cv2.KeyPoint(x, y, step) for y in range(0, height, step)
                                       for x in range(0, width, step)]
        _, des = SIFTdetector.compute(gray, kpt)

    else:
        kpt, des = SIFTdetector.detectAndCompute(gray, None)

    if des is None:
        # No descriptors were produced for this image: keep an empty array so the
        # image keeps its position in the list and np.vstack below still works
        des = np.empty((0, SIFTdetector.descriptorSize()), dtype=np.float32)

    Train_descriptors.append(des)
    Train_label_per_descriptor.append(labels)

# All train descriptors stacked row-wise into a single array
D = np.vstack(Train_descriptors)

We now compute a k-means clustering on the descriptor space

In [None]:
# Build the visual-word codebook: a mini-batch k-means with k clusters,
# fitted on the stacked train descriptors D.
codebook = MiniBatchKMeans(
    n_clusters=k,
    batch_size=k * 20,
    reassignment_ratio=10**-4,
    compute_labels=False,
    random_state=42,
    verbose=False,
)
codebook.fit(D)

And, for each train image, we project each keypoint descriptor to its closest visual word. We represent each of the images with the frequency of each visual word.

In [None]:
# One row per train image: histogram with the number of descriptors assigned
# to each of the k visual words.
visual_words = np.zeros((len(Train_descriptors), k), dtype=np.float32)
for i, des in enumerate(Train_descriptors):
    if des is None or len(des) == 0:
        # codebook.predict cannot handle a missing/empty descriptor set;
        # such an image keeps its all-zero histogram
        continue
    words = codebook.predict(des)
    visual_words[i, :] = np.bincount(words, minlength=k)

We build a k-nn classifier and train it with the visual word histograms of the train images

In [None]:
# k-nn classifier fitted on the train histograms and their labels,
# configured from the execution parameters.
knn = KNeighborsClassifier(
    n_neighbors=num_neighbors,
    metric=knn_metric,
    n_jobs=-1,
)
knn.fit(visual_words, train_labels)

Finally, we compute the descriptors of the test images, represent each test image with its visual word histogram, and compute the accuracy of the model

In [None]:
# One row per test image: histogram with the number of descriptors assigned
# to each of the k visual words.
visual_words_test = np.zeros((len(test_images_filenames), k), dtype=np.float32)
for i, filename in enumerate(test_images_filenames):
    # Map the dataset path stored in the filename lists to the local directory
    filename = filename.replace("../../Databases/MIT_split", ".")
    ima = cv2.imread(filename)
    if ima is None:
        # cv2.imread returns None instead of raising when the image cannot be read
        raise FileNotFoundError("Could not read image: " + filename)
    gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)

    if denseSift:
        # Dense SIFT: describe keypoints placed on a regular grid, one every `step` pixels
        step = 10
        height, width = gray.shape
        kpt = [cv2.KeyPoint(x, y, step) for y in range(0, height, step)
                                       for x in range(0, width, step)]
        _, des = SIFTdetector.compute(gray, kpt)
    else:
        kpt, des = SIFTdetector.detectAndCompute(gray, None)

    if des is None or len(des) == 0:
        # No descriptors for this image: codebook.predict cannot handle that,
        # so the image keeps its all-zero histogram
        continue

    words = codebook.predict(des)
    visual_words_test[i, :] = np.bincount(words, minlength=k)

In [None]:
# Score of the classifier on the test histograms, expressed as a percentage
accuracy = knn.score(visual_words_test, test_labels) * 100
print(accuracy)

We save a log file of the execution

In [None]:
# Append one line with the parameters and the accuracy of this execution
log_fields = [
    'denseSift: ' + str(denseSift),
    'num_features: ' + str(num_features),
    'k: ' + str(k),
    'num_neighbors: ' + str(num_neighbors),
    'knn_metric: ' + str(knn_metric),
    'accuracy: ' + str(accuracy),
]
with open('parameters_execution.log', 'a') as f:
    f.write(', '.join(log_fields) + '\n')