In [129]:
import os
import numpy as np
import cv2
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold   
import cv2

In [130]:
def desSIFT(image):
    sift = cv2.xfeatures2d.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(image,None)
    return keypoints, descriptors

def describeORB(image):
    orb = cv2.ORB_create()
    keypoints, descriptors = orb.detectAndCompute(image,None)
    return keypoints, descriptors

def describeSURF( image):
    surf = cv2.xfeatures2d.SURF_create()
    # it is better to have this value between 300 and 500
    surf.setHessianThreshold(400)
    kp, des = surf.detectAndCompute(image,None)
    return keypoints, descriptors

In [131]:
def read_images(path, folders):
    images = []
    labels = []
    idx = 0
    for folder in folders:
        for filename in os.listdir(path+folder):
            image = cv2.imread(os.path.join(path+folder, filename))
            if image is not None:
                images.append(image)
                labels.append(idx)
                
        idx += 1
    images = np.array(images)
    labels = np.array(labels)
    return images, labels

In [132]:
def getDescriptors(images) : 
    descriptors = []
    
    for image in images : 
        gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        gray_image = cv2.resize(image, (150, 150), interpolation=cv2.INTER_AREA)
        keypoint, descriptor = desSIFT(gray_image)
        if descriptor is not None : 
            descriptors.extend(descriptor)
            
    descriptors = np.asarray(descriptors)    
    return descriptors

In [133]:
def getVLADDescriptors(images, image_labels, visualDic):
    descriptors = []
    labels = []
    
    idx = 0
    for image in images : 
        gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        gray_image = cv2.resize(image, (150, 150), interpolation=cv2.INTER_AREA)
        keypoint, descriptor = desSIFT(gray_image)
        if descriptor is not None : 
            v = VLAD(descriptor, visualDic)
            descriptors.append(v)
            labels.append([image_labels[idx]])
        idx += 1
            
    descriptors = np.asarray(descriptors)
    labels = np.array(labels).astype(np.float32)
        
    return descriptors, labels

In [134]:
def VLAD(X, visualDictionary) : 
    
    predictedLabels = visualDictionary.predict(X)
    centers = visualDictionary.cluster_centers_
    labels = visualDictionary.labels_
    k = visualDictionary.n_clusters
    
    m,d = X.shape
    V=np.zeros([k,d])
    #computing the differences

    # for all the clusters (visual words)
    for i in range(k):
        # if there is at least one descriptor in that cluster
        if np.sum(predictedLabels==i)>0:
            # add the diferences
            V[i]=np.sum(X[predictedLabels==i,:]-centers[i],axis=0)
    

    V = V.flatten()
    # power normalization, also called square-rooting normalization
    V = np.sign(V)*np.sqrt(np.abs(V))

    # L2 normalization

    V = V/np.sqrt(np.dot(V,V))
    return V


In [135]:
# images, labels = read_images("../../../Assignment2/Panorama-BOVW/SIFT-SURF/",["Bikes", "Horses"])
images, labels = read_images("../../../Assignment2/Panorama-BOVW/SIFT-SURF/cifar-10/",["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"])

In [136]:
sift_des = getDescriptors(images)

In [137]:
visDic = MiniBatchKMeans(init='k-means++', n_clusters=50,max_iter=1000, batch_size=1000, n_init=10, max_no_improvement=10, verbose=0).fit(sift_des)

In [138]:
X = images
y = labels
print(labels)
cv = StratifiedKFold(n_splits=6, random_state=42)

scores = []
count = 0
for train_ind, validate_ind in cv.split(X, y):
    print("CV # - ", count)
    count += 1
    train_X, train_y = X[train_ind], y[train_ind]
    validate_X, validate_y = X[validate_ind], y[validate_ind]
    
    vlad_des, vlad_labels = getVLADDescriptors(train_X, train_y, visDic)
    print ("Hola")

    vlad_des_test, vlad_labels_test = getVLADDescriptors(validate_X, validate_y, visDic)
    clf = cv2.ml.KNearest_create()
    clf.train(np.float32(vlad_des), cv2.ml.ROW_SAMPLE, vlad_labels)
    ret, results, neighbours, dist = clf.findNearest(np.float32(vlad_des_test), k=10)
    score = accuracy_score(results, vlad_labels_test)
    scores.append(score)


[0 0 0 ... 9 9 9]
CV # -  0
Hola
CV # -  1
Hola
CV # -  2
Hola
CV # -  3
Hola
CV # -  4
Hola
CV # -  5
Hola


In [139]:
final_score = sum(scores)/len(scores)
print(final_score)

0.2920154616881115


In [140]:
print(scores)

[0.286, 0.2882864859457837, 0.29222922292229225, 0.29268780634190256, 0.29322932293229326, 0.2996599319863973]
