In [1]:
import numpy as np
import cv2 as cv
import os
from scipy import ndimage
from scipy.spatial import distance
from sklearn.cluster import KMeans
from tqdm import tqdm


In [2]:
def load_images_from_folder(folder):
    """Load every readable image below ``folder`` as a 500x500 grayscale array.

    ``folder`` is scanned one level deep: each sub-directory is treated as a
    category and each file inside it as a candidate image.

    Args:
        folder: Path of the directory that holds the category sub-directories.

    Returns:
        dict mapping each sub-directory name to the list of its images, read
        with flag 0 (grayscale) and resized to (500, 500). Files OpenCV cannot
        decode are skipped, as are entries of ``folder`` that are not
        directories.
    """
    images = {}
    for filename in tqdm(os.listdir(folder)):
        path = os.path.join(folder, filename)
        # Stray files next to the category folders (e.g. ".DS_Store") would
        # make os.listdir(path) raise NotADirectoryError.
        if not os.path.isdir(path):
            continue
        category = []
        for cat in os.listdir(path):
            img = cv.imread(os.path.join(path, cat), 0)
            # imread signals failure by returning None; the check has to come
            # before resize, which raises on a None input.
            if img is None:
                continue
            category.append(cv.resize(img, (500, 500)))
        images[filename] = category
    return images

# Database images keyed by sub-folder name of data/database; each value is the
# list of 500x500 grayscale images loaded from that sub-folder.
images_dict = load_images_from_folder('data/database')

100%|██████████| 50/50 [00:02<00:00, 19.40it/s]


In [3]:
sift = cv.SIFT_create()

# vocab:       label -> list with one descriptor array per image, in the same
#              order as images_dict[label].
# descriptors: flat list of every single descriptor row, appended in the same
#              traversal order as vocab (this is what KMeans is fitted on).
vocab = {}
descriptors = []
for label, images in tqdm(images_dict.items()):
    label_descriptors = vocab.setdefault(label, [])
    for image in images:
        _, desc = sift.detectAndCompute(image, None)
        if desc is None:
            # No keypoints were found (e.g. a blank image): keep an empty
            # placeholder so vocab stays aligned with images_dict and
            # extend() below does not fail on None.
            desc = np.empty((0, sift.descriptorSize()), dtype=np.float32)
        label_descriptors.append(desc)
        descriptors.extend(desc)

100%|██████████| 50/50 [00:22<00:00,  2.26it/s]


In [4]:
# Cluster all SIFT descriptors into 500 visual words. k[j] is the cluster
# index assigned to descriptors[j].
# random_state pins the centroid initialisation so that a restart-and-run-all
# reproduces the same vocabulary (and therefore the same histograms).
kmeans = KMeans(n_clusters = 500, n_init=10, random_state=0)
k = kmeans.fit_predict(descriptors)

In [None]:
# Build one bag-of-visual-words histogram per database image.
#
# `descriptors` is a flat list of individual descriptor rows, so it cannot tell
# where one image ends and the next begins. The per-image descriptor counts are
# taken from `vocab` instead, which was filled in the same traversal order and
# therefore lines up with the cluster assignments in `k`.
descriptor_counts = [
    0 if desc is None else len(desc)
    for image_descs in vocab.values()
    for desc in image_descs
]
n_images = len(descriptor_counts)
n_words = kmeans.n_clusters
vocab_histogram = np.zeros((n_images, n_words))
old_count = 0
for i, l in enumerate(descriptor_counts):
    # k[old_count:old_count + l] are the visual words of image i.
    vocab_histogram[i] = np.bincount(k[old_count:old_count + l], minlength=n_words)
    old_count += l

# Every cluster assignment must have been consumed by exactly one image.
assert old_count == len(k), "descriptor counts in vocab do not match k"

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise each visual-word column (zero mean, unit variance). The fitted
# scaler is kept in `scaler` so query histograms can be given the exact same
# transform with scaler.transform(...) before they are compared to these rows.
scaler = StandardScaler()
vocab_histogram = scaler.fit_transform(vocab_histogram)

In [None]:
# Load the validation queries as 500x500 grayscale images.
test_images = []
# os.listdir returns entries in arbitrary order; sorting makes the row order of
# everything derived from test_images reproducible between runs.
for img_path in sorted(os.listdir('./data/queries_validation')):
    if img_path.endswith(".jpg"):
        img = cv.imread("./data/queries_validation/" + img_path, 0)
        # imread returns None for unreadable files; resize would raise on it.
        if img is None:
            continue
        test_images.append(cv.resize(img, (500, 500)))

In [None]:
# Build one bag-of-visual-words histogram per query image.
#
# Each query descriptor is assigned to its nearest visual word with
# kmeans.predict. The array `k` only holds the assignments of the *database*
# descriptors and must not be indexed with query descriptors.
n_words = kmeans.n_clusters
test_histogram = np.zeros((len(test_images), n_words))
for row, image in enumerate(test_images):
    _, desc = sift.detectAndCompute(image, None)
    # No keypoints found: leave this row as all zeros.
    if desc is None or len(desc) == 0:
        continue
    words = kmeans.predict(desc)
    test_histogram[row] = np.bincount(words, minlength=n_words)

In [None]:
# Debug peek at k, the per-descriptor cluster indices returned by
# kmeans.fit_predict(descriptors).
print(k)

[369 135  43 ...  10 432 358]


In [None]:
def image_class(all_bovw, centers):
    """Compute a 256-bin histogram of channel 0 for every image, per class.

    Args:
        all_bovw: dict mapping a class key to an iterable of images.
        centers: Accepted for interface compatibility; not used.

    Returns:
        dict with the same keys as ``all_bovw``; each value is the list of
        ``cv.calcHist`` results (256 bins over the range [0, 256)) for that
        key's images, in the original order.
    """
    def _channel0_histogram(picture):
        # Channel 0, no mask, 256 bins covering [0, 256).
        return cv.calcHist([picture], [0], None, [256], [0, 256])

    return {
        label: [_channel0_histogram(picture) for picture in pictures]
        for label, pictures in all_bovw.items()
    }

In [None]:
def knn(images, tests):
    """Classify every test vector by its single nearest training vector.

    Args:
        images: dict mapping a class label to its training vectors.
        tests: dict mapping the true class label to its test vectors.

    Returns:
        ``[num_test, correct_predict, class_based]`` where ``class_based``
        maps each test label to ``[correct, total]`` for that label.
        Distances are Euclidean; on ties the training vector seen first wins.
    """
    def _labelled_samples():
        # Walk the training set in dict order, yielding (label, vector) pairs.
        for train_label, samples in images.items():
            for sample in samples:
                yield train_label, sample

    total = 0
    hits = 0
    per_class = {}

    for true_label, queries in tests.items():
        tally = [0, 0]  # [correct, total] for this label
        per_class[true_label] = tally
        for query in queries:
            guess = "a"  # placeholder that survives only if there is no training vector
            best = 0
            for position, (train_label, sample) in enumerate(_labelled_samples()):
                gap = distance.euclidean(query, sample)
                # The first candidate is always taken; later ones must be strictly closer.
                if position == 0 or gap < best:
                    best = gap
                    guess = train_label

            if guess == true_label:
                hits += 1
                tally[0] += 1
            total += 1
            tally[1] += 1
    return [total, hits, per_class]

In [None]:
from glob import glob
# Intensity histograms (256 bins) of the 50 validation queries named
# "<n>_11.jpg" for n = 1..50.
test_images = []
for query_number in range(1, 51):
    query_path = f"./data/queries_validation/{query_number}_11.jpg"
    # Flag 0 reads the file as grayscale, like the database images. Without it
    # the image is BGR and channel 0 of calcHist would be the blue channel only.
    query = cv.imread(query_path, 0)
    if query is None:
        # imread returns None instead of raising; fail with the offending path
        # rather than with an opaque error from cv.resize.
        raise FileNotFoundError(f"Could not read query image: {query_path}")
    test_images.append(
        cv.calcHist([cv.resize(query, (500, 500))], [0], None, [256], [0, 256])
    )
