In [6]:
import numpy as np 
import cv2 as cv
import os
import matplotlib.pyplot as plt
from IPython.display import clear_output
import time
import imageio
from PIL import Image
from sklearn.cluster import MiniBatchKMeans, KMeans

## Load all the images, their category names (filenames) and labels (y)

In [10]:
def _category_from_filename(filename):
    """Return the category name encoded at the start of an image filename.

    Names containing underscores drop their last underscore-separated token
    (e.g. "all_souls_000001.jpg" -> "all_souls", "oxford_000001.jpg" ->
    "oxford"); names without underscores keep the text before the first
    hyphen (e.g. "apple-1.gif" -> "apple").
    """
    if "_" in filename:
        return filename.rsplit("_", 1)[0]
    return filename.split("-")[0]


def load_image_from_folder(folder, image_type, gray=False):
    """Load every image of one file type from a folder, with names and labels.

    Files are visited in sorted order. Consecutive files that share the same
    category name (see ``_category_from_filename``) receive the same integer
    label; the label is incremented each time the category changes, so the
    first category is labelled 1.

    Parameters
    ----------
    folder : str
        Directory to scan (not recursive).
    image_type : str
        Filename suffix to keep, e.g. ".jpg" or ".gif". For ".gif" only the
        first frame is read, through ``cv.VideoCapture``.
    gray : bool, optional
        If True the loaded image is kept as read; if False (default) it is
        converted with ``cv.COLOR_BGR2GRAY``.

    Returns
    -------
    images : list
        One array per successfully loaded file.
    filenames : list of str
        Category name derived from each loaded file's name.
    y : list of int
        Integer category label for each loaded file.

    Notes
    -----
    Files that cannot be decoded are reported and skipped, so the three
    returned lists always stay aligned.
    """
    images = []
    filenames = []
    y = []
    category, idx = None, 0
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(image_type):
            continue
        path = os.path.join(folder, filename)

        if image_type == ".gif":
            # GIFs are read via VideoCapture (first frame only); presumably
            # because cv.imread could not decode them -- TODO confirm.
            capture = cv.VideoCapture(path)
            try:
                ret, frame = capture.read()
            finally:
                # Always free the capture handle, even if read() raises.
                capture.release()
            image = np.array(Image.fromarray(frame)) if ret else None
        else:
            image = cv.imread(path)

        # Check for a failed read BEFORE colour conversion: cvtColor cannot
        # take None, so checking afterwards never protected anything.
        if image is None:
            print("Skipping unreadable image:", filename)
            continue

        if not gray:
            image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

        # Compare full category names (not prefixes) so that e.g. "horse"
        # and "horseshoe" are kept apart.
        fname = _category_from_filename(filename)
        if fname != category:
            category = fname
            idx += 1

        images.append(image)
        filenames.append(fname)
        y.append(idx)
    print(len(images), "Images loaded successfully!")
    return images, filenames, y

## Compute SIFT features (keypoints & descriptors) on each image

In [4]:
def SIFT(images):
    """Run OpenCV SIFT on every image and collect keypoints and descriptors.

    Parameters
    ----------
    images : sequence
        Images to process; each is passed to ``sift.detectAndCompute`` with
        no mask. Must support ``len()`` (used for the progress percentage).

    Returns
    -------
    keypoints_per_image : list
        The keypoints returned for each image, in input order.
    descriptor_per_image : list
        The descriptors returned for each image, in input order. An entry
        may be None for an image (downstream code in this notebook checks
        for that).
    """
    sift = cv.SIFT_create()

    keypoints_per_image = []
    descriptor_per_image = []

    total = len(images)
    for count, image in enumerate(images, start=1):
        keypoints, descriptor = sift.detectAndCompute(image, None)

        keypoints_per_image.append(keypoints)
        descriptor_per_image.append(descriptor)

        clear_output(wait=True)
        # The precision argument belongs to round(), not to format().
        print("Percentage Completed: {}%".format(round(count / total * 100, 2)))

    return keypoints_per_image, descriptor_per_image

## Stack the descriptors into 1 array for clustering

In [5]:
def stack_descriptors(descriptors):
    """Vertically stack the per-image descriptor arrays into one 2-D array.

    Parameters
    ----------
    descriptors : iterable
        One entry per image. Entries that convert to a 0-dimensional array
        (such as None) are skipped.

    Returns
    -------
    numpy.ndarray
        All kept descriptor rows, in image order.

    Raises
    ------
    ValueError
        If no entry contributes any array (e.g. every entry is None).
    """
    stack = []

    for desc in descriptors:
        arr = np.asarray(desc)
        # np.asarray(None) is 0-dimensional, which is how missing
        # descriptors are detected and dropped.
        if arr.ndim > 0:
            stack.append(arr)

    if not stack:
        raise ValueError("No descriptors to stack: every entry was None")

    # np.vstack needs a real sequence; passing a generator is deprecated and
    # raises TypeError on recent NumPy releases.
    return np.vstack(stack)

## Cluster the descriptors (using either KMeans or MiniBatchKMeans)

In [7]:
def cluster(data, n_clusters=100, cluster_type="minibatch", random_state=None):
    """Cluster the rows of ``data`` and return one cluster label per row.

    Parameters
    ----------
    data : array-like
        Samples to cluster; passed unchanged to ``fit_predict``.
    n_clusters : int, optional
        Number of clusters to form (default 100).
    cluster_type : {"minibatch", "kmeans"}, optional
        "minibatch" uses ``MiniBatchKMeans`` (default), "kmeans" uses
        ``KMeans``.
    random_state : int or None, optional
        Forwarded to the estimator so a run can be made reproducible.
        The default None keeps the estimator's own default.

    Returns
    -------
    The labels returned by the estimator's ``fit_predict``.

    Raises
    ------
    ValueError
        If ``cluster_type`` is not one of the supported names.
    """
    estimators = {"minibatch": MiniBatchKMeans, "kmeans": KMeans}
    # Fail before doing any work: previously an unknown type only printed a
    # message and then crashed on the undefined return value.
    if cluster_type not in estimators:
        raise ValueError("Unknown cluster_type! Try: 'minibatch' or 'kmeans'")

    start = time.time()

    model = estimators[cluster_type](n_clusters=n_clusters, random_state=random_state)
    y_cluster = model.fit_predict(data)

    end = time.time()
    print("Time Elapsed: {} min".format(round((end - start)/60, 2)))
    return y_cluster

## Creating Bag-Of-Words array for each image

In [9]:
def solve_BoW(descriptors, y_cluster, n_clusters):
    """Build one Bag-of-Words histogram per image from the cluster labels.

    ``y_cluster`` is read as the concatenation, in image order, of the
    labels of every descriptor row of every image whose entry in
    ``descriptors`` is not None. Each image's slice is counted into a
    histogram with ``n_clusters`` bins, where bin ``k`` holds the number of
    that image's descriptors assigned to cluster ``k``.

    Parameters
    ----------
    descriptors : sequence
        One entry per image: an array-like with one row per descriptor, or
        None when the image has no descriptors.
    y_cluster : array-like of int
        Cluster label of every stacked descriptor.
    n_clusters : int
        Number of clusters, i.e. histogram length.

    Returns
    -------
    list of list of int
        One histogram per image. Images without descriptors get an all-zero
        histogram.

    Raises
    ------
    ValueError
        If there are fewer labels than descriptors, or a used label lies
        outside ``[0, n_clusters)``.
    """
    labels = np.asarray(y_cluster, dtype=np.intp)
    total_images = len(descriptors)

    n_descriptors = sum(len(desc) for desc in descriptors if desc is not None)
    if n_descriptors > labels.size:
        raise ValueError(
            "y_cluster has {} labels but descriptors contain {} rows".format(
                labels.size, n_descriptors))
    used = labels[:n_descriptors]
    if used.size and (used.min() < 0 or used.max() >= n_clusters):
        raise ValueError("cluster labels must lie in [0, n_clusters)")

    # Stage 1: cut the flat label array into one slice per image.
    image_words = []
    start = 0
    for count, desc in enumerate(descriptors, start=1):
        if desc is None:
            # No descriptors means no visual words at all (empty slice).
            image_words.append(labels[:0])
        else:
            stop = start + len(desc)
            image_words.append(labels[start:stop])
            # The offset must accumulate across images.
            start = stop

        clear_output(wait=True)
        print("(1/2) Percentage Completed: {}%".format(round(count / total_images * 100, 2)))

    # Stage 2: count the labels of each image; bin k counts label k.
    image_histograms = []
    for count, words in enumerate(image_words, start=1):
        image_histograms.append(np.bincount(words, minlength=n_clusters).tolist())

        clear_output(wait=True)
        print("(2/2) Percentage Completed: {}%".format(round(count / total_images * 100, 2)))

    return image_histograms

## OXFORD 5k DATASET:

In [None]:
# Loading Images
# Reads every ".jpg" file in `folder` and unpacks the three lists returned by
# load_image_from_folder (images, category names, integer labels).
# NOTE: `folder` is an absolute, machine-specific Windows path; point it at
# the local copy of the dataset before running.
# Passing gray=False makes the loader convert each image with cv.COLOR_BGR2GRAY.
folder = r"C:\Users\Sean\Desktop\Image-Retrieval\Oxford code\Oxford dataset\Oxford building images"
images, filenames, y = load_image_from_folder(folder, ".jpg", False)

In [None]:
# SIFT features
# One keypoint entry and one descriptor entry per image in `images`, in the same order.
keypoints_per_image, descriptor_per_image = SIFT(images)

In [None]:
# Stacking descriptors
# Combines the per-image descriptors into one array for clustering.
# (The variable keeps its existing spelling because a later cell reads it.)
stacked_desriptors = stack_descriptors(descriptor_per_image)

In [None]:
# Clustering descriptors
# n_clusters is the vocabulary size: it is reused below as the histogram
# length and in the saved file name.
n_clusters = 1000
# y_clusters holds the labels returned by cluster() for the stacked descriptors.
y_clusters = cluster(stacked_desriptors, n_clusters, "minibatch")

In [None]:
# Creating BoW for each image
# Turns the cluster labels into one histogram of n_clusters bins per image.
oxford_5k_image_histograms = solve_BoW(descriptor_per_image, y_clusters, n_clusters)

In [None]:
# Saving BoW array
# Written with a relative file name (so it lands in the current working
# directory); the name embeds n_clusters.
np.save('Oxford5k_BoW_words={}.npy'.format(n_clusters), oxford_5k_image_histograms)

## MPEG7 DATASET:

In [None]:
# Loading Images
# Reads every ".gif" file in `folder2`; for ".gif" the loader reads a single
# frame per file through cv.VideoCapture.
# NOTE: `folder2` is an absolute, machine-specific Windows path; point it at
# the local copy of the dataset before running.
# Passing gray=False makes the loader convert each frame with cv.COLOR_BGR2GRAY.
folder2 = r"C:\Users\Sean\Desktop\Image-Retrieval\MPEG7 code\MPEG7"
images2, filenames2, y2 = load_image_from_folder(folder2, ".gif", False)

In [None]:
# SIFT features
# One keypoint entry and one descriptor entry per image in `images2`, in the same order.
keypoints_per_image2, descriptor_per_image2 = SIFT(images2)

In [None]:
# Stacking descriptors
# Combines the per-image descriptors into one array for clustering.
# (The variable keeps its existing spelling because a later cell reads it.)
stacked_desriptors2 = stack_descriptors(descriptor_per_image2)

In [None]:
# Clustering descriptors
# n_clusters2 is the vocabulary size: it is reused below as the histogram
# length and in the saved file name.
n_clusters2 = 1000
# y_clusters2 holds the labels returned by cluster() for the stacked descriptors.
y_clusters2 = cluster(stacked_desriptors2, n_clusters2, "minibatch")

In [None]:
# Creating BoW for each image
# Turns the cluster labels into one histogram of n_clusters2 bins per image.
MPEG7_image_histograms = solve_BoW(descriptor_per_image2, y_clusters2, n_clusters2)

In [None]:
# Saving BoW array
# Written with a relative file name (so it lands in the current working
# directory); the name embeds n_clusters2.
np.save('MPEG7_BoW_words={}.npy'.format(n_clusters2), MPEG7_image_histograms)