In [1]:
import numpy as np 
import cv2
import os
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import clear_output
import time
import pandas
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.cluster import MiniBatchKMeans, KMeans
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
import glob

In [2]:
def SIFT(images):
    sift = cv2.SIFT_create()
    
    keypoints_per_image = []
    descriptor_per_image = []
    
    count = 0
    for image in images:
        keypoints, descriptor = sift.detectAndCompute(image, None)

        keypoints_per_image.append(keypoints)
        descriptor_per_image.append(descriptor)
        
        count += 1 
        print("Percentage Completed: {}%".format(round((count/len(images))*100), 2), end="\r")
    
    print("")
    return keypoints_per_image, descriptor_per_image

def stack_descriptors(descriptors):
    stack = []
    
    for desc in descriptors:
        tmp = np.array(desc)
        if tmp.shape:
            stack.append(tmp)
            
    all_descriptors = np.vstack(i for i in stack)
    
    return all_descriptors

def cluster(data, n_clusters=100, cluster_type="minibatch"):
    start = time.time()
    
    if cluster_type == "minibatch":
        cluster = MiniBatchKMeans(n_clusters=n_clusters)
        y_cluster = cluster.fit(data)
    elif cluster_type == "kmeans":
        cluster = KMeans(n_clusters=n_clusters)
        y_cluster = cluster.fit(data)
    else:
        print("Unknown cluster_type! Try: 'minibatch' or 'kmeans'")
        
    end = time.time()
    print("Time Elapsed: {} min".format(round((end - start)/60, 2)))
    return y_cluster

def solve_BoW(descriptors, y_cluster, n_clusters):
    previous = 0
    count = 0
    image_words = []
    for image_number in range(len(descriptors)):
        if descriptors[image_number] is not None:
            tmp = []
            for kp in descriptors[image_number]:
                cluster = y_cluster.predict(np.array([kp]))
                tmp.append(cluster[0])
            image_words.append(tmp)
            
            count += 1
            print("(1/2) Percentage Completed: {}%".format(round((count/len(descriptors))*100), 2), end="\r")
        else:
            # If image has no desciptors, append 0 words to it
            image_words.append([0])
    
    print("")
    count = 0
    image_histograms = []
    for image in range(len(image_words)):
        hist = np.zeros(n_clusters)
        for words in image_words[image]:
            hist[words-1] = hist[words-1]+1
        image_histograms.append(hist)
        
        count += 1
        print("(2/2) Percentage Completed: {}%".format(round((count/len(image_words))*100), 2), end="\r")
    
    print("")
    # Transforming data using tf-idf:
    transformer = TfidfTransformer(smooth_idf=False)
    weighted_image_histograms = transformer.fit_transform(image_histograms).toarray()
    
    return weighted_image_histograms

In [3]:
def image_retrieval_k(train_data, test_data, train_names, test_names, train_images, test_images, k=10, view_option=0, border_size=3, print_opt=True):
    avg_precisions = []
    precisionsatk = []
    count = 0
    
    for idx, query in enumerate(test_data):
        all_precisions = []
        precisions = []
        
        # Finding similarity order:
        query = query.reshape((1, -1))
        D = euclidean_distances(train_data, query).squeeze()
        D = 1-D
        index = np.argsort(D)
        
        # Finding the index of the last correct image in the sorted index to iter to
        last_correct_image_idx = 0
        for i in range(len(index)):
            if train_names[index[i]] == test_names[idx]:
                last_correct_image_idx = i
        
        # make sure we iter to k (for precision@k) if all correct images are found before k
        if k > last_correct_image_idx:
            last_correct_image_idx = k+1
        
        # Itering through all images untill we get to k or last correct image to compute AP
        for kk in range(1, last_correct_image_idx+2):
            TP = 0
            FP = 0
            FN = 0
            
            # Finding the correct amount of images in the training set
            correct_count = 0
            for ind in index:
                if train_names[ind] == test_names[idx]:
                    correct_count += 1
            sized_index = index[:kk]
            
            # Find TP FP FN
            for ind in sized_index:
                if train_names[ind] == test_names[idx]:
                    TP += 1
                else:
                    FP += 1
            FN = correct_count - TP
            
            # If the last k image is a correct image we add precision to the list
            if train_names[sized_index[-1]] == test_names[idx]:
                precisions.append(TP/(TP+FP))
                
            # Adding all precisions and recalls to a seperate list
            all_precisions.append(TP/(TP+FP))
        
        # Solving AP and precision@k
        avg_precisions.append(np.average(precisions))
        precisionsatk.append(all_precisions[k-1])
        
        # display retrieval:
        if view_option == 0:
            count += 1
            if print_opt:
                print("Percentage Complete: {}%".format(round((count/len(test_data))*100),2), end="\r")
        elif view_option == 1:
            display_retrieval(test_data, test_images, idx, train_images, index, test_names, train_names, sized_index, avg_precisions[-1], precisionsatk[-1], border_size, k=k)
            
    return avg_precisions, precisionsatk

In [4]:
def display_retrieval(test_data, test_images, idx, train_images, index, test_names, train_names, sized_index, avg_precisions, precisionsatk, border_size, k):
    top_k_images = [test_images[idx]]
    for i in range(0,k):
        top_k_images.append(train_images[index[i]])

    fig, axes = plt.subplots(1, k+1, figsize=(200/k, 200/k))
    for i, (image, ax) in enumerate(zip(top_k_images, axes.ravel())):
        if i == 0:
            query_name = test_names[idx]
            title = "Query: {}".format(query_name)
        else:
            title = train_names[sized_index[i-1]]
            if train_names[sized_index[i-1]] == query_name:
                color = (0, 255, 0)
                image = border(image, color, border_size)
            else:
                color = (255, 0, 0)
                image = border(image, color, border_size)
        # display all set options
        ax.imshow(image, cmap="gray")
        ax.set_title(title)
        ax.axis("off")
    plt.show()
    print("Label: {}".format(test_names[idx]))
    print("Average Precision for query {}: ".format(idx), avg_precisions)
    print("Precision@k for query {}: ".format(idx), precisionsatk)
    print("\n")

In [5]:
def border(img, color, border_size):
    # get dimensions
    h, w = img.shape[:2]

    # make a base slightly bigger than image
    base_size= h+(border_size*2), w+(border_size*2), 3
    base = np.zeros(base_size, dtype=np.uint8)

    # make a boundary of chosen color
    cv2.rectangle(base, (0,0), (w+20,h+20), color, 30)

    # put original image into base
    base[border_size:h+border_size, border_size:w+border_size] = img
    
    return base

In [6]:
def pca(trn_features, val_features, dim=256, print_opt=True):
    # PCA Dimension reduction
    pca = PCA(n_components=dim)
    pca.fit(trn_features)

    # Dimension reduction
    trn_features = pca.transform(trn_features)
    val_features = pca.transform(val_features)

    if print_opt:
        print("Train Features shape: {} | Valid Features shape: {}".format(trn_features.shape, val_features.shape))
    
    return trn_features, val_features

In [7]:
def compute_SIFT(test_gray_images, train_gray_images, n_clusters):
    # Copmuting bovw
    print("\nComputing test SIFT features...")
    test_kp, test_desc = SIFT(test_gray_images)

    print("\nComputing train SIFT features...")
    train_kp, train_desc = SIFT(train_gray_images)
    stacked_train_desc = stack_descriptors(train_desc)

    print("\nClustering Descriptors...")
    cluster_func = cluster(stacked_train_desc, n_clusters)

    print("\nComputing test BoVW...")
    test_bovw  = solve_BoW(test_desc, cluster_func, n_clusters)
    print("\nComputing train BoVW...")
    train_bovw = solve_BoW(train_desc, cluster_func, n_clusters)
    
    return test_bovw, train_bovw

## Load in images & names here

In [23]:
def load_data(train_path, query_path):

    train_image_paths = []
    train_images = []
    train_names = []

    # save path to image and save class names as numbers (train)
    for data_path in glob.glob(train_path + '/*'):
        name = data_path.split('/')[-1].split("-")[0]
        train_names.append(name) 
        train_image_paths.append(data_path)
    
    # open image from path and save to array
    for img_path in train_image_paths:
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        train_images.append(img)

    # save path to image and save class names as numbers (query)
    query_image_paths = []
    query_names = []
    query_images = []
    
    for data_path in glob.glob(query_path + '/*'):
        name = data_path.split('/')[-1].split("-")[0]
        query_names.append(name) 
        query_image_paths.append(data_path)
    
    # open image from path and save to array
    for img_path in query_image_paths:
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        query_images.append(img)

    print("Train Images: {} | Query Images: {}".format(len(train_images), len(query_images)))
    return train_images, train_names, query_images, query_names

option = 'easy'
train_path = "/home/sean/Code/Pawsey/3. Data/Revised and Sorted/roxford5k/{}".format(option)
query_path = "/home/sean/Code/Pawsey/3. Data/Revised and Sorted/roxford5k/query"

ox_easy_images, ox_easy_names, ox_query_images, ox_query_names = load_data(train_path, query_path)

Train Images: 516 | Query Images: 70


## For roxford

In [39]:
# need to load in colour and gray images
test_colour_images = ox_query_images
train_colour_images = ox_easy_images

test_gray_images = [cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) for image in test_colour_images]
train_gray_images = [cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) for image in train_colour_images]

In [40]:
# load in names:
test_names = ox_query_names
train_names = ox_easy_names

## For oxford

In [10]:
test_colour_images = np.load("/home/sean/Code/Pawsey/1. Initial Analysis/oxford_data/test_images.npy", allow_pickle=True)
train_colour_images = np.load("/home/sean/Code/Pawsey/1. Initial Analysis/oxford_data/train_images.npy", allow_pickle=True)

test_gray_images = [cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) for image in test_colour_images]
train_gray_images = [cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) for image in train_colour_images]

In [11]:
# load in names:
test_names = np.load("/home/sean/Code/Pawsey/1. Initial Analysis/oxford_data/test_names.npy", allow_pickle=True)
train_names = np.load("/home/sean/Code/Pawsey/1. Initial Analysis/oxford_data/train_names.npy", allow_pickle=True)

## Compute SIFT Features:

In [12]:
# set the amount of words you want
n_clusters = 10

In [13]:
test_bovw, train_bovw = compute_SIFT(test_gray_images, train_gray_images, n_clusters)


Computing test SIFT features...
Percentage Completed: 100%

Computing train SIFT features...
Percentage Completed: 100%


  all_descriptors = np.vstack(i for i in stack)



Clustering Descriptors...
Time Elapsed: 0.04 min

Computing test BoVW...
(1/2) Percentage Completed: 100%
(2/2) Percentage Completed: 100%

Computing train BoVW...
(1/2) Percentage Completed: 100%
(2/2) Percentage Completed: 100%


In [43]:
# can apply PCA here (256 for best mAP)
test_bovw1, train_bovw1 = pca(train_bovw, test_bovw, 256)

ValueError: n_components=256 must be between 0 and min(n_samples, n_features)=100 with svd_solver='full'

In [14]:
# Compute metrics
print("\nComputing Metrics...")
AP, precisionsatk = image_retrieval_k(train_bovw, test_bovw, train_names, test_names, train_colour_images, test_colour_images, k=10, view_option=0, border_size=20)

# Display mAP
mAP = np.average(AP)
print("\nmAP =", mAP)


Computing Metrics...
Percentage Complete: 100%
mAP = 0.09853524550611947


In [37]:
train_names

['radcliffe_camera',
 'all_souls',
 'radcliffe_camera',
 'christ_church',
 'all_souls',
 'radcliffe_camera',
 'radcliffe_camera',
 'radcliffe_camera',
 'all_souls',
 'radcliffe_camera',
 'radcliffe_camera',
 'christ_church',
 'magdalen',
 'radcliffe_camera',
 'radcliffe_camera',
 'radcliffe_camera',
 'christ_church',
 'radcliffe_camera',
 'all_souls',
 'radcliffe_camera',
 'bodleian',
 'radcliffe_camera',
 'radcliffe_camera',
 'christ_church',
 'radcliffe_camera',
 'all_souls',
 'radcliffe_camera',
 'christ_church',
 'radcliffe_camera',
 'radcliffe_camera',
 'magdalen',
 'magdalen',
 'bodleian',
 'radcliffe_camera',
 'ashmolean',
 'hertford',
 'radcliffe_camera',
 'all_souls',
 'radcliffe_camera',
 'radcliffe_camera',
 'bodleian',
 'christ_church',
 'radcliffe_camera',
 'radcliffe_camera',
 'bodleian',
 'pitt_rivers',
 'radcliffe_camera',
 'ashmolean',
 'radcliffe_camera',
 'christ_church',
 'radcliffe_camera',
 'magdalen',
 'radcliffe_camera',
 'magdalen',
 'all_souls',
 'christ_churc