## Sift Evaluator

The objectives of this notebook are to:

- Define functions that query the data based on different parameters (distance metric, transformations?)
- Define functions to evaluate whether each result returned by a query is a true match
- Define functions to calculate mAP and precision@k for the above output
- Create a pipeline for evaluating the effects of different parameter setups / transformations on mAP and p@k

______
# Evaluation Functions
____
### Imports

In [10]:
import numpy as np
import unittest
import sklearn.metrics.pairwise
import sklearn.preprocessing
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import PIL
import time
import glob
import copy
import my_eval
import sklearn.decomposition

In [3]:
# NOTE(review): absolute, machine-specific path; no cell visible here reads it —
# confirm before relying on it or removing it.
NOTEBOOK_DIR = "/home/sean/Code/Pawsey/4. Clean"

# Feature files are read relative to the current working directory.
# Layout of the nested lookup built below:
#   data[subset]["names"][dataset]["y" or difficulty]   -> array loaded from disk
#   data[subset][descriptor][dataset][difficulty][xy]   -> array loaded from disk
data = {}

for data_subset in ["oldenburger", "sutton"]:
    subdir = os.path.join("./data", data_subset)
    data[data_subset] = {}

    for descriptor in os.listdir(subdir):
        descriptor_dir = os.path.join(subdir, descriptor)
        # File names are parsed by stripping a 4-character ".npy" suffix, so any
        # other entry in the folder is skipped instead of crashing the parse.
        npy_files = [entry for entry in os.listdir(descriptor_dir) if entry.endswith(".npy")]

        if descriptor == "names":
            data[data_subset][descriptor] = {"ox" : {}, "par" : {}}
            for fname in npy_files:
                split_name = fname[:-4].split("-")
                dataset = split_name[0]
                if fname.endswith("y-names.npy"):
                    key = "y"
                else:
                    # The third dash-separated field selects the difficulty bucket.
                    key = split_name[2]
                data[data_subset][descriptor][dataset][key] = np.load(os.path.join(descriptor_dir, fname))

        else:
            data[data_subset][descriptor] = {"ox" : {"E" : {}, "M" : {}, "H" :{}},
                                "par" : {"E" : {}, "M" : {}, "H" :{}}}
            for fname in npy_files:
                split_name = fname[:-4].split("-")
                if len(split_name) == 3:
                    # Stems with exactly three dash-separated parts are not loaded.
                    continue
                (_, xy, dataset, difficulty) = split_name
                data[data_subset][descriptor][dataset][difficulty][xy] = np.load(os.path.join(descriptor_dir, fname))

        print("Loaded " + descriptor)

data["oldenburger"]["swin"]["ox"]["E"]["x"]

Loaded swin
Loaded vgg
Loaded resnet
Loaded names
Loaded vit
Loaded names
Loaded sift-10k
Loaded sift-1k


array([[ 0.4381919 , -1.1369115 , -0.49100572, ..., -0.27456677,
         0.38102797, -0.30554023],
       [-0.19804995,  0.02098738,  0.52111053, ...,  0.44540596,
         0.8620084 ,  0.18907186],
       [ 1.0216093 , -0.06300209, -0.06569103, ...,  0.02202551,
        -0.32440802,  0.3858102 ],
       ...,
       [ 0.74518114, -0.9655011 , -0.55623275, ..., -0.39560622,
         0.3983633 , -0.4672271 ],
       [ 0.4493655 , -0.97439206, -0.61376625, ..., -0.19914342,
         0.27447924, -0.3482531 ],
       [ 0.04259995,  0.09633142,  0.65417933, ...,  0.5438953 ,
         0.53027916,  0.03832415]], dtype=float32)

____
### Query Functions

In [4]:
def basic_query(query, query_target, metric_function = sklearn.metrics.pairwise.euclidean_distances):
    """Rank the rows of ``query_target`` by their distance to ``query``.

    ``query`` is flattened into a single row and compared against every row of
    ``query_target`` with ``metric_function`` (euclidean distance unless another
    pairwise function is supplied). The returned array holds the row indexes of
    ``query_target`` ordered from the smallest metric value to the largest.

    NOTE(review): an equivalent ``basic_query`` is defined again in a later
    cell; whichever cell ran last is the one in effect.
    """
    row_query = query.reshape((1, -1))
    distances = metric_function(query_target, row_query)
    # Drop the singleton column so argsort runs over one value per target row.
    return np.argsort(distances.squeeze())


In [37]:
## Functions to carry out basic and expanded queries

import sklearn.metrics.pairwise
import numpy as np
import diffusion

def return_ranks(method, queries, gallery, **kwargs):
    """Rank the gallery for every query with the chosen retrieval method.

    Parameters
    ----------
    method : str
        One of "basic", "diffusion" or "expanded query".
    queries : iterable
        Query feature vectors; iterated one by one for the "basic" and
        "expanded query" methods, passed whole to the diffusion helper.
    gallery : array-like
        Feature vectors that the queries are ranked against.
    **kwargs
        Forwarded unchanged to ``basic_query``, ``diffusion.diffusion_ranks``
        or ``qe_query`` respectively.

    Returns
    -------
    For "basic" and "expanded query": a numpy array with one ranking per query.
    For "diffusion": whatever ``diffusion.diffusion_ranks`` returns.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names. Previously this case
        fell through and silently returned ``None``.
    """
    if method == "basic":
        return np.array([basic_query(query, gallery, **kwargs) for query in queries])
    if method == "diffusion":
        return diffusion.diffusion_ranks(queries, gallery, **kwargs)
    if method == "expanded query":
        return np.array([qe_query(query, gallery, **kwargs) for query in queries])

    raise ValueError(
        "Unknown ranking method {!r}; expected 'basic', 'diffusion' or 'expanded query'".format(method)
    )
    

def basic_query(query, query_target, metric_function = sklearn.metrics.pairwise.euclidean_distances):
    """Rank the rows of ``query_target`` by their distance to ``query``.

    ``query`` is flattened into a single row and compared against every row of
    ``query_target`` with ``metric_function`` (euclidean distance unless another
    pairwise function is supplied). The returned array holds the row indexes of
    ``query_target`` ordered from the smallest metric value to the largest.

    NOTE(review): an equivalent ``basic_query`` is also defined in an earlier
    cell; this later definition replaces it once this cell has run.
    """
    row_query = query.reshape((1, -1))
    distances = metric_function(query_target, row_query)
    # Drop the singleton column so argsort runs over one value per target row.
    return np.argsort(distances.squeeze())

def qe_query(query, query_target, metric_function=sklearn.metrics.pairwise.euclidean_distances, type="qe baseline", n=5, alpha=1):
    """Run a query with query expansion.

    Supported ``type`` values:
      - "qe baseline": described in Total Recall (2007). The top ``n`` results
        of a plain query are averaged into a second query. ``alpha`` is the
        weight given to that average when it is blended with the original
        query (``1 - alpha`` goes to the original). With ``alpha = 1`` the
        results after the top ``n`` are determined entirely by the top ``n``.

    Parameters
    ----------
    query : array
        Feature vector of the query item.
    query_target : array
        Feature vectors to rank; indexed by row.
    metric_function : callable
        Pairwise metric handed to ``basic_query``.
    type : str
        Expansion strategy; only "qe baseline" is implemented.
    n : int
        Number of top results kept and averaged into the second query.
    alpha : float
        Blend weight of the averaged top-``n`` vector.

    Returns
    -------
    1-D index array: the top ``n`` indexes of the plain ranking, followed by
    the second query's ranking with those ``n`` indexes removed.

    Raises
    ------
    ValueError
        If ``type`` is not a supported strategy. Previously this printed a
        message and returned ``None``.
    """
    if type != "qe baseline":
        raise ValueError("Unsupported query expansion type {!r}; expected 'qe baseline'".format(type))

    original_results = basic_query(query, query_target, metric_function)

    # Keep the top n as-is and fold them into a single averaged query vector.
    top_n_results = original_results[:n]
    second_query = np.average(query_target[top_n_results], axis=0)

    if alpha != 1:
        # Blend the original query back in; alpha is the share of the expanded query.
        second_query = np.average([query, second_query], axis=0, weights=[1 - alpha, alpha])

    new_results = basic_query(second_query, query_target, metric_function)
    # Remove anything already placed in the top n so no index appears twice.
    pruned_new_results = new_results[np.logical_not(np.isin(new_results, top_n_results))]

    return np.concatenate([top_n_results, pruned_new_results])

In [52]:
# Scratch check: indexing a 2-D array with a list of row numbers returns those
# rows in the requested order.
a = np.stack([np.arange(4), np.arange(4)], axis=1)
a[[2, 1, 3]]

array([[2, 2],
       [1, 1],
       [3, 3]])

In [35]:
# Pick the "oldenburger" subset, "swin" descriptor, "ox" dataset at difficulty "E":
# q / t are the "y" and "x" feature arrays, q_n / t_n the matching name arrays.
_oldenburger = data["oldenburger"]
q, t = (_oldenburger["swin"]["ox"]["E"][part] for part in ("y", "x"))
q_n, t_n = (_oldenburger["names"]["ox"][key] for key in ("y", "E"))



In [47]:
# Rank the gallery `t` for every query in `q` through the query-expansion path.
# alpha = 0 puts the whole blend weight on the original query vector.
ranks = return_ranks(method="expanded query", queries=q, gallery=t, alpha=0)
len(ranks)

70

In [43]:
# Score the rankings against the query / gallery names with the project-local my_eval module.
# NOTE(review): the displayed result looks like (mAP, {k: precision@k}) — confirm against my_eval.evaluate.
# NOTE(review): this cell's execution count (43) precedes that of the cell building `ranks` (47),
# so the saved output may belong to an earlier `ranks`; re-run top to bottom before quoting it.
my_eval.evaluate(ranks, q_n, t_n)

(0.8838282489082855,
 {1: 0.8857142857142857, 5: 0.7371428571428572, 10: 0.6728571428571429})