# Compute Results of Analysis

Combine SIFT, ResNet, Swin, VGG and ViT features with basic distance analysis, query expansion and diffusion.

____
## Imports and Constants

In [30]:
import numpy as np
import pandas as pd
import os
import sklearn.metrics.pairwise
import my_eval
import query

###########################################

# Hard-coded absolute path; presumably the directory this notebook lives in on the author's machine.
# NOTE(review): no cell visible here reads it (the loader uses the relative "./data") — confirm it is still needed.
NOTEBOOK_DIR = "/home/sean/Code/Pawsey/4. Clean"

_____
## Load Data

In [4]:
## Load features
#
# Walk ./data and build a nested dict of arrays:
#   data["names"][dataset]["y"]                 from files ending in "y-names.npy"
#   data["names"][dataset][difficulty]          from the other files in ./data/names
#   data[descriptor][dataset][difficulty][xy]   from four-field feature files
# File names are split on "-" after dropping the ".npy" extension.

data = {}

for descriptor in os.listdir("./data"):

    descriptor_dir = os.path.join("./data", descriptor)
    if not os.path.isdir(descriptor_dir):
        # Stray files next to the descriptor folders (e.g. .DS_Store) are not feature sets.
        continue

    if descriptor == "names":
        data[descriptor] = {"ox": {}, "par": {}}
        for fname in os.listdir(descriptor_dir):
            if not fname.endswith(".npy"):
                # The fname[:-4] slice below is only meaningful for ".npy" files.
                continue
            split_name = fname[:-4].split("-")
            dataset = split_name[0]
            if fname.endswith("y-names.npy"):
                data[descriptor][dataset]["y"] = np.load(os.path.join(descriptor_dir, fname))
            else:
                # Third field of the file name selects the difficulty key.
                difficulty = split_name[2]
                data[descriptor][dataset][difficulty] = np.load(os.path.join(descriptor_dir, fname))

    else:
        data[descriptor] = {"ox": {"E": {}, "M": {}, "H": {}},
                            "par": {"E": {}, "M": {}, "H": {}}}
        for fname in os.listdir(descriptor_dir):
            if not fname.endswith(".npy"):
                continue
            split_name = fname[:-4].split("-")
            if len(split_name) == 3:
                # Three-field file names are intentionally not loaded.
                # NOTE(review): presumably files without a difficulty field — confirm.
                continue
            # Any other field count than four raises ValueError here, flagging a malformed name.
            (_, xy, dataset, difficulty) = split_name
            data[descriptor][dataset][difficulty][xy] = np.load(os.path.join(descriptor_dir, fname))

    print("Loaded " + descriptor)

# Cell output: show one loaded array as a quick check.
data["swin"]["ox"]["E"]["x"]

Loaded swin
Loaded vgg
Loaded resnet
Loaded names
Loaded vit


array([[ 0.4381919 , -1.1369115 , -0.49100572, ..., -0.27456677,
         0.38102797, -0.30554023],
       [-0.19804995,  0.02098738,  0.52111053, ...,  0.44540596,
         0.8620084 ,  0.18907186],
       [ 1.0216093 , -0.06300209, -0.06569103, ...,  0.02202551,
        -0.32440802,  0.3858102 ],
       ...,
       [ 0.74518114, -0.9655011 , -0.55623275, ..., -0.39560622,
         0.3983633 , -0.4672271 ],
       [ 0.4493655 , -0.97439206, -0.61376625, ..., -0.19914342,
         0.27447924, -0.3482531 ],
       [ 0.04259995,  0.09633142,  0.65417933, ...,  0.5438953 ,
         0.53027916,  0.03832415]], dtype=float32)

In [27]:
# Working arrays for the cells below: Swin features and names for dataset "ox" at difficulty "E".
q = data["swin"]["ox"]["E"]["y"]  # passed as the queries below
t = data["swin"]["ox"]["E"]["x"]  # passed as the gallery below
q_n = data["names"]["ox"]["y"]  # names passed to evaluate alongside q's rankings
t_n = data["names"]["ox"]["E"]  # names passed to evaluate for the gallery

_____
## Evaluate Methods

In [24]:
# Quick check: rank the rows of t against the first query vector only.
basic_query(q[0], t)

array([265, 462,  78, 239, 283, 259, 442, 491, 213, 276, 216, 186, 109,
        89,  42, 504, 296, 430, 395, 311, 473,   2,  55,   9, 278,  59,
       154, 315, 236,  19, 187, 507,  79, 150, 329, 170, 164, 306, 116,
       330, 160, 237, 128, 481, 240, 197, 434, 292, 417, 456, 357, 255,
       264, 143, 231, 408, 342, 336, 122, 157, 181, 506, 402, 362, 485,
       438,  69, 502, 453, 335,  95,  68, 378, 377,  10, 183, 431, 423,
       131, 476,  40,  28,  18, 215, 379, 286, 155, 436, 256, 433, 121,
       267, 439, 169,  76, 416, 429, 420, 129, 167, 250, 141, 367, 235,
       279, 225, 258, 176,  90,  11, 410, 104, 253,  98, 112,  32, 196,
         8, 117, 372, 271, 165, 352, 412, 201,  24, 406, 405, 515, 174,
       359, 111, 460, 508,  92, 448, 304, 313, 302, 223, 152, 133, 397,
       212,  25, 204, 274, 468, 214, 301,  34, 219, 173, 270, 388, 317,
       452,   1, 389, 409, 445, 199, 320, 370, 385,  63, 366, 178, 499,
       188, 501, 321, 483, 280, 455, 142,  70, 149, 490, 404, 20

In [28]:
def return_ranks(method, queries, gallery, **kwargs):
    """Rank the gallery against every query with the chosen retrieval method.

    Args:
        method: Name of the retrieval method; only "basic" is implemented.
        queries: Iterable of query feature vectors.
        gallery: Gallery features, passed unchanged to the query function.
        **kwargs: Forwarded to the query function (e.g. metric_function
            for basic_query).

    Returns:
        np.ndarray built from the per-query rankings, one entry per query.

    Raises:
        ValueError: If method is not a supported method name.
    """
    if method == "basic":
        # Named query_vector so the imported `query` module name is not reused.
        return np.array([basic_query(query_vector, gallery, **kwargs) for query_vector in queries])

    # Fail loudly rather than implicitly returning None for an unknown method.
    raise ValueError("Unknown ranking method: {!r}".format(method))

# Rank the gallery t for every query in q; the result holds one ranking per query.
a = return_ranks("basic", q, t)
a  # display the rankings as the cell output
        

array([[265, 462,  78, ..., 450, 228, 272],
       [ 79, 507, 434, ..., 450, 228, 272],
       [442, 259, 462, ..., 450, 228, 272],
       ...,
       [213, 265, 442, ..., 450, 228, 272],
       [299, 464, 291, ...,  81, 228, 272],
       [507,  79, 434, ..., 228,  81, 272]])

In [29]:
# Score the rankings against the query and gallery names.
# NOTE(review): `evaluate` is not defined in the cells visible here (presumably from my_eval or another cell);
# the output suggests it returns (mean average precision, {k: precision@k}) — confirm.
evaluate(a, q_n, t_n)

(0.8838282489082855,
 {1: 0.8857142857142857, 5: 0.7371428571428572, 10: 0.6728571428571429})

In [18]:
def basic_query(query, query_target, metric_function = sklearn.metrics.pairwise.euclidean_distances):
    """Rank the rows of query_target by their metric value against one query.

    The query is flattened into a single row vector, metric_function(query_target, query)
    is evaluated, and the indexes of query_target are returned ordered from the
    smallest metric value to the largest (Euclidean distance by default).
    """
    row_vector = query.reshape((1, -1))
    distances = metric_function(query_target, row_vector)
    # Drop the singleton axis so argsort yields a flat ranking.
    return np.argsort(distances.squeeze())

def query_results_to_truth_values(query_image_building, query_results, image_names):
    """Flag, for each retrieved index, whether its name matches the query's building.

    Returns a list with one entry per element of query_results, in the same
    order; each entry is the result of comparing query_image_building with
    image_names[index].
    """
    truth_values = []
    for retrieved in query_results:
        truth_values.append(query_image_building == image_names[retrieved])
    return truth_values

def precision_at_k(truth_values, k, warnings=True):
    """Return the proportion of True values among the first k elements.

    Args:
        truth_values: Booleans in ranked order (list, tuple or 1-D NumPy array).
        k: Cut-off rank; must be positive. The count is always divided by k,
            even when fewer than k values are supplied.
        warnings: Unused; kept so existing callers that pass it keep working.

    Returns:
        Number of True values in truth_values[:k] divided by k.

    Raises:
        ValueError: If k is not positive.
    """
    if k <= 0:
        # A negative k would otherwise slice from the end and return a negative "precision".
        raise ValueError("k must be a positive integer, got {!r}".format(k))

    # Materialise the slice as a list so NumPy arrays, which have no .count(), are accepted too.
    top_k = list(truth_values[:k])
    return top_k.count(True) / k


def average_precision(truth_values):
    """Return the average precision of a ranked sequence of relevance flags.

    Precision is taken at every rank that holds a true value, and those
    precisions are averaged.
    e.g. average_precision([True, True, False, True]) = (1/1 + 2/2 + 3/4) / 3 ~ 0.92

    Args:
        truth_values: Booleans in ranked order (list, tuple or 1-D NumPy array).

    Returns:
        Mean of the precisions at the ranks holding a true value, or 0.0 when
        there is no true value (rather than the NaN and RuntimeWarning that
        np.mean produces for an empty list).
    """
    precisions = []
    hits = 0  # running count of true values seen so far; avoids re-counting each prefix
    for rank, val in enumerate(truth_values, start=1):
        if val:
            hits += 1
            precisions.append(hits / rank)

    if not precisions:
        return 0.0
    return np.mean(precisions)