# Reranking Test Notebook

__Goals__
- Implement nqe
- Implement alpha query expansion
- Implement diffusion

____
## Imports

In [1]:
import numpy as np
import pandas as pd
import sklearn.metrics
import unittest
import sklearn.preprocessing
import os
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import PIL
import time
import glob
import copy

_____
## Metrics Functions

In [2]:
def basic_query(query_image_features, query_target_features, metric_function = sklearn.metrics.pairwise.euclidean_distances, aqe = False):
    """Return the indexes of the query_target images, arranged in ascending euclidean distance as compared to the query image"""
    
    query = query_image_features.reshape((1, -1))
    D = metric_function(query_target_features, query).squeeze()
    index = np.argsort(D)

    return(index)

In [3]:
def query_results_to_truth_values(query_image_building, query_results, image_names):
    """Convert the index results of a query to an array of booleans corresponding to whether the correct image was retrieved."""
    return([query_image_building == image_names[index] for index in query_results])

In [4]:
def precision_at_k(truth_values, k, warnings=True):
    """Return proportions of true values in the first k elements.
    If warnings=True and all true values occur before the kth element, raise an error"""
    p_at_k = truth_values[:k].count(True) / k

    if warnings:
        if k < len(truth_values):
            if truth_values[k:].count(True) == 0:
                raise ValueError("All true values are before the first k values")
    
    return(p_at_k)


def average_precision(truth_values):
    """Given a boolean input of whether returned query values are correct or false, return the average precision.
    e.g. average_precision([True, True, False, True]) ~ 0.85
    """
    precisions = []
    for (index, val) in enumerate(truth_values):
        if val: # == True
            precisions.append(truth_values[:index + 1].count(True) / (index + 1))      

    return(np.mean(precisions))

In [5]:
def compute_metrics(train_features, test_features, train_names, test_names, query_function, metric_function, average_mean_precision = True, k_values = [5,10,20]):
    """Run each test feature against the train features,"""
    average_precisions = []
    precisions_at_k = {}
    for k in k_values:
        precisions_at_k[k] = []
    
    for (test_feature, test_feature_name) in zip(test_features.iterrows(), test_names):
        query_image_features = test_feature[1].values # extract the values for the iterrows row object
        query_results = query_function(query_image_features, train_features, metric_function)
        truth_values = query_results_to_truth_values(test_feature_name, query_results, train_names)
        
        average_precisions.append(average_precision(truth_values))
        for k in precisions_at_k:
            p_at_k = precision_at_k(truth_values, k, warnings=False)
            precisions_at_k[k].append(p_at_k)
    
    for (k_value, list_of_precisions) in precisions_at_k.items():
        precisions_at_k[k_value] = np.mean(list_of_precisions)
    
    return(np.mean(average_precisions), precisions_at_k)

____
## Query Expansion

In [15]:
def new_expanded_query(original_query_results, query_target_features, type = "naive", n = 5):
    """Return an expanded query to based on the top n results of the initial query results."""
    
    top_n_features = query_target_features.loc[:n]

    if type == "n":
        return(pd.DataFrame([a.apply(np.mean)]).values)

    else:
        if type == "alpha":
            raise ValueError("Alpha query expansion not implemented")
        elif type == "linear1":
            weights = [(n-i)/n for i in range(n)]
        elif type == "linear2":
            weights = [(n-i)/n + 1 for i in range(n)]
        elif type == "fractional":
            weights = [1/i for i in range(1, n+1)]
        
        return(pd.DataFrame([top_n_features.apply(np.average, weights = weights)]))

In [3]:
test_features = pd.DataFrame({1 : [1,2,3,4],
                              2 : [1,2,3,4],
                              3 : [1,2,3,4]})
results = [2,0,1,3]

n = 2

a = test_features.loc[results[:2]]
a

Unnamed: 0,1,2,3
2,3,3,3
0,1,1,1


In [4]:
b = pd.DataFrame([a.apply(np.mean)]).values
b

array([[2., 2., 2.]])

In [14]:
b2 = pd.DataFrame([a.apply(np.average, weights = [2,1])]).values
b2

array([[2.33333333, 2.33333333, 2.33333333]])

In [77]:
c = np.array(a.apply(np.mean))
type(c)

numpy.ndarray

In [78]:
type(b.values)

numpy.ndarray

In [81]:
np.shape(b.values)

(1, 3)

In [76]:
basic_query(c, test_features)

ValueError: Expected 2D array, got 1D array instead:
array=[2. 2. 2.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [67]:
type(b.values)

numpy.ndarray

In [42]:
def basic_query(query_image_features, query_target_features, metric_function = sklearn.metrics.pairwise.euclidean_distances):
    """Return the indexes of the query_target images, arranged in ascending euclidean distance as compared to the query image"""
    
    query = query_image_features.reshape((1, -1))
    D = metric_function(query_target_features, query_image_features).squeeze()
    index = np.argsort(D)

    return(index)

In [48]:
a = test_features.loc[1].values.reshape((1,-1))
a

array([[2, 2, 2]])

In [50]:
D = sklearn.metrics.euclidean_distances(a, test_features).squeeze()
D

array([1.73205081, 0.        , 1.73205081, 3.46410162])

In [51]:
basic_query(a, test_features)

array([1, 0, 2, 3])