In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from tqdm import tqdm
from math import floor


def minmax_norm_numpy(data):
    """Min-max normalise a vector/array using its global minimum and maximum.

    The minimum and maximum are taken over the whole array (not per axis), so
    the smallest element maps to 0 and the largest to 1.

    Parameters
    ----------
    data : array_like
        Values to normalise. Lists/tuples are converted with ``np.asanyarray``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``data``. If every element of ``data`` is
        identical (zero range) an array of zeros is returned instead of NaNs.
        An empty input yields an empty array.
    """
    data = np.asanyarray(data)
    if data.size == 0:
        # min()/max() are undefined for an empty array; there is nothing to scale.
        # Dividing by 1 only promotes the dtype the same way the normal path does.
        return data / 1

    # Calculate the minimum and maximum values in the array
    min_val = data.min()
    max_val = data.max()

    value_range = max_val - min_val
    if value_range == 0:
        # Constant input: (data - min_val) is already all zeros, so dividing by 1
        # returns zeros rather than the 0/0 = NaN the naive formula would give.
        value_range = 1

    # Perform min-max normalization
    return (data - min_val) / value_range

def stand_numpy(data):
    """Standardise a vector/array to zero mean and unit standard deviation.

    The mean and (population) standard deviation are computed over the whole
    array, not per axis.

    Parameters
    ----------
    data : array_like
        Values to standardise. Lists/tuples are converted with ``np.asanyarray``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``data``. If the input has no spread
        (all elements equal, or a standard deviation of exactly zero) an array
        of zeros is returned instead of NaN/inf. An empty input yields an
        empty array.
    """
    data = np.asanyarray(data)
    if data.size == 0:
        # mean()/std() of an empty array are NaN and emit warnings; nothing to scale.
        # Dividing by 1 only promotes the dtype the same way the normal path does.
        return data / 1

    # Calculate the mean and standard deviation of the array
    mean_val = data.mean()
    std_dev = data.std()
    centred = data - mean_val

    # A constant array is checked explicitly as well as std == 0: the mean of
    # identical floats can be off by one rounding step, which would leave a tiny
    # non-zero std and turn pure rounding noise into +/-1 outputs.
    if std_dev == 0 or data.min() == data.max():
        return np.zeros_like(centred)

    # Perform standardization
    return centred / std_dev

def getMatchInds(ft_qry, ft_ref, metric='cosine', top_k=5):
    """Perform VPR matching between query and reference descriptors.

    Parameters
    ----------
    ft_qry : array_like
        Query descriptors, one row per query image.
    ft_ref : array_like
        Reference descriptors, one row per reference image.
    metric : str
        Distance metric passed straight to ``scipy.spatial.distance.cdist``,
        e.g. 'euclidean' or 'cosine'.
    top_k : int or None
        Number of best-matching reference indices to keep per query
        (default 5, the previously hard-coded value). ``None`` keeps the full
        ranking.

    Returns
    -------
    dMat : numpy.ndarray
        Distance matrix with references along axis 0 and queries along axis 1.
    mInds : numpy.ndarray
        Reference indices sorted by increasing distance for each query
        (column), truncated to at most ``top_k`` rows; row 0 is the best match.

    Raises
    ------
    ValueError
        If ``top_k`` is given and is smaller than 1.
    """
    if top_k is not None and top_k < 1:
        # A zero/negative value would silently slice from the wrong end
        raise ValueError('top_k must be a positive integer or None')

    # Note the argument order: references index the rows, queries the columns
    dMat = cdist(ft_ref,ft_qry,metric)
    mInds = np.argsort(dMat,axis=0)[:top_k]
    return dMat, mInds

In [None]:
################################ This cell sets up some data variables #############################
# Name of the dataset being processed
dataset = 'Dataset_Name'

# Load VPR query and reference descriptors/features
# Both are used below as 2-D arrays with one row per image
#  (they are passed to cdist and indexed with .shape[1])
query_feats = np.load('Path to Query Features')
ref_feats = np.load('Path to Reference Features')

# Load ground truth information
# This should have a shape of M x 2, where:
# M = number of query images;
# The first column contains the query index;
# The second column contains a list for each query of the reference image indices
#  which correspond to matching reference images
# NOTE(review): allow_pickle=True unpickles Python objects from the file, which can
#  execute arbitrary code - only load ground truth files from a trusted source
gt = np.load('Path to Ground Truth Info', allow_pickle=True)

# Store just the column containing matching reference indices for each query
gt_match_idxs = gt[:,1]

# Scale factor between query and reference if there is a difference in sampling
ref_scaler = 1
# Size of the coarse position prior (in number of images)
sect_len = 75
# Step size when iterating through the dataset
step_scaler = 1

diff_type = 'Absolute'

# List of the sequence lengths which are being tested
sequence_list = [3, 5, 7, 9, 11, 13, 15, 17, 19, 21]
# Extra list to include case of single query
# Derived from sequence_list (rather than typed out again) so the two lists
#  cannot drift apart when the tested sequence lengths are changed
sequence_list_labels = [1] + sequence_list

In [None]:
############# This cell computes the baseline VPR performance and appearance variation vectors ###########################
print('Calculating baselines')

# Compute the base VPR distance matrix and proposed matches
# dMat has reference images along axis 0 and query images along axis 1
dMat, mInds = getMatchInds(query_feats, ref_feats, metric='cosine')


# Best match for every query when the search is restricted to its coarse position prior
section_mInds = np.zeros((query_feats.shape[0],)).astype(int)
# The 10 best matches for every query within its coarse position prior
#  (this requires at least 10 reference images per chunk)
section_all_mInds = np.zeros((query_feats.shape[0],10)).astype(int)

# One appearance-variation vector (one value per feature dimension) per dataset chunk
coarse_prior_appearance_var = np.zeros((floor(query_feats.shape[0]/sect_len)*step_scaler, query_feats.shape[1]))
# Create a variable for tracking the brute-force check of recall per chunk per sequence length
#  Row 0 holds the single-image baseline; the remaining rows are one per entry of sequence_list
section_seq_avg_recall = np.zeros((len(sequence_list)+1, floor(query_feats.shape[0]/sect_len)*step_scaler))

# Iterating through the dataset using different center images for the coarse position prior
# The stop value is (number of queries - sect_len + 1) so that the final full chunk is
#  still visited when the number of queries is an exact multiple of sect_len; without
#  the +1 that chunk was skipped and its pre-allocated entries were left as zeros
for k in tqdm(range(0,query_feats.shape[0]-sect_len+1,int(sect_len/step_scaler))):

    # Retrieve the distance matrix, ground truth match indices and VPR match indices
    #  for the particular section/chunk of the dataset
    sect_dMat = dMat[k*ref_scaler:(k+sect_len)*ref_scaler, k:k+sect_len]
    sect_gt = gt_match_idxs[k:k+sect_len]
    sect_mInds = np.argsort(sect_dMat,axis=0)[:10]


    # Adjust the match indices to be with respect to the entire
    #  reference database instead of the particular chunk
    sect_mInds = sect_mInds+(k*ref_scaler)
    section_mInds[k:k+sect_len] = sect_mInds[0,:]
    section_all_mInds[k:k+sect_len,:] = sect_mInds.T

    # Retrieve reference features for places within the coarse position prior
    section_ref_feats = ref_feats[k*ref_scaler:(k+sect_len)*ref_scaler]
    # Add new axis for vectorising computation of mean differences
    reshaped_ref_feats = section_ref_feats[:,np.newaxis,:]
    # Compute the feature-wise mean difference between reference places in the coarse position prior
    #  (broadcasting builds every pairwise difference, then averages over the second place)
    sect_ref_feats_diff = np.mean(reshaped_ref_feats - section_ref_feats, axis=1)

    # The appearance variation of the chunk is the per-feature standard deviation
    #  of those mean differences
    coarse_prior_appearance_var[int((k/sect_len)*step_scaler), :] = np.std(sect_ref_feats_diff, axis=0)

    ############## Evaluate the recall the coarse position prior area ###########################

    # Count the queries whose top-1 match is one of their ground truth reference images
    rec_count = 0
    for j in range(sect_len):
        if sect_mInds[0,j] in sect_gt[j]:
            rec_count += 1
    section_seq_avg_recall[0,int((k/sect_len)*step_scaler)] = rec_count/sect_len


In [None]:
####################### This cell computes VPR performance for different sequence lengths #############################
print('Calculating sequences')
print('Starting VPR')

# Results for sequence_list[idx] are written to row idx+1 of section_seq_avg_recall
for idx, n in enumerate(sequence_list):

    # Load the distance matrix for VPR using the sequence length n
    #  The '{}' placeholder is filled with n so that each sequence length reads its own
    #  file; without a placeholder .format(n) left the path unchanged for every n
    dMat = np.load('Path to sequence-based distance matrix for seq len {}'.format(n))
    # Top-5 matches over the whole reference set (not read again in this cell)
    mInds = np.argsort(dMat,axis=0)[:5]

    # The stop value is (number of queries - sect_len + 1) so that the final full chunk is
    #  still visited when the number of queries is an exact multiple of sect_len
    for k in tqdm(range(0,query_feats.shape[0]-sect_len+1,int(sect_len/step_scaler))):

        # Retrieve the distance matrix, ground truth match indices and VPR match indices
        #  for the particular sequence length and section/chunk of the dataset
        sect_dMat = dMat[k*ref_scaler:(k+sect_len)*ref_scaler, k:k+sect_len]
        sect_gt = gt_match_idxs[k:k+sect_len]
        sect_mInds = np.argsort(sect_dMat,axis=0)[:5]

        # Adjust the match indices to be with respect to the entire
        #  reference database instead of the particular chunk
        sect_mInds = sect_mInds+(k*ref_scaler)

        # Count the queries whose top-1 match is one of their ground truth reference images
        rec_count = 0
        for j in range(sect_len):
            if sect_mInds[0,j] in sect_gt[j]:
                rec_count += 1
        section_seq_avg_recall[idx+1,int((k/sect_len)*step_scaler)] = rec_count/sect_len

In [None]:
################# This cell computes the min sequence length for the desired recall in each dataset chunk ##################
# Number of dataset chunks (one column of section_seq_avg_recall per chunk)
n_chunks = floor(query_feats.shape[0]/sect_len)*step_scaler
# Setup a variable for storing the minimum sequence length values
min_seq_len_for_target = np.zeros((n_chunks,))
# This is the nominated minimum recall performance target
target_recall_performance = 0.75

# Here we plot the recall in each dataset chunk for increasing sequence lengths
#  This is useful for tuning the desired recall performance
f, ax = plt.subplots()
for n in range(0,n_chunks):
    # The x values come from sequence_list_labels (single query + tested sequence lengths)
    #  so the plot stays correct if the tested sequence lengths are changed
    ax.plot(sequence_list_labels, section_seq_avg_recall[:, n]*100, label='{}'.format(n))
plt.legend()
plt.axhline(target_recall_performance*100, color='k', linestyle='--')
plt.xlabel('Sequence Length', fontdict={"size":13})
# Raw string: '\%' is not a valid escape sequence in a normal string literal
plt.ylabel(r'Recall @ 1 (\%)', fontdict={"size":13})
plt.ylim(0, 100)
plt.title('Sequence Length Sweep ({})'.format(dataset))
plt.xticks(sequence_list_labels)

# Plot the min sequence length for each dataset chunk which exceeds the target performance
#  or the one which maximises recall in the case it does not achieve the target
f, ax = plt.subplots()
f.set_size_inches(4.5, 3.5)
for n in range(n_chunks):
    chunk_recall = section_seq_avg_recall[:, n]
    meets_target = chunk_recall >= target_recall_performance
    if meets_target.any():
        # argmax of a boolean array returns the first True, i.e. the shortest
        #  sequence length whose recall reaches the target (even when recall is
        #  not monotonic in the sequence length)
        best = int(np.argmax(meets_target))
    else:
        # Target never reached: fall back to the (shortest) sequence length
        #  giving the highest recall
        best = int(np.argmax(chunk_recall))
    min_seq_len_for_target[n] = sequence_list_labels[best]
    ax.scatter(sequence_list_labels[best], chunk_recall[best]*100)
plt.axhline(target_recall_performance*100, color='k', linestyle='--')
plt.xlabel('Sequence Length', fontdict={"size":13})
plt.ylabel(r'Recall @ 1 (\%)', fontdict={"size":13})
ax.set_yticks([])
plt.ylim(0, 100)
plt.xticks(sequence_list_labels)
plt.title(r'Sequence Lengths for Target Recall ($s_{c}$)')

########################## Here is where you would save data for use in the multivariate-NN-regression #################
########################## Variables: min_seq_len_for_target, and coarse_prior_appearance_var          ##################