# Compute Results of Analysis

Evaluate image retrieval using SIFT, ResNet, Swin, VGG and ViT features, comparing three query strategies: basic distance ranking, query expansion, and diffusion.

____
## Imports and Constants

In [1]:
import numpy as np
import pandas as pd
import os
import sklearn.metrics.pairwise
import my_eval
import query

###########################################

# Absolute, machine-specific location of this notebook's directory.
# NOTE(review): no cell visible here reads this constant; the data and cache
# paths used below are relative ("./data", "./tmp"), so they resolve against
# the kernel's working directory -- confirm that it matches this path.
NOTEBOOK_DIR = "/home/sean/Code/Pawsey/4. Clean"

_____
## Load Data

In [2]:
## Load features

# Resulting layout:
#   data[subset]["names"][dataset]["y" | difficulty]    -> array loaded from a names file
#   data[subset][descriptor][dataset][difficulty][xy]   -> array loaded from a feature file
# where dataset is "ox" or "par" and difficulty is "E", "M" or "H".
data = {}

for data_subset in ("oldenburger", "sutton"):
    subdir = "./data/" + data_subset
    data[data_subset] = {}

    for descriptor in os.listdir(subdir):
        descriptor_dir = subdir + "/" + descriptor
        is_names = descriptor == "names"

        # Pre-build the nested containers that the files of this descriptor fill in.
        if is_names:
            store = {"ox" : {}, "par" : {}}
        else:
            store = {ds : {level : {} for level in ("E", "M", "H")} for ds in ("ox", "par")}
        data[data_subset][descriptor] = store

        for fname in os.listdir(descriptor_dir):
            # Drop the 4-character extension, then split the stem on "-".
            split_name = fname[:-4].split("-")
            path = descriptor_dir + "/" + fname

            if is_names:
                dataset = split_name[0]
                # Files ending in "y-names.npy" are stored under "y"; any other
                # names file is keyed by the third token of its stem.
                key = "y" if fname.endswith("y-names.npy") else split_name[2]
                store[dataset][key] = np.load(path)
            elif len(split_name) != 3:
                # Three-token stems are skipped; the rest must have four tokens.
                _, xy, dataset, difficulty = split_name
                store[dataset][difficulty][xy] = np.load(path)

        print("Loaded " + descriptor)

data["oldenburger"]["swin"]["ox"]["E"]["x"]

Loaded swin
Loaded vgg
Loaded resnet
Loaded names
Loaded vit
Loaded names
Loaded sift-10k
Loaded sift-1k


array([[ 0.4381919 , -1.1369115 , -0.49100572, ..., -0.27456677,
         0.38102797, -0.30554023],
       [-0.19804995,  0.02098738,  0.52111053, ...,  0.44540596,
         0.8620084 ,  0.18907186],
       [ 1.0216093 , -0.06300209, -0.06569103, ...,  0.02202551,
        -0.32440802,  0.3858102 ],
       ...,
       [ 0.74518114, -0.9655011 , -0.55623275, ..., -0.39560622,
         0.3983633 , -0.4672271 ],
       [ 0.4493655 , -0.97439206, -0.61376625, ..., -0.19914342,
         0.27447924, -0.3482531 ],
       [ 0.04259995,  0.09633142,  0.65417933, ...,  0.5438953 ,
         0.53027916,  0.03832415]], dtype=float32)

_____
## Evaluate Methods

In [5]:
# Start every result column as its own empty list, then display the dict.
result_columns = ("feature_type", "dataset", "difficulty", "query type", "mAP", "alpha", "diffusion scalar")
r = {column : list() for column in result_columns}
r

{'feature_types': [],
 'datasets': [],
 'difficulties': [],
 'query_types': [],
 'm_APs': [],
 'alpha': [],
 'diffusion_scalar': []}

In [6]:
# Scratch check: extending a list in place with a repeated value.
lis = [1, 2, 3]
lis.extend([5] * 3)
lis

[1, 2, 3, 5, 5, 5]

In [13]:
# set parameters
kappas = [1,5,10]  # cut-offs k used for the "precision at k" columns
alphas = [0.25, 0.5, 0.75, 1]  # alpha values swept for the 'expanded' query type
distance_metrics = {"euclidean" : sklearn.metrics.pairwise.euclidean_distances, "cosine" : sklearn.metrics.pairwise.cosine_distances}

# evaluate data
# `r` holds one list per output column. Every evaluated configuration must
# append exactly one entry to each list so that pd.DataFrame(r) lines up row by row.
r = {key : [] for key in ["feature_type", "dataset", "difficulty", "query type", "alpha", "diffusion scalar", "distance metric", "mAP"]}
for kappa in kappas:
    r["precision at " + str(kappa)] = []

for data_split in data:
    for feature in data[data_split]:
        
        # "names" holds the query/gallery names, not a feature descriptor.
        if feature == "names":
            continue
        
        for dataset in ["ox", "par"]:
            for (difficulty, dat) in data[data_split][feature][dataset].items():

                print("Processing {} {} {}".format(dataset, difficulty, feature))

                queries = dat["y"]
                gallery = dat["x"]
                query_names = data[data_split]["names"][dataset]["y"]
                gallery_names = data[data_split]["names"][dataset][difficulty]

                # Compute basic query and expanded query

                for (metric_name, metric_function) in distance_metrics.items():
                    # One 'basic' row followed by one 'expanded' row per alpha.
                    n_tests = len(alphas) + 1
                    r["feature_type"] += [feature] * n_tests
                    r["dataset"] += [dataset] * n_tests
                    r["difficulty"] += [difficulty] * n_tests
                    r["query type"] += ["basic"] + ["expanded"] * len(alphas)
                    r["alpha"] += [np.nan] + alphas
                    r["distance metric"] += [metric_name] * n_tests
                    r["diffusion scalar"] += [np.nan] * n_tests

                    basic_ranks = query.return_ranks('basic', queries, gallery, metric_function = metric_function)
                    m_ap, ps = my_eval.evaluate(basic_ranks, query_names, gallery_names, kappas)
                    r["mAP"].append(m_ap)
                    for (kappa, p) in ps.items():
                        r["precision at " + str(kappa)].append(p)
                    
                    for alpha in alphas:
                        expansion_ranks = query.return_ranks('expanded', queries, gallery, alpha=alpha, metric_function = metric_function)
                        m_ap, ps = my_eval.evaluate(expansion_ranks, query_names, gallery_names, kappas)
                        r["mAP"].append(m_ap)
                        for (kappa, p) in ps.items():
                            r["precision at " + str(kappa)].append(p)
                
                # Compute diffusion query

                # A single diffusion row: alpha and distance metric do not apply.
                r["feature_type"].append(feature)
                r["dataset"].append(dataset)
                r["difficulty"].append(difficulty)
                r["query type"].append("diffusion")
                r["alpha"].append(np.nan)
                r["distance metric"].append(np.nan)

                # NOTE(review): the cache path is keyed by dataset/feature/difficulty
                # only, not by data_split -- confirm that feature names never repeat
                # across splits, otherwise two splits would share a cache directory.
                cache_dir = "./tmp/{}_{}_{}".format(dataset, feature, difficulty)

                # Diffusion may raise ValueError; retry with the features scaled by
                # 10 and then by 50, recording the scale that was actually used.
                try:
                    diffusion_ranks = query.return_ranks('diffusion', queries, gallery, cache_dir = cache_dir)
                    r["diffusion scalar"].append(1)
                except ValueError: # caused by the values being too small
                    try:
                        print("CRASH! Running diffusion x10.")
                        diffusion_ranks = query.return_ranks('diffusion', queries * 10, gallery * 10, cache_dir = cache_dir + "_x10")
                        r["diffusion scalar"].append(10)
                    except ValueError: # caused by the values being too small
                        print("CRASH! Running diffusion x50.")
                        diffusion_ranks = query.return_ranks('diffusion', queries * 50, gallery * 50, cache_dir = cache_dir + "_x50")
                        # Fixed: this branch scales by 50 but previously recorded 10.
                        r["diffusion scalar"].append(50)

                m_ap, ps = my_eval.evaluate(diffusion_ranks, query_names, gallery_names, kappas)
                r["mAP"].append(m_ap)
                for (kappa, p) in ps.items():
                    r["precision at " + str(kappa)].append(p)

results=pd.DataFrame(r)
                    

Processing ox E swin
[cache] loading ./tmp/ox_swin_E/offline.jbl costs 0.00s
Processing ox M swin
[cache] loading ./tmp/ox_swin_M/offline.jbl costs 0.00s
Processing ox H swin
[cache] loading ./tmp/ox_swin_H/offline.jbl costs 0.00s
Processing par E swin
[cache] loading ./tmp/par_swin_E/offline.jbl costs 0.00s
Processing par M swin
[cache] loading ./tmp/par_swin_M/offline.jbl costs 0.01s
Processing par H swin
[cache] loading ./tmp/par_swin_H/offline.jbl costs 0.00s
Processing ox E vgg
[cache] loading ./tmp/ox_vgg_E/offline.jbl costs 0.00s
Processing ox M vgg
[cache] loading ./tmp/ox_vgg_M/offline.jbl costs 0.00s
Processing ox H vgg
[cache] loading ./tmp/ox_vgg_H/offline.jbl costs 0.00s
Processing par E vgg
[cache] loading ./tmp/par_vgg_E/offline.jbl costs 0.00s
Processing par M vgg
[cache] loading ./tmp/par_vgg_M/offline.jbl costs 0.00s
Processing par H vgg
[cache] loading ./tmp/par_vgg_H/offline.jbl costs 0.00s
Processing ox E resnet
[cache] loading ./tmp/ox_resnet_E/offline.jbl costs 0

In [15]:
#results.to_csv("./results/first_full_set.csv", index=False)