# **Mount**

Mount Google Drive so that the project files stored there can be accessed from this notebook.

In [None]:
# Mount Google Drive at /content/gdrive; later cells read their data from
# paths under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
!pip install import-ipynb
%cd "/content/gdrive/MyDrive/progetto/code/"

Mounted at /content/gdrive
Collecting import-ipynb
  Downloading https://files.pythonhosted.org/packages/63/35/495e0021bfdcc924c7cdec4e9fbb87c88dd03b9b9b22419444dc370c8a45/import-ipynb-0.1.3.tar.gz
Building wheels for collected packages: import-ipynb
  Building wheel for import-ipynb (setup.py) ... [?25l[?25hdone
  Created wheel for import-ipynb: filename=import_ipynb-0.1.3-cp36-none-any.whl size=2976 sha256=161ac2f21005663985bd27a6cf6d3649398d3c21d14ba7e24f2dcd471ec1afcf
  Stored in directory: /root/.cache/pip/wheels/b4/7b/e9/a3a6e496115dffdb4e3085d0ae39ffe8a814eacc44bbf494b5
Successfully built import-ipynb
Installing collected packages: import-ipynb
Successfully installed import-ipynb-0.1.3
/content/gdrive/MyDrive/progetto/code


# **Compute AP at K**

## Average precision at K 

$AP@k= \frac{1}{N(k)} \sum_{i=1}^{k} \frac{TPseen(i)}{i}$


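where $TP$ denotes the true positives, i.e. the retrieved items that are relevant to the query,

$N(k) = \min(k, TP_{total})$

and $TPseen(i)$ is $0$ if the $i$-th retrieved item is not relevant; otherwise it is the number of true positives seen among the first $i$ retrieved items.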

## Mean Average Precision at K

The mAP@k is the mean of the AP@k values obtained over $N$ different queries:

$mAP@k= \frac{1}{N} \sum_{i=1}^{N} AP@k_i$

In [None]:
#compute AP@k
def compute_ap_at_k(ranked_relevance, k):
  """Compute the average precision at k (AP@k) for a single query.

  Args:
    ranked_relevance: list or array of booleans / 0-1 flags in rank order;
      entry i is truthy when the i-th retrieved element is relevant for the
      query. Only the first k entries are considered, and fewer than k
      entries are accepted (e.g. when fewer results were retrieved).
    k: cut-off rank.

  Returns:
    The AP@k value as a float; 0 when none of the considered elements is
    relevant (or when there are no elements at all).
  """
  # Accept plain lists as well as arrays, and never look past rank k.
  relevance = np.asarray(ranked_relevance)[:max(k, 0)]

  # The normaliser is the number of relevant items among the considered ranks.
  total_relevant_items = relevance.sum()

  # Precision@i, i.e. the number of relevant items within the first i ranks
  # divided by i (cumsum([1, 0, 1]) -> [1, 1, 2]). The rank vector is sized
  # from the data so that a list shorter than k does not break broadcasting.
  prec_at_i = relevance.cumsum() / np.arange(1, relevance.size + 1)

  # Sum Precision@i over the relevant ranks only; the machine epsilon keeps the
  # division defined (and the result 0) when there is no relevant item.
  aps = (relevance * prec_at_i).sum() / (total_relevant_items + np.finfo(float).eps)

  return aps

def retrieve_query_label(results):
  """Return, in order, the element at index 1 of every entry in `results`."""
  return [entry[1] for entry in results]

# **Setup and Pickles**

First, we define the paths where the pickles are located. Then we load two sets of features and labels: the first set was extracted using the predefined model, the second one using the finetuned model.

In [None]:
import numpy as np
import pickle
from sklearn import metrics
import numpy as np

# Locations of the .npy arrays for the predefined model: training ids,
# features and labels, plus the test features and labels.
ids_path = '/content/gdrive/MyDrive/progetto/pickles/image_ids_complete.npy'
features_path = '/content/gdrive/MyDrive/progetto/pickles/image_features_complete.npy'
labels_path = '/content/gdrive/MyDrive/progetto/pickles/image_labels_complete.npy'
test_path = "/content/gdrive/MyDrive/progetto/pickles/testing_image_features.npy"
test_labels_path = "/content/gdrive/MyDrive/progetto/pickles/testing_image_labels.npy"

# Locations of the corresponding arrays for the finetuned model.
finetuned_ids_path = '/content/gdrive/MyDrive/progetto/pickles/image_ids_finetuning.npy'
finetuned_features_path = '/content/gdrive/MyDrive/progetto/pickles/image_features_finetuning.npy'
finetuned_labels_path = '/content/gdrive/MyDrive/progetto/pickles/image_labels_finetuning.npy'
finetuned_test_labels_path = "/content/gdrive/MyDrive/progetto/pickles/testing_image_labels_finetuning.npy"
finetuned_test_features_path = "/content/gdrive/MyDrive/progetto/pickles/testing_image_features_finetuning.npy"

# Load the predefined-model arrays into memory; the resulting module-level
# names are used by the evaluation cells further down.
print("Loading features and labels predefinited...")
training_ids = np.load(  ids_path  )
training_features = np.load( features_path )
training_labels = np.load( labels_path )
test_features = np.load(  test_path  )
test_labels = np.load( test_labels_path )
print( "features and labels loaded... ")
# Load the finetuned-model arrays into their own, separately named variables.
print("Loading features and labels finetuned...")
finetuned_training_ids = np.load(  finetuned_ids_path  )
finetuned_training_features = np.load( finetuned_features_path )
finetuned_training_labels = np.load( finetuned_labels_path )
finetuned_test_features = np.load(  finetuned_test_features_path  )
finetuned_test_labels = np.load( finetuned_test_labels_path )
print( "features and labels loaded... ")

Loading features and labels predefinited...
features and labels loaded... 
Loading features and labels finetuned...
features and labels loaded... 


# **kNN**

We use the kNN implementation provided by scikit-learn as a performance baseline for our indexes. Two models are built: one with the features of the predefined model and one with the features of the finetuned model. For each of them we print the mAP and the average search time needed to answer a query with 10 results.

In [None]:
from sklearn.neighbors import NearestNeighbors
import time
import numpy as np

# Build the kNN model on the predefined-model training features.
nbrs = NearestNeighbors(n_neighbors=10).fit(training_features)

# Time only the batch query over the whole test set (fitting is excluded).
startTime = time.perf_counter()
distances, indices = nbrs.kneighbors(test_features)
processTime = time.perf_counter() - startTime

# One AP@10 value per test query: a retrieved neighbour counts as relevant
# when its training label equals the label of the query.
ap_vect = []
for query_idx, neighbour_ids in enumerate(indices):
  query_label = test_labels[query_idx]
  hits = [1 if query_label == training_labels[n] else 0 for n in neighbour_ids]
  ap_vect.append(compute_ap_at_k(np.array(hits), 10))

# NOTE: `map` shadows the Python builtin from here on; the name is kept so
# that anything reading it afterwards keeps working.
map = np.mean(np.array(ap_vect))
print("MAP:" + str(map))

# Average time per query = total batch query time / number of test queries.
meanTime = processTime / len(test_labels)
print("Average search time:" + str(meanTime))

MAP:0.44406272843956934
Average search time:0.01076135451741433


In [None]:
# Build the kNN model on the finetuned-model training features.
nbrs = NearestNeighbors(n_neighbors=10,algorithm='auto').fit( finetuned_training_features )

# Time only the batch query over the whole finetuned test set (fitting is excluded).
startTime = time.perf_counter()
distances, indices = nbrs.kneighbors( finetuned_test_features )
processTime = time.perf_counter() - startTime
ap_vect = []

for i in range(len(finetuned_test_features)):
  predicted_class = [ finetuned_training_labels[j] for j in indices[i] ]
  # The ground truth must come from the finetuned test set, so that it is
  # guaranteed to be aligned with the finetuned test features being queried
  # (previously the labels of the predefined test set were used here).
  actual_class = finetuned_test_labels[i]

  # 1 when the retrieved neighbour has the same label as the query, else 0.
  rank_rel = [ 1 if actual_class == p else 0 for p in predicted_class ]

  ap = compute_ap_at_k(np.array( rank_rel ), 10 )
  ap_vect.append(ap)

# NOTE: `map` shadows the Python builtin from here on; the name is kept so
# that anything reading it afterwards keeps working.
map = np.mean( np.array(ap_vect) )
print("MAP:"+ str(map))
# Average over the queries that were actually timed (the finetuned test set).
meanTime = processTime/len(finetuned_test_features)
print("Average search time:" + str(meanTime))

MAP:0.6336122829032131
Average search time:0.01160106087935358
