# Codebase

In [None]:
!pip install faiss-cpu 
!pip install faiss-gpu 
!pip install pytorch_metric_learning

In [None]:
# Import libraries
import cv2
import faiss
import faiss.contrib.torch_utils
import matplotlib.pyplot as plt
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import torchvision
import random

from collections import defaultdict
from itertools import product
from PIL import Image,ImageDraw, ImageFont
from prettytable import PrettyTable
from pytorch_metric_learning import miners, losses
from pytorch_metric_learning.distances import CosineSimilarity, DotProductSimilarity
from sklearn.neighbors import NearestNeighbors
from skimage.transform import rescale
from torch.optim import SGD, Adam, AdamW, ASGD, RMSprop
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import datasets, transforms, models
from tqdm import tqdm

In [None]:
# Device-agnostic setup: use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

In [None]:
# Seed PyTorch (CPU and CUDA) and NumPy with the same value so runs are repeatable
for _seed_fn in (torch.manual_seed, torch.cuda.manual_seed, np.random.seed):
    _seed_fn(42)

### Load Datasets


In [None]:
# Transformations applied to every image (train, validation and test):
# conversion to a tensor followed by per-channel normalisation.
# The mean/std used here are the same values that denormalize() undoes for plotting.
transform = transforms.Compose([           # TODO: revisit data augmentation (see the link that Triv is going to send)
    # transforms.RandAugment(num_ops=3),  # applies three random augmentation operations to the image
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [None]:
# Implementation of the train class 

class TrainDataset(Dataset):
    """Training set that groups images by place and samples a few images per place.

    The directory layout is ``root_dir/<city>/<image>``. The place id of an image is
    the second-to-last ``@``-separated field of its path. One dataset item is one
    place, not one image.

    Args:
        root_dir: directory that contains one sub-directory per city.
        transform: callable applied to every PIL image (must return a tensor).
        img_per_place: number of distinct images returned for each place. Places
            with fewer images than this are discarded (with the default of 4 this
            is the same filter as before).
    """

    def __init__(self, root_dir, transform, img_per_place= 4):
        self.root_dir = root_dir
        self.transform = transform
        self.img_per_place = img_per_place  # number of images to keep for the same id
        self.place_paths = defaultdict(list)

        # Iterate through all the city directories. Listings are sorted because
        # os.listdir returns entries in arbitrary order, and the order of the
        # paths influences which images a seeded random choice picks.
        for city_dir in sorted(os.listdir(root_dir)):
            city_path = os.path.join(root_dir, city_dir)

            # Anything that is not a directory is ignored
            if not os.path.isdir(city_path):
                continue

            # Iterate through all the images of a city
            for filename in sorted(os.listdir(city_path)):
                img_path = os.path.join(city_path, filename)
                place_id = img_path.split("@")[-2]
                self.place_paths[place_id].append(img_path)

        # Keep only the places that have enough images to draw img_per_place
        # distinct ones (the threshold used to be a hard-coded 4).
        for place_id in list(self.place_paths.keys()):
            if len(self.place_paths[place_id]) < self.img_per_place:
                del self.place_paths[place_id]
        self.places_ids = sorted(self.place_paths.keys())

    def __getitem__(self, idx):
        """Return ``(images, labels, place_id)`` for the idx-th place.

        ``images`` stacks ``img_per_place`` transformed images, ``labels`` repeats
        ``idx`` once per image and ``place_id`` is the place's string id.
        """
        place_id = self.places_ids[idx]
        paths_place_id = self.place_paths[place_id]
        # Sample without replacement so the same image is never returned twice for
        # one place; the filter in __init__ guarantees there are enough images.
        chosen_paths = np.random.choice(paths_place_id, self.img_per_place, replace=False)
        images = [self.transform(Image.open(path).convert('RGB')) for path in chosen_paths]
        return torch.stack(images), torch.tensor(idx).repeat(self.img_per_place), place_id

    def __len__(self):
        """Number of places (not images) in the dataset."""
        return len(self.places_ids)

In [None]:
# Implementation of the evaluation classes (validation,test)

class EvalDataset(data.Dataset):
    """Evaluation set (validation or test) made of database and query images.

    Images are read from ``root_dir/<type_of_set>/database`` and
    ``root_dir/<type_of_set>/queries``. The coordinates of an image are the second
    and third ``@``-separated fields of its file name. Items are indexed with all
    database images first, followed by all query images.

    Args:
        root_dir: directory containing the ``val`` / ``test`` sub-directories.
        type_of_set: either ``'val'`` or ``'test'``.
        transform: callable applied to every PIL image.
        positive_dist_threshold: radius (same unit as the file-name coordinates,
            25 meters by default) within which a database image counts as a
            positive for a query.

    Raises:
        ValueError: if ``type_of_set`` is neither ``'val'`` nor ``'test'``.
    """

    def __init__(self, root_dir,type_of_set,transform, positive_dist_threshold=25):
        if type_of_set not in ('val', 'test'):
            raise ValueError("Type of set not valid,try 'val' or 'test'")

        self.root_dir = root_dir
        self.type_of_set = type_of_set
        self.transform = transform
        self.positive_dist_threshold = positive_dist_threshold

        path_directory = os.path.join(root_dir, type_of_set)
        self.database_paths = self._list_images(os.path.join(path_directory, 'database'))
        self.queries_paths = self._list_images(os.path.join(path_directory, 'queries'))

        self.database_coordinates = self._read_coordinates(self.database_paths)
        self.queries_coordinates = self._read_coordinates(self.queries_paths)

        # Find positives_per_query: for every query, the indices of the database
        # images that lie within positive_dist_threshold of it
        knn = NearestNeighbors(n_jobs=-1)
        knn.fit(self.database_coordinates)
        self.positives_per_query = knn.radius_neighbors(self.queries_coordinates,
                                                        radius=positive_dist_threshold,
                                                        return_distance=False)

        # Create a unique list (database first, then queries) to ease __getitem__
        self.all_images_paths = list(self.database_paths) + list(self.queries_paths)

        self.database_num = len(self.database_paths)
        self.queries_num = len(self.queries_paths)

    @staticmethod
    def _list_images(directory):
        """Return the full paths of the files in ``directory``, sorted by name.

        os.listdir returns entries in arbitrary order; sorting makes the
        database/query indices (and therefore the predictions) reproducible.
        """
        return [os.path.join(directory, filename) for filename in sorted(os.listdir(directory))]

    @staticmethod
    def _read_coordinates(paths):
        """Parse the two coordinate fields from each file name into a float array."""
        # Only the file name is split, so an '@' in a parent directory cannot shift the fields
        fields = [os.path.basename(path).split("@") for path in paths]
        return np.array([(parts[1], parts[2]) for parts in fields]).astype(float)

    def __getitem__(self, idx):
        """Return the transformed image at position ``idx`` together with ``idx``."""
        image_path = self.all_images_paths[idx]
        image = self.transform(Image.open(image_path).convert('RGB'))
        return image, idx

    def __len__(self):
        """Total number of images (database + queries)."""
        return len(self.all_images_paths)

    # NOTE: this could possibly be removed
    def __repr__(self):
        return f" <{self.type_of_set}; - #q: {self.queries_num}; #db: {self.database_num} >"

    def get_positives(self):
        """Return, for each query, the array of database indices that are positives."""
        return self.positives_per_query

In [None]:
# Training loading
# Each dataset item is one place (TrainDataset returns img_per_place images per item),
# so a batch of 64 items holds 64 places; training_loop flattens them into single images.
root_dir_train = '/kaggle/input/gsv-xs/gsv_xs/train'
dataset_train = TrainDataset(root_dir=root_dir_train, transform=transform)
dataloader_train = data.DataLoader(dataset_train, batch_size=64, shuffle=True)

In [None]:
# Validation loading
# shuffle=False is required: evaluation_loop relies on the dataset order
# (database images first, then queries) to split the descriptors.
root_dir_eval = '/kaggle/input/sf-xs/sf_xs'
dataset_val = EvalDataset(root_dir=root_dir_eval, type_of_set= 'val', transform=transform)
dataloader_val = data.DataLoader(dataset_val, batch_size=64, shuffle=False)

In [None]:
# Test loading
# Both test sets use shuffle=False so that the dataset order
# (database images first, then queries) is preserved for evaluation_loop.

# SF-XS (same root directory as the validation set, 'test' split)
root_dir_eval = '/kaggle/input/sf-xs/sf_xs'
dataset_test = EvalDataset(root_dir=root_dir_eval, type_of_set= 'test', transform=transform)
dataloader_test = data.DataLoader(dataset_test, batch_size=64, shuffle=False)

# Tokyo-XS
root_dir_tokyo = '/kaggle/input/tokyo-xs/tokyo_xs'
dataset_tokyo = EvalDataset(root_dir=root_dir_tokyo, type_of_set= 'test', transform=transform)
dataloader_tokyo = data.DataLoader(dataset_tokyo, batch_size=64, shuffle=False)

### First Visualizations

TODO: move these visualizations to a separate file and improve them.

In [None]:
# Function to denormalize image for visualization
def denormalize(image):
    """Undo the per-channel normalisation of a CxHxW image tensor for plotting.

    Returns a HxWxC NumPy array with values clipped to the [0, 1] range.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])
    # Move the channel axis last, which is the layout expected by plt.imshow
    height_width_channel = image.to('cpu').numpy().transpose((1, 2, 0))
    return np.clip(height_width_channel * channel_std + channel_mean, 0, 1)

In [None]:
# # Train images 
# batch_count = 0
# for images, labels, place_id in dataloader_train:
#     print(f"Batch:{batch_count}")
#     # Assicurati di non superare il numero di immagini nel batch
#     num_images = min(32, len(images))
#     # Denormalizza e visualizza le immagini nel batch
#     for i in range(num_images):
#         label = place_id[i]
#         for j in range(4):
#             img = denormalize(images[i][j])  # Denormalizza l'immagine i-esima
#             plt.figure()
#             plt.imshow(img)
#             plt.title(f'ID: {label}')
#             plt.show()

#     # Incrementa il contatore del batch
#     batch_count += 1

#     # Se vuoi limitare il numero di batch visualizzati
#     if batch_count >= 10:  # Mostra solo i primi 10 batch
#         break

In [None]:
# Val and test images 
# batch_count = 0
# for images, labels in dataloader_tokyo:
#     print(f"Batch:{batch_count}")
#     # Assicurati di non superare il numero di immagini nel batch
#     num_images = min(32, len(images))
#     # Denormalizza e visualizza le immagini nel batch
#     for i in range(num_images):
#         img = denormalize(images[i])  # Denormalizza l'immagine i-esima
#         plt.figure()
#         plt.imshow(img)
#         # plt.title(f'ID: {labels}')
#         plt.show()

#     # Incrementa il contatore del batch
#     batch_count += 1

#     # Se vuoi limitare il numero di batch visualizzati
#     if batch_count >= 10:  # Mostra solo i primi 10 batch
#         break

### Models

In [None]:
def get_backbone():
    """Build the feature extractor: a pretrained, truncated and partly frozen ResNet-18.

    Every child module that comes before ``layer3`` is frozen so that its weights
    are not changed during fine-tuning. Only the first seven child modules are
    kept. NOTE(review): for torchvision's ResNet-18 this presumably means
    everything up to and including ``layer3``, i.e. ``layer4``, the average
    pooling and the FC layer are all dropped (an earlier comment mentioned only
    the last two) -- consistent with the 256 input features of the heads below.
    """
    resnet = torchvision.models.resnet18(pretrained=True)  # load the pretrained model

    # Freeze the parameters of all the children preceding layer3
    for child_name, child_module in resnet.named_children():
        if child_name == "layer3":
            break
        for weight in child_module.parameters():
            weight.requires_grad = False

    # Keep the first seven children so that an aggregation head can be attached afterwards
    kept_layers = list(resnet.children())[:7]
    return torch.nn.Sequential(*kept_layers)

In [None]:
class Flatten(nn.Module):
    """Drop the two trailing spatial axes of a (N, C, 1, 1) tensor, giving (N, C)."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        height, width = x.shape[2], x.shape[3]
        # The spatial dimensions must already have been pooled down to 1x1
        assert height == width == 1, f"{height} != {width} != 1"
        return x[:, :, 0, 0]


class L2Norm(nn.Module):
    """Scale the input to unit L2 norm along one dimension.

    Args:
        dim: dimension along which the norm is computed (default 1).
    """

    def __init__(self, dim=1):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        return F.normalize(x, dim=self.dim, p=2.0)

In [None]:
class Avg_ResNet(nn.Module):
    """Main network: the truncated ResNet-18 backbone followed by an average-pooling head.

    The head pools the feature map to 1x1, flattens it, applies a 256 -> 256
    linear layer and L2-normalises the resulting descriptor.
    """

    def __init__(self):
        super().__init__()
        self.backbone = get_backbone()
        # The layers of the head are executed in sequence, like a chain
        head_layers = [
            nn.AdaptiveAvgPool2d((1, 1)),
            Flatten(),
            nn.Linear(256, 256),  # y = x @ A.T + b, with randomly initialised A and b
            L2Norm(),
        ]
        self.aggregation = nn.Sequential(*head_layers)

    def forward(self, x):
        # The input goes through the backbone first and then through the head
        return self.aggregation(self.backbone(x))

# Initialize the average-pooling network.
# The model is moved to `device` (defined in the setup cell) instead of calling
# .cuda() unconditionally, so this cell also works on CPU-only machines.
model_avg = Avg_ResNet()
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs")
    model_avg = nn.DataParallel(model_avg)
model_avg = model_avg.to(device)
# print(model_avg)
# torch.save(model_avg.state_dict(), '/kaggle/working/initial_weights.pth')

In [None]:
# Gem pooling layer to obtain the final embedding

class GeM(nn.Module):
    """Generalized-mean (GeM) pooling over the two spatial dimensions.

    Args:
        p: initial value of the learnable pooling exponent.
        eps: lower clamp applied to the input before it is raised to ``p``.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.p = nn.Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Clamp, raise to ``p``, average over the whole feature map, take the p-th root."""
        whole_map = (x.size(-2), x.size(-1))
        powered = x.clamp(min=eps).pow(p)
        return F.avg_pool2d(powered, whole_map).pow(1. / p)

    # TODO: try removing this custom representation
    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})"

# Module-level GeM instance, kept so that code referring to `gem_pool` keeps working.
# GeM_ResNet no longer uses it: each network now owns its own pooling layer.
gem_pool = GeM()

# Network with truncated ResNet-18 followed by GeM pooling
class GeM_ResNet(nn.Module):
    """Truncated ResNet-18 backbone followed by a GeM-pooling head.

    The head applies GeM pooling, flattens the 1x1 feature map and applies a
    256 -> 256 linear layer. Unlike Avg_ResNet, the output is not L2-normalised.
    """

    def __init__(self):
        super().__init__()
        # Load the pretrained, truncated ResNet-18 model
        self.backbone = get_backbone()
        self.aggregation = nn.Sequential(
            # A fresh GeM per network: sharing the single global instance would tie
            # the learnable exponent `p` of every GeM_ResNet together. The
            # state_dict key ('aggregation.0.p') is unchanged.
            GeM(),
            Flatten(),
            nn.Linear(256, 256),  # y = x @ A.T + b, with randomly initialised A and b
            # L2Norm()
        )

    def forward(self, x):
        x = self.backbone(x)      # first through the backbone
        x = self.aggregation(x)   # then through the aggregation head
        return x

# Initialize the GeM network.
# The model is moved to `device` (defined in the setup cell) instead of calling
# .cuda() unconditionally, so this cell also works on CPU-only machines.
model_gem = GeM_ResNet()
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs")
    model_gem = nn.DataParallel(model_gem)
model_gem = model_gem.to(device)
# print(model_gem)
# torch.save(model_gem.state_dict(), '/kaggle/working/initial_weights.pth')

In [None]:
# # Caricamento dei pesi salvati
# model_avg.load_state_dict(torch.load('/kaggle/input/pesiiii/model_weights.pth'))  

#### Proxy Mining

### Training loop

In [None]:
def knn_search(proxies, proxy_labels, k=60):
    """Partition the places into batches of mutually close proxies.

    Repeatedly takes the first remaining proxy, finds its ``k`` nearest proxies
    (itself included) by L2 distance, emits their labels as one batch and removes
    them from the pool. The proxies left at the end (at most ``k``) form a final,
    possibly smaller batch. They used to be dropped, which made the set of places
    shrink every time the batches were rebuilt from the previous ones.

    Args:
        proxies: float32 array of shape (num_places, dim), one proxy per place.
        proxy_labels: array with the label of each proxy, aligned with ``proxies``.
        k: number of places per batch (default 60).

    Returns:
        A list of batches, each being a list of labels.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    informative_batches = []
    while len(proxies) > k:
        # Flat (exact) L2 index over the proxies that are still unassigned
        faiss_index = faiss.IndexFlatL2(proxies.shape[1])
        faiss_index.add(proxies)

        # The first remaining proxy acts as the query of this batch
        query_vector = np.reshape(proxies[0], (1, -1))
        _, indices = faiss_index.search(query_vector, k)
        neighbour_ids = indices[0]

        informative_batches.append([proxy_labels[idx] for idx in neighbour_ids])

        # Remove the places that have just been assigned to a batch
        proxies = np.delete(proxies, neighbour_ids, axis=0)
        proxy_labels = np.delete(proxy_labels, neighbour_ids, axis=0)

    # Whatever is left fits in a single batch: no search is needed
    if len(proxies) > 0:
        informative_batches.append(list(proxy_labels))

    return informative_batches

In [None]:
# PROXY
def _training_step(model, images, labels, criterion, optimizer, miner):
    """Run one optimisation step on a (places, images_per_place, C, H, W) batch.

    Returns the scalar loss, the descriptors (on the CPU), the flattened labels
    and the number of images per place.
    """
    optimizer.zero_grad()

    num_places, num_images_per_place, C, H, W = images.shape
    images = images.reshape(num_places * num_images_per_place, C, H, W)
    labels = labels.reshape(num_places * num_images_per_place)

    descriptors = model(images.to(device)).cpu()
    targets = labels.to(device)

    if miner is not None:
        # Online mining: the mined pairs/triplets are handed to the loss
        loss = criterion(descriptors, targets, miner(descriptors, targets))
    else:
        loss = criterion(descriptors, targets)

    loss.backward()
    optimizer.step()
    return loss.item(), descriptors, labels, num_images_per_place


def _place_proxies(descriptors, labels, num_images_per_place):
    """Average the descriptors of each place into one proxy.

    Returns two parallel lists: the proxies (lists of floats) and the place labels.
    """
    num_places = descriptors.shape[0] // num_images_per_place
    per_place = descriptors.detach()[:num_places * num_images_per_place]
    per_place = per_place.reshape(num_places, num_images_per_place, -1)
    place_labels = labels[:num_places * num_images_per_place:num_images_per_place]
    return per_place.mean(dim=1).tolist(), place_labels.tolist()


def _iter_mined_batches(dataset, place_batches):
    """Yield (images, labels) for every mined batch of place indices."""
    for place_batch in place_batches:
        images = torch.stack([dataset[int(label)][0] for label in place_batch])
        labels = torch.tensor([int(label) for label in place_batch])
        yield images, labels.unsqueeze(1).repeat(1, images.shape[1])


# PROXY
def training_loop(epoch,model,dataloader,criterion, optimizer, miner = None, pre_miner = None):
    """Train the model for one epoch and return the average batch loss.

    Without ``pre_miner`` the batches come from ``dataloader``. With ``pre_miner``
    the first epoch (``epoch == 1``) also iterates over ``dataloader`` while
    collecting one proxy per place; at the end of the epoch ``knn_search`` groups
    the proxies into batches that are stored in the global ``informative_batches``.
    Later epochs train on those batches and rebuild them afterwards. If no mined
    batches are available (e.g. training is resumed with ``epoch > 1``), the
    epoch falls back to the dataloader instead of failing.

    Args:
        epoch: epoch number, used for the mining schedule and for logging.
        model: network that maps a batch of images to descriptors.
        dataloader: yields ``(images, labels, place_ids)`` with images of shape
            (places, images_per_place, C, H, W); its ``dataset`` is indexed by
            place label when mined batches are used.
        criterion: loss called as ``criterion(descriptors, labels[, miner_outputs])``.
        optimizer: optimizer of the model's parameters.
        miner: optional online miner called as ``miner(descriptors, labels)``.
        pre_miner: any value other than None enables proxy mining.

    Returns:
        The loss averaged over the batches actually processed in this epoch.
    """
    global informative_batches

    model.train()
    use_proxy_mining = pre_miner is not None
    previous_batches = globals().get("informative_batches") if use_proxy_mining else None

    if use_proxy_mining and epoch != 1 and previous_batches:
        batches = _iter_mined_batches(dataloader.dataset, previous_batches)
    else:
        batches = ((images, labels) for images, labels, _ in dataloader)

    train_loss = 0.0
    num_batches = 0
    proxies, proxy_labels = [], []

    for images, labels in batches:
        loss_value, descriptors, flat_labels, per_place = _training_step(
            model, images, labels, criterion, optimizer, miner)
        train_loss += loss_value
        num_batches += 1

        if use_proxy_mining:
            batch_proxies, batch_labels = _place_proxies(descriptors, flat_labels, per_place)
            proxies.extend(batch_proxies)
            proxy_labels.extend(batch_labels)

    if use_proxy_mining:
        # Rebuild the mined batches from the proxies computed during this epoch
        informative_batches = knn_search(np.asarray(proxies, dtype=np.float32),
                                         np.asarray(proxy_labels, dtype=np.int32))

    # Average over the batches actually processed (mined epochs do not have
    # len(dataloader) batches)
    train_loss = train_loss / max(num_batches, 1)
    print(f'Train Epoch: {epoch} Loss: {train_loss:.6f}')
    return train_loss

#### Visualization of results

In [None]:
# Height and width (in pixels) of the slot reserved for a single image in the collage
Height = 1024
Width = 1024

Total_height = 350  # height of the strip that holds the text labels (see write_labels_to_image)
fontsize = 100      # font size used for the labels
space = 150  # Space between two images

def write_labels_to_image(labels=["text1", "text2"]):
    """Render one caption per image slot and return the strip as a NumPy array.

    The strip is ``Total_height`` pixels tall and as wide as ``len(labels)`` image
    slots of ``Width`` pixels separated by ``space`` pixels. Each caption is
    centred on its slot.
    """
    # Load the font
    font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", fontsize)

    # Size of the resulting strip
    num_labels = len(labels)
    strip_size = (Width * num_labels + space * (num_labels - 1), Total_height)

    # The background is (1, 1, 1) rather than (255, 255, 255) on purpose: print_preds
    # multiplies the whole collage by 255, which turns this value into white
    strip = Image.new('RGB', strip_size, (1, 1, 1))
    pen = ImageDraw.Draw(strip)

    # Draw each caption centred on its slot
    for slot, caption in enumerate(labels):
        _, _, text_width, text_height = pen.textbbox((0, 0), caption, font=font)
        left = (Width + space) * slot + Width // 2 - text_width // 2
        top = (Total_height - text_height) // 2
        pen.text((left, top), caption, fill=(0, 0, 0), font=font)

    return np.array(strip)


def draw_box(img, c):
    """Draw a box of colour ``c`` along the border of ``img`` (in place) and return it."""
    thickness = 5
    height, width = img.shape[:2]
    # Corners visited in order: the four segments are the x = 0 edge, the y = height
    # edge, the x = width edge and finally the y = 0 edge
    corners = [(0, 0), (0, height), (width, height), (width, 0)]
    for start, end in zip(corners, corners[1:] + corners[:1]):
        cv2.line(img, start, end, c, thickness)
    return img


def print_preds(predictions, test_dataset,number_of_images_per_epoch):
    """Show, for the first queries, the query next to its predictions.

    Every prediction is framed in green when it is one of the query's positives
    and in red otherwise; a caption above each image says which is which. At most
    ``number_of_images_per_epoch`` figures are produced.

    Args:
        predictions: for each query, the database indices predicted for it.
        test_dataset: evaluation dataset providing ``queries_paths``,
            ``database_paths`` and ``get_positives()``.
        number_of_images_per_epoch: maximum number of queries to display.
    """
    # True positives of every query
    positives_per_query = test_dataset.get_positives()

    for q_idx, preds in enumerate(predictions):
        # q_idx is also the number of figures shown so far
        if q_idx >= number_of_images_per_epoch:
            break

        # Path of the query followed by the paths of all its predictions
        image_paths = [test_dataset.queries_paths[q_idx]]
        image_paths.extend(test_dataset.database_paths[pred] for pred in preds)

        # None for the query, then True (correct) or False (wrong) for each prediction
        outcomes = [None] + [pred in positives_per_query[q_idx] for pred in preds]

        labels = ["Query"] + [f"Prediction{i} - {outcome}" for i, outcome in enumerate(outcomes[1:])]
        num_images = len(image_paths)

        images = [np.array(Image.open(path)) for path in image_paths]
        box_colour = []  # the query comes first and is drawn with this empty colour
        for img, outcome in zip(images, outcomes):
            if outcome is not None:  # i.e. it is a prediction, not the query
                box_colour = (0, 255, 0) if outcome else (255, 0, 0)  # green / red
            draw_box(img, box_colour)

        collage = np.ones([Height, (num_images*Width)+((num_images-1)*space), 3])

        # Rescale every image so that it fits in a Height x Width slot
        rescaled_images = []
        for img in images:
            factor = min(Height/img.shape[0], Width/img.shape[1])
            rescaled_images.append(rescale(img, [factor, factor, 1]))

        # Pad with ones to centre each image in its slot
        for slot, image in enumerate(rescaled_images):
            pad_width = (Width - image.shape[1] + 1) // 2
            pad_height = (Height - image.shape[0] + 1) // 2
            padding = [[pad_height, pad_height], [pad_width, pad_width], [0, 0]]
            centred = np.pad(image, padding, constant_values=1)[:Height, :Width]
            left = slot*(Width+space)
            collage[: , left : left+Width] = centred

        labels_image = write_labels_to_image(labels)
        final_image = np.concatenate([labels_image, collage])
        # Values are in [0, 1] at this point: scale them to 8-bit before displaying
        final_image = Image.fromarray((final_image*255).astype(np.uint8))
        plt.figure()
        plt.imshow(final_image)
        plt.axis('off')
        plt.show()
### Evaluation loop

In [None]:
def recall(dataset, database_descriptors, queries_descriptors, k_values, print_predictions, number_of_images_per_epoch):
    """Compute and print Recall@K for the given descriptors.

    A query counts as recalled at K when at least one of its K nearest database
    descriptors (L2 distance) is among the query's positives.

    Args:
        dataset: evaluation dataset providing ``get_positives()`` and ``queries_num``.
        database_descriptors: float32 array (num_database, dim).
        queries_descriptors: float32 array (num_queries, dim).
        k_values: values of K, expected in increasing order (a hit at one K is
            also counted for all the following ones).
        print_predictions: when true, the first 3 predictions of the first
            queries are displayed with ``print_preds``.
        number_of_images_per_epoch: number of queries displayed by ``print_preds``.

    Returns:
        Array with the recall (in percent) for each value in ``k_values``.
    """
    # Use faiss for an efficient nearest-neighbour search
    faiss_index = faiss.IndexFlatL2(queries_descriptors.shape[1])
    faiss_index.add(database_descriptors)

    _, predictions = faiss_index.search(queries_descriptors, max(k_values))

    positives_per_query = dataset.get_positives()
    recalls = np.zeros(len(k_values))

    # Calculate recall
    for q_idx, pred in enumerate(predictions):
        for i, n in enumerate(k_values):
            # np.isin replaces the deprecated np.in1d
            if np.any(np.isin(pred[:n], positives_per_query[q_idx])):
                recalls[i:] += 1
                break

    recalls = recalls / dataset.queries_num * 100
    if print_predictions:
        # For each query show 3 predictions
        print_preds(predictions[:, :3], dataset, number_of_images_per_epoch)

    table = PrettyTable()
    table.field_names = ['K'] + [str(k) for k in k_values]
    table.add_row(['Recall@K'] + [f'{values:.2f}' for values in recalls])
    print(table)
    return recalls

In [None]:
def evaluation_loop(dataset, model, dataloader, k_values,print_predictions, number_of_images_per_epoch = 5):
    """Compute the descriptors of an evaluation set and return its Recall@K.

    Args:
        dataset: evaluation dataset; its first ``database_num`` items are the
            database images and the remaining ones are the queries.
        model: network that maps a batch of images to descriptors.
        dataloader: non-shuffled loader over ``dataset`` yielding ``(images, idx)``.
        k_values: values of K for which the recall is computed.
        print_predictions: forwarded to ``recall`` to display some predictions.
        number_of_images_per_epoch: number of queries displayed (default 5).

    Returns:
        The array of recalls returned by ``recall``.
    """
    model.eval()
    all_descriptors = []

    # No gradients are needed for evaluation: this avoids building the autograd graph
    with torch.no_grad():
        for images, _ in dataloader:
            # Compute the descriptors with the model
            descriptors = model(images.to(device)).cpu().numpy().astype(np.float32)
            all_descriptors.append(descriptors)

    # Concatenate once, after the loop, instead of at every iteration
    concatenated_descriptors = np.concatenate(all_descriptors, axis=0)

    database_descriptors = concatenated_descriptors[: dataset.database_num]
    queries_descriptors = concatenated_descriptors[dataset.database_num :]

    recalls = recall(dataset, database_descriptors, queries_descriptors, k_values, print_predictions, number_of_images_per_epoch)
    return recalls

## Training session

In [None]:
# Criterion and the optimizer
# The loss is passed to training_loop as `criterion`; the optimizer is created
# inside the grid-search cell, so the line below is only kept as a reference.
criterion = losses.ContrastiveLoss(pos_margin=0, neg_margin=1)
#optimizer = SGD(model_gem.parameters(), lr=0.0001, weight_decay=0.0001, momentum=0.9)

In [None]:
#miner = miners.MultiSimilarityMiner(epsilon=0.1)
# miner = miners.TripletMarginMiner(margin=0.2, type_of_triplets="all")
# miner = miners.BatchHardMiner()  # da provare
# miner = miners.AngularMiner(angle=20)

In [None]:
# # PROXY
# print('\033[1;31mRESULTS ON TRAINING\033[0m')
# for epoch in tqdm(range(2,3)):
#     training_loop(epoch,model_gem,dataloader_train,criterion,optimizer,pre_miner = 'Proxy')

In [None]:
# # Salvataggio dei pesi del modello
# torch.save(model_avg.state_dict(), 'model_weights.pth')

## Validate session 

In [None]:
# # FIRST GRID SEARCH 

# # Parametri per la grid search
# optimizers = ['SGD', 'Adam', 'ASGD', 'AdamW', 'RMSprop']  
# momentums = [0, 0.95]  # [0.0, 0.8, 0.9, 0.95]  # Tuning sul parametro momentum per SGD

# # Risultati della grid search
# results = []

# # Loop di grid search
# for opt_name, momentum in product(optimizers, momentums):
#     print(f'Running with LR={lr}, WD={wd}, Optimizer={opt_name}, Scheduler={sched_name}, Momentum={momentum}')
    
#     # Scegli l'ottimizzatore
#     if opt_name == 'SGD':
#         optimizer = optim.SGD(model_avg.parameters(), momentum=momentum)
#     elif opt_name == 'Adam':
#         optimizer = optim.Adam(model_avg.parameters())
#     elif opt_name == 'AdamW':
#         optimizer = optim.AdamW(model_avg.parameters())
#     elif opt_name == 'ASGD':
#         optimizer = optim.ASGD(model_avg.parameters())
#     elif opt_name == 'RMSprop':
#         optimizer = optim.RMSprop(model_avg.parameters(), momentum=momentum)
      
#     # Loop di addestramento
#     num_epochs = 10
#     print('\033[1;31mRESULTS ON TRAINING\033[0m')
#     for epoch in tqdm(range(num_epochs)):
#         training_loss = training_loop(epoch, model_avg, dataloader_train, criterion, optimizer)
#         # validation_loss = evaluation_loop(dataset_val, model_avg, dataloader_val, k_values, True,3)
   
#     # Salva i risultati
#     results.append({
#         'optimizer': opt_name,
#         'momentum': momentum,
#         'final_loss': training_loss
#     })

#     evaluation_loop(dataset_val, model_avg, dataloader_val, k_values, False)
#     model_avg.reset_parameters()  # vedere se worka
    
# # Stampa i risultati finali
# for result in results:
#     print(result)

In [None]:
k_values= [1,5]
# Grid-search parameters
learning_rates = [1e-4, 1e-3]  # [1e-4, 1e-5]  # [1e-5, 1e-4, 1e-3, 1e-2]
weight_decays = [1e-3, 1e-2]
optimizers = ['Adam']  #, 'AdamW']  # best optimizers
# momentums = [0, 0.95]  # [0.0, 0.8, 0.9, 0.95]
schedulers = ['None','CosineAnnealingLR']
num_epochs = 10

# Grid-search results
results = []

# Grid-search loop
for lr, wd, opt_name, sched_name in product(learning_rates, weight_decays, optimizers, schedulers):
    print(f'Running with LR={lr}, WD={wd}, Optimizer={opt_name}, Scheduler={sched_name}')

    # Choose the optimizer (fail loudly rather than silently reusing a stale one)
    if opt_name == 'Adam':
        optimizer = optim.Adam(model_gem.parameters(), lr=lr, weight_decay=wd)
    else:
        raise ValueError(f"Unsupported optimizer: {opt_name}")

    # Choose the scheduler; None means that the learning rate stays constant
    scheduler = None
    if sched_name == 'CosineAnnealingLR':
        scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs, verbose=True)

    # Training loop: epochs are numbered from 1 because training_loop uses
    # epoch == 1 to (re)start the proxy mining
    print('\033[1;31mRESULTS ON TRAINING\033[0m')
    for epoch in tqdm(range(1, num_epochs + 1)):
        training_loss = training_loop(epoch, model_gem, dataloader_train, criterion, optimizer,pre_miner = 'Proxy')
        if scheduler is not None:
            scheduler.step()

    recalls = evaluation_loop(dataset_val, model_gem, dataloader_val, k_values, False)
    # Restore the initial weights so that the next configuration starts from scratch.
    # NOTE(review): the first configuration starts from whatever weights model_gem
    # currently holds -- confirm they match this checkpoint.
    model_gem.load_state_dict(torch.load('/kaggle/input/initial-weights-gem-parallel/initial_weights_gem_parallel.pth'))

    # Save the results together with the hyper-parameters that produced them,
    # otherwise the runs cannot be told apart
    results.append({
        'optimizer': opt_name,
        'lr': lr,
        'weight_decay': wd,
        'scheduler': sched_name,
        'recall@1': recalls[0],
        'recall@5': recalls[1]
    })

# Print the final results
for result in results:
    print(result)

In [None]:
# # GRID SEARCH FOR LOSSES
# k_values = [1, 5]
# num_classes = dataset_train.__len__()

# # Parametri per la grid search
# ArcFaceLoss = losses.ArcFaceLoss(num_classes, embedding_size=256, margin=28.6, scale=64)
# ContrastiveLoss = losses.ContrastiveLoss(pos_margin=0, neg_margin=1)
# CosFaceLoss = losses.CosFaceLoss(num_classes, embedding_size=256, margin=0.35, scale=64)
# MultiSimilarityLoss = losses.MultiSimilarityLoss(alpha=1.0, beta=50, base=0.0, distance=DotProductSimilarity())
# type_losses = [ArcFaceLoss, ContrastiveLoss, CosFaceLoss, MultiSimilarityLoss]
# # Proxy in dolce attesa delle idee del maestro GAZA
# # N-pair Loss --> buona, ma richiede batch molto grande, quindi con i  nostri strimenti infatttibile --> SCRIVERE NELLE ESTENSIONI

# # Risultati della grid search
# results = []

# # Loop di grid search
# for loss in type_losses:
#     print(f'Loss: {loss}')
    
#     # Loop di addestramento
#     num_epochs = 3
#     print('\033[1;31mRESULTS ON TRAINING\033[0m')
#     for epoch in tqdm(range(num_epochs)):
#         training_loss = training_loop(epoch, model_avg, dataloader_train, loss, optimizer)
#         # validation_loss = evaluation_loop(dataset_val, model_avg, dataloader_val, k_values, True,3)

#     # Salva i risultati
#     results.append({'loss': loss, 'final_loss': training_loss})
#     evaluation_loop(dataset_val, model_avg, dataloader_val, k_values, False)
#     model_avg.load_state_dict(torch.load('/kaggle/working/initial_weights.pth'))
#     print('---------------------------------------------------------------')

# # Stampa i risultati finali
# for result in results:
#     print(result)

## Test session

In [None]:
# k_values = [1, 5]

# print('\033[1;32mRESULTS ON SF-XS VAL\033[0m')
# evaluation_loop(dataset_val, model_gem, dataloader_val, k_values, False)

# print('\033[1;33mRESULTS ON SF-XS TEST\033[0m')
# evaluation_loop(dataset_test, model_gem, dataloader_test, k_values, False)

# print('\033[1;36mRESULTS ON TOKYO TEST\033[0m')
# evaluation_loop(dataset_tokyo, model_gem, dataloader_tokyo, k_values, False)

queries 7993
database 8015

### Questions

Are the results different on SF-XS val from SF-XS test? Why?

Have a look at the images in the datasets, and make sure that you understand why the results are different on the two sets! 

What are the main differences between SF-XS val and SF-XS test?

Are the results different on Tokyo-XS than on SF-XS test?

Have a look at the images in Tokyo-XS, what are the main differences between Tokyo-XS and SF-XS test?

Report the values of recall@N (for N=1,5), i.e. the percentage of queries for which at least one of the first N predictions is within a 25 meters distance from the query.

Try replacing the average pooling layer with a GeM layer. Did the results improve?

Visually analyze the results

There is a parameter in the code that you can pass to the training script that will save some queries and their predictions (i.e. the images from the database that the model thinks are most similar to the query). Find it and use it! How do the predictions look? Does your model make any strange mistakes?