# Image Retrieval for Visual Geolocalization: Extensions and Experiments

## Libraries

In [1]:
# Import libraries
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from itertools import product
from pytorch_metric_learning import miners, losses
from pytorch_metric_learning.distances import CosineSimilarity, DotProductSimilarity
from torch.optim import SGD, Adam, AdamW, ASGD, RMSprop
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR
from tqdm import tqdm

In [None]:
# Import Modules
from datasets.Train import TrainDataset, transform
from datasets.Eval import EvalDataset
from visualization.Visualization import print_sample_dataset
from models.Aggregators import Avg_ResNet
from models.Aggregators import GeM_ResNet
from models.Training_loop import training_loop
from models.Evaluation_loop import evaluation_loop

## Initializations

In [None]:
# Device-agnostic setup: use the GPU when PyTorch can see one, else the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Bare expression so the notebook displays the chosen device
device

In [None]:
# Seed NumPy and PyTorch (CPU and CUDA) with one shared value
_SEED = 42
np.random.seed(_SEED)
torch.manual_seed(_SEED)
torch.cuda.manual_seed(_SEED)

## Load Datasets


In [None]:
# Training loading
# Build the training dataset from the Kaggle input folder and serve it in
# shuffled mini-batches of 64 samples.
root_dir_train = '/kaggle/input/gsv-xs/gsv_xs/train'
dataset_train = TrainDataset(
    root_dir=root_dir_train,
    transform=transform,
)
dataloader_train = data.DataLoader(
    dataset_train,
    batch_size=64,
    shuffle=True,
)

In [None]:
# Validation loading
# 'val' split of the SF-XS folder, read in a fixed (unshuffled) order
# in mini-batches of 64 samples.
root_dir_eval = '/kaggle/input/sf-xs/sf_xs'
dataset_val = EvalDataset(
    root_dir=root_dir_eval,
    type_of_set='val',
    transform=transform,
)
dataloader_val = data.DataLoader(
    dataset_val,
    batch_size=64,
    shuffle=False,
)

In [None]:
# Test loading
# Both test sets are read in a fixed (unshuffled) order in mini-batches of 64.

# SF-XS ('test' split of the same folder used for validation)
root_dir_eval = '/kaggle/input/sf-xs/sf_xs'
dataset_test = EvalDataset(
    root_dir=root_dir_eval,
    type_of_set='test',
    transform=transform,
)
dataloader_test = data.DataLoader(
    dataset_test,
    batch_size=64,
    shuffle=False,
)

# Tokyo-xs
root_dir_tokyo = '/kaggle/input/tokyo-xs/tokyo_xs'
dataset_tokyo = EvalDataset(
    root_dir=root_dir_tokyo,
    type_of_set='test',
    transform=transform,
)
dataloader_tokyo = data.DataLoader(
    dataset_tokyo,
    batch_size=64,
    shuffle=False,
)

## First Visualizations

In [None]:
# Visualize samples in 'train' mode; the helper receives both the training and the Tokyo loaders.
# NOTE(review): 1 and 3 are presumably the grid size (rows, columns) — confirm in visualization.Visualization.
print_sample_dataset('train', 1, 3, dataloader_train, dataloader_tokyo)

In [None]:
# Visualize samples in 'test' mode; the helper receives both the training and the Tokyo loaders.
# NOTE(review): 1 and 3 are presumably the grid size (rows, columns) — confirm in visualization.Visualization.
print_sample_dataset('test', 1, 3, dataloader_train, dataloader_tokyo)

## Network

#### Resnet-18 with Average Pooling

In [None]:
# if torch.cuda.device_count() > 1:  # multiple GPU in parallel
#     print("Let's use", torch.cuda.device_count(), "GPUs")
#     model_avg = nn.DataParallel(Avg_ResNet())
#     model_avg = model_avg.cuda()
# else:  # single GPU
#     model_avg = Avg_ResNet().cuda()

# # Print the model architecture
# print(model_avg)

# # Save the model's initial state dictionary
# torch.save(model_avg.state_dict(), '/kaggle/working/initial_weights.pth')

#### Resnet-18 with GeM Pooling

In [None]:
# Initialize the network
# The model is moved with `.to(device)` rather than `.cuda()` so that this cell
# also runs on CPU-only machines, in line with the device-agnostic `device`
# selected in the initialization section of the notebook.
model_gem = GeM_ResNet()
if torch.cuda.device_count() > 1:  # multiple GPUs: run the model in parallel
    print("Let's use", torch.cuda.device_count(), "GPUs")
    model_gem = nn.DataParallel(model_gem)
model_gem = model_gem.to(device)

# Print the model architecture
print(model_gem)

# Save the model's initial state dictionary so the weights can be restored later
# NOTE: when the model is wrapped in nn.DataParallel, the saved keys carry a
# "module." prefix, so the file is only loadable into a model wrapped the same way.
torch.save(model_gem.state_dict(), '/kaggle/working/initial_weights.pth')

In [None]:
# Loading saved weights
# model_gem.load_state_dict(torch.load('/kaggle/input/weights/model_weights.pth'))  

## Training session

#### Optimizer

Some tested choices:
- `SGD(model_gem.parameters(), lr=0.001, momentum = 0.90, weight_decay=0)`
- `ASGD(model_gem.parameters(), lr=0.001, weight_decay=0)`
- `Adam(model_gem.parameters(), lr=0.001, weight_decay=0)`
- `AdamW(model_gem.parameters(), lr=0.001, weight_decay=0.01)`
- `RMSprop(model_gem.parameters(), lr=0.001, momentum=0.90)`


In [None]:
# Optimizer used by the training loop: SGD with momentum and L2 weight decay
# over all parameters of model_gem.
optimizer = SGD(model_gem.parameters(), lr=0.001, weight_decay=0.001, momentum=0.9)

#### Loss

Tested choices:
- `losses.ContrastiveLoss(pos_margin=0, neg_margin=1)`
- `losses.CosFaceLoss(num_classes, embedding_size=256, margin=0.35, scale=64)`
- `losses.ArcFaceLoss(num_classes, embedding_size=256, margin=28.6, scale=64)`
- `losses.TripletMarginLoss(margin=0.05, swap=False, smooth_loss=False, triplets_per_anchor="all")`
- `losses.MultiSimilarityLoss(alpha=1.0, beta=50, base=0.0, distance=DotProductSimilarity())`

In [None]:
# Loss passed to training_loop: contrastive loss from pytorch_metric_learning
# with a positive margin of 0 and a negative margin of 1.
criterion = losses.ContrastiveLoss(pos_margin=0, neg_margin=1)

#### Miner

Tested choices:
- `None`, i.e. offline mining
- `miners.MultiSimilarityMiner(epsilon=0.1)`
- `miners.TripletMarginMiner(margin=0.2, type_of_triplets="all")`
- `miners.AngularMiner(angle=20)`

In [None]:
# No miner object is handed to training_loop in this configuration
miner = None

#### Pre-Miner

In [None]:
# Pre-miner option forwarded to training_loop; None leaves it unset.
# NOTE(review): 'Proxy' is the only alternative shown here — its meaning is defined in models.Training_loop.
pre_miner = None
# pre_miner = 'Proxy'

#### Scheduler

Tested choices:
- No scheduler
- `ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, verbose=True)`
- `CosineAnnealingLR(optimizer, T_max=10, verbose=True)`

In [None]:
# Cosine-annealing learning-rate schedule attached to `optimizer`; it is stepped
# once per epoch in the training loop, and T_max=10 equals the num_epochs value
# configured in this notebook.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm against the installed version.
scheduler = CosineAnnealingLR(optimizer, T_max=10, verbose=True)

#### Epochs

In [None]:
# Number of training epochs; the training loop iterates epoch = 1 .. num_epochs inclusive
num_epochs = 10

#### K values for Recall@K

In [None]:
# K values handed to evaluation_loop, i.e. Recall@1 and Recall@5
k_values = [1, 5]

#### Save model's weights

In [None]:
# torch.save(model_gem.state_dict(), 'model_weights.pth')

### Training loop

In [None]:
# Run one training pass per epoch and advance the LR schedule after each pass
print('\033[1;31mRESULTS ON TRAINING\033[0m')
for epoch in tqdm(range(1, num_epochs + 1)):
    training_loss = training_loop(
        epoch,
        model_gem,
        dataset_train,
        dataloader_train,
        criterion,
        optimizer,
        miner=miner,
        pre_miner=pre_miner,
    )
    # Step form for CosineAnnealingLR (takes no metric)
    scheduler.step()
    # Step form for ReduceLROnPlateau (needs the monitored loss):
    # scheduler.step(training_loss)

## Validation session

In [None]:
# # FIRST GRID SEARCH: OPTIMIZERS

# # Parameters for the grid search
# optimizers = ['SGD','Adam', 'ASGD', 'AdamW', 'RMSprop']  

# # Results of the grid search
# results = []

# # Grid search loop
# for opt_name in optimizers:
#     print(f'Running with Optimizer={opt_name}')
    
#     # Choose the optimizer
#     if opt_name == 'SGD':
#         optimizer = SGD(model_gem.parameters(), lr=0.001, momentum=0.90)        
#     elif opt_name == 'Adam':
#         optimizer = Adam(model_gem.parameters(), lr=0.001)
#     elif opt_name == 'AdamW':
#         optimizer = AdamW(model_gem.parameters(), lr=0.001)
#     elif opt_name == 'ASGD':
#         optimizer = ASGD(model_gem.parameters(), lr=0.01)
#     elif opt_name == 'RMSprop':
#         optimizer = RMSprop(model_gem.parameters(), lr=0.001, momentum=0.90)
      
#     # Training loop
#     print('\033[1;31mRESULTS ON TRAINING\033[0m')
#     for epoch in tqdm(range(1, num_epochs+1)):
#         training_loop(epoch, model_gem, dataset_train, dataloader_train, criterion, optimizer, miner=miner, pre_miner=pre_miner) 

#     recalls = evaluation_loop(dataset_val, model_gem, dataloader_val, k_values, print_predictions=False)
    
#     # Save the results
#     results.append({
#         'optimizer': opt_name,
#         'R@1': recalls[0],
#         'R@5': recalls[1]
#     })
    
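#     # Reset the weights
#     # NOTE(review): the reset runs only AFTER each experiment, so the first optimizer starts from
#     # whatever state model_gem is in when this cell is executed. The path below also differs from
#     # the file saved at initialization ('/kaggle/working/initial_weights.pth') — confirm both.
#     model_gem.load_state_dict(torch.load('/kaggle/input/initial-weights-gem/initial_weights_gem.pth'))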
    
# # Print the final results
# for result in results:
#     print(result)

In [None]:
# # SECOND GRID SEARCH: PARAMETERS OF THE OPTIMAL OPTIMIZER 

# # Parameters for the grid search
# learning_rates = [1e-3, 1e-4, 1e-5]  
# weight_decays = [1e-2, 1e-3, 1e-4, 1e-5]  
# optimizers = ['Adam']  
# schedulers = ['None', 'ReduceLROnPlateau', 'CosineAnnealingLR']

# # Results of the grid search
# results = []

# # Grid search loop
# for lr, wd, opt_name, sched_name in product(learning_rates, weight_decays, optimizers, schedulers):
#     print(f'Running with LR={lr}, WD={wd}, Optimizer={opt_name}, Scheduler={sched_name}')
    
#     # Choose the optimizer
#     if opt_name == 'Adam':
#         optimizer = optim.Adam(model_gem.parameters(), lr=lr, weight_decay=wd)
    
#     # Choose the scheduler
#     if sched_name == 'ReduceLROnPlateau':
#         scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, verbose=True)
#     elif sched_name == 'CosineAnnealingLR':
#         scheduler = CosineAnnealingLR(optimizer, T_max=10, verbose=True)
    
#     # Training loop
#     print('\033[1;31mRESULTS ON TRAINING\033[0m')
#     for epoch in tqdm(range(1, num_epochs+1)):
#         training_loss = training_loop(epoch, model_gem, dataset_train, dataloader_train, criterion, optimizer, miner=miner, pre_miner=pre_miner)        
#         # Scheduler step       
#         if sched_name == 'ReduceLROnPlateau':
#             scheduler.step(training_loss)            
#         elif sched_name == 'CosineAnnealingLR':
#             scheduler.step()
    
#     recalls = evaluation_loop(dataset_val, model_gem, dataloader_val, k_values, False)

#     # Save the results
#     results.append({
#         'optimizer': opt_name,
#         'learning rate': lr,
#         'weight decay': wd,
#         'scheduler': sched_name,
#         'recall@1': recalls[0],
#         'recall@5': recalls[1]
#     })
    
#     # Reset the weights
#     model_gem.load_state_dict(torch.load('/kaggle/input/initial-weights-gem-parallel/initial_weights_gem_parallel.pth'))  

# # Print the final results
# for result in results:
#     print(result)

In [None]:
# # THIRD GRID SEARCH: LOSSES

# # CosFaceLoss and ArcFaceLoss need the number of classes in the training dataset
# num_classes = dataset_train.__len__()

# # Parameters for the grid search
# learning_rates = [1e-4]  
# weight_decays = [1e-2]  
# optimizers = ['Adam']  
# schedulers = ['CosineAnnealingLR']
# criterions = ['ContrastiveLoss', 'CosFaceLoss', 'ArcFaceLoss', 'TripletMarginLoss', 'MultiSimilarityLoss']

# # Results of the grid search
# results = []

# # Grid search loop
# for lr, wd, opt_name, sched_name, criterion_name in product(learning_rates, weight_decays, optimizers, schedulers, criterions):
#     print(f'Running with LR={lr}, WD={wd}, Optimizer={opt_name}, Scheduler={sched_name}, Loss={criterion_name}')
    
#     # Choose the optimizer
#     if opt_name == 'Adam':
#         optimizer = optim.Adam(model_gem.parameters(), lr=lr, weight_decay=wd)
    
#     # Choose the scheduler
#     if sched_name == 'CosineAnnealingLR':
#         scheduler = CosineAnnealingLR(optimizer, T_max=10, verbose=True)
        
#     # Choose the loss
#     if criterion_name == 'ContrastiveLoss':
#         criterion = losses.ContrastiveLoss(pos_margin=0, neg_margin=1)
#     elif criterion_name == 'CosFaceLoss':
#         criterion = losses.CosFaceLoss(num_classes, embedding_size=256, margin=0.35, scale=64)
#     elif criterion_name == 'ArcFaceLoss':
#         criterion = losses.ArcFaceLoss(num_classes, embedding_size=256, margin=28.6, scale=64)
#     elif criterion_name == 'TripletMarginLoss':
#         criterion = losses.TripletMarginLoss(margin=0.05, swap=False, smooth_loss=False, triplets_per_anchor="all")
#     elif criterion_name == 'MultiSimilarityLoss':
#         criterion = losses.MultiSimilarityLoss(alpha=1.0, beta=50, base=0.0, distance=DotProductSimilarity())
    
#     # Training loop
#     print('\033[1;31mRESULTS ON TRAINING\033[0m')
#     for epoch in tqdm(range(1, num_epochs+1)):
#         training_loss = training_loop(epoch, model_gem, dataset_train, dataloader_train, criterion, optimizer, miner=miner, pre_miner=pre_miner)        
#         # Scheduler step                  
#         if sched_name == 'CosineAnnealingLR':
#             scheduler.step()
    
#     recalls = evaluation_loop(dataset_val, model_gem, dataloader_val, k_values, False)

#     # Save the results
#     results.append({
#         'optimizer': opt_name,
#         'learning rate': lr,
#         'weight decay': wd,
#         'scheduler': sched_name,
#         'loss': criterion_name,
#         'recall@1': recalls[0],
#         'recall@5': recalls[1]
#     })
    
#     # Reset the weights
#     model_gem.load_state_dict(torch.load('/kaggle/input/initial-weights-gem-parallel/initial_weights_gem_parallel.pth'))  

# # Print the final results
# for result in results:
#     print(result)

In [None]:
# # FOURTH GRID SEARCH: MINERS

# # Parameters for the grid search
# learning_rates = [1e-4]  
# weight_decays = [1e-2]  
# optimizers = ['Adam']  
# schedulers = ['CosineAnnealingLR']
# criterions = ['ContrastiveLoss', 'TripletMarginLoss', 'MultiSimilarityLoss']
# miner_names = ['MultiSimilarityMiner', 'TripletMarginMiner', 'AngularMiner']

# # Results of the grid search
# results = []

# # Grid search loop
# for lr, wd, opt_name, sched_name, criterion_name, miner_name in product(learning_rates, weight_decays, optimizers, schedulers, criterions, miner_names):
#     print(f'Running with LR={lr}, WD={wd}, Optimizer={opt_name}, Scheduler={sched_name}, Loss={criterion_name}, Miner={miner_name}')
    
#     # Choose the optimizer
#     if opt_name == 'Adam':
#         optimizer = optim.Adam(model_gem.parameters(), lr=lr, weight_decay=wd)
    
#     # Choose the scheduler
#     if sched_name == 'CosineAnnealingLR':
#         scheduler = CosineAnnealingLR(optimizer, T_max=10, verbose=True)
        
#     # Choose the loss
#     if criterion_name == 'ContrastiveLoss':
#         criterion = losses.ContrastiveLoss(pos_margin=0, neg_margin=1)
#     elif criterion_name == 'TripletMarginLoss':
#         criterion = losses.TripletMarginLoss(margin=0.05, swap=False, smooth_loss=False, triplets_per_anchor="all")
#     elif criterion_name == 'MultiSimilarityLoss':
#         criterion = losses.MultiSimilarityLoss(alpha=1.0, beta=50, base=0.0, distance=DotProductSimilarity())
        
#     # Choose the miner
#     if miner_name == 'MultiSimilarityMiner':
#         miner = miners.MultiSimilarityMiner(epsilon=0.1)
#     elif miner_name == 'TripletMarginMiner':
#         miner = miners.TripletMarginMiner(margin=0.2, type_of_triplets="all")
#     elif miner_name == 'AngularMiner':
#         miner = miners.AngularMiner(angle=20)
    
#     # Training loop
#     print('\033[1;31mRESULTS ON TRAINING\033[0m')
#     for epoch in tqdm(range(1, num_epochs+1)):
#         training_loss = training_loop(epoch, model_gem, dataset_train, dataloader_train, criterion, optimizer, miner=miner, pre_miner=pre_miner)        
#         # Scheduler step                  
#         if sched_name == 'CosineAnnealingLR':
#             scheduler.step()
    
#     recalls = evaluation_loop(dataset_val, model_gem, dataloader_val, k_values, False)

#     # Save the results
#     results.append({
#         'optimizer': opt_name,
#         'learning rate': lr,
#         'weight decay': wd,
#         'scheduler': sched_name,
#         'loss': criterion_name,
#         'miner': miner_name, 
#         'recall@1': recalls[0],
#         'recall@5': recalls[1]
#     })
    
#     # Reset the weights
#     model_gem.load_state_dict(torch.load('/kaggle/input/initial-weights-gem-parallel/initial_weights_gem_parallel.pth'))  

# # Print the final results
# for result in results:
#     print(result)

## Test session

In [None]:
# Evaluate the current model_gem on the three evaluation sets, printing a
# colored header before each run. Every call uses the same k_values.
# NOTE(review): the trailing positional False is presumably `print_predictions`
# (it is passed by keyword in the first grid-search cell) — confirm in models.Evaluation_loop.
print('\033[1;32mRESULTS ON SF-XS VAL\033[0m')
evaluation_loop(dataset_val, model_gem, dataloader_val, k_values, False)

print('\033[1;33mRESULTS ON SF-XS TEST\033[0m')
evaluation_loop(dataset_test, model_gem, dataloader_test, k_values, False)

print('\033[1;36mRESULTS ON TOKYO TEST\033[0m')
evaluation_loop(dataset_tokyo, model_gem, dataloader_tokyo, k_values, False)