<a href="https://colab.research.google.com/github/EmaMule/Computer-Vision/blob/main/CVUSA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing and installing dependencies

In [None]:
%%capture

# @title Installing dependencies

!pip install tqdm
!pip install pytorch_lightning
!pip install patool
!pip install torchvision nightly

In [None]:
# @title Importing libraries

import pandas as pd
import numpy as np
import random
import warnings
import matplotlib.pyplot as plt
import os
from PIL import Image
from tqdm import tqdm
from google.colab import drive
import shutil
import csv
import cv2
import gdown
import patoolib

# pytorch
import torch
from torch.utils.data import Dataset, DataLoader, Sampler, random_split
from torch.utils.data.sampler import SubsetRandomSampler, SequentialSampler, RandomSampler, BatchSampler
from torch import nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision import transforms
from torchvision.transforms import v2
from torchtext.data.metrics import bleu_score
from torchmetrics.functional.pairwise import pairwise_cosine_similarity

# pytorch lighting
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, TQDMProgressBar, RichProgressBar, ModelPruning
from pytorch_lightning import loggers



In [None]:
# @title Folders Setup

# Recreate every working directory from scratch so nothing from a previous
# run is left behind. Parents come before children because os.mkdir does not
# create intermediate directories.
for fresh_dir in (
    '/content/input',
    '/content/output',
    '/content/output/log',
    '/content/lightning_logs',
):
    shutil.rmtree(fresh_dir, ignore_errors = True)
    os.mkdir(fresh_dir)

In [None]:
# @title Downloading Dataset

# Google Drive file ids for the dataset archive (the first one is used below):
# our id: 1-FHbO02_KtJcStojzf1JRKwJkc0hMCLd
# their id: 17W9VEPMneRlb6igtSxa--Xh4fSZs3RS_

# 1) Download the .rar archive and unpack it into /content/input/data.
url = 'https://drive.google.com/uc?id=1-FHbO02_KtJcStojzf1JRKwJkc0hMCLd'
output_file = '/content/input/CVUSA_subset.rar'
output_dir = '/content/input/data'

gdown.download(url, output_file)
patoolib.extract_archive(output_file, outdir = output_dir)

# 2) Download the CSV file listing the training split.
url = 'https://drive.google.com/uc?id=19fD1WMGTmusYk8E7ygT6nAJTluf3a_oH'
output_file = '/content/input/train.csv'
gdown.download(url, output_file)

# 3) Download the CSV file listing the validation split. This is the cell's
#    last expression, so its return value is echoed as the cell output.
url = 'https://drive.google.com/uc?id=1Rt6waJ6f-kM12Q2A9mgRxAcKfZPdg9IY'
output_file = '/content/input/val.csv'
gdown.download(url, output_file)

Downloading...
From (original): https://drive.google.com/uc?id=1-FHbO02_KtJcStojzf1JRKwJkc0hMCLd
From (redirected): https://drive.google.com/uc?id=1-FHbO02_KtJcStojzf1JRKwJkc0hMCLd&confirm=t&uuid=813aa56a-360a-4677-a502-6625b9ebe1f2
To: /content/input/CVUSA_subset.rar
100%|██████████| 4.38G/4.38G [03:16<00:00, 22.3MB/s]
INFO patool: Extracting /content/input/CVUSA_subset.rar ...
INFO:patool:Extracting /content/input/CVUSA_subset.rar ...
INFO patool: ... creating output directory `/content/input/data'.
INFO:patool:... creating output directory `/content/input/data'.
INFO patool: running /usr/bin/unrar x -- /content/input/CVUSA_subset.rar
INFO:patool:running /usr/bin/unrar x -- /content/input/CVUSA_subset.rar
INFO patool:     with cwd='/content/input/data', input=''
INFO:patool:    with cwd='/content/input/data', input=''
INFO patool: ... /content/input/CVUSA_subset.rar extracted to `/content/input/data'.
INFO:patool:... /content/input/CVUSA_subset.rar extracted to `/content/input/data'.

'/content/input/val.csv'

In [None]:
# @title Settings

# Fix the global seed through Lightning so runs are repeatable.
pl.seed_everything(42)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

INFO:lightning_fabric.utilities.seed:Seed set to 42


# Dataset and DataModule

**TODO:** we may also need to add the polar-transformed images and the segmentation maps; this is not done yet because there is a problem with the CSV files. There is also no test set: should we evaluate on the validation set, or split the training set and use the current validation set as the test set?

In [None]:
# @title Dataset definition: without using polar transforms (neither segmentation)

# Expected dataset structure: the input_dir contains the split cvs files and a
# subdirectory named 'data' with the CVUSA dataset

class CVUSADataset(Dataset):
    """Index of CVUSA samples listed in ``<input_dir>/<split>.csv``.

    The dataset only stores *relative paths*; no image is opened here. Each
    item is a ``(grd_path, sat_path, seg_path)`` tuple of strings.

    Args:
        input_dir: directory containing the split CSV files.
        split: name of the split; the file ``<split>.csv`` is read.
        polar: when True, the satellite and segmentation paths are taken from
            CSV columns 3 and 4 instead of columns 0 and 2.
    """

    def __init__(self, input_dir, split = 'train', polar = False):
        self.split = split
        self.polar = polar
        self.data = self.load_data(input_dir + f'/{split}.csv')


    def load_data(self, csv_path):
        """Parse ``csv_path`` into a list of path dictionaries.

        The first row is treated as a header and skipped. Column 1 is always
        the ground image; the satellite/segmentation columns depend on
        ``self.polar`` (see the class docstring).

        Returns:
            list of dicts with keys ``grd_path``, ``sat_path``, ``seg_path``.
        """
        data = []
        # newline='' is the mode recommended by the csv module documentation.
        with open(csv_path, 'r', newline='') as file:
            csv_reader = csv.reader(file)
            next(csv_reader, None)  # skip header; tolerate a completely empty file
            for row in csv_reader:
                if not row:
                    # csv.reader yields [] for blank lines; indexing it would
                    # raise IndexError, so such lines are ignored.
                    continue
                grd_path = row[1]
                if self.polar:
                    sat_path, seg_path = row[3], row[4]
                else:
                    sat_path, seg_path = row[0], row[2]
                data.append({"grd_path": grd_path, "sat_path": sat_path, "seg_path": seg_path})

        return data


    def __len__(self):
        """Number of samples listed in the split file."""
        return len(self.data)


    def __getitem__(self, index):
        """Return the ``(grd_path, sat_path, seg_path)`` tuple at ``index``."""
        entry = self.data[index]
        return entry['grd_path'], entry['sat_path'], entry['seg_path']


    def __str__(self):
        return f"CVUSA-Dataset-{self.split}: {len(self.data)} samples"

In [None]:
# @title Data module definition: without using polar transforms (neither segmentation)

class CVUSADataModule(pl.LightningDataModule):
    """Data module producing (ground, satellite, segmentation) image batches.

    The underlying ``CVUSADataset`` objects only hold relative paths; images
    are opened and transformed lazily inside ``collate_fn``.

    Args:
        input_dir: directory holding ``train.csv``, ``val.csv`` and a ``data``
            sub-directory with the images.
        batch_size: batch size used by every dataloader.
        grd_resize / sat_resize / seg_resize: optional size forwarded to
            ``v2.Resize`` for the respective image type; a falsy value keeps
            the original size.

    Typical use: call ``set_mean_std`` (optional), then ``setup``, then the
    ``*_dataloader`` methods.
    """

    _IMG_TYPES = ('grd', 'sat', 'seg')

    def __init__(self, input_dir, batch_size=8, grd_resize = None, sat_resize = None, seg_resize = None):

        super(CVUSADataModule, self).__init__()
        self.batch_size = batch_size
        self.input_dir = input_dir
        self.data_dir = input_dir + '/data'

        # Per image type: size found on disk, requested resize, effective size.
        self.original_size = {'grd': None, 'sat': None, 'seg': None}
        self.resize = {'grd': grd_resize, 'sat': sat_resize, 'seg': seg_resize}
        self.size = {'grd': None, 'sat': None, 'seg': None}
        # Identity normalisation until set_mean_std() is called.
        self.mean = {'grd': [0,0,0], 'sat': [0,0,0], 'seg': [0,0,0]}
        self.std = {'grd': [1,1,1], 'sat': [1,1,1], 'seg': [1,1,1]}
        self.transform = {'grd': None, 'sat': None, 'seg': None}


    def setup(self, stage=None):
        """Load the split indices, probe image sizes and build the transforms.

        ``stage`` is accepted (and ignored) so that the method matches the
        signature Lightning uses when it calls the hook itself; calling
        ``setup()`` without arguments keeps working.
        """

        # load the datasets
        self.train_dataset = CVUSADataset(input_dir=self.input_dir, split='train')
        self.val_dataset = CVUSADataset(input_dir=self.input_dir, split='val')
        #self.test_dataset = CVUSADataset(input_dir=self.input_dir, split='test')

        print(self.train_dataset)
        print(self.val_dataset)
        #print(self.test_dataset)

        # The first training sample is used to discover the image sizes.
        sample_paths = dict(zip(self._IMG_TYPES, self.train_dataset[0]))

        for img_type, rel_path in sample_paths.items():
            # Close the file handle as soon as the pixels are in a tensor.
            with Image.open(os.path.join(self.data_dir, rel_path)) as pil_img:
                image = v2.ToImage()(pil_img)

            # size() is (channels, height, width); keep (height, width).
            self.original_size[img_type] = image.size()[1:3]
            self.size[img_type] = image.size()[1:3]

            # compute image new size by actually resizing the probe image
            if self.resize[img_type]:
                self.size[img_type] = v2.Resize((self.resize[img_type]))(image).size()[1:3]

            # Same pipeline for all three image types; only size/mean/std differ.
            self.transform[img_type] = v2.Compose([
                v2.ToImage(),
                v2.Resize(self.size[img_type]),
                v2.ToDtype(torch.float32, scale=True),
                v2.Normalize(self.mean[img_type], self.std[img_type], inplace=False)
            ])


    # Images are loaded here, batch by batch, so the whole dataset never has
    # to sit in RAM; the price is a somewhat slower training loop.
    def collate_fn(self,batch):
        """Turn a list of path triples into three dicts of tensors.

        Returns:
            ``(grd_samples, sat_samples, seg_samples)``; each is a dict with
            ``'imgs'`` (stacked image tensor) and ``'imgs_id'`` (id tensor).
        """

        grd_path, sat_path, seg_path = zip(*batch)

        # load and transform each image in the batch
        grd_ids, grd_images = self.__compute_images(grd_path, 'grd')
        sat_ids, sat_images = self.__compute_images(sat_path, 'sat')
        seg_ids, seg_images = self.__compute_images(seg_path, 'seg')

        grd_samples = {'imgs': grd_images, 'imgs_id': grd_ids}
        sat_samples = {'imgs': sat_images, 'imgs_id': sat_ids}
        seg_samples = {'imgs': seg_images, 'imgs_id': seg_ids}

        return grd_samples, sat_samples, seg_samples


    def __compute_images(self, paths, img_type):
        """Open, transform and stack the images at ``paths``.

        The numeric id is parsed from ``img_path[-11:-4]``, i.e. the seven
        characters before the last four (presumably a 7-digit id followed by
        a 4-character extension such as ``.jpg`` -- TODO confirm).

        Returns:
            ``(ids_tensor, images_tensor)``.
        """
        images = []
        ids = []

        for img_path in paths:
            with Image.open(os.path.join(self.data_dir, img_path)) as pil_img:
                images.append(self.transform[img_type](pil_img))
            ids.append(int(img_path[-11:-4]))

        # Stack the image tensors along the batch dimension
        images_tensor = torch.stack(images)
        ids_tensor = torch.tensor(ids, dtype=torch.long)
        return ids_tensor, images_tensor


    @staticmethod
    def combine_segmap(image, segmap, sigma):
        """Blend ``image`` and ``segmap``: ``sigma*image + (1-sigma)*segmap``."""
        assert image.shape == segmap.shape
        combined = sigma * image + (1-sigma) * segmap
        return combined


    def train_dataloader(self):
        return DataLoader(self.train_dataset,batch_size=self.batch_size,collate_fn=self.collate_fn,shuffle=True,num_workers=2)


    def val_dataloader(self):
        return DataLoader(self.val_dataset,batch_size=self.batch_size,collate_fn=self.collate_fn,shuffle=False,num_workers=2)


    #def test_dataloader(self):
    #    return DataLoader(self.test_dataset,batch_size=self.batch_size, collate_fn=self.collate_fn,shuffle=True,num_workers=2)


    @staticmethod
    def _read_rgb01(path):
        """Read ``path`` with OpenCV and return an RGB float array in [0, 1]."""
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {path}")
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB).astype(float) / 255.


    def compute_mean_std(self):
        """Compute per-channel mean/std of the training ground and satellite images.

        Two passes over the training set: the first averages the per-image
        channel means, the second averages the per-image mean squared
        deviation from that global mean and takes the square root.
        Segmentation maps are not analysed; identity statistics are returned
        for them.

        Returns:
            dict with keys ``grd_mean``, ``grd_std``, ``sat_mean``,
            ``sat_std`` (numpy arrays) and ``seg_mean``, ``seg_std`` (lists).
        """

        samples = self.train_dataset.data
        num_images = len(samples)

        sv_mean = np.array([0., 0., 0.])
        sv_std = np.array([0., 0., 0.])

        sm_mean = np.array([0., 0., 0.])
        sm_std = np.array([0., 0., 0.])

        # compute mean
        for sample in samples:
            sv_img = self._read_rgb01(os.path.join(self.data_dir, sample['grd_path']))
            sv_mean += np.mean(sv_img, axis = (0,1))

            sm_img = self._read_rgb01(os.path.join(self.data_dir, sample['sat_path']))
            sm_mean += np.mean(sm_img, axis = (0,1))

        sv_mean /= num_images
        sm_mean /= num_images

        # compute std
        for sample in samples:
            sv_img = self._read_rgb01(os.path.join(self.data_dir, sample['grd_path']))
            sv_img_size = sv_img.shape[0] * sv_img.shape[1]
            sv_std += ((sv_img - sv_mean)**2).sum(axis = (0,1)) / sv_img_size

            sm_img = self._read_rgb01(os.path.join(self.data_dir, sample['sat_path']))
            sm_img_size = sm_img.shape[0] * sm_img.shape[1]
            # Deviation must be taken from the *satellite* mean (the previous
            # code subtracted the ground mean here).
            sm_std += ((sm_img - sm_mean)**2).sum(axis = (0,1)) / sm_img_size

        sv_std = np.sqrt(sv_std/num_images)
        sm_std = np.sqrt(sm_std/num_images)

        # does it make sense to normalize the segmap?

        result = {'grd_mean': sv_mean, 'grd_std': sv_std,
                  'sat_mean': sm_mean, 'sat_std': sm_std,
                  'seg_mean': [0,0,0], 'seg_std': [1,1,1]
        }

        return result


    def set_mean_std(self, mean_std):
        """Store normalisation statistics from a ``compute_mean_std``-style dict.

        Must be called before ``setup`` to take effect, because the transforms
        capture the statistics when they are built.
        """
        for img_type in self._IMG_TYPES:
            self.mean[img_type] = mean_std[f'{img_type}_mean']
            self.std[img_type] = mean_std[f'{img_type}_std']

In [None]:
# @title Creating dataloaders

input_dir = '/content/input'

data_module = CVUSADataModule(
    input_dir = input_dir,
    batch_size = 64,
    grd_resize = 64,
    sat_resize = 128,
    seg_resize = 128
)

# mean_std = data_module.compute_mean_std()
mean_std = {
    'grd_mean': [0.4691, 0.4821, 0.4603], 'grd_std': [0.2202, 0.2191, 0.2583],
    'sat_mean': [0.3833 , 0.3964, 0.3434], 'sat_std': [0.2131, 0.2024, 0.2259],
    'seg_mean': [0, 0, 0], 'seg_std': [1, 1, 1]
}
data_module.set_mean_std(mean_std)

data_module.setup()

train_loader = data_module.train_dataloader()
val_loader = data_module.val_dataloader()
#test_loader = data_module.test_dataloader()

CVUSA-Dataset-train: 6647 samples
CVUSA-Dataset-val: 2215 samples


# Losses and other utilities

In [None]:
#@title Implementation TripletLoss more stable

# RIVEDI (confronto con triplet torchreid)

class TripletLoss(pl.LightningModule):
    """Soft-margin triplet loss over all in-batch pairs, in both directions.

    Row ``i`` of ``image_features1`` and row ``i`` of ``image_features2`` are
    treated as the matching (positive) pair; every other row is a negative.

    Args:
        loss_weight: scale applied to the distance differences before the
            soft-plus.
    """

    def __init__(self, loss_weight = 0.3):
        super().__init__()
        self.loss_weight = loss_weight


    def forward(self, image_features1, image_features2):
        """Return the scalar loss for two ``(batch, dim)`` embedding tensors."""
        image_features1 = F.normalize(image_features1, dim=-1)
        image_features2 = F.normalize(image_features2, dim=-1)

        # For unit vectors, 2 - 2*cos equals the squared Euclidean distance.
        # dist_array[i, j] compares image_features2[i] with image_features1[j].
        dist_array = 2.0 - 2.0 * torch.matmul(image_features2, image_features1.T)
        n = len(image_features1)
        pos_dist = torch.diag(dist_array)

        # A batch of one has no negative pair: n*(n-1) would be 0 and the
        # division would yield inf/nan, so the divisor is clamped to 1.
        pair_n = max(n * (n - 1.0), 1.0)

        # softplus(x) == log(1 + exp(x)) but does not overflow for large x.
        # pos_dist broadcasts along columns here ...
        triplet_dist_g2s = pos_dist - dist_array
        loss_g2s = torch.sum(F.softplus(triplet_dist_g2s * self.loss_weight)) / pair_n

        # ... and along rows here.
        triplet_dist_s2g = torch.unsqueeze(pos_dist, 1) - dist_array
        loss_s2g = torch.sum(F.softplus(triplet_dist_s2g * self.loss_weight)) / pair_n

        loss = (loss_g2s + loss_s2g) / 2.0

        return loss

In [None]:
#@title InfoNCE implementation
class InfoNCE(pl.LightningModule):
    """Symmetric InfoNCE-style contrastive loss.

    Args:
        loss_function: callable applied as ``loss_function(logits, labels)``
            (for example ``nn.CrossEntropyLoss()``).
        logit_scale: multiplier applied to the cosine similarities.
    """

    def __init__(self, loss_function, logit_scale=3.0):
        super().__init__()

        self.loss_function = loss_function #we can use a generic loss function!
        self.logit_scale = logit_scale


    def forward(self, image_features1, image_features2):
        """Return the mean of the two directional losses.

        Row ``i`` of both inputs is the positive pair, so the target class of
        row ``i`` of the similarity matrix is ``i``.
        """

        image_features1 = F.normalize(image_features1, dim=-1)
        image_features2 = F.normalize(image_features2, dim=-1)

        # use pairwise_cosine_similarity instead? it's the same?
        logits_per_image1 = self.logit_scale * image_features1 @ image_features2.T
        logits_per_image2 = logits_per_image1.T

        # Build the labels on the device of the logits rather than on the
        # notebook-global `device`, so the loss also works when the model
        # lives somewhere else (CPU run on a CUDA machine, other GPU index).
        labels = torch.arange(
            len(logits_per_image1), dtype=torch.long, device=logits_per_image1.device
        )
        loss = (self.loss_function(logits_per_image1, labels) + self.loss_function(logits_per_image2, labels))/2

        return loss

In [None]:
# @title Top-K Rank Accuracy: takes embeddings in input

def top_k_rank_accuracy(emb1, emb2, k=1):
    """Fraction of rows of ``emb1`` whose match is among their ``k`` nearest rows of ``emb2``.

    Row ``i`` of ``emb2`` is the correct match for row ``i`` of ``emb1``.
    Distances are cosine distances (``1 - cosine similarity``).

    Args:
        emb1: ``(n, dim)`` query embeddings.
        emb2: ``(n, dim)`` reference embeddings.
        k: number of nearest references considered.

    Returns:
        A Python float in ``[0, 1]``. ``0.0`` is returned when the batch is
        empty or when ``k`` exceeds the number of samples (this can happen on
        a final batch that is smaller than the chosen batch size).
    """

    num_samples = len(emb1)

    if num_samples == 0 or k > num_samples:
        return 0.0

    # replace with pairwise_cosine?
    emb1 = F.normalize(emb1, dim=-1)
    emb2 = F.normalize(emb2, dim=-1)
    dist_matrix = 1 - (emb1 @ emb2.T)

    _, topk_indices = torch.topk(dist_matrix, k = k, dim = 1, largest = False)

    # Row i is a hit when index i appears among its k nearest references.
    # One vectorised comparison replaces a Python loop that forced a
    # device-to-host sync for every sample.
    targets = torch.arange(num_samples, device=topk_indices.device).unsqueeze(1)
    correct_in_topk = int((topk_indices == targets).any(dim=1).sum().item())

    accuracy = correct_in_topk / num_samples
    return accuracy

In [None]:
#@title Implementation of Attention operator
class Attention(pl.LightningModule):
    """Multi-head self-attention over a ``(batch, tokens, channels)`` tensor.

    Args:
        dim: channel size of the input and of the output.
        num_heads: number of heads; each head works on ``dim // num_heads`` channels.
        qkv_bias: whether the joint query/key/value projection has a bias.
        qk_scale: score multiplier; a falsy value selects ``head_dim ** -0.5``.
        attn_drop: dropout probability on the attention weights.
        proj_drop: dropout probability after the output projection.
    """

    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):

        super().__init__()
        self.num_heads = num_heads
        per_head = dim // num_heads
        # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
        self.scale = qk_scale if qk_scale else per_head ** -0.5

        # A single linear layer yields queries, keys and values at once.
        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)


    def forward(self, x):
        """Apply self-attention and return a tensor shaped like ``x``."""
        batch, tokens, channels = x.shape
        per_head = channels // self.num_heads

        # (batch, tokens, 3*C) -> (3, batch, heads, tokens, per_head)
        packed = self.qkv(x).reshape(batch, tokens, 3, self.num_heads, per_head)
        q, k, v = packed.permute(2, 0, 3, 1, 4).unbind(0)

        scores = (q @ k.transpose(-2, -1)) * self.scale
        weights = self.attn_drop(scores.softmax(dim=-1))

        # Merge the heads back into the channel dimension.
        merged = (weights @ v).transpose(1, 2).reshape(batch, tokens, channels)
        return self.proj_drop(self.proj(merged))

# Branches

In [None]:
# @title Resnet

class ResNet50Branch(pl.LightningModule):
    """ResNet-50 (default torchvision weights) with a new ``output_dim``-wide final layer."""

    def __init__(self, output_dim):
        super(ResNet50Branch, self).__init__()
        # Exposed so that wrapping models can check branch compatibility.
        self.output_dim = output_dim
        self.resnet50 = models.resnet50(weights = models.ResNet50_Weights.DEFAULT)
        # Swap the classification head for an embedding head.
        head_inputs = self.resnet50.fc.in_features
        self.resnet50.fc = torch.nn.Linear(head_inputs, self.output_dim)


    def forward(self, x, featuremaps = False):
        """Return the embedding, or the ``layer4`` output when ``featuremaps`` is True."""

        if not featuremaps:
            return self.resnet50(x)

        # Run the convolutional trunk only (no pooling / fc) so the last
        # convolutional feature map can be visualised.
        backbone = self.resnet50
        trunk = (
            backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool,
            backbone.layer1, backbone.layer2, backbone.layer3, backbone.layer4,
        )
        for stage in trunk:
            x = stage(x)
        return x

In [None]:
#@title SAIG


class ConvBnReluBlock(pl.LightningModule):
    """Conv2d followed by BatchNorm2d and an in-place ReLU."""

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super(ConvBnReluBlock, self).__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)


    def forward(self, x):
        """Apply convolution, batch normalisation and ReLU, in that order."""
        return self.relu(self.bn(self.conv(x)))

###

class Block(pl.LightningModule):
    """Pre-norm residual attention block: ``x + dropout(attn(norm(x)))``.

    Args:
        dim: channel size of the tokens.
        num_heads, qkv_bias, qk_scale, attn_drop: forwarded to ``Attention``.
        drop: forwarded to ``Attention`` as ``proj_drop``.
        dropout: dropout probability on the attention branch; ``0`` disables it.
        norm_layer: constructor of the normalisation layer, called with ``dim``.
    """

    def __init__(
        self,
        dim,
        num_heads,
        qkv_bias=False,
        qk_scale=None,
        drop=0.,
        attn_drop=0.,
        dropout=0.,
        norm_layer=nn.LayerNorm
    ):

        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop
        )
        # check what is droppath
        if dropout > 0.:
            self.dropout = nn.Dropout(dropout)
        else:
            self.dropout = nn.Identity()


    def forward(self, x):
        """Return ``x`` plus the (optionally dropped-out) attention branch."""
        # The unpacking doubles as a check that x is 3-dimensional.
        batch, tokens, channels = x.shape
        branch = self.attn(self.norm1(x))
        return x + self.dropout(branch)

###

class SAIGBranch(pl.LightningModule):
    """Convolutional stem + attention blocks + token-mixing head.

    The flattened embedding has ``output_dim = embed_dim * smd_dim`` features.

    Args:
        img_size: ``(height, width)`` of the input images.
        patch_size: used to derive the token grid (``img_size // patch_size``).
            NOTE: the convolutional stem below always contains four stride-2
            convolutions, i.e. it downsamples by 16, so values other than 16
            make ``num_patches`` disagree with the real token count.
        in_channels: number of input image channels.
        embed_dim: token channel size.
        num_heads, depth: heads per attention block and number of blocks.
        smd_dim: number of values each channel is reduced to by the head.
        qkv_bias, qk_scale, drop_rate, attn_drop_rate: forwarded to ``Block``.
        norm_layer: optional constructor for the final normalisation layer.
        flatten: accepted for interface compatibility; not used.
    """

    def __init__(self, img_size, patch_size=16, in_channels=3, embed_dim=768, num_heads = 8, depth = 4, smd_dim = 8, qkv_bias = True, qk_scale = None, drop_rate=0., attn_drop_rate=0., norm_layer=None, flatten=True):
        super(SAIGBranch, self).__init__()
        # TODO: we could store the hyper-parameters; is that useful?
        self.output_dim = embed_dim * smd_dim

        self.img_size = img_size
        self.patch_size = patch_size
        self.grid_size = (img_size[0] // patch_size, img_size[1] // patch_size)

        if img_size[0] % patch_size != 0 or img_size[1] % patch_size != 0:
            print("Warning: image size is not divisible for patch size")

        self.num_patches = self.grid_size[0] * self.grid_size[1]

        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

        # The first block now honours `in_channels` (it used to hard-code 3,
        # silently ignoring the constructor argument).
        self.conv_bn_relu_blocks = nn.Sequential(
            ConvBnReluBlock(in_channels = in_channels, out_channels = 64, stride = 2),
            ConvBnReluBlock(in_channels = 64, out_channels = 128, stride = 2),
            ConvBnReluBlock(in_channels = 128, out_channels = 128, stride = 1),
            ConvBnReluBlock(in_channels = 128, out_channels = 256, stride = 2),
            ConvBnReluBlock(in_channels = 256, out_channels = 256, stride = 1),
            ConvBnReluBlock(in_channels = 256, out_channels = 512, stride = 2),
        )
        # 1x1 convolution mapping the stem's 512 channels to embed_dim.
        self.patch_block = nn.Conv2d(in_channels = 512, out_channels = embed_dim, kernel_size=1, stride=1 ,padding=0)
        self.attn_blocks = nn.ModuleList([
            Block(
                dim=embed_dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate)
            for i in range(depth)])

        # Learnable position embedding, initialised to zero.
        self.pos_embed = nn.Parameter(torch.zeros(1, self.num_patches, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        # Head acting along the token axis: num_patches -> smd_dim per channel.
        self.smd = nn.Sequential(
            nn.Linear(self.num_patches, self.num_patches*4),
            nn.GELU(),
            nn.Linear(self.num_patches*4, self.num_patches),
            nn.Linear(self.num_patches, smd_dim)
        )


    def forward(self, x, featuremaps = False):
        """Embed a batch of images.

        Returns:
            ``(batch, embed_dim * smd_dim)`` embeddings, or the raw output of
            the convolutional stem when ``featuremaps`` is True.
        """

        # extract patch embeddings
        x = self.conv_bn_relu_blocks(x)

        if featuremaps:
            return x

        x = self.patch_block(x)
        # (B, E, H', W') -> (B, H'*W', E)
        x = x.flatten(2).transpose(1,2)

        # add position embeddings
        x = x + self.pos_embed
        x = self.pos_drop(x)

        # pass through sequence of attention blocks
        for blk in self.attn_blocks:
            x = blk(x)

        x = self.norm(x)

        # (B, N, E) -> (B, E, N): the head mixes tokens, not channels.
        x = x.transpose(-1, -2)
        x = self.smd(x)
        # (B, E, smd_dim) -> (B, smd_dim, E) -> (B, smd_dim * E)
        x = x.transpose(-1, -2)
        x = x.flatten(-2, -1)

        return x

In [None]:
#@title VGG16

class VGG_net(pl.LightningModule):
    """VGG-style network built from an architecture list, trained from scratch.

    Args:
        img_size: ``(height, width)`` of the input images.
        architecture: list mixing ints (output channels of a 3x3
            conv + batch-norm + ReLU group) and ``"M"`` (2x2 max pooling).
        num_max_pooling: number of ``"M"`` entries in ``architecture``.
        in_channels: number of input image channels.
        output_dim: size of the produced embedding.
    """

    def __init__(self, img_size, architecture, num_max_pooling, in_channels=3, output_dim=1000):
        super(VGG_net, self).__init__()
        self.in_channels = in_channels
        self.output_dim = output_dim
        self.division = 2 ** num_max_pooling

        self.conv_layers = self.create_conv_layers(architecture)

        # Width of the last convolution = channel count entering the classifier.
        # It used to be hard-coded to 128, which only fits one architecture.
        conv_widths = [entry for entry in architecture if isinstance(entry, int)]
        last_channels = conv_widths[-1] if conv_widths else in_channels

        # Each max pooling floors the spatial size, hence the explicit
        # parentheses: `C * H // d * W // d` parses as `((C*H)//d * W)//d`,
        # which is wrong whenever H or W is not a multiple of d.
        flat_features = (
            last_channels
            * (img_size[0] // self.division)
            * (img_size[1] // self.division)
        )

        self.fcs = nn.Sequential(
            nn.Linear(flat_features, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, output_dim),
        )


    def forward(self, x, featuremaps=False):
        """Return the embedding of ``x``.

        With ``featuremaps=True`` the intermediate activation is returned as
        soon as its height (dimension 2) equals 32; if that never happens the
        regular embedding is returned.
        """
        for layer in self.conv_layers:
            x = layer(x)

            if featuremaps and x.shape[2] == 32:
                return x

        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x


    def create_conv_layers(self, architecture):
        """Translate ``architecture`` into an ``nn.ModuleList`` of layers."""
        layers = []
        in_channels = self.in_channels

        for x in architecture:
            if isinstance(x, int):
                out_channels = x

                layers += [
                    nn.Conv2d(in_channels, out_channels, 3, 1, 1),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(),
                ]
                in_channels = x
            elif x == "M":
                layers += [nn.MaxPool2d(2, 2)]

        return nn.ModuleList(layers)

In [None]:
#@title VGG16 - pretrained
class VGGBranch(pl.LightningModule):
    """Pre-trained torchvision VGG-16 with a new ``output_dim``-wide last layer."""

    def __init__(self, output_dim):
        super(VGGBranch, self).__init__()
        self.output_dim = output_dim #so we can access from super-models
        # `pretrained=True` is deprecated in torchvision; the weights enum is
        # the replacement and matches how ResNet50Branch loads its weights.
        # IMAGENET1K_V1 is the checkpoint `pretrained=True` used to select.
        self.vgg = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
        # Replace the last classifier layer with an embedding layer.
        num_features = self.vgg.classifier[-1].in_features
        self.vgg.classifier[-1] = nn.Linear(num_features, self.output_dim)

    def forward(self, x, featuremaps=False):
        """Return the embedding, or the output of ``vgg.features`` when ``featuremaps`` is True."""

        # to print the featuremap we need to return the last conv layer output
        if featuremaps:
            features = self.vgg.features(x)
            return features

        else:
            return self.vgg(x)


# Dual Model (RGB Grd | RGB Sat)

In [None]:
# @title Dual Model

class DualModel(pl.LightningModule):
    """Two-branch retrieval model: one branch per view, trained contrastively.

    Args:
        model_grd: branch applied to the ground images; must expose ``output_dim``.
        model_sat: branch applied to the satellite images; must expose ``output_dim``.
        loss: callable ``loss(grd_embeddings, sat_embeddings) -> scalar``.
            NOTE: the default instance is created once, when the class is
            defined, and is therefore shared by every model using the default.

    Raises:
        ValueError: if the two branches have different ``output_dim``.
    """

    def __init__(self, model_grd, model_sat, loss = InfoNCE(loss_function=nn.CrossEntropyLoss())):
        super(DualModel, self).__init__()
        self.branch1 = model_grd
        self.branch2 = model_sat

        if self.branch1.output_dim != self.branch2.output_dim:
            raise ValueError("ATTENTION, MISMATCHING OUTPUT DIMENSIONS FOR THE BRANCHES!")

        self.output_dim = self.branch1.output_dim

        self.loss = loss

        # Per-batch embeddings collected during an evaluation epoch; they are
        # concatenated and cleared in the matching on_*_epoch_end hook.
        #validation
        self.grd_features_val =  []
        self.sat_features_val =  []
        #test
        self.grd_features_test = []
        self.sat_features_test = []


    def forward(self, x1, x2):
        """Embed both views; ``x1`` and ``x2`` are dicts holding an ``'imgs'`` tensor."""
        out1 = self.branch1(x1['imgs'])
        out2 = self.branch2(x2['imgs'])
        return out1, out2


    def training_step(self, batch, batch_idx):
        """Compute the loss on one batch and log it with the in-batch top-1 accuracy."""
        grd_img, sat_img, _ = batch
        out1, out2 = self(grd_img, sat_img)

        loss = self.loss(out1, out2)

        top_1 = top_k_rank_accuracy(out1, out2, k=1)

        self.log('train_top1', top_1, on_step=True, on_epoch=True, prog_bar=True)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss


    def _log_retrieval_metrics(self, prefix, grd_batches, sat_batches):
        """Log top-1/3/10/1% accuracy over all collected batches, then clear them."""
        grd_features = torch.cat(grd_batches, dim=0)
        sat_features = torch.cat(sat_batches, dim=0)

        num_samples = grd_features.shape[0]
        # With fewer than 100 samples int(0.01 * n) is 0, and a top-0 ranking
        # can never contain the match; use at least one candidate.
        percent1 = max(1, int(0.01*num_samples))

        top_1 = top_k_rank_accuracy(grd_features, sat_features, k=1)
        top_3 = top_k_rank_accuracy(grd_features, sat_features, k=3)
        top_10 = top_k_rank_accuracy(grd_features, sat_features, k=10)
        top_percent1 = top_k_rank_accuracy(grd_features, sat_features, k=percent1)

        self.log(f'{prefix}_top1', top_1, on_step=False, on_epoch=True, prog_bar=True)
        self.log(f'{prefix}_top3', top_3, on_step=False, on_epoch=True, prog_bar=True)
        self.log(f'{prefix}_top10', top_10, on_step=False, on_epoch=True, prog_bar=True)
        self.log(f'{prefix}_top1%', top_percent1, on_step=False, on_epoch=True, prog_bar=True)

        # Release the stored embeddings so they do not pile up across epochs.
        grd_batches.clear()
        sat_batches.clear()

        return top_1, top_3, top_10, top_percent1


    def validation_step(self, batch, batch_idx):
        """Store the embeddings of one validation batch."""
        grd_img, sat_img, _ = batch
        out1, out2 = self(grd_img, sat_img)

        self.grd_features_val.append(out1)
        self.sat_features_val.append(out2)


    def on_validation_epoch_end(self):
        """Log ``val_top1``, ``val_top3``, ``val_top10`` and ``val_top1%``."""
        return self._log_retrieval_metrics('val', self.grd_features_val, self.sat_features_val)


    def test_step(self, batch, batch_idx):
        """Store the embeddings of one test batch."""
        grd_img, sat_img, _ = batch
        out1, out2 = self(grd_img, sat_img)

        self.grd_features_test.append(out1)
        self.sat_features_test.append(out2)


    def on_test_epoch_end(self):
        """Log ``test_top1``, ``test_top3``, ``test_top10`` and ``test_top1%``."""
        return self._log_retrieval_metrics('test', self.grd_features_test, self.sat_features_test)


    def configure_optimizers(self):
        """Adam over all parameters with a learning rate of 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)

In [None]:
#@title Create Model

# Which backbone both branches use: 'resnet', 'saig' or 'vgg'.
branch_type = 'resnet'

###

if branch_type == 'resnet':

    grd_model = ResNet50Branch(128)
    sat_model = ResNet50Branch(128)


elif branch_type == 'saig':

    grd_model = SAIGBranch(
        data_module.size['grd'],
        embed_dim= 256,
        num_heads = 4,
        depth = 4,
        smd_dim = 8
    )

    sat_model = SAIGBranch(
        data_module.size['sat'],
        embed_dim= 256,
        num_heads = 4,
        depth = 4,
        smd_dim = 8
    )


elif branch_type == 'vgg':

    # Output channel of each layer in the convolution layers
    # "M" stands for maxpooling layer
    #VGG16 = [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512, "M"]
    #VGG16 = [32, 32, "M", 64, 64, "M", 128, 128, 128, "M", 256, 256, 256, "M", 256, 256, 256, "M"]
    VGG16 = [16, 16, "M", 32, 32, "M", 64, 64, 64, "M", 128, 128, 128, "M", 128, 128, 128, "M"]

    grd_model = VGG_net(data_module.size['grd'], VGG16, VGG16.count("M"), output_dim = 100)
    sat_model = VGG_net(data_module.size['sat'], VGG16, VGG16.count("M"), output_dim = 100)


else:
    # Fail loudly: otherwise a typo would either raise a confusing NameError
    # below or silently reuse models left over from a previous cell run.
    raise ValueError(f"Unknown branch_type: {branch_type!r}")

###

model = DualModel(grd_model, sat_model)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 166MB/s]


In [None]:
# @title Create Trainer

# Single-device trainer rooted at /content/, with a Rich progress bar.
trainer = pl.Trainer(
    default_root_dir = "/content/",
    devices = 1,
    max_epochs = 30,
    log_every_n_steps = 3,
    callbacks = [RichProgressBar()],
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
# @title Train

# Fit the model, validating on the validation loader.
trainer.fit(model, train_dataloaders = train_loader, val_dataloaders = val_loader)

In [None]:
# @title Test

# No separate test loader is created in this notebook, so the validation
# loader is evaluated here.
trainer.test(model, dataloaders = val_loader)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

[{'test_top1': 0.007223476190119982,
  'test_top3': 0.016704289242625237,
  'test_top10': 0.05462753772735596,
  'test_top1%': 0.0}]

# Triple Model (RGB Grd | RGB + SEG Sat)

In [None]:
#@title TripleModel

class TripleModel(pl.LightningModule):
    """Three-branch retrieval model: ground RGB vs. satellite RGB + satellite SEG.

    The ground image is embedded by ``model_grd``. The satellite image and its
    segmentation are embedded by ``model_sat`` and ``model_seg`` and the two
    embeddings are concatenated along dim 1, so both sides of the comparison
    have ``output_dim`` features.

    Args:
        model_grd: branch applied to the ground batch; must expose ``output_dim``.
        model_sat: branch applied to the satellite batch; must expose ``output_dim``.
        model_seg: branch applied to the segmentation batch; must expose ``output_dim``.
        loss: callable invoked as ``loss(grd_embedding, sat_embedding)``. When
            ``None`` (default) a new ``InfoNCE(loss_function=nn.CrossEntropyLoss())``
            is created for this instance.

    Raises:
        ValueError: if ``model_grd.output_dim`` differs from
            ``model_sat.output_dim + model_seg.output_dim``.
    """

    def __init__(self, model_grd, model_sat, model_seg, loss = None):
        super(TripleModel, self).__init__()
        self.branch1 = model_grd
        self.branch2 = model_sat
        self.branch3 = model_seg

        # the ground embedding is compared against concat(sat, seg): widths must agree
        if self.branch1.output_dim != (self.branch2.output_dim + self.branch3.output_dim):
            raise ValueError("ATTENTION, MISMATCHING OUTPUT DIMENSIONS FOR THE BRANCHES! Must have output_dim1 = output_dim2+output_dim3")

        self.output_dim = self.branch1.output_dim

        # Build the default loss here rather than in the signature: a default
        # argument is evaluated once, so every model would share one module.
        if loss is None:
            loss = InfoNCE(loss_function=nn.CrossEntropyLoss())
        self.loss = loss

        # per-batch embeddings collected during validation
        self.grd_features_val = []
        self.sat_features_val = []  # IMPORTANT: "sat" holds the combined SAT + SEG features

        # per-batch embeddings collected during test
        self.grd_features_test = []
        self.sat_features_test = []

    def forward(self, x1, x2, x3):
        """Embed the three views; each input is a dict whose 'imgs' entry is fed to its branch.

        Returns:
            (ground embedding, concatenation of satellite and segmentation embeddings).
        """
        out1 = self.branch1(x1['imgs'])
        out2 = self.branch2(x2['imgs'])
        out3 = self.branch3(x3['imgs'])
        out2 = torch.cat((out2, out3), dim=1)  # concatenation of features
        return out1, out2

    def training_step(self, batch, batch_idx):
        """Compute the loss on one (grd, sat, seg) batch and log loss and in-batch top-1."""
        grd_img, sat_img, seg_img = batch
        out1, out2 = self(grd_img, sat_img, seg_img)

        loss = self.loss(out1, out2)

        top_1 = top_k_rank_accuracy(out1, out2, k=1)

        self.log('train_top1', top_1, on_step=True, on_epoch=True, prog_bar=True)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def _log_retrieval_metrics(self, stage, grd_chunks, sat_chunks):
        """Compute and log top-1/3/10/1% over the collected embeddings, then empty the buffers.

        Args:
            stage: prefix of the logged metric names ('val' or 'test').
            grd_chunks: list of per-batch ground embeddings (cleared on return).
            sat_chunks: list of per-batch satellite embeddings (cleared on return).

        Returns:
            (top_1, top_3, top_10, top_percent1), or None when nothing was collected.
        """
        if not grd_chunks:
            # nothing to evaluate; torch.cat would raise on an empty list
            return None

        grd_features = torch.cat(grd_chunks, dim=0)
        sat_features = torch.cat(sat_chunks, dim=0)

        # release the per-batch buffers before the next epoch starts filling them
        grd_chunks.clear()
        sat_chunks.clear()

        num_samples = grd_features.shape[0]
        # with fewer than 100 samples int(0.01 * n) is 0; rank at least 1 candidate
        percent1 = max(1, int(0.01 * num_samples))

        top_1 = top_k_rank_accuracy(grd_features, sat_features, k=1)
        top_3 = top_k_rank_accuracy(grd_features, sat_features, k=3)
        top_10 = top_k_rank_accuracy(grd_features, sat_features, k=10)
        top_percent1 = top_k_rank_accuracy(grd_features, sat_features, k=percent1)

        self.log(stage + '_top1', top_1, on_step=False, on_epoch=True, prog_bar=True)
        self.log(stage + '_top3', top_3, on_step=False, on_epoch=True, prog_bar=True)
        self.log(stage + '_top10', top_10, on_step=False, on_epoch=True, prog_bar=True)
        self.log(stage + '_top1%', top_percent1, on_step=False, on_epoch=True, prog_bar=True)

        return top_1, top_3, top_10, top_percent1

    def validation_step(self, batch, batch_idx):
        """Embed one validation batch and store the embeddings for the epoch-end metrics."""
        grd_img, sat_img, seg_img = batch
        out1, out2 = self(grd_img, sat_img, seg_img)

        self.grd_features_val.append(out1)
        self.sat_features_val.append(out2)

    def on_validation_epoch_end(self):
        """Log val_top1 / val_top3 / val_top10 / val_top1% over the whole validation set."""
        return self._log_retrieval_metrics('val', self.grd_features_val, self.sat_features_val)

    def test_step(self, batch, batch_idx):
        """Embed one test batch and store the embeddings for the epoch-end metrics."""
        grd_img, sat_img, seg_img = batch
        out1, out2 = self(grd_img, sat_img, seg_img)

        self.grd_features_test.append(out1)
        self.sat_features_test.append(out2)

    def on_test_epoch_end(self):
        """Log test_top1 / test_top3 / test_top10 / test_top1% over the whole test set."""
        return self._log_retrieval_metrics('test', self.grd_features_test, self.sat_features_test)

    def configure_optimizers(self):
        """Adam over all parameters with a fixed learning rate of 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)

In [None]:
# @title Create Model

# TripleModel requires grd output_dim == sat output_dim + seg output_dim.
# Presumably SAIGBranch.output_dim equals embed_dim (512 = 256 + 256) — TODO confirm.
grd_model = SAIGBranch(data_module.size['grd'], embed_dim=512)
sat_model = SAIGBranch(data_module.size['sat'], embed_dim=256) # per the original note, the branch handles input resizing itself
seg_model = SAIGBranch(data_module.size['seg'], embed_dim=256) # per the original note, the branch handles input resizing itself


# ground branch vs. concatenated satellite + segmentation branches
model = TripleModel(grd_model, sat_model, seg_model)

In [None]:
# @title Create Trainer

# Same configuration as the trainer used for the two-branch model, including an
# explicit root folder so logs/checkpoints do not depend on the kernel's
# current working directory.
trainer = pl.Trainer(
    max_epochs = 30,                   # upper bound on training epochs
    devices = 1,                       # run on a single device
    callbacks = [RichProgressBar()],   # Rich-based progress bar
    log_every_n_steps = 3,             # logging interval, in training steps
    default_root_dir = "/content/"     # root folder for Lightning logs/checkpoints
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
# @title Train

# Optimise `model` on the training loader; the validation loader drives the
# validation hooks of the module during fitting.
trainer.fit(
    model = model,
    train_dataloaders = train_loader,
    val_dataloaders = val_loader
)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [None]:
# @title Test

# Pass the model explicitly (as the earlier Test cell does) so the evaluation
# uses the weights currently held by `model` and does not depend on a previous
# `trainer.fit` call having attached a model/checkpoint to the trainer.
# NOTE: the validation loader is reused here as the test set.
trainer.test(
    model = model,
    dataloaders = val_loader
)

# Visualization Functions

In [None]:
# @title Visualize Heatmap


def _activation_energy(feature_maps):
    """Sum squared activations over dim 1 and L2-normalise each map over its flattened grid."""
    energy = (feature_maps**2).sum(1)
    b, h, w = energy.size()
    energy = nn.functional.normalize(energy.view(b, h * w), p=2, dim=1)
    return energy.view(b, h, w)


def _save_activation_figure(img_tensor, act_tensor, mean, std, height, width, fading, spacing, out_path):
    """Write one figure stacking input image, colour-mapped activation and their overlay."""
    # undo the per-channel normalisation (in place, as the tensor is not reused afterwards)
    for channel, channel_mean, channel_std in zip(img_tensor, mean, std):
        channel.mul_(channel_std).add_(channel_mean).clamp_(0, 1)
    input_img = np.uint8(np.floor(img_tensor.numpy() * 255))
    input_img = input_img.transpose((1, 2, 0))
    # cv2 works in BGR: flip the channels once and use that version everywhere,
    # so the overlay blends two images in the same colour order
    input_bgr = input_img[..., ::-1]

    # activation map resized to the image size, stretched to 0..255 and colour-mapped
    act_map = cv2.resize(act_tensor.numpy(), (width, height))
    act_map = 255 * (act_map - np.min(act_map)) / (np.max(act_map) - np.min(act_map) + 1e-12)
    act_map = np.uint8(np.floor(act_map))
    act_map = cv2.applyColorMap(act_map, cv2.COLORMAP_JET)

    # blend of the photo and the heat map
    overlapped_img = input_bgr*(1-fading) + act_map*fading
    overlapped_img = np.minimum(overlapped_img, 255).astype(np.uint8)

    # single figure, white spacing between the three stacked images
    output_img = 255 * np.ones((3*height + 2*spacing, width, 3), dtype=np.uint8)
    output_img[:height, ...] = input_bgr
    output_img[height + spacing:2*height + spacing, ...] = act_map
    output_img[2*height + 2*spacing:, ...] = overlapped_img
    cv2.imwrite(out_path, output_img)


@torch.no_grad()
def create_activation_maps(
    model,
    data_module,
    split,
    save_dir,
    use_gpu = True
):
    """Save activation-map figures for every ground and satellite image of a split.

    For each batch, the feature maps returned by ``model.branch1`` (ground) and
    ``model.branch2`` (satellite) with ``featuremaps=True`` are reduced to one
    map per image and written to ``save_dir`` as ``<id>_streetview.jpg`` and
    ``<id>_satmap.jpg`` (id zero-padded to 7 digits). Each file stacks, top to
    bottom: de-normalised input, colour-mapped activation, overlay.

    Args:
        model: object exposing ``branch1`` / ``branch2``; put in eval mode here.
        data_module: provides ``{train,val,test}_dataloader()`` and the
            ``mean`` / ``std`` / ``size`` dicts keyed by 'grd' and 'sat'.
            Input images are assumed to already have shape ``size[key]``.
        split: 'train', 'val' or 'test'.
        save_dir: output folder (created if missing).
        use_gpu: move each image batch to CUDA before the forward pass.

    Raises:
        ValueError: if ``split`` is not one of the three accepted values.
    """

    model.eval()
    spacing = 10
    fading = 0.5

    if split == 'train':
        data_loader = data_module.train_dataloader()
    elif split == 'test':
        data_loader = data_module.test_dataloader()
    elif split == 'val':
        data_loader = data_module.val_dataloader()
    else:
        raise ValueError('split should be "train", "test" or "val"')

    # cv2.imwrite fails silently when the target folder does not exist
    os.makedirs(save_dir, exist_ok=True)

    # (position in the batch, branch, data_module key, output file suffix)
    views = [
        (0, model.branch1, 'grd', '_streetview.jpg'),
        (1, model.branch2, 'sat', '_satmap.jpg'),
    ]

    for batch in data_loader:
        for position, branch, key, suffix in views:

            imgs, ids = batch[position]['imgs'], batch[position]['imgs_id']
            if use_gpu:
                imgs = imgs.cuda()

            # one activation map per image (try adding square root?)
            act_maps = _activation_energy(branch(imgs, featuremaps=True))

            # numpy conversion below needs CPU tensors; no-op when already on CPU
            imgs, act_maps = imgs.cpu(), act_maps.cpu()

            height, width = data_module.size[key]
            for index in range(act_maps.size(0)):
                img_id = str(int(ids[index])).zfill(7)
                _save_activation_figure(
                    imgs[index, ...],
                    act_maps[index, ...],
                    data_module.mean[key],
                    data_module.std[key],
                    height,
                    width,
                    fading,
                    spacing,
                    os.path.join(save_dir, img_id + suffix)
                )

In [None]:
# @title Visualize Ranked Results


def _load_bordered_image(path, width, height, border, color):
    """Read an image, resize it to (width, height), add a coloured border and resize back."""
    img = cv2.imread(path)
    if img is None:
        # cv2.imread returns None instead of raising; fail with a readable message
        raise FileNotFoundError('could not read image: ' + path)
    img = cv2.resize(img, (width, height))
    img = cv2.copyMakeBorder(img, border, border, border, border, cv2.BORDER_CONSTANT, value = color)
    return cv2.resize(img, (width, height))


@torch.no_grad()
def visualize_ranked_results(
    model,
    data_module,
    split,
    save_dir,
    top_k = 5,
    use_gpu = True
):
    """Save, for every ground query of a split, a figure with its top-k satellite matches.

    Embeddings are obtained by calling ``model.test_step`` on each batch and
    reading ``model.grd_features_test`` / ``model.sat_features_test``; both
    buffers are emptied before and after use. Candidates are ranked by cosine
    distance (1 - cosine similarity). Each figure ``<id>_visrank.jpg`` shows the
    query with a black border on top and the ranked satellite images below,
    bordered green when the satellite id equals the query id and red otherwise.

    Images are re-read from ``data_module.data_dir`` as
    ``/streetview/<id>.jpg`` and ``/bingmap/input<id>.png``.

    Args:
        model: module exposing ``test_step`` and the ``*_features_test`` lists;
            put in eval mode here.
        data_module: provides the dataloaders, ``data_dir`` and ``original_size``
            (keys 'grd' and 'sat'). Batches must hold three dicts with 'imgs';
            the first two must also carry 'imgs_id'.
        split: 'train', 'val' or 'test'.
        save_dir: output folder (created if missing).
        top_k: number of ranked satellite images drawn per query.
        use_gpu: move image batches to CUDA for the forward pass.

    Raises:
        ValueError: if ``split`` is not one of the three accepted values.
        FileNotFoundError: if a query or satellite image cannot be read.
    """

    spacing = 10
    query_spacing = 30
    border = 5
    text_space = 30

    # select dataloader
    if split == 'train':
        data_loader = data_module.train_dataloader()
    elif split == 'test':
        data_loader = data_module.test_dataloader()
    elif split == 'val':
        data_loader = data_module.val_dataloader()
    else:
        raise ValueError('split should be "train", "test" or "val"')

    # test_step is called outside the Trainer, so eval mode must be set by hand
    model.eval()

    # cv2.imwrite fails silently when the target folder does not exist
    os.makedirs(save_dir, exist_ok=True)

    # (using data module dimensions)
    grd_height, grd_width = data_module.original_size['grd']
    sat_height, sat_width = data_module.original_size['sat']

    grd_ids = np.empty((0))
    sat_ids = np.empty((0))

    # start from empty buffers so stale embeddings are not mixed in
    model.grd_features_test = []
    model.sat_features_test = []

    # compute features for each image
    for batch_idx, batch in enumerate(data_loader):

        grd_ids = np.concatenate((grd_ids, batch[0]['imgs_id']))
        sat_ids = np.concatenate((sat_ids, batch[1]['imgs_id']))

        if use_gpu:
            for position in range(3):
                batch[position]['imgs'] = batch[position]['imgs'].cuda()

        model.test_step(batch, batch_idx)

        if use_gpu:
            # move the inputs back so they do not stay resident on the GPU
            for position in range(3):
                batch[position]['imgs'] = batch[position]['imgs'].cpu()

    # ranking is done with numpy on the CPU (.cpu() is a no-op for CPU tensors)
    grd_features = torch.cat(model.grd_features_test, dim = 0).cpu()
    sat_features = torch.cat(model.sat_features_test, dim = 0).cpu()

    # release the buffers filled by test_step
    model.grd_features_test = []
    model.sat_features_test = []

    num_samples = grd_features.shape[0]

    # compute distance matrix
    grd_features = F.normalize(grd_features, dim=-1)
    sat_features = F.normalize(sat_features, dim=-1)
    dist_matrix = (1 - grd_features @ sat_features.T).numpy()
    indices = np.argsort(dist_matrix, axis=1)

    # layout shared by all figures
    output_height = grd_height + query_spacing + sat_height + text_space
    output_width = max(grd_width, top_k*sat_width + (top_k-1)*spacing)

    for grd_index in range(num_samples):

        # create empty output image
        output_img = 255 * np.ones((output_height, output_width, 3), dtype=np.uint8)

        # create query image with black border
        grd_id = str(int(grd_ids[grd_index])).zfill(7)
        grd_path = data_module.data_dir + '/streetview/' + grd_id + '.jpg'
        grd_img = _load_bordered_image(grd_path, grd_width, grd_height, border, (0,0,0))

        # add query image to the output
        start_width = (output_width - grd_width) // 2
        end_width = start_width + grd_width
        output_img[:grd_height, start_width:end_width, :] = grd_img

        for rank, sat_index in enumerate(indices[grd_index, :top_k], start=1):

            # create ranked image with red or green border
            sat_id = str(int(sat_ids[sat_index])).zfill(7)
            sat_path = data_module.data_dir + '/bingmap/input' + sat_id + '.png'
            color = (0, 255, 0) if (grd_id == sat_id) else (0, 0, 255)
            sat_img = _load_bordered_image(sat_path, sat_width, sat_height, border, color)

            # add ranked image to the output
            start_height = grd_height+query_spacing
            end_height = start_height + sat_height
            start_width = (rank-1) * (sat_width + spacing)
            end_width = start_width + sat_width
            output_img[start_height:end_height, start_width:end_width, :] = sat_img

            # add text about the ranked image
            text = "Rank: {} Distance:{:.4f}".format(rank, dist_matrix[grd_index, sat_index])
            bottom_left = (start_width + 10, output_height - 5)
            cv2.putText(output_img, text, bottom_left, cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,0), 1, 2)

        # create output image file
        cv2.imwrite(os.path.join(save_dir, grd_id + '_visrank.jpg'), output_img)

In [None]:
# Start from an empty output folder. Pure-Python equivalents of `rm -rf` /
# `mkdir`, as in the Folders Setup cell; makedirs also creates missing parents.
activation_maps_dir = '/content/output/log/activation_maps'
shutil.rmtree(activation_maps_dir, ignore_errors = True)
os.makedirs(activation_maps_dir, exist_ok = True)

# Write the activation-map figures for the validation split.
create_activation_maps(
    model = model.to(device),
    data_module = data_module,
    split = 'val',
    save_dir = activation_maps_dir,
    use_gpu = True
)

In [None]:
# Start from an empty output folder. Pure-Python equivalents of `rm -rf` /
# `mkdir`, as in the Folders Setup cell; makedirs also creates missing parents.
ranked_results_dir = '/content/output/log/ranked_results'
shutil.rmtree(ranked_results_dir, ignore_errors = True)
os.makedirs(ranked_results_dir, exist_ok = True)

# Write one ranked-retrieval figure per ground query of the validation split.
visualize_ranked_results(
    model = model.to(device),
    data_module = data_module,
    split = 'val',
    save_dir = ranked_results_dir,
    top_k = 5,
    use_gpu = True
)