# Config

In [1]:
import os, sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

class Config:
    """Notebook-wide settings, exposed below as the `hyperparameters` instance."""
    # Batch size handed to the DataLoader in extract_global_features.
    batch_size = 64
    # Worker-process count handed to the same DataLoader.
    num_workers = 2
    # Not read in the visible cells — presumably the retrieval shortlist size; verify against callers.
    num_to_rerank = 100
    # Default seed used by seed_torch().
    seed = 717171
    # Not read in the visible cells — presumably the global embedding width; verify against callers.
    embed_dim = 1024
    # Directory in which save_pickle / load_pickle keep cached artefacts.
    CACHE_DIR = '/kaggle/temp/'
    
hyperparameters = Config()
# Create the cache directory up front; exist_ok makes re-running this cell harmless.
os.makedirs(hyperparameters.CACHE_DIR, exist_ok = True)

# Installation

In [2]:
!pip install ../input/pkg-wheels/einops-0.3.2-py3-none-any.whl
!pip install ../input/pkg-wheels/faiss_gpu-1.6.3-cp37-cp37m-manylinux2010_x86_64.whl
!cp -r ../input/loftr-repo/ /kaggle/temp/
sys.path.append('/kaggle/temp/loftr-repo/')

Processing /kaggle/input/pkg-wheels/einops-0.3.2-py3-none-any.whl
Installing collected packages: einops
Successfully installed einops-0.3.2
Processing /kaggle/input/pkg-wheels/faiss_gpu-1.6.3-cp37-cp37m-manylinux2010_x86_64.whl
Installing collected packages: faiss-gpu
Successfully installed faiss-gpu-1.6.3


# Imports

In [3]:
from scipy.spatial import cKDTree
from scipy import spatial
import pydegensac
import copy
import os
import numpy as np # linear algebra
import pandas as pd 
import random
from collections import defaultdict
from tqdm.auto import tqdm
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import roc_auc_score
from sklearn. model_selection import KFold, StratifiedKFold
import torch, cv2
import torch.nn as nn
from PIL import Image
from torch.optim import Adam, SGD
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import warnings, math
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from torch.nn.parameter import Parameter
import albumentations as A
import timm, gc
from sklearn.cluster import DBSCAN as dbscan
from src.loftr import LoFTR, default_cfg
import csv, shutil, glob, pickle
warnings.filterwarnings("ignore")

def seed_torch(seed=hyperparameters.seed):
    """Seed Python, NumPy and PyTorch (CPU and current CUDA device) RNGs.

    Also exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed; defaults to ``hyperparameters.seed`` as evaluated
            when this function is defined.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_torch()

# Dataset

In [4]:
class LandmarkDataset(Dataset):
    """Loads landmark images from disk by image id and returns normalised CHW tensors.

    Images are looked up as ``<root>/<id[0]>/<id[1]>/<id[2]>/<id>.jpg`` where the
    root directory depends on ``mode`` and ``prob_type``.

    Args:
        image_ids: Indexable collection of image id strings (at least 3 characters each).
        mode: One of ``'train'``, ``'index'``, ``'test'`` or ``'nolandmark'``.
        prob_type: Competition flavour inserted into the dataset path
            (``landmark-{prob_type}-2021``); ignored for ``'nolandmark'``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """

    def __init__(self,image_ids, mode = 'train', prob_type = 'retrieval'):
        self.image_ids = image_ids

        if mode in ('train', 'index', 'test'):
            self.file_path = f'../input/landmark-{prob_type}-2021/{mode}'
        elif mode == 'nolandmark':
            self.file_path = '../input/google-landmark-2021-validation/valid'
        else:
            # A bare `raise` here has no active exception to re-raise and would
            # surface as an unrelated RuntimeError; report the real problem instead.
            raise ValueError(
                f"Unknown mode {mode!r}; expected 'train', 'index', 'test' or 'nolandmark'"
            )

        self.transform = self._build_augmentation()

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Read image ``idx`` and return it as a float32 tensor in (C, H, W) layout.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        file_     = self.image_ids[idx]
        file_path = f'{self.file_path}/{file_[0]}/{file_[1]}/{file_[2]}/{file_}.jpg'
        raw_image = cv2.imread(file_path)
        if raw_image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'Could not read image: {file_path}')
        img       = self.process_img(cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB))
        img       = img.transpose(2,0,1)  # HWC -> CHW
        return torch.from_numpy(img)

    def process_img(self, img):
        """Apply the resize/crop transform (if any), cast to float32 and normalise."""
        if self.transform is not None:
            img = self.transform(image = img)['image']
        img = img.astype(np.float32)
        img = self.normalize(img)
        return img

    def normalize(self, img):
        """Subtract the per-channel mean and divide by the per-channel std.

        The constants are on the 0-255 pixel scale and are applied along the
        last axis; the input is copied, not modified.
        """
        mean = np.array([123.675, 116.28 , 103.53 ], dtype=np.float32)
        std = np.array([58.395   , 57.120, 57.375   ], dtype=np.float32)
        img = img.astype(np.float32)
        img -= mean
        img *= np.reciprocal(std, dtype=np.float32)
        return img

    def _build_augmentation(self):
        """Deterministic preprocessing: shorter side to 512, then a 448x448 centre crop."""
        return  A.Compose([
                A.SmallestMaxSize(512),
                A.CenterCrop(height=448,width=448,p=1.)
            ])
    
class GLRTRFDataset(Dataset):
    """Pairs of (query, gallery) embeddings with per-pair features and a label.

    Each entry of ``pair_tuples`` is read as
    ``((query_index, gallery_index), pair_features, label)``; the two indices
    select rows of ``feat``.
    """

    def __init__(self, feat, pair_tuples):
        self.pair_tuples = pair_tuples
        self.feat = feat

    def __len__(self,):
        return len(self.pair_tuples)

    def __getitem__(self, index:int):
        """Return ``(query_emb, gallary_emb, features, label)``, each with a leading axis of 1."""
        entry = self.pair_tuples[index]
        pair = entry[0]
        query_row, gallery_row = pair[0], pair[1]

        # Length-1 slices keep the leading axis on the selected embedding rows.
        query_emb = self.feat[query_row:query_row + 1]
        gallary_emb = self.feat[gallery_row:gallery_row + 1]

        features = np.array(entry[1])[np.newaxis, ...]
        label = np.array(entry[2])[np.newaxis, ...]
        return query_emb, gallary_emb, features, label
    
class GraphDataset(Dataset):
    """Pair samples for the GAT classifier: one pair feature plus two padded neighbourhoods.

    ``feats[i][j]`` is the feature vector of the pair (i, j); ``top_neighbors[i]``
    lists the neighbours of node ``i``. Each neighbourhood is zero-padded to ``k`` rows.
    """

    def __init__(self, feats=None, labels=None, weights=None, pair_tuples=None, k=50, top_neighbors=None):
        self.feats, self.labels, self.weights = feats, labels, weights
        self.pair_tuples = pair_tuples
        self.k = k
        self.top_neighbors = top_neighbors

    def __len__(self):
        return len(self.pair_tuples)

    def _neighbor_block(self, node, width):
        """Stack ``feats[node][n]`` for every neighbour ``n`` and zero-pad up to ``k`` rows."""
        neighbors = self.top_neighbors[node]
        zero_rows = [[0] * width] * (self.k - len(neighbors))
        rows = [self.feats[node][neighbor] for neighbor in neighbors]
        return torch.FloatTensor(rows + zero_rows)

    def __getitem__(self, index):
        """Return ``(feat, neighbor_feats[, same_label][, weight])`` for pair ``index``.

        ``neighbor_feats`` has the pair feature in row 0, followed by the padded
        neighbourhood of ``i`` and then that of ``j``.
        """
        i, j = self.pair_tuples[index]
        feat = torch.FloatTensor(self.feats[i][j])
        width = feat.shape[0]

        block_i = self._neighbor_block(i, width)
        block_j = self._neighbor_block(j, width)
        neighbor_feats = torch.cat([feat.unsqueeze(0), block_i, block_j], dim=0)

        sample = (feat, neighbor_feats)
        if self.labels is not None:
            sample = sample + (self.labels[i] == self.labels[j],)
        if self.weights is not None:
            sample = sample + (self.weights[i],)
        return sample

# CNN Model

## Metric Learning Layers

In [5]:
class ArcMarginProduct(nn.Module):
    """Cosine-similarity head: cosine between L2-normalised inputs and class weights.

    No margin or scale is applied here; ``forward`` returns raw cosines.
    """

    def __init__(self, in_features, out_features):
        super(ArcMarginProduct, self).__init__()
        # One weight row per output class.
        self.weight = nn.Parameter(torch.Tensor(out_features, in_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-uniform initialisation of the class weights."""
        nn.init.xavier_uniform_(self.weight)

    def forward(self, features):
        """Return a ``(batch, out_features)`` matrix of cosine similarities."""
        unit_features = F.normalize(features)
        unit_weight = F.normalize(self.weight)
        return F.linear(unit_features, unit_weight)
    
def l2_norm(input, axis = 1):
    """Divide ``input`` by its L2 norm taken along ``axis`` (norm kept as a broadcastable dim)."""
    return input / torch.norm(input, p=2, dim=axis, keepdim=True)
    
class CurricularFace(nn.Module):
    r"""CurricularFace head (https://arxiv.org/pdf/2004.00288.pdf), used here in inference form.

    The margin / curriculum re-weighting of the paper is not applied by this
    implementation: ``forward`` returns the plain cosine between the normalised
    embeddings and the normalised class kernel. The margin-related attributes
    and the ``t`` buffer are still created.

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        m: margin
        s: scale of outputs
    """

    def __init__(self, in_features, out_features, m=0.4, s=45.0):
        super().__init__()
        self.in_features, self.out_features = in_features, out_features
        self.m, self.s = m, s
        # Margin constants; stored but not consumed by forward().
        self.cos_m, self.sin_m = math.cos(m), math.sin(m)
        self.threshold = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m
        # Class kernel is laid out (in_features, out_features), so it is
        # normalised column-wise in forward().
        self.kernel = Parameter(torch.Tensor(in_features, out_features))
        self.register_buffer("t", torch.zeros(1))
        nn.init.normal_(self.kernel, std=0.01)

    def forward(self, embbedings, label = None):
        """Return cosine similarities of shape ``(batch, out_features)``.

        ``label`` is accepted for interface compatibility and is not used.
        """
        unit_embeddings = l2_norm(embbedings, axis=1)
        unit_kernel = l2_norm(self.kernel, axis=0)
        return torch.mm(unit_embeddings, unit_kernel)

    
class ArcMarginProduct_subcenter(nn.Module):
    """Sub-center cosine head: ``k`` weight vectors per class, best cosine wins.

    Args:
        in_features: Embedding width.
        out_features: Number of classes.
        k: Number of sub-centers per class.
    """

    def __init__(self, in_features, out_features, k=3):
        super(ArcMarginProduct_subcenter, self).__init__()
        self.weight = nn.Parameter(torch.FloatTensor(out_features*k, in_features))
        self.reset_parameters()
        self.out_features = out_features
        self.k = k

    def reset_parameters(self):
        """Uniform initialisation in ``[-1/sqrt(in_features), 1/sqrt(in_features)]``."""
        bound = 1. / math.sqrt(self.weight.size(1))
        nn.init.uniform_(self.weight, -bound, bound)

    def forward(self, features):
        """Return ``(batch, out_features)`` cosines, maximised over the ``k`` sub-centers."""
        unit_features = F.normalize(features)
        unit_weight = F.normalize(self.weight)
        per_center = F.linear(unit_features, unit_weight)
        # Consecutive groups of k rows of the weight belong to the same class.
        per_center = per_center.view(-1, self.out_features, self.k)
        return per_center.max(dim=2).values

## Pooling Layers

In [6]:
def gem(x, p=3, eps=1e-6):
    """Generalised-mean pooling over the last two axes of a 4-D tensor.

    Values are clamped to at least ``eps``, raised to ``p``, averaged over the
    full spatial extent and taken to the power ``1/p``. Returns ``(N, C)``.
    """
    full_extent = (x.size(-2), x.size(-1))
    powered = x.clamp(min=eps).pow(p)
    pooled = F.avg_pool2d(powered, full_extent).pow(1.0 / p)
    return pooled[:, :, 0, 0]

class GeM(nn.Module):
    """Generalised-mean pooling layer with a learnable exponent ``p``."""

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        # Single-element learnable exponent, initialised to `p`.
        self.p = Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        """Pool ``x`` with :func:`gem` using the current exponent."""
        return gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return self.__class__.__name__ + f"(p={exponent:.4f}, eps={str(self.eps)})"

class GridGeM(nn.Module):
    """GeM pooling applied separately to the four quadrants of the feature map.

    The pooled quadrant descriptors are concatenated along the channel axis in
    the order top-left, top-right, bottom-left, bottom-right, giving
    ``(B, 4 * C)`` for a ``(B, C, H, W)`` input.
    """

    def __init__(self, p = 3, eps = 1e-6):
        super(GridGeM, self).__init__()
        # Single learnable exponent shared by all four quadrants.
        self.p = Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        B, C, H, W = x.shape

        # The second chunk takes the remainder so odd H / W no longer make
        # torch.split fail (its sizes must sum to the full dimension).
        # Even sizes split exactly as before.
        w_sizes = [W // 2, W - W // 2]
        h_sizes = [H // 2, H - H // 2]

        left, right = torch.split(x, w_sizes, dim=-1)
        top_left, bottom_left = torch.split(left, h_sizes, dim=-2)
        top_right, bottom_right = torch.split(right, h_sizes, dim=-2)

        pooled = [
            gem(quadrant, p=self.p, eps=self.eps)
            for quadrant in (top_left, top_right, bottom_left, bottom_right)
        ]
        return torch.cat(pooled, dim=1)

    def __repr__(self):
        return (
            self.__class__.__name__
            + f"(p={self.p.data.tolist()[0]:.4f}, eps={str(self.eps)})"
        )
    
class GridGeMCosdist(nn.Module):
    """Cosine distances between softmax-normalised profiles of the four quadrants.

    Each quadrant of the ``(B, C, H, W)`` input is averaged over its width and
    soft-maxed over its height; the six pairwise cosine distances between the
    resulting profiles are stacked and flattened to a single column.
    """

    # Quadrant index pairs in output order (0=top-left, 1=top-right,
    # 2=bottom-left, 3=bottom-right).
    _PAIRS = ((0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3))

    def __init__(self, p = 3, eps = 1e-6):
        super().__init__()
        # `p`, `eps` and `neck` are created but not read by forward().
        self.p = Parameter(torch.ones(1) * p)
        self.eps = eps
        self.neck = nn.AdaptiveAvgPool2d( (1,1) )

    def forward(self, x):
        B, C, H, W = x.shape

        left, right = torch.split(x, [W//2, W//2], dim=-1)
        top_left, bottom_left = torch.split(left, [H//2, H//2], dim=-2)
        top_right, bottom_right = torch.split(right, [H//2, H//2], dim=-2)

        profiles = [
            quadrant.mean(dim=-1).softmax(dim=-1)
            for quadrant in (top_left, top_right, bottom_left, bottom_right)
        ]
        distances = [
            1.0 - F.cosine_similarity(profiles[first], profiles[second], dim=-1)
            for first, second in self._PAIRS
        ]

        # NOTE(review): stacking on dim 0 gives (6, B, C), so the flattened
        # column is pair-major rather than sample-major; a later
        # .view(batch, -1) would mix samples. Confirm this is intended.
        return torch.stack(distances).view(-1, 1)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return self.__class__.__name__ + f"(p={exponent:.4f}, eps={str(self.eps)})"

## Mix it

In [7]:
class LandmarkNet(nn.Module):
    """timm backbone + pooling + 512-d neck + cosine classification head.

    Args:
        out_feature: Number of classes produced by the final head.
        backbone: timm model name; must contain ``efficientnet``, ``nfnet`` or ``resnet``.
        pretrained: Passed through to ``timm.create_model``.
        pool_type: ``'gem'``, ``'gridgem'`` or ``'gridgemcosdist'``; any other value
            falls back to ``nn.AdaptiveAvgPool2d(1)``.
        metric: ``'arcface'`` or ``'curricular'``.
        sub_center: With ``metric='arcface'``, use the sub-center variant of the head.

    Raises:
        ValueError: For an unsupported ``backbone`` or ``metric``. Previously these
            surfaced later as AttributeError on ``in_features`` / ``final``.
    """

    _BACKBONE_FAMILIES = ("efficientnet", "nfnet", "resnet")

    def __init__(self, out_feature, backbone='tf_efficientnet_b6_ns',  pretrained=True, pool_type = 'gem',metric ='arcface', sub_center = False):
        super(LandmarkNet, self).__init__()
        # Validate before the (potentially expensive) backbone is instantiated.
        if not any(family in backbone for family in self._BACKBONE_FAMILIES):
            raise ValueError(
                f"Unsupported backbone {backbone!r}; expected a name containing one of {self._BACKBONE_FAMILIES}"
            )
        if metric not in ('arcface', 'curricular'):
            raise ValueError(f"Unsupported metric {metric!r}; expected 'arcface' or 'curricular'")

        # Misspelt attribute name kept as-is: code outside this class may read it.
        self.backbne_name = backbone
        self.backbone = timm.create_model(backbone, pretrained=pretrained)
        self.out_feature = out_feature

        # Strip the backbone's own pooling and classifier so it emits a feature
        # map, and remember the channel count of that map.
        if "efficientnet" in backbone:
            self.in_features = self.backbone.classifier.in_features
            self.backbone.global_pool = nn.Identity()
            self.backbone.classifier = nn.Identity()
        elif "nfnet" in backbone:
            self.in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
            self.backbone.head.global_pool = nn.Identity()
        else:  # 'resnet' family, guaranteed by the validation above
            self.in_features = self.backbone.fc.in_features
            self.backbone.global_pool = nn.Identity()
            self.backbone.fc = nn.Identity()

        # Grid poolings concatenate several descriptors, so the neck input grows.
        if pool_type == 'gem':
            self.pooling =  GeM()
        elif pool_type == 'gridgem':
            self.pooling =  GridGeM()
            self.in_features *= 4
        elif pool_type == 'gridgemcosdist':
            self.pooling =  GridGeMCosdist()
            self.in_features *= 6
        else:
            self.pooling = nn.AdaptiveAvgPool2d(1)

        self.neck = nn.Sequential(
                nn.Linear(self.in_features, 512, bias=True),
                nn.BatchNorm1d(512),
                torch.nn.PReLU()
            )
        if metric == 'arcface':
            if sub_center:
                self.final = ArcMarginProduct_subcenter(512, self.out_feature)
            else:
                self.final = ArcMarginProduct(512, self.out_feature)
        else:  # 'curricular'
            self.final = CurricularFace(512, self.out_feature)

    def forward(self, x, get_embeddings = True):
        """Run the network on a batch of images.

        Returns:
            ``(embeddings, logits)`` when ``get_embeddings`` is true, otherwise
            only ``logits``. Embeddings are the L2-normalised neck output.
        """
        # The original guarded this on the backbone name, which left `features`
        # unbound for resnet backbones accepted by __init__; the pipeline is the
        # same for every supported family.
        batch_size = x.shape[0]
        features = self.backbone(x)
        features = self.pooling(features).view(batch_size, -1)
        features = self.neck(features)
        features = F.normalize(features)

        logits = self.final(features)
        if not get_embeddings:
            return logits
        return features, logits

# Transformer Model

In [8]:
class FFN(nn.Module):
    """Two-layer feed-forward block (Linear -> ReLU -> Linear -> Dropout(0.2)).

    Input and output width are both ``state_size``.
    """

    def __init__(self, state_size=200):
        super().__init__()
        self.state_size = state_size

        self.lr1 = nn.Linear(state_size, state_size)
        self.relu = nn.ReLU()
        self.lr2 = nn.Linear(state_size, state_size)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        hidden = self.relu(self.lr1(x))
        return self.dropout(self.lr2(hidden))
    
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding with a learnable scalar scale.

    The table is stored as a buffer ``pe`` of shape ``(max_len, 1, d_model)``
    and is added to inputs indexed by position along their first axis.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.scale = nn.Parameter(torch.ones(1))

        positions = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        inv_freq = torch.exp(torch.arange(
            0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = positions * inv_freq

        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(angles)  # even channels
        table[:, 1::2] = torch.cos(angles)  # odd channels
        # Singleton middle axis lets the table broadcast over the second input axis.
        self.register_buffer('pe', table.unsqueeze(1))

    def forward(self, x):
        """Add the scaled encoding for the first ``x.size(0)`` positions, then apply dropout."""
        encoded = x + self.scale * self.pe[:x.size(0), :]
        return self.dropout(encoded)

def future_mask(seq_length):
    """Return a ``(seq_length, seq_length)`` bool tensor that is True strictly above the diagonal."""
    all_true = np.ones((seq_length, seq_length)).astype('bool')
    strictly_upper = np.triu(all_true, k=1)
    return torch.from_numpy(strictly_upper)

class GLRTRFModel(nn.Module):
    """Transformer-based scorer for a (query, gallery) embedding pair plus pair features.

    Relies on the module-level names ``N_LAYER`` and ``N_FEATURE``, which are not
    defined in this part of the notebook and must exist before instantiation.
    """
    def __init__(self, embed_dim=128, dropout_rate=0.2):
        """Build the sub-modules.

        Args:
            embed_dim: Width of the query/gallery embeddings and of the transformer (d_model).
            dropout_rate: Dropout probability for the transformer and ``self.dropout``.
        """
        super(GLRTRFModel, self).__init__()
        self.embed_dim = embed_dim
        # Fuses [query embedding ; projected pair features] back down to embed_dim.
        self.cat = nn.Sequential(
            nn.Linear(embed_dim*2, embed_dim),
            nn.LayerNorm(embed_dim),
        ) 
        # Same fusion for the gallery side, with its own weights.
        self.cat1 = nn.Sequential(
            nn.Linear(embed_dim*2, embed_dim),
            nn.LayerNorm(embed_dim),
        ) 
        
        self.transformer = nn.Transformer(nhead=8, d_model = embed_dim, num_encoder_layers= N_LAYER, num_decoder_layers= N_LAYER, dropout = dropout_rate)

        # NOTE: self.dropout is created but not used in forward().
        self.dropout = nn.Dropout(dropout_rate)
        # A single LayerNorm instance, applied twice in forward() (shared weights).
        self.layer_normal = nn.LayerNorm(embed_dim) 

        self.ffn = FFN(embed_dim)
        
        # mlp1..mlp5: N_FEATURE -> 1024 -> 512 -> embed_dim projection of the pair features.
        self.mlp1 = nn.Linear(N_FEATURE, 1024)
        self.mlp2 = nn.ReLU()
        self.mlp3 = nn.Linear(1024, 512)
        self.mlp4 = nn.ReLU()
        self.mlp5 = nn.Linear(512, embed_dim)
        # Final scorer over [transformer output ; projected pair features].
        self.pred = nn.Linear(embed_dim*2, 1)
    
    def forward(self, query_embedding, gallary_embedding, features):
        """Score each pair; returns the ``pred`` output with its last axis squeezed away.

        Inputs are 3-D (they are permuted with three axes below) — presumably
        (batch, sequence, dim); TODO confirm against the dataset/collate code.
        """
        device = query_embedding.device 
        
        # Project the hand-crafted pair features to embed_dim.
        x1 = self.mlp1(features)
        x1 = self.mlp2(x1)
        x1 = self.mlp3(x1)
        x1 = self.mlp4(x1)
        x1 = self.mlp5(x1)

        # The same projected features are fused into both the query and gallery streams.
        x = torch.cat([query_embedding,x1], axis=-1)
        x = self.cat(x)
        e = torch.cat([gallary_embedding, x1], axis=-1)
        e = self.cat1(e)
        
        # Swap the first two axes — presumably to the (seq, batch, dim) layout
        # nn.Transformer uses by default; TODO confirm.
        x = x.permute(1, 0, 2)
        e = e.permute(1, 0, 2)
        
        # One strictly-upper-triangular boolean mask, reused for source, target and memory attention.
        att_mask = future_mask(x.size(0)).to(device)
        att_output = self.transformer( x,e, src_mask=att_mask, tgt_mask=att_mask, memory_mask = att_mask)
        # Residual connection with the gallery stream, then LayerNorm.
        att_output = self.layer_normal(att_output+e)
        # Undo the axis swap.
        att_output = att_output.permute(1, 0, 2)
        x = self.ffn(att_output)
        x = self.layer_normal(x + att_output)
        x = torch.cat([x,x1], axis = -1)
        x = self.pred(x)
        return x.squeeze(-1)

# GCN Model

In [9]:
class GraphAttentionLayer(nn.Module):
    """Batched dense graph-attention layer over fully connected node sets.

    Args:
        in_features: Width of each input node vector.
        out_features: Width of each output node vector.
        dropout: Dropout probability applied to the attention weights.
        alpha: Negative slope of the LeakyReLU on attention logits.
        concat: If true, apply ELU to the output.
    """

    def __init__(self, in_features, out_features, dropout=0.6, alpha=0.2, concat=True):
        super(GraphAttentionLayer, self).__init__()
        self.in_features, self.out_features = in_features, out_features
        self.dropout = dropout
        self.alpha = alpha
        self.concat = concat

        self.W = nn.Parameter(torch.empty(size=(in_features, out_features)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        self.a = nn.Parameter(torch.empty(size=(2 * out_features, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, h):
        """Map ``h`` of shape (B, N, in_features) to (B, N, out_features)."""
        Wh = h @ self.W
        pair_inputs = self._prepare_attentional_mechanism_input(Wh)
        logits = self.leakyrelu(torch.matmul(pair_inputs, self.a).squeeze(-1))

        # Softmax runs over axis 1 of the (B, N, N) logits, i.e. over the first node index.
        attention = F.softmax(logits, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.bmm(attention, Wh)

        return F.elu(h_prime) if self.concat else h_prime

    def _prepare_attentional_mechanism_input(self, Wh):
        """Return a (B, N, N, 2*D) tensor whose entry [b, i, j] is ``[Wh[b, i] ; Wh[b, j]]``."""
        B, N, D = Wh.shape
        first = Wh.unsqueeze(2).expand(B, N, N, D)
        second = Wh.unsqueeze(1).expand(B, N, N, D)
        return torch.cat([first, second], dim=-1).view(-1, N, N, 2 * D)

    def __repr__(self):
        return '{} ({} -> {})'.format(self.__class__.__name__, self.in_features, self.out_features)


class GATPairClassifier(nn.Module):
    """Pair classifier: multi-head graph attention over neighbour features + MLP.

    Args:
        nfeat: Width of the pair / neighbour feature vectors.
        nhid: Hidden width of each attention head and of the classifier.
        nclass: Number of classifier outputs.
        dropout: Dropout probability used inside the attention stack.
        alpha: LeakyReLU slope passed to every attention layer.
        nheads: Number of first-level attention heads.
        pooling: ``'first'`` (take node 0) or ``'mean'`` (average the nodes).
    """

    def __init__(self, nfeat, nhid=8, nclass=1, dropout=0.6, alpha=0.2, nheads=8, pooling='first'):
        super(GATPairClassifier, self).__init__()
        self.pooling = pooling
        self.dropout = dropout

        # Heads live in a plain list and are registered individually so their
        # parameters appear under the names attention_0, attention_1, ...
        self.attentions = []
        for head_idx in range(nheads):
            head = GraphAttentionLayer(nfeat, nhid, dropout=dropout, alpha=alpha, concat=True)
            self.attentions.append(head)
        for head_idx, head in enumerate(self.attentions):
            self.add_module('attention_{}'.format(head_idx), head)

        self.out_att = GraphAttentionLayer(nhid * nheads, nhid, dropout=dropout, alpha=alpha, concat=False)

        self.classifier = nn.Sequential(
            nn.Linear(nfeat + nhid, nhid),
            nn.PReLU(),
            nn.BatchNorm1d(nhid),
            nn.Linear(nhid, nclass),
        )

    def forward_gat(self, x):
        """Run the attention stack and pool over nodes; returns None for an unknown pooling."""
        x = F.dropout(x, self.dropout, training=self.training)
        head_outputs = [head(x) for head in self.attentions]
        x = torch.cat(head_outputs, dim=2)
        x = F.dropout(x, self.dropout, training=self.training)
        x = F.elu(self.out_att(x))

        if self.pooling == 'first':
            return x[:, 0]
        if self.pooling == 'mean':
            return x.mean(dim=1)
        return None

    def forward(self, feats, neighbor_feats):
        """Classify each pair from its own features and its pooled graph features."""
        graph_feats = self.forward_gat(neighbor_feats)
        combined = torch.cat([feats, graph_feats], dim=1)
        return self.classifier(combined).squeeze(1)

# Utils

In [10]:
def get_total_score(global_score, local_score, transformer_score, gcn_score, lgb_score, weights = (1,)*5, max_inlier_score = None):
    """Blend global, local and classifier scores into a single ranking score.

    The three classifier scores are combined as a weighted sum, which then
    multiplies both the global score and the normalised local score.

    Args:
        global_score: Global-descriptor similarity.
        local_score: Local-matching score; divided by ``max_inlier_score``.
        transformer_score, gcn_score, lgb_score: Classifier scores.
        weights: Five weights, in order: global, local, transformer, gcn, lgb.
        max_inlier_score: Normaliser for ``local_score``; defaults to the
            module-level ``MAX_INLIER_SCORE``.

    Returns:
        The blended score.
    """
    if max_inlier_score is None:
        max_inlier_score = MAX_INLIER_SCORE

    # Fixed: the original read `transformers_score`, a name that is not a
    # parameter of this function (NameError unless a global happened to exist).
    classifier_score = (transformer_score * weights[2] + gcn_score * weights[3] + lgb_score * weights[4])

    global_term = global_score * classifier_score * weights[0]
    # Fixed: the local term reused weights[0], leaving weights[1] unused.
    # Results are unchanged for the default all-ones weights.
    local_term = (local_score / max_inlier_score) * classifier_score * weights[1]
    return global_term + local_term

## Data Func

In [11]:
def load_labelmap(TRAIN_LABELMAP_PATH):
    """Read a CSV with ``id`` and ``landmark_id`` columns into an ``{id: landmark_id}`` dict.

    Values are kept as the strings read from the file.
    """
    labelmap = {}
    with open(TRAIN_LABELMAP_PATH, mode='r') as csv_file:
        for row in csv.DictReader(csv_file):
            labelmap[row['id']] = row['landmark_id']
    gc.collect()
    return labelmap

def trf_data():
    """Placeholder with no implementation yet; does nothing and returns None."""
    pass

def gcn_data():
    """Placeholder with no implementation yet; does nothing and returns None."""
    pass

def lgb_data():
    """Placeholder with no implementation yet; does nothing and returns None."""
    pass

## Submission Util

In [12]:
def save_submission_csv(DATASET_DIR, test_ids = None, predictions=None, mode = 'recognition', submit_fname = 'submission.csv'):
    """Write predictions to a submission CSV.

    * ``mode='recognition'`` writes columns ``{id, landmarks}``; the landmarks
      field is ``"<label> <score>"`` separated by a single space.
    * ``mode='retrieval'`` writes columns ``{id, images}``; the images field is
      the space-separated list of retrieved image ids.

    If ``predictions`` is ``None`` (default), ``sample_submission.csv`` from
    ``DATASET_DIR`` is copied to ``submit_fname`` instead.

    Args:
        DATASET_DIR: Directory containing ``sample_submission.csv``.
        test_ids: Query image ids, aligned with ``predictions``.
        predictions: One candidate list per query; each candidate is indexed as
            ``(image_id, label, score)``.
        mode: ``'recognition'`` or ``'retrieval'``.
        submit_fname: Output path.

    Returns:
        The path of the file that was written.

    Raises:
        ValueError: If ``mode`` is not recognised.
    """
    print('Submitting ..... ')

    # Must be checked before `predictions` is iterated; the original tested it
    # afterwards, so the dummy-submission path could never be reached.
    if predictions is None:
        # Dummy submission!
        shutil.copyfile(os.path.join(DATASET_DIR, 'sample_submission.csv'), submit_fname)
        return submit_fname

    if mode == 'retrieval':
        column = 'images'
        # Keyed by query id, so a repeated id keeps only its last entry (as before).
        fields = {
            test_ids[ix]: ' '.join(candidate[0] for candidate in candidates)
            for ix, candidates in enumerate(predictions)
        }
    elif mode == 'recognition':
        column = 'landmarks'
        fields = {}
        for ix, candidates in enumerate(predictions):
            if len(candidates) == 0:
                # No candidate: emit an empty prediction instead of failing
                # later with a KeyError on a missing 'class'.
                fields[test_ids[ix]] = ''
                continue
            # NOTE(review): the LAST candidate is used, as in the original
            # loop (each iteration overwrote the previous one). Confirm that
            # callers order candidates accordingly.
            chosen = candidates[len(candidates) - 1]
            fields[test_ids[ix]] = f'{chosen[1]} {chosen[2]}'
    else:
        raise ValueError(f"Unknown mode {mode!r}; expected 'recognition' or 'retrieval'")

    gc.collect()

    # newline='' is what the csv module expects so it controls line endings itself.
    with open(submit_fname, 'w', newline='') as submission_csv:
        csv_writer = csv.DictWriter(submission_csv, fieldnames=['id', column])
        csv_writer.writeheader()
        for image_id, value in fields.items():
            csv_writer.writerow({'id': image_id, column: value})
    gc.collect()
    return submit_fname

## Global Feature Extraction

In [13]:
def extract_global_features(df, mode, prob_type, return_probs = False):
    """Compute L2-normalised global embeddings for every image id in ``df``.

    Each model in the module-level ``models`` list is run on every batch and
    the per-model embeddings are concatenated along the feature axis. The
    result is cached with ``save_pickle`` as ``'{mode}_embedding.pkl'``.

    Args:
        df: DataFrame with an ``id`` column of image ids.
        mode: Dataset split, forwarded to ``LandmarkDataset``.
        prob_type: Competition flavour, forwarded to ``LandmarkDataset``.
        return_probs: Not supported; see Raises.

    Returns:
        ``(ids, feats)`` where ``ids`` is ``df['id'].values`` and ``feats`` is
        a CUDA tensor with one row per id.

    Raises:
        NotImplementedError: If ``return_probs`` is true. The probability path
            is commented out upstream, and the original code failed with a
            NameError on ``prob_m`` only after all the work had been done.
    """
    if return_probs:
        raise NotImplementedError('return_probs=True is not supported: class probabilities are not computed')

    image_ids   = df['id'].values
    dataset     = LandmarkDataset(image_ids, mode = mode, prob_type = prob_type)
    dataloader  = DataLoader(dataset, batch_size = hyperparameters.batch_size, shuffle=False, num_workers=hyperparameters.num_workers)

    batch_feats = []
    with torch.no_grad():
        for img in tqdm(dataloader):
            img = img.cuda()
            per_model = []
            for model in models:
                feat_m, _ = model(img)
                per_model.append(feat_m)
            # Move each batch to the CPU straight away to bound GPU memory use.
            batch_feats.append(torch.cat(per_model, dim=1).detach().cpu())

        feats = torch.cat(batch_feats).cuda()
        # Fixed: the original normalised `feat` (only the last batch) and then
        # discarded it, so the concatenated embeddings were never re-normalised.
        # Normalising `feats` makes downstream dot products true cosine similarities.
        feats = F.normalize(feats)
        save_pickle(feats,f'{mode}_embedding.pkl')

    del batch_feats, dataset, dataloader
    gc.collect()
    return image_ids, feats

## LoFTR Model

In [14]:
def _read_gray_640x480(path):
    """Read ``path`` as grayscale; return ``(image resized to 640x480, (x_scale, y_scale))``."""
    raw = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if raw is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f'Could not read image: {path}')
    scale = (raw.shape[1]/640, raw.shape[0]/480)
    return cv2.resize(raw, (640, 480)), scale


def generate_loftr(matcher, image_0, train_images):
    """Match one query image against a batch of gallery images with LoFTR.

    Every image is read as grayscale and resized to 640x480. The matcher is
    called once on the whole batch and writes its results into the batch dict.

    Args:
        matcher: Callable LoFTR model taking a dict with ``image0`` / ``image1``.
        image_0: Path of the query image.
        train_images: Non-empty sequence of gallery image paths.

    Returns:
        The batch dict after matching, without the image tensors, plus
        ``img0_scales`` / ``img1_scales``: per-pair ``(x_scale, y_scale)``
        factors mapping 640x480 coordinates back to the original image size.

    Raises:
        ValueError: If ``train_images`` is empty.
        FileNotFoundError: If an image cannot be read.
    """
    if len(train_images) == 0:
        raise ValueError('train_images must contain at least one image path')

    # The query image is the same for every pair: read and resize it once
    # rather than once per gallery image.
    query_resized, query_scale = _read_gray_640x480(image_0)

    img0, img1 = [], []
    img0_scales, img1_scales = [], []
    for image_1 in train_images:
        gallery_resized, gallery_scale = _read_gray_640x480(image_1)
        img0.append(query_resized)
        img1.append(gallery_resized)
        img0_scales.append(query_scale)
        img1_scales.append(gallery_scale)

    # (N, H, W) uint8 -> (N, 1, H, W) float in [0, 1] on the GPU.
    img0 = torch.from_numpy(np.array(img0)[:,None,...]).cuda() / 255.
    img1 = torch.from_numpy(np.array(img1)[:,None,...]).cuda() / 255.
    batch = {'image0': img0, 'image1': img1}
    del img0 , img1
    with torch.no_grad():
        matcher(batch)

    # Drop the image tensors so the returned dict does not keep them alive.
    del batch['image0'], batch['image1']
    batch['img0_scales'] = img0_scales
    batch['img1_scales'] = img1_scales
    gc.collect()
    return batch
    
def rescore_and_rerank(test_image_dir, train_image_dir,
                                      test_image_id, train_ids_labels_and_scores, batch_size = 16, ignore_global_score=False, do_sort=True,
                                      loftr_model=None, return_num_inliers=False):
    """Re-score retrieval candidates of one query with LoFTR matches and inlier counts.

    For every candidate the matched keypoints are mapped back to original
    image coordinates, ``compute_num_inliers`` counts the inliers, and the
    candidate score becomes ``global_score + num_inliers / MAX_INLIER_SCORE``.

    Args:
        test_image_dir: Root directory of the query images.
        train_image_dir: Root directory of the candidate images.
        test_image_id: Id of the query image.
        train_ids_labels_and_scores: List of ``(image_id, label, global_score)``
            tuples. It is updated IN PLACE (and sorted in place when ``do_sort``).
        batch_size: Number of candidates matched per LoFTR call.
        ignore_global_score: Accepted but currently has no effect.
            NOTE(review): confirm whether it should drop ``global_score``.
        do_sort: Sort candidates by descending total score.
        loftr_model: Matcher forwarded to ``generate_loftr``.
        return_num_inliers: Return the inlier list instead of the candidates.

    Returns:
        ``train_ids_labels_and_scores`` with updated scores, or a list of
        ``(image_id, num_inliers)`` when ``return_num_inliers`` is true.
    """
    test_image_path = f'{test_image_dir}/{test_image_id[0]}/{test_image_id[1]}/{test_image_id[2]}/{test_image_id}.jpg'

    train_images_path = [
        f'{train_image_dir}/{train_image_id[0]}/{train_image_id[1]}/{train_image_id[2]}/{train_image_id}.jpg'
        for train_image_id, _label, _global_score in train_ids_labels_and_scores
    ]

    ransac_inliers = []
    for batch_i in range(0, len(train_images_path), batch_size):
        chunk = train_images_path[batch_i:batch_i + batch_size]
        pred = generate_loftr(loftr_model, test_image_path, chunk)

        # m_bids assigns every match to its pair index within the batch.
        m_bids = pred['m_bids'].cpu().numpy()

        # Fixed: the query keypoints were multiplied by the x scale only
        # (`test_scales[0]`), distorting y for any image that is not 4:3.
        # Use the full (x_scale, y_scale) pair, as for the gallery keypoints.
        test_scale = np.asarray(pred['img0_scales'][0])
        test_keypoints = pred['mkpts0_f'].cpu().numpy() * test_scale
        all_train_keypoints = pred['mkpts1_f'].cpu().numpy()

        for i in range(len(chunk)):
            train_image_id, label, global_score = train_ids_labels_and_scores[batch_i + i]
            in_pair = m_bids == i
            train_keypoints = all_train_keypoints[in_pair] * np.asarray(pred['img1_scales'][i])
            num_inliers = compute_num_inliers(test_keypoints[in_pair], None, train_keypoints, None, do_kdtree=False)

            total_score = global_score + num_inliers/MAX_INLIER_SCORE
            train_ids_labels_and_scores[batch_i + i] = (train_image_id, label, total_score)
            ransac_inliers.append((train_image_id, num_inliers))

        # Release the matcher outputs before the next batch is processed.
        del pred, test_keypoints, all_train_keypoints

    if do_sort:
        train_ids_labels_and_scores.sort(key=lambda x: x[2], reverse=True)
    gc.collect()
    if return_num_inliers:
        return ransac_inliers
    return train_ids_labels_and_scores

## Others

In [15]:
def do_retrieval(labelmap, train_ids, train_embeddings,
                 test_embeddings, num_to_rerank, val_x = None, val_y = None, do_dba=False,
                 gallery_set='index', method = 'cossim',return_vals = False, name ='simple'):
    """Retrieve the ``num_to_rerank`` nearest gallery items for every query embedding.

    Args:
        labelmap: Mapping from gallery image id to label.
        train_ids: Gallery image ids, aligned with ``train_embeddings``.
        train_embeddings: Gallery embeddings, one row per id.
        test_embeddings: Query embeddings, one row per query.
        num_to_rerank: Number of neighbours to keep per query.
        val_x: Optional per-gallery penalty; when given, ``get_topk_cossim_sub``
            is used instead of ``get_topk_cossim`` (``'cossim'`` only).
        val_y: Accepted but unused (the adjustment it fed is commented out upstream).
        do_dba: Apply database-side augmentation (``'faiss'`` only): each gallery
            embedding becomes a weighted sum of its 10 nearest gallery embeddings.
        gallery_set: Accepted but unused.
        method: ``'cossim'`` or ``'faiss'``.
        return_vals: Also return the raw score matrix.
        name: Cache file name forwarded to the cosine-similarity helpers.

    Returns:
        A list with one entry per query, each a list of ``(image_id, label, score)``
        tuples; with ``return_vals`` a ``(list, scores)`` pair.

    Raises:
        ValueError: If ``method`` is not supported. Previously this surfaced
            as a NameError on the unbound search results.
    """
    if method not in ('faiss', 'cossim'):
        raise ValueError(f"Unknown retrieval method {method!r}; expected 'cossim' or 'faiss'")

    if method == 'faiss':
        # NOTE(review): `faiss` is not imported in the visible cells; make
        # sure it is imported before this branch is used.
        if do_dba:
            faiss_index = faiss.IndexFlatIP(train_embeddings.shape[1])
            faiss_index.add(train_embeddings)
            dba_lens = 10
            weights = np.logspace(0, -1.5, dba_lens)
            weights /= np.sum(weights)
            D, I = faiss_index.search(train_embeddings, dba_lens)
            new_xb = 0
            for i, weight in enumerate(weights):
                new_xb = new_xb + train_embeddings[I[:, i]] * weight
            train_embeddings = new_xb

        faiss_index = faiss.IndexFlatIP(train_embeddings.shape[1])
        faiss_index.add(train_embeddings)
        D, I = faiss_index.search(test_embeddings, num_to_rerank)  # actual search
    else:  # 'cossim'
        if val_x is None:
            D, I = get_topk_cossim(test_embeddings, train_embeddings, batchsize = 64, k=num_to_rerank, device='cuda:0',verbose=True, name = name)
        else:
            D, I = get_topk_cossim_sub(test_embeddings, train_embeddings,val_x, batchsize = 64, k=num_to_rerank, device='cuda:0',verbose=True, name = name)

    train_ids_labels_and_scores = [
        [
            (train_ids[train_index], labelmap[train_ids[train_index]], distance)
            for train_index, distance in zip(I[test_index], D[test_index])
        ]
        for test_index in range(test_embeddings.shape[0])
    ]
    del I
    gc.collect()
    if return_vals:
        return train_ids_labels_and_scores, D
    return train_ids_labels_and_scores

def get_nolandmark_by_dbscan(test_ids, test_embeddings, nolandmark_ids, nolandmark_embeddings):
    """Cluster test and known non-landmark embeddings together and report, per test image,
    how many members of its cluster are known non-landmarks.

    Returns a DataFrame with one row per test id holding its cluster label plus the
    cluster-level columns ``clusters_num``, ``nolandmark_num`` and ``nolandmark_rate``.

    Relies on a module-level name ``ID`` for the id column, which is not defined in
    this part of the notebook.
    """
    # Cluster both sets jointly; test rows come first, non-landmark rows second.
    features = np.vstack([test_embeddings, nolandmark_embeddings])
    clusters = dbscan(eps=0.85, n_jobs=-1, min_samples=1).fit_predict(features)
    # Column-stack ids with cluster labels.
    # NOTE(review): with string ids np.c_ presumably yields a string array, so the
    # cluster labels become strings too — confirm downstream code is fine with that.
    clusters_np = np.c_[np.r_[test_ids, nolandmark_ids], clusters]
    clusters_df = pd.DataFrame(data=clusters_np, columns=[ID, 'clusters'])
    # 0 for test rows, 1 for known non-landmark rows (same order as the vstack above).
    clusters_df['is_nolandmark'] = [0]*len(test_ids) + [1]*len(nolandmark_ids)
    # Per cluster: total member count and number of known non-landmark members.
    clusters_gb = clusters_df.groupby('clusters')['is_nolandmark'].agg(['count', 'sum']).reset_index()
    clusters_gb.columns = ['clusters', 'clusters_num', 'nolandmark_num']
    clusters_gb['nolandmark_rate'] = clusters_gb['nolandmark_num'] / clusters_gb['clusters_num']

    # Keep only the test rows (the first len(test_ids) rows) and attach the cluster stats.
    test_clusters = clusters_df[0: len(test_ids)]
    test_clusters = test_clusters.merge(clusters_gb, on='clusters', how='left')
    gc.collect()
    return test_clusters

## CosSim

In [16]:
def cos_similarity_matrix(a, b):
    """Return ``a @ b^T`` for 2-D tensors.

    This is a plain dot-product matrix; it equals cosine similarity only when
    the rows of both inputs are already L2-normalised.
    """
    return torch.mm(a, torch.transpose(b, 0, 1))

def get_topk_cossim(test_emb, tr_emb, batchsize = 64, k=10, device='cuda:0',verbose=True, name = 'simple'):
    """Top-``k`` gallery matches for every query by dot-product similarity.

    Queries are processed in chunks of ``batchsize`` on ``device``; the result
    is also cached with ``save_pickle`` under ``name``.

    Returns:
        ``(vals, inds)`` CPU tensors, one row per query and ``k`` columns:
        similarity values and the matching gallery row indices.
    """
    target = torch.device(device)
    tr_emb = torch.tensor(tr_emb, dtype = torch.float32, device=target)
    test_emb = torch.tensor(test_emb, dtype = torch.float32, device=target)

    val_chunks, ind_chunks = [], []
    for test_batch in tqdm(test_emb.split(batchsize),disable=1-verbose):
        similarities = cos_similarity_matrix(test_batch, tr_emb)
        top_vals, top_inds = torch.topk(similarities, k=k, dim=1)
        val_chunks.append(top_vals.detach().cpu())
        ind_chunks.append(top_inds.detach().cpu())

    vals = torch.cat(val_chunks)
    inds = torch.cat(ind_chunks)
    save_pickle([vals, inds],name)
    gc.collect()
    return vals, inds

def get_topk_cossim_sub(test_emb, tr_emb, vals_x, batchsize = 64, k=10, device='cuda:0',verbose=True, name = 'simple'):
    """Top-k retrieval on similarities penalised by a per-gallery-item offset.

    For each query batch the dot-product similarity (see
    `cos_similarity_matrix`) is clamped to [0, 1] and `vals_x` is subtracted
    from it before the top `k` gallery items are selected.

    Args:
        test_emb: query embeddings, one row per query.
        tr_emb: gallery embeddings, one row per gallery item.
        vals_x: offset subtracted from every query's similarities; expected to
            hold one value per gallery row (1-D, length == number of gallery
            rows) so it broadcasts across the batch.
        batchsize: number of queries scored per matrix multiplication.
        k: neighbours to keep per query. Clamped to the gallery size, because
            `torch.topk` raises when `k` exceeds the size of the searched dim.
        device: torch device the similarity is computed on.
        verbose: show a tqdm progress bar when true.
        name: file name (inside the cache dir) the result is pickled to.

    Returns:
        (vals, inds): CPU tensors of shape (n_queries, min(k, n_gallery)) with
        the penalised scores and the gallery row indices.
    """
    target = torch.device(device)
    tr_emb = torch.tensor(tr_emb, dtype = torch.float32, device=target)
    test_emb = torch.tensor(test_emb, dtype = torch.float32, device=target)
    vals_x = torch.tensor(vals_x, dtype = torch.float32, device=target)

    # Asking for more neighbours than there are gallery rows would crash topk.
    k = min(k, tr_emb.shape[0])

    vals = []
    inds = []
    for test_batch in tqdm(test_emb.split(batchsize), disable=not verbose):
        sim_mat = cos_similarity_matrix(test_batch, tr_emb)
        # Broadcasting applies the same per-gallery offset to every query row
        # without materialising a (batch, n_gallery) copy of `vals_x`.
        sim_mat = torch.clamp(sim_mat, 0, 1) - vals_x

        vals_batch, inds_batch = torch.topk(sim_mat, k=k, dim=1)
        # Move each batch off the device right away to bound GPU memory.
        vals.append(vals_batch.detach().cpu())
        inds.append(inds_batch.detach().cpu())
    vals = torch.cat(vals)
    inds = torch.cat(inds)
    save_pickle([vals, inds],name)
    gc.collect()
    return vals, inds

## Util

In [17]:
def save_pickle(data, filename = 'tmp.pkl'):
    """Pickle `data` to `filename` inside `hyperparameters.CACHE_DIR`.

    Args:
        data: any picklable object.
        filename: file name relative to the cache directory.
    """
    print('\tSaving {} to {}'.format(filename.split('.')[0], filename))
    # Context manager guarantees the handle is flushed and closed even if
    # pickling fails part-way through.
    with open(os.path.join(hyperparameters.CACHE_DIR, filename), 'wb') as handle:
        pickle.dump(data, handle)
    
def load_pickle(filename):
    """Load and return the object pickled at `filename` inside
    `hyperparameters.CACHE_DIR`.

    NOTE(review): `pickle.load` executes arbitrary code from the file; only
    use this on cache files written by this notebook (e.g. via `save_pickle`).
    """
    print('\tLoading {} from {}'.format(filename.split('.')[0], filename))
    # Context manager closes the handle instead of leaking it.
    with open(os.path.join(hyperparameters.CACHE_DIR, filename), 'rb') as handle:
        return pickle.load(handle)

def index_df():
    """Build a DataFrame listing every image of the retrieval index set.

    Image ids are the file names (up to the first dot) of all `.jpg` files
    found three directory levels below the index folder. Every row gets the
    placeholder `landmark_id` of -2.

    Returns:
        DataFrame with columns `id` and `landmark_id`.
    """
    jpg_paths = glob.glob('../input/landmark-retrieval-2021/index/*/*/*/*.jpg')
    ids = [path.split('/')[-1].split('.')[0] for path in jpg_paths]
    return pd.DataFrame({'id': ids, 'landmark_id': [-2] * len(ids)})

def targets_prob():
    """Load the class list and build index <-> landmark-id lookups.

    Reads `classes.npy`, whose position `i` holds the landmark id of class
    index `i`, and maps the unique landmark ids of the module-level
    `df_train` to their class indices.

    Returns:
        (pred_mask, idx2landmark_id, landmark_id2idx) where `pred_mask` is the
        array of class indices for the landmark ids present in `df_train`
        (NaN where an id is not in the class list, per `Series.map`).
    """
    classes = np.load('../input/glr-files/classes.npy')
    idx2landmark_id = dict(enumerate(classes))
    landmark_id2idx = {landmark: position for position, landmark in enumerate(classes)}
    del classes

    train_landmarks = pd.Series(df_train.landmark_id.unique())
    pred_mask = train_landmarks.map(landmark_id2idx).values
    gc.collect()
    return pred_mask, idx2landmark_id, landmark_id2idx

## Compute inliers

In [18]:
# Tunables for the RANSAC homography check in `compute_num_inliers`.
# The last three are passed positionally, in this order, to
# `pydegensac.findHomography` after the two keypoint arrays.
# NOTE(review): MAX_INLIER_SCORE is not referenced in this section; presumably
# it caps the inlier-based score in the reranking code — confirm at call site.
MAX_INLIER_SCORE = 70
MAX_REPROJECTION_ERROR = 4.0  # presumably in pixels — TODO confirm against pydegensac docs
MAX_RANSAC_ITERATIONS = 1000
HOMOGRAPHY_CONFIDENCE = 0.99

def compute_putative_matching_keypoints(test_keypoints,
                                        test_descriptors,
                                        train_keypoints,
                                        train_descriptors,
                                        max_distance=0.9):
    """Finds matches from `test_descriptors` to KD-tree of `train_descriptors`.

    Each test descriptor is matched to its nearest train descriptor, provided
    that neighbour lies within `max_distance`; unmatched test descriptors are
    dropped.

    Args:
        test_keypoints: array with one row per test keypoint, aligned with
            `test_descriptors`.
        test_descriptors: array with one row per test descriptor.
        train_keypoints: array with one row per train keypoint, aligned with
            `train_descriptors`.
        train_descriptors: array with one row per train descriptor.
        max_distance: upper bound on the descriptor distance for a match.

    Returns:
        (test_matching_keypoints, train_matching_keypoints): two arrays with
        the same number of rows, where row `i` of each forms one putative
        match. When nothing matches, both keep their trailing dimensions and
        have zero rows.
    """
    train_descriptor_tree = spatial.cKDTree(train_descriptors)
    _, matches = train_descriptor_tree.query(
        test_descriptors, distance_upper_bound=max_distance)
    matches = np.asarray(matches)

    # `query` flags "no neighbour within the bound" with an index equal to the
    # number of points in the tree.
    has_match = matches != train_descriptor_tree.n

    # A single boolean mask keeps the two outputs row-aligned and replaces
    # two Python-level loops over every keypoint.
    test_matching_keypoints = np.asarray(test_keypoints)[has_match]
    train_matching_keypoints = np.asarray(train_keypoints)[matches[has_match]]
    gc.collect()
    return test_matching_keypoints, train_matching_keypoints

def compute_num_inliers(test_keypoints, test_descriptors, train_keypoints,
                        train_descriptors, do_kdtree=True):
    """Count the RANSAC homography inliers between two keypoint sets.

    Args:
        test_keypoints, test_descriptors: keypoints/descriptors of the query.
        train_keypoints, train_descriptors: keypoints/descriptors of the
            candidate.
        do_kdtree: when true, putative matches are first found with
            `compute_putative_matching_keypoints`; when false the two keypoint
            arrays are used as already-matched pairs and the descriptors are
            ignored.

    Returns:
        Number of inliers as an int; 0 when there are at most 4 putative
        matches or when the homography estimation raises `LinAlgError`.
    """
    test_match_kp, train_match_kp = test_keypoints, train_keypoints
    if do_kdtree:
        test_match_kp, train_match_kp = compute_putative_matching_keypoints(
            test_keypoints, test_descriptors, train_keypoints, train_descriptors)

    # Min keypoints supported by `pydegensac.findHomography()`
    if not test_match_kp.shape[0] > 4:
        return 0

    try:
        _, inlier_mask = pydegensac.findHomography(
            test_match_kp, train_match_kp,
            MAX_REPROJECTION_ERROR,
            HOMOGRAPHY_CONFIDENCE,
            MAX_RANSAC_ITERATIONS)
    except np.linalg.LinAlgError:
        # When det(H)=0, can't invert matrix.
        return 0
    else:
        gc.collect()
        mask_copy = copy.deepcopy(inlier_mask)
        return int(mask_copy.astype(np.float32).sum())

# Load model

In [19]:
def load_model(model, model_file):
    """Load checkpoint weights into `model` and switch it to eval mode.

    The checkpoint may be either a bare state dict or a dict holding one under
    the `"model_state_dict"` key. A leading `module.` on parameter names (as
    left by `nn.DataParallel` wrappers) is stripped before loading.

    Args:
        model: module whose parameters are overwritten in place.
        model_file: path of the checkpoint file.

    Returns:
        The same `model` object, in eval mode.

    Raises:
        RuntimeError: from `load_state_dict(strict=True)` when the checkpoint
            keys do not match the model exactly.
    """
    # Deserialise onto the CPU: the checkpoint then loads on hosts without the
    # GPU it was saved from, and the weights are not held on the GPU twice.
    # `load_state_dict` copies into the model's own tensors regardless.
    checkpoint = torch.load(model_file, map_location='cpu')
    if "model_state_dict" in checkpoint:
        checkpoint = checkpoint["model_state_dict"]

    prefix = 'module.'
    state_dict = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in checkpoint.items()
    }
    model.load_state_dict(state_dict, strict=True)
    print(f"loaded {model_file}")
    del state_dict, checkpoint
    model.eval()
    gc.collect()
    return model

# Build the single embedding model used for inference and load its checkpoint.
# The first positional argument (81313) is presumably the number of landmark
# classes — TODO confirm against `LandmarkNet`.
model1 = LandmarkNet(81313, backbone = 'tf_efficientnet_b3_ns', pool_type = 'gridgem', metric = 'curricular', pretrained = False)
model1 = load_model(model1,'../input/google-mark-no1/epoch_2_score_0.0016468191840280442.pth').cuda()
# Disabled alternatives (second model / other checkpoints), kept for reference:
# model2 = LandmarkNet(81313, backbone = 'tf_efficientnet_b6_ns', pool_type = 'gridgem', metric = 'arcface')
# model2 = load_model(model,'')

# model1 = LandmarkNet(81313, backbone = 'tf_efficientnet_b3', pool_type = 'gem',sub_center = True, pretrained = False)
# model1 = load_model(model1,'../input/morizin-exp1/epoch_10_score_0.001845540972934233.pth').cuda()
# model2 = LandmarkNet(81313, backbone = 'tf_efficientnet_b3', pool_type = 'gem',sub_center = True, pretrained = False)
# model2 = load_model(model2,'../input/morizin-exp1/epoch_9_score_0.0015537621993018456.pth').cuda()
# `models` is the list of loaded models; after `del model1` it is the only
# name still referring to the model, which `get_prediction` later removes.
models = [model1]
del model1

loaded ../input/google-mark-no1/epoch_2_score_0.0016468191840280442.pth


# Inference

In [20]:
def get_prediction(mode = 'retrieval'):
    """Run the global-embedding retrieval pipeline and write the submission CSV.

    Steps, in order:
      1. Read the competition CSVs for `mode` and the validation CSV that
         serves as the non-landmark set.
      2. If the test CSV has exactly the row count checked below, copy the
         sample submission to `./submission.csv` and return early
         (presumably the public/sample test set — TODO confirm).
      3. Extract global features for the gallery (index images for
         'retrieval', train images otherwise), the non-landmark set and the
         test set.
      4. Release the module-level `models` and the cached CUDA memory.
      5. Retrieve against the non-landmark set for both the gallery and the
         test embeddings, then retrieve test against the gallery, passing the
         non-landmark scores along.
      6. Write the submission via `save_submission_csv`.

    Args:
        mode: 'retrieval' or 'recognition'; selects the input directories and
            the gallery set.

    Returns:
        Path of the written submission CSV.

    Side effects:
        Removes the module-level `models` (if present) to free GPU memory and
        writes the submission file.
    """
    # Image directories are only needed by the (currently disabled) LoFTR
    # reranking step further down.
    if mode == 'retrieval':
        train_image_dir = '../input/landmark-retrieval-2021/index'
        test_image_dir = '../input/landmark-retrieval-2021/test'
    elif mode == 'recognition':
        train_image_dir = '../input/landmark-recognition-2021/index'
        test_image_dir = '../input/landmark-recognition-2021/test'

    df_train = pd.read_csv(f'../input/landmark-{mode}-2021/train.csv')
    df_test  = pd.read_csv(f'../input/landmark-{mode}-2021/sample_submission.csv')
    df_nl    = pd.read_csv('../input/google-landmark-2021-validation/valid.csv')

    gc.collect()

    if mode == 'retrieval':
        if len(df_test) == 1129:
            shutil.copy('../input/landmark-retrieval-2021/sample_submission.csv','./submission.csv')
            return './submission.csv'

        df_index = index_df()
        train_ids, train_embeddings = extract_global_features(df_index, mode = 'index', prob_type = mode)
        del df_index
    else:
        if len(df_test) == 10345:
            shutil.copy('../input/landmark-recognition-2021/sample_submission.csv','./submission.csv')
            return './submission.csv'
        train_ids, train_embeddings = extract_global_features(df_train, mode = 'train', prob_type = mode)

    nolandmark_ids, nolandmark_embeddings = extract_global_features(df_nl, mode = 'nolandmark', prob_type = mode)
    test_ids, test_embeddings = extract_global_features(df_test, mode = 'test', prob_type = mode)

    del df_train, df_test, df_nl
    # Drop the models BEFORE emptying the CUDA cache, otherwise the weights'
    # blocks are still referenced and cannot be released here. `pop` with a
    # default avoids a KeyError when `models` has already been removed.
    globals().pop('models', None)
    gc.collect()
    torch.cuda.empty_cache()

    # Retrieval has no labels: every index image gets the placeholder -2.
    if mode == 'retrieval':
        labelmap = dict([(i, -2) for i in train_ids])
    else:
        labelmap = load_labelmap('../input/landmark-recognition-2021/train.csv')
    nolandmark_labelmap = dict([(i, -1) for i in nolandmark_ids])

    gc.collect()

    # Similarity of gallery embeddings to the non-landmark set.
    _, val_x = do_retrieval(nolandmark_labelmap, nolandmark_ids,
                            nolandmark_embeddings, train_embeddings,
                            hyperparameters.num_to_rerank, gallery_set='nolandmark',return_vals = True, name = 'train_nl_sim.pkl')

    # Similarity of test embeddings to the non-landmark set.
    _, val_y = do_retrieval(nolandmark_labelmap, nolandmark_ids,
                            nolandmark_embeddings, test_embeddings,
                            hyperparameters.num_to_rerank, gallery_set='nolandmark',return_vals = True, name = 'test_nl_sim.pkl')
    del nolandmark_embeddings, nolandmark_ids, nolandmark_labelmap

    # Main retrieval: test against the gallery. `val_x` is reduced to one
    # value per gallery item (mean over its retrieved non-landmark scores).
    train_ids_labels_and_scores = do_retrieval(labelmap, train_ids,
                                               train_embeddings, test_embeddings,
                                               hyperparameters.num_to_rerank,val_x = val_x[:,:].mean(axis=1).detach().cpu().numpy(),val_y = val_y, gallery_set='train', name= 'train_test_sim.pkl')
    torch.cuda.empty_cache()
    del train_embeddings, test_embeddings, val_y, val_x
    gc.collect()

    # Disabled LoFTR local-feature reranking, kept for reference:
#     matcher = LoFTR(config=default_cfg)
#     matcher.load_state_dict(torch.load("/kaggle/temp/loftr-repo/weights/outdoor_ds.ckpt")['state_dict'])
#     matcher = matcher.eval().cuda()

#     for test_index, test_id in tqdm(enumerate(test_ids), total=len(test_ids), desc='do LoFTR'):
#         train_ids_labels_and_scores[test_index] = rescore_and_rerank(test_image_dir, train_image_dir, test_id,
#                                                 train_ids_labels_and_scores[test_index], loftr_model=matcher)
    torch.cuda.empty_cache()
    submit_fname = save_submission_csv(f'/kaggle/input/landmark-{mode}-2021', test_ids = test_ids, predictions=train_ids_labels_and_scores, mode = mode)
#     save_pickle(train_ids_labels_and_scores, 'predicition_v1.pkl')
    del train_ids_labels_and_scores
    gc.collect()
    return submit_fname

In [21]:
# Run the retrieval pipeline end to end and load the submission CSV it wrote.
sub_v1    = pd.read_csv(get_prediction(mode = 'retrieval'))
# Disabled follow-up models, kept for reference:
# trf_model = GLRTRFModel(hyperparameters.embed_dim).eval().cuda()
# gcn_model = GATPairClassifier(nfeat=N_FEATURE, nhid=16, dropout=0.0, nheads=16, pooling='first').eval().cuda()
# lgb_model = load_pickle('')

In [22]:
# Show the submission frame as the cell output.
sub_v1

Unnamed: 0,id,images
0,00084cdf8f600d00,39ff080e3b9e37d9
1,00141b8a5a729084,d75e248790c371d4 a0a13eb5924b395c 49dac2cf6777...
2,0044d82ea7654ece,80f1aba556c3de4e
3,00d5b448fa93e1b8,2c6f6cbaa3f586c6
4,012436be7f659057,
...,...,...
1124,ff06f084134f4df6,acd395de725c6ffa 0adc1ff5ed5df4b5 4b57b48ac2b7...
1125,ff4135c3071f7b36,216d1bea7a259232 067a42c02294ce2c
1126,ff8b519e7dfc5506,59ca927b6e0c8a7a 452a2125ea39a713
1127,ffb08958f4e67f61,b6b5c29be4dd342d
