In [113]:
#basics
import os
import numpy as np
import pandas as pd
from tqdm.auto import trange, tqdm
from glob import glob
from PIL import Image
import pickle
from sklearn.model_selection import train_test_split

#models
import timm
import torch
import torch.nn.functional as F
from oml.registry.models import get_extractor_by_cfg
from oml.registry.transforms import TRANSFORMS_REGISTRY, get_transforms_by_cfg
from albumentations.pytorch import ToTensorV2
from oml.const import MEAN, PAD_COLOR, STD, TNormParam
import albumentations as albu
import cv2

#preprocessing
import torchvision.transforms as T

#dataset
from wildlife_tools.data import FeatureDataset
from wildlife_datasets.datasets import AnimalCLEF2025

#features

from wildlife_tools.features import DeepFeatures
from wildlife_tools.features.local import AlikedExtractor, DiskExtractor, SuperPointExtractor
from wildlife_tools.similarity.wildfusion import SimilarityPipeline, WildFusion

from wildlife_tools.similarity.pairwise.collectors import CollectCounts
from wildlife_tools.similarity import CosineSimilarity

#for wildfusion
from wildlife_tools.similarity.wildfusion import SimilarityPipeline, WildFusion
from wildlife_tools.similarity.pairwise.lightglue import MatchLightGlue
#calibration
from wildlife_tools.similarity.calibration import IsotonicCalibration

In [116]:
# Directory that holds train.csv and the train/ and test/ image folders.
root_path = '.'

# Build the training metadata in one chain: derive image_id from the row index,
# copy individual_name into identity, prefix file_name with 'train/', then keep
# only the four columns used by the rest of the notebook.
train = (
    pd.read_csv(f'{root_path}/train.csv')
    .assign(
        image_id=lambda frame: frame.index,
        identity=lambda frame: frame['individual_name'],
        path=lambda frame: 'train/' + frame['file_name'],
    )
    .loc[:, ['image_id', 'path', 'identity', 'sequence']]
)

In [140]:
# Recursively archive the solution1 directory into solution1.zip.
# NOTE(review): the recorded output of this cell lists .ipynb_checkpoints/ entries
# inside the archive - confirm whether they are meant to be shipped.
!zip -r solution1.zip solution1

  adding: solution1/ (stored 0%)
  adding: solution1/ requirements.txt (deflated 52%)
  adding: solution1/README.md (stored 0%)
  adding: solution1/submission.csv (deflated 50%)
  adding: solution1/arkface_effnet_b7_sc089_768.ckpt?download=true (deflated 8%)
  adding: solution1/train_effnet.ipynb (deflated 73%)
  adding: solution1/.ipynb_checkpoints/ (stored 0%)
  adding: solution1/.ipynb_checkpoints/inference-checkpoint.ipynb (deflated 80%)
  adding: solution1/.ipynb_checkpoints/README-checkpoint.md (stored 0%)
  adding: solution1/.ipynb_checkpoints/train_effnet-checkpoint.ipynb (deflated 73%)
  adding: solution1/.ipynb_checkpoints/ requirements-checkpoint.txt (deflated 52%)
  adding: solution1/inference.ipynb (deflated 80%)


In [None]:
# Recursively archive the solution2 directory into solution2.zip.
!zip -r solution2.zip solution2

  adding: solution2/ (stored 0%)
  adding: solution2/requirements.txt (deflated 52%)
  adding: solution2/README.md (stored 0%)
  adding: solution2/submission.csv (deflated 50%)
  adding: solution2/best-v33.ckpt?download=true

In [117]:
# Split by identity (not by image) so every individual lands entirely in one side.
labels = train['identity'].unique()
labels_train, labels_val = train_test_split(
    labels,
    test_size=0.33,
    random_state=42,
    shuffle=True,
)

# Rows whose identity was drawn into labels_train are "train"; all others are "validation".
train["split"] = np.where(
    train["identity"].isin(labels_train),
    "train",
    "validation",
)

In [118]:
# glob() returns matches in arbitrary, filesystem-dependent order; sort them so
# that image_id (the row position) is the same on every run and every machine.
test = pd.DataFrame({'path': sorted(glob(f'{root_path}/test/*'))})
test['image_id'] = test.index
# Test images have no known identity; -1 is used as the placeholder label.
test['identity'] = -1

In [119]:
train_dataset = AnimalCLEF2025(root=root_path, metadata=train, load_label=True)
test_dataset = AnimalCLEF2025(root=root_path, metadata=test, load_label=True)

# Calibration subset: 200 training rows. The seed is fixed so the fitted
# calibrators (and therefore the final similarities) are reproducible.
# NOTE(review): this call passes the frame as `df=` while the two calls above use
# `metadata=` - confirm which keyword the installed AnimalCLEF2025 expects.
dataset_calibration = AnimalCLEF2025(
    root=root_path,
    df=train_dataset.metadata.sample(200, random_state=42),
    load_label=True,
)

# The query set used by the similarity pipelines and by make_sub below.
dataset_query = test_dataset

In [120]:
def make_sub(sims):
    """Build the submission frame from a query-vs-query similarity matrix.

    Parameters
    ----------
    sims : iterable of 1-D arrays
        One similarity row per image of the global ``dataset_query``; column ``j``
        refers to the ``j``-th row of ``dataset_query.metadata``.

    Returns
    -------
    pandas.DataFrame
        Columns ``image_name`` (file name of the query image) and
        ``recommendation`` (list of file names ordered from most to least similar).
    """
    import warnings

    # argsort yields *positions*, so resolve names positionally rather than via
    # the metadata index labels (which only coincide for a default RangeIndex).
    file_names = np.array(
        [path.split('/')[-1] for path in dataset_query.metadata.path],
        dtype=object,
    )

    # Keep warnings quiet while building the frame, but restore the caller's
    # warning filters afterwards instead of silencing the whole kernel.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        recommendations = [
            file_names[np.argsort(sim)[::-1]].tolist()
            for sim in sims
        ]
        result_df = pd.DataFrame()
        result_df['image_name'] = file_names[:len(recommendations)].tolist()
        result_df['recommendation'] = recommendations
    return result_df

In [121]:
def clean_by_sequence(dataset, sims, label_mapper, series_mapper):
    """Rank candidates per query and drop those from the query's own sequence.

    ``dataset`` is accepted for interface compatibility but is not used.
    ``label_mapper`` and ``series_mapper`` map positions ``0..n-1`` to the label
    and the sequence id of each item. Returns a list with one array of labels per
    row of ``sims``, ordered from most to least similar.
    """
    labels = np.array([label_mapper[pos] for pos in range(len(label_mapper))])
    sequences = np.array([series_mapper[pos] for pos in range(len(series_mapper))])

    # Descending-similarity candidate order for every query row.
    rankings = [np.argsort(row)[::-1] for row in sims]

    cleaned = []
    for query_pos, ranking in enumerate(rankings):
        # Keep only candidates that come from a different sequence than the query.
        other_sequence = sequences[ranking] != sequences[query_pos]
        cleaned.append(labels[ranking[other_sequence]])
    return cleaned

def cmc(preds, true_labels):
    """Mean rank-weighted hit score over the first five distinct predictions.

    For each query, the ranked predictions are de-duplicated in order until five
    distinct classes were collected. If the true label is among them the query
    scores 1, 0.9, 0.8, 0.7 or 0.6 depending on its position; otherwise 0.
    Returns the mean of the per-query scores.
    """
    rank_weights = [1,0.9,0.8,0.7,0.6]
    per_query = []
    for ranked, target in zip(preds, true_labels):
        distinct = []
        for candidate in ranked:
            if candidate not in distinct:
                distinct.append(candidate)
            if len(distinct) == 5:
                break
        if target in distinct:
            per_query.append(rank_weights[distinct.index(target)])
        else:
            per_query.append(0.0)
    return np.mean(per_query)

def score(sims):
    """Print and return the CMC@5 score of ``sims`` on the global ``dataset_query``.

    Reads the ``identity`` and ``sequence`` columns of ``dataset_query.metadata``,
    so the query dataset must provide both.
    """
    metadata = dataset_query.metadata
    label_by_index = dict(zip(metadata.index, metadata['identity']))
    sequence_by_index = dict(zip(metadata.index, metadata['sequence']))

    # Same-sequence candidates are removed before scoring.
    rankings = clean_by_sequence(dataset_query, sims, label_by_index, sequence_by_index)
    scored = cmc(rankings, metadata.identity)
    print('CMC@5', scored)
    return scored

In [125]:
def cosine_similarity(a, b):
    """Return the pairwise cosine similarity between rows of ``a`` and rows of ``b``.

    Both inputs are converted to tensors and L2-normalised (``F.normalize``
    defaults); the result is a NumPy array with one row per row of ``a`` and one
    column per row of ``b``.
    """
    tensor_a, tensor_b = torch.tensor(a), torch.tensor(b)
    unit_a = F.normalize(tensor_a)
    unit_b = F.normalize(tensor_b)
    return (unit_a @ unit_b.T).numpy()

class CosineSimilarity:
    """Matcher that scores two feature datasets by cosine similarity.

    Note: this definition replaces the ``CosineSimilarity`` name imported from
    ``wildlife_tools.similarity`` at the top of the notebook.
    """

    def __call__(self, query: FeatureDataset, database: FeatureDataset, **kwargs) -> np.ndarray:
        """Return the query-by-database similarity matrix; extra kwargs are ignored."""
        query_features = query.features
        database_features = database.features
        return cosine_similarity(query_features, database_features)

def manhattan_distance(a, b):
    """Return the pairwise L1 (Manhattan) distances between rows of ``a`` and ``b``."""
    tensor_a = torch.tensor(a)
    tensor_b = torch.tensor(b)
    # p=1 selects the Manhattan norm in torch.cdist.
    return torch.cdist(tensor_a, tensor_b, p=1).numpy()

class ManhattanDistance:
    """Matcher that scores two feature datasets by negated L1 distance."""

    def __call__(self, query: FeatureDataset, database: FeatureDataset, **kwargs) -> np.ndarray:
        """Return minus the pairwise Manhattan distance, so larger means closer."""
        distances = manhattan_distance(query.features, database.features)
        return -distances

def euclidean_distance(a, b):
    """Return the pairwise L2 (Euclidean) distances between rows of ``a`` and ``b``."""
    tensor_a = torch.tensor(a)
    tensor_b = torch.tensor(b)
    # p=2 selects the Euclidean norm in torch.cdist.
    return torch.cdist(tensor_a, tensor_b, p=2).numpy()

class EuclideanDistance:
    """Matcher that scores two feature datasets by negated L2 distance."""

    def __call__(self, query: FeatureDataset, database: FeatureDataset, **kwargs) -> np.ndarray:
        """Return minus the pairwise Euclidean distance, so larger means closer."""
        distances = euclidean_distance(query.features, database.features)
        return -distances

## Get Weights for PreRanker

In [126]:
def get_hits(dataset0, dataset1):
    """Return a boolean grid marking label matches between two datasets.

    Entry ``[i, j]`` is True when ``dataset0.labels_string[i]`` equals
    ``dataset1.labels_string[j]``.
    """
    labels0 = np.asarray(dataset0.labels_string)
    labels1 = np.asarray(dataset1.labels_string)
    # Column vector against row vector broadcasts to the full comparison grid.
    return labels0[:, None] == labels1[None, :]

class MultiRankerPipeline:
    """Run several matchers over one shared feature extraction and calibrate each.

    Parameters
    ----------
    matchers : sequence of callables
        Each is called as ``matcher(dataset0, dataset1)`` and must return a score array.
    extractor : callable or None
        Applied to a dataset to obtain features; ``None`` passes the dataset through.
    calibration : class or None
        Zero-argument factory for a calibration model exposing ``fit`` and ``predict``.
    transform : callable or None
        If given, assigned to ``dataset.transform`` before extraction.
    """

    def __init__(self, matchers, extractor, calibration, transform):
        self.matchers = matchers
        self.calibration = calibration
        self.calibration_done = False
        # One fitted calibration model per matcher; filled by fit_calibration.
        self.calibration_models = []
        self.extractor = extractor
        self.transform = transform

    def get_feature_dataset(self, dataset):
        """Apply the configured transform and extractor to ``dataset``.

        Note that the transform is assigned onto the dataset object itself.
        """
        if self.transform is not None:
            dataset.transform = self.transform
        if self.extractor is not None:
            return self.extractor(dataset)
        else:
            return dataset

    def fit_calibration(self, dataset0, dataset1):
        """Fit one calibration model per matcher on the given dataset pair.

        Raises
        ------
        ValueError
            If no calibration class was supplied.
        """
        if self.calibration is None:
            raise ValueError("Calibration method is not assigned.")

        dataset0 = self.get_feature_dataset(dataset0)
        dataset1 = self.get_feature_dataset(dataset1)

        # The ground-truth grid depends only on the datasets, not on the matcher,
        # so it is computed once instead of once per matcher.
        hits = get_hits(dataset0, dataset1).flatten()

        # Build into a local list so a failure midway does not leave a partially
        # fitted set of models on the instance.
        calibration_models = []
        for matcher in self.matchers:
            score = matcher(dataset0, dataset1)
            calibration = self.calibration()
            calibration.fit(score.flatten(), hits)
            calibration_models.append(calibration)

        self.calibration_models = calibration_models
        self.calibration_done = True

    def __call__(self, dataset0, dataset1) -> np.ndarray:
        """Return the calibrated scores of every matcher, stacked along axis 0.

        Raises
        ------
        ValueError
            If ``fit_calibration`` has not been run.
        """
        if not self.calibration_done:
            raise ValueError("Calibration is not fitted. Use fit_calibration method.")

        dataset0 = self.get_feature_dataset(dataset0)
        dataset1 = self.get_feature_dataset(dataset1)
        scores = []
        for matcher, calibration in zip(self.matchers, self.calibration_models):
            score = matcher(dataset0, dataset1)
            if calibration is not None:
                # Calibrators work on flat arrays; restore the matrix shape afterwards.
                score = calibration.predict(score.flatten()).reshape(score.shape)
            scores.append(score)
        return np.stack(scores)

In [127]:
def mega_pipe(transforms, model_name, device, dataset_query, dataset_database,
              use_custom=True, weights="best-v33.ckpt?download=true",
              batch_size=16, calibration_dataset=None):
    """Compute calibrated global-descriptor similarities between two datasets.

    Parameters
    ----------
    transforms : callable
        Image transform assigned to the datasets before feature extraction.
    model_name : str
        timm model identifier; only used when ``use_custom`` is False.
    device : str
        Device passed to ``DeepFeatures``.
    dataset_query, dataset_database
        Datasets to compare.
    use_custom : bool
        When True, load the ``vitl14_reg`` extractor from ``weights`` through
        ``get_extractor_by_cfg``; otherwise create ``model_name`` with timm.
    weights : str
        Checkpoint path for the custom extractor.
    batch_size : int
        Batch size used for feature extraction.
    calibration_dataset
        Dataset the calibrators are fitted on (against itself). Defaults to the
        notebook-level ``dataset_calibration``.

    Returns
    -------
    numpy.ndarray
        Stacked calibrated scores, one slice per matcher in the order
        cosine, Manhattan, Euclidean.
    """
    if use_custom:
        extractor_cfg = {
            "name": "vit",
            "args": {
                "arch": "vitl14_reg",
                "normalise_features": True,
                "weights": weights,
            },
        }
        model_mega = get_extractor_by_cfg(extractor_cfg)
    else:
        model_mega = timm.create_model(model_name, num_classes=0, pretrained=True)

    if calibration_dataset is None:
        # Fall back to the notebook-level calibration subset.
        calibration_dataset = dataset_calibration

    matcher_mega = MultiRankerPipeline(
        matchers=[CosineSimilarity(), ManhattanDistance(), EuclideanDistance()],
        extractor=DeepFeatures(model=model_mega, device=device, batch_size=batch_size),
        transform=transforms,
        calibration=IsotonicCalibration,
    )

    matcher_mega.fit_calibration(calibration_dataset, calibration_dataset)
    sims = matcher_mega(
        dataset_query,
        dataset_database,
    )

    return sims

In [128]:
def find_matched_sims(sims, k):
    """Average the similarity rows of each item's ``k`` best-matching neighbours.

    Entries with similarity in ``[0.99, 1.0]`` are excluded when choosing the
    neighbours. Row ``i`` of the result is the mean of the rows of ``sims`` that
    belong to the ``k`` selected neighbours of item ``i``.
    """
    near_one = (sims >= 0.99) & (sims <= 1.0)
    masked = np.where(near_one, -np.inf, sims)

    # Ascending argsort, flipped to descending, truncated to the k best columns.
    descending = np.flip(np.argsort(masked, axis=-1), axis=1)
    neighbours = descending[:, :k]

    # Indexing with an (n, k) array gathers whole rows: shape (n, k, n).
    return sims[neighbours].mean(axis=1)

In [129]:
class ResizeAndPad:
    """Resize a PIL image so its longer side equals ``size``, then pad to a square.

    Parameters
    ----------
    size : int
        Side length of the square output.
    pad_value
        Fill value passed to ``T.functional.pad`` for the added border.
    """

    def __init__(self, size, pad_value=0):
        self.size = size
        self.pad_value = pad_value

    def __call__(self, img):
        """Return ``img`` resized (aspect ratio kept) and centred on a square canvas."""
        w, h = img.size
        scale = self.size / max(w, h)
        # int() truncates, so a very elongated image could end up with a side of
        # 0 pixels, which resize() rejects; keep every side at least 1 pixel.
        new_w = max(1, int(w * scale))
        new_h = max(1, int(h * scale))
        img = img.resize((new_w, new_h), Image.BILINEAR)

        pad_w = self.size - new_w
        pad_h = self.size - new_h

        # Split the padding as evenly as possible; any odd pixel goes right/bottom.
        pad_left = pad_w // 2
        pad_right = pad_w - pad_left
        pad_top = pad_h // 2
        pad_bottom = pad_h - pad_top

        padding = (pad_left, pad_top, pad_right, pad_bottom)
        img = T.functional.pad(img, padding, fill=self.pad_value, padding_mode='constant')
        return img

# Input side length for the global-descriptor model. It is defined here because
# the transforms below read it at construction time; without it this cell fails
# with a NameError on a fresh kernel.
img_sz_mega = 224


def _build_mega_transform(flip=None):
    """Compose resize+pad, an optional flip, tensor conversion and normalisation."""
    steps = [ResizeAndPad(img_sz_mega, pad_value=PAD_COLOR)]
    if flip is not None:
        steps.append(flip)
    steps.extend([
        T.ToTensor(),
        T.Normalize(mean=MEAN, std=STD),
    ])
    return T.Compose(steps)


# p=1.0 makes the "random" flips deterministic: they serve as fixed test-time views.
transform_1 = _build_mega_transform(T.RandomHorizontalFlip(p=1.0))

transform_2 = _build_mega_transform(T.RandomVerticalFlip(p=1.0))

# Unflipped view.
transform_3 = _build_mega_transform()

In [130]:
# Input side length for the global-descriptor model.
img_sz_mega = 224

# Views evaluated in order: unflipped, vertical flip, horizontal flip.
mega_transforms = [transform_3, transform_2, transform_1]

In [56]:
# One device entry per transform; all runs use the same GPU.
devices = ['cuda:0', 'cuda:0', 'cuda:0']

# Query-vs-query similarities of the global descriptor, one result per view.
results_mega = []
for device, tr in zip(devices, mega_transforms):
    print('run with transforms: ', tr)
    run_sims = mega_pipe(
        transforms=tr,
        # Ignored while use_custom=True (the custom checkpoint is loaded instead).
        model_name='hf-hub:BVRA/MegaDescriptor-L-384',
        device=device,
        dataset_query=dataset_query,
        dataset_database=dataset_query,
        use_custom=True,
    )
    results_mega.append(run_sims)

run with transforms:  Compose(
    <__main__.ResizeAndPad object at 0x7f1a67046010>
    ToTensor()
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
)
Prefix <model.model.> was removed from the state dict.


100%|███████████████████████████████████████████████████████████████| 13/13 [00:04<00:00,  2.63it/s]
100%|███████████████████████████████████████████████████████████████| 13/13 [00:03<00:00,  3.92it/s]
100%|█████████████████████████████████████████████████████████████| 299/299 [01:16<00:00,  3.88it/s]
100%|█████████████████████████████████████████████████████████████| 299/299 [01:18<00:00,  3.83it/s]


run with transforms:  Compose(
    <__main__.ResizeAndPad object at 0x7f1a670460d0>
    RandomVerticalFlip(p=1.0)
    ToTensor()
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
)
Prefix <model.model.> was removed from the state dict.


100%|███████████████████████████████████████████████████████████████| 13/13 [00:03<00:00,  3.70it/s]
100%|███████████████████████████████████████████████████████████████| 13/13 [00:03<00:00,  3.50it/s]
100%|█████████████████████████████████████████████████████████████| 299/299 [01:17<00:00,  3.85it/s]
100%|█████████████████████████████████████████████████████████████| 299/299 [01:21<00:00,  3.68it/s]


run with transforms:  Compose(
    <__main__.ResizeAndPad object at 0x7f1a671a88d0>
    RandomHorizontalFlip(p=1.0)
    ToTensor()
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
)
Prefix <model.model.> was removed from the state dict.


100%|███████████████████████████████████████████████████████████████| 13/13 [00:03<00:00,  3.80it/s]
100%|███████████████████████████████████████████████████████████████| 13/13 [00:03<00:00,  3.86it/s]
100%|█████████████████████████████████████████████████████████████| 299/299 [01:18<00:00,  3.79it/s]
100%|█████████████████████████████████████████████████████████████| 299/299 [01:19<00:00,  3.78it/s]


In [58]:
# Per view: first matcher score plus the second and third matcher scores at
# weight 1/5 each; the per-view blends are then averaged across views.
sims_mega_blended = np.mean(
    np.array([run[0] + 1/5 * run[1] + 1/5 * run[2] for run in results_mega]),
    axis=0,
)

In [59]:
# Neighbour-averaged similarities built from each item's 8 best matches.
sims_mega_blended_extra = find_matched_sims(sims=sims_mega_blended, k=8)

In [60]:
# Blend the direct similarities (weight 2/3) with the neighbour-averaged ones (weight 1/3).
# NOTE: this overwrites sims_mega_blended with a value derived from itself, so
# re-running this cell on its own blends a second time and changes the result.
sims_mega_blended = 2/3 * sims_mega_blended + 1/3 * sims_mega_blended_extra

In [61]:
def get_pairs(priority,B=100):
    """Return the (row, column) index pairs of the top-``B`` entries of every row.

    ``priority`` is a 2-D score matrix. For each row the ``B`` highest-scoring
    columns (or all columns when there are fewer than ``B``) are selected with
    ``torch.topk``. The result has shape ``(rows * B, 2)``.
    """
    shortlist_size = min(B, priority.shape[1])
    top_columns = torch.topk(torch.tensor(priority), shortlist_size).indices.numpy()

    # Row id repeated once for each shortlisted column of that row.
    n_rows, n_cols = top_columns.shape
    row_ids = np.repeat(np.arange(n_rows), n_cols)
    return np.stack([row_ids, top_columns.flatten()]).T

# Shortlist for the local-feature matcher: each query's 100 best candidates
# according to the blended global-descriptor similarity.
pairs = get_pairs(priority=sims_mega_blended, B=100)

In [62]:
class MultiTreshSimilarityPipeline:
    """Similarity pipeline whose matcher returns one score matrix per key.

    The matcher is expected to return a dict-like ``{key: score_matrix}``; a
    separate calibration model is fitted and applied for every key.

    Parameters
    ----------
    matcher : callable
        Called as ``matcher(dataset0, dataset1)`` when fitting and as
        ``matcher(dataset0, dataset1, pairs=pairs)`` when scoring.
    extractor : callable or None
        Applied to a dataset to obtain features; ``None`` passes the dataset through.
    calibration : class or None
        Zero-argument factory for a calibration model exposing ``fit`` and ``predict``.
    transform : callable or None
        If given, assigned to ``dataset.transform`` before extraction.
    """

    def __init__(self, matcher = None, extractor = None, calibration = None, transform = None):
        self.matcher = matcher
        self.calibration_type = calibration
        self.calibration_done = False
        # Fitted calibration model per matcher key; filled by fit_calibration.
        self.calibrators = {}
        self.extractor = extractor
        self.transform = transform

    def get_feature_dataset(self, dataset):
        """Apply the configured transform and extractor to ``dataset``.

        Note that the transform is assigned onto the dataset object itself.
        """
        if self.transform is not None:
            dataset.transform = self.transform
        if self.extractor is not None:
            return self.extractor(dataset)
        else:
            return dataset

    def fit_calibration(self, dataset0, dataset1):
        """Fit one calibration model per matcher key on the given dataset pair.

        Raises
        ------
        ValueError
            If no calibration class was supplied.
        """
        # Fail early with a clear message instead of "NoneType is not callable".
        if self.calibration_type is None:
            raise ValueError("Calibration method is not assigned.")

        dataset0 = self.get_feature_dataset(dataset0)
        dataset1 = self.get_feature_dataset(dataset1)
        score = self.matcher(dataset0, dataset1)
        hits = get_hits(dataset0, dataset1).flatten()

        # Build into a local dict so a failure midway does not leave a partially
        # fitted set of calibrators on the instance.
        calibrators = {}
        for k in score:
            calibration = self.calibration_type()
            calibration.fit(score[k].flatten(), hits)
            calibrators[k] = calibration

        self.calibrators = calibrators
        self.calibration_done = True

    def __call__(self, dataset0, dataset1, pairs = None):
        """Return the matcher's scores with calibration applied per key.

        When ``pairs`` is given, only the ``(row, column)`` entries listed there
        are calibrated; all other entries are returned as the matcher produced
        them. The matcher's result object is modified in place and returned.

        Raises
        ------
        ValueError
            If ``fit_calibration`` has not been run.
        """
        if not self.calibration_done:
            raise ValueError("Calibration is not fitted. Use fit_calibration method.")

        dataset0 = self.get_feature_dataset(dataset0)
        dataset1 = self.get_feature_dataset(dataset1)
        score = self.matcher(dataset0, dataset1, pairs=pairs)

        # The pair indices are the same for every key, so convert them once.
        if pairs is not None:
            pair_array = np.array(pairs)
            idx0 = pair_array[:, 0]
            idx1 = pair_array[:, 1]

        for k in score:
            calibration = self.calibrators[k]
            if pairs is not None:
                score[k][idx0, idx1] = calibration.predict(score[k][idx0, idx1])
            else:
                score[k] = calibration.predict(score[k].flatten()).reshape(score[k].shape)
        return score

In [131]:
def superpoint_pipe(transforms, num_keypoints, thrs):
    """Score the shortlisted query pairs with SuperPoint + LightGlue match counts.

    Uses the notebook-level ``dataset_calibration``, ``dataset_query`` and ``pairs``.

    Parameters
    ----------
    transforms : callable
        Image transform applied before keypoint extraction.
    num_keypoints : int
        Passed to ``SuperPointExtractor`` as ``max_num_keypoints``.
    thrs : list of float
        Thresholds handed to ``CollectCounts``.

    Returns
    -------
    dict
        The calibrated score matrix for each key returned by the matcher, with
        NaN entries replaced by ``-inf``.
    """
    lightglue = MatchLightGlue(
        features='superpoint',
        device='cuda',
        batch_size=16,
        collector=CollectCounts(thresholds=thrs),
    )
    pipeline = MultiTreshSimilarityPipeline(
        matcher=lightglue,
        extractor=SuperPointExtractor(max_num_keypoints=num_keypoints),
        transform=transforms,
        calibration=IsotonicCalibration,
    )

    # Calibrate on the calibration subset matched against itself.
    pipeline.fit_calibration(dataset_calibration, dataset_calibration)

    raw_sims = pipeline(dataset_query, dataset_query, pairs=pairs)

    # NaN entries are pushed to -inf so they sort last.
    return {
        key: np.where(np.isnan(matrix), -np.inf, matrix)
        for key, matrix in raw_sims.items()
    }

In [132]:
# Three 512x512 views for the local-feature matcher: unflipped, vertically
# flipped and horizontally flipped (p=1 makes each flip deterministic).
aliked_transforms_512 = [
    T.Compose([T.Resize([512, 512]), *flip_steps, T.ToTensor()])
    for flip_steps in (
        [],
        [T.RandomVerticalFlip(p=1)],
        [T.RandomHorizontalFlip(p=1)],
    )
]

In [None]:
# One dict of per-threshold similarity matrices for each 512x512 view.
results_superpoint_512 = []
for tr in aliked_transforms_512:
    print('run with transforms: ', tr)
    view_sims = superpoint_pipe(
        transforms=tr,
        num_keypoints=384,
        thrs=[0.4, 0.5, 0.6, 0.7],
    )
    results_superpoint_512.append(view_sims)

In [137]:
# Average the threshold-0.6 similarity matrices across the three views.
final_sims = np.mean(
    np.array([view[0.6] for view in results_superpoint_512]),
    axis=0,
)

In [None]:
# Turn the final similarity matrix into ranked file-name recommendations.
sub = make_sub(sims=final_sims)

In [135]:
# Write the submission without the DataFrame index column.
sub.to_csv(path_or_buf='submission.csv', index=False)

In [99]:
# !zip submission.zip submission.csv
# from huggingface_hub import HfApi
# api = HfApi(token="")
# api.upload_file(
#     path_or_fileobj="submission.zip",
#     path_in_repo="submission.zip",
#     repo_id="seyolax/subs_ntoii_final",
#     repo_type="dataset",
# )

  adding: subs/submission_2.csv (deflated 50%)


submission_2.zip:   0%|          | 0.00/457M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/seyolax/subs_ntoii_final/commit/149e4fff319c318894a3b89143e5e4fb0a021b8e', commit_message='Upload submission_2.zip with huggingface_hub', commit_description='', oid='149e4fff319c318894a3b89143e5e4fb0a021b8e', pr_url=None, pr_revision=None, pr_num=None)