# Previous Code (Starter Code)

In [4]:
# Authenticate with Kaggle before any data is downloaded: some Kaggle data
# sources are private, so this cell must be run for the later
# kagglehub.competition_download(...) calls to be able to import them.
import kagglehub
kagglehub.login()


VBox(children=(HTML(value='<center> <img\nsrc=https://www.kaggle.com/static/images/site-logo.png\nalt=\'Kaggle…

Kaggle credentials set.
Kaggle credentials successfully validated.


In [2]:
# Download the AnimalCLEF 2025 competition data through kagglehub and keep
# the returned value in animal_clef_2025_path.
# This cell only imports the Kaggle data sources; it can be deleted once the
# data is available.
# NOTE: this notebook environment differs from Kaggle's Python environment,
# so libraries used by the notebook may be missing here.
# NOTE(review): animal_clef_2025_path is not referenced again in the visible
# cells; later cells call competition_download() themselves and use `root`.

animal_clef_2025_path = kagglehub.competition_download('animal-clef-2025')

print('Data source import complete.')


Downloading from https://www.kaggle.com/api/v1/competitions/data/download-all/animal-clef-2025...


100%|██████████| 1.72G/1.72G [00:14<00:00, 126MB/s]

Extracting files...





Data source import complete.


In [3]:
!pip install git+https://github.com/WildlifeDatasets/wildlife-datasets@develop
!pip install git+https://github.com/WildlifeDatasets/wildlife-tools

Collecting git+https://github.com/WildlifeDatasets/wildlife-datasets@develop
  Cloning https://github.com/WildlifeDatasets/wildlife-datasets (to revision develop) to /tmp/pip-req-build-4u_6q320
  Running command git clone --filter=blob:none --quiet https://github.com/WildlifeDatasets/wildlife-datasets /tmp/pip-req-build-4u_6q320
  Running command git checkout -b develop --track origin/develop
  Switched to a new branch 'develop'
  Branch 'develop' set up to track remote branch 'develop' from 'origin'.
  Resolved https://github.com/WildlifeDatasets/wildlife-datasets to commit 753d9bf64861c3e17011136b3436bf58bf02317f
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: wildlife-datasets
  Building wheel for wildlife-datasets (pyproject.toml) ... [?25l[?25hdone
  Created wheel for wildlife-datasets: filename=wildlife_datasets-1.0.6

In [5]:
import os

import numpy as np
import pandas as pd
import timm
import torch
import torchvision.transforms as T
from transformers import CLIPProcessor, CLIPModel
from wildlife_datasets.datasets import AnimalCLEF2025
from wildlife_tools.features import DeepFeatures
from wildlife_tools.features.local import AlikedExtractor
from wildlife_tools.features.local import SuperPointExtractor
from wildlife_tools.similarity import CosineSimilarity
from wildlife_tools.similarity.calibration import IsotonicCalibration
from wildlife_tools.similarity.pairwise.collectors import CollectCounts
from wildlife_tools.similarity.pairwise.lightglue import MatchLightGlue
from wildlife_tools.similarity.pairwise.loftr import MatchLOFTR
from wildlife_tools.similarity.pairwise.loftr import LoFTR
from wildlife_tools.similarity.wildfusion import SimilarityPipeline, WildFusion


def create_sample_submission(dataset_query, predictions, file_name='sample_submission.csv'):
    """Write a two-column submission CSV (``image_id``, ``identity``).

    Args:
        dataset_query: Object whose ``metadata`` table has an ``image_id``
            column; one output row is written per metadata row.
        predictions: Iterable of predicted identities, one per metadata row
            and in the same order as ``dataset_query.metadata``.
        file_name: Path of the CSV file to write (no index column is written).

    Raises:
        ValueError: If the number of predictions differs from the number of
            query images.
    """
    # Pair ids and predictions strictly by position. Passing pandas objects
    # straight into the DataFrame constructor would align them on their index
    # labels instead, which scrambles/pads the rows whenever the two indexes
    # differ (e.g. metadata of a subset vs. a freshly built Series).
    image_ids = dataset_query.metadata['image_id'].to_numpy()
    identities = list(predictions)

    if len(identities) != len(image_ids):
        raise ValueError(
            f"Got {len(identities)} predictions for {len(image_ids)} query images"
        )

    df = pd.DataFrame({
        'image_id': image_ids,
        'identity': identities,
    })
    df.to_csv(file_name, index=False)

# Our New Code - WildFusion (Ultimate)

In [9]:
# 0) Download the AnimalCLEF 2025 dataset using KaggleHub
root = kagglehub.competition_download('animal-clef-2025')

# 1) Resize-only transform (384x384), shared by the display and model pipelines
transform_display = T.Compose([
    T.Resize([384, 384]),
])

# 1.5) Pick the computation device automatically: use the GPU when torch can
#      see one, otherwise fall back to the CPU (no manual edit needed).
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# 2) Standard preprocessing: 384x384 resize -> tensor -> ImageNet mean/std normalization
transform = T.Compose([
    *transform_display.transforms,  # reuse the 384x384 resize defined above
    T.ToTensor(),                   # PIL image -> tensor
    T.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225)
    )
])

# 3) Preprocessing for the ALIKED local-feature extractor: 512x512 resize, no normalization
transforms_aliked = T.Compose([
    T.Resize([512, 512]),
    T.ToTensor()
])

# 4) Load the small DINOv2 ViT from timm with pretrained weights, in eval mode.
# NOTE(review): a later cell rebinds `dino`, `transform_dino` and
# `matcher_dino` to a `vit_large_patch14_dinov2` variant, so the objects built
# below are replaced before the fusion step when the notebook runs top to bottom.
dino = timm.create_model("vit_small_patch14_dinov2", pretrained=True).to(device).eval()

# Expected input height/width, read from the model's default config
_, H, W = dino.default_cfg['input_size']

# 5) Preprocessing for this DINOv2 model, using the mean/std from its own config
transform_dino = T.Compose([
    T.Resize((H, W)),
    T.ToTensor(),
    T.Normalize(
        mean=dino.default_cfg['mean'],
        std=dino.default_cfg['std']
    )
])

# 6) Similarity pipeline: DINO deep features compared with cosine similarity,
#    with an isotonic calibration attached to the scores
matcher_dino = SimilarityPipeline(
    matcher     = CosineSimilarity(),
    extractor   = DeepFeatures(model=dino, device=device, batch_size=16),
    transform   = transform_dino,
    calibration = IsotonicCalibration()
)


In [7]:
# Locate the competition data on disk
root = kagglehub.competition_download('animal-clef-2025')

# Whole dataset, with identity labels loaded (load_label=True)
dataset = AnimalCLEF2025(root, load_label=True)

# Partition on the metadata 'split' column:
#   'database' -> labeled gallery of known individuals (used for retrieval)
#   'query'    -> images to be re-identified or flagged as new individuals
dataset_database = dataset.get_subset(dataset.metadata['split'].eq('database'))
dataset_query = dataset.get_subset(dataset.metadata['split'].eq('query'))

# Calibration data: the first 500 database rows, re-indexed from 0, wrapped
# in their own labeled dataset object (used to fit score calibration)
cal_meta = dataset_database.metadata.iloc[:500].reset_index(drop=True)
dataset_calibration = AnimalCLEF2025(root, df=cal_meta, load_label=True)

# Number of query images
n_query = len(dataset_query)


In [10]:
# STEP 1) Load MegaDescriptor (MD).
# If the fine-tuned checkpoint file is present, its 'backbone' weights are
# loaded into a freshly created model; otherwise the cell falls back to the
# pretrained weights published under MODEL_NAME, so nothing has to be
# commented in or out by hand.
MODEL_NAME      = 'hf-hub:BVRA/MegaDescriptor-L-384'
CHECKPOINT_PATH = 'hybrid_checkpoint.pth'

if os.path.isfile(CHECKPOINT_PATH):
    # NOTE(security): torch.load unpickles the file - only load checkpoints you trust.
    ckpt = torch.load(CHECKPOINT_PATH, map_location=device)
    md   = timm.create_model(MODEL_NAME, num_classes=0, pretrained=False)
    md.load_state_dict(ckpt['backbone'])  # fine-tuned backbone weights
else:
    print(f"Checkpoint '{CHECKPOINT_PATH}' not found - using pretrained {MODEL_NAME} weights instead.")
    ckpt = None
    md   = timm.create_model(MODEL_NAME, num_classes=0, pretrained=True)
md = md.to(device).eval()  # move to the selected device and switch to eval mode

# STEP 2) MegaDescriptor similarity pipeline (global features, cosine similarity)
matcher_mega = SimilarityPipeline(
    matcher     = CosineSimilarity(),
    extractor   = DeepFeatures(model=md, device=device, batch_size=16),
    transform   = transform,              # 384x384 resize + ImageNet normalization
    calibration = IsotonicCalibration()
)

# STEP 3) Load the large DINOv2 ViT from timm (pretrained, no classification head)
dino = timm.create_model(
    "vit_large_patch14_dinov2",
    pretrained=True,
    num_classes=0
).to(device).eval()

# Preprocessing built from the model's own default config (input size, mean, std)
dino_cfg = dino.default_cfg
transform_dino = T.Compose([
    T.Resize(dino_cfg["input_size"][1:]),
    T.CenterCrop(dino_cfg["input_size"][1:]),
    T.ToTensor(),
    T.Normalize(mean=dino_cfg["mean"], std=dino_cfg["std"]),
])

# DINO similarity pipeline (cosine similarity on the ViT features)
matcher_dino = SimilarityPipeline(
    matcher     = CosineSimilarity(),
    extractor   = DeepFeatures(model=dino, device=device, batch_size=16),
    transform   = transform_dino,
    calibration = IsotonicCalibration()
)

# STEP 4) ALIKED features matched with LightGlue (keypoint-based matching)
matcher_aliked = SimilarityPipeline(
    matcher     = MatchLightGlue(features='aliked', device=device, batch_size=16),
    extractor   = AlikedExtractor(),
    transform   = transforms_aliked,
    calibration = IsotonicCalibration()
)

# STEP 5) SuperPoint features matched with LightGlue (second keypoint matcher)
matcher_superpoint = SimilarityPipeline(
    matcher     = MatchLightGlue(features='superpoint', device=device, batch_size=16),
    extractor   = SuperPointExtractor(),
    transform   = T.Compose([T.Resize([512,512]), T.ToTensor()]),
    calibration = IsotonicCalibration()
)




config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
Downloading: "https://github.com/cvg/LightGlue/releases/download/v0.1_arxiv/aliked_lightglue.pth" to /root/.cache/torch/hub/checkpoints/aliked_lightglue_v0-1_arxiv.pth
100%|██████████| 45.4M/45.4M [00:00<00:00, 113MB/s]
Downloading: "https://github.com/Shiaoming/ALIKED/raw/main/models/aliked-n16.pth" to /root/.cache/torch/hub/checkpoints/aliked-n16.pth
100%|██████████| 2.61M/2.61M [00:00<00:00, 37.0MB/s]
Downloading: "https://github.com/magicleap/SuperGluePretrainedNetwork/raw/master/models/weights/superpoint_v1.pth" to /root/.cache/torch/hub/checkpoints/superpoint_v1.pth
100%|██████████| 4.96M/4.96M [00:00<00:00, 60.5MB/s]
Downloading: "https://github.com/cvg/LightGlue/releases/download/v0.1_arxiv/superpoint_lightglue.pth" to /root/.cache/torch/hub/checkpoints/superpoint_lightglue_v0-1_arxiv.pth
100%|██████████| 45.3M/45.3M [00:00<00:00, 167MB/s]


In [11]:
# 1) Copy of the database metadata with a fresh 0..n-1 index
db_meta = dataset_database.metadata.copy().reset_index(drop=True)

# 2) Balanced calibration subset: up to `per_species` rows from every value of
#    the 'dataset' column, so calibration is not dominated by the largest group.
#    Each group is sampled explicitly and the pieces are concatenated; this
#    avoids groupby(...).apply(...) on a frame that still contains the
#    grouping column, which pandas has deprecated, and keeps the 'dataset'
#    column in the result.
per_species = 300
cal_meta = pd.concat(
    [
        species_df.sample(min(len(species_df), per_species), random_state=42)
        for _, species_df in db_meta.groupby("dataset")
    ],
    ignore_index=True,
)

# 3) Calibration dataset built from the sampled, labeled metadata
dataset_calibration = AnimalCLEF2025(
    root,
    df=cal_meta,
    transform=transform,
    load_label=True
)

# 4) Reference gallery for calibration: the database images whose image_id
#    appears in cal_meta.
# NOTE(review): the calibration images therefore appear on both sides of the
# comparison - confirm how fit_calibration treats self-matches.
small_db = dataset_database.get_subset(
    dataset_database.metadata["image_id"].isin(cal_meta["image_id"])
)

# 5) Build WildFusion from the four pipelines and fit their calibrations;
#    the MegaDescriptor pipeline is passed as the priority pipeline.
wildfusion = WildFusion(
    calibrated_pipelines=[matcher_aliked, matcher_mega, matcher_dino, matcher_superpoint],
    priority_pipeline=matcher_mega
)
wildfusion.fit_calibration(dataset_calibration, small_db)

# 6) Score the query set against the whole database.
#    B=25 is forwarded to WildFusion (presumably the size of the
#    priority-pipeline shortlist - confirm in the wildlife-tools docs).
similarity = wildfusion(dataset_query, dataset_database, B=25)

# 7) Best database match per query row, and the score of that match.
#    Rows are indexed from pred_idx itself, so this does not depend on a
#    row count computed in another cell.
pred_idx    = similarity.argmax(axis=1)
pred_scores = similarity[np.arange(len(pred_idx)), pred_idx]
labels      = dataset_database.labels_string  # identity labels of the database images


  .apply(lambda df: df.sample(min(len(df), per_species), random_state=42))
100%|█████████████████████████████████████████████████████████████| 900/900 [00:24<00:00, 36.93it/s]
100%|█████████████████████████████████████████████████████████████| 900/900 [00:22<00:00, 39.21it/s]
100%|███████████████████████████████████████████████████████| 50625/50625 [1:25:02<00:00,  9.92it/s]
100%|███████████████████████████████████████████████████████████████| 57/57 [01:11<00:00,  1.26s/it]
100%|███████████████████████████████████████████████████████████████| 57/57 [01:12<00:00,  1.27s/it]
100%|███████████████████████████████████████████████████████████████| 57/57 [04:48<00:00,  5.06s/it]
100%|███████████████████████████████████████████████████████████████| 57/57 [04:48<00:00,  5.07s/it]
100%|█████████████████████████████████████████████████████████████| 900/900 [00:22<00:00, 39.70it/s]
100%|█████████████████████████████████████████████████████████████| 900/900 [00:22<00:00, 39.30it/s]
100%|███████████

In [31]:
# Minimum best-match score, per source dataset, for accepting a known identity
species_thr = {'SeaTurtleID2022': 0.15, 'SalamanderID2025': 0.24, 'LynxID2025': 0.28}

# Cutoff used for any dataset name missing from species_thr
default_thr = 0.4

# Dataset name of every query image, in query order
species_list = dataset_query.metadata['dataset'].tolist()

# Keep the best-match label when its score reaches the cutoff for that
# dataset; otherwise predict 'new_individual'
final_preds = [
    labels[best] if best_score >= species_thr.get(species, default_thr) else 'new_individual'
    for best, best_score, species in zip(pred_idx, pred_scores, species_list)
]

# Write the submission file
create_sample_submission(dataset_query, final_preds, file_name='submission_per_species.csv')
print("→ Wrote submission_per_species.csv")


→ Wrote submission_per_species.csv
