<a href="https://colab.research.google.com/github/NickyTan8899/tjy/blob/main/%E2%80%9Cpretrain_100epochs%E2%80%9D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# The `kagglehub` module imported here is reused by the download cell that follows.
import kagglehub
# NOTE(review): presumably asks for Kaggle credentials — confirm how they are supplied in this runtime.
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

# Competition data; the returned value is later assigned to `root` and used as the dataset root.
animal_clef_2025_path = kagglehub.competition_download('animal-clef-2025')
# Outputs of two Kaggle notebooks.
# NOTE(review): neither of these two names is referenced again in the visible cells — confirm they are still needed.
hathawaytan_balanced_accuracy_path = kagglehub.notebook_output_download('hathawaytan/balanced-accuracy')
hathawaytan_baseline_with_wildfusion_path = kagglehub.notebook_output_download('hathawaytan/baseline-with-wildfusion')

print('Data source import complete.')


In [None]:
# Install wildlife-datasets (from its `develop` branch) and wildlife-tools straight from GitHub.
# NOTE(review): neither install is pinned to a tag or commit, so the installed code can differ between runs.
!pip install git+https://github.com/WildlifeDatasets/wildlife-datasets@develop
!pip install git+https://github.com/WildlifeDatasets/wildlife-tools

In [None]:
import numpy as np
from typing import List, Union

def baks_compute(
        y_true: Union[List, np.ndarray],
        y_pred: Union[List, np.ndarray],
        identity_test_only: Union[List, np.ndarray]
    ) -> float:
    """Computes BAKS (balanced accuracy on known samples).

    Focuses only on samples with known identities (not in identity_test_only).
    Labels may be of mixed types (e.g. ints and strings in the same input).

    Args:
        y_true: True labels
        y_pred: Predicted labels, aligned element-wise with y_true
        identity_test_only: Labels of unknown identities (only in test set)

    Returns:
        Mean of the per-class accuracies over the known classes, or 0.0 when
        no known sample is present.
    """
    # Convert inputs to numpy arrays with object dtype to handle mixed types
    y_true = np.array(y_true, dtype=object)
    y_pred = np.array(y_pred, dtype=object)
    identity_test_only = np.array(identity_test_only, dtype=object)

    # Filter out unknown samples
    mask = ~np.isin(y_true, identity_test_only)
    y_true_known = y_true[mask]
    y_pred_known = y_pred[mask]

    if len(y_true_known) == 0:
        return 0.0

    # De-duplicate by hashing instead of np.unique: np.unique sorts its input
    # and therefore raises TypeError when ints and strings are mixed.
    unique_classes = list(dict.fromkeys(y_true_known.tolist()))

    # Compute per-class accuracy
    class_accuracies = []
    for cls in unique_classes:
        cls_mask = (y_true_known == cls)
        # Guard against labels that never compare equal to themselves (e.g. NaN)
        if np.sum(cls_mask) > 0:
            class_accuracies.append(np.mean(y_pred_known[cls_mask] == cls))

    # Return the balanced accuracy (mean of per-class accuracies)
    return float(np.mean(class_accuracies)) if class_accuracies else 0.0

def baus_compute(
        y_true: Union[List, np.ndarray],
        y_pred: Union[List, np.ndarray],
        identity_test_only: Union[List, np.ndarray],
        new_class: Union[int, str]
    ) -> float:
    """Computes BAUS (balanced accuracy on unknown samples).

    Focuses only on samples with unknown identities (in identity_test_only).
    Labels may be of mixed types (e.g. ints and strings in the same input).

    Args:
        y_true: True labels
        y_pred: Predicted labels, aligned element-wise with y_true
        identity_test_only: Labels of unknown identities (only in test set)
        new_class: Label used for identifying unknown samples

    Returns:
        Mean, over the unknown classes, of the fraction of their samples
        predicted as new_class; 0.0 when no unknown sample is present.
    """
    # Convert inputs to numpy arrays with object dtype to handle mixed types
    y_true = np.array(y_true, dtype=object)
    y_pred = np.array(y_pred, dtype=object)
    identity_test_only = np.array(identity_test_only, dtype=object)

    # Filter to include only unknown samples
    mask = np.isin(y_true, identity_test_only)
    y_true_unknown = y_true[mask]
    y_pred_unknown = y_pred[mask]

    if len(y_true_unknown) == 0:
        return 0.0

    # De-duplicate by hashing instead of np.unique: np.unique sorts its input
    # and therefore raises TypeError when ints and strings are mixed.
    unique_unknown_classes = list(dict.fromkeys(y_true_unknown.tolist()))

    # For each unknown class, check if its samples were predicted as new_class
    class_accuracies = []
    for cls in unique_unknown_classes:
        cls_mask = (y_true_unknown == cls)
        # Guard against labels that never compare equal to themselves (e.g. NaN)
        if np.sum(cls_mask) > 0:
            # For unknown samples, the correct prediction is new_class
            class_accuracies.append(np.mean(y_pred_unknown[cls_mask] == new_class))

    # Return the balanced accuracy (mean of per-class accuracies)
    return float(np.mean(class_accuracies)) if class_accuracies else 0.0


def compute_geometric_mean(baks, baus):
    """Return the geometric mean of the two scores.

    Args:
        baks: Balanced accuracy on known samples
        baus: Balanced accuracy on unknown samples

    Returns:
        Square root of the product of both scores
    """
    product = baks * baus
    return np.sqrt(product)

In [None]:
import os
import numpy as np
import pandas as pd
import timm
import torchvision.transforms as T
from wildlife_datasets.datasets import AnimalCLEF2025
from wildlife_tools.features import DeepFeatures
from wildlife_tools.similarity import CosineSimilarity
from wildlife_tools.similarity.wildfusion import SimilarityPipeline, WildFusion
from wildlife_tools.similarity.pairwise.lightglue import MatchLightGlue
from wildlife_tools.similarity.pairwise.loftr import MatchLOFTR
from wildlife_tools.features.local import AlikedExtractor,SuperPointExtractor,SiftExtractor,DiskExtractor
from wildlife_tools.similarity.calibration import IsotonicCalibration,LogisticCalibration
import sys
# sys.path.append('/kaggle/input/balanced-accuracy')  # add the path
# from metric import score,BAKS,BAUS

def create_sample_submission(dataset_query, predictions, file_name='submission.csv'):
    """Write a two-column submission CSV.

    Args:
        dataset_query: Object whose ``metadata`` has an ``image_id`` column
        predictions: Predicted identities, one per row of the metadata
        file_name: Destination path of the CSV file
    """
    columns = {}
    columns['image_id'] = dataset_query.metadata['image_id']
    columns['identity'] = predictions
    submission = pd.DataFrame(columns)
    # The row index is not part of the submission format.
    submission.to_csv(file_name, index=False)

In [None]:

# Dataset root: the path returned by the competition download.
root = animal_clef_2025_path
# The transform definitions below are disabled (kept for reference only).
# transform_display = T.Compose([
#     T.Resize([384, 384]),
# ])
# transform = T.Compose([
#     *transform_display.transforms,
#     T.ToTensor(),
#     T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
# ])

# transforms_aliked = T.Compose([
#     T.Resize([256, 256]),
#     T.ToTensor()
# ])
# transforms_sift = T.Compose([
#     T.Resize([512, 512]),
#     T.ToTensor()
# ])
# Bare expression so the notebook displays the resolved path.
root

'/root/.cache/kagglehub/competitions/animal-clef-2025'

In [None]:
# Loading the dataset
# Deterministic preprocessing for the dataset. The name `transform` is only assigned in the
# training cell further down, so referencing it here fails with NameError on a top-to-bottom
# run (and silently picks up the random training augmentation on an out-of-order run).
# NOTE(review): 224x224 is chosen to match the Swin backbone used later — confirm.
dataset_transform = T.Compose([
    T.Resize([224, 224]),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
dataset = AnimalCLEF2025(root, load_label=True, transform=dataset_transform)
# Split the metadata into the labelled database part and the query part.
dataset_database = dataset.get_subset(dataset.metadata['split'] == 'database')
dataset_query = dataset.get_subset(dataset.metadata['split'] == 'query')
# Calibration set: the first 100 rows of the database metadata.
dataset_calibration = AnimalCLEF2025(root, df=dataset_database.metadata[:100], load_label=True)
# Database rows only; reused later as the training metadata.
meta = dataset.metadata.query('split == "database"')
num_classes = meta["identity"].nunique()
# Prints the number of distinct identities in the database split.
print(f"种类数: {num_classes}")
n_query = len(dataset_query)
# Bare expression so the notebook displays the number of query images.
n_query

种类数: 1102


2135

In [None]:
from itertools import chain
import torch
import timm
import pandas as pd
import torchvision.transforms as T
from torch.optim import SGD
import wildlife_tools
from wildlife_tools.data import WildlifeDataset
from wildlife_tools.train import ArcFaceLoss, BasicTrainer
import importlib
import wildlife_tools.data.dataset
import gc

# Fine-tunes a Swin backbone with an ArcFace objective on the database split,
# then saves the final checkpoint.

# Collect Python-level garbage
gc.collect()

# Clear the CUDA cache (releases unused GPU memory)
torch.cuda.empty_cache()
# Reload the dataset module so that the current on-disk version of the module is used.
importlib.reload(wildlife_tools.data.dataset)

# Rebind the class from the freshly reloaded module
WildlifeDataset = wildlife_tools.data.dataset.WildlifeDataset
# Dataset configuration
# metadata = pd.read_csv('/content/metadata.csv')
# image_root = '../data/images/size-256'
# Training-time augmentation: random crop to 224x224, RandAugment, then ImageNet-style normalization.
# This is randomized, so every access to an image yields a different view.
transform = T.Compose([
    T.RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0)),
    T.RandAugment(num_ops=2, magnitude=20),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
# dataset = WildlifeDataset(
#     metadata = metadata.query('split == "train"'),
#     root = image_root,
#     transform=transform
# )
# train_dataset=AnimalCLEF2025(root=root,transform=transform ,load_label=True)
# `meta` holds the database-split rows computed in the dataset-loading cell.
train_dataset=WildlifeDataset(root=root,metadata =meta,transform=transform ,load_label=True)
print(len(train_dataset))
# Backbone and loss configuration
backbone = timm.create_model('swin_base_patch4_window7_224', num_classes=0, pretrained=True)
# Probe the backbone once with a dummy batch to read the embedding dimension off the output shape.
with torch.no_grad():
    dummy_input = torch.randn(1, 3, 224, 224)
    embedding_size = backbone(dummy_input).shape[1]
    print(embedding_size)# 1024 according to the original note — TODO confirm (the recorded cell output shows 1536)
objective = ArcFaceLoss(num_classes=train_dataset.num_classes, embedding_size=embedding_size, margin=0.5, scale=64)


# Optimizer and scheduler configuration
# Both the backbone and the ArcFace objective parameters are optimized together.
params = chain(backbone.parameters(), objective.parameters())
optimizer = SGD(params=params, lr=0.001, momentum=0.9)
# Floor of the cosine schedule: 1/1000 of the initial learning rate.
min_lr = optimizer.defaults.get("lr") * 1e-3
# T_max matches the `epochs=100` passed to the trainer below.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=min_lr)

# Reload the trainer module so that the current on-disk version of the module is used.
importlib.reload(wildlife_tools.train.trainer)

# Step 2: re-import the class (must happen after the reload!)
from wildlife_tools.train.trainer import BasicTrainer
# Setup training
trainer = BasicTrainer(
    dataset=train_dataset,
    model=backbone,
    objective=objective,
    optimizer=optimizer,
    scheduler=scheduler,
    batch_size=64,
    accumulation_steps=2,
    num_workers=2,
    epochs=100,
    device='cuda',
)

trainer.train() # Call the modified train function
# Persist the final state under /content/final_checkpoints/final_model.pth
trainer.save("/content/final_checkpoints", file_name="final_model.pth")

13074


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.94G [00:00<?, ?B/s]

1536


Epoch 0:   0%|                                                              | 0/205 [00:02<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 486.00 MiB. GPU 0 has a total capacity of 39.56 GiB of which 254.88 MiB is free. Process 79589 has 39.30 GiB memory in use. Of the allocated memory 38.44 GiB is allocated by PyTorch, and 378.02 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Loading the models
# Pretrained MegaDescriptor alternative, currently disabled in favour of the fine-tuned backbone:
# name = 'hf-hub:BVRA/MegaDescriptor-B-224'
# model = timm.create_model(name, num_classes=0, pretrained=True)
device = 'cuda'
model = backbone


def _lightglue_pipeline(features, extractor_cls, size=256):
    """Build one isotonic-calibrated LightGlue pipeline for a local-feature type.

    Args:
        features: Feature name handed to MatchLightGlue
        extractor_cls: Extractor class, instantiated without arguments
        size: Side length of the square resize applied before extraction
    """
    return SimilarityPipeline(
        matcher=MatchLightGlue(features=features),
        extractor=extractor_cls(),
        transform=T.Compose([
            T.Resize([size, size]),
            T.ToTensor(),
        ]),
        calibration=IsotonicCalibration(),
    )


# Local-feature pipelines fused by WildFusion. Disabled candidates are kept as
# comments in their original position.
pipelines = [
    # _lightglue_pipeline('superpoint', SuperPointExtractor, size=512),
    _lightglue_pipeline('aliked', AlikedExtractor),
    _lightglue_pipeline('disk', DiskExtractor),
    # _lightglue_pipeline('sift', SiftExtractor, size=512),
    # SimilarityPipeline(
    #     matcher=MatchLOFTR(pretrained='outdoor'),
    #     extractor=None,
    #     transform=T.Compose([
    #         T.Resize([512, 512]),
    #         T.Grayscale(),
    #         T.ToTensor(),
    #     ]),
    #     calibration=IsotonicCalibration(),
    # ),
    # SimilarityPipeline(
    #     matcher=CosineSimilarity(),
    #     extractor=DeepFeatures(
    #         model=timm.create_model('hf-hub:BVRA/wildlife-mega-L-384', num_classes=0, pretrained=True)
    #     ),
    #     transform=T.Compose([
    #         T.Resize(size=(384, 384)),
    #         T.ToTensor(),
    #         T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    #     ]),
    #     calibration=IsotonicCalibration(),
    # ),
]

# Deterministic preprocessing for deep-feature extraction. The module-level `transform`
# is the training augmentation (RandomResizedCrop + RandAugment); using it for inference
# would embed a different random view of every image on each pass, which makes the
# similarities noisy and non-reproducible. The 224x224 size matches the training crop.
inference_transform = T.Compose([
    T.Resize([224, 224]),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Deep-feature pipeline built on the backbone bound to `model`; it is later
# handed to WildFusion as its priority pipeline.
matcher_mega = SimilarityPipeline(
    matcher=CosineSimilarity(),
    extractor=DeepFeatures(model=model, device=device, batch_size=16),
    transform=inference_transform,
    calibration=IsotonicCalibration(),
)

In [None]:
# Calibrating the WildFusion

# `pipelines` supplies the calibrated pipelines and `matcher_mega` the priority pipeline.
wildfusion = WildFusion(calibrated_pipelines = pipelines, priority_pipeline = matcher_mega)
# The calibration subset is passed as both arguments, i.e. it is matched against itself.
wildfusion.fit_calibration(dataset_calibration, dataset_calibration)

In [None]:
# Compute WildFusion similarity
# Scores every query image against the database; later cells index the result as [query, database].
# NOTE(review): B=25 presumably restricts the calibrated pipelines to the top-25 candidates
# of the priority pipeline — confirm against the WildFusion documentation.
similarity = wildfusion(dataset_query, dataset_database, B=25)

In [None]:
# Identity strings of the database images; indexed below by database column positions.
labels = dataset_database.labels_string

# Per query row, the column that sorts last, i.e. the best-scoring database image ...
pred_idx = similarity.argsort(axis=1)[:, -1]
# ... and the similarity score at that position.
pred_scores = similarity[range(n_query), pred_idx]

similarity.shape
# Last expression of the cell, so the notebook displays it.
pred_scores.shape

In [None]:
# Sweep the score threshold: a query whose best match scores below the threshold is
# labelled as a new individual, and one submission file is written per threshold.
for threshold in (0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8):
    below_threshold = pred_scores < threshold
    # Re-index on every iteration so the previous threshold's overwrites do not carry over
    # (assumes `labels` is a NumPy array, where this indexing yields a copy — TODO confirm).
    predictions = labels[pred_idx]
    predictions[below_threshold] = 'new_individual'
    create_sample_submission(
        dataset_query,
        predictions,
        file_name=f'sample_submission_{threshold}.csv',
    )