In [None]:
import ast
import gc
import os
import random
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

from tqdm.auto import tqdm
import timm


## Configuration

In [None]:
class config:
    """Central experiment configuration for this training notebook.

    Every setting is a class attribute, so it can be read from the class or
    from the ``cfg`` instance created right after the definition. Other cells
    mutate ``cfg`` at runtime (e.g. ``cfg.num_classes``, ``cfg.criterion``).
    """

    # --- Run control ---
    seed = 42
    debug = False          # when True, update_debug_settings() shrinks the run
    apex = False
    print_freq = 100
    num_workers = 2        # DataLoader worker processes

    # --- Input data ---
    train_csv = '/kaggle/input/birdclef-2025/train.csv'
    taxonomy_csv = '/kaggle/input/birdclef-2025/taxonomy.csv'
    spectrogram_npy = '/kaggle/input/birdclef2025-train-melspec-data-npy/birdClef_train_melspec_data.npy'

    # --- Model ---
    # timm registers EfficientNet variants under suffixed names
    # (efficientnet_b0, efficientnet_b1, ...); the bare family name
    # 'efficientnet' is not a registered model and makes timm.create_model fail.
    model_name = 'efficientnet_b0'
    pretrained = True
    in_channels = 1

    # --- Audio / mel-spectrogram parameters ---
    # Not read by any code visible in this notebook; presumably they record how
    # the precomputed .npy spectrograms were generated — TODO confirm.
    LOAD_DATA = True
    FS = 32000
    TARGET_DURATION = 5.0
    TARGET_SHAPE = (256, 256)

    N_FFT = 1024
    HOP_LENGTH = 512
    N_MELS = 128
    FMIN = 50
    FMAX = 14000

    # --- Training ---
    device = 'cuda'
    epochs = 10
    batch_size = 32
    criterion = 'LogitsLossBCE'   # also selects the checkpoint directory in run_training

    # --- Cross-validation ---
    n_fold = 5
    selected_folds = [0, 1, 2, 3, 4]

    # --- Optimizer ---
    optimizer = 'AdamW'
    lr = 5e-4
    weight_decay = 1e-5

    # --- LR schedule ---
    scheduler = 'CosineAnnealingLR'
    min_lr = 1e-6
    T_max = epochs   # evaluated once at class-definition time

    # --- Augmentation ---
    aug_prob = 0.5      # probability that a training sample is augmented
    mixup_alpha = 0.5   # Beta(alpha, alpha) parameter for mixup; <= 0 disables it

    def update_debug_settings(self):
        """Shrink the run to 2 epochs on fold 0 when ``debug`` is enabled.

        ``T_max`` is kept in sync with the reduced epoch count so the cosine
        schedule still spans the whole (shortened) run.
        """
        if self.debug:
            self.epochs = 2
            self.selected_folds = [0]
            self.T_max = self.epochs

cfg = config()
# No-op unless config.debug is True; without this call the debug flag had no effect.
cfg.update_debug_settings()

In [None]:
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Fixed kernels, no autotuning.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed every RNG once, before any dataset, model or loader is created.
set_seed(cfg.seed)

## Chuẩn bị dữ liệu

In [None]:
class BirdCLEFDatasetFromNPY(Dataset):
    """Dataset serving precomputed spectrograms with multi-hot species targets.

    Args:
        df: Metadata frame. Each row must provide ``sample_name``,
            ``primary_label`` and ``filename``; ``secondary_labels`` is optional.
        cfg: Config object; ``taxonomy_csv`` and ``aug_prob`` are read.
        spectrograms: Mapping from ``sample_name`` to a 2-D spectrogram array.
        mode: ``"train"`` enables random augmentation; any other value disables it.

    Raises:
        ValueError: If ``spectrograms`` is None.
    """

    def __init__(self, df, cfg, spectrograms=None, mode="train"):
        if spectrograms is None:
            # Fail early with a clear message instead of a TypeError on the
            # membership test below.
            raise ValueError("spectrograms must be a mapping from sample_name to a spectrogram array")

        self.df = df
        self.cfg = cfg
        self.mode = mode

        self.spectrograms = spectrograms

        # The class index of a species is its row position in the taxonomy file.
        taxonomy_df = pd.read_csv(self.cfg.taxonomy_csv)
        self.species_ids = taxonomy_df['primary_label'].tolist()
        self.num_classes = len(self.species_ids)
        self.label_to_idx = {label: idx for idx, label in enumerate(self.species_ids)}

        sample_names = set(self.df['sample_name'])

        # Counts unique sample names, while the denominator is the row count.
        found_samples = sum(1 for name in sample_names if name in self.spectrograms)
        print(f"Found {found_samples} matching spectrograms for {mode} dataset out of {len(self.df)} samples")

    def __len__(self):
        """Return the number of metadata rows."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return one sample as ``{'melspec', 'target', 'filename'}``.

        ``melspec`` is a float32 tensor with a leading channel axis added,
        ``target`` a float32 multi-hot vector of length ``num_classes``.

        Raises:
            KeyError: If the row's ``sample_name`` has no spectrogram.
        """
        row = self.df.iloc[idx]
        samplename = row['sample_name']

        # torch.tensor copies, so in-place augmentation never touches the cache.
        spec = torch.tensor(self.spectrograms[samplename], dtype=torch.float32).unsqueeze(0)

        if self.mode == "train" and random.random() < self.cfg.aug_prob:
            spec = self.apply_spec_augmentations(spec)

        target = self.encode_label(row['primary_label'])

        if 'secondary_labels' in row:
            for label in self._parse_secondary_labels(row['secondary_labels']):
                # Unknown labels (including the '' placeholder) are ignored.
                if label in self.label_to_idx:
                    target[self.label_to_idx[label]] = 1.0

        return {
            'melspec': spec,
            'target': torch.tensor(target, dtype=torch.float32),
            'filename': row['filename']
        }

    @staticmethod
    def _parse_secondary_labels(value):
        """Normalise a raw ``secondary_labels`` cell into a list of labels.

        Missing values (None, NaN, ``pd.NA``, blank strings) give ``[]``.
        Strings are parsed with ``ast.literal_eval`` rather than ``eval`` so a
        CSV cell can never execute code; malformed strings raise ``ValueError``
        or ``SyntaxError``. Other iterables are returned as a list.
        """
        if value is None or value is pd.NA:
            return []
        if isinstance(value, float) and np.isnan(value):
            return []
        if isinstance(value, str):
            text = value.strip()
            if not text:
                return []
            parsed = ast.literal_eval(text)
            # A bare quoted label would otherwise be iterated character by character.
            if isinstance(parsed, str):
                return [parsed]
            return list(parsed)
        return list(value)

    def apply_spec_augmentations(self, spec):
        """Randomly mask bands and jitter gain/bias of a ``(1, H, W)`` tensor.

        Each of the three augmentations fires independently with probability
        0.5. Masking writes zeros into ``spec`` in place.
        """
        # Mask 1-3 bands of 5-20 columns along the last axis
        # (presumably time for a (mel, time) layout — TODO confirm).
        if random.random() < 0.5:
            num_masks = random.randint(1, 3)
            for _ in range(num_masks):
                width = random.randint(5, 20)
                start = random.randint(0, spec.shape[2] - width)
                spec[0, :, start:start+width] = 0

        # Mask 1-3 bands of 5-20 rows along axis 1
        # (presumably frequency — TODO confirm).
        if random.random() < 0.5:
            num_masks = random.randint(1, 3)
            for _ in range(num_masks):
                height = random.randint(5, 20)
                start = random.randint(0, spec.shape[1] - height)
                spec[0, start:start+height, :] = 0

        # Random brightness/contrast
        if random.random() < 0.5:
            gain = random.uniform(0.8, 1.2)
            bias = random.uniform(-0.1, 0.1)
            spec = spec * gain + bias
            # assumes spectrogram values are normalised to [0, 1] — TODO confirm
            spec = torch.clamp(spec, 0, 1)

        return spec

    def encode_label(self, label):
        """Encode label to one-hot vector (all zeros if the label is unknown)."""
        target = np.zeros(self.num_classes)
        if label in self.label_to_idx:
            target[self.label_to_idx[label]] = 1.0
        return target

In [None]:
def collate_fn(batch):
    """Merge per-sample dicts into one batch dict.

    ``None`` samples are dropped; if nothing remains an empty dict is returned.
    ``target`` tensors are stacked. ``melspec`` tensors are stacked only when
    all of them share one shape, otherwise they stay a Python list. Every other
    field is returned as a list.
    """
    samples = [sample for sample in batch if sample is not None]
    if not samples:
        return {}

    collated = {field: [] for field in samples[0]}
    for sample in samples:
        for field, value in sample.items():
            collated[field].append(value)

    for field, values in collated.items():
        if not isinstance(values[0], torch.Tensor):
            continue
        if field == 'target':
            collated[field] = torch.stack(values)
        elif field == 'melspec':
            distinct_shapes = {tuple(tensor.shape) for tensor in values}
            if len(distinct_shapes) == 1:
                collated[field] = torch.stack(values)

    return collated

## Định nghĩa model

In [None]:
class BirdCLEFModel(nn.Module):
    """timm backbone with a linear multi-label head and optional in-forward mixup.

    Args:
        cfg: Config object. ``taxonomy_csv``, ``model_name``, ``pretrained`` and
            ``in_channels`` are read; ``mixup_alpha`` is read if present.
            ``cfg.num_classes`` is overwritten with the taxonomy row count.
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        # One logit per taxonomy row; the count is also published on cfg.
        cfg.num_classes = len(pd.read_csv(cfg.taxonomy_csv))

        self.backbone = timm.create_model(
            cfg.model_name,
            pretrained=cfg.pretrained,
            in_chans=cfg.in_channels,
            drop_rate=0.2,
            drop_path_rate=0.2,
        )

        # Replace the backbone's own classifier so it emits features only.
        self.feat_dim = self.backbone.classifier.in_features
        self.backbone.classifier = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(self.feat_dim, cfg.num_classes)

        self.mixup_enabled = getattr(cfg, 'mixup_alpha', 0) > 0
        if self.mixup_enabled:
            self.mixup_alpha = cfg.mixup_alpha

    def forward(self, x, targets=None):
        """Compute logits, plus the mixup loss when mixup is active.

        Mixup is applied only in training mode, with mixup enabled and
        ``targets`` supplied; then ``(logits, loss)`` is returned. In every
        other case only ``logits`` is returned.
        """
        use_mixup = self.training and self.mixup_enabled and targets is not None

        targets_a = targets_b = lam = None
        if use_mixup:
            x, targets_a, targets_b, lam = self.mixup_data(x, targets)

        features = self.backbone(x)
        if isinstance(features, dict):
            features = features['features']

        # Unpooled 4-D feature maps are pooled and flattened to (batch, channels).
        if features.dim() == 4:
            pooled = self.pooling(features)
            features = pooled.view(pooled.size(0), -1)

        logits = self.classifier(features)
        if not use_mixup:
            return logits

        loss = self.mixup_criterion(
            F.binary_cross_entropy_with_logits, logits, targets_a, targets_b, lam
        )
        return logits, loss

    def mixup_data(self, x, targets):
        """Blend each sample with a randomly chosen partner from the same batch.

        Returns ``(mixed_x, targets, permuted_targets, lam)`` where ``lam`` is
        drawn from ``Beta(mixup_alpha, mixup_alpha)``.
        """
        lam = np.random.beta(self.mixup_alpha, self.mixup_alpha)
        partner = torch.randperm(x.size(0)).to(x.device)
        blended = lam * x + (1 - lam) * x[partner]
        return blended, targets, targets[partner], lam

    def mixup_criterion(self, criterion, pred, y_a, y_b, lam):
        """Return the ``lam``-weighted sum of the loss against both target sets."""
        loss_a = criterion(pred, y_a)
        loss_b = criterion(pred, y_b)
        return lam * loss_a + (1 - lam) * loss_b

## Training Loop

In [None]:
def train_epoch(model, loader, optimizer, criterion, device, scheduler=None):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Network to train; called as ``model(inputs)``.
        loader: Iterable of batch dicts with ``'melspec'`` and ``'target'``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss taking ``(logits, targets)``.
        device: Device the batch tensors are moved to.
        scheduler: Accepted for interface compatibility; it is not stepped
            here (run_training steps it once per epoch).

    Returns:
        ``(mean_loss, auc)`` over all processed batches.
    """
    model.train()
    losses = []
    all_targets = []
    all_outputs = []

    pbar = tqdm(enumerate(loader), total=len(loader), desc="Training")

    for step, batch in pbar:

        # collate_fn returns {} when every sample of a batch was None.
        if not batch:
            continue

        if isinstance(batch['melspec'], list):
            # Ragged batch (spectrograms could not be stacked): run samples one
            # by one and accumulate gradients into a single optimizer step.
            batch_outputs = []
            batch_losses = []
            n_samples = len(batch['melspec'])

            # Clear gradients ONCE per batch. Zeroing inside the loop discarded
            # every sample's gradient except the last one.
            optimizer.zero_grad()

            for i in range(n_samples):
                inputs = batch['melspec'][i].unsqueeze(0).to(device)
                target = batch['target'][i].unsqueeze(0).to(device)

                output = model(inputs)
                loss = criterion(output, target)
                # Scale so the accumulated gradient is the batch mean, matching
                # the stacked-batch branch.
                (loss / n_samples).backward()

                batch_outputs.append(output.detach().cpu())
                batch_losses.append(loss.item())

            optimizer.step()
            outputs = torch.cat(batch_outputs, dim=0).numpy()
            loss = np.mean(batch_losses)
            targets = batch['target'].numpy()

        else:
            inputs = batch['melspec'].to(device)
            targets = batch['target'].to(device)

            optimizer.zero_grad()
            # NOTE(review): the model is called without targets, so
            # BirdCLEFModel never takes its mixup path and the tuple branch
            # below is currently dead. Pass `targets` here if mixup is meant
            # to be active — confirm intent before changing.
            outputs = model(inputs)

            if isinstance(outputs, tuple):
                outputs, loss = outputs
            else:
                loss = criterion(outputs, targets)

            loss.backward()
            optimizer.step()

            outputs = outputs.detach().cpu().numpy()
            targets = targets.detach().cpu().numpy()

        all_outputs.append(outputs)
        all_targets.append(targets)
        # np.mean yields an np.float64 (a float subclass); tensors need .item().
        losses.append(loss if isinstance(loss, float) else loss.item())

        pbar.set_postfix({
            'train_loss': np.mean(losses[-10:]) if losses else 0,
            'lr': optimizer.param_groups[0]['lr']
        })

    all_outputs = np.concatenate(all_outputs)
    all_targets = np.concatenate(all_targets)
    auc = calculate_auc(all_targets, all_outputs)
    avg_loss = np.mean(losses)

    return avg_loss, auc

def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Stacked batches are scored in one forward pass. Ragged batches (where
    ``'melspec'`` is still a list) are scored sample by sample and their
    per-sample losses averaged.

    Returns:
        ``(mean_loss, auc)`` over all batches.
    """
    model.eval()
    epoch_losses, target_chunks, output_chunks = [], [], []

    with torch.no_grad():
        for batch in tqdm(loader, desc="Validation"):
            specs = batch['melspec']

            if not isinstance(specs, list):
                inputs = specs.to(device)
                labels = batch['target'].to(device)

                logits = model(inputs)
                loss = criterion(logits, labels)

                outputs = logits.detach().cpu().numpy()
                targets = labels.detach().cpu().numpy()
            else:
                sample_logits = []
                sample_losses = []

                for i, spec in enumerate(specs):
                    single_input = spec.unsqueeze(0).to(device)
                    single_target = batch['target'][i].unsqueeze(0).to(device)

                    logit = model(single_input)
                    sample_losses.append(criterion(logit, single_target).item())
                    sample_logits.append(logit.detach().cpu())

                outputs = torch.cat(sample_logits, dim=0).numpy()
                loss = np.mean(sample_losses)
                targets = batch['target'].numpy()

            output_chunks.append(outputs)
            target_chunks.append(targets)
            epoch_losses.append(loss if isinstance(loss, float) else loss.item())

    stacked_outputs = np.concatenate(output_chunks)
    stacked_targets = np.concatenate(target_chunks)

    return np.mean(epoch_losses), calculate_auc(stacked_targets, stacked_outputs)

def calculate_auc(targets, outputs):
    """Macro-averaged ROC-AUC over the classes that can be scored.

    Args:
        targets: Array of shape ``(n_samples, n_classes)`` with binary labels.
        outputs: Array of the same shape holding raw logits.

    Returns:
        Mean per-class ROC-AUC, or ``0.0`` when no class can be scored.
    """
    n_samples, num_classes = targets.shape[0], targets.shape[1]
    aucs = []

    probs = 1 / (1 + np.exp(-outputs))

    for i in range(num_classes):
        positives = np.sum(targets[:, i])
        # ROC-AUC is undefined unless the column holds both a positive and a
        # negative. The previous check only skipped all-negative columns, so an
        # all-positive column still reached roc_auc_score.
        if 0 < positives < n_samples:
            class_auc = roc_auc_score(targets[:, i], probs[:, i])
            aucs.append(class_auc)

    return np.mean(aucs) if aucs else 0.0

## Training!

In [None]:
def get_criterion_logitsLoss(cfg):
    """Return a default ``nn.BCEWithLogitsLoss``; ``cfg`` is accepted but unused."""
    return nn.BCEWithLogitsLoss()
def run_training(df, cfg,criterion):
    """Train one model per selected fold and report cross-validated AUC.

    For every fold in ``cfg.selected_folds`` a fresh model, AdamW optimizer and
    cosine LR schedule are created. The checkpoint with the best validation AUC
    is written to ``<save_dir>/model_f<fold>.pth``, where ``save_dir`` is
    ``"logits_loss"`` when ``cfg.criterion == 'LogitsLossBCE'`` and
    ``"focal_loss"`` otherwise.

    Args:
        df: Training metadata; must contain ``primary_label`` (used for
            stratification) plus the columns BirdCLEFDatasetFromNPY reads.
        cfg: Config object; ``cfg.num_classes`` is overwritten here.
        criterion: Loss module taking ``(logits, targets)``.
    """
    taxonomy_df = pd.read_csv(cfg.taxonomy_csv)
    species_ids = taxonomy_df['primary_label'].tolist()
    cfg.num_classes = len(species_ids)

    # SECURITY: allow_pickle=True unpickles arbitrary objects. Only point
    # cfg.spectrogram_npy at a file from a trusted source.
    spectrograms = np.load(cfg.spectrogram_npy, allow_pickle=True).item()
    print(f"Loaded {len(spectrograms)} pre-computed mel spectrograms")

    # torch.save does not create missing directories.
    save_dir = "logits_loss" if cfg.criterion == 'LogitsLossBCE' else "focal_loss"
    os.makedirs(save_dir, exist_ok=True)

    skf = StratifiedKFold(n_splits=cfg.n_fold, shuffle=True, random_state=cfg.seed)

    # (fold, best_auc) pairs. Keeping the fold id with its score keeps the
    # summary correct even when cfg.selected_folds is not in ascending order.
    fold_scores = []

    for fold, (train_idx, val_idx) in enumerate(skf.split(df, df['primary_label'])):
        if fold not in cfg.selected_folds:
            continue

        print(f'Fold {fold}')

        train_df = df.iloc[train_idx].reset_index(drop=True)
        val_df = df.iloc[val_idx].reset_index(drop=True)

        print(f'Training set: {len(train_df)} samples')
        print(f'Validation set: {len(val_df)} samples')

        train_dataset = BirdCLEFDatasetFromNPY(train_df, cfg, spectrograms=spectrograms, mode='train')
        val_dataset = BirdCLEFDatasetFromNPY(val_df, cfg, spectrograms=spectrograms, mode='valid')

        train_loader = DataLoader(
            train_dataset,
            batch_size=cfg.batch_size,
            shuffle=True,
            num_workers=cfg.num_workers,
            pin_memory=True,
            collate_fn=collate_fn,
            drop_last=True
        )

        val_loader = DataLoader(
            val_dataset,
            batch_size=cfg.batch_size,
            shuffle=False,
            num_workers=cfg.num_workers,
            pin_memory=True,
            collate_fn=collate_fn
        )

        model = BirdCLEFModel(cfg).to(cfg.device)
        optimizer = optim.AdamW(
            model.parameters(),
            lr=cfg.lr,
            weight_decay=cfg.weight_decay
        )

        scheduler = lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=cfg.T_max,
            eta_min=cfg.min_lr
        )

        best_auc = 0
        best_epoch = 0
        print('number of epochs:', cfg.epochs)
        for epoch in range(cfg.epochs):
            print(f"\nEpoch {epoch+1}/{cfg.epochs}")

            train_loss, train_auc = train_epoch(
                model,
                train_loader,
                optimizer,
                criterion,
                cfg.device,
                scheduler
            )

            val_loss, val_auc = validate(model, val_loader, criterion, cfg.device)

            # The cosine schedule advances once per epoch.
            scheduler.step()

            print(f"Train Loss: {train_loss:.4f}, Train AUC: {train_auc:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Val AUC: {val_auc:.4f}")

            if val_auc > best_auc:
                best_auc = val_auc
                best_epoch = epoch + 1
                print(f"New best AUC: {best_auc:.4f} at epoch {best_epoch}")

                # NOTE: 'cfg' is pickled as a Python object, so loading this
                # checkpoint requires the config class to be importable.
                torch.save({
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
                    'epoch': epoch,
                    'val_auc': val_auc,
                    'train_auc': train_auc,
                    'cfg': cfg
                }, os.path.join(save_dir, f"model_f{fold}.pth"))

        print(f"\nBest AUC for fold {fold}: {best_auc:.4f} at epoch {best_epoch}")
        fold_scores.append((fold, best_auc))

        # Release GPU memory before the next fold builds a fresh model.
        del model, optimizer, scheduler, train_loader, val_loader
        torch.cuda.empty_cache()
        gc.collect()

    print("Cross-Validation Results:")
    for fold, score in fold_scores:
        print(f"Fold {fold}: {score:.4f}")
    if fold_scores:
        print(f"Mean AUC: {np.mean([score for _, score in fold_scores]):.4f}")
    else:
        print("No folds were trained; check cfg.selected_folds against cfg.n_fold.")
    print("="*60)

In [None]:
if __name__ == "__main__":
    import time  # NOTE(review): not used in this cell; drop it if no other cell relies on it.

    train_df = pd.read_csv(cfg.train_csv)
    taxonomy_df = pd.read_csv(cfg.taxonomy_csv)
    train_df['filepath'] = '/kaggle/input/birdclef-2025/train_audio' + '/' + train_df.filename
    # '<dir>/<file>.<ext>' -> '<dir>-<file>'; this is the key used to look up
    # each row's spectrogram in the precomputed dictionary.
    train_df['sample_name'] = train_df['filename'].map(lambda x: x.split('/')[0] + '-' + x.split('/')[-1].split('.')[0])

    # Pin the loss name explicitly instead of relying on the config default.
    # A later cell sets cfg.criterion to 'FocalLossBCE'; re-running this cell
    # afterwards would otherwise write the BCE checkpoints into "focal_loss".
    cfg.criterion = 'LogitsLossBCE'
    criterion = get_criterion_logitsLoss(cfg)
    run_training(train_df, cfg, criterion)

    print("\nTraining complete!")

Loaded 28564 pre-computed mel spectrograms

Training set: 22851 samples
Validation set: 5713 samples
Found 22851 matching spectrograms for train dataset out of 22851 samples
Found 5713 matching spectrograms for valid dataset out of 5713 samples
number of epochs: 10

Epoch 1/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0361, Train AUC: 0.6206
Val Loss: 0.0251, Val AUC: 0.8184
New best AUC: 0.8184 at epoch 1

Epoch 2/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0230, Train AUC: 0.8230
Val Loss: 0.0203, Val AUC: 0.9018
New best AUC: 0.9018 at epoch 2

Epoch 3/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0192, Train AUC: 0.9063
Val Loss: 0.0181, Val AUC: 0.9213
New best AUC: 0.9213 at epoch 3

Epoch 4/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0167, Train AUC: 0.9393
Val Loss: 0.0167, Val AUC: 0.9365
New best AUC: 0.9365 at epoch 4

Epoch 5/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0146, Train AUC: 0.9583
Val Loss: 0.0164, Val AUC: 0.9388
New best AUC: 0.9388 at epoch 5

Epoch 6/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0127, Train AUC: 0.9744
Val Loss: 0.0158, Val AUC: 0.9437
New best AUC: 0.9437 at epoch 6

Epoch 7/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0108, Train AUC: 0.9832
Val Loss: 0.0157, Val AUC: 0.9443
New best AUC: 0.9443 at epoch 7

Epoch 8/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0092, Train AUC: 0.9893
Val Loss: 0.0156, Val AUC: 0.9453
New best AUC: 0.9453 at epoch 8

Epoch 9/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0080, Train AUC: 0.9923
Val Loss: 0.0157, Val AUC: 0.9455
New best AUC: 0.9455 at epoch 9

Epoch 10/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0075, Train AUC: 0.9944
Val Loss: 0.0158, Val AUC: 0.9458
New best AUC: 0.9458 at epoch 10

Best AUC for fold 0: 0.9458 at epoch 10

Training set: 22851 samples
Validation set: 5713 samples
Found 22851 matching spectrograms for train dataset out of 22851 samples
Found 5713 matching spectrograms for valid dataset out of 5713 samples
number of epochs: 10

Epoch 1/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0370, Train AUC: 0.5737
Val Loss: 0.0260, Val AUC: 0.7993
New best AUC: 0.7993 at epoch 1

Epoch 2/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0235, Train AUC: 0.8128
Val Loss: 0.0199, Val AUC: 0.8931
New best AUC: 0.8931 at epoch 2

Epoch 3/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0191, Train AUC: 0.8915
Val Loss: 0.0175, Val AUC: 0.9275
New best AUC: 0.9275 at epoch 3

Epoch 4/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0164, Train AUC: 0.9369
Val Loss: 0.0159, Val AUC: 0.9417
New best AUC: 0.9417 at epoch 4

Epoch 5/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0140, Train AUC: 0.9610
Val Loss: 0.0153, Val AUC: 0.9443
New best AUC: 0.9443 at epoch 5

Epoch 6/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0120, Train AUC: 0.9747
Val Loss: 0.0151, Val AUC: 0.9501
New best AUC: 0.9501 at epoch 6

Epoch 7/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0101, Train AUC: 0.9852
Val Loss: 0.0151, Val AUC: 0.9484

Epoch 8/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7832b1911750>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1604, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1587, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7832b1911750>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1604, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1587, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0086, Train AUC: 0.9908
Val Loss: 0.0151, Val AUC: 0.9476

Epoch 9/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0075, Train AUC: 0.9937
Val Loss: 0.0152, Val AUC: 0.9491

Epoch 10/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0069, Train AUC: 0.9950
Val Loss: 0.0151, Val AUC: 0.9489

Best AUC for fold 1: 0.9501 at epoch 6

Training set: 22851 samples
Validation set: 5713 samples
Found 22851 matching spectrograms for train dataset out of 22851 samples
Found 5713 matching spectrograms for valid dataset out of 5713 samples
number of epochs: 10

Epoch 1/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0363, Train AUC: 0.6069
Val Loss: 0.0255, Val AUC: 0.8171
New best AUC: 0.8171 at epoch 1

Epoch 2/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0229, Train AUC: 0.8345
Val Loss: 0.0204, Val AUC: 0.8948
New best AUC: 0.8948 at epoch 2

Epoch 3/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0191, Train AUC: 0.9022
Val Loss: 0.0179, Val AUC: 0.9141
New best AUC: 0.9141 at epoch 3

Epoch 4/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0166, Train AUC: 0.9348
Val Loss: 0.0166, Val AUC: 0.9294
New best AUC: 0.9294 at epoch 4

Epoch 5/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0145, Train AUC: 0.9598
Val Loss: 0.0160, Val AUC: 0.9380
New best AUC: 0.9380 at epoch 5

Epoch 6/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0126, Train AUC: 0.9743
Val Loss: 0.0156, Val AUC: 0.9421
New best AUC: 0.9421 at epoch 6

Epoch 7/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0107, Train AUC: 0.9834
Val Loss: 0.0154, Val AUC: 0.9475
New best AUC: 0.9475 at epoch 7

Epoch 8/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0091, Train AUC: 0.9895
Val Loss: 0.0155, Val AUC: 0.9468

Epoch 9/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0080, Train AUC: 0.9931
Val Loss: 0.0156, Val AUC: 0.9470

Epoch 10/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0074, Train AUC: 0.9945
Val Loss: 0.0155, Val AUC: 0.9475

Best AUC for fold 2: 0.9475 at epoch 7

Training set: 22851 samples
Validation set: 5713 samples
Found 22851 matching spectrograms for train dataset out of 22851 samples
Found 5713 matching spectrograms for valid dataset out of 5713 samples
number of epochs: 10

Epoch 1/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0364, Train AUC: 0.5951
Val Loss: 0.0254, Val AUC: 0.7933
New best AUC: 0.7933 at epoch 1

Epoch 2/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0228, Train AUC: 0.8240
Val Loss: 0.0198, Val AUC: 0.9030
New best AUC: 0.9030 at epoch 2

Epoch 3/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0186, Train AUC: 0.8994
Val Loss: 0.0177, Val AUC: 0.9289
New best AUC: 0.9289 at epoch 3

Epoch 4/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0159, Train AUC: 0.9409
Val Loss: 0.0162, Val AUC: 0.9360
New best AUC: 0.9360 at epoch 4

Epoch 5/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0136, Train AUC: 0.9620
Val Loss: 0.0157, Val AUC: 0.9418
New best AUC: 0.9418 at epoch 5

Epoch 6/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0116, Train AUC: 0.9771
Val Loss: 0.0152, Val AUC: 0.9469
New best AUC: 0.9469 at epoch 6

Epoch 7/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0098, Train AUC: 0.9861
Val Loss: 0.0153, Val AUC: 0.9458

Epoch 8/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0082, Train AUC: 0.9914
Val Loss: 0.0154, Val AUC: 0.9473
New best AUC: 0.9473 at epoch 8

Epoch 9/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0071, Train AUC: 0.9944
Val Loss: 0.0155, Val AUC: 0.9474
New best AUC: 0.9474 at epoch 9

Epoch 10/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0066, Train AUC: 0.9952
Val Loss: 0.0155, Val AUC: 0.9475
New best AUC: 0.9475 at epoch 10

Best AUC for fold 3: 0.9475 at epoch 10

Training set: 22852 samples
Validation set: 5712 samples
Found 22852 matching spectrograms for train dataset out of 22852 samples
Found 5712 matching spectrograms for valid dataset out of 5712 samples
number of epochs: 10

Epoch 1/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0363, Train AUC: 0.5974
Val Loss: 0.0253, Val AUC: 0.8119
New best AUC: 0.8119 at epoch 1

Epoch 2/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0232, Train AUC: 0.8221
Val Loss: 0.0201, Val AUC: 0.8999
New best AUC: 0.8999 at epoch 2

Epoch 3/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0192, Train AUC: 0.8953
Val Loss: 0.0181, Val AUC: 0.9288
New best AUC: 0.9288 at epoch 3

Epoch 4/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0166, Train AUC: 0.9363
Val Loss: 0.0165, Val AUC: 0.9348
New best AUC: 0.9348 at epoch 4

Epoch 5/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0143, Train AUC: 0.9609
Val Loss: 0.0156, Val AUC: 0.9438
New best AUC: 0.9438 at epoch 5

Epoch 6/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0124, Train AUC: 0.9747
Val Loss: 0.0153, Val AUC: 0.9478
New best AUC: 0.9478 at epoch 6

Epoch 7/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0105, Train AUC: 0.9850
Val Loss: 0.0151, Val AUC: 0.9482
New best AUC: 0.9482 at epoch 7

Epoch 8/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0089, Train AUC: 0.9904
Val Loss: 0.0154, Val AUC: 0.9449

Epoch 9/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0078, Train AUC: 0.9932
Val Loss: 0.0154, Val AUC: 0.9456

Epoch 10/10


Training:   0%|          | 0/714 [00:00<?, ?it/s]

Validation:   0%|          | 0/179 [00:00<?, ?it/s]

Train Loss: 0.0072, Train AUC: 0.9949
Val Loss: 0.0155, Val AUC: 0.9453

Best AUC for fold 4: 0.9482 at epoch 7

Cross-Validation Results:
Fold 0: 0.9458
Fold 1: 0.9501
Fold 2: 0.9475
Fold 3: 0.9475
Fold 4: 0.9482
Mean AUC: 0.9478

Training complete!


## Định nghĩa hàm loss mới FocalLossBCE

In [None]:
class FocalLossBCE(torch.nn.Module):
    """Weighted sum of BCE-with-logits and sigmoid focal loss.

    The focal term is implemented here with torch only. The notebook never
    imports ``torchvision``, so calling
    ``torchvision.ops.focal_loss.sigmoid_focal_loss`` raised ``NameError`` on
    the first forward pass. The formula follows the standard sigmoid focal
    loss: ``alpha_t * (1 - p_t) ** gamma * BCE``.

    Args:
        alpha: Weight of positive targets in the focal term (negatives get
            ``1 - alpha``); a negative value disables the weighting.
        gamma: Focusing exponent applied to ``1 - p_t``.
        reduction: ``"mean"``, ``"sum"`` or ``"none"``, applied to both terms.
        bce_weight: Multiplier of the BCE term.
        focal_weight: Multiplier of the focal term.
    """

    def __init__(
            self,
            alpha: float = 0.25,
            gamma: float = 2,
            reduction: str = "mean",
            bce_weight: float = 0.6,
            focal_weight: float = 1.4,
    ):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        self.bce = torch.nn.BCEWithLogitsLoss(reduction=reduction)
        self.bce_weight = bce_weight
        self.focal_weight = focal_weight

    def forward(self, logits, targets):
        """Return ``bce_weight * BCE + focal_weight * focal`` for the batch."""
        focal_loss = self._sigmoid_focal_loss(logits, targets)
        bce_loss = self.bce(logits, targets)
        return self.bce_weight * bce_loss + self.focal_weight * focal_loss

    def _sigmoid_focal_loss(self, logits, targets):
        """Compute the sigmoid focal loss with this module's alpha/gamma/reduction.

        Raises:
            ValueError: If ``self.reduction`` is not ``"none"``, ``"mean"`` or ``"sum"``.
        """
        probs = torch.sigmoid(logits)
        ce_loss = torch.nn.functional.binary_cross_entropy_with_logits(
            logits, targets, reduction="none"
        )
        # p_t is the probability the model assigns to the true class.
        p_t = probs * targets + (1 - probs) * (1 - targets)
        loss = ce_loss * ((1 - p_t) ** self.gamma)

        if self.alpha >= 0:
            alpha_t = self.alpha * targets + (1 - self.alpha) * (1 - targets)
            loss = alpha_t * loss

        if self.reduction == "none":
            return loss
        if self.reduction == "mean":
            return loss.mean()
        if self.reduction == "sum":
            return loss.sum()
        raise ValueError(
            f"Invalid value for 'reduction': {self.reduction!r}; expected 'none', 'mean' or 'sum'"
        )

def get_criterion_FocalLoss(cfg):
    """Return a ``FocalLossBCE`` built with its default weights; ``cfg`` is accepted but unused."""
    focal_bce = FocalLossBCE()
    return focal_bce

In [None]:
if __name__ == "__main__":

    # Rebuild the metadata frame from scratch so this cell does not depend on
    # the frame left behind by the BCE run.
    train_df = pd.read_csv(cfg.train_csv)
    taxonomy_df = pd.read_csv(cfg.taxonomy_csv)

    train_df['filepath'] = '/kaggle/input/birdclef-2025/train_audio' + '/' + train_df['filename']
    # '<dir>/<file>.<ext>' -> '<dir>-<file>'
    train_df['sample_name'] = train_df['filename'].map(
        lambda rel_path: rel_path.split('/')[0] + '-' + rel_path.split('/')[-1].split('.')[0]
    )

    # Any name other than 'LogitsLossBCE' sends checkpoints to "focal_loss".
    cfg.criterion = 'FocalLossBCE'
    criterion = get_criterion_FocalLoss(cfg)
    run_training(train_df, cfg, criterion)

    print("\nTraining complete!")