In [1]:
import ast
import gc
import logging
import math
import os
import random
import time
import warnings
from pathlib import Path

# cv2, librosa and timm are already called by the cells below but were never imported.
import cv2
import librosa
import numpy as np
import pandas as pd
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score
from torch.utils.data import Dataset



In [None]:
class CFG:
    """Central configuration for data paths, audio features, model and training.

    Attributes are read as ``cfg.<name>`` by the preprocessing functions, the
    dataset class and the model defined in later cells.
    """

    seed = 42
    # When True the dataset keeps at most 1000 randomly sampled rows.
    debug = False

    # --- Paths -----------------------------------------------------------
    train_csv = '/kaggle/input/birdclef-2025/train.csv'
    taxonomy_csv = '/kaggle/input/birdclef-2025/taxonomy.csv'
    # Misspelled name kept because it is the existing public attribute.
    samp_sumbission_csv = '/kaggle/input/birdclef-2025/sample_submission.csv'
    sample_submission_csv = samp_sumbission_csv
    # NOTE: despite the ``_csv`` suffix, the next two entries are directories.
    test_soundscapes_csv = '/kaggle/input/birdclef-2025/test_soundscapes'
    trainaudio_csv = '/kaggle/input/birdclef-2025/train_audio'
    # Alias read by the dataset class when it builds per-file audio paths.
    train_datadir = trainaudio_csv

    # --- Optimisation ----------------------------------------------------
    optimizer = 'AdamW'
    lr = 5e-4
    weight_decay = 1e-5
    epochs = 20
    batch_size = 32
    criterion = 'BCEWithLogitsLoss'
    scheduler = 'CosineAnnealingLR'
    min_lr = 1e-6
    T_max = epochs

    # --- Model -----------------------------------------------------------
    model_name = 'efficientnet_b0'
    pretrained = True
    in_channels = 1
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # --- Audio / spectrogram ---------------------------------------------
    N_FFT = 1024
    HOP_LENGTH = 512
    N_MELS = 128
    FMIN = 50
    FMAX = 14000
    FS = 32000
    TARGET_DURATION = 5.0
    TARGET_SHAPE = (256, 256)

    # --- Dataset behaviour -----------------------------------------------
    # False: a sample without a cached spectrogram is computed from its audio
    # file on the fly; True: only cached spectrograms are used.
    LOAD_DATA = False
    # Probability of applying spectrogram masking to a training sample.
    # NOTE(review): 0.5 is a placeholder default - tune as needed.
    aug_prob = 0.5


cfg = CFG()

In [None]:
def audio2melspec(audio_data, cfg=None):
    """Convert a 1-D audio signal into a normalised log-mel spectrogram.

    Args:
        audio_data: 1-D float array of audio samples.
        cfg: Optional config object. ``FS``, ``N_FFT``, ``HOP_LENGTH``,
            ``N_MELS``, ``FMIN`` and ``FMAX`` are read from it when present;
            otherwise the previously hard-coded values are used.

    Returns:
        2-D array with values min-max scaled into [0, 1].
    """
    if np.isnan(audio_data).any():
        # Replace NaN samples with the mean of the valid samples
        # (0.0 when every sample is NaN, so no NaN reaches the STFT).
        mean_signal = np.nanmean(audio_data)
        if not np.isfinite(mean_signal):
            mean_signal = 0.0
        audio_data = np.nan_to_num(audio_data, nan=mean_signal)

    mel_spec = librosa.feature.melspectrogram(
        y=audio_data,
        sr=getattr(cfg, 'FS', 32000),
        n_fft=getattr(cfg, 'N_FFT', 1024),
        hop_length=getattr(cfg, 'HOP_LENGTH', 512),
        n_mels=getattr(cfg, 'N_MELS', 128),
        fmin=getattr(cfg, 'FMIN', 50),
        fmax=getattr(cfg, 'FMAX', 14000),
        power=2.0,
    )

    # NOTE(review): the original function ended without returning anything.
    # Log scaling plus min-max normalisation is the reconstruction chosen
    # here - confirm it matches the intended preprocessing.
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    spec_min = mel_spec_db.min()
    spec_max = mel_spec_db.max()
    return (mel_spec_db - spec_min) / (spec_max - spec_min + 1e-8)


def process_audio_file(audio_path, cfg):
    """Load one audio file and return its centre clip as a mel spectrogram.

    The clip is ``cfg.TARGET_DURATION`` seconds long at ``cfg.FS`` Hz. Audio
    shorter than that is tiled before cropping, and zero-padded if still short.

    Args:
        audio_path: Path of the audio file to load.
        cfg: Config providing ``FS``, ``TARGET_DURATION`` and ``TARGET_SHAPE``.

    Returns:
        ``float32`` array of shape ``cfg.TARGET_SHAPE``, or ``None`` when the
        file could not be processed (the error is printed).
    """
    try:
        audio_data, _ = librosa.load(audio_path, sr=cfg.FS)
        if len(audio_data) == 0:
            raise ValueError("audio file contains no samples")

        target_samples = int(cfg.TARGET_DURATION * cfg.FS)

        # Tile short recordings until they cover the target duration.
        if len(audio_data) < target_samples:
            n_copy = math.ceil(target_samples / len(audio_data))
            if n_copy > 1:
                audio_data = np.concatenate([audio_data] * n_copy)

        # Centre crop (the original referenced an undefined ``start_idx``).
        start_idx = max(0, int(len(audio_data) / 2 - target_samples / 2))
        end_idx = min(len(audio_data), start_idx + target_samples)
        center_audio = audio_data[start_idx:end_idx]

        if len(center_audio) < target_samples:
            center_audio = np.pad(
                center_audio,
                (0, target_samples - len(center_audio)),
                mode='constant',
            )

        mel_spec = audio2melspec(center_audio, cfg)

        if mel_spec.shape != tuple(cfg.TARGET_SHAPE):
            # cv2.resize expects dsize as (width, height) = (cols, rows),
            # whereas TARGET_SHAPE is compared against ndarray.shape (rows, cols).
            rows, cols = cfg.TARGET_SHAPE
            mel_spec = cv2.resize(mel_spec, (cols, rows), interpolation=cv2.INTER_LINEAR)

        return mel_spec.astype(np.float32)
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        return None
def spectogram(cfg, code):
    """Compute a mel spectrogram for every row of a metadata table.

    Args:
        cfg: Config object forwarded to ``process_audio_file``.
        code: Table to iterate with ``.iterrows()``; each row must provide
            ``samplename`` and ``filepath``.
            NOTE(review): assumed to be the training DataFrame - confirm
            against the caller.

    Returns:
        Dict mapping ``samplename`` to its spectrogram. Rows that fail or
        yield ``None`` are left out.
    """
    start_time = time.time()

    all_bird_data = {}
    errors = []

    if code is None:
        return all_bird_data

    for _, row in code.iterrows():
        # Resolved outside the try so the error message never raises itself.
        filepath = row.get('filepath', '<unknown>')
        try:
            samplename = row['samplename']
            mel_spec = process_audio_file(row['filepath'], cfg)
            if mel_spec is not None:
                all_bird_data[samplename] = mel_spec
        except Exception as e:
            print(f"Error processing {filepath}: {e}")
            errors.append((filepath, str(e)))

    end_time = time.time()
    print(
        f"Processed {len(all_bird_data)} files in {end_time - start_time:.2f}s "
        f"({len(errors)} errors)"
    )
    return all_bird_data




In [None]:
class Birdcleffromnpy(Dataset):
    """Map-style dataset yielding mel spectrograms and multi-hot targets.

    Spectrograms come from the ``spectrograms`` mapping when the sample is
    present there; otherwise, unless ``cfg.LOAD_DATA`` is true, they are
    computed from the audio file with ``process_audio_file``.
    """

    def __init__(self, df, cfg, spectrograms=None, mode="train"):
        """
        Args:
            df: Metadata table with ``filename`` and ``primary_label``
                columns, and optionally ``secondary_labels``, ``filepath``
                and ``samplename``.
            cfg: Config object; ``taxonomy_csv`` is read here.
            spectrograms: Optional mapping ``samplename -> 2-D array``.
            mode: ``"train"`` enables augmentation and missing-data warnings.
        """
        # Work on a copy so the derived columns do not leak into the caller's frame.
        self.df = df.copy()
        self.cfg = cfg
        self.mode = mode
        # The original stored this as ``spectograms`` but read ``spectrograms``.
        self.spectrograms = spectrograms

        taxonomy_df = pd.read_csv(self.cfg.taxonomy_csv)
        self.species_ids = taxonomy_df['primary_label'].tolist()
        self.num_classes = len(self.species_ids)
        self.label_to_idx = {label: idx for idx, label in enumerate(self.species_ids)}

        if 'filepath' not in self.df.columns:
            # ``train_datadir`` may be absent from the config; fall back to
            # the audio directory attribute that the config does define.
            audio_dir = getattr(self.cfg, 'train_datadir', None) or self.cfg.trainaudio_csv
            self.df['filepath'] = self.df['filename'].apply(
                lambda f: os.path.join(audio_dir, f)
            )

        if 'samplename' not in self.df.columns:
            self.df['samplename'] = self.df['filename'].apply(
                lambda x: x.split('/')[0] + '-' + Path(x).stem
            )

        if self.spectrograms is not None:
            found_samples = self.df['samplename'].isin(list(self.spectrograms)).sum()
            print(f"Found {found_samples} matching spectrograms for {mode} dataset out of {len(self.df)} samples")

        if getattr(cfg, 'debug', False):
            self.df = self.df.sample(min(1000, len(self.df)), random_state=cfg.seed).reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``{'melspec', 'target', 'filename'}`` for row ``idx``."""
        row = self.df.iloc[idx]
        samplename = row['samplename']

        spec = None
        if self.spectrograms is not None and samplename in self.spectrograms:
            spec = self.spectrograms[samplename]
        elif not getattr(self.cfg, 'LOAD_DATA', False):
            spec = process_audio_file(row['filepath'], self.cfg)

        if spec is None:
            # Fall back to silence so one bad sample does not abort a batch.
            spec = np.zeros(self.cfg.TARGET_SHAPE, dtype=np.float32)
            if self.mode == "train":
                print(f"Warning :Spectogram for{samplename} not found coludnt be genrated")

        # Add the channel dimension: (H, W) -> (1, H, W).
        spec = torch.tensor(spec, dtype=torch.float32).unsqueeze(0)

        if self.mode == "train" and random.random() < getattr(self.cfg, 'aug_prob', 0.0):
            spec = self.apply_spec_augmentations(spec)

        target = self.encode_label(row['primary_label'])

        secondary_labels = row.get('secondary_labels', None)
        if isinstance(secondary_labels, str):
            try:
                # literal_eval parses the list literal without executing code,
                # unlike eval, which is unsafe on data read from a CSV.
                secondary_labels = ast.literal_eval(secondary_labels)
            except (ValueError, SyntaxError):
                secondary_labels = []
        if isinstance(secondary_labels, (list, tuple)):
            for label in secondary_labels:
                label_idx = self.label_to_idx.get(label)
                if label_idx is not None:
                    target[label_idx] = 1.0

        return {
            'melspec': spec,
            'target': torch.tensor(target, dtype=torch.float32),
            'filename': row['filename']
        }

    def encode_label(self, label):
        """Return a ``float32`` one-hot vector for ``label``.

        A label that is not in the taxonomy yields an all-zero vector.
        """
        target = np.zeros(self.num_classes, dtype=np.float32)
        label_idx = self.label_to_idx.get(label)
        if label_idx is not None:
            target[label_idx] = 1.0
        return target

    def apply_spec_augmentations(self, spec):
        """Randomly zero bands along the last two axes of ``spec``.

        ``spec`` is indexed as ``spec[0, a, b]``; it is modified in place and
        also returned (the original returned nothing, which replaced the
        spectrogram with ``None``).
        """
        n_rows, n_cols = spec.shape[1], spec.shape[2]

        # Mask bands along the last axis.
        if random.random() < 0.5:
            num_masks = random.randint(1, 3)
            for _ in range(num_masks):
                # Clamp so the start range stays valid for small inputs.
                width = min(random.randint(5, 20), n_cols)
                start = random.randint(0, n_cols - width)
                spec[0, :, start:start + width] = 0

        # Mask bands along the second-to-last axis.
        if random.random() < 0.5:
            num_masks = random.randint(1, 3)
            for _ in range(num_masks):
                height = min(random.randint(5, 20), n_rows)
                start = random.randint(0, n_rows - height)
                spec[0, start:start + height, :] = 0

        return spec





        





In [None]:
class Model(nn.Module):
    def __init__(self, cfg):
        """Build a timm backbone with a fresh linear classification head.

        Args:
            cfg: Config providing ``taxonomy_csv``, ``model_name``,
                ``pretrained`` and ``in_channels``; ``mixup_alpha`` is
                optional. ``cfg.num_classes`` is set here as a side effect.
        """
        super().__init__()
        self.cfg = cfg

        taxonomy_df = pd.read_csv(cfg.taxonomy_csv)
        cfg.num_classes = len(taxonomy_df)

        # timm names the input-channel argument ``in_chans``.
        # The result is stored as ``self.backbone`` because that is the
        # attribute the rest of the class reads.
        self.backbone = timm.create_model(
            cfg.model_name,
            pretrained=cfg.pretrained,
            in_chans=cfg.in_channels,
            drop_rate=0.2,
            drop_path_rate=0.2,
        )

        # Strip the backbone's own head so it outputs features only.
        if 'efficientnet' in cfg.model_name:
            backbone_out = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()
        elif 'resnet' in cfg.model_name:
            backbone_out = self.backbone.fc.in_features
            self.backbone.fc = nn.Identity()
        else:
            backbone_out = self.backbone.get_classifier().in_features
            # No classifier and no global pooling.
            self.backbone.reset_classifier(0, '')

        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.feat_dim = backbone_out
        self.classifier = nn.Linear(backbone_out, cfg.num_classes)

        # Mixup is active only when the config defines a positive alpha.
        self.mixup_enabled = hasattr(cfg, 'mixup_alpha') and cfg.mixup_alpha > 0
        if self.mixup_enabled:
            self.mixup_alpha = cfg.mixup_alpha
    def forward(self, x, targets=None):
        """Run the backbone and classifier, with mixup during training.

        Args:
            x: Input batch passed to the backbone.
            targets: Optional target batch; required for mixup to be applied.

        Returns:
            ``(logits, loss)`` when the module is in training mode, mixup is
            enabled and ``targets`` is given; otherwise ``logits`` alone.
        """
        use_mixup = self.training and self.mixup_enabled and targets is not None
        if use_mixup:
            x, targets_a, targets_b, lam = self.mixup_data(x, targets)

        features = self.backbone(x)

        if isinstance(features, dict):
            features = features['features']

        # A 4-D output has not been pooled yet: pool it, then flatten.
        if len(features.shape) == 4:
            features = self.pooling(features)
            features = features.view(features.size(0), -1)

        logits = self.classifier(features)

        if use_mixup:
            loss = self.mixup_criterion(F.binary_cross_entropy_with_logits,
                                        logits, targets_a, targets_b, lam)
            return logits, loss

        return logits
        
    def mixup_data(self, x, targets):
        """Applies mixup to the data batch"""
        batch_size = x.size(0)

        lam = np.random.beta(self.mixup_alpha, self.mixup_alpha)


