In [1]:
import os
import cv2
import math
import time
import librosa
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import soundfile as sf
import torch
import warnings
import matplotlib.pyplot as plt

In [2]:
# Training metadata, one row per recording.
# `primary_label` mixes numeric taxon ids (e.g. 1139490) with alphabetic
# species codes (e.g. ywcpar) and is used further down as a dictionary key,
# so it is read as text explicitly instead of relying on dtype inference.
train_df = pd.read_csv(
    '/kaggle/input/birdclef-2025/train.csv',
    dtype={'primary_label': str},
)
train_df

Unnamed: 0,primary_label,secondary_labels,type,filename,collection,rating,url,latitude,longitude,scientific_name,common_name,author,license
0,1139490,[''],[''],1139490/CSA36385.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.3206,-73.7128,Ragoniella pulchella,Ragoniella pulchella,Fabio A. Sarria-S,cc-by-nc-sa 4.0
1,1139490,[''],[''],1139490/CSA36389.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.3206,-73.7128,Ragoniella pulchella,Ragoniella pulchella,Fabio A. Sarria-S,cc-by-nc-sa 4.0
2,1192948,[''],[''],1192948/CSA36358.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.3791,-73.7313,Oxyprora surinamensis,Oxyprora surinamensis,Fabio A. Sarria-S,cc-by-nc-sa 4.0
3,1192948,[''],[''],1192948/CSA36366.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.2800,-73.8582,Oxyprora surinamensis,Oxyprora surinamensis,Fabio A. Sarria-S,cc-by-nc-sa 4.0
4,1192948,[''],[''],1192948/CSA36373.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.3791,-73.7313,Oxyprora surinamensis,Oxyprora surinamensis,Fabio A. Sarria-S,cc-by-nc-sa 4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
28559,ywcpar,[''],[''],ywcpar/iNat77392.ogg,iNat,0.0,https://static.inaturalist.org/sounds/77392.m4a,7.6921,-80.3379,Amazona ochrocephala,Yellow-crowned Parrot,Yennifer Alfaro,cc-by-nc 4.0
28560,ywcpar,[''],[''],ywcpar/iNat78624.ogg,iNat,0.0,https://static.inaturalist.org/sounds/78624.wav,8.9918,-79.4877,Amazona ochrocephala,Yellow-crowned Parrot,Evan Centanni,cc-by-nc-sa 4.0
28561,ywcpar,[''],[''],ywcpar/iNat789234.ogg,iNat,0.0,https://static.inaturalist.org/sounds/789234.wav,9.2316,-70.2041,Amazona ochrocephala,Yellow-crowned Parrot,Henrry,cc-by 4.0
28562,ywcpar,[''],[''],ywcpar/iNat819873.ogg,iNat,0.0,https://static.inaturalist.org/sounds/819873.mp3,10.5838,-66.8545,Amazona ochrocephala,Yellow-crowned Parrot,Alejandro Luy,cc-by-nc 4.0


In [3]:
# Taxonomy lookup: primary label -> taxonomic class name.
# The labels are read as text explicitly so the keys of `species_class_map`
# are always strings, whatever pandas would otherwise infer for a column that
# contains both numeric ids and alphabetic codes.
taxonomy_df = pd.read_csv(
    '/kaggle/input/birdclef-2025/taxonomy.csv',
    dtype={'primary_label': str},
)
species_class_map = dict(zip(taxonomy_df['primary_label'], taxonomy_df['class_name']))
species_class_map

{'1139490': 'Insecta',
 '1192948': 'Insecta',
 '1194042': 'Insecta',
 '126247': 'Amphibia',
 '1346504': 'Insecta',
 '134933': 'Amphibia',
 '135045': 'Amphibia',
 '1462711': 'Insecta',
 '1462737': 'Insecta',
 '1564122': 'Insecta',
 '21038': 'Amphibia',
 '21116': 'Amphibia',
 '21211': 'Amphibia',
 '22333': 'Amphibia',
 '22973': 'Amphibia',
 '22976': 'Amphibia',
 '24272': 'Amphibia',
 '24292': 'Amphibia',
 '24322': 'Amphibia',
 '41663': 'Mammalia',
 '41778': 'Mammalia',
 '41970': 'Mammalia',
 '42007': 'Mammalia',
 '42087': 'Mammalia',
 '42113': 'Mammalia',
 '46010': 'Mammalia',
 '47067': 'Mammalia',
 '476537': 'Amphibia',
 '476538': 'Amphibia',
 '48124': 'Insecta',
 '50186': 'Insecta',
 '517119': 'Amphibia',
 '523060': 'Insecta',
 '528041': 'Insecta',
 '52884': 'Insecta',
 '548639': 'Insecta',
 '555086': 'Amphibia',
 '555142': 'Amphibia',
 '566513': 'Mammalia',
 '64862': 'Amphibia',
 '65336': 'Amphibia',
 '65344': 'Amphibia',
 '65349': 'Amphibia',
 '65373': 'Amphibia',
 '65419': 'Amphibia

In [4]:
# Class vocabulary: every distinct primary label in sorted order, paired with
# consecutive integer ids starting at 0, plus lookups in both directions.
label_list = sorted(train_df['primary_label'].unique())
label_id_list = list(range(len(label_list)))
label2id = {label: idx for idx, label in zip(label_id_list, label_list)}
id2label = {idx: label for idx, label in zip(label_id_list, label_list)}

In [5]:
def _to_sample_name(filename):
    """Turn '<folder>/<stem>.<ext>' into '<folder>-<stem>'."""
    pieces = filename.split('/')
    return pieces[0] + '-' + pieces[-1].split('.')[0]


# Working frame: the metadata columns needed for preprocessing plus derived
# columns (integer target, absolute audio path, sample key, taxonomic class).
w_df = train_df.loc[:, ['primary_label', 'rating', 'filename']].copy()
w_df['target'] = w_df['primary_label'].map(label2id)
w_df['filepath'] = '/kaggle/input/birdclef-2025/train_audio/' + w_df['filename']
w_df['sample_name'] = w_df['filename'].map(_to_sample_name)
# Labels missing from the taxonomy table fall back to 'Unknown'.
w_df['class'] = [species_class_map.get(label, 'Unknown') for label in w_df['primary_label']]
w_df

Unnamed: 0,primary_label,rating,filename,target,filepath,sample_name,class
0,1139490,0.0,1139490/CSA36385.ogg,0,/kaggle/input/birdclef-2025/train_audio/113949...,1139490-CSA36385,Insecta
1,1139490,0.0,1139490/CSA36389.ogg,0,/kaggle/input/birdclef-2025/train_audio/113949...,1139490-CSA36389,Insecta
2,1192948,0.0,1192948/CSA36358.ogg,1,/kaggle/input/birdclef-2025/train_audio/119294...,1192948-CSA36358,Insecta
3,1192948,0.0,1192948/CSA36366.ogg,1,/kaggle/input/birdclef-2025/train_audio/119294...,1192948-CSA36366,Insecta
4,1192948,0.0,1192948/CSA36373.ogg,1,/kaggle/input/birdclef-2025/train_audio/119294...,1192948-CSA36373,Insecta
...,...,...,...,...,...,...,...
28559,ywcpar,0.0,ywcpar/iNat77392.ogg,205,/kaggle/input/birdclef-2025/train_audio/ywcpar...,ywcpar-iNat77392,Aves
28560,ywcpar,0.0,ywcpar/iNat78624.ogg,205,/kaggle/input/birdclef-2025/train_audio/ywcpar...,ywcpar-iNat78624,Aves
28561,ywcpar,0.0,ywcpar/iNat789234.ogg,205,/kaggle/input/birdclef-2025/train_audio/ywcpar...,ywcpar-iNat789234,Aves
28562,ywcpar,0.0,ywcpar/iNat819873.ogg,205,/kaggle/input/birdclef-2025/train_audio/ywcpar...,ywcpar-iNat819873,Aves


In [None]:
# --- Mel-spectrogram configuration -----------------------------------------
FS = 32_000              # sample rate used when loading audio and computing the spectrogram
N_FFT = 1024             # passed to the mel spectrogram as n_fft
HOP_LENGTH = 512         # passed to the mel spectrogram as hop_length
N_MELS = 128             # passed to the mel spectrogram as n_mels
FMIN = 50                # passed to the mel spectrogram as fmin
FMAX = 14_000            # passed to the mel spectrogram as fmax

# --- Clip / output geometry -------------------------------------------------
TARGET_DURATION = 5.0    # clip length; multiplied by FS to get the sample count
TARGET_SHAPE = (256, 256)  # size every spectrogram is resized to before storage
def audio2melspec(audio_data):
    """Convert a waveform into a min-max normalised log-mel spectrogram.

    Non-finite samples (NaN or +/-inf) are replaced before the transform by
    the mean of the finite samples. If no sample is finite the clip is treated
    as silence (all zeros), so the result never contains NaN.

    Args:
        audio_data: Array of audio samples, expected to be sampled at ``FS``.

    Returns:
        The mel spectrogram (``N_MELS`` mel bands, computed with the
        module-level ``N_FFT`` / ``HOP_LENGTH`` / ``FMIN`` / ``FMAX``
        settings), converted to dB relative to its own maximum and rescaled
        to the range [0, 1].
    """
    audio_data = np.asarray(audio_data)
    finite_mask = np.isfinite(audio_data)
    if not finite_mask.all():
        # np.nanmean would return NaN for an all-NaN clip (leaving the NaNs in
        # place) and is itself poisoned by inf, so the fill value is computed
        # from the finite samples only, falling back to silence.
        fill_value = audio_data[finite_mask].mean() if finite_mask.any() else 0.0
        audio_data = np.where(finite_mask, audio_data, fill_value)

    mel_spec = librosa.feature.melspectrogram(
        y=audio_data,
        sr=FS,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        n_mels=N_MELS,
        fmin=FMIN,
        fmax=FMAX,
        power=2.0
    )

    # dB relative to the loudest bin, then min-max scaling; the small epsilon
    # keeps the division defined when the spectrogram is constant.
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    db_min = mel_spec_db.min()
    db_max = mel_spec_db.max()
    mel_spec_norm = (mel_spec_db - db_min) / (db_max - db_min + 1e-8)

    return mel_spec_norm

In [None]:
# Build one fixed-size mel spectrogram per training recording, keyed by
# `sample_name`. Each recording is reduced to its central TARGET_DURATION
# seconds; shorter recordings are repeated until they are long enough.
melspec_data = {}
errors = []  # (filepath, message) for every recording that could not be processed
target_samples = int(TARGET_DURATION * FS)  # clip length in samples (loop-invariant)

for i, row in tqdm(w_df.iterrows(), total=len(w_df)):
    try:
        audio_data, _ = librosa.load(row['filepath'], sr=FS)

        if len(audio_data) == 0:
            # An empty recording would otherwise divide by zero just below.
            raise ValueError('audio file contains no samples')

        if len(audio_data) < target_samples:
            # Tile the short recording until it covers at least one full clip.
            n_copy = math.ceil(target_samples / len(audio_data))
            audio_data = np.concatenate([audio_data] * n_copy)

        # Take the window centred on the middle of the recording.
        start_idx = max(0, int(len(audio_data) / 2 - target_samples / 2))
        end_idx = min(len(audio_data), start_idx + target_samples)
        center_audio = audio_data[start_idx:end_idx]

        if len(center_audio) < target_samples:
            center_audio = np.pad(center_audio,
                                  (0, target_samples - len(center_audio)),
                                  mode='constant')

        mel_spec = audio2melspec(center_audio)

        if mel_spec.shape != TARGET_SHAPE:
            # TARGET_SHAPE is compared against numpy's (rows, cols) shape, but
            # cv2.resize expects dsize as (width, height), hence the reversal.
            mel_spec = cv2.resize(mel_spec, TARGET_SHAPE[::-1], interpolation=cv2.INTER_LINEAR)

        melspec_data[row['sample_name']] = mel_spec.astype(np.float32)
    except Exception as exc:
        # Record the failure and keep going so one bad file does not discard
        # the work already done; failures are reported after the loop.
        errors.append((row['filepath'], f'{type(exc).__name__}: {exc}'))

if errors:
    print(f'{len(errors)} of {len(w_df)} recordings failed; inspect `errors` for details')
        

  0%|          | 0/28564 [00:00<?, ?it/s]

In [8]:
# Persist the {sample_name: spectrogram} dictionary; because the payload is a
# Python dict rather than a plain array, it is stored via pickling.
np.save(
    file="birdClef_train_melspec_data.npy",
    arr=melspec_data,
    allow_pickle=True,
)