In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import entropy
# For audio loading and inspection
import librosa #numpy
import librosa.display
from IPython.display import Audio
import random
import soundfile as sf
import torch 
import torchaudio #pytorch
from pathlib import Path

In [2]:
BASE_DIR = r'/kaggle/input/birdclef-2025/'

In [3]:
train_df = pd.read_csv(os.path.join(BASE_DIR, 'train.csv'))
train_df.head()

Unnamed: 0,primary_label,secondary_labels,type,filename,collection,rating,url,latitude,longitude,scientific_name,common_name,author,license
0,1139490,[''],[''],1139490/CSA36385.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.3206,-73.7128,Ragoniella pulchella,Ragoniella pulchella,Fabio A. Sarria-S,cc-by-nc-sa 4.0
1,1139490,[''],[''],1139490/CSA36389.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.3206,-73.7128,Ragoniella pulchella,Ragoniella pulchella,Fabio A. Sarria-S,cc-by-nc-sa 4.0
2,1192948,[''],[''],1192948/CSA36358.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.3791,-73.7313,Oxyprora surinamensis,Oxyprora surinamensis,Fabio A. Sarria-S,cc-by-nc-sa 4.0
3,1192948,[''],[''],1192948/CSA36366.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.28,-73.8582,Oxyprora surinamensis,Oxyprora surinamensis,Fabio A. Sarria-S,cc-by-nc-sa 4.0
4,1192948,[''],[''],1192948/CSA36373.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.3791,-73.7313,Oxyprora surinamensis,Oxyprora surinamensis,Fabio A. Sarria-S,cc-by-nc-sa 4.0


In [4]:
taxonomy_df = pd.read_csv(os.path.join(BASE_DIR, 'taxonomy.csv'))
taxonomy_df.head()

Unnamed: 0,primary_label,inat_taxon_id,scientific_name,common_name,class_name
0,1139490,1139490,Ragoniella pulchella,Ragoniella pulchella,Insecta
1,1192948,1192948,Oxyprora surinamensis,Oxyprora surinamensis,Insecta
2,1194042,1194042,Copiphora colombiae,Copiphora colombiae,Insecta
3,126247,126247,Leptodactylus insularum,Spotted Foam-nest Frog,Amphibia
4,1346504,1346504,Neoconocephalus brachypterus,Neoconocephalus brachypterus,Insecta


# 2. Feature Engineering

In [5]:
SAMPLE_RATE = 32000
DURATION = 5
NUM_SAMPLES = SAMPLE_RATE * DURATION
N_MELS = 128

In [6]:
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=SAMPLE_RATE,
    n_fft=1024,
    hop_length=512,
    n_mels=N_MELS
)

In [7]:
amplitude_to_db = torchaudio.transforms.AmplitudeToDB()

AUDIO_DIR = os.path.join(BASE_DIR, 'train_audio')
GITHUB_DIR = '/kaggle/working/BirdCLEF_2025_KaggleCompetition'
OUTPUT_DIR = os.path.join(GITHUB_DIR, 'preprocessed_mels')

In [8]:
!git clone https://Sai-Sam-N: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@github.com/Sai-Sam-N/BirdCLEF_2025_KaggleCompetition.git $GITHUB_DIR

Cloning into '/kaggle/working/BirdCLEF_2025_KaggleCompetition'...
remote: Enumerating objects: 23751, done.[K
remote: Total 23751 (delta 0), reused 0 (delta 0), pack-reused 23751 (from 3)[K
Receiving objects: 100% (23751/23751), 2.25 GiB | 36.36 MiB/s, done.
Resolving deltas: 100% (5018/5018), done.


In [9]:
!cd $GITHUB_DIR && \
 git checkout -b logMels_generation && \
 git config user.name "Sai-Sam-N" && \
 git config user.email "saisamyuktha@gmail.com"

Switched to a new branch 'logMels_generation'


In [10]:
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [11]:
train_df.shape

(28564, 13)

In [12]:
train_df.iloc[:,0].unique().shape #number of directories to be present

(206,)

In [13]:
train_df.iloc[:,0].shape # number of files to be present

(28564,)

In [14]:
train_df_mels_tracker = train_df.copy()
train_df_mels_tracker['done'] = 'N'
train_df_mels_tracker.head()

Unnamed: 0,primary_label,secondary_labels,type,filename,collection,rating,url,latitude,longitude,scientific_name,common_name,author,license,done
0,1139490,[''],[''],1139490/CSA36385.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.3206,-73.7128,Ragoniella pulchella,Ragoniella pulchella,Fabio A. Sarria-S,cc-by-nc-sa 4.0,N
1,1139490,[''],[''],1139490/CSA36389.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.3206,-73.7128,Ragoniella pulchella,Ragoniella pulchella,Fabio A. Sarria-S,cc-by-nc-sa 4.0,N
2,1192948,[''],[''],1192948/CSA36358.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.3791,-73.7313,Oxyprora surinamensis,Oxyprora surinamensis,Fabio A. Sarria-S,cc-by-nc-sa 4.0,N
3,1192948,[''],[''],1192948/CSA36366.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.28,-73.8582,Oxyprora surinamensis,Oxyprora surinamensis,Fabio A. Sarria-S,cc-by-nc-sa 4.0,N
4,1192948,[''],[''],1192948/CSA36373.ogg,CSA,0.0,http://colecciones.humboldt.org.co/rec/sonidos...,7.3791,-73.7313,Oxyprora surinamensis,Oxyprora surinamensis,Fabio A. Sarria-S,cc-by-nc-sa 4.0,N


In [15]:
train_df_mels_tracker.to_csv(os.path.join(GITHUB_DIR, 'mels_tracker.csv'), index=False)

In [16]:
def preprocess_audio_file(filename):
    path = os.path.join(AUDIO_DIR, filename)
    waveform, sr = torchaudio.load(path)

    if sr != SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=SAMPLE_RATE)
        waveform = resampler(waveform)

    waveform = waveform.mean(dim=0)
    num_samples = waveform.shape[0]

    if num_samples < NUM_SAMPLES:
        padding = NUM_SAMPLES - num_samples
        waveform = torch.nn.functional.pad(waveform, (0, padding))
    else:
        waveform = waveform[:NUM_SAMPLES]

    mel_spec = mel_transform(waveform)
    log_mel = amplitude_to_db(mel_spec)
    log_mel = (log_mel - log_mel.mean()) / log_mel.std()

    return log_mel.unsqueeze(0)

def git_stage_and_commit(new_files, commit_message):
    if not new_files:
        return
    for f in new_files:
        rel_path = f.relative_to(GITHUB_DIR)
        !cd {GITHUB_DIR} && git add "{rel_path}"
    !cd {GITHUB_DIR} && git commit -m "{commit_message}" && git push origin logMels_generation

def preprocess_and_save_all(df, batch_size=10):
    saved_files = []
    for idx, row in df.iterrows():
        audio_id = row['filename']
        out_path = Path(OUTPUT_DIR) / audio_id.replace('.ogg', '.pt')
        if out_path.exists():
            continue
        try:
            log_mel = preprocess_audio_file(audio_id)
            out_path.parent.mkdir(parents=True, exist_ok=True)
            torch.save(log_mel, out_path)
            saved_files.append(out_path)
            print(f"{audio_id} processed")
            train_df_mels_tracker.loc[idx,'done'] = 'Y'

            if len(saved_files) >= batch_size:
                git_stage_and_commit(saved_files, f"Add {len(saved_files)} log-mel spectrograms")
                saved_files.clear()
                train_df_mels_tracker.to_csv(os.path.join(GITHUB_DIR, 'mels_tracker.csv'), index=False)

        except Exception as e:
            print(f"Error processing {audio_id}: {e}")

    # Final commit
    git_stage_and_commit(saved_files, "Final batch of log-mel spectrograms")

In [17]:
preprocess_and_save_all(train_df)

1139490/CSA36385.ogg processed
1139490/CSA36389.ogg processed
1192948/CSA36358.ogg processed
1192948/CSA36366.ogg processed
1192948/CSA36373.ogg processed
1192948/CSA36388.ogg processed
1194042/CSA18783.ogg processed
1194042/CSA18794.ogg processed
1194042/CSA18802.ogg processed
126247/XC941297.ogg processed
[logMels_generation 08410109] Add 10 log-mel spectrograms
 10 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 preprocessed_mels/1139490/CSA36385.pt
 create mode 100644 preprocessed_mels/1139490/CSA36389.pt
 create mode 100644 preprocessed_mels/1192948/CSA36358.pt
 create mode 100644 preprocessed_mels/1192948/CSA36366.pt
 create mode 100644 preprocessed_mels/1192948/CSA36373.pt
 create mode 100644 preprocessed_mels/1192948/CSA36388.pt
 create mode 100644 preprocessed_mels/1194042/CSA18783.pt
 create mode 100644 preprocessed_mels/1194042/CSA18794.pt
 create mode 100644 preprocessed_mels/1194042/CSA18802.pt
 create mode 100644 preprocessed_mels/126247/XC94

In [18]:
print('Done')

Done
