In [50]:
import librosa
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import json

## Preprocessing

Convert each audio file into a binary frequency-bin feature vector and save the vectors to disk.

In [51]:
# --- Dataset locations ---
DATA_ROOT = Path("/kaggle/input/birdclef-2025")
AUDIO_ROOT = DATA_ROOT.joinpath("train_audio")  # one sub-folder per species code

# --- Spectrogram / frequency-binning settings ---
SAMPLE_RATE = 32_000  # sample rate requested from librosa.load
N_FFT = 1_024         # STFT window length
HOP_LENGTH = 512      # STFT hop between frames
MAX_FREQ = 16_000     # FFT rows above this frequency are discarded
BIN_SIZE = 5          # width of one output bin, in the same units as MAX_FREQ
# Length of the binary feature vector produced per recording.
NUM_BINS = MAX_FREQ // BIN_SIZE

def audio_to_binary_vector(path):
    """Encode one audio file as a binary occupancy vector over frequency bins.

    The file is loaded at ``SAMPLE_RATE`` and its STFT magnitude is computed.
    Each FFT frequency row (up to and including ``MAX_FREQ``) is reduced to its
    peak magnitude over time. Rows whose peak is strictly greater than the
    85th percentile of all row peaks are treated as active, and every active
    row sets the ``BIN_SIZE``-wide bin containing its centre frequency to 1.

    Parameters
    ----------
    path : str or path-like
        Audio file handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Integer vector of length ``NUM_BINS`` containing only 0s and 1s.

    Notes
    -----
    ``librosa.fft_frequencies`` returns row centres spaced ``sr / n_fft``
    apart. When that spacing is larger than ``BIN_SIZE``, only the bins that
    contain a row centre can ever be set; the rest always stay 0.
    """
    y, sr = librosa.load(path, sr=SAMPLE_RATE)
    S = np.abs(librosa.stft(y, n_fft=N_FFT, hop_length=HOP_LENGTH))
    freqs = librosa.fft_frequencies(sr=sr, n_fft=N_FFT)

    # Keep only the rows at or below the frequency ceiling.
    keep = freqs <= MAX_FREQ
    S = S[keep, :]
    freqs = freqs[keep]

    # Peak magnitude per frequency row, and the activity cut-off.
    energy = S.max(axis=1)
    threshold = np.percentile(energy, 85)

    # Map every row to its output bin. A row sitting exactly on MAX_FREQ
    # (e.g. the Nyquist row when MAX_FREQ == sr / 2) floor-divides to
    # NUM_BINS, one past the end of the vector, so clamp it into the last
    # bin instead of raising IndexError.
    bin_idx = np.minimum((freqs // BIN_SIZE).astype(int), NUM_BINS - 1)

    binary_vec = np.zeros(NUM_BINS, dtype=int)
    binary_vec[bin_idx[energy > threshold]] = 1
    return binary_vec

# Top species from species_stats earlier
top_species = ["grekis", "compau", "trokin", "roahaw", "banana", "whtdov", "socfly1", "yeofly1", "bobfly1", "wbwwre1"]

# At most this many recordings are processed per species (the first ones in
# sorted filename order).
MAX_FILES_PER_SPECIES = 50
# Written with lines=True, i.e. one JSON record per line; read it back with
# pd.read_json(OUTPUT_PATH, lines=True).
OUTPUT_PATH = "/kaggle/working/bird_vectors_top10.json"

output = []

for sp in top_species:
    folder = AUDIO_ROOT / sp
    files = sorted(folder.glob("*.ogg"))[:MAX_FILES_PER_SPECIES]

    if not files:
        # A missing or empty species folder would otherwise be skipped with
        # no trace, leaving fewer samples than expected and no explanation.
        print(f"{sp} — no .ogg files found in {folder}")
        continue

    for f in files:
        try:
            vec = audio_to_binary_vector(f)
        except Exception as e:
            # Best-effort: report the unreadable file and carry on with the rest.
            print(f"{f.name} — {e}")
            continue

        output.append({
            "species": sp,
            "filename": f.name,
            "vector": vec.tolist()
        })

# Save to file
df = pd.DataFrame(output)
df.to_json(OUTPUT_PATH, orient="records", lines=True)
print("Saved", len(df), "samples.")


Saved 500 samples.
