In [None]:
from pathlib import Path
import re
import numpy as np
import librosa
import tensorflow as tf
import kagglehub

# Sample rate (samples per second) requested from librosa.load for every clip.
SR = 32000
# Fixed clip length in samples: 5 seconds at SR. Clips are padded/truncated to this.
NSAMP = 5 * SR
# Input root: main() walks each immediate subdirectory looking for .wav files.
AUDIO_DIR = Path("../data/Tinamidae_5s")
# Output root: each .npz is written here under the same relative path as its .wav.
OUT_DIR   = Path("../data/Tinamidae_5s_npz")

def load_model():
    """Download the bird-vocalization classifier and return its inference signature.

    The model files are fetched with ``kagglehub.model_download`` and loaded as a
    TensorFlow SavedModel.

    Returns:
        The ``"serving_default"`` signature when the model exposes one (and it is
        truthy); otherwise the first signature the loaded model lists.
    """
    saved_model_path = kagglehub.model_download(
        "google/bird-vocalization-classifier/TensorFlow2/bird-vocalization-classifier/8"
    )
    loaded = tf.saved_model.load(saved_model_path)
    serving_fn = loaded.signatures.get("serving_default")
    if not serving_fn:
        # No usable default signature: fall back to whichever one comes first.
        serving_fn = list(loaded.signatures.values())[0]
    return serving_fn

def load_audio_5s(path: Path):
    """Load an audio file as a mono float32 waveform of exactly NSAMP samples.

    The file is decoded by ``librosa.load`` at sample rate ``SR``. Recordings
    shorter than ``NSAMP`` samples are zero-padded at the end; longer ones are
    cut to their first ``NSAMP`` samples.

    Args:
        path: Location of the audio file.

    Returns:
        1-D ``np.float32`` array of length ``NSAMP``.
    """
    samples, _rate = librosa.load(str(path), sr=SR, mono=True)
    missing = NSAMP - len(samples)
    if missing > 0:
        # np.pad's default mode is constant zero padding; pad only on the right.
        samples = np.pad(samples, (0, missing))
    else:
        samples = samples[:NSAMP]
    return samples.astype(np.float32)

def parse_labels_from_dir(dir_name: str):
    """Derive ``(genus, species)`` labels from a species directory name.

    The name is split on runs of spaces, underscores and hyphens; the first
    non-empty token is the genus and the second is the species. Example folder
    names: "Tinamus_major", "Nothura maculosa", "Crypturellus-tataupa".

    Args:
        dir_name: Directory name (not a full path).

    Returns:
        Tuple ``(genus, species)``. ``genus`` falls back to ``"Unknown"`` and
        ``species`` to ``"sp"`` when the corresponding token is missing.
    """
    # re.split yields empty strings when the name is empty or starts/ends with a
    # separator (e.g. "_Tinamus" or "Tinamus_"); discard them so the fallbacks
    # below actually trigger instead of returning "" as a label.
    toks = [tok for tok in re.split(r"[ _\-]+", dir_name.strip()) if tok]
    genus = toks[0] if toks else "Unknown"
    species = toks[1] if len(toks) >= 2 else "sp"
    return genus, species

def process_one_file(infer, wav_path: Path, out_path: Path, genus: str, species: str):
    """Embed one audio file and write the embedding plus its labels to an .npz.

    Args:
        infer: Callable invoked as ``infer(inputs=<tensor>)`` whose result can be
            indexed with ``"embedding"`` (as returned by ``load_model``).
        wav_path: Audio file to embed.
        out_path: Destination .npz path; missing parent directories are created.
        genus: Genus label stored alongside the embedding.
        species: Species label stored alongside the embedding.
    """
    waveform = load_audio_5s(wav_path)
    # The model is called on a batch, so add a leading batch axis of size 1.
    batch = tf.constant(waveform[None, :], dtype=tf.float32)
    outputs = infer(inputs=batch)
    # First (only) item of the batch. Original author's note gives the shape as
    # (1280,) - not verifiable from this file.
    embedding = outputs["embedding"][0].numpy()

    out_path.parent.mkdir(parents=True, exist_ok=True)
    # Labels are stored as NumPy unicode arrays so the file can be read back
    # without pickle.
    genus_arr = np.array(genus)
    species_arr = np.array(species)
    np.savez(out_path, embedding=embedding, genus=genus_arr, species=species_arr)

def main():
    """Embed every .wav under AUDIO_DIR and write a matching .npz under OUT_DIR.

    Each immediate subdirectory of ``AUDIO_DIR`` is treated as one species
    folder: its name supplies the genus/species labels, and every ``*.wav``
    found beneath it (recursively) is embedded. Outputs keep the input's
    relative path, with the suffix replaced by ``.npz``. Directories and files
    are visited in sorted order, and one progress line is printed per file.
    """
    infer = load_model()
    species_dirs = sorted(entry for entry in AUDIO_DIR.iterdir() if entry.is_dir())
    for species_dir in species_dirs:
        genus, species = parse_labels_from_dir(species_dir.name)
        wav_files = sorted(species_dir.rglob("*.wav"))
        for wav_path in wav_files:
            rel = wav_path.relative_to(AUDIO_DIR)
            out_path = OUT_DIR / rel.with_suffix(".npz")
            process_one_file(infer, wav_path, out_path, genus, species)
            shown_out = out_path.relative_to(OUT_DIR)
            print(f"OK  {rel}  →  {shown_out}")

# Entry point: run the whole extraction when this code executes as the main module.
# NOTE(review): in a notebook kernel __name__ is normally "__main__", so simply
# running this cell starts the full download + embedding pass - confirm intended.
if __name__ == "__main__":
    main()


In [None]:
import os, numpy as np

ROOT = "../data/Tinamidae_5s_npz"  # root folder of species subfolders (original note: 46 spp)
NPZ_KEY = "embedding"
MAX_SKIPPED_SHOWN = 10  # cap on how many skipped files are listed in the report

# Collect .npz paths. Each species folder is walked recursively because the
# extraction step mirrors the .wav tree, which may contain nested subfolders.
# Files sitting directly in ROOT are ignored.
paths = []
for sp in os.listdir(ROOT):
    d = os.path.join(ROOT, sp)
    if os.path.isdir(d):
        for dirpath, _dirnames, filenames in os.walk(d):
            for fn in filenames:
                if fn.lower().endswith(".npz"):
                    paths.append(os.path.join(dirpath, fn))
# os.listdir / os.walk order is arbitrary; sort so the file that fixes the
# reference dimensionality D is the same on every run.
paths.sort()
if not paths:
    raise SystemExit(f"Sin .npz en {ROOT}")

# Running per-dimension minimum / maximum over all valid embeddings.
# D is taken from the first embedding that loads cleanly; every file, including
# the first, goes through the same error handling.
D = None
dim_min = dim_max = None
ok, bad = 0, 0
skipped = []  # (path, reason) for every file that could not be used

for p in paths:
    try:
        with np.load(p, allow_pickle=False) as f:
            e = np.asarray(f[NPZ_KEY], dtype=np.float64)
        if e.ndim != 1:
            raise ValueError(f"Embedding no 1D en {p}: {e.shape}")
        if D is None:
            D = e.shape[0]
            dim_min, dim_max = e.copy(), e.copy()
        elif e.shape[0] != D:
            raise ValueError(f"Shape inesperado: {e.shape}")
        else:
            dim_min = np.minimum(dim_min, e)
            dim_max = np.maximum(dim_max, e)
        ok += 1
    except Exception as ex:
        # Best-effort scan: keep going, but remember why the file was dropped
        # so the summary is not silently built on a partial dataset.
        bad += 1
        skipped.append((p, repr(ex)))

if D is None:
    # paths is non-empty here, so at least one failure was recorded.
    raise SystemExit(
        f"Ningún .npz válido en {ROOT}; primer error: {skipped[0][0]}: {skipped[0][1]}"
    )

# Summaries
gmin = float(dim_min.min())
gmax = float(dim_max.max())

def count_in(a, b):
    """Count dimensions whose observed [min, max] range lies within [a, b]."""
    m = (dim_min >= a) & (dim_max <= b)
    return int(m.sum())

print(f"Archivos OK: {ok} | omitidos: {bad} | dimensiones: {D}")
print(f"Rango global observado: [{gmin:.6f}, {gmax:.6f}]")
print(f"Dims dentro de [-1, 1]:  {count_in(-1, 1)} / {D}")
print(f"Dims dentro de [-3, 3]:  {count_in(-3, 3)} / {D}")
print(f"Dims dentro de [-10,10]: {count_in(-10,10)} / {D}")

# Report which files were skipped and why (truncated to keep the output short).
for bad_path, reason in skipped[:MAX_SKIPPED_SHOWN]:
    print(f"Omitido: {bad_path} ({reason})")
if len(skipped) > MAX_SKIPPED_SHOWN:
    print(f"... y {len(skipped) - MAX_SKIPPED_SHOWN} más")
