In [None]:
# Setup único (rutas + parámetros + funciones)

from pathlib import Path
import numpy as np
import pandas as pd
import librosa
import soundfile as sf

# ---- Input paths ----
ROOT_NODES = Path("../results/asr_KNN/SYNTH")        # holds the node_* unit directories
ROOT_TIPS  = Path("../results/asr_KNN/SYNTH_TIPS")   # holds the tip_* unit directories

# ---- Output ----
# Both output roots are created as soon as this cell runs.
OUT_AUDIO_NODES = ROOT_NODES / "AUDIO_SONIF"
OUT_AUDIO_TIPS  = ROOT_TIPS  / "AUDIO_SONIF"
OUT_AUDIO_NODES.mkdir(parents=True, exist_ok=True)
OUT_AUDIO_TIPS.mkdir(parents=True, exist_ok=True)

# ---- Audio / STFT ----
FS       = 44100           # sample rate (Hz) used for synthesis and for the written WAV files
TLIM     = (0.0, 5.0)      # s
FLIM_KHZ = (0.0, 10.0)     # kHz

N_FFT = 512
OVLP  = 0.75               # fractional overlap between consecutive frames
HOP   = int(round(N_FFT * (1 - OVLP)))  # 128

# ---- Griffin-Lim ----
GL_ITERS  = 100            # iterations passed to griffin_lim_reconstruct
GL_CENTER = False          # forwarded as `center`
GL_MOM    = 0.0            # forwarded as `momentum`

# ---- What to read ----
STORE_SUBDIR = "store.at"          # sub-directory of each unit that holds the CSV surfaces
CSV_MEAN = "Mmean_wtd.csv"
CSV_PMAX = "PC1max_wtd.csv"
CSV_PMIN = "PC1min_wtd.csv"

# ---- Utilidades 
def ensure_dir(p: Path) -> None:
    """Create directory *p* (and any missing parents); do nothing if it already exists."""
    p.mkdir(parents=True, exist_ok=True)

def compute_n_frames(dur_s: float, fs: int, n_fft: int, hop: int):
    """Return ``(total_samples, frame_count)`` for a signal of ``dur_s`` seconds.

    ``total_samples`` is the rounded sample count at rate ``fs``.
    ``frame_count`` is the number of whole ``n_fft``-sample frames, spaced
    ``hop`` samples apart, that fit inside those samples without padding.
    The count is not clamped: it is zero or negative when the signal is
    shorter than one frame.
    """
    total_samples = int(round(dur_s * fs))
    whole_hops = np.floor((total_samples - n_fft) / hop)
    frame_count = int(whole_hops + 1)
    return total_samples, frame_count

def interp_time_rows(M, t_src, t_dst):
    """Linearly resample every row of ``M`` from time axis ``t_src`` onto ``t_dst``.

    ``M`` is a 2-D array whose rows are interpolated independently along the
    column (time) axis. Destination times outside ``t_src`` take the row's
    first or last value. Returns a float array of shape
    ``(M.shape[0], len(t_dst))``.
    """
    n_rows, _ = M.shape
    resampled = np.empty((n_rows, len(t_dst)), dtype=float)
    for row_idx, row in enumerate(M):
        resampled[row_idx, :] = np.interp(
            t_dst, t_src, row, left=row[0], right=row[-1]
        )
    return resampled

def normalize_with_mean_reference(M_t, mean_t, apply_sqrt=True):
    """Rescale ``M_t`` column by column using the min/max of ``mean_t``.

    For every column (frame) the minimum and maximum of the matching
    ``mean_t`` column define the scale: ``(M_t - min) / (max - min)``.
    Columns whose reference is flat (``max`` not greater than ``min``) become
    zero. The result is clipped from below at 0 only, so values above the
    reference maximum stay greater than 1. When ``apply_sqrt`` is true the
    square root of the clipped values is returned.
    """
    _, n_cols = M_t.shape
    scaled = np.empty_like(M_t, dtype=float)

    if n_cols:
        reference = mean_t[:, :n_cols]
        col_min = np.min(reference, axis=0)
        col_max = np.max(reference, axis=0)
        usable = col_max > col_min
        # Substitute a dummy span of 1 on flat columns so the division is
        # always defined; those columns are overwritten with 0 just below.
        span = np.where(usable, col_max - col_min, 1.0)
        scaled[...] = np.where(usable, (M_t - col_min) / span, 0.0)

    # A magnitude cannot be negative.
    scaled = np.maximum(scaled, 0.0)
    return np.sqrt(scaled) if apply_sqrt else scaled

def map_to_stft_bins(M_norm, f_mesh_hz, fs, n_fft, flim_hz):
    """Resample ``M_norm`` onto the rFFT bin frequencies and peak-normalise it.

    Each column of ``M_norm`` (sampled at ``f_mesh_hz``) is linearly
    interpolated onto the ``np.fft.rfftfreq(n_fft, 1/fs)`` grid, for bins up
    to ``min(flim_hz, f_mesh_hz[-1])``. Bins above that stay at zero. The
    whole matrix is then divided by its global maximum when that maximum is
    finite and positive. Returns an array of shape
    ``(n_fft // 2 + 1, M_norm.shape[1])``.
    """
    bin_hz = np.fft.rfftfreq(n_fft, d=1.0/fs)
    n_cols = M_norm.shape[1]
    spec = np.zeros((len(bin_hz), n_cols), dtype=float)

    top_hz = min(flim_hz, f_mesh_hz[-1])
    in_band = np.flatnonzero(bin_hz <= top_hz)
    # bin_hz is ascending, so the last in-band index is also the largest one.
    last_bin = int(in_band[-1]) if in_band.size else 1
    target_hz = bin_hz[:last_bin + 1]

    for col in range(n_cols):
        column = M_norm[:, col]
        spec[:last_bin + 1, col] = np.interp(
            target_hz, f_mesh_hz, column, left=column[0], right=column[-1]
        )

    peak = np.max(spec)
    if np.isfinite(peak) and peak > 0:
        spec /= peak

    return spec

def griffin_lim_reconstruct(mag, n_iter, hop, win_len, n_fft, center, momentum, length):
    """Estimate a time signal from an STFT magnitude with Griffin-Lim.

    Parameters
    ----------
    mag : magnitude spectrogram, passed to ``librosa.griffinlim`` as ``S``.
    n_iter : number of Griffin-Lim iterations.
    hop, win_len, n_fft : STFT hop length, window length and FFT size.
    center : forwarded as ``center``.
    momentum : forwarded as ``momentum``.
    length : forwarded as ``length`` (requested output length in samples).

    Returns
    -------
    The reconstructed signal divided by its absolute peak.

    Notes
    -----
    The phase is initialised with ``init="random"`` and no seed is fixed
    here, so repeated calls may produce different waveforms.
    """
    y = librosa.griffinlim(
        S=mag,
        n_iter=n_iter,
        hop_length=hop,
        win_length=win_len,
        # Previously this argument was accepted but never forwarded, so
        # librosa silently inferred the FFT size from mag's row count.
        n_fft=n_fft,
        window="hann",
        center=center,
        init="random",
        momentum=momentum,
        length=length
    )
    # The epsilon keeps an all-zero reconstruction from dividing by zero.
    y = y / (np.max(np.abs(y)) + 1e-12)
    return y

def read_surface_csv(csv_path: Path):
    """Load a surface CSV whose first row is a header (e.g. V1..V140).

    Returns the table body as a 2-D float array; the header row is dropped.
    """
    table = pd.read_csv(csv_path, header=0)
    cells = table.values
    return cells.astype(float)

def sonify_three_surfaces_anchored(Mmean, Mmax, Mmin, out_mean, out_max, out_min,
                                  t_frames, N_sig, dur, flim_hz, alpha=1.0):
    """Sonify the mean, max and min surfaces on a scale anchored to the mean.

    The three surfaces are interpolated in time onto ``t_frames``, normalised
    per frame with the mean surface as reference, mapped to STFT bins,
    inverted with Griffin-Lim and written as 16-bit PCM WAV files to
    ``out_mean``, ``out_max`` and ``out_min``.

    ``alpha`` controls how far the extremes move away from the mean:
      - ``alpha=1.0`` uses ``Mmax`` / ``Mmin`` as they are;
      - ``alpha=0.5`` goes halfway from the mean towards ``Mmax`` / ``Mmin``.

    The mesh axes are rebuilt from the module-level ``FLIM_KHZ`` and from
    ``dur``; STFT and Griffin-Lim settings come from the module-level
    ``FS``, ``N_FFT``, ``HOP`` and ``GL_*`` constants.
    """
    n_freq, n_time = Mmean.shape

    # Axes of the source mesh: time in seconds, frequency in Hz.
    mesh_t = np.linspace(0.0, dur, n_time)
    mesh_f_hz = np.linspace(FLIM_KHZ[0], FLIM_KHZ[1], n_freq) * 1000.0

    # Same destination frames for all three surfaces.
    mean_t, max_raw, min_raw = (
        interp_time_rows(surface, mesh_t, t_frames)
        for surface in (Mmean, Mmax, Mmin)
    )

    # The extremes are expressed as an alpha-scaled offset from the mean.
    anchored = (
        mean_t,
        mean_t + alpha * (max_raw - mean_t),
        mean_t + alpha * (min_raw - mean_t),
    )

    # Normalise -> STFT magnitude -> Griffin-Lim, in the order mean, max, min.
    signals = []
    for surface_t in anchored:
        normed = normalize_with_mean_reference(surface_t, mean_t, apply_sqrt=True)
        magnitude = map_to_stft_bins(normed, mesh_f_hz, FS, N_FFT, flim_hz)
        signals.append(
            griffin_lim_reconstruct(
                magnitude, GL_ITERS, HOP, N_FFT, N_FFT, GL_CENTER, GL_MOM, N_sig
            )
        )

    # Write only after all three reconstructions have succeeded.
    for wav_path, signal in zip((out_mean, out_max, out_min), signals):
        sf.write(str(wav_path), signal.astype(np.float32), FS, subtype="PCM_16")


In [None]:
# Batch: sonificar mean + max + min (nodos y tips)


# ---- Parameters for this cell ----
ALPHA_PC1 = 1.0   # 1.0 = use the extremes as they are
WAV_MEAN  = "sonif_mean.wav"
WAV_MAX   = "sonif_max.wav"
WAV_MIN   = "sonif_min.wav"

dur = TLIM[1] - TLIM[0]
flim_hz = FLIM_KHZ[1] * 1000.0

# Only the frame count is needed here; the target sample count is discarded.
_, n_frames = compute_n_frames(dur, FS, N_FFT, HOP)
t_frames = np.linspace(0.0, dur, n_frames)
N_sig = (n_frames - 1) * HOP + N_FFT  # exact length for center=False

print("n_frames =", n_frames, "| N_sig =", N_sig)

# Discover the unit directories; sorted so the processing order is stable.
node_dirs = sorted([p for p in ROOT_NODES.glob("node_*") if p.is_dir()])
tip_dirs  = sorted([p for p in ROOT_TIPS.glob("tip_*")  if p.is_dir()])
print(f"Nodos: {len(node_dirs)} | Tips: {len(tip_dirs)} | Total: {len(node_dirs)+len(tip_dirs)}")

def process_units(unit_dirs, out_root):
    """Sonify every unit directory in ``unit_dirs`` into ``out_root/<unit name>/``.

    For each unit the mean, max and min CSV surfaces are read from its
    ``STORE_SUBDIR`` folder and handed to ``sonify_three_surfaces_anchored``
    together with the module-level ``t_frames``, ``N_sig``, ``dur``,
    ``flim_hz`` and ``ALPHA_PC1``. One ``[OK]`` line is printed per unit.
    """
    csv_names = (CSV_MEAN, CSV_PMAX, CSV_PMIN)
    wav_names = (WAV_MEAN, WAV_MAX, WAV_MIN)

    for unit_path in unit_dirs:
        target_dir = out_root / unit_path.name
        source_dir = unit_path / STORE_SUBDIR
        ensure_dir(target_dir)

        surf_mean, surf_max, surf_min = (
            read_surface_csv(source_dir / csv_name) for csv_name in csv_names
        )
        wav_mean, wav_max, wav_min = (target_dir / wav_name for wav_name in wav_names)

        sonify_three_surfaces_anchored(
            surf_mean, surf_max, surf_min,
            out_mean=wav_mean,
            out_max=wav_max,
            out_min=wav_min,
            t_frames=t_frames,
            N_sig=N_sig,
            dur=dur,
            flim_hz=flim_hz,
            alpha=ALPHA_PC1,
        )

        print(f"[OK] {unit_path.name} -> {target_dir}")

process_units(node_dirs, OUT_AUDIO_NODES)
process_units(tip_dirs,  OUT_AUDIO_TIPS)


In [None]:
# VIDEO


from pathlib import Path
import numpy as np
import soundfile as sf
import librosa
import matplotlib.pyplot as plt
import imageio.v2 as imageio
import subprocess
import shutil
import tempfile
import warnings

from matplotlib.colors import LinearSegmentedColormap, Normalize
from matplotlib.collections import LineCollection

warnings.filterwarnings("ignore", category=UserWarning)

# BASE PARAMETERS
# NOTE: this cell rebinds ROOT_NODES, ROOT_TIPS, FS, N_FFT and HOP, which the
# setup cell also defines with different values. Re-running the batch
# sonification cell after this one would pick up the values below.

#UNITS = ["node_76"]
UNITS = ["node_47", "node_48", "node_49", "node_76", "node_77", "node_80", "node_74"]

# Here the roots point at the AUDIO_SONIF output folders, not at the raw results.
ROOT_NODES = Path("../results/asr_KNN/SYNTH") / "AUDIO_SONIF"
ROOT_TIPS  = Path("../results/asr_KNN/SYNTH_TIPS") / "AUDIO_SONIF"

# (tag, wav file name) pairs to render; the tag ends up in the video file name.
AUDIO_TAGS = [
#    ("mean", "sonif_mean.wav"),
    ("max",  "sonif_max.wav"),
    ("min",  "sonif_min.wav"),
]

# audio prep
FS = 44100
LEAD_SILENCE_S = 1.0   # seconds of silence prepended to the audio
FADEIN_MS  = 40.0
FADEOUT_MS = 40.0

# mel spectrogram
N_FFT  = 1024
HOP    = 256
N_MELS = 128
FMIN   = 0
FMAX   = 10000

DB_FLOOR = -40.0   # lower clip (dB) for the spectrogram and the FFT curves
FPS = 30

# visual
DPI = 300
FIG_W, FIG_H = 12, (1808/300)   # inches; 3600 x 1808 px at DPI = 300

# Cursor
CURSOR_COLOR = (0.0, 0.0, 0.0)  # black

# labels
X_LABEL = "Time (s)"
Y_LABEL = "Frequency (kHz)"
CB_LABEL = "Power (dBFS)"
X_TICKS_S = [0, 1, 2, 3, 4, 5, 6]
Y_TICKS_HZ = [0, 500, 1000, 2000, 4000, 6000, 8000, 10000]

# CMAP
# NOTE(review): the names suggest a white-to-black ramp, but the last stop is
# pure blue (#0000FF) - confirm this is intended.
RAINBOW_W2K = [
    "#ffffff",
    "#ff0000",
    "#ff7a00",
    "#ffd400",
    "#28c300",
    "#00d9ff",
    "#0066ff",
    "#0000FF",
]
CMAP = LinearSegmentedColormap.from_list("W_RAINBOW_K", RAINBOW_W2K, N=256)

# Visual tweaks
GRID_COLOR = (0.6, 0.6, 0.6)
GRID_ALPHA = 0.30
GRID_LW    = 0.8

FFT_WIN = 2048          # samples in the per-frame (instantaneous) FFT window
FFT_LINE_LW = 2.0
FFT_PANEL_RATIO = 1.4   # width of the FFT panel relative to 6.0 for the spectrogram
PANEL_WSPACE = 0.12

FFT_DB_STEP = 10
FFT_X_LABEL = "Power (dBFS)"

# hand-tuned margins
FIG_LEFT   = 0.05
FIG_RIGHT  = 0.999
FIG_TOP    = 0.93
FIG_BOTTOM = 0.24

# Max-hold envelope
HOLD_LINE_LW = 1.2
HOLD_LINE_ALPHA = 0.55
HOLD_LINE_ZORDER = 9

# --- FULL FFT (GLOBAL), shown at the end ---
FULLFFT_COLOR = "purple"
FULLFFT_LW = 1.2
FULLFFT_ALPHA = 0.70

# Only at the end: the last N ms (and it stays frozen)
FULLFFT_SHOW_LAST_MS = 20.0
FULLFFT_MIN_FRAMES   = 5      # minimum number of video frames that show it
FULLFFT_SMOOTH_HZ    = 120.0  # width of the moving-average smoothing, in Hz
FULLFFT_MIN_BINS     = 21     # minimum smoothing-kernel length, in bins
FULLFFT_MAX_POINTS   = 2500   # the curve is decimated above this many points

# Optional normalisation (peak -> 0 dB)
FULLFFT_PEAK_NORMALIZE = True

# Silence trim ONLY for the global FFT (lowers the floor caused by edges / silence)
FULLFFT_TRIM_SILENCE = True
FULLFFT_TRIM_TOP_DB  = 40.0
FULLFFT_EDGE_FADE_MS = 12.0

# legends inside the FFT panel (axes-fraction coordinates)
FFT_LEG_ANCHOR_X = 0.04
FFT_LEG_ANCHOR_Y = 0.97
FFT_LEG_FONTSIZE = 7
FFT_LEG_PAD_Y    = 0.028
FFT_LEG_LINE_X0  = 0.04
FFT_LEG_LINE_X1  = 0.16

FULLFFT_LABEL_NORM = "Global FFT (normalized)"
FULLFFT_LABEL_RAW  = "Global FFT"

# Helpers
def unit_dir(name: str) -> Path:
    """Resolve a unit name to its directory under the audio output roots.

    ``node_*`` names map into ``ROOT_NODES`` and ``tip_*`` names into
    ``ROOT_TIPS``. Any other name is tried as ``tip_<name>`` and accepted
    only if that directory exists. Raises ``ValueError`` otherwise.
    """
    for prefix, root in (("node_", ROOT_NODES), ("tip_", ROOT_TIPS)):
        if name.startswith(prefix):
            return root / name

    bare_tip = ROOT_TIPS / f"tip_{name}"
    if bare_tip.exists():
        return bare_tip

    raise ValueError(f"No reconozco unidad '{name}'. Usa 'node_###' o 'tip_...'")

def apply_fadein(y, sr, fade_ms):
    """Return ``y`` with a linear 0 -> 1 fade over its first ``fade_ms`` milliseconds.

    The fade is clamped to the signal length, so a signal shorter than the
    requested fade is ramped over its whole duration instead of raising a
    broadcasting error. ``y`` itself is returned, not a copy, when the
    effective fade is one sample or less; otherwise a faded copy is returned
    and the input is left untouched.
    """
    n = int(round((fade_ms / 1000.0) * sr))
    # Never ramp over more samples than the signal has.
    n = min(n, len(y))
    if n <= 1:
        return y
    ramp = np.linspace(0.0, 1.0, n, endpoint=True)
    y2 = y.copy()
    y2[:n] *= ramp
    return y2

def apply_fadeout(y, sr, fade_ms):
    """Return ``y`` with a linear 1 -> 0 fade over its last ``fade_ms`` milliseconds.

    The fade is clamped to the signal length, so a signal shorter than the
    requested fade is ramped over its whole duration instead of raising a
    broadcasting error. ``y`` itself is returned, not a copy, when the
    effective fade is one sample or less; otherwise a faded copy is returned
    and the input is left untouched.
    """
    n = int(round((fade_ms / 1000.0) * sr))
    # Never ramp over more samples than the signal has.
    n = min(n, len(y))
    if n <= 1:
        return y
    ramp = np.linspace(1.0, 0.0, n, endpoint=True)
    y2 = y.copy()
    y2[-n:] *= ramp
    return y2

def prepend_silence(y, sr, silence_s):
    """Return ``y`` preceded by ``silence_s`` seconds of zeros at rate ``sr``.

    The padding uses ``y``'s dtype. ``y`` itself is returned when the
    rounded padding length is not positive.
    """
    pad_len = int(round(silence_s * sr))
    if pad_len > 0:
        lead_in = np.zeros(pad_len, dtype=y.dtype)
        return np.concatenate((lead_in, y))
    return y

def normalize_peak_0dbfs(y):
    """Scale ``y`` so that its largest absolute sample equals 1.

    Empty or all-zero input is returned unchanged (same object).
    """
    if not len(y):
        return y
    peak = np.max(np.abs(y))
    return y if peak <= 0 else y / peak

def mel_dbfs(y, sr):
    """Return the mel power spectrogram of ``y`` in dB, clipped to ``[DB_FLOOR, 0]``.

    The analysis settings come from the module-level ``N_FFT``, ``HOP``,
    ``N_MELS``, ``FMIN`` and ``FMAX``; the dB reference is 1.0.
    """
    mel_settings = dict(
        n_fft=N_FFT, hop_length=HOP,
        n_mels=N_MELS, fmin=FMIN, fmax=FMAX, power=2.0,
    )
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, **mel_settings)
    mel_db = librosa.power_to_db(mel_power, ref=1.0)
    return np.clip(mel_db, DB_FLOOR, 0.0)

def make_cbar_ticks(db_floor):
    """Build colour-bar tick values running from ``db_floor`` up to 0 dB.

    The spacing is 5 dB for ranges up to 25 dB, 10 dB up to 65 dB and 20 dB
    beyond that. ``db_floor`` and 0.0 are always included as end points.
    Returns a list of floats rounded to three decimals.
    """
    span = abs(float(db_floor))
    step = 5 if span <= 25 else (10 if span <= 65 else 20)

    ticks = list(np.arange(db_floor, 0.0001, step))
    if not ticks or abs(ticks[0] - db_floor) > 1e-6:
        ticks.insert(0, db_floor)
    if abs(ticks[-1] - 0.0) > 1e-6:
        ticks.append(0.0)

    cleaned = []
    for value in ticks:
        # Collapse tiny values (and -0.0) to a plain 0.0.
        cleaned.append(0.0 if abs(value) < 1e-9 else float(np.round(value, 3)))
    return cleaned

def mel_bin_positions_for_hz_ticks(y_ticks_hz):
    """Convert tick frequencies in Hz to mel-bin axis positions and kHz labels.

    Ticks outside the range spanned by ``librosa.mel_frequencies`` for the
    module-level ``N_MELS``, ``FMIN`` and ``FMAX`` are dropped. Whole
    kilohertz values are labelled without decimals, all others with one.
    Returns ``(positions, labels)``.
    """
    centers_hz = librosa.mel_frequencies(n_mels=N_MELS, fmin=FMIN, fmax=FMAX)
    lowest = centers_hz[0] - 1e-9
    highest = centers_hz[-1] + 1e-9

    kept_hz = [hz for hz in y_ticks_hz if lowest <= hz <= highest]
    positions = np.interp(kept_hz, centers_hz, np.arange(N_MELS))

    labels = []
    for hz in kept_hz:
        if hz % 1000 == 0:
            labels.append(f"{hz/1000:.0f}")
        else:
            labels.append(f"{hz/1000:.1f}")
    return positions, labels

def find_ffmpeg():
    """Locate an ffmpeg executable and return its path.

    Search order: the binary reported by ``imageio_ffmpeg``, then two fixed
    install locations, then whatever ``ffmpeg`` resolves to on ``PATH``.
    Raises ``RuntimeError`` when none of them is found.
    """
    try:
        import imageio_ffmpeg
        bundled = imageio_ffmpeg.get_ffmpeg_exe()
        if bundled and Path(bundled).exists():
            return bundled
    except Exception:
        # Best effort only: any failure here falls through to the other lookups.
        pass

    fixed_locations = ("/opt/homebrew/bin/ffmpeg", "/usr/local/bin/ffmpeg")
    for candidate in fixed_locations:
        if Path(candidate).exists():
            return candidate

    on_path = shutil.which("ffmpeg")
    if not on_path:
        raise RuntimeError("No encuentro un ffmpeg usable (imageio-ffmpeg / homebrew / PATH).")
    return on_path

def mux_audio(video_silent_mp4, audio_wav, out_mp4):
    """Combine a silent video and a WAV file into ``out_mp4`` with ffmpeg.

    The video stream is copied as it is; the audio is encoded as AAC at
    192 kbit/s and the output stops at the shorter of the two inputs. An
    existing ``out_mp4`` is overwritten.

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status. ffmpeg's stderr is captured,
        its tail is printed, and it is available on the exception as
        ``.stderr``. Previously stderr was discarded, so failures carried no
        diagnostic.
    RuntimeError
        Propagated from ``find_ffmpeg`` when no ffmpeg binary is found.
    """
    ffmpeg = find_ffmpeg()
    cmd = [
        ffmpeg, "-y",
        "-i", str(video_silent_mp4),
        "-i", str(audio_wav),
        "-c:v", "copy",
        "-c:a", "aac",
        "-b:a", "192k",
        "-shortest",
        str(out_mp4)
    ]
    try:
        subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as err:
        detail = (err.stderr or b"").decode("utf-8", errors="replace").strip()
        if detail:
            # Keep the report short: the reason for the failure is at the end.
            print(f"[ffmpeg] {detail[-2000:]}")
        raise

# FFT profile (instantánea)
def fft_profile_dbfs_v2_mel_ypos_with_binned(y, sr, t, fft_win, fmin, fmax, n_mels):
    """Compute the windowed FFT profile of ``y`` around time ``t``.

    A ``fft_win``-sample Hann-windowed segment centred on ``t`` is
    transformed, restricted to ``[fmin, fmax]``, converted to dB and clipped
    to ``[DB_FLOOR, 0]``.

    Returns
    -------
    y_pos : position of every kept FFT bin on the mel-bin axis (float).
    x_db : level of every kept FFT bin in dB.
    cur_mel_db : length-``n_mels`` array holding, per mel bin, the maximum
        level of the FFT bins that round to it (``DB_FLOOR`` where none do).

    Notes
    -----
    Where the window extends past either end of the signal the missing
    samples are zero-padded on that same side, so the window stays centred
    on ``t``. Previously the padding was always appended on the right, which
    shifted the analysis window for times within half a window of the start.
    """
    center = int(round(t * sr))
    half = fft_win // 2
    start = center - half

    # Clamp both ends into the signal; b never falls below a, so a window
    # lying entirely outside the signal yields an empty segment.
    a = min(max(0, start), len(y))
    b = min(max(a, center + half), len(y))

    seg = y[a:b].astype(np.float32)
    if len(seg) < fft_win:
        missing = fft_win - len(seg)
        # Samples that would lie before index 0 are padded on the left.
        pad_left = min(max(0, -start), missing)
        seg = np.pad(seg, (pad_left, missing - pad_left), mode="constant")

    win = np.hanning(fft_win).astype(np.float32)
    segw = seg * win

    X = np.fft.rfft(segw, n=fft_win)
    mag = np.abs(X).astype(np.float32)
    freqs = np.fft.rfftfreq(fft_win, d=1.0/sr).astype(np.float32)

    mask = (freqs >= fmin) & (freqs <= fmax)
    freqs = freqs[mask]
    mag = mag[mask]

    if len(mag) == 0:
        cur_mel_db = np.full(n_mels, DB_FLOOR, dtype=np.float32)
        return np.array([0.0], dtype=np.float32), np.array([DB_FLOOR], dtype=np.float32), cur_mel_db

    # Divide by half the window sum to turn bin magnitudes into amplitudes.
    coherent_gain = (np.sum(win) / 2.0)
    amp = mag / (coherent_gain + 1e-12)

    p = (amp ** 2).astype(np.float32)
    x_db = librosa.power_to_db(p, ref=1.0)
    x_db = np.clip(x_db, DB_FLOOR, 0.0).astype(np.float32)

    # Place every FFT bin on the (fractional) mel-bin axis used by the plots.
    mel_freqs = librosa.mel_frequencies(n_mels=n_mels, fmin=fmin, fmax=fmax).astype(np.float32)
    mel_bins  = np.arange(n_mels, dtype=np.float32)
    y_pos = np.interp(freqs, mel_freqs, mel_bins).astype(np.float32)

    # Per mel bin, keep the loudest FFT bin that rounds to it.
    mel_idx = np.clip(np.rint(y_pos).astype(np.int32), 0, n_mels - 1)
    cur_mel_db = np.full(n_mels, DB_FLOOR, dtype=np.float32)
    np.maximum.at(cur_mel_db, mel_idx, x_db)

    return y_pos, x_db, cur_mel_db

# FULL FFT (GLOBAL) estándar + suavizado en dB
def full_fft_global_curve_dbfs_smoothed(y_audio, sr, fmin, fmax,
                                        smooth_hz, min_bins, max_points,
                                        peak_normalize=False):
    """Compute a smoothed whole-signal FFT curve in dB on the mel-bin axis.

    The lead-in silence (``LEAD_SILENCE_S``) is removed, the remainder is
    optionally trimmed and edge-faded (``FULLFFT_*`` constants),
    Hann-windowed and transformed in one FFT. The spectrum is limited to
    ``[fmin, fmax]``, converted to dB, smoothed with a moving average about
    ``smooth_hz`` wide (at least ``min_bins`` bins), optionally shifted so
    its peak is 0 dB, clipped to ``[DB_FLOOR, 0]`` and decimated to roughly
    ``max_points`` points.

    Returns
    -------
    (x_db, y_pos) : levels in dB and matching mel-bin axis positions. When
        fewer than 8 samples or 3 bins remain, the single-point fallback
        ``([DB_FLOOR], [0.0])`` is returned.

    Notes
    -----
    The edge fades are skipped when the trimmed segment is shorter than the
    fade itself. Previously a silent or very short segment made the fade
    helpers raise a broadcasting error before the length guard was reached.
    """
    # Drop the lead-in silence so it does not flatten the global spectrum.
    n0 = int(round(LEAD_SILENCE_S * sr))
    y_use = y_audio[n0:] if len(y_audio) > n0 else y_audio
    y_use = y_use.astype(np.float32)

    # Silence trim (affects only the computation of the global spectrum).
    if FULLFFT_TRIM_SILENCE and len(y_use) > 0:
        y_use, _ = librosa.effects.trim(y_use, top_db=float(FULLFFT_TRIM_TOP_DB))
        # Short fades at the edges of the trimmed segment (reduces leakage),
        # applied only when the segment is long enough to hold one.
        fade_n = int(round((FULLFFT_EDGE_FADE_MS / 1000.0) * sr))
        if len(y_use) >= fade_n:
            y_use = apply_fadein(y_use, sr, FULLFFT_EDGE_FADE_MS)
            y_use = apply_fadeout(y_use, sr, FULLFFT_EDGE_FADE_MS)

    L = len(y_use)
    if L < 8:
        return np.array([DB_FLOOR], dtype=np.float32), np.array([0.0], dtype=np.float32)

    # Single FFT of the whole vector, Hann-windowed.
    win_t = np.hanning(L).astype(np.float32)
    coherent_gain = (np.sum(win_t) / 2.0) + 1e-12

    X = np.fft.rfft(y_use * win_t)
    mag = np.abs(X).astype(np.float32)
    freqs = np.fft.rfftfreq(L, d=1.0/sr).astype(np.float32)

    mask = (freqs >= fmin) & (freqs <= fmax)
    freqs = freqs[mask]
    mag = mag[mask]
    if len(mag) < 3:
        return np.array([DB_FLOOR], dtype=np.float32), np.array([0.0], dtype=np.float32)

    amp = mag / coherent_gain
    p = (amp ** 2).astype(np.float32)

    # Convert to dB first ...
    x_db = librosa.power_to_db(p, ref=1.0).astype(np.float32)

    # ... then smooth in the dB domain with an odd-length moving average.
    bin_hz = float(sr) / float(L)
    k = int(round(float(smooth_hz) / max(bin_hz, 1e-12)))
    k = max(int(min_bins), k)
    if k % 2 == 0:
        k += 1
    if k >= 3 and k < len(x_db):
        ker = np.ones(k, dtype=np.float32) / float(k)
        pad = k // 2
        # Reflect-pad so the "valid" convolution keeps the original length.
        x_pad = np.pad(x_db, (pad, pad), mode="reflect")
        x_db = np.convolve(x_pad, ker, mode="valid").astype(np.float32)

    # Optional normalisation: peak -> 0 dB.
    if peak_normalize:
        x_db = x_db - float(np.max(x_db))

    # Final clip to the plotted range.
    x_db = np.clip(x_db, DB_FLOOR, 0.0).astype(np.float32)

    # Map frequencies to the mel-bin y axis.
    mel_freqs = librosa.mel_frequencies(n_mels=N_MELS, fmin=fmin, fmax=fmax).astype(np.float32)
    mel_bins = np.arange(N_MELS, dtype=np.float32)
    y_pos = np.interp(freqs, mel_freqs, mel_bins).astype(np.float32)

    # Decimate so the plotted line has at most about max_points vertices.
    if max_points is not None and len(x_db) > int(max_points):
        step = int(np.ceil(len(x_db) / int(max_points)))
        x_db = x_db[::step]
        y_pos = y_pos[::step]

    return x_db, y_pos

# Render
def render_video_with_cursor_fft_and_hold(y_audio, S_db, out_mp4_silent, duration_s):
    """Render a silent MP4 of the mel spectrogram with a cursor and an FFT side panel.

    Layout: the FFT panel is on the left and the spectrogram (with colour
    bar) on the right, sharing the mel-bin y axis. For every video frame
    the cursor is moved, the instantaneous FFT profile is redrawn as a
    colour-mapped line and a black max-hold envelope is updated. The global
    FFT curve and its legend become visible only on the last frames and
    stay visible.

    Parameters
    ----------
    y_audio : audio samples, analysed at the module-level rate ``FS``.
    S_db : image drawn with ``imshow`` (presumably the output of
        ``mel_dbfs`` - confirm against the caller).
    out_mp4_silent : path of the video file to write.
    duration_s : duration used for the x axis and for the frame timestamps.

    Notes
    -----
    The figure is closed in a ``finally`` block that covers the whole
    set-up, so it is released even if building the plot or opening the
    writer fails. Previously only the frame loop was protected.
    """
    n_vid_frames = int(np.ceil(duration_s * FPS))
    times = np.linspace(0.0, duration_s, n_vid_frames)

    # Global FFT: only on the tail (ms -> frames, with a minimum).
    frames_by_ms = int(np.ceil((FULLFFT_SHOW_LAST_MS / 1000.0) * FPS))
    n_last = max(1, frames_by_ms, int(FULLFFT_MIN_FRAMES))
    show_from_idx = max(0, len(times) - n_last)

    fig = plt.figure(figsize=(FIG_W, FIG_H), dpi=DPI)
    writer = None
    try:
        gs = fig.add_gridspec(1, 2, width_ratios=[FFT_PANEL_RATIO, 6.0], wspace=PANEL_WSPACE)

        ax = fig.add_subplot(gs[0, 1])
        ax_fft = fig.add_subplot(gs[0, 0], sharey=ax)

        ax.set_facecolor("white")
        ax_fft.set_facecolor("white")
        fig.patch.set_facecolor("white")

        fig.subplots_adjust(left=FIG_LEFT, right=FIG_RIGHT, top=FIG_TOP, bottom=FIG_BOTTOM)

        # Spectrogram
        im = ax.imshow(
            S_db, origin="lower", aspect="auto", cmap=CMAP,
            vmin=DB_FLOOR, vmax=0.0,
            extent=[0.0, duration_s, 0.0, N_MELS - 1],
            zorder=0
        )
        ax.set_xlim(0.0, duration_s)

        ax.set_xlabel(X_LABEL)
        ax.set_ylabel(Y_LABEL)

        xt = [t for t in X_TICKS_S if t <= duration_s + 1e-9]
        ax.set_xticks(xt)

        y_pos_ticks, y_lab = mel_bin_positions_for_hz_ticks(Y_TICKS_HZ)
        ax.set_yticks(y_pos_ticks)
        ax.set_yticklabels(y_lab)
        ax.tick_params(axis="y", labelleft=True, left=True, pad=8)

        cb = fig.colorbar(im, ax=ax, pad=0.015)
        cb.set_label(CB_LABEL)
        cb.set_ticks(make_cbar_ticks(DB_FLOOR))

        # Cursor (drawn with the same line width as the axes frame)
        axes_lw = float(plt.rcParams.get("axes.linewidth", 1.0))
        cursor = ax.axvline(0.0, color=CURSOR_COLOR, linewidth=axes_lw, zorder=6)

        # FFT panel: the x axis runs from 0 dB (left) down to DB_FLOOR (right)
        ax_fft.set_ylim(0.0, N_MELS - 1)
        ax_fft.set_xlim(0.0, DB_FLOOR)
        ax_fft.tick_params(axis="y", labelleft=False, left=False)

        xticks = list(np.arange(0, DB_FLOOR - 1e-9, -FFT_DB_STEP))
        if len(xticks) == 0 or xticks[-1] != DB_FLOOR:
            xticks.append(DB_FLOOR)
        ax_fft.set_xticks(xticks)
        ax_fft.set_xlabel(FFT_X_LABEL)

        for sp in ax_fft.spines.values():
            sp.set_visible(False)

        norm = Normalize(vmin=DB_FLOOR, vmax=0.0)

        # Instantaneous FFT (colour-mapped by level)
        lc = LineCollection([], cmap=CMAP, norm=norm, linewidth=FFT_LINE_LW)
        ax_fft.add_collection(lc)

        # Max-hold envelope (black)
        hold_db = np.full(N_MELS, DB_FLOOR, dtype=np.float32)
        y_hold = np.arange(N_MELS, dtype=np.float32)

        hold_color = (0.0, 0.0, 0.0)
        hold_line, = ax_fft.plot(
            hold_db, y_hold,
            color=hold_color,
            linewidth=HOLD_LINE_LW,
            alpha=HOLD_LINE_ALPHA,
            zorder=HOLD_LINE_ZORDER
        )

        # Global FFT (purple), computed once and hidden until the tail
        full_x_db, full_y_pos = full_fft_global_curve_dbfs_smoothed(
            y_audio, FS, FMIN, FMAX,
            smooth_hz=FULLFFT_SMOOTH_HZ,
            min_bins=FULLFFT_MIN_BINS,
            max_points=FULLFFT_MAX_POINTS,
            peak_normalize=FULLFFT_PEAK_NORMALIZE
        )
        full_line, = ax_fft.plot(
            full_x_db, full_y_pos,
            color=FULLFFT_COLOR,
            linewidth=FULLFFT_LW,
            alpha=FULLFFT_ALPHA,
            zorder=HOLD_LINE_ZORDER + 3
        )
        full_line.set_visible(False)

        # Mini legends (inside the FFT panel, in axes coordinates)
        # The peak-hold legend is always visible.
        y0 = FFT_LEG_ANCHOR_Y
        ax_fft.plot(
            [FFT_LEG_LINE_X0, FFT_LEG_LINE_X1], [y0, y0],
            transform=ax_fft.transAxes,
            color=hold_color,
            linewidth=HOLD_LINE_LW,
            alpha=HOLD_LINE_ALPHA,
            solid_capstyle="butt",
            zorder=HOLD_LINE_ZORDER + 10
        )
        ax_fft.text(
            FFT_LEG_LINE_X1 + 0.02, y0,
            "FFT peak hold",
            transform=ax_fft.transAxes,
            va="center", ha="left",
            fontsize=FFT_LEG_FONTSIZE,
            color=hold_color,
            zorder=HOLD_LINE_ZORDER + 10
        )

        # The global-FFT legend appears together with the purple curve.
        y1 = y0 - FFT_LEG_PAD_Y
        full_label = FULLFFT_LABEL_NORM if FULLFFT_PEAK_NORMALIZE else FULLFFT_LABEL_RAW

        full_leg_line = ax_fft.plot(
            [FFT_LEG_LINE_X0, FFT_LEG_LINE_X1], [y1, y1],
            transform=ax_fft.transAxes,
            color=FULLFFT_COLOR,
            linewidth=FULLFFT_LW,
            alpha=FULLFFT_ALPHA,
            solid_capstyle="butt",
            zorder=HOLD_LINE_ZORDER + 10
        )[0]
        full_leg_text = ax_fft.text(
            FFT_LEG_LINE_X1 + 0.02, y1,
            full_label,
            transform=ax_fft.transAxes,
            va="center", ha="left",
            fontsize=FFT_LEG_FONTSIZE,
            color="black",
            zorder=HOLD_LINE_ZORDER + 10
        )
        full_leg_line.set_visible(False)
        full_leg_text.set_visible(False)

        # Grids
        ax.grid(True, axis="y", color=GRID_COLOR, alpha=GRID_ALPHA, linewidth=GRID_LW, zorder=5)
        ax_fft.grid(True, axis="y", color=GRID_COLOR, alpha=GRID_ALPHA, linewidth=GRID_LW, zorder=5)
        ax_fft.grid(True, axis="x", color=GRID_COLOR, alpha=GRID_ALPHA * 0.8, linewidth=GRID_LW, zorder=5)

        writer = imageio.get_writer(str(out_mp4_silent), fps=FPS, codec="libx264", quality=8)
        for i, t in enumerate(times):
            cursor.set_xdata([t, t])

            y_pos, x_db, cur_mel_db = fft_profile_dbfs_v2_mel_ypos_with_binned(
                y_audio, sr=FS, t=t,
                fft_win=FFT_WIN, fmin=FMIN, fmax=FMAX, n_mels=N_MELS
            )

            # Update the max-hold envelope.
            hold_db = np.maximum(hold_db, cur_mel_db)
            hold_line.set_xdata(hold_db)

            # Update the instantaneous FFT line (one segment per pair of points).
            if len(x_db) >= 2:
                pts = np.column_stack([x_db, y_pos]).reshape(-1, 1, 2)
                segs = np.concatenate([pts[:-1], pts[1:]], axis=1)
                lc.set_segments(segs)
                lc.set_array(x_db[:-1])
                lc.set_clim(DB_FLOOR, 0.0)
            else:
                lc.set_segments([])

            # Global FFT + its legend: only on the tail, then left visible.
            if i >= show_from_idx:
                full_line.set_visible(True)
                full_leg_line.set_visible(True)
                full_leg_text.set_visible(True)

            fig.canvas.draw()
            # Drop the alpha channel; copy because the buffer is reused.
            frame = np.asarray(fig.canvas.buffer_rgba())[:, :, :3].copy()
            writer.append_data(frame)
    finally:
        if writer is not None:
            writer.close()
        plt.close(fig)

# Loop principal
# Main loop: one video per (unit, audio tag) pair.
for u in UNITS:
    udir = unit_dir(u)
    print(f"== {u} ==")

    for tag, fname in AUDIO_TAGS:
        wav_path = udir / fname
        if not wav_path.exists():
            print(f"[SKIP] {u} no tiene {fname}")
            continue

        y, sr = sf.read(wav_path)
        # Keep only the first channel of multi-channel audio.
        if y.ndim > 1:
            y = y[:, 0]

        # Bring everything to the common rate FS.
        if sr != FS:
            y = librosa.resample(y.astype(float), orig_sr=sr, target_sr=FS)
            sr = FS
        y = y.astype(np.float32)

        # Fade in, then prepend the lead-in silence, then fade out.
        y = apply_fadein(y, sr, FADEIN_MS)
        y = prepend_silence(y, sr, LEAD_SILENCE_S)
        y = apply_fadeout(y, sr, FADEOUT_MS)

        # Peak normalisation
        y = normalize_peak_0dbfs(y)

        duration_s = len(y) / sr
        S_db = mel_dbfs(y, sr)

        out_mp4 = udir / f"video_{tag}_mel.mp4"

        # The processed audio and the silent video live in a temporary
        # directory; only the muxed result is written next to the WAV.
        with tempfile.TemporaryDirectory() as td:
            td = Path(td)
            tmp_wav = td / "audio.wav"
            tmp_mp4 = td / "video_silent.mp4"

            sf.write(tmp_wav, y, sr, subtype="PCM_16")
            render_video_with_cursor_fft_and_hold(y, S_db, tmp_mp4, duration_s)
            mux_audio(tmp_mp4, tmp_wav, out_mp4)

        print(f"[OK] {out_mp4.name}")


In [None]:
# FIGURA FIJA (PNG) — Mel-spectrogram + panel FFT (solo GLOBAL FFT) + etiquetas de peaks


from pathlib import Path
import numpy as np
import soundfile as sf
import librosa
import matplotlib.pyplot as plt
import warnings
from matplotlib.colors import LinearSegmentedColormap

warnings.filterwarnings("ignore", category=UserWarning)

# BASE PARAMETERS
# NOTE: like the video cell, this cell rebinds ROOT_NODES, ROOT_TIPS, FS,
# N_FFT and HOP, which the setup cell defines with different values.

UNITS = ["node_47"]
#UNITS = ["node_47", "node_48", "node_49", "node_76", "node_77", "node_80", "node_74"]

ROOT_NODES = Path("../results/asr_KNN/SYNTH") / "AUDIO_SONIF"
ROOT_TIPS  = Path("../results/asr_KNN/SYNTH_TIPS") / "AUDIO_SONIF"

# (tag, wav file name) pairs to process.
AUDIO_TAGS = [
    ("mean", "sonif_mean.wav"),
    ("max",  "sonif_max.wav"),
    ("min",  "sonif_min.wav"),
]

# audio prep
FS = 44100
LEAD_SILENCE_S = 1.0   # seconds of lead-in silence skipped by the global FFT
FADEIN_MS  = 40.0
FADEOUT_MS = 40.0

# mel spectrogram
N_FFT  = 1024
HOP    = 256
N_MELS = 128
FMIN   = 0
FMAX   = 10000

DB_FLOOR = -40.0   # lower clip (dB) for the spectrogram and the FFT curve

# visual
DPI = 600
FIG_W, FIG_H = 12, (1808/300)   # inches

# labels
X_LABEL  = "Time (s)"
Y_LABEL  = "Frequency (kHz)"
CB_LABEL = "Power (dBFS)"
X_TICKS_S  = [0, 1, 2, 3, 4, 5, 6]
Y_TICKS_HZ = [0, 500, 1000, 2000, 4000, 6000, 8000, 10000]

# CMAP
# NOTE(review): the names suggest a white-to-black ramp, but the last stop is
# pure blue (#0000FF) - confirm this is intended.
RAINBOW_W2K = [
    "#ffffff",
    "#ff0000",
    "#ff7a00",
    "#ffd400",
    "#28c300",
    "#00d9ff",
    "#0066ff",
    "#0000FF",
]
CMAP = LinearSegmentedColormap.from_list("W_RAINBOW_K", RAINBOW_W2K, N=256)

# Visual tweaks
GRID_COLOR = (0.6, 0.6, 0.6)
GRID_ALPHA = 0.30
GRID_LW    = 0.8

FFT_PANEL_RATIO = 1.4   # width of the FFT panel relative to 6.0 for the spectrogram
PANEL_WSPACE    = 0.12

FFT_DB_STEP  = 10
FFT_X_LABEL  = "Power (dBFS)"

# margins
FIG_LEFT   = 0.05
FIG_RIGHT  = 0.999
FIG_TOP    = 0.93
FIG_BOTTOM = 0.13

# --- FULL FFT (GLOBAL) ---
FULLFFT_COLOR = "purple"
FULLFFT_LW    = 1.2
FULLFFT_ALPHA = 0.70

FULLFFT_SMOOTH_HZ      = 120.0  # width of the moving-average smoothing, in Hz
FULLFFT_MIN_BINS       = 21     # minimum smoothing-kernel length, in bins
FULLFFT_MAX_POINTS     = 2500   # the curve is decimated above this many points
FULLFFT_PEAK_NORMALIZE = True   # shift the curve so its peak sits at 0 dB

FULLFFT_TRIM_SILENCE = True
FULLFFT_TRIM_TOP_DB  = 40.0
FULLFFT_EDGE_FADE_MS = 12.0

# mini legend inside the FFT panel
FFT_LEG_ANCHOR_X = 0.04
FFT_LEG_ANCHOR_Y = 0.97
FFT_LEG_FONTSIZE = 9
FFT_LEG_LINE_X0  = 0.04
FFT_LEG_LINE_X1  = 0.16

FULLFFT_LABEL_NORM = "Global FFT (normalized)"
FULLFFT_LABEL_RAW  = "Global FFT"

# Peak labels (targets + local search)
PEAK_TARGETS_KHZ   = [1.3, 1.7, 2.4]
PEAK_SEARCH_BW_HZ  = 150.0

# marker
PEAK_MARKER_SIZE   = 18
PEAK_MARKER_FC     = "black"
PEAK_MARKER_EC     = "black"
PEAK_MARKER_LW     = 0.0

# text
PEAK_TEXT_FONTSIZE = 8
PEAK_Y_OFFSET_BINS = 2.0

# offset
PEAK_X_OFFSET_DB_DEFAULT = -1.2 + 6.0

PEAK_X_OFFSET_DB_BY_INDEX = {
    1: +6.0,   # move the label of the 2nd peak to the left
}

# avoid clipping at the panel edges
FFT_XPAD_LEFT_DB = 6.0
FFT_XPAD_RIGHT_DB = 0.0

# Helpers
def unit_dir(name: str) -> Path:
    """Resolve a unit name to its directory under the audio output roots.

    ``node_*`` names map into ``ROOT_NODES`` and ``tip_*`` names into
    ``ROOT_TIPS``. Any other name is tried as ``tip_<name>`` and accepted
    only if that directory exists. Raises ``ValueError`` otherwise.
    """
    for prefix, root in (("node_", ROOT_NODES), ("tip_", ROOT_TIPS)):
        if name.startswith(prefix):
            return root / name

    bare_tip = ROOT_TIPS / f"tip_{name}"
    if bare_tip.exists():
        return bare_tip

    raise ValueError(f"No reconozco unidad '{name}'. Usa 'node_###' o 'tip_...'")

def apply_fadein(y, sr, fade_ms):
    """Return ``y`` with a linear 0 -> 1 fade over its first ``fade_ms`` milliseconds.

    The fade is clamped to the signal length, so a signal shorter than the
    requested fade is ramped over its whole duration instead of raising a
    broadcasting error. ``y`` itself is returned, not a copy, when the
    effective fade is one sample or less; otherwise a faded copy is returned
    and the input is left untouched.
    """
    n = int(round((fade_ms / 1000.0) * sr))
    # Never ramp over more samples than the signal has.
    n = min(n, len(y))
    if n <= 1:
        return y
    ramp = np.linspace(0.0, 1.0, n, endpoint=True)
    y2 = y.copy()
    y2[:n] *= ramp
    return y2

def apply_fadeout(y, sr, fade_ms):
    """Return ``y`` with a linear 1 -> 0 fade over its last ``fade_ms`` milliseconds.

    The fade is clamped to the signal length, so a signal shorter than the
    requested fade is ramped over its whole duration instead of raising a
    broadcasting error. ``y`` itself is returned, not a copy, when the
    effective fade is one sample or less; otherwise a faded copy is returned
    and the input is left untouched.
    """
    n = int(round((fade_ms / 1000.0) * sr))
    # Never ramp over more samples than the signal has.
    n = min(n, len(y))
    if n <= 1:
        return y
    ramp = np.linspace(1.0, 0.0, n, endpoint=True)
    y2 = y.copy()
    y2[-n:] *= ramp
    return y2

def prepend_silence(y, sr, silence_s):
    """Return ``y`` preceded by ``silence_s`` seconds of zeros at rate ``sr``.

    The padding uses ``y``'s dtype. ``y`` itself is returned when the
    rounded padding length is not positive.
    """
    pad_len = int(round(silence_s * sr))
    if pad_len > 0:
        lead_in = np.zeros(pad_len, dtype=y.dtype)
        return np.concatenate((lead_in, y))
    return y

def normalize_peak_0dbfs(y):
    """Scale ``y`` so that its largest absolute sample equals 1.

    Empty or all-zero input is returned unchanged (same object).
    """
    if not len(y):
        return y
    peak = np.max(np.abs(y))
    return y if peak <= 0 else y / peak

def mel_dbfs(y, sr):
    """Return the mel power spectrogram of ``y`` in dB, clipped to ``[DB_FLOOR, 0]``.

    The analysis settings come from the module-level ``N_FFT``, ``HOP``,
    ``N_MELS``, ``FMIN`` and ``FMAX``; the dB reference is 1.0.
    """
    mel_settings = dict(
        n_fft=N_FFT, hop_length=HOP,
        n_mels=N_MELS, fmin=FMIN, fmax=FMAX, power=2.0,
    )
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, **mel_settings)
    mel_db = librosa.power_to_db(mel_power, ref=1.0)
    return np.clip(mel_db, DB_FLOOR, 0.0)

def make_cbar_ticks(db_floor):
    """Build colour-bar tick values running from ``db_floor`` up to 0 dB.

    The spacing is 5 dB for ranges up to 25 dB, 10 dB up to 65 dB and 20 dB
    beyond that. ``db_floor`` and 0.0 are always included as end points.
    Returns a list of floats rounded to three decimals.
    """
    span = abs(float(db_floor))
    step = 5 if span <= 25 else (10 if span <= 65 else 20)

    ticks = list(np.arange(db_floor, 0.0001, step))
    if not ticks or abs(ticks[0] - db_floor) > 1e-6:
        ticks.insert(0, db_floor)
    if abs(ticks[-1] - 0.0) > 1e-6:
        ticks.append(0.0)

    cleaned = []
    for value in ticks:
        # Collapse tiny values (and -0.0) to a plain 0.0.
        cleaned.append(0.0 if abs(value) < 1e-9 else float(np.round(value, 3)))
    return cleaned

def mel_bin_positions_for_hz_ticks(y_ticks_hz):
    """Convert tick frequencies in Hz to mel-bin axis positions and kHz labels.

    Ticks outside the range spanned by ``librosa.mel_frequencies`` for the
    module-level ``N_MELS``, ``FMIN`` and ``FMAX`` are dropped. Whole
    kilohertz values are labelled without decimals, all others with one.
    Returns ``(positions, labels)``.
    """
    centers_hz = librosa.mel_frequencies(n_mels=N_MELS, fmin=FMIN, fmax=FMAX)
    lowest = centers_hz[0] - 1e-9
    highest = centers_hz[-1] + 1e-9

    kept_hz = [hz for hz in y_ticks_hz if lowest <= hz <= highest]
    positions = np.interp(kept_hz, centers_hz, np.arange(N_MELS))

    labels = []
    for hz in kept_hz:
        if hz % 1000 == 0:
            labels.append(f"{hz/1000:.0f}")
        else:
            labels.append(f"{hz/1000:.1f}")
    return positions, labels

def pick_peak_near(freqs_hz, x_db, target_hz, bw_hz):
    """Return the index of the strongest bin within ``bw_hz`` of ``target_hz``.

    The search band is ``[target_hz - bw_hz, target_hz + bw_hz]``, both ends
    included. If no frequency falls inside it, the index of the bin closest
    to ``target_hz`` is returned instead.
    """
    band_lo = target_hz - bw_hz
    band_hi = target_hz + bw_hz
    in_band = np.flatnonzero((freqs_hz >= band_lo) & (freqs_hz <= band_hi))

    if in_band.size == 0:
        return int(np.argmin(np.abs(freqs_hz - target_hz)))

    # Highest level = closest to 0 dB.
    strongest = np.argmax(x_db[in_band])
    return int(in_band[int(strongest)])

# FULL FFT (GLOBAL) estándar + suavizado en dB (devuelve freqs_hz para peaks)
def full_fft_global_curve_dbfs_smoothed(y_audio, sr, fmin, fmax,
                                        smooth_hz, min_bins, max_points,
                                        peak_normalize=False):
    """Compute a smoothed whole-signal FFT curve in dB, with its frequencies.

    The lead-in silence (``LEAD_SILENCE_S``) is removed, the remainder is
    optionally trimmed and edge-faded (``FULLFFT_*`` constants),
    Hann-windowed and transformed in one FFT. The spectrum is limited to
    ``[fmin, fmax]``, converted to dB, smoothed with a moving average about
    ``smooth_hz`` wide (at least ``min_bins`` bins), optionally shifted so
    its peak is 0 dB, clipped to ``[DB_FLOOR, 0]`` and decimated to roughly
    ``max_points`` points.

    Returns
    -------
    (freqs, x_db, y_pos) : bin frequencies in Hz, levels in dB and matching
        mel-bin axis positions. When fewer than 8 samples or 3 bins remain,
        the single-point fallback ``([0.0], [DB_FLOOR], [0.0])`` is returned.

    Notes
    -----
    The edge fades are skipped when the trimmed segment is shorter than the
    fade itself. Previously a silent or very short segment made the fade
    helpers raise a broadcasting error before the length guard was reached.
    """
    # Drop the lead-in silence.
    n0 = int(round(LEAD_SILENCE_S * sr))
    y_use = y_audio[n0:] if len(y_audio) > n0 else y_audio
    y_use = y_use.astype(np.float32)

    # Silence trim (global FFT only).
    if FULLFFT_TRIM_SILENCE and len(y_use) > 0:
        y_use, _ = librosa.effects.trim(y_use, top_db=float(FULLFFT_TRIM_TOP_DB))
        # Short edge fades, applied only when the segment can hold one.
        fade_n = int(round((FULLFFT_EDGE_FADE_MS / 1000.0) * sr))
        if len(y_use) >= fade_n:
            y_use = apply_fadein(y_use, sr, FULLFFT_EDGE_FADE_MS)
            y_use = apply_fadeout(y_use, sr, FULLFFT_EDGE_FADE_MS)

    L = len(y_use)
    if L < 8:
        return np.array([0.0], dtype=np.float32), np.array([DB_FLOOR], dtype=np.float32), np.array([0.0], dtype=np.float32)

    # Single FFT of the whole vector, Hann-windowed.
    win_t = np.hanning(L).astype(np.float32)
    coherent_gain = (np.sum(win_t) / 2.0) + 1e-12

    X = np.fft.rfft(y_use * win_t)
    mag = np.abs(X).astype(np.float32)
    freqs = np.fft.rfftfreq(L, d=1.0/sr).astype(np.float32)

    mask = (freqs >= fmin) & (freqs <= fmax)
    freqs = freqs[mask]
    mag   = mag[mask]
    if len(mag) < 3:
        return np.array([0.0], dtype=np.float32), np.array([DB_FLOOR], dtype=np.float32), np.array([0.0], dtype=np.float32)

    amp = mag / coherent_gain
    p = (amp ** 2).astype(np.float32)

    x_db = librosa.power_to_db(p, ref=1.0).astype(np.float32)

    # Smooth in the dB domain with an odd-length moving average.
    bin_hz = float(sr) / float(L)
    k = int(round(float(smooth_hz) / max(bin_hz, 1e-12)))
    k = max(int(min_bins), k)
    if k % 2 == 0:
        k += 1
    if k >= 3 and k < len(x_db):
        ker = np.ones(k, dtype=np.float32) / float(k)
        pad = k // 2
        # Reflect-pad so the "valid" convolution keeps the original length.
        x_pad = np.pad(x_db, (pad, pad), mode="reflect")
        x_db = np.convolve(x_pad, ker, mode="valid").astype(np.float32)

    # Optional normalisation: peak -> 0 dB.
    if peak_normalize:
        x_db = x_db - float(np.max(x_db))

    x_db = np.clip(x_db, DB_FLOOR, 0.0).astype(np.float32)

    # Map frequencies to the mel-bin y axis.
    mel_freqs = librosa.mel_frequencies(n_mels=N_MELS, fmin=fmin, fmax=fmax).astype(np.float32)
    mel_bins  = np.arange(N_MELS, dtype=np.float32)
    y_pos = np.interp(freqs, mel_freqs, mel_bins).astype(np.float32)

    # Decimate all three arrays together so they stay aligned.
    if max_points is not None and len(x_db) > int(max_points):
        step = int(np.ceil(len(x_db) / int(max_points)))
        freqs = freqs[::step]
        x_db  = x_db[::step]
        y_pos = y_pos[::step]

    return freqs, x_db, y_pos

# Render PNG
def render_static_png(y_audio, S_db, out_png, duration_s):
    """Render a two-panel figure (global FFT curve + spectrogram) and save it as PNG.

    The right panel shows ``S_db`` as an image; the left panel shares its
    y axis and shows the smoothed whole-signal spectrum returned by
    ``full_fft_global_curve_dbfs_smoothed``, with labelled peaks near each
    frequency listed in ``PEAK_TARGETS_KHZ``. All layout and styling come from
    module-level constants.

    Args:
        y_audio: Audio samples handed to the global-FFT helper together with
            the module-level ``FS`` (so it is assumed to be sampled at ``FS``).
        S_db: 2-D array drawn with ``imshow`` on a ``DB_FLOOR``..0 colour
            scale. Presumably a mel spectrogram in dB with ``N_MELS`` rows --
            TODO confirm against ``mel_dbfs``.
        out_png: Destination path passed to ``Figure.savefig``.
        duration_s: Right edge of the time axis, in seconds.

    Returns:
        None. The figure is written to ``out_png`` and always closed, even if
        rendering or saving raises.
    """
    fig = plt.figure(figsize=(FIG_W, FIG_H), dpi=DPI)
    # pyplot keeps every figure registered until it is explicitly closed, so
    # the close must also happen on the error path or a failing render inside
    # a batch loop would leak one figure per failure.
    try:
        gs = fig.add_gridspec(1, 2, width_ratios=[FFT_PANEL_RATIO, 6.0], wspace=PANEL_WSPACE)

        # The spectrogram axes are created first so the FFT panel can share its y axis.
        ax = fig.add_subplot(gs[0, 1])
        ax_fft = fig.add_subplot(gs[0, 0], sharey=ax)

        ax.set_facecolor("white")
        ax_fft.set_facecolor("white")
        fig.patch.set_facecolor("white")

        fig.subplots_adjust(left=FIG_LEFT, right=FIG_RIGHT, top=FIG_TOP, bottom=FIG_BOTTOM)

        # Spectrogram: the y extent is expressed in bin coordinates (0 .. N_MELS - 1).
        im = ax.imshow(
            S_db, origin="lower", aspect="auto", cmap=CMAP,
            vmin=DB_FLOOR, vmax=0.0,
            extent=[0.0, duration_s, 0.0, N_MELS - 1],
            zorder=0
        )
        ax.set_xlim(0.0, duration_s)

        ax.set_xlabel(X_LABEL)
        ax.set_ylabel(Y_LABEL)

        # Keep only the time ticks that fall inside the clip (small float tolerance).
        xt = [t for t in X_TICKS_S if t <= duration_s + 1e-9]
        ax.set_xticks(xt)

        # Y ticks are given in Hz and converted to bin positions by the helper.
        y_pos_ticks, y_lab = mel_bin_positions_for_hz_ticks(Y_TICKS_HZ)
        ax.set_yticks(y_pos_ticks)
        ax.set_yticklabels(y_lab)
        ax.tick_params(axis="y", labelleft=True, left=True, pad=8)

        cb = fig.colorbar(im, ax=ax, pad=0.015)
        cb.set_label(CB_LABEL)
        cb.set_ticks(make_cbar_ticks(DB_FLOOR))

        # FFT panel: x axis in dB, from 0 dB (plus padding) to DB_FLOOR (minus padding).
        # NOTE(review): DB_FLOOR is expected to be negative, which makes this
        # axis run from high to low dB -- confirm against the constants.
        x_left = 0.0 + float(FFT_XPAD_LEFT_DB)
        x_right = float(DB_FLOOR) - float(FFT_XPAD_RIGHT_DB)
        ax_fft.set_xlim(x_left, x_right)
        ax_fft.set_ylim(0.0, N_MELS - 1)
        ax_fft.tick_params(axis="y", labelleft=False, left=False)

        # Ticks every FFT_DB_STEP dB going down from 0; make sure the floor itself is included.
        xticks = list(np.arange(0, DB_FLOOR - 1e-9, -FFT_DB_STEP))
        if len(xticks) == 0 or xticks[-1] != DB_FLOOR:
            xticks.append(DB_FLOOR)
        ax_fft.set_xticks(xticks)
        ax_fft.set_xlabel(FFT_X_LABEL)

        for sp in ax_fft.spines.values():
            sp.set_visible(False)

        ax.grid(True, axis="y", color=GRID_COLOR, alpha=GRID_ALPHA, linewidth=GRID_LW, zorder=5)
        ax_fft.grid(True, axis="y", color=GRID_COLOR, alpha=GRID_ALPHA, linewidth=GRID_LW, zorder=5)
        ax_fft.grid(True, axis="x", color=GRID_COLOR, alpha=GRID_ALPHA * 0.8, linewidth=GRID_LW, zorder=5)

        # Global FFT curve: level in dB on x, bin position on y.
        freqs_hz, full_x_db, full_y_pos = full_fft_global_curve_dbfs_smoothed(
            y_audio, FS, FMIN, FMAX,
            smooth_hz=FULLFFT_SMOOTH_HZ,
            min_bins=FULLFFT_MIN_BINS,
            max_points=FULLFFT_MAX_POINTS,
            peak_normalize=FULLFFT_PEAK_NORMALIZE
        )
        ax_fft.plot(
            full_x_db, full_y_pos,
            color=FULLFFT_COLOR,
            linewidth=FULLFFT_LW,
            alpha=FULLFFT_ALPHA,
            zorder=10
        )

        # Mini legend: a short line sample plus a label, placed in axes coordinates.
        y0 = FFT_LEG_ANCHOR_Y
        ax_fft.plot(
            [FFT_LEG_LINE_X0, FFT_LEG_LINE_X1], [y0, y0],
            transform=ax_fft.transAxes,
            color=FULLFFT_COLOR,
            linewidth=FULLFFT_LW,
            alpha=FULLFFT_ALPHA,
            solid_capstyle="butt",
            zorder=20
        )
        lab = FULLFFT_LABEL_NORM if FULLFFT_PEAK_NORMALIZE else FULLFFT_LABEL_RAW
        ax_fft.text(
            FFT_LEG_LINE_X1 + 0.02, y0,
            lab,
            transform=ax_fft.transAxes,
            va="center", ha="left",
            fontsize=FFT_LEG_FONTSIZE,
            color="black",
            zorder=20
        )

        # Bounds used to keep peak labels inside the FFT panel. They do not
        # depend on the peak, so they are computed once; min/max is used
        # because the x limits may be given in descending order.
        x_max = max(x_left, x_right)
        x_min = min(x_left, x_right)
        y_max = N_MELS - 1.0

        # Label the peak found near each target frequency.
        for idx, fk in enumerate(PEAK_TARGETS_KHZ):
            target_hz = float(fk) * 1000.0
            j = pick_peak_near(freqs_hz, full_x_db, target_hz, PEAK_SEARCH_BW_HZ)

            fpk_hz = float(freqs_hz[j])
            dbpk = float(full_x_db[j])
            ypk = float(full_y_pos[j])

            # Peak marker (styling comes from the PEAK_MARKER_* constants).
            ax_fft.scatter(
                [dbpk], [ypk],
                s=PEAK_MARKER_SIZE,
                facecolor=PEAK_MARKER_FC,
                edgecolor=PEAK_MARKER_EC,
                linewidth=PEAK_MARKER_LW,
                zorder=30
            )

            # Per-peak horizontal offset (with a default), then clamp into the panel.
            x_off = float(PEAK_X_OFFSET_DB_BY_INDEX.get(idx, PEAK_X_OFFSET_DB_DEFAULT))
            x_text = max(min(dbpk + x_off, x_max), x_min)

            y_text = ypk + float(PEAK_Y_OFFSET_BINS)
            y_text = max(min(y_text, y_max), 0.0)

            ax_fft.text(
                x_text, y_text,
                f"{fpk_hz/1000:.2f} kHz\n{dbpk:.1f} dB",
                fontsize=PEAK_TEXT_FONTSIZE,
                color="black",
                ha="left",
                va="bottom",
                zorder=31
            )

        fig.savefig(out_png, dpi=DPI, facecolor="white", bbox_inches=None)
    finally:
        plt.close(fig)

# Main loop: render one mel-spectrogram PNG per (unit, audio tag) pair
for u in UNITS:
    udir = unit_dir(u)
    print(f"== {u} ==")

    for tag, fname in AUDIO_TAGS:
        wav_path = udir / fname
        if wav_path.exists():
            # Load the WAV; multi-channel files are reduced to their first channel.
            y, sr = sf.read(wav_path)
            y = y[:, 0] if y.ndim > 1 else y

            # Bring the signal to the common sample rate FS before processing.
            if sr != FS:
                y, sr = librosa.resample(y.astype(float), orig_sr=sr, target_sr=FS), FS
            y = y.astype(np.float32)

            # Fade-in, then leading silence, then fade-out, then final peak normalization.
            y = prepend_silence(apply_fadein(y, sr, FADEIN_MS), sr, LEAD_SILENCE_S)
            y = normalize_peak_0dbfs(apply_fadeout(y, sr, FADEOUT_MS))

            # Duration and spectrogram are computed from the processed signal.
            duration_s = len(y) / sr
            S_db = mel_dbfs(y, sr)

            # The figure is written next to the audio, inside the unit directory.
            out_png = udir / f"fig_{tag}_mel.png"
            render_static_png(y, S_db, out_png, duration_s)

            print(f"[OK] {out_png.name}")
        else:
            # Nothing to render for this tag; report it and move on.
            print(f"[SKIP] {u} no tiene {fname}")
